llm-testrunner-components 1.2.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/README.md +9 -5
  2. package/dist/cjs/{app-chips_4.cjs.entry.js → app-chips_5.cjs.entry.js} +38 -6
  3. package/dist/cjs/app-chips_5.cjs.entry.js.map +1 -0
  4. package/dist/cjs/index.cjs.js +499 -68
  5. package/dist/cjs/index.cjs.js.map +1 -1
  6. package/dist/cjs/llm-testrunner.cjs.js +1 -1
  7. package/dist/cjs/loader.cjs.js +1 -1
  8. package/dist/collection/collection-manifest.json +1 -0
  9. package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js +22 -12
  10. package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js.map +1 -1
  11. package/dist/collection/components/llm-test-runner/llm-test-runner.js +59 -15
  12. package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
  13. package/dist/collection/components/llm-test-runner/test-cases/chat-history.css +101 -0
  14. package/dist/collection/components/llm-test-runner/test-cases/chat-history.js +105 -0
  15. package/dist/collection/components/llm-test-runner/test-cases/chat-history.js.map +1 -0
  16. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js +45 -5
  17. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js.map +1 -1
  18. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.css +21 -0
  19. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js +12 -2
  20. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map +1 -1
  21. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js +2 -2
  22. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map +1 -1
  23. package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js +1 -1
  24. package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js.map +1 -1
  25. package/dist/collection/index.js.map +1 -1
  26. package/dist/collection/lib/evaluation/actual-value-resolver.js +52 -0
  27. package/dist/collection/lib/evaluation/actual-value-resolver.js.map +1 -0
  28. package/dist/collection/lib/evaluation/evaluation-engine.js +1 -1
  29. package/dist/collection/lib/evaluation/evaluation-engine.js.map +1 -1
  30. package/dist/collection/lib/evaluation/evaluation-service.js +55 -17
  31. package/dist/collection/lib/evaluation/evaluation-service.js.map +1 -1
  32. package/dist/collection/lib/evaluation/types.js.map +1 -1
  33. package/dist/collection/lib/form/components/app-chips.js +1 -1
  34. package/dist/collection/lib/form/components/app-select.js +1 -1
  35. package/dist/collection/lib/form/components/app-textarea.js +2 -2
  36. package/dist/collection/lib/import-export/test-suite-exporter.js +4 -0
  37. package/dist/collection/lib/import-export/test-suite-exporter.js.map +1 -1
  38. package/dist/collection/lib/import-export/test-suite-importer.js +7 -1
  39. package/dist/collection/lib/import-export/test-suite-importer.js.map +1 -1
  40. package/dist/collection/lib/test-cases/test-case-factory.js +7 -0
  41. package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -1
  42. package/dist/collection/lib/test-cases/test-case-mutations.js +58 -23
  43. package/dist/collection/lib/test-cases/test-case-mutations.js.map +1 -1
  44. package/dist/collection/schemas/expected-outcome.js +39 -0
  45. package/dist/collection/schemas/expected-outcome.js.map +1 -1
  46. package/dist/collection/schemas/model-response.js +7 -0
  47. package/dist/collection/schemas/model-response.js.map +1 -0
  48. package/dist/collection/schemas/test-case.js +8 -1
  49. package/dist/collection/schemas/test-case.js.map +1 -1
  50. package/dist/collection/types/expected-outcome.js.map +1 -1
  51. package/dist/collection/types/llm-test-runner.js.map +1 -1
  52. package/dist/collection/types/test-case.js.map +1 -1
  53. package/dist/components/app-chips.js +1 -1
  54. package/dist/components/app-select.js +1 -1
  55. package/dist/components/app-textarea.js +1 -1
  56. package/dist/components/chat-history.d.ts +11 -0
  57. package/dist/components/chat-history.js +2 -0
  58. package/dist/components/chat-history.js.map +1 -0
  59. package/dist/components/index.js +1 -1
  60. package/dist/components/llm-test-runner.js +1 -1
  61. package/dist/components/{p-CVtKFBJl.js → p-D2qDAxFN.js} +2 -2
  62. package/dist/components/{p-Dv7cB5FU.js → p-D4dHUFN9.js} +2 -2
  63. package/dist/components/{p-CE5-1jfZ.js → p-eN2dLrsr.js} +2 -2
  64. package/dist/components/p-kmtfMXcQ.js +2 -0
  65. package/dist/components/p-kmtfMXcQ.js.map +1 -0
  66. package/dist/components/{p-BcygfrMf.js → p-wzA48RFK.js} +3 -3
  67. package/dist/components/p-wzA48RFK.js.map +1 -0
  68. package/dist/esm/{app-chips_4.entry.js → app-chips_5.entry.js} +38 -7
  69. package/dist/esm/app-chips_5.entry.js.map +1 -0
  70. package/dist/esm/index.js +499 -68
  71. package/dist/esm/index.js.map +1 -1
  72. package/dist/esm/llm-testrunner.js +1 -1
  73. package/dist/esm/loader.js +1 -1
  74. package/dist/llm-testrunner/index.esm.js +2 -2
  75. package/dist/llm-testrunner/index.esm.js.map +1 -1
  76. package/dist/llm-testrunner/llm-testrunner.esm.js +1 -1
  77. package/dist/llm-testrunner/p-5bf1fc78.entry.js +2 -0
  78. package/dist/llm-testrunner/p-5bf1fc78.entry.js.map +1 -0
  79. package/dist/react/components.d.ts +6 -1
  80. package/dist/react/components.d.ts.map +1 -1
  81. package/dist/react/components.js +9 -0
  82. package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +4 -4
  83. package/dist/types/components/llm-test-runner/test-cases/chat-history.d.ts +14 -0
  84. package/dist/types/components/llm-test-runner/test-cases/expected-outcome-renderer.d.ts +1 -0
  85. package/dist/types/components/llm-test-runner/test-cases/llm-test-case-row.d.ts +6 -0
  86. package/dist/types/components/llm-test-runner/test-cases/llm-test-cases.d.ts +3 -0
  87. package/dist/types/components/llm-test-runner/test-cases/output/response-output.d.ts +2 -1
  88. package/dist/types/components.d.ts +55 -2
  89. package/dist/types/index.d.ts +1 -1
  90. package/dist/types/lib/evaluation/actual-value-resolver.d.ts +9 -0
  91. package/dist/types/lib/evaluation/evaluation-service.d.ts +2 -2
  92. package/dist/types/lib/evaluation/types.d.ts +1 -1
  93. package/dist/types/lib/import-export/test-suite-exporter.d.ts +4 -0
  94. package/dist/types/lib/import-export/test-suite-importer.d.ts +1 -1
  95. package/dist/types/lib/test-cases/test-case-mutations.d.ts +10 -1
  96. package/dist/types/schemas/expected-outcome.d.ts +116 -0
  97. package/dist/types/schemas/model-response.d.ts +7 -0
  98. package/dist/types/schemas/test-case.d.ts +93 -1
  99. package/dist/types/types/expected-outcome.d.ts +1 -1
  100. package/dist/types/types/llm-test-runner.d.ts +6 -3
  101. package/dist/types/types/test-case.d.ts +1 -1
  102. package/package.json +1 -1
  103. package/dist/cjs/app-chips_4.cjs.entry.js.map +0 -1
  104. package/dist/components/p-BcygfrMf.js.map +0 -1
  105. package/dist/esm/app-chips_4.entry.js.map +0 -1
  106. package/dist/llm-testrunner/p-5df053b4.entry.js +0 -2
  107. package/dist/llm-testrunner/p-5df053b4.entry.js.map +0 -1
  108. /package/dist/components/{p-CVtKFBJl.js.map → p-D2qDAxFN.js.map} +0 -0
  109. /package/dist/components/{p-Dv7cB5FU.js.map → p-D4dHUFN9.js.map} +0 -0
  110. /package/dist/components/{p-CE5-1jfZ.js.map → p-eN2dLrsr.js.map} +0 -0
package/dist/esm/index.js CHANGED
@@ -103,6 +103,10 @@ function formatTestSuiteAsJson(testCases) {
103
103
  id: testCase.id,
104
104
  question: testCase.question,
105
105
  expectedOutcome: testCase.expectedOutcome,
106
+ chatHistory: {
107
+ enabled: testCase.chatHistory.enabled,
108
+ value: testCase.chatHistory.value,
109
+ },
106
110
  }));
107
111
  return JSON.stringify(exportData, null, 2);
108
112
  }
@@ -288,6 +292,7 @@ const DEFAULT_EXPECTED_OUTCOME_SCHEMA = [
288
292
  function normalizeExpectedOutcomeField(field) {
289
293
  return {
290
294
  ...field,
295
+ evaluationSource: field.evaluationSource || { type: 'text' },
291
296
  evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
292
297
  };
293
298
  }
@@ -300,6 +305,7 @@ function createTestCase(expectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_SCHEMA)
300
305
  id: v4(),
301
306
  question: '',
302
307
  expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),
308
+ chatHistory: { enabled: false, value: '' },
303
309
  isRunning: false,
304
310
  };
305
311
  }
@@ -310,6 +316,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
310
316
  type: 'text',
311
317
  label: schemaField.label,
312
318
  placeholder: schemaField.placeholder,
319
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
313
320
  value: '',
314
321
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
315
322
  };
@@ -318,6 +325,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
318
325
  type: 'textarea',
319
326
  label: schemaField.label,
320
327
  placeholder: schemaField.placeholder,
328
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
321
329
  rows: schemaField.rows,
322
330
  value: '',
323
331
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
@@ -327,6 +335,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
327
335
  type: 'chips-input',
328
336
  label: schemaField.label,
329
337
  placeholder: schemaField.placeholder,
338
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
330
339
  value: [],
331
340
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
332
341
  };
@@ -335,6 +344,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
335
344
  type: 'select',
336
345
  label: schemaField.label,
337
346
  placeholder: schemaField.placeholder,
347
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
338
348
  value: schemaField.options[0],
339
349
  options: schemaField.options,
340
350
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
@@ -359,6 +369,7 @@ function createExpectedOutcomeFromSchema(expectedOutcomeSchema) {
359
369
  function createTestCaseFromInput(data) {
360
370
  return {
361
371
  ...data,
372
+ chatHistory: data.chatHistory ?? { enabled: false, value: '' },
362
373
  expectedOutcome: data.expectedOutcome.map(normalizeExpectedOutcomeField),
363
374
  };
364
375
  }
@@ -2563,6 +2574,122 @@ function handleIntersectionResults(result, left, right) {
2563
2574
  result.value = merged.data;
2564
2575
  return result;
2565
2576
  }
2577
+ const $ZodRecord = /*@__PURE__*/ $constructor("$ZodRecord", (inst, def) => {
2578
+ $ZodType.init(inst, def);
2579
+ inst._zod.parse = (payload, ctx) => {
2580
+ const input = payload.value;
2581
+ if (!isPlainObject(input)) {
2582
+ payload.issues.push({
2583
+ expected: "record",
2584
+ code: "invalid_type",
2585
+ input,
2586
+ inst,
2587
+ });
2588
+ return payload;
2589
+ }
2590
+ const proms = [];
2591
+ const values = def.keyType._zod.values;
2592
+ if (values) {
2593
+ payload.value = {};
2594
+ const recordKeys = new Set();
2595
+ for (const key of values) {
2596
+ if (typeof key === "string" || typeof key === "number" || typeof key === "symbol") {
2597
+ recordKeys.add(typeof key === "number" ? key.toString() : key);
2598
+ const result = def.valueType._zod.run({ value: input[key], issues: [] }, ctx);
2599
+ if (result instanceof Promise) {
2600
+ proms.push(result.then((result) => {
2601
+ if (result.issues.length) {
2602
+ payload.issues.push(...prefixIssues(key, result.issues));
2603
+ }
2604
+ payload.value[key] = result.value;
2605
+ }));
2606
+ }
2607
+ else {
2608
+ if (result.issues.length) {
2609
+ payload.issues.push(...prefixIssues(key, result.issues));
2610
+ }
2611
+ payload.value[key] = result.value;
2612
+ }
2613
+ }
2614
+ }
2615
+ let unrecognized;
2616
+ for (const key in input) {
2617
+ if (!recordKeys.has(key)) {
2618
+ unrecognized = unrecognized ?? [];
2619
+ unrecognized.push(key);
2620
+ }
2621
+ }
2622
+ if (unrecognized && unrecognized.length > 0) {
2623
+ payload.issues.push({
2624
+ code: "unrecognized_keys",
2625
+ input,
2626
+ inst,
2627
+ keys: unrecognized,
2628
+ });
2629
+ }
2630
+ }
2631
+ else {
2632
+ payload.value = {};
2633
+ for (const key of Reflect.ownKeys(input)) {
2634
+ if (key === "__proto__")
2635
+ continue;
2636
+ let keyResult = def.keyType._zod.run({ value: key, issues: [] }, ctx);
2637
+ if (keyResult instanceof Promise) {
2638
+ throw new Error("Async schemas not supported in object keys currently");
2639
+ }
2640
+ // Numeric string fallback: if key is a numeric string and failed, retry with Number(key)
2641
+ // This handles z.number(), z.literal([1, 2, 3]), and unions containing numeric literals
2642
+ const checkNumericKey = typeof key === "string" && number$1.test(key) && keyResult.issues.length;
2643
+ if (checkNumericKey) {
2644
+ const retryResult = def.keyType._zod.run({ value: Number(key), issues: [] }, ctx);
2645
+ if (retryResult instanceof Promise) {
2646
+ throw new Error("Async schemas not supported in object keys currently");
2647
+ }
2648
+ if (retryResult.issues.length === 0) {
2649
+ keyResult = retryResult;
2650
+ }
2651
+ }
2652
+ if (keyResult.issues.length) {
2653
+ if (def.mode === "loose") {
2654
+ // Pass through unchanged
2655
+ payload.value[key] = input[key];
2656
+ }
2657
+ else {
2658
+ // Default "strict" behavior: error on invalid key
2659
+ payload.issues.push({
2660
+ code: "invalid_key",
2661
+ origin: "record",
2662
+ issues: keyResult.issues.map((iss) => finalizeIssue(iss, ctx, config())),
2663
+ input: key,
2664
+ path: [key],
2665
+ inst,
2666
+ });
2667
+ }
2668
+ continue;
2669
+ }
2670
+ const result = def.valueType._zod.run({ value: input[key], issues: [] }, ctx);
2671
+ if (result instanceof Promise) {
2672
+ proms.push(result.then((result) => {
2673
+ if (result.issues.length) {
2674
+ payload.issues.push(...prefixIssues(key, result.issues));
2675
+ }
2676
+ payload.value[keyResult.value] = result.value;
2677
+ }));
2678
+ }
2679
+ else {
2680
+ if (result.issues.length) {
2681
+ payload.issues.push(...prefixIssues(key, result.issues));
2682
+ }
2683
+ payload.value[keyResult.value] = result.value;
2684
+ }
2685
+ }
2686
+ }
2687
+ if (proms.length) {
2688
+ return Promise.all(proms).then(() => payload);
2689
+ }
2690
+ return payload;
2691
+ };
2692
+ });
2566
2693
  const $ZodEnum = /*@__PURE__*/ $constructor("$ZodEnum", (inst, def) => {
2567
2694
  $ZodType.init(inst, def);
2568
2695
  const values = getEnumValues(def.entries);
@@ -4146,6 +4273,49 @@ const intersectionProcessor = (schema, ctx, json, params) => {
4146
4273
  ];
4147
4274
  json.allOf = allOf;
4148
4275
  };
4276
+ const recordProcessor = (schema, ctx, _json, params) => {
4277
+ const json = _json;
4278
+ const def = schema._zod.def;
4279
+ json.type = "object";
4280
+ // For looseRecord with regex patterns, use patternProperties
4281
+ // This correctly represents "only validate keys matching the pattern" semantics
4282
+ // and composes well with allOf (intersections)
4283
+ const keyType = def.keyType;
4284
+ const keyBag = keyType._zod.bag;
4285
+ const patterns = keyBag?.patterns;
4286
+ if (def.mode === "loose" && patterns && patterns.size > 0) {
4287
+ // Use patternProperties for looseRecord with regex patterns
4288
+ const valueSchema = process$1(def.valueType, ctx, {
4289
+ ...params,
4290
+ path: [...params.path, "patternProperties", "*"],
4291
+ });
4292
+ json.patternProperties = {};
4293
+ for (const pattern of patterns) {
4294
+ json.patternProperties[pattern.source] = valueSchema;
4295
+ }
4296
+ }
4297
+ else {
4298
+ // Default behavior: use propertyNames + additionalProperties
4299
+ if (ctx.target === "draft-07" || ctx.target === "draft-2020-12") {
4300
+ json.propertyNames = process$1(def.keyType, ctx, {
4301
+ ...params,
4302
+ path: [...params.path, "propertyNames"],
4303
+ });
4304
+ }
4305
+ json.additionalProperties = process$1(def.valueType, ctx, {
4306
+ ...params,
4307
+ path: [...params.path, "additionalProperties"],
4308
+ });
4309
+ }
4310
+ // Add required for keys with discrete values (enum, literal, etc.)
4311
+ const keyValues = keyType._zod.values;
4312
+ if (keyValues) {
4313
+ const validKeyValues = [...keyValues].filter((v) => typeof v === "string" || typeof v === "number");
4314
+ if (validKeyValues.length > 0) {
4315
+ json.required = validKeyValues;
4316
+ }
4317
+ }
4318
+ };
4149
4319
  const nullableProcessor = (schema, ctx, json, params) => {
4150
4320
  const def = schema._zod.def;
4151
4321
  const inner = process$1(def.innerType, ctx, params);
@@ -4700,6 +4870,21 @@ function intersection(left, right) {
4700
4870
  right: right,
4701
4871
  });
4702
4872
  }
4873
+ const ZodRecord = /*@__PURE__*/ $constructor("ZodRecord", (inst, def) => {
4874
+ $ZodRecord.init(inst, def);
4875
+ ZodType.init(inst, def);
4876
+ inst._zod.processJSONSchema = (ctx, json, params) => recordProcessor(inst, ctx, json, params);
4877
+ inst.keyType = def.keyType;
4878
+ inst.valueType = def.valueType;
4879
+ });
4880
+ function record(keyType, valueType, params) {
4881
+ return new ZodRecord({
4882
+ type: "record",
4883
+ keyType,
4884
+ valueType: valueType,
4885
+ ...normalizeParams(params),
4886
+ });
4887
+ }
4703
4888
  const ZodEnum = /*@__PURE__*/ $constructor("ZodEnum", (inst, def) => {
4704
4889
  $ZodEnum.init(inst, def);
4705
4890
  ZodType.init(inst, def);
@@ -4937,7 +5122,7 @@ const ZodCustom = /*@__PURE__*/ $constructor("ZodCustom", (inst, def) => {
4937
5122
  inst._zod.processJSONSchema = (ctx, json, params) => customProcessor(inst, ctx);
4938
5123
  });
4939
5124
  function custom(fn, _params) {
4940
- return _custom(ZodCustom, (() => true), _params);
5125
+ return _custom(ZodCustom, fn ?? (() => true), _params);
4941
5126
  }
4942
5127
  function refine(fn, _params = {}) {
4943
5128
  return _refine(ZodCustom, fn, _params);
@@ -4952,6 +5137,19 @@ const optionalPositiveInt = number().int().positive().optional();
4952
5137
  const optionalString = string().optional();
4953
5138
  const selectOptionsSchema = array(nonEmptyString).min(1);
4954
5139
  const optionalNumber = number().optional();
5140
+ const textEvaluationSourceSchema = object({
5141
+ type: literal('text'),
5142
+ });
5143
+ const customEvaluationSourceSchema = object({
5144
+ type: literal('custom'),
5145
+ extractorId: nonEmptyString,
5146
+ });
5147
+ const evaluationSourceExtractorSchema = custom(value => typeof value === 'function', 'Extractor must be a function.');
5148
+ record(string().min(1), evaluationSourceExtractorSchema);
5149
+ const evaluationSourceSchema = discriminatedUnion('type', [
5150
+ textEvaluationSourceSchema,
5151
+ customEvaluationSourceSchema,
5152
+ ]);
4955
5153
  const expectedOutcomeModeSchema = _enum(['static', 'dynamic']);
4956
5154
  const evaluationParametersSchema = object({
4957
5155
  approach: _enum(EvaluationApproach),
@@ -4969,6 +5167,7 @@ const selectEvaluationParametersSchema = evaluationParametersSchema.superRefine(
4969
5167
  const defaultExpectedOutcomeBaseSchema = object({
4970
5168
  label: nonEmptyString,
4971
5169
  placeholder: optionalString,
5170
+ evaluationSource: evaluationSourceSchema.optional(),
4972
5171
  });
4973
5172
  const createDefaultExpectedOutcomeFieldSchemas = (baseSchema) => ({
4974
5173
  text: baseSchema.extend({
@@ -5061,18 +5260,55 @@ function validateExpectedOutcomeSchema(schema) {
5061
5260
  throw new Error(`Invalid expectedOutcomeSchema: ${parsed.error.issues[0].message}`);
5062
5261
  }
5063
5262
  }
5263
+ function validateExpectedOutcomeArrayWithExtractors(expectedOutcome, allowedExtractorIds) {
5264
+ const allowed = new Set(allowedExtractorIds);
5265
+ const schema = expectedOutcomeArraySchema.superRefine((fields, ctx) => {
5266
+ fields.forEach((field, index) => {
5267
+ if (field.evaluationSource?.type !== 'custom') {
5268
+ return;
5269
+ }
5270
+ if (allowed.has(field.evaluationSource.extractorId)) {
5271
+ return;
5272
+ }
5273
+ ctx.addIssue({
5274
+ code: 'custom',
5275
+ path: [index, 'evaluationSource', 'extractorId'],
5276
+ message: `Invalid expectedOutcome: Extractor "${field.evaluationSource.extractorId}" is not registered.`,
5277
+ });
5278
+ });
5279
+ });
5280
+ const parsed = schema.safeParse(expectedOutcome);
5281
+ if (!parsed.success) {
5282
+ throw new Error(parsed.error.issues[0].message);
5283
+ }
5284
+ }
5285
+ function getExtractorIds(extractors) {
5286
+ return Object.keys(extractors || {});
5287
+ }
5064
5288
 
5289
+ const modelResponseMetadataSchema = record(string(), unknown());
5290
+ const modelResponsePayloadSchema = object({
5291
+ text: string().optional(),
5292
+ metadata: modelResponseMetadataSchema.optional(),
5293
+ });
5294
+
5295
+ const testCaseChatHistorySchema = object({
5296
+ enabled: boolean(),
5297
+ value: string(),
5298
+ });
5065
5299
  const testCaseInputSchema = object({
5066
5300
  id: string(),
5067
5301
  question: string(),
5068
5302
  expectedOutcome: expectedOutcomeArraySchema,
5303
+ chatHistory: testCaseChatHistorySchema.optional(),
5069
5304
  });
5070
5305
  const testCaseInputArraySchema = array(testCaseInputSchema);
5071
5306
  object({
5072
5307
  id: string(),
5073
5308
  question: string(),
5074
5309
  expectedOutcome: expectedOutcomeArraySchema,
5075
- output: string().optional(),
5310
+ output: modelResponsePayloadSchema.optional(),
5311
+ chatHistory: testCaseChatHistorySchema,
5076
5312
  isRunning: boolean().optional(),
5077
5313
  error: string().optional(),
5078
5314
  evaluationResult: custom().optional(),
@@ -5094,10 +5330,15 @@ function validateTestCaseInputArray(data) {
5094
5330
  * @param jsonContent - The JSON string to parse and validate
5095
5331
  * @returns Validation result with test cases or error message
5096
5332
  */
5097
- function importTestSuite(jsonContent) {
5333
+ function importTestSuite(jsonContent, allowedExtractorIds = []) {
5098
5334
  try {
5099
5335
  const parsed = JSON.parse(jsonContent);
5100
5336
  validateTestCaseInputArray(parsed);
5337
+ if (allowedExtractorIds.length > 0) {
5338
+ parsed.forEach((testCase) => {
5339
+ validateExpectedOutcomeArrayWithExtractors(testCase.expectedOutcome, allowedExtractorIds);
5340
+ });
5341
+ }
5101
5342
  const testCases = parsed.map((item, index) => {
5102
5343
  try {
5103
5344
  return createTestCaseFromInput(item);
@@ -5123,7 +5364,7 @@ function importTestSuite(jsonContent) {
5123
5364
  }
5124
5365
 
5125
5366
  const MISSING_RESOLVER_MESSAGE = 'resolveExpectedOutcome is required when a test case has dynamic expected outcomes.';
5126
- function isDynamicTextareaField(field) {
5367
+ function isDynamicTextareaField$1(field) {
5127
5368
  return field.type === 'textarea' && field.outcomeMode === 'dynamic';
5128
5369
  }
5129
5370
  function applyResolvedDynamicValues(testCase, resolvedValues) {
@@ -5133,7 +5374,7 @@ function applyResolvedDynamicValues(testCase, resolvedValues) {
5133
5374
  const expectedOutcome = [...(testCase.expectedOutcome || [])];
5134
5375
  for (const resolved of resolvedValues) {
5135
5376
  const field = expectedOutcome[resolved.index];
5136
- if (!field || !isDynamicTextareaField(field)) {
5377
+ if (!field || !isDynamicTextareaField$1(field)) {
5137
5378
  continue;
5138
5379
  }
5139
5380
  expectedOutcome[resolved.index] = {
@@ -5148,7 +5389,7 @@ function applyResolvedDynamicValues(testCase, resolvedValues) {
5148
5389
  }
5149
5390
  async function resolveDynamicExpectedOutcomes(testCase, resolver) {
5150
5391
  const dynamicFields = (testCase.expectedOutcome || []).flatMap((field, index) => {
5151
- if (!isDynamicTextareaField(field)) {
5392
+ if (!isDynamicTextareaField$1(field)) {
5152
5393
  return [];
5153
5394
  }
5154
5395
  return [{ field, index }];
@@ -5166,6 +5407,15 @@ async function resolveDynamicExpectedOutcomes(testCase, resolver) {
5166
5407
  return applyResolvedDynamicValues(testCase, resolvedValues);
5167
5408
  }
5168
5409
 
5410
+ function isChipsInputField(field) {
5411
+ return field.type === 'chips-input';
5412
+ }
5413
+ function isTextareaField(field) {
5414
+ return field.type === 'textarea';
5415
+ }
5416
+ function isDynamicTextareaField(field) {
5417
+ return isTextareaField(field) && field.outcomeMode === 'dynamic';
5418
+ }
5169
5419
  function applyExpectedOutcomeChange(testCase, change) {
5170
5420
  const { index } = change;
5171
5421
  const expectedOutcome = [...(testCase.expectedOutcome || [])];
@@ -5173,73 +5423,99 @@ function applyExpectedOutcomeChange(testCase, change) {
5173
5423
  if (!target) {
5174
5424
  return testCase;
5175
5425
  }
5426
+ const commit = (updatedField) => {
5427
+ expectedOutcome[index] = updatedField;
5428
+ return { ...testCase, expectedOutcome };
5429
+ };
5176
5430
  switch (change.operation) {
5177
5431
  case 'set-value': {
5178
- if (target.type === 'chips-input') {
5432
+ if (isChipsInputField(target)) {
5179
5433
  return testCase;
5180
5434
  }
5181
- if (target.type === 'textarea' && target.outcomeMode === 'dynamic') {
5435
+ if (isDynamicTextareaField(target)) {
5182
5436
  return testCase;
5183
5437
  }
5184
- expectedOutcome[index] = {
5438
+ return commit({
5185
5439
  ...target,
5186
5440
  value: change.value,
5187
- };
5188
- return { ...testCase, expectedOutcome };
5441
+ });
5189
5442
  }
5190
5443
  case 'add-chip': {
5191
- if (target.type !== 'chips-input') {
5444
+ if (!isChipsInputField(target)) {
5192
5445
  return testCase;
5193
5446
  }
5194
- expectedOutcome[index] = {
5447
+ return commit({
5195
5448
  ...target,
5196
5449
  value: [...target.value, change.value],
5197
- };
5198
- return { ...testCase, expectedOutcome };
5450
+ });
5199
5451
  }
5200
5452
  case 'remove-chip': {
5201
- if (target.type !== 'chips-input') {
5453
+ if (!isChipsInputField(target)) {
5202
5454
  return testCase;
5203
5455
  }
5204
- expectedOutcome[index] = {
5456
+ return commit({
5205
5457
  ...target,
5206
5458
  value: target.value.filter(chip => chip !== change.value),
5207
- };
5208
- return { ...testCase, expectedOutcome };
5459
+ });
5209
5460
  }
5210
5461
  case 'set-evaluation-approach':
5211
5462
  return updateExpectedOutcomeFieldApproach(testCase, index, change.value);
5212
5463
  case 'set-outcome-mode': {
5213
- if (target.type !== 'textarea') {
5464
+ if (!isTextareaField(target)) {
5214
5465
  return testCase;
5215
5466
  }
5216
5467
  const mode = change.value;
5217
5468
  if (mode === 'static') {
5218
5469
  const { resolutionQuery: _, ...rest } = target;
5219
- expectedOutcome[index] = {
5470
+ return commit({
5220
5471
  ...rest,
5221
5472
  outcomeMode: 'static',
5222
5473
  value: '',
5223
- };
5474
+ });
5224
5475
  }
5225
5476
  else {
5226
- expectedOutcome[index] = {
5477
+ return commit({
5227
5478
  ...target,
5228
5479
  outcomeMode: 'dynamic',
5229
5480
  value: '',
5230
- };
5481
+ });
5231
5482
  }
5232
- return { ...testCase, expectedOutcome };
5233
5483
  }
5234
5484
  case 'set-resolution-query': {
5235
- if (target.type !== 'textarea' || target.outcomeMode !== 'dynamic') {
5485
+ if (!isDynamicTextareaField(target)) {
5236
5486
  return testCase;
5237
5487
  }
5238
- expectedOutcome[index] = {
5488
+ return commit({
5239
5489
  ...target,
5240
5490
  resolutionQuery: change.value,
5241
- };
5242
- return { ...testCase, expectedOutcome };
5491
+ });
5492
+ }
5493
+ case 'set-evaluation-source-type': {
5494
+ if (change.value === 'text') {
5495
+ return commit({
5496
+ ...target,
5497
+ evaluationSource: { type: 'text' },
5498
+ });
5499
+ }
5500
+ const extractorId = target.evaluationSource?.type === 'custom'
5501
+ ? target.evaluationSource.extractorId
5502
+ : (change.fallbackExtractorId ?? '');
5503
+ return commit({
5504
+ ...target,
5505
+ evaluationSource: {
5506
+ type: 'custom',
5507
+ extractorId,
5508
+ },
5509
+ });
5510
+ }
5511
+ case 'set-evaluation-source-extractor': {
5512
+ return commit({
5513
+ ...target,
5514
+ evaluationSource: {
5515
+ type: 'custom',
5516
+ extractorId: change.value,
5517
+ },
5518
+ });
5243
5519
  }
5244
5520
  }
5245
5521
  }
@@ -30020,7 +30296,7 @@ class LLMEvaluationEngine {
30020
30296
  const fieldRequest = {
30021
30297
  testCaseId: request.testCaseId,
30022
30298
  question: request.question,
30023
- actualResponse: request.actualResponse,
30299
+ actualResponse: field.actualResponse,
30024
30300
  expectedOutcome: field.expectedValue,
30025
30301
  evaluationParameters: field.evaluationParameters,
30026
30302
  };
@@ -30090,6 +30366,58 @@ class LLMEvaluationEngine {
30090
30366
  }
30091
30367
  }
30092
30368
 
30369
+ function toTextSource() {
30370
+ return { type: 'text' };
30371
+ }
30372
+ async function resolveActualValue(field, output, extractors) {
30373
+ const source = field.evaluationSource || toTextSource();
30374
+ if (source.type === 'text') {
30375
+ const text = output?.text?.trim();
30376
+ if (!text) {
30377
+ return {
30378
+ success: false,
30379
+ error: 'Model response text is empty.',
30380
+ };
30381
+ }
30382
+ return { success: true, value: text };
30383
+ }
30384
+ const extractor = extractors?.[source.extractorId];
30385
+ if (!extractor) {
30386
+ return {
30387
+ success: false,
30388
+ error: `Extractor "${source.extractorId}" is not registered.`,
30389
+ };
30390
+ }
30391
+ try {
30392
+ const extractedRaw = await extractor(output || {});
30393
+ if (typeof extractedRaw !== 'string') {
30394
+ return {
30395
+ success: false,
30396
+ error: `Extractor "${source.extractorId}" must return a string.`,
30397
+ };
30398
+ }
30399
+ const extracted = extractedRaw.trim();
30400
+ if (!extracted) {
30401
+ return {
30402
+ success: false,
30403
+ error: `Extractor "${source.extractorId}" returned an empty value.`,
30404
+ };
30405
+ }
30406
+ return {
30407
+ success: true,
30408
+ value: extracted,
30409
+ };
30410
+ }
30411
+ catch (error) {
30412
+ return {
30413
+ success: false,
30414
+ error: error instanceof Error
30415
+ ? error.message
30416
+ : `Extractor "${source.extractorId}" failed.`,
30417
+ };
30418
+ }
30419
+ }
30420
+
30093
30421
  /**
30094
30422
  * Service for evaluating test case responses
30095
30423
  */
@@ -30103,34 +30431,71 @@ class EvaluationService {
30103
30431
  * @param testCase - The test case to evaluate
30104
30432
  * @param onResult - Callback to handle the evaluation result
30105
30433
  */
30106
- async evaluateTestCase(testCase, onResult) {
30107
- if (!testCase.output) {
30108
- console.warn('⚠️ No output to evaluate for test case:', testCase.id);
30109
- return;
30110
- }
30111
- const fields = (testCase.expectedOutcome || []).flatMap((field, index) => {
30434
+ async evaluateTestCase(testCase, onResult, extractors) {
30435
+ const fields = [];
30436
+ const failedFields = [];
30437
+ for (const [index, field] of (testCase.expectedOutcome || []).entries()) {
30112
30438
  if (field.type === 'textarea' && field.outcomeMode === 'dynamic') {
30113
- return [];
30439
+ continue;
30114
30440
  }
30115
- return [
30116
- {
30441
+ const evaluationParameters = normalizeEvaluationParametersForField(field.type, field.evaluationParameters);
30442
+ const expectedValue = getFieldExpectedValue(field);
30443
+ const resolvedActualValue = await resolveActualValue(field, testCase.output, extractors);
30444
+ if (resolvedActualValue.success) {
30445
+ fields.push({
30117
30446
  index,
30118
30447
  label: field.label,
30119
30448
  type: field.type,
30120
- expectedValue: getFieldExpectedValue(field),
30121
- evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
30122
- },
30123
- ];
30124
- });
30449
+ expectedValue,
30450
+ actualResponse: resolvedActualValue.value,
30451
+ evaluationParameters,
30452
+ });
30453
+ }
30454
+ else {
30455
+ failedFields.push({
30456
+ index,
30457
+ label: field.label,
30458
+ type: field.type,
30459
+ expectedValue,
30460
+ passed: false,
30461
+ keywordMatches: [],
30462
+ evaluationParameters,
30463
+ evaluationApproachResult: {
30464
+ score: 0,
30465
+ approachUsed: evaluationParameters.approach,
30466
+ },
30467
+ error: 'error' in resolvedActualValue
30468
+ ? resolvedActualValue.error
30469
+ : 'Failed to resolve actual value.',
30470
+ });
30471
+ }
30472
+ }
30473
+ if (fields.length === 0) {
30474
+ if (failedFields.length === 0) {
30475
+ console.warn('⚠️ No evaluable fields for test case:', testCase.id);
30476
+ return;
30477
+ }
30478
+ onResult({
30479
+ testCaseId: testCase.id,
30480
+ passed: false,
30481
+ keywordMatches: [],
30482
+ fieldResults: failedFields,
30483
+ timestamp: new Date().toISOString(),
30484
+ });
30485
+ return;
30486
+ }
30125
30487
  const evaluationRequest = {
30126
30488
  testCaseId: testCase.id,
30127
30489
  question: testCase.question,
30128
- actualResponse: testCase.output,
30129
30490
  fields,
30130
30491
  };
30131
30492
  await this.engine.evaluateResponse(evaluationRequest, (result) => {
30132
- console.log('📊 Evaluation result received:', result);
30133
- onResult(result);
30493
+ const combinedResults = [...(result.fieldResults || []), ...failedFields].sort((a, b) => a.index - b.index);
30494
+ onResult({
30495
+ ...result,
30496
+ passed: combinedResults.every(field => field.passed && !field.error),
30497
+ fieldResults: combinedResults,
30498
+ });
30134
30499
  });
30135
30500
  }
30136
30501
  }
@@ -30178,7 +30543,7 @@ const LLMTestRunnerHeader = ({ isExportingTestSuite, isExportingTestResults, isR
30178
30543
  };
30179
30544
 
30180
30545
  const ResponseOutput = ({ output, isRunning, }) => {
30181
- return (h("div", { class: "response-output" }, output ? (h("div", { class: "response-output__content" }, output)) : (h("div", { class: "response-output__placeholder" }, isRunning ? 'Running...' : ''))));
30546
+ return (h("div", { class: "response-output" }, output?.text ? (h("div", { class: "response-output__content" }, output.text)) : (h("div", { class: "response-output__placeholder" }, isRunning ? 'Running...' : ''))));
30182
30547
  };
30183
30548
 
30184
30549
  const EvaluationSummary = ({ result, isRunning, }) => {
@@ -30216,7 +30581,9 @@ var FormFieldType;
30216
30581
  FormFieldType["SELECT"] = "select";
30217
30582
  })(FormFieldType || (FormFieldType = {}));
30218
30583
 
30219
- const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupported = false, onExpectedOutcomeChange, }) => {
30584
+ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupported = false, extractorIds = [], onExpectedOutcomeChange, }) => {
30585
+ const hasExtractorOptions = extractorIds.length > 0;
30586
+ const firstExtractorId = extractorIds[0];
30220
30587
  const emit = (detail) => onExpectedOutcomeChange({
30221
30588
  detail,
30222
30589
  });
@@ -30246,6 +30613,23 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30246
30613
  required: false,
30247
30614
  rows: 2,
30248
30615
  });
30616
+ const buildEvaluationSourceConfig = (index) => ({
30617
+ name: `expectedOutcomeEvaluationSource-${index}`,
30618
+ fieldType: FormFieldType.SELECT,
30619
+ label: 'Evaluation Source',
30620
+ placeholder: 'Select evaluation source',
30621
+ required: true,
30622
+ optionList: ['text', 'custom'],
30623
+ defaultValue: 'text',
30624
+ });
30625
+ const buildExtractorConfig = (index) => ({
30626
+ name: `expectedOutcomeEvaluationSourceExtractor-${index}`,
30627
+ fieldType: FormFieldType.SELECT,
30628
+ label: 'Extractor',
30629
+ placeholder: 'Select extractor',
30630
+ required: true,
30631
+ optionList: extractorIds,
30632
+ });
30249
30633
  const renderEvaluationSelector = (field, index) => {
30250
30634
  const optionList = getAllowedApproachesForFieldType(field.type);
30251
30635
  return (h("app-select", { config: buildEvaluationConfig(index, optionList), value: field.evaluationParameters?.approach, onValueChange: (e) => emit({
@@ -30255,6 +30639,27 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30255
30639
  value: e.detail.value,
30256
30640
  }) }));
30257
30641
  };
30642
+ const renderEvaluationSourceSelector = (field, index) => {
30643
+ if (!hasExtractorOptions) {
30644
+ return null;
30645
+ }
30646
+ const sourceType = field.evaluationSource?.type || 'text';
30647
+ return (h("div", null, h("app-select", { config: buildEvaluationSourceConfig(index), value: sourceType, onValueChange: (e) => emit({
30648
+ testCaseId,
30649
+ index,
30650
+ operation: 'set-evaluation-source-type',
30651
+ value: e.detail.value,
30652
+ fallbackExtractorId: firstExtractorId,
30653
+ }) }), sourceType === 'custom' && (h("app-select", { config: buildExtractorConfig(index), value: field.evaluationSource?.type === 'custom'
30654
+ ? field.evaluationSource.extractorId
30655
+ : '', onValueChange: (e) => emit({
30656
+ testCaseId,
30657
+ index,
30658
+ operation: 'set-evaluation-source-extractor',
30659
+ value: e.detail.value,
30660
+ }) }))));
30661
+ };
30662
+ const renderEvaluationOptions = (field, index) => (h("details", { class: "expected-outcome-renderer__options" }, h("summary", { class: "expected-outcome-renderer__options-summary" }, "More options"), h("div", { class: "expected-outcome-renderer__options-content" }, renderEvaluationSelector(field, index), renderEvaluationSourceSelector(field, index))));
30258
30663
  return (h("div", { class: "expected-outcome-renderer" }, (fields || []).map((field, index) => {
30259
30664
  if (field.type === 'textarea') {
30260
30665
  const isDynamic = dynamicResolutionSupported && field.outcomeMode === 'dynamic';
@@ -30286,7 +30691,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30286
30691
  index,
30287
30692
  operation: 'set-resolution-query',
30288
30693
  value: e.detail.value,
30289
- }) })), !isDynamic && renderEvaluationSelector(field, index)));
30694
+ }) })), !isDynamic && renderEvaluationOptions(field, index)));
30290
30695
  }
30291
30696
  if (field.type === 'chips-input') {
30292
30697
  const config = {
@@ -30306,7 +30711,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30306
30711
  index,
30307
30712
  operation: 'remove-chip',
30308
30713
  value: e.detail.value,
30309
- }) }), renderEvaluationSelector(field, index)));
30714
+ }) }), renderEvaluationOptions(field, index)));
30310
30715
  }
30311
30716
  if (field.type === 'select') {
30312
30717
  const config = {
@@ -30322,18 +30727,18 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupporte
30322
30727
  index,
30323
30728
  operation: 'set-value',
30324
30729
  value: e.detail.value,
30325
- }) }), renderEvaluationSelector(field, index)));
30730
+ }) }), renderEvaluationOptions(field, index)));
30326
30731
  }
30327
30732
  return (h("div", { class: "expected-outcome-renderer__group" }, h("div", { class: "expected-outcome-renderer__text" }, h("label", null, field.label), h("input", { type: "text", value: field.value, placeholder: field.placeholder, onInput: (e) => emit({
30328
30733
  testCaseId,
30329
30734
  index,
30330
30735
  operation: 'set-value',
30331
30736
  value: e.target.value,
30332
- }) })), renderEvaluationSelector(field, index)));
30737
+ }) })), renderEvaluationOptions(field, index)));
30333
30738
  })));
30334
30739
  };
30335
30740
 
30336
- const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, }) => {
30741
+ const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, extractorIds = [], onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
30337
30742
  const questionConfig = {
30338
30743
  name: 'question',
30339
30744
  fieldType: FormFieldType.TEXT_AREA,
@@ -30349,11 +30754,21 @@ const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, onRun, o
30349
30754
  key: 'question',
30350
30755
  value: e.detail.value,
30351
30756
  },
30352
- }) }), h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], dynamicResolutionSupported: dynamicResolutionSupported, onExpectedOutcomeChange: onExpectedOutcomeChange })), h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
30757
+ }) }), h("chat-history", { chatHistoryEnabled: testCase.chatHistory?.enabled ?? false, chatHistoryValue: testCase.chatHistory?.value ?? '', onChatHistoryChange: (e) => {
30758
+ const { enabled, value } = e
30759
+ .detail;
30760
+ onChatHistoryChange({
30761
+ detail: {
30762
+ testCaseId: testCase.id,
30763
+ enabled,
30764
+ value,
30765
+ },
30766
+ });
30767
+ } }), h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], dynamicResolutionSupported: dynamicResolutionSupported, extractorIds: extractorIds, onExpectedOutcomeChange: onExpectedOutcomeChange })), h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
30353
30768
  };
30354
30769
 
30355
- const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, }) => {
30356
- return (h("div", { class: "test-cases" }, h("div", { class: "test-cases__column-headers" }, h("div", { class: "test-cases__column-header" }, "Input"), h("div", { class: "test-cases__column-header" }, "Output"), h("div", { class: "test-cases__column-header" }, "Evaluation"), h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (h(LLMTestCaseRow, { testCase: testCase, dynamicResolutionSupported: dynamicResolutionSupported, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange }))), h("div", { class: "test-cases__add-section" }, h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
30770
+ const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, extractorIds = [], onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
30771
+ return (h("div", { class: "test-cases" }, h("div", { class: "test-cases__column-headers" }, h("div", { class: "test-cases__column-header" }, "Input"), h("div", { class: "test-cases__column-header" }, "Output"), h("div", { class: "test-cases__column-header" }, "Evaluation"), h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (h(LLMTestCaseRow, { testCase: testCase, dynamicResolutionSupported: dynamicResolutionSupported, extractorIds: extractorIds, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange, onChatHistoryChange: onChatHistoryChange }))), h("div", { class: "test-cases__add-section" }, h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
30357
30772
  };
30358
30773
 
30359
30774
  const tokensCss = () => `:host{--spacing:0.25rem;--spacing-1:calc(var(--spacing) * 1);--spacing-2:calc(var(--spacing) * 2);--spacing-3:calc(var(--spacing) * 3);--spacing-4:calc(var(--spacing) * 4);--spacing-5:calc(var(--spacing) * 5);--spacing-6:calc(var(--spacing) * 6);--spacing-8:calc(var(--spacing) * 8);--spacing-10:calc(var(--spacing) * 10);--spacing-12:calc(var(--spacing) * 12);--spacing-16:calc(var(--spacing) * 16);--spacing-20:calc(var(--spacing) * 20);--spacing-24:calc(var(--spacing) * 24);--radius-none:0;--radius-sm:0.125rem;--radius-md:0.375rem;--radius-lg:0.5rem;--radius-xl:0.75rem;--radius-2xl:1rem;--radius-3xl:1.5rem;--radius-full:9999px;--radius:var(--radius-lg);--font-size-xs:0.75rem;--font-size-sm:0.875rem;--font-size-base:1rem;--font-size-lg:1.125rem;--font-size-xl:1.25rem;--font-size-2xl:1.5rem;--font-size-3xl:1.875rem;--font-size-4xl:2.25rem;--font-weight-normal:400;--font-weight-medium:500;--font-weight-semibold:600;--font-weight-bold:700;--line-height-none:1;--line-height-tight:1.25;--line-height-snug:1.375;--line-height-normal:1.5;--line-height-relaxed:1.625;--line-height-loose:2;--letter-spacing-tight:-0.025em;--letter-spacing-normal:0;--letter-spacing-wide:0.05em;--shadow-sm:0 1px 2px 0 rgba(0, 0, 0, 0.05);--shadow-md:0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);--shadow-lg:0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);--shadow-xl:0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04);--shadow-2xl:0 25px 50px -12px rgba(0, 0, 0, 0.25);--border-width:1px;--z-base:0;--z-dropdown:1000;--z-sticky:1100;--z-modal:1200;--z-popover:1300;--z-tooltip:1400;--opacity-disabled:0.5;--opacity-hover:0.8;--opacity-muted:0.6;--max-w-sm:24rem;--max-w-md:28rem;--max-w-lg:32rem;--max-w-xl:42rem;--max-w-2xl:48rem;--max-w-full:100%;--breakpoint-sm:640px;--breakpoint-md:768px;--breakpoint-lg:1024px;--breakpoint-xl:1280px;--breakpoint-2xl:1536px;--background:#ffffff;--foreground:#0a0a0a;--card:#ffffff;--card-foreground:#0a0a0a;--popover:#ffffff;--popover-foreground:#0a0a0a;--primary:#0a0a0a;--primary-foreground:#fafafa;--secondary:#f4f4f5;--secondary-foreground:#0a0a0a;--muted:#f4f4f5;--muted-foreground:#71717a;--accent:#f4f4f5;--accent-foreground:#0a0a0a;--destructive:#ef4444;--destructive-foreground:#fafafa;--border:#e4e4e7;--input:#e4e4e7;--ring:#3b82f6;--success:#10b981;--success-foreground:#fafafa;--warning:#f59e0b;--warning-foreground:#fafafa;--info:#3b82f6;--info-foreground:#fafafa}:host([data-theme='dark']){--background:#0a0a0a;--foreground:#fafafa;--card:#171717;--card-foreground:#fafafa;--popover:#171717;--popover-foreground:#fafafa;--primary:#fafafa;--primary-foreground:#0a0a0a;--secondary:#27272a;--secondary-foreground:#fafafa;--muted:#27272a;--muted-foreground:#a1a1aa;--accent:#27272a;--accent-foreground:#fafafa;--destructive:#dc2626;--destructive-foreground:#fafafa;--border:#27272a;--input:#27272a;--ring:#3b82f6;--success:#059669;--success-foreground:#fafafa;--warning:#d97706;--warning-foreground:#fafafa;--info:#2563eb;--info-foreground:#fafafa}`;
@@ -30364,7 +30779,7 @@ const llmTestRunnerHeaderCss = () => `.test-runner-header{display:flex;justify-c
30364
30779
 
30365
30780
  const llmTestCasesCss = () => `.test-cases{background:var(--background)}.test-cases__column-headers{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);background:var(--border);border-bottom:2px solid var(--border)}.test-cases__column-header{background:var(--muted);padding:var(--spacing-4) var(--spacing-5);font-weight:var(--font-weight-semibold);color:var(--foreground);font-size:var(--font-size-sm);text-transform:uppercase;letter-spacing:var(--letter-spacing-wide)}.test-cases__add-section{padding:var(--spacing-6);text-align:center;background:var(--muted);border-top:var(--border-width) solid var(--border)}@media (max-width: 1200px){.test-cases__column-headers{display:none}}`;
30366
30781
 
30367
- const llmTestCaseRowCss = () => `.test-case-row{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);border-bottom:var(--border-width) solid var(--border);min-height:200px}.test-case-row:hover{background:var(--muted)}.test-case-row__input-column{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border)}.expected-outcome-renderer{display:flex;flex-direction:column;gap:var(--spacing-4);margin-top:var(--spacing-4)}.expected-outcome-renderer__group{display:flex;flex-direction:column;gap:var(--spacing-2);padding:var(--spacing-3);border:var(--border-width) solid var(--border);border-radius:var(--radius-md);background:var(--background)}@media (max-width: 1200px){.test-case-row{grid-template-columns:1fr;gap:0}.test-case-row__input-column{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.test-case-row__input-column{padding:var(--spacing-4)}.test-case-row{min-height:auto}}`;
30782
+ const llmTestCaseRowCss = () => `.test-case-row{display:grid;grid-template-columns:1fr 1.5fr 0.5fr 120px;gap:var(--border-width);border-bottom:var(--border-width) solid var(--border);min-height:200px}.test-case-row:hover{background:var(--muted)}.test-case-row__input-column{padding:var(--spacing-5);background:var(--background);border-right:var(--border-width) solid var(--border)}.expected-outcome-renderer{display:flex;flex-direction:column;gap:var(--spacing-4);margin-top:var(--spacing-4)}.expected-outcome-renderer__group{display:flex;flex-direction:column;gap:var(--spacing-2);padding:var(--spacing-3);border:var(--border-width) solid var(--border);border-radius:var(--radius-md);background:var(--background)}.expected-outcome-renderer__options{border:var(--border-width) solid var(--border);border-radius:var(--radius-sm);background:var(--muted)}.expected-outcome-renderer__options-summary{cursor:pointer;font-size:var(--font-size-sm);color:var(--foreground);padding:var(--spacing-2) var(--spacing-3);user-select:none}.expected-outcome-renderer__options-content{display:flex;flex-direction:column;gap:var(--spacing-2);padding:0 var(--spacing-3) var(--spacing-3)}@media (max-width: 1200px){.test-case-row{grid-template-columns:1fr;gap:0}.test-case-row__input-column{border-right:none;border-bottom:var(--border-width) solid var(--border)}}@media (max-width: 768px){.test-case-row__input-column{padding:var(--spacing-4)}.test-case-row{min-height:auto}}`;
30368
30783
 
30369
30784
  const rowActionsCss = () => `.row-actions{height:100%;padding:var(--spacing-5);background:var(--background);display:flex;flex-direction:column;gap:var(--spacing-3);align-items:center;justify-content:flex-start;align-self:flex-start}@media (max-width: 1200px){.row-actions{border-right:none;border-bottom:var(--border-width) solid var(--border);flex-direction:row;justify-content:center}}@media (max-width: 768px){.row-actions{padding:var(--spacing-4)}}`;
30370
30785
 
@@ -30390,6 +30805,7 @@ const LLMTestRunner = class {
30390
30805
  useSave = false;
30391
30806
  usePromptEditor = false;
30392
30807
  resolveExpectedOutcome;
30808
+ evaluationSourceExtractors;
30393
30809
  initialTestCases;
30394
30810
  defaultExpectedOutcomeSchema;
30395
30811
  testCases = [
@@ -30403,6 +30819,7 @@ const LLMTestRunner = class {
30403
30819
  value: '',
30404
30820
  },
30405
30821
  ],
30822
+ chatHistory: { enabled: false, value: '' },
30406
30823
  isRunning: false,
30407
30824
  },
30408
30825
  ];
@@ -30425,6 +30842,12 @@ const LLMTestRunner = class {
30425
30842
  // Initialize testCases from prop if provided
30426
30843
  if (this.initialTestCases !== undefined) {
30427
30844
  validateTestCaseInputArray(this.initialTestCases);
30845
+ const extractorIds = getExtractorIds(this.evaluationSourceExtractors);
30846
+ if (extractorIds.length > 0) {
30847
+ this.initialTestCases.forEach(testCase => {
30848
+ validateExpectedOutcomeArrayWithExtractors(testCase.expectedOutcome, extractorIds);
30849
+ });
30850
+ }
30428
30851
  this.testCases = this.initialTestCases.map((rawTestCase, index) => {
30429
30852
  try {
30430
30853
  return createTestCaseFromInput(rawTestCase);
@@ -30448,8 +30871,6 @@ const LLMTestRunner = class {
30448
30871
  this.testCases = [];
30449
30872
  }
30450
30873
  }
30451
- componentDidLoad() { }
30452
- disconnectedCallback() { }
30453
30874
  async resetSavingState() {
30454
30875
  this.isSaving = false;
30455
30876
  }
@@ -30460,6 +30881,12 @@ const LLMTestRunner = class {
30460
30881
  const { testCaseId, key, value } = event.detail;
30461
30882
  this.testCases = this.testCases.map(tc => tc.id === testCaseId ? { ...tc, [key]: value } : tc);
30462
30883
  };
30884
+ handleChatHistoryChange = (event) => {
30885
+ const { testCaseId, enabled, value } = event.detail;
30886
+ this.updateTestCase(testCaseId, {
30887
+ chatHistory: { enabled, value },
30888
+ });
30889
+ };
30463
30890
  addNewTestCase() {
30464
30891
  try {
30465
30892
  const schema = this.getResolvedExpectedOutcomeSchema();
@@ -30476,13 +30903,17 @@ const LLMTestRunner = class {
30476
30903
  updateTestCase(id, updates) {
30477
30904
  this.testCases = this.testCases.map(tc => tc.id === id ? { ...tc, ...updates } : tc);
30478
30905
  }
30479
- requestLlmText(testCase) {
30906
+ requestLlmResponse(testCase) {
30480
30907
  return new Promise((resolve, reject) => {
30481
- this.llmRequest.emit({
30908
+ const payload = {
30482
30909
  prompt: testCase.question,
30483
30910
  resolve,
30484
30911
  reject,
30485
- });
30912
+ };
30913
+ if (testCase.chatHistory?.enabled) {
30914
+ payload.chatHistory = testCase.chatHistory.value;
30915
+ }
30916
+ this.llmRequest.emit(payload);
30486
30917
  });
30487
30918
  }
30488
30919
  throwError(reason) {
@@ -30495,14 +30926,14 @@ const LLMTestRunner = class {
30495
30926
  const startTime = Date.now();
30496
30927
  this.updateTestCase(testCase.id, { isRunning: true });
30497
30928
  const [llmSettled, resolutionSettled] = await Promise.allSettled([
30498
- this.requestLlmText(testCase),
30929
+ this.requestLlmResponse(testCase),
30499
30930
  resolveDynamicExpectedOutcomes(testCase, this.resolveExpectedOutcome),
30500
30931
  ]);
30501
30932
  const responseTime = Date.now() - startTime;
30502
30933
  if (llmSettled.status === 'rejected') {
30503
30934
  this.updateTestCase(testCase.id, {
30504
30935
  isRunning: false,
30505
- output: null,
30936
+ output: undefined,
30506
30937
  error: this.addErrorMessage(llmSettled.reason, 'Unknown error'),
30507
30938
  responseTime,
30508
30939
  });
@@ -30550,7 +30981,7 @@ const LLMTestRunner = class {
30550
30981
  this.updateTestCase(testCase.id, {
30551
30982
  evaluationResult: result,
30552
30983
  });
30553
- });
30984
+ }, this.evaluationSourceExtractors);
30554
30985
  }
30555
30986
  async runAllTests() {
30556
30987
  this.isRunningAll = true;
@@ -30581,7 +31012,7 @@ const LLMTestRunner = class {
30581
31012
  this.error = '';
30582
31013
  try {
30583
31014
  const content = await readFileAsync(file);
30584
- const result = importTestSuite(content);
31015
+ const result = importTestSuite(content, getExtractorIds(this.evaluationSourceExtractors));
30585
31016
  if (!result.success) {
30586
31017
  this.error = result.error || 'Unknown error occurred during import.';
30587
31018
  return;
@@ -30642,7 +31073,7 @@ const LLMTestRunner = class {
30642
31073
  }
30643
31074
  }
30644
31075
  render() {
30645
- return (h("div", { key: '5536c02dcbf03e1d21de2df307f5255a17c000c7', class: "test-runner-container" }, h(LLMTestRunnerHeader, { key: 'b6b7db13d7b5576986f4de469c4eeff62b9be873', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), h(ErrorMessage, { key: '48407658a40dd79864ddf24426df743df9206b30', message: this.error, onClear: () => (this.error = '') }), h("div", { key: '2b1db34dbb4defc98c255ca7bcb9318ca1c52cba', class: "test-runner-container__content" }, h(LLMTestCases, { key: '90b4d25f51d5de43790cabeb4fa6fc1c90c246cd', testCases: this.testCases, dynamicResolutionSupported: !!this.resolveExpectedOutcome, onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange }))));
31076
+ return (h("div", { key: '7433beaa1d60d48f65600c43e11b302b892a7bca', class: "test-runner-container" }, h(LLMTestRunnerHeader, { key: '8083cc39376e7a710bd3f52efb184b959e885a87', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), h(ErrorMessage, { key: 'ddced98c13cd595c4cfb6eef11b27cb173769518', message: this.error, onClear: () => (this.error = '') }), h("div", { key: '8d6f65c4d68d34869b644709eacb97fec93683c6', class: "test-runner-container__content" }, h(LLMTestCases, { key: '5ccb186132b23af6209209b0a14086e03cf790af', testCases: this.testCases, dynamicResolutionSupported: !!this.resolveExpectedOutcome, extractorIds: getExtractorIds(this.evaluationSourceExtractors), onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange, onChatHistoryChange: this.handleChatHistoryChange }))));
30646
31077
  }
30647
31078
  };
30648
31079
  LLMTestRunner.style = tokensCss() + (llmTestRunnerCss() + (llmTestRunnerHeaderCss() + (llmTestCasesCss() + (llmTestCaseRowCss() + (rowActionsCss() + (evaluationSummaryCss() + (responseOutputCss() + (errorMessageCss() + (buttonCss() + iconButtonCss())))))))));