llm-testrunner-components 1.2.2 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/dist/cjs/{app-chips_4.cjs.entry.js → app-chips_5.cjs.entry.js} +41 -6
  2. package/dist/cjs/app-chips_5.cjs.entry.js.map +1 -0
  3. package/dist/cjs/index.cjs.js +235 -44
  4. package/dist/cjs/index.cjs.js.map +1 -1
  5. package/dist/cjs/llm-testrunner.cjs.js +1 -1
  6. package/dist/cjs/loader.cjs.js +1 -1
  7. package/dist/collection/collection-manifest.json +1 -0
  8. package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js +22 -12
  9. package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js.map +1 -1
  10. package/dist/collection/components/llm-test-runner/llm-test-runner.js +91 -30
  11. package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
  12. package/dist/collection/components/llm-test-runner/test-cases/chat-history.css +101 -0
  13. package/dist/collection/components/llm-test-runner/test-cases/chat-history.js +105 -0
  14. package/dist/collection/components/llm-test-runner/test-cases/chat-history.js.map +1 -0
  15. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js +37 -4
  16. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js.map +1 -1
  17. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js +12 -2
  18. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map +1 -1
  19. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js +2 -2
  20. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map +1 -1
  21. package/dist/collection/index.js.map +1 -1
  22. package/dist/collection/lib/evaluation/evaluation-service.js +14 -7
  23. package/dist/collection/lib/evaluation/evaluation-service.js.map +1 -1
  24. package/dist/collection/lib/form/components/app-chips.js +1 -1
  25. package/dist/collection/lib/form/components/app-select.js +1 -1
  26. package/dist/collection/lib/form/components/app-textarea.css +17 -0
  27. package/dist/collection/lib/form/components/app-textarea.js +4 -1
  28. package/dist/collection/lib/form/components/app-textarea.js.map +1 -1
  29. package/dist/collection/lib/import-export/test-suite-exporter.js +4 -0
  30. package/dist/collection/lib/import-export/test-suite-exporter.js.map +1 -1
  31. package/dist/collection/lib/test-cases/dynamic-expected-outcome-resolver.js +44 -0
  32. package/dist/collection/lib/test-cases/dynamic-expected-outcome-resolver.js.map +1 -0
  33. package/dist/collection/lib/test-cases/test-case-factory.js +2 -0
  34. package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -1
  35. package/dist/collection/lib/test-cases/test-case-mutations.js +35 -0
  36. package/dist/collection/lib/test-cases/test-case-mutations.js.map +1 -1
  37. package/dist/collection/schemas/expected-outcome.js +15 -1
  38. package/dist/collection/schemas/expected-outcome.js.map +1 -1
  39. package/dist/collection/schemas/test-case.js +6 -0
  40. package/dist/collection/schemas/test-case.js.map +1 -1
  41. package/dist/collection/types/expected-outcome.js.map +1 -1
  42. package/dist/collection/types/llm-test-runner.js.map +1 -1
  43. package/dist/collection/types/test-case.js.map +1 -1
  44. package/dist/components/app-chips.js +1 -1
  45. package/dist/components/app-select.js +1 -1
  46. package/dist/components/app-textarea.js +1 -1
  47. package/dist/components/chat-history.d.ts +11 -0
  48. package/dist/components/chat-history.js +2 -0
  49. package/dist/components/chat-history.js.map +1 -0
  50. package/dist/components/index.js +1 -1
  51. package/dist/components/llm-test-runner.js +1 -1
  52. package/dist/components/{p-CJBscebi.js → p-B87Lt3z4.js} +3 -3
  53. package/dist/components/p-B87Lt3z4.js.map +1 -0
  54. package/dist/components/p-Bx2jqguC.js +2 -0
  55. package/dist/components/p-Bx2jqguC.js.map +1 -0
  56. package/dist/components/p-D2qDAxFN.js +2 -0
  57. package/dist/components/p-D2qDAxFN.js.map +1 -0
  58. package/dist/components/{p-Dv7cB5FU.js → p-D4dHUFN9.js} +2 -2
  59. package/dist/components/{p-CE5-1jfZ.js → p-eN2dLrsr.js} +2 -2
  60. package/dist/esm/{app-chips_4.entry.js → app-chips_5.entry.js} +41 -7
  61. package/dist/esm/app-chips_5.entry.js.map +1 -0
  62. package/dist/esm/index.js +235 -44
  63. package/dist/esm/index.js.map +1 -1
  64. package/dist/esm/llm-testrunner.js +1 -1
  65. package/dist/esm/loader.js +1 -1
  66. package/dist/llm-testrunner/index.esm.js +2 -2
  67. package/dist/llm-testrunner/index.esm.js.map +1 -1
  68. package/dist/llm-testrunner/llm-testrunner.esm.js +1 -1
  69. package/dist/llm-testrunner/p-21202f12.entry.js +2 -0
  70. package/dist/llm-testrunner/p-21202f12.entry.js.map +1 -0
  71. package/dist/react/components.d.ts +6 -1
  72. package/dist/react/components.d.ts.map +1 -1
  73. package/dist/react/components.js +9 -0
  74. package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +6 -0
  75. package/dist/types/components/llm-test-runner/test-cases/chat-history.d.ts +14 -0
  76. package/dist/types/components/llm-test-runner/test-cases/expected-outcome-renderer.d.ts +1 -0
  77. package/dist/types/components/llm-test-runner/test-cases/llm-test-case-row.d.ts +6 -0
  78. package/dist/types/components/llm-test-runner/test-cases/llm-test-cases.d.ts +3 -0
  79. package/dist/types/components.d.ts +55 -0
  80. package/dist/types/index.d.ts +1 -1
  81. package/dist/types/lib/import-export/test-suite-exporter.d.ts +4 -0
  82. package/dist/types/lib/test-cases/dynamic-expected-outcome-resolver.d.ts +7 -0
  83. package/dist/types/lib/test-cases/test-case-mutations.d.ts +9 -1
  84. package/dist/types/schemas/expected-outcome.d.ts +16 -1
  85. package/dist/types/schemas/test-case.d.ts +34 -2
  86. package/dist/types/types/expected-outcome.d.ts +1 -1
  87. package/dist/types/types/llm-test-runner.d.ts +3 -2
  88. package/dist/types/types/test-case.d.ts +1 -1
  89. package/package.json +1 -1
  90. package/dist/cjs/app-chips_4.cjs.entry.js.map +0 -1
  91. package/dist/components/p-BZrzx5jG.js +0 -2
  92. package/dist/components/p-BZrzx5jG.js.map +0 -1
  93. package/dist/components/p-CJBscebi.js.map +0 -1
  94. package/dist/esm/app-chips_4.entry.js.map +0 -1
  95. package/dist/llm-testrunner/p-2cc09217.entry.js +0 -2
  96. package/dist/llm-testrunner/p-2cc09217.entry.js.map +0 -1
  97. /package/dist/components/{p-Dv7cB5FU.js.map → p-D4dHUFN9.js.map} +0 -0
  98. /package/dist/components/{p-CE5-1jfZ.js.map → p-eN2dLrsr.js.map} +0 -0
package/dist/esm/index.js CHANGED
@@ -103,6 +103,10 @@ function formatTestSuiteAsJson(testCases) {
103
103
  id: testCase.id,
104
104
  question: testCase.question,
105
105
  expectedOutcome: testCase.expectedOutcome,
106
+ chatHistory: {
107
+ enabled: testCase.chatHistory.enabled,
108
+ value: testCase.chatHistory.value,
109
+ },
106
110
  }));
107
111
  return JSON.stringify(exportData, null, 2);
108
112
  }
@@ -300,6 +304,7 @@ function createTestCase(expectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_SCHEMA)
300
304
  id: v4(),
301
305
  question: '',
302
306
  expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),
307
+ chatHistory: { enabled: false, value: '' },
303
308
  isRunning: false,
304
309
  };
305
310
  }
@@ -359,6 +364,7 @@ function createExpectedOutcomeFromSchema(expectedOutcomeSchema) {
359
364
  function createTestCaseFromInput(data) {
360
365
  return {
361
366
  ...data,
367
+ chatHistory: data.chatHistory ?? { enabled: false, value: '' },
362
368
  expectedOutcome: data.expectedOutcome.map(normalizeExpectedOutcomeField),
363
369
  };
364
370
  }
@@ -4952,6 +4958,7 @@ const optionalPositiveInt = number().int().positive().optional();
4952
4958
  const optionalString = string().optional();
4953
4959
  const selectOptionsSchema = array(nonEmptyString).min(1);
4954
4960
  const optionalNumber = number().optional();
4961
+ const expectedOutcomeModeSchema = _enum(['static', 'dynamic']);
4955
4962
  const evaluationParametersSchema = object({
4956
4963
  approach: _enum(EvaluationApproach),
4957
4964
  threshold: optionalNumber,
@@ -5013,8 +5020,21 @@ const expectedOutcomeFieldSchema = discriminatedUnion('type', [
5013
5020
  defaultFieldDefinitions.text.extend({
5014
5021
  value: string(),
5015
5022
  }),
5016
- defaultFieldDefinitions.textarea.extend({
5023
+ defaultFieldDefinitions.textarea
5024
+ .extend({
5017
5025
  value: string(),
5026
+ outcomeMode: expectedOutcomeModeSchema.default('static'),
5027
+ resolutionQuery: string().optional(),
5028
+ })
5029
+ .superRefine((field, ctx) => {
5030
+ if (field.outcomeMode === 'dynamic' &&
5031
+ (!field.resolutionQuery || field.resolutionQuery.trim().length === 0)) {
5032
+ ctx.addIssue({
5033
+ code: 'custom',
5034
+ path: ['resolutionQuery'],
5035
+ message: 'resolutionQuery is required when outcomeMode is dynamic.',
5036
+ });
5037
+ }
5018
5038
  }),
5019
5039
  defaultFieldDefinitions.chipsInput.extend({
5020
5040
  value: array(string()).superRefine((values, ctx) => {
@@ -5048,16 +5068,22 @@ function validateExpectedOutcomeSchema(schema) {
5048
5068
  }
5049
5069
  }
5050
5070
 
5071
+ const testCaseChatHistorySchema = object({
5072
+ enabled: boolean(),
5073
+ value: string(),
5074
+ });
5051
5075
  const testCaseInputSchema = object({
5052
5076
  id: string(),
5053
5077
  question: string(),
5054
5078
  expectedOutcome: expectedOutcomeArraySchema,
5079
+ chatHistory: testCaseChatHistorySchema.optional(),
5055
5080
  });
5056
5081
  const testCaseInputArraySchema = array(testCaseInputSchema);
5057
5082
  object({
5058
5083
  id: string(),
5059
5084
  question: string(),
5060
5085
  expectedOutcome: expectedOutcomeArraySchema,
5086
+ chatHistory: testCaseChatHistorySchema,
5061
5087
  output: string().optional(),
5062
5088
  isRunning: boolean().optional(),
5063
5089
  error: string().optional(),
@@ -5108,6 +5134,50 @@ function importTestSuite(jsonContent) {
5108
5134
  }
5109
5135
  }
5110
5136
 
5137
+ const MISSING_RESOLVER_MESSAGE = 'resolveExpectedOutcome is required when a test case has dynamic expected outcomes.';
5138
+ function isDynamicTextareaField(field) {
5139
+ return field.type === 'textarea' && field.outcomeMode === 'dynamic';
5140
+ }
5141
+ function applyResolvedDynamicValues(testCase, resolvedValues) {
5142
+ if (resolvedValues.length === 0) {
5143
+ return testCase;
5144
+ }
5145
+ const expectedOutcome = [...(testCase.expectedOutcome || [])];
5146
+ for (const resolved of resolvedValues) {
5147
+ const field = expectedOutcome[resolved.index];
5148
+ if (!field || !isDynamicTextareaField(field)) {
5149
+ continue;
5150
+ }
5151
+ expectedOutcome[resolved.index] = {
5152
+ ...field,
5153
+ value: resolved.value,
5154
+ };
5155
+ }
5156
+ return {
5157
+ ...testCase,
5158
+ expectedOutcome,
5159
+ };
5160
+ }
5161
+ async function resolveDynamicExpectedOutcomes(testCase, resolver) {
5162
+ const dynamicFields = (testCase.expectedOutcome || []).flatMap((field, index) => {
5163
+ if (!isDynamicTextareaField(field)) {
5164
+ return [];
5165
+ }
5166
+ return [{ field, index }];
5167
+ });
5168
+ if (dynamicFields.length === 0) {
5169
+ return testCase;
5170
+ }
5171
+ if (!resolver) {
5172
+ throw new Error(MISSING_RESOLVER_MESSAGE);
5173
+ }
5174
+ const resolvedValues = await Promise.all(dynamicFields.map(async ({ field, index }) => ({
5175
+ index,
5176
+ value: await resolver(field.resolutionQuery || '', { testCase, fieldIndex: index }),
5177
+ })));
5178
+ return applyResolvedDynamicValues(testCase, resolvedValues);
5179
+ }
5180
+
5111
5181
  function applyExpectedOutcomeChange(testCase, change) {
5112
5182
  const { index } = change;
5113
5183
  const expectedOutcome = [...(testCase.expectedOutcome || [])];
@@ -5120,6 +5190,9 @@ function applyExpectedOutcomeChange(testCase, change) {
5120
5190
  if (target.type === 'chips-input') {
5121
5191
  return testCase;
5122
5192
  }
5193
+ if (target.type === 'textarea' && target.outcomeMode === 'dynamic') {
5194
+ return testCase;
5195
+ }
5123
5196
  expectedOutcome[index] = {
5124
5197
  ...target,
5125
5198
  value: change.value,
@@ -5148,6 +5221,38 @@ function applyExpectedOutcomeChange(testCase, change) {
5148
5221
  }
5149
5222
  case 'set-evaluation-approach':
5150
5223
  return updateExpectedOutcomeFieldApproach(testCase, index, change.value);
5224
+ case 'set-outcome-mode': {
5225
+ if (target.type !== 'textarea') {
5226
+ return testCase;
5227
+ }
5228
+ const mode = change.value;
5229
+ if (mode === 'static') {
5230
+ const { resolutionQuery: _, ...rest } = target;
5231
+ expectedOutcome[index] = {
5232
+ ...rest,
5233
+ outcomeMode: 'static',
5234
+ value: '',
5235
+ };
5236
+ }
5237
+ else {
5238
+ expectedOutcome[index] = {
5239
+ ...target,
5240
+ outcomeMode: 'dynamic',
5241
+ value: '',
5242
+ };
5243
+ }
5244
+ return { ...testCase, expectedOutcome };
5245
+ }
5246
+ case 'set-resolution-query': {
5247
+ if (target.type !== 'textarea' || target.outcomeMode !== 'dynamic') {
5248
+ return testCase;
5249
+ }
5250
+ expectedOutcome[index] = {
5251
+ ...target,
5252
+ resolutionQuery: change.value,
5253
+ };
5254
+ return { ...testCase, expectedOutcome };
5255
+ }
5151
5256
  }
5152
5257
  }
5153
5258
  /**
@@ -30015,13 +30120,20 @@ class EvaluationService {
30015
30120
  console.warn('⚠️ No output to evaluate for test case:', testCase.id);
30016
30121
  return;
30017
30122
  }
30018
- const fields = (testCase.expectedOutcome || []).map((field, index) => ({
30019
- index,
30020
- label: field.label,
30021
- type: field.type,
30022
- expectedValue: getFieldExpectedValue(field),
30023
- evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
30024
- }));
30123
+ const fields = (testCase.expectedOutcome || []).flatMap((field, index) => {
30124
+ if (field.type === 'textarea' && field.outcomeMode === 'dynamic') {
30125
+ return [];
30126
+ }
30127
+ return [
30128
+ {
30129
+ index,
30130
+ label: field.label,
30131
+ type: field.type,
30132
+ expectedValue: getFieldExpectedValue(field),
30133
+ evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
30134
+ },
30135
+ ];
30136
+ });
30025
30137
  const evaluationRequest = {
30026
30138
  testCaseId: testCase.id,
30027
30139
  question: testCase.question,
@@ -30116,7 +30228,7 @@ var FormFieldType;
30116
30228
  FormFieldType["SELECT"] = "select";
30117
30229
  })(FormFieldType || (FormFieldType = {}));
30118
30230
 
30119
- const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange, }) => {
30231
+ const ExpectedOutcomeRenderer = ({ testCaseId, fields, dynamicResolutionSupported = false, onExpectedOutcomeChange, }) => {
30120
30232
  const emit = (detail) => onExpectedOutcomeChange({
30121
30233
  detail,
30122
30234
  });
@@ -30129,6 +30241,23 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange,
30129
30241
  optionList,
30130
30242
  defaultValue: EvaluationApproach.EXACT,
30131
30243
  });
30244
+ const buildOutcomeModeConfig = (index) => ({
30245
+ name: `expectedOutcomeMode-${index}`,
30246
+ fieldType: FormFieldType.SELECT,
30247
+ label: 'Outcome Mode',
30248
+ placeholder: 'Select outcome mode',
30249
+ required: true,
30250
+ optionList: ['static', 'dynamic'],
30251
+ defaultValue: 'static',
30252
+ });
30253
+ const buildResolutionQueryConfig = (index) => ({
30254
+ name: `expectedOutcomeResolutionQuery-${index}`,
30255
+ fieldType: FormFieldType.TEXT_AREA,
30256
+ label: 'Resolution Query',
30257
+ placeholder: 'Query used to resolve expected value',
30258
+ required: false,
30259
+ rows: 2,
30260
+ });
30132
30261
  const renderEvaluationSelector = (field, index) => {
30133
30262
  const optionList = getAllowedApproachesForFieldType(field.type);
30134
30263
  return (h("app-select", { config: buildEvaluationConfig(index, optionList), value: field.evaluationParameters?.approach, onValueChange: (e) => emit({
@@ -30140,12 +30269,17 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange,
30140
30269
  };
30141
30270
  return (h("div", { class: "expected-outcome-renderer" }, (fields || []).map((field, index) => {
30142
30271
  if (field.type === 'textarea') {
30272
+ const isDynamic = dynamicResolutionSupported && field.outcomeMode === 'dynamic';
30143
30273
  const config = {
30144
30274
  name: `expectedOutcome-${index}`,
30145
30275
  fieldType: FormFieldType.TEXT_AREA,
30146
30276
  label: field.label,
30147
- placeholder: field.placeholder,
30148
- required: true,
30277
+ placeholder: isDynamic ? 'Resolved on run' : field.placeholder,
30278
+ required: !isDynamic,
30279
+ readOnly: isDynamic,
30280
+ helpText: isDynamic
30281
+ ? 'Filled automatically when the test is run'
30282
+ : undefined,
30149
30283
  rows: field.rows || 2,
30150
30284
  };
30151
30285
  return (h("div", { class: "expected-outcome-renderer__group" }, h("app-textarea", { config: config, value: field.value, onValueChange: (e) => emit({
@@ -30153,7 +30287,18 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange,
30153
30287
  index,
30154
30288
  operation: 'set-value',
30155
30289
  value: e.detail.value,
30156
- }) }), renderEvaluationSelector(field, index)));
30290
+ }) }), dynamicResolutionSupported && (h("app-select", { config: buildOutcomeModeConfig(index), value: field.outcomeMode || 'static', onValueChange: (e) => emit({
30291
+ testCaseId,
30292
+ index,
30293
+ operation: 'set-outcome-mode',
30294
+ value: e.detail.value,
30295
+ }) })), dynamicResolutionSupported &&
30296
+ field.outcomeMode === 'dynamic' && (h("app-textarea", { config: buildResolutionQueryConfig(index), value: field.resolutionQuery || '', onValueChange: (e) => emit({
30297
+ testCaseId,
30298
+ index,
30299
+ operation: 'set-resolution-query',
30300
+ value: e.detail.value,
30301
+ }) })), !isDynamic && renderEvaluationSelector(field, index)));
30157
30302
  }
30158
30303
  if (field.type === 'chips-input') {
30159
30304
  const config = {
@@ -30200,7 +30345,7 @@ const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange,
30200
30345
  })));
30201
30346
  };
30202
30347
 
30203
- const LLMTestCaseRow = ({ testCase, onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, }) => {
30348
+ const LLMTestCaseRow = ({ testCase, dynamicResolutionSupported = false, onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
30204
30349
  const questionConfig = {
30205
30350
  name: 'question',
30206
30351
  fieldType: FormFieldType.TEXT_AREA,
@@ -30216,11 +30361,21 @@ const LLMTestCaseRow = ({ testCase, onRun, onDelete, handleTestCaseChange, onExp
30216
30361
  key: 'question',
30217
30362
  value: e.detail.value,
30218
30363
  },
30219
- }) }), h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], onExpectedOutcomeChange: onExpectedOutcomeChange })), h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
30364
+ }) }), h("chat-history", { chatHistoryEnabled: testCase.chatHistory?.enabled ?? false, chatHistoryValue: testCase.chatHistory?.value ?? '', onChatHistoryChange: (e) => {
30365
+ const { enabled, value } = e
30366
+ .detail;
30367
+ onChatHistoryChange({
30368
+ detail: {
30369
+ testCaseId: testCase.id,
30370
+ enabled,
30371
+ value,
30372
+ },
30373
+ });
30374
+ } }), h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], dynamicResolutionSupported: dynamicResolutionSupported, onExpectedOutcomeChange: onExpectedOutcomeChange })), h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
30220
30375
  };
30221
30376
 
30222
- const LLMTestCases = ({ testCases, onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, }) => {
30223
- return (h("div", { class: "test-cases" }, h("div", { class: "test-cases__column-headers" }, h("div", { class: "test-cases__column-header" }, "Input"), h("div", { class: "test-cases__column-header" }, "Output"), h("div", { class: "test-cases__column-header" }, "Evaluation"), h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (h(LLMTestCaseRow, { testCase: testCase, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange }))), h("div", { class: "test-cases__add-section" }, h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
30377
+ const LLMTestCases = ({ testCases, dynamicResolutionSupported = false, onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, onChatHistoryChange, }) => {
30378
+ return (h("div", { class: "test-cases" }, h("div", { class: "test-cases__column-headers" }, h("div", { class: "test-cases__column-header" }, "Input"), h("div", { class: "test-cases__column-header" }, "Output"), h("div", { class: "test-cases__column-header" }, "Evaluation"), h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (h(LLMTestCaseRow, { testCase: testCase, dynamicResolutionSupported: dynamicResolutionSupported, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange, onChatHistoryChange: onChatHistoryChange }))), h("div", { class: "test-cases__add-section" }, h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
30224
30379
  };
30225
30380
 
30226
30381
  const tokensCss = () => `:host{--spacing:0.25rem;--spacing-1:calc(var(--spacing) * 1);--spacing-2:calc(var(--spacing) * 2);--spacing-3:calc(var(--spacing) * 3);--spacing-4:calc(var(--spacing) * 4);--spacing-5:calc(var(--spacing) * 5);--spacing-6:calc(var(--spacing) * 6);--spacing-8:calc(var(--spacing) * 8);--spacing-10:calc(var(--spacing) * 10);--spacing-12:calc(var(--spacing) * 12);--spacing-16:calc(var(--spacing) * 16);--spacing-20:calc(var(--spacing) * 20);--spacing-24:calc(var(--spacing) * 24);--radius-none:0;--radius-sm:0.125rem;--radius-md:0.375rem;--radius-lg:0.5rem;--radius-xl:0.75rem;--radius-2xl:1rem;--radius-3xl:1.5rem;--radius-full:9999px;--radius:var(--radius-lg);--font-size-xs:0.75rem;--font-size-sm:0.875rem;--font-size-base:1rem;--font-size-lg:1.125rem;--font-size-xl:1.25rem;--font-size-2xl:1.5rem;--font-size-3xl:1.875rem;--font-size-4xl:2.25rem;--font-weight-normal:400;--font-weight-medium:500;--font-weight-semibold:600;--font-weight-bold:700;--line-height-none:1;--line-height-tight:1.25;--line-height-snug:1.375;--line-height-normal:1.5;--line-height-relaxed:1.625;--line-height-loose:2;--letter-spacing-tight:-0.025em;--letter-spacing-normal:0;--letter-spacing-wide:0.05em;--shadow-sm:0 1px 2px 0 rgba(0, 0, 0, 0.05);--shadow-md:0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);--shadow-lg:0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);--shadow-xl:0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04);--shadow-2xl:0 25px 50px -12px rgba(0, 0, 0, 
0.25);--border-width:1px;--z-base:0;--z-dropdown:1000;--z-sticky:1100;--z-modal:1200;--z-popover:1300;--z-tooltip:1400;--opacity-disabled:0.5;--opacity-hover:0.8;--opacity-muted:0.6;--max-w-sm:24rem;--max-w-md:28rem;--max-w-lg:32rem;--max-w-xl:42rem;--max-w-2xl:48rem;--max-w-full:100%;--breakpoint-sm:640px;--breakpoint-md:768px;--breakpoint-lg:1024px;--breakpoint-xl:1280px;--breakpoint-2xl:1536px;--background:#ffffff;--foreground:#0a0a0a;--card:#ffffff;--card-foreground:#0a0a0a;--popover:#ffffff;--popover-foreground:#0a0a0a;--primary:#0a0a0a;--primary-foreground:#fafafa;--secondary:#f4f4f5;--secondary-foreground:#0a0a0a;--muted:#f4f4f5;--muted-foreground:#71717a;--accent:#f4f4f5;--accent-foreground:#0a0a0a;--destructive:#ef4444;--destructive-foreground:#fafafa;--border:#e4e4e7;--input:#e4e4e7;--ring:#3b82f6;--success:#10b981;--success-foreground:#fafafa;--warning:#f59e0b;--warning-foreground:#fafafa;--info:#3b82f6;--info-foreground:#fafafa}:host([data-theme='dark']){--background:#0a0a0a;--foreground:#fafafa;--card:#171717;--card-foreground:#fafafa;--popover:#171717;--popover-foreground:#fafafa;--primary:#fafafa;--primary-foreground:#0a0a0a;--secondary:#27272a;--secondary-foreground:#fafafa;--muted:#27272a;--muted-foreground:#a1a1aa;--accent:#27272a;--accent-foreground:#fafafa;--destructive:#dc2626;--destructive-foreground:#fafafa;--border:#27272a;--input:#27272a;--ring:#3b82f6;--success:#059669;--success-foreground:#fafafa;--warning:#d97706;--warning-foreground:#fafafa;--info:#2563eb;--info-foreground:#fafafa}`;
@@ -30256,6 +30411,7 @@ const LLMTestRunner = class {
30256
30411
  delayMs = 500;
30257
30412
  useSave = false;
30258
30413
  usePromptEditor = false;
30414
+ resolveExpectedOutcome;
30259
30415
  initialTestCases;
30260
30416
  defaultExpectedOutcomeSchema;
30261
30417
  testCases = [
@@ -30269,6 +30425,7 @@ const LLMTestRunner = class {
30269
30425
  value: '',
30270
30426
  },
30271
30427
  ],
30428
+ chatHistory: { enabled: false, value: '' },
30272
30429
  isRunning: false,
30273
30430
  },
30274
30431
  ];
@@ -30326,6 +30483,12 @@ const LLMTestRunner = class {
30326
30483
  const { testCaseId, key, value } = event.detail;
30327
30484
  this.testCases = this.testCases.map(tc => tc.id === testCaseId ? { ...tc, [key]: value } : tc);
30328
30485
  };
30486
+ handleChatHistoryChange = (event) => {
30487
+ const { testCaseId, enabled, value } = event.detail;
30488
+ this.updateTestCase(testCaseId, {
30489
+ chatHistory: { enabled, value },
30490
+ });
30491
+ };
30329
30492
  addNewTestCase() {
30330
30493
  try {
30331
30494
  const schema = this.getResolvedExpectedOutcomeSchema();
@@ -30342,38 +30505,66 @@ const LLMTestRunner = class {
30342
30505
  updateTestCase(id, updates) {
30343
30506
  this.testCases = this.testCases.map(tc => tc.id === id ? { ...tc, ...updates } : tc);
30344
30507
  }
30508
+ requestLlmText(testCase) {
30509
+ return new Promise((resolve, reject) => {
30510
+ const payload = {
30511
+ prompt: testCase.question,
30512
+ resolve,
30513
+ reject,
30514
+ };
30515
+ if (testCase.chatHistory?.enabled) {
30516
+ payload.chatHistory = testCase.chatHistory.value;
30517
+ }
30518
+ this.llmRequest.emit(payload);
30519
+ });
30520
+ }
30521
+ throwError(reason) {
30522
+ throw reason instanceof Error ? reason : new Error(String(reason));
30523
+ }
30524
+ addErrorMessage(reason, fallback) {
30525
+ return reason instanceof Error ? reason.message : fallback;
30526
+ }
30345
30527
  async runSingleTest(testCase) {
30346
30528
  const startTime = Date.now();
30347
30529
  this.updateTestCase(testCase.id, { isRunning: true });
30348
- return new Promise((resolve, reject) => {
30349
- this.llmRequest.emit({
30350
- prompt: testCase.question,
30351
- resolve: async (aiResponse) => {
30352
- const endTime = Date.now();
30353
- const responseTime = endTime - startTime;
30354
- this.updateTestCase(testCase.id, {
30355
- isRunning: false,
30356
- output: aiResponse,
30357
- error: null,
30358
- responseTime: responseTime,
30359
- });
30360
- await this.evaluateResponse({
30361
- ...testCase,
30362
- output: aiResponse,
30363
- responseTime: responseTime,
30364
- });
30365
- resolve();
30366
- },
30367
- reject: (error) => {
30368
- this.updateTestCase(testCase.id, {
30369
- isRunning: false,
30370
- output: null,
30371
- error: error instanceof Error ? error.message : 'Unknown error',
30372
- });
30373
- reject(error);
30374
- },
30530
+ const [llmSettled, resolutionSettled] = await Promise.allSettled([
30531
+ this.requestLlmText(testCase),
30532
+ resolveDynamicExpectedOutcomes(testCase, this.resolveExpectedOutcome),
30533
+ ]);
30534
+ const responseTime = Date.now() - startTime;
30535
+ if (llmSettled.status === 'rejected') {
30536
+ this.updateTestCase(testCase.id, {
30537
+ isRunning: false,
30538
+ output: null,
30539
+ error: this.addErrorMessage(llmSettled.reason, 'Unknown error'),
30540
+ responseTime,
30375
30541
  });
30542
+ this.throwError(llmSettled.reason);
30543
+ }
30544
+ const aiResponse = llmSettled.value;
30545
+ if (resolutionSettled.status === 'rejected') {
30546
+ this.updateTestCase(testCase.id, {
30547
+ isRunning: false,
30548
+ output: aiResponse,
30549
+ error: this.addErrorMessage(resolutionSettled.reason, 'Failed to resolve dynamic expected outcome.'),
30550
+ responseTime,
30551
+ });
30552
+ this.throwError(resolutionSettled.reason);
30553
+ }
30554
+ const resolvedTestCase = resolutionSettled.value;
30555
+ const forEvaluationTestCase = {
30556
+ ...resolvedTestCase,
30557
+ output: aiResponse,
30558
+ responseTime,
30559
+ };
30560
+ this.updateTestCase(testCase.id, {
30561
+ isRunning: false,
30562
+ output: aiResponse,
30563
+ error: null,
30564
+ responseTime,
30565
+ expectedOutcome: forEvaluationTestCase.expectedOutcome,
30376
30566
  });
30567
+ await this.evaluateResponse(forEvaluationTestCase);
30377
30568
  }
30378
30569
  deleteTestCase(id) {
30379
30570
  this.testCases = this.testCases.filter(tc => tc.id !== id);
@@ -30484,7 +30675,7 @@ const LLMTestRunner = class {
30484
30675
  }
30485
30676
  }
30486
30677
  render() {
30487
- return (h("div", { key: '29cf8a93402ebad6f6df43e147fa10406577c9aa', class: "test-runner-container" }, h(LLMTestRunnerHeader, { key: 'a07d3d1d823f8d473808752932cd1b2ab72d9e08', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), h(ErrorMessage, { key: 'ec68912728b06fc4a76c330fb1b7d5acde92c3d1', message: this.error, onClear: () => (this.error = '') }), h("div", { key: 'ce308dd4bd5437c94ae6e3e8a28970b799865281', class: "test-runner-container__content" }, h(LLMTestCases, { key: '3368df0bb7de4d099da1fad400f59dfc9a2cfb62', testCases: this.testCases, onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange }))));
30678
+ return (h("div", { key: 'cc808096f929b2e1c570c53144aab195d177c187', class: "test-runner-container" }, h(LLMTestRunnerHeader, { key: 'b91cf3df7df0e95bfd4908a2f91c7310b5b7a09a', isExportingTestSuite: this.isExportingTestSuite, isExportingTestResults: this.isExportingTestResults, isRunningAll: this.isRunningAll, useSave: this.useSave, isSaving: this.isSaving, usePromptEditor: this.usePromptEditor, onImport: file => this.handleImport(file), onExportSuite: () => this.handleExportTestSuite(), onExportResults: () => this.handleExportTestResults(), onRunAll: () => this.runAllTests(), onSave: () => this.handleSave() }), h(ErrorMessage, { key: 'c7991497173fa9843e7aa42f5283d0897ddff2e2', message: this.error, onClear: () => (this.error = '') }), h("div", { key: '2b57132564442b8047d8672c6adcba62cdc9ae87', class: "test-runner-container__content" }, h(LLMTestCases, { key: '146e9d8c76a34980a2a274dd856887c22e1ed0e9', testCases: this.testCases, dynamicResolutionSupported: !!this.resolveExpectedOutcome, onRun: testCase => this.runSingleTest(testCase).catch(() => { }), onDelete: id => this.deleteTestCase(id), onAddTestCase: () => this.addNewTestCase(), handleTestCaseChange: this.handleTestCaseChange, onExpectedOutcomeChange: this.handleExpectedOutcomeChange, onChatHistoryChange: this.handleChatHistoryChange }))));
30488
30679
  }
30489
30680
  };
30490
30681
  LLMTestRunner.style = tokensCss() + (llmTestRunnerCss() + (llmTestRunnerHeaderCss() + (llmTestCasesCss() + (llmTestCaseRowCss() + (rowActionsCss() + (evaluationSummaryCss() + (responseOutputCss() + (errorMessageCss() + (buttonCss() + iconButtonCss())))))))));