llm-testrunner-components 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/LICENSE +1 -1
  2. package/README.md +165 -242
  3. package/dist/cjs/index.cjs.js +298 -232
  4. package/dist/cjs/index.cjs.js.map +1 -1
  5. package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js +25 -54
  6. package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js.map +1 -1
  7. package/dist/collection/components/llm-test-runner/llm-test-runner.js +6 -49
  8. package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
  9. package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.css +60 -21
  10. package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.js +3 -1
  11. package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.js.map +1 -1
  12. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js +31 -11
  13. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js.map +1 -1
  14. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.css +17 -0
  15. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js +2 -12
  16. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map +1 -1
  17. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js +2 -2
  18. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map +1 -1
  19. package/dist/collection/lib/evaluation/evaluation-engine.js +63 -42
  20. package/dist/collection/lib/evaluation/evaluation-engine.js.map +1 -1
  21. package/dist/collection/lib/evaluation/evaluation-service.js +15 -3
  22. package/dist/collection/lib/evaluation/evaluation-service.js.map +1 -1
  23. package/dist/collection/lib/evaluation/{rouge1-evaluator.test.js → evaluators/rouge1-evaluator.test.js} +2 -2
  24. package/dist/collection/lib/evaluation/evaluators/rouge1-evaluator.test.js.map +1 -0
  25. package/dist/collection/lib/evaluation/field-evaluation-approach.js +24 -0
  26. package/dist/collection/lib/evaluation/field-evaluation-approach.js.map +1 -0
  27. package/dist/collection/lib/evaluation/index.js +0 -4
  28. package/dist/collection/lib/evaluation/index.js.map +1 -1
  29. package/dist/collection/lib/evaluation/types.js.map +1 -1
  30. package/dist/collection/lib/import-export/test-results-csv.js +47 -33
  31. package/dist/collection/lib/import-export/test-results-csv.js.map +1 -1
  32. package/dist/collection/lib/import-export/test-suite-exporter.js +0 -1
  33. package/dist/collection/lib/import-export/test-suite-exporter.js.map +1 -1
  34. package/dist/collection/lib/test-cases/test-case-factory.js +17 -27
  35. package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -1
  36. package/dist/collection/lib/test-cases/test-case-mutations.js +60 -9
  37. package/dist/collection/lib/test-cases/test-case-mutations.js.map +1 -1
  38. package/dist/collection/schemas/expected-outcome.js +20 -2
  39. package/dist/collection/schemas/expected-outcome.js.map +1 -1
  40. package/dist/collection/schemas/test-case.js +2 -20
  41. package/dist/collection/schemas/test-case.js.map +1 -1
  42. package/dist/collection/types/llm-test-runner.js.map +1 -1
  43. package/dist/collection/types/test-case.js.map +1 -1
  44. package/dist/components/index.js +1 -1
  45. package/dist/components/llm-test-runner.js +1 -1
  46. package/dist/components/p-Bb89MYYu.js +7 -0
  47. package/dist/components/p-Bb89MYYu.js.map +1 -0
  48. package/dist/esm/index.js +298 -232
  49. package/dist/esm/index.js.map +1 -1
  50. package/dist/llm-testrunner/index.esm.js +2 -2
  51. package/dist/llm-testrunner/index.esm.js.map +1 -1
  52. package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +0 -1
  53. package/dist/types/components/llm-test-runner/test-cases/expected-outcome-renderer.d.ts +3 -6
  54. package/dist/types/components/llm-test-runner/test-cases/llm-test-case-row.d.ts +0 -2
  55. package/dist/types/components/llm-test-runner/test-cases/llm-test-cases.d.ts +0 -2
  56. package/dist/types/lib/evaluation/evaluation-engine.d.ts +4 -2
  57. package/dist/types/lib/evaluation/field-evaluation-approach.d.ts +6 -0
  58. package/dist/types/lib/evaluation/index.d.ts +0 -1
  59. package/dist/types/lib/evaluation/types.d.ts +26 -0
  60. package/dist/types/lib/import-export/test-suite-exporter.d.ts +0 -4
  61. package/dist/types/lib/test-cases/test-case-factory.d.ts +2 -3
  62. package/dist/types/lib/test-cases/test-case-mutations.d.ts +21 -5
  63. package/dist/types/schemas/expected-outcome.d.ts +65 -17
  64. package/dist/types/schemas/test-case.d.ts +51 -95
  65. package/dist/types/types/llm-test-runner.d.ts +1 -1
  66. package/dist/types/types/test-case.d.ts +1 -1
  67. package/package.json +9 -2
  68. package/dist/collection/lib/evaluation/rouge1-evaluator.test.js.map +0 -1
  69. package/dist/components/p-BF90yb1z.js +0 -7
  70. package/dist/components/p-BF90yb1z.js.map +0 -1
  71. package/dist/types/lib/evaluation/{rouge1-evaluator.test.d.ts → evaluators/rouge1-evaluator.test.d.ts} +0 -0
@@ -24,7 +24,6 @@ export declare class LLMTestRunner {
24
24
  private updateTestCase;
25
25
  private runSingleTest;
26
26
  private deleteTestCase;
27
- private updateApproach;
28
27
  private handleExpectedOutcomeChange;
29
28
  private evaluateResponse;
30
29
  private runAllTests;
@@ -1,12 +1,9 @@
1
1
  import { FunctionalComponent } from '../../../stencil-public-runtime';
2
2
  import { ExpectedOutcomeField } from '../../../types/llm-test-runner';
3
- export type ExpectedOutcomeOperation = 'set-value' | 'add-chip' | 'remove-chip';
4
- export interface ExpectedOutcomeChangeDetail {
3
+ import { ExpectedOutcomeChange } from '../../../lib/test-cases/test-case-mutations';
4
+ export type ExpectedOutcomeChangeDetail = {
5
5
  testCaseId: string;
6
- index: number;
7
- operation: ExpectedOutcomeOperation;
8
- value?: string;
9
- }
6
+ } & ExpectedOutcomeChange;
10
7
  interface ExpectedOutcomeRendererProps {
11
8
  testCaseId: string;
12
9
  fields: ExpectedOutcomeField[];
@@ -1,12 +1,10 @@
1
1
  import { FunctionalComponent } from '../../../stencil-public-runtime';
2
2
  import { TestCase } from '../../../types/llm-test-runner';
3
- import { EvaluationApproach } from '../../../lib/evaluation/constants';
4
3
  import { ExpectedOutcomeChangeDetail } from './expected-outcome-renderer';
5
4
  export interface LLMTestCaseRowProps {
6
5
  testCase: TestCase;
7
6
  onRun: (testCase: TestCase) => void;
8
7
  onDelete: (id: string) => void;
9
- onUpdateApproach: (testCase: TestCase, approach: EvaluationApproach) => void;
10
8
  handleTestCaseChange: (e: CustomEvent<{
11
9
  testCaseId: string;
12
10
  key: string;
@@ -1,12 +1,10 @@
1
1
  import { FunctionalComponent } from '../../../stencil-public-runtime';
2
2
  import { TestCase } from '../../../types/llm-test-runner';
3
- import { EvaluationApproach } from '../../../lib/evaluation/constants';
4
3
  import { ExpectedOutcomeChangeDetail } from './expected-outcome-renderer';
5
4
  export interface LLMTestCasesProps {
6
5
  testCases: TestCase[];
7
6
  onRun: (testCase: TestCase) => void;
8
7
  onDelete: (id: string) => void;
9
- onUpdateApproach: (testCase: TestCase, approach: EvaluationApproach) => void;
10
8
  onAddTestCase: () => void;
11
9
  handleTestCaseChange: (e: CustomEvent<{
12
10
  testCaseId: string;
@@ -1,4 +1,6 @@
1
- import { EvaluationRequest, EvaluationCallback } from './types';
1
+ import { EvaluationCallback, EvaluationRequestV2 } from './types';
2
2
  export declare class LLMEvaluationEngine {
3
- evaluateResponse(request: EvaluationRequest, callback: EvaluationCallback): Promise<void>;
3
+ evaluateResponse(request: EvaluationRequestV2, callback: EvaluationCallback): Promise<void>;
4
+ private evaluateField;
5
+ private getSafeErrorMessage;
4
6
  }
@@ -0,0 +1,6 @@
1
+ import { EvaluationApproach } from './constants';
2
+ import type { EvaluationParameters } from '../../types/evaluation';
3
+ export type EvaluationFieldType = 'text' | 'textarea' | 'chips-input' | 'select';
4
+ export declare function getAllowedApproachesForFieldType(fieldType: EvaluationFieldType): EvaluationApproach[];
5
+ export declare function isApproachAllowedForFieldType(fieldType: EvaluationFieldType, approach: EvaluationApproach): boolean;
6
+ export declare function normalizeEvaluationParametersForField(fieldType: EvaluationFieldType, evaluationParameters?: EvaluationParameters): EvaluationParameters;
@@ -2,4 +2,3 @@ import { LLMEvaluationEngine } from './evaluation-engine';
2
2
  import type { EvaluationRequest, EvaluationResult, KeywordMatch, EvaluationCallback } from './types';
3
3
  export { LLMEvaluationEngine };
4
4
  export type { EvaluationRequest, EvaluationResult, KeywordMatch, EvaluationCallback, };
5
- export declare function evaluateLLMResponse(request: EvaluationRequest, callback: EvaluationCallback): Promise<void>;
@@ -1,4 +1,5 @@
1
1
  import { EvaluationParameters, EvaluationApproachResult } from '../../types/evaluation';
2
+ import type { ExpectedOutcomeFieldType } from '../../types/llm-test-runner';
2
3
  export interface EvaluationRequest {
3
4
  testCaseId: string;
4
5
  question: string;
@@ -6,13 +7,38 @@ export interface EvaluationRequest {
6
7
  actualResponse: string;
7
8
  evaluationParameters: EvaluationParameters;
8
9
  }
10
+ export interface FieldEvaluationInput {
11
+ index: number;
12
+ label: string;
13
+ type: ExpectedOutcomeFieldType;
14
+ expectedValue: string;
15
+ evaluationParameters: EvaluationParameters;
16
+ }
17
+ export interface EvaluationRequestV2 {
18
+ testCaseId: string;
19
+ question: string;
20
+ actualResponse: string;
21
+ fields: FieldEvaluationInput[];
22
+ }
9
23
  export interface EvaluationResult {
10
24
  testCaseId: string;
11
25
  passed: boolean;
12
26
  keywordMatches: KeywordMatch[];
27
+ fieldResults?: FieldEvaluationResult[];
13
28
  timestamp?: string;
29
+ evaluationParameters?: EvaluationParameters;
30
+ evaluationApproachResult?: EvaluationApproachResult;
31
+ }
32
+ export interface FieldEvaluationResult {
33
+ index: number;
34
+ label: string;
35
+ type: ExpectedOutcomeFieldType;
36
+ expectedValue: string;
37
+ passed: boolean;
38
+ keywordMatches: KeywordMatch[];
14
39
  evaluationParameters: EvaluationParameters;
15
40
  evaluationApproachResult: EvaluationApproachResult;
41
+ error?: string;
16
42
  }
17
43
  export interface KeywordMatch {
18
44
  keyword: string;
@@ -3,10 +3,6 @@ export interface TestSuiteExportData {
3
3
  id: string;
4
4
  question: string;
5
5
  expectedOutcome: ExpectedOutcomeField[];
6
- evaluationParameters?: {
7
- approach: string;
8
- threshold?: number;
9
- };
10
6
  }
11
7
  /**
12
8
  * Formats test cases as a JSON string suitable for saving as a test suite
@@ -6,11 +6,10 @@ export declare const DEFAULT_EXPECTED_OUTCOME_SCHEMA: ExpectedOutcomeSchema;
6
6
  */
7
7
  export declare function createTestCase(expectedOutcomeSchema?: ExpectedOutcomeSchema): TestCase;
8
8
  export declare function createExpectedOutcomeFromSchema(expectedOutcomeSchema: ExpectedOutcomeSchema): ExpectedOutcomeField[];
9
- export declare function migrateLegacyExpectedOutcomeString(value: string): ExpectedOutcomeField[];
10
9
  /**
11
10
  * Creates a runtime test case from validated input data.
12
- * The input is expected to already satisfy `TestCaseInput` (legacy string or v2 shape),
13
- * and this function only performs normalization/defaulting (including legacy migration).
11
+ * The input is expected to already satisfy `TestCaseInput`,
12
+ * and this function only performs normalization/defaulting.
14
13
  *
15
14
  * @param data - Validated test case input
16
15
  * @returns A normalized TestCase object with runtime defaults applied
@@ -1,9 +1,25 @@
1
1
  import { TestCase } from '../../types/llm-test-runner';
2
2
  import { EvaluationApproach } from '../evaluation/constants';
3
+ export type ExpectedOutcomeChange = {
4
+ index: number;
5
+ operation: 'set-value';
6
+ value: string;
7
+ } | {
8
+ index: number;
9
+ operation: 'add-chip';
10
+ value: string;
11
+ } | {
12
+ index: number;
13
+ operation: 'remove-chip';
14
+ value: string;
15
+ } | {
16
+ index: number;
17
+ operation: 'set-evaluation-approach';
18
+ value: EvaluationApproach;
19
+ };
20
+ export declare function applyExpectedOutcomeChange(testCase: TestCase, change: ExpectedOutcomeChange): TestCase;
3
21
  /**
4
- * Updates the evaluation approach for a test case
5
- * @param testCase - The test case to update
6
- * @param approach - The new evaluation approach
7
- * @returns Updated test case with the new evaluation approach
22
+ * Updates the evaluation approach for a specific expected outcome field.
23
+ * Select fields always use exact matching.
8
24
  */
9
- export declare function updateApproach(testCase: TestCase, approach: EvaluationApproach): TestCase;
25
+ export declare function updateExpectedOutcomeFieldApproach(testCase: TestCase, fieldIndex: number, approach: EvaluationApproach): TestCase;
@@ -1,107 +1,155 @@
1
1
  import { z } from 'zod';
2
+ import { EvaluationApproach } from '../lib/evaluation/constants';
2
3
  declare const defaultExpectedOutcomeBaseSchema: z.ZodObject<{
3
4
  label: z.ZodString;
4
- required: z.ZodOptional<z.ZodBoolean>;
5
5
  placeholder: z.ZodOptional<z.ZodString>;
6
6
  }, z.core.$strip>;
7
7
  export declare const expectedOutcomeSchemaFieldSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
8
8
  label: z.ZodString;
9
- required: z.ZodOptional<z.ZodBoolean>;
10
9
  placeholder: z.ZodOptional<z.ZodString>;
11
10
  type: z.ZodLiteral<"text">;
11
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
12
+ approach: z.ZodEnum<typeof EvaluationApproach>;
13
+ threshold: z.ZodOptional<z.ZodNumber>;
14
+ }, z.core.$strip>>;
12
15
  }, z.core.$strip>, z.ZodObject<{
13
16
  label: z.ZodString;
14
- required: z.ZodOptional<z.ZodBoolean>;
15
17
  placeholder: z.ZodOptional<z.ZodString>;
16
18
  type: z.ZodLiteral<"textarea">;
17
19
  rows: z.ZodOptional<z.ZodNumber>;
20
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
21
+ approach: z.ZodEnum<typeof EvaluationApproach>;
22
+ threshold: z.ZodOptional<z.ZodNumber>;
23
+ }, z.core.$strip>>;
18
24
  }, z.core.$strip>, z.ZodObject<{
19
25
  label: z.ZodString;
20
- required: z.ZodOptional<z.ZodBoolean>;
21
26
  placeholder: z.ZodOptional<z.ZodString>;
22
27
  type: z.ZodLiteral<"chips-input">;
28
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
29
+ approach: z.ZodEnum<typeof EvaluationApproach>;
30
+ threshold: z.ZodOptional<z.ZodNumber>;
31
+ }, z.core.$strip>>;
23
32
  }, z.core.$strip>, z.ZodObject<{
24
33
  label: z.ZodString;
25
- required: z.ZodOptional<z.ZodBoolean>;
26
34
  placeholder: z.ZodOptional<z.ZodString>;
27
35
  type: z.ZodLiteral<"select">;
28
36
  options: z.ZodArray<z.ZodString>;
37
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
38
+ approach: z.ZodEnum<typeof EvaluationApproach>;
39
+ threshold: z.ZodOptional<z.ZodNumber>;
40
+ }, z.core.$strip>>;
29
41
  }, z.core.$strip>], "type">;
30
42
  export declare const expectedOutcomeSchemaSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
31
43
  label: z.ZodString;
32
- required: z.ZodOptional<z.ZodBoolean>;
33
44
  placeholder: z.ZodOptional<z.ZodString>;
34
45
  type: z.ZodLiteral<"text">;
46
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
47
+ approach: z.ZodEnum<typeof EvaluationApproach>;
48
+ threshold: z.ZodOptional<z.ZodNumber>;
49
+ }, z.core.$strip>>;
35
50
  }, z.core.$strip>, z.ZodObject<{
36
51
  label: z.ZodString;
37
- required: z.ZodOptional<z.ZodBoolean>;
38
52
  placeholder: z.ZodOptional<z.ZodString>;
39
53
  type: z.ZodLiteral<"textarea">;
40
54
  rows: z.ZodOptional<z.ZodNumber>;
55
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
56
+ approach: z.ZodEnum<typeof EvaluationApproach>;
57
+ threshold: z.ZodOptional<z.ZodNumber>;
58
+ }, z.core.$strip>>;
41
59
  }, z.core.$strip>, z.ZodObject<{
42
60
  label: z.ZodString;
43
- required: z.ZodOptional<z.ZodBoolean>;
44
61
  placeholder: z.ZodOptional<z.ZodString>;
45
62
  type: z.ZodLiteral<"chips-input">;
63
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
64
+ approach: z.ZodEnum<typeof EvaluationApproach>;
65
+ threshold: z.ZodOptional<z.ZodNumber>;
66
+ }, z.core.$strip>>;
46
67
  }, z.core.$strip>, z.ZodObject<{
47
68
  label: z.ZodString;
48
- required: z.ZodOptional<z.ZodBoolean>;
49
69
  placeholder: z.ZodOptional<z.ZodString>;
50
70
  type: z.ZodLiteral<"select">;
51
71
  options: z.ZodArray<z.ZodString>;
72
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
73
+ approach: z.ZodEnum<typeof EvaluationApproach>;
74
+ threshold: z.ZodOptional<z.ZodNumber>;
75
+ }, z.core.$strip>>;
52
76
  }, z.core.$strip>], "type">>;
53
77
  export declare const expectedOutcomeFieldSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
54
78
  label: z.ZodString;
55
- required: z.ZodOptional<z.ZodBoolean>;
56
79
  placeholder: z.ZodOptional<z.ZodString>;
57
80
  type: z.ZodLiteral<"text">;
81
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
82
+ approach: z.ZodEnum<typeof EvaluationApproach>;
83
+ threshold: z.ZodOptional<z.ZodNumber>;
84
+ }, z.core.$strip>>;
58
85
  value: z.ZodString;
59
86
  }, z.core.$strip>, z.ZodObject<{
60
87
  label: z.ZodString;
61
- required: z.ZodOptional<z.ZodBoolean>;
62
88
  placeholder: z.ZodOptional<z.ZodString>;
63
89
  type: z.ZodLiteral<"textarea">;
64
90
  rows: z.ZodOptional<z.ZodNumber>;
91
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
92
+ approach: z.ZodEnum<typeof EvaluationApproach>;
93
+ threshold: z.ZodOptional<z.ZodNumber>;
94
+ }, z.core.$strip>>;
65
95
  value: z.ZodString;
66
96
  }, z.core.$strip>, z.ZodObject<{
67
97
  label: z.ZodString;
68
- required: z.ZodOptional<z.ZodBoolean>;
69
98
  placeholder: z.ZodOptional<z.ZodString>;
70
99
  type: z.ZodLiteral<"chips-input">;
100
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
101
+ approach: z.ZodEnum<typeof EvaluationApproach>;
102
+ threshold: z.ZodOptional<z.ZodNumber>;
103
+ }, z.core.$strip>>;
71
104
  value: z.ZodArray<z.ZodString>;
72
105
  }, z.core.$strip>, z.ZodObject<{
73
106
  label: z.ZodString;
74
- required: z.ZodOptional<z.ZodBoolean>;
75
107
  placeholder: z.ZodOptional<z.ZodString>;
76
108
  type: z.ZodLiteral<"select">;
77
109
  options: z.ZodArray<z.ZodString>;
110
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
111
+ approach: z.ZodEnum<typeof EvaluationApproach>;
112
+ threshold: z.ZodOptional<z.ZodNumber>;
113
+ }, z.core.$strip>>;
78
114
  value: z.ZodString;
79
115
  }, z.core.$strip>], "type">;
80
116
  export declare const expectedOutcomeArraySchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
81
117
  label: z.ZodString;
82
- required: z.ZodOptional<z.ZodBoolean>;
83
118
  placeholder: z.ZodOptional<z.ZodString>;
84
119
  type: z.ZodLiteral<"text">;
120
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
121
+ approach: z.ZodEnum<typeof EvaluationApproach>;
122
+ threshold: z.ZodOptional<z.ZodNumber>;
123
+ }, z.core.$strip>>;
85
124
  value: z.ZodString;
86
125
  }, z.core.$strip>, z.ZodObject<{
87
126
  label: z.ZodString;
88
- required: z.ZodOptional<z.ZodBoolean>;
89
127
  placeholder: z.ZodOptional<z.ZodString>;
90
128
  type: z.ZodLiteral<"textarea">;
91
129
  rows: z.ZodOptional<z.ZodNumber>;
130
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
131
+ approach: z.ZodEnum<typeof EvaluationApproach>;
132
+ threshold: z.ZodOptional<z.ZodNumber>;
133
+ }, z.core.$strip>>;
92
134
  value: z.ZodString;
93
135
  }, z.core.$strip>, z.ZodObject<{
94
136
  label: z.ZodString;
95
- required: z.ZodOptional<z.ZodBoolean>;
96
137
  placeholder: z.ZodOptional<z.ZodString>;
97
138
  type: z.ZodLiteral<"chips-input">;
139
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
140
+ approach: z.ZodEnum<typeof EvaluationApproach>;
141
+ threshold: z.ZodOptional<z.ZodNumber>;
142
+ }, z.core.$strip>>;
98
143
  value: z.ZodArray<z.ZodString>;
99
144
  }, z.core.$strip>, z.ZodObject<{
100
145
  label: z.ZodString;
101
- required: z.ZodOptional<z.ZodBoolean>;
102
146
  placeholder: z.ZodOptional<z.ZodString>;
103
147
  type: z.ZodLiteral<"select">;
104
148
  options: z.ZodArray<z.ZodString>;
149
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
150
+ approach: z.ZodEnum<typeof EvaluationApproach>;
151
+ threshold: z.ZodOptional<z.ZodNumber>;
152
+ }, z.core.$strip>>;
105
153
  value: z.ZodString;
106
154
  }, z.core.$strip>], "type">>;
107
155
  export type ExpectedOutcomeSchemaField = z.infer<typeof expectedOutcomeSchemaFieldSchema>;
@@ -1,184 +1,140 @@
1
1
  import { z } from 'zod';
2
- import { EvaluationApproach } from '../lib/evaluation/constants';
3
2
  import type { EvaluationResult } from '../lib/evaluation/types';
4
- export declare const evaluationParametersSchema: z.ZodObject<{
5
- approach: z.ZodEnum<typeof EvaluationApproach>;
6
- threshold: z.ZodOptional<z.ZodNumber>;
7
- }, z.core.$strip>;
8
- export declare const legacyTestCaseInputSchema: z.ZodObject<{
3
+ export declare const testCaseInputSchema: z.ZodObject<{
9
4
  id: z.ZodString;
10
5
  question: z.ZodString;
11
- evaluationParameters: z.ZodOptional<z.ZodObject<{
12
- approach: z.ZodEnum<typeof EvaluationApproach>;
13
- threshold: z.ZodOptional<z.ZodNumber>;
14
- }, z.core.$strip>>;
15
- expectedOutcome: z.ZodString;
16
- }, z.core.$strip>;
17
- export declare const v2TestCaseInputSchema: z.ZodObject<{
18
- id: z.ZodString;
19
- question: z.ZodString;
20
- evaluationParameters: z.ZodOptional<z.ZodObject<{
21
- approach: z.ZodEnum<typeof EvaluationApproach>;
22
- threshold: z.ZodOptional<z.ZodNumber>;
23
- }, z.core.$strip>>;
24
6
  expectedOutcome: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
25
7
  label: z.ZodString;
26
- required: z.ZodOptional<z.ZodBoolean>;
27
8
  placeholder: z.ZodOptional<z.ZodString>;
28
9
  type: z.ZodLiteral<"text">;
10
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
11
+ approach: z.ZodEnum<typeof import("../lib/evaluation/constants").EvaluationApproach>;
12
+ threshold: z.ZodOptional<z.ZodNumber>;
13
+ }, z.core.$strip>>;
29
14
  value: z.ZodString;
30
15
  }, z.core.$strip>, z.ZodObject<{
31
16
  label: z.ZodString;
32
- required: z.ZodOptional<z.ZodBoolean>;
33
17
  placeholder: z.ZodOptional<z.ZodString>;
34
18
  type: z.ZodLiteral<"textarea">;
35
19
  rows: z.ZodOptional<z.ZodNumber>;
20
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
21
+ approach: z.ZodEnum<typeof import("../lib/evaluation/constants").EvaluationApproach>;
22
+ threshold: z.ZodOptional<z.ZodNumber>;
23
+ }, z.core.$strip>>;
36
24
  value: z.ZodString;
37
25
  }, z.core.$strip>, z.ZodObject<{
38
26
  label: z.ZodString;
39
- required: z.ZodOptional<z.ZodBoolean>;
40
27
  placeholder: z.ZodOptional<z.ZodString>;
41
28
  type: z.ZodLiteral<"chips-input">;
29
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
30
+ approach: z.ZodEnum<typeof import("../lib/evaluation/constants").EvaluationApproach>;
31
+ threshold: z.ZodOptional<z.ZodNumber>;
32
+ }, z.core.$strip>>;
42
33
  value: z.ZodArray<z.ZodString>;
43
34
  }, z.core.$strip>, z.ZodObject<{
44
35
  label: z.ZodString;
45
- required: z.ZodOptional<z.ZodBoolean>;
46
36
  placeholder: z.ZodOptional<z.ZodString>;
47
37
  type: z.ZodLiteral<"select">;
48
38
  options: z.ZodArray<z.ZodString>;
39
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
40
+ approach: z.ZodEnum<typeof import("../lib/evaluation/constants").EvaluationApproach>;
41
+ threshold: z.ZodOptional<z.ZodNumber>;
42
+ }, z.core.$strip>>;
49
43
  value: z.ZodString;
50
44
  }, z.core.$strip>], "type">>;
51
45
  }, z.core.$strip>;
52
- export declare const testCaseInputSchema: z.ZodUnion<readonly [z.ZodObject<{
53
- id: z.ZodString;
54
- question: z.ZodString;
55
- evaluationParameters: z.ZodOptional<z.ZodObject<{
56
- approach: z.ZodEnum<typeof EvaluationApproach>;
57
- threshold: z.ZodOptional<z.ZodNumber>;
58
- }, z.core.$strip>>;
59
- expectedOutcome: z.ZodString;
60
- }, z.core.$strip>, z.ZodObject<{
61
- id: z.ZodString;
62
- question: z.ZodString;
63
- evaluationParameters: z.ZodOptional<z.ZodObject<{
64
- approach: z.ZodEnum<typeof EvaluationApproach>;
65
- threshold: z.ZodOptional<z.ZodNumber>;
66
- }, z.core.$strip>>;
67
- expectedOutcome: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
68
- label: z.ZodString;
69
- required: z.ZodOptional<z.ZodBoolean>;
70
- placeholder: z.ZodOptional<z.ZodString>;
71
- type: z.ZodLiteral<"text">;
72
- value: z.ZodString;
73
- }, z.core.$strip>, z.ZodObject<{
74
- label: z.ZodString;
75
- required: z.ZodOptional<z.ZodBoolean>;
76
- placeholder: z.ZodOptional<z.ZodString>;
77
- type: z.ZodLiteral<"textarea">;
78
- rows: z.ZodOptional<z.ZodNumber>;
79
- value: z.ZodString;
80
- }, z.core.$strip>, z.ZodObject<{
81
- label: z.ZodString;
82
- required: z.ZodOptional<z.ZodBoolean>;
83
- placeholder: z.ZodOptional<z.ZodString>;
84
- type: z.ZodLiteral<"chips-input">;
85
- value: z.ZodArray<z.ZodString>;
86
- }, z.core.$strip>, z.ZodObject<{
87
- label: z.ZodString;
88
- required: z.ZodOptional<z.ZodBoolean>;
89
- placeholder: z.ZodOptional<z.ZodString>;
90
- type: z.ZodLiteral<"select">;
91
- options: z.ZodArray<z.ZodString>;
92
- value: z.ZodString;
93
- }, z.core.$strip>], "type">>;
94
- }, z.core.$strip>]>;
95
- export declare const testCaseInputArraySchema: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
96
- id: z.ZodString;
97
- question: z.ZodString;
98
- evaluationParameters: z.ZodOptional<z.ZodObject<{
99
- approach: z.ZodEnum<typeof EvaluationApproach>;
100
- threshold: z.ZodOptional<z.ZodNumber>;
101
- }, z.core.$strip>>;
102
- expectedOutcome: z.ZodString;
103
- }, z.core.$strip>, z.ZodObject<{
46
+ export declare const testCaseInputArraySchema: z.ZodArray<z.ZodObject<{
104
47
  id: z.ZodString;
105
48
  question: z.ZodString;
106
- evaluationParameters: z.ZodOptional<z.ZodObject<{
107
- approach: z.ZodEnum<typeof EvaluationApproach>;
108
- threshold: z.ZodOptional<z.ZodNumber>;
109
- }, z.core.$strip>>;
110
49
  expectedOutcome: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
111
50
  label: z.ZodString;
112
- required: z.ZodOptional<z.ZodBoolean>;
113
51
  placeholder: z.ZodOptional<z.ZodString>;
114
52
  type: z.ZodLiteral<"text">;
53
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
54
+ approach: z.ZodEnum<typeof import("../lib/evaluation/constants").EvaluationApproach>;
55
+ threshold: z.ZodOptional<z.ZodNumber>;
56
+ }, z.core.$strip>>;
115
57
  value: z.ZodString;
116
58
  }, z.core.$strip>, z.ZodObject<{
117
59
  label: z.ZodString;
118
- required: z.ZodOptional<z.ZodBoolean>;
119
60
  placeholder: z.ZodOptional<z.ZodString>;
120
61
  type: z.ZodLiteral<"textarea">;
121
62
  rows: z.ZodOptional<z.ZodNumber>;
63
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
64
+ approach: z.ZodEnum<typeof import("../lib/evaluation/constants").EvaluationApproach>;
65
+ threshold: z.ZodOptional<z.ZodNumber>;
66
+ }, z.core.$strip>>;
122
67
  value: z.ZodString;
123
68
  }, z.core.$strip>, z.ZodObject<{
124
69
  label: z.ZodString;
125
- required: z.ZodOptional<z.ZodBoolean>;
126
70
  placeholder: z.ZodOptional<z.ZodString>;
127
71
  type: z.ZodLiteral<"chips-input">;
72
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
73
+ approach: z.ZodEnum<typeof import("../lib/evaluation/constants").EvaluationApproach>;
74
+ threshold: z.ZodOptional<z.ZodNumber>;
75
+ }, z.core.$strip>>;
128
76
  value: z.ZodArray<z.ZodString>;
129
77
  }, z.core.$strip>, z.ZodObject<{
130
78
  label: z.ZodString;
131
- required: z.ZodOptional<z.ZodBoolean>;
132
79
  placeholder: z.ZodOptional<z.ZodString>;
133
80
  type: z.ZodLiteral<"select">;
134
81
  options: z.ZodArray<z.ZodString>;
82
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
83
+ approach: z.ZodEnum<typeof import("../lib/evaluation/constants").EvaluationApproach>;
84
+ threshold: z.ZodOptional<z.ZodNumber>;
85
+ }, z.core.$strip>>;
135
86
  value: z.ZodString;
136
87
  }, z.core.$strip>], "type">>;
137
- }, z.core.$strip>]>>;
88
+ }, z.core.$strip>>;
138
89
  export declare const testCaseSchema: z.ZodObject<{
139
90
  id: z.ZodString;
140
91
  question: z.ZodString;
141
92
  expectedOutcome: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
142
93
  label: z.ZodString;
143
- required: z.ZodOptional<z.ZodBoolean>;
144
94
  placeholder: z.ZodOptional<z.ZodString>;
145
95
  type: z.ZodLiteral<"text">;
96
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
97
+ approach: z.ZodEnum<typeof import("../lib/evaluation/constants").EvaluationApproach>;
98
+ threshold: z.ZodOptional<z.ZodNumber>;
99
+ }, z.core.$strip>>;
146
100
  value: z.ZodString;
147
101
  }, z.core.$strip>, z.ZodObject<{
148
102
  label: z.ZodString;
149
- required: z.ZodOptional<z.ZodBoolean>;
150
103
  placeholder: z.ZodOptional<z.ZodString>;
151
104
  type: z.ZodLiteral<"textarea">;
152
105
  rows: z.ZodOptional<z.ZodNumber>;
106
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
107
+ approach: z.ZodEnum<typeof import("../lib/evaluation/constants").EvaluationApproach>;
108
+ threshold: z.ZodOptional<z.ZodNumber>;
109
+ }, z.core.$strip>>;
153
110
  value: z.ZodString;
154
111
  }, z.core.$strip>, z.ZodObject<{
155
112
  label: z.ZodString;
156
- required: z.ZodOptional<z.ZodBoolean>;
157
113
  placeholder: z.ZodOptional<z.ZodString>;
158
114
  type: z.ZodLiteral<"chips-input">;
115
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
116
+ approach: z.ZodEnum<typeof import("../lib/evaluation/constants").EvaluationApproach>;
117
+ threshold: z.ZodOptional<z.ZodNumber>;
118
+ }, z.core.$strip>>;
159
119
  value: z.ZodArray<z.ZodString>;
160
120
  }, z.core.$strip>, z.ZodObject<{
161
121
  label: z.ZodString;
162
- required: z.ZodOptional<z.ZodBoolean>;
163
122
  placeholder: z.ZodOptional<z.ZodString>;
164
123
  type: z.ZodLiteral<"select">;
165
124
  options: z.ZodArray<z.ZodString>;
125
+ evaluationParameters: z.ZodOptional<z.ZodObject<{
126
+ approach: z.ZodEnum<typeof import("../lib/evaluation/constants").EvaluationApproach>;
127
+ threshold: z.ZodOptional<z.ZodNumber>;
128
+ }, z.core.$strip>>;
166
129
  value: z.ZodString;
167
130
  }, z.core.$strip>], "type">>;
168
- evaluationParameters: z.ZodOptional<z.ZodObject<{
169
- approach: z.ZodEnum<typeof EvaluationApproach>;
170
- threshold: z.ZodOptional<z.ZodNumber>;
171
- }, z.core.$strip>>;
172
131
  output: z.ZodOptional<z.ZodString>;
173
132
  isRunning: z.ZodOptional<z.ZodBoolean>;
174
133
  error: z.ZodOptional<z.ZodString>;
175
134
  evaluationResult: z.ZodOptional<z.ZodCustom<EvaluationResult, EvaluationResult>>;
176
135
  responseTime: z.ZodOptional<z.ZodNumber>;
177
136
  }, z.core.$strip>;
178
- export type EvaluationParameters = z.infer<typeof evaluationParametersSchema>;
179
137
  export type TestCaseInput = z.infer<typeof testCaseInputSchema>;
180
- export type LegacyTestCaseInput = z.infer<typeof legacyTestCaseInputSchema>;
181
- export type V2TestCaseInput = z.infer<typeof v2TestCaseInputSchema>;
182
138
  export type TestCase = z.infer<typeof testCaseSchema>;
183
139
  export declare function validateTestCaseInput(data: unknown): asserts data is TestCaseInput;
184
140
  export declare function validateTestCaseInputArray(data: unknown): asserts data is TestCaseInput[];
@@ -1,6 +1,6 @@
1
1
  import type { TestCase } from './test-case';
2
2
  export type { ExpectedOutcomeFieldType, ExpectedOutcomeBase, ExpectedOutcomeSchema, ExpectedOutcomeSchemaField, ExpectedOutcomeField, TextExpectedOutcomeSchemaField, TextareaExpectedOutcomeSchemaField, ChipsExpectedOutcomeSchemaField, SelectExpectedOutcomeSchemaField, TextExpectedOutcomeField, TextareaExpectedOutcomeField, ChipsExpectedOutcomeField, SelectExpectedOutcomeField, } from './expected-outcome';
3
- export type { TestCase, TestCaseInput, LegacyTestCaseInput, V2TestCaseInput, } from './test-case';
3
+ export type { TestCase, TestCaseInput, } from './test-case';
4
4
  export interface LLMRequestPayload {
5
5
  prompt: string;
6
6
  resolve: (result: string) => void;
@@ -1 +1 @@
1
- export type { EvaluationParameters, TestCaseInput, LegacyTestCaseInput, V2TestCaseInput, TestCase, } from '../schemas/test-case';
1
+ export type { TestCaseInput, TestCase, } from '../schemas/test-case';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-testrunner-components",
3
- "version": "1.1.0",
3
+ "version": "1.2.0",
4
4
  "description": "A Stencil web component library for LLM test runner functionality",
5
5
  "main": "dist/index.cjs.js",
6
6
  "module": "dist/index.js",
@@ -36,6 +36,11 @@
36
36
  "type": "git",
37
37
  "url": "https://github.com/FluxonApps/llm-testrunner-lib.git"
38
38
  },
39
+ "bugs": {
40
+ "url": "https://github.com/FluxonApps/llm-testrunner-lib/issues"
41
+ },
42
+ "homepage": "https://github.com/FluxonApps/llm-testrunner-lib#readme",
43
+ "author": "Fluxon Apps LLC",
39
44
  "files": [
40
45
  "dist/",
41
46
  "loader/"
@@ -53,7 +58,8 @@
53
58
  "build-publish": "npm run build:all && npm run just-publish",
54
59
  "just-publish": "npm publish --access=public",
55
60
  "lint": "eslint src --ext .ts,.tsx",
56
- "lint:fix": "eslint src --ext .ts,.tsx --fix"
61
+ "lint:fix": "eslint src --ext .ts,.tsx --fix",
62
+ "license-check": "licensee"
57
63
  },
58
64
  "dependencies": {
59
65
  "@google/genai": "^1.40.0",
@@ -78,6 +84,7 @@
78
84
  "eslint-config-prettier": "^10.1.8",
79
85
  "jest": "^29.7.0",
80
86
  "jest-cli": "^29.7.0",
87
+ "licensee": "^12.0.1",
81
88
  "nodemon": "^3.1.11",
82
89
  "prettier": "^3.6.2",
83
90
  "puppeteer": "^24.3.0",