llm-testrunner-components 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +165 -242
- package/dist/cjs/index.cjs.js +298 -232
- package/dist/cjs/index.cjs.js.map +1 -1
- package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js +25 -54
- package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js.map +1 -1
- package/dist/collection/components/llm-test-runner/llm-test-runner.js +6 -49
- package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.css +60 -21
- package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.js +3 -1
- package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js +31 -11
- package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.css +17 -0
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js +2 -12
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js +2 -2
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map +1 -1
- package/dist/collection/lib/evaluation/evaluation-engine.js +63 -42
- package/dist/collection/lib/evaluation/evaluation-engine.js.map +1 -1
- package/dist/collection/lib/evaluation/evaluation-service.js +15 -3
- package/dist/collection/lib/evaluation/evaluation-service.js.map +1 -1
- package/dist/collection/lib/evaluation/{rouge1-evaluator.test.js → evaluators/rouge1-evaluator.test.js} +2 -2
- package/dist/collection/lib/evaluation/evaluators/rouge1-evaluator.test.js.map +1 -0
- package/dist/collection/lib/evaluation/field-evaluation-approach.js +24 -0
- package/dist/collection/lib/evaluation/field-evaluation-approach.js.map +1 -0
- package/dist/collection/lib/evaluation/index.js +0 -4
- package/dist/collection/lib/evaluation/index.js.map +1 -1
- package/dist/collection/lib/evaluation/types.js.map +1 -1
- package/dist/collection/lib/import-export/test-results-csv.js +47 -33
- package/dist/collection/lib/import-export/test-results-csv.js.map +1 -1
- package/dist/collection/lib/import-export/test-suite-exporter.js +0 -1
- package/dist/collection/lib/import-export/test-suite-exporter.js.map +1 -1
- package/dist/collection/lib/test-cases/test-case-factory.js +17 -27
- package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -1
- package/dist/collection/lib/test-cases/test-case-mutations.js +60 -9
- package/dist/collection/lib/test-cases/test-case-mutations.js.map +1 -1
- package/dist/collection/schemas/expected-outcome.js +20 -2
- package/dist/collection/schemas/expected-outcome.js.map +1 -1
- package/dist/collection/schemas/test-case.js +2 -20
- package/dist/collection/schemas/test-case.js.map +1 -1
- package/dist/collection/types/llm-test-runner.js.map +1 -1
- package/dist/collection/types/test-case.js.map +1 -1
- package/dist/components/index.js +1 -1
- package/dist/components/llm-test-runner.js +1 -1
- package/dist/components/p-Bb89MYYu.js +7 -0
- package/dist/components/p-Bb89MYYu.js.map +1 -0
- package/dist/esm/index.js +298 -232
- package/dist/esm/index.js.map +1 -1
- package/dist/llm-testrunner/index.esm.js +2 -2
- package/dist/llm-testrunner/index.esm.js.map +1 -1
- package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +0 -1
- package/dist/types/components/llm-test-runner/test-cases/expected-outcome-renderer.d.ts +3 -6
- package/dist/types/components/llm-test-runner/test-cases/llm-test-case-row.d.ts +0 -2
- package/dist/types/components/llm-test-runner/test-cases/llm-test-cases.d.ts +0 -2
- package/dist/types/lib/evaluation/evaluation-engine.d.ts +4 -2
- package/dist/types/lib/evaluation/field-evaluation-approach.d.ts +6 -0
- package/dist/types/lib/evaluation/index.d.ts +0 -1
- package/dist/types/lib/evaluation/types.d.ts +26 -0
- package/dist/types/lib/import-export/test-suite-exporter.d.ts +0 -4
- package/dist/types/lib/test-cases/test-case-factory.d.ts +2 -3
- package/dist/types/lib/test-cases/test-case-mutations.d.ts +21 -5
- package/dist/types/schemas/expected-outcome.d.ts +65 -17
- package/dist/types/schemas/test-case.d.ts +51 -95
- package/dist/types/types/llm-test-runner.d.ts +1 -1
- package/dist/types/types/test-case.d.ts +1 -1
- package/package.json +9 -2
- package/dist/collection/lib/evaluation/rouge1-evaluator.test.js.map +0 -1
- package/dist/components/p-BF90yb1z.js +0 -7
- package/dist/components/p-BF90yb1z.js.map +0 -1
- /package/dist/types/lib/evaluation/{rouge1-evaluator.test.d.ts → evaluators/rouge1-evaluator.test.d.ts} +0 -0
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { serializeExpectedOutcome } from "../expected-outcome-serializer";
|
|
2
1
|
/**
|
|
3
2
|
* Escapes a CSV field by wrapping it in quotes if it contains special characters
|
|
4
3
|
* @param field - The field to escape
|
|
@@ -17,48 +16,63 @@ export function escapeCsvField(field) {
|
|
|
17
16
|
*/
|
|
18
17
|
export function exportTestResultsToCsv(testCases) {
|
|
19
18
|
const csvRows = [];
|
|
19
|
+
const maxFieldCount = testCases.reduce((max, testCase) => Math.max(max, (testCase.expectedOutcome || []).length), 0);
|
|
20
20
|
// Add header row
|
|
21
21
|
const headers = [
|
|
22
22
|
'Question',
|
|
23
|
-
'Expected Keywords',
|
|
24
|
-
'Generated Keywords',
|
|
25
|
-
'Keywords Match',
|
|
26
23
|
'Response Time (s)',
|
|
27
|
-
'Evaluation Approach',
|
|
28
|
-
'Evaluation Score',
|
|
29
24
|
];
|
|
25
|
+
for (let i = 1; i <= maxFieldCount; i++) {
|
|
26
|
+
headers.push('Field Name');
|
|
27
|
+
headers.push('Expected Keywords');
|
|
28
|
+
headers.push('Generated Keywords');
|
|
29
|
+
headers.push('Evaluation Strategy');
|
|
30
|
+
headers.push('Passed Evaluation');
|
|
31
|
+
headers.push('Keyword Match');
|
|
32
|
+
headers.push('Score');
|
|
33
|
+
if (i < maxFieldCount) {
|
|
34
|
+
headers.push('');
|
|
35
|
+
}
|
|
36
|
+
}
|
|
30
37
|
csvRows.push(headers.join(','));
|
|
31
|
-
// Add data rows
|
|
38
|
+
// Add data rows (one row per test case)
|
|
32
39
|
testCases.forEach(testCase => {
|
|
33
|
-
const expectedOutcome = serializeExpectedOutcome(testCase.expectedOutcome || [], ' | ');
|
|
34
|
-
const evaluationApproach = testCase.evaluationParameters?.approach || '';
|
|
35
|
-
const score = testCase.evaluationResult?.evaluationApproachResult?.score;
|
|
36
|
-
const evaluationScore = score !== undefined ? score.toString() : '';
|
|
37
|
-
let generatedKeywords = '';
|
|
38
|
-
let keywordsMatch = '';
|
|
39
|
-
if (testCase.evaluationResult) {
|
|
40
|
-
const foundKeywords = testCase.evaluationResult.keywordMatches
|
|
41
|
-
.filter(match => match.found)
|
|
42
|
-
.map(match => match.keyword);
|
|
43
|
-
generatedKeywords = foundKeywords.join('; ');
|
|
44
|
-
// Calculate match percentages
|
|
45
|
-
const keywordMatchCount = testCase.evaluationResult.keywordMatches.filter(m => m.found).length;
|
|
46
|
-
const totalKeywords = testCase.evaluationResult.keywordMatches.length;
|
|
47
|
-
keywordsMatch =
|
|
48
|
-
totalKeywords > 0 ? `${keywordMatchCount}/${totalKeywords}` : 'N/A';
|
|
49
|
-
}
|
|
50
40
|
const responseTime = testCase.responseTime
|
|
51
41
|
? (testCase.responseTime / 1000).toFixed(3)
|
|
52
42
|
: 'N/A';
|
|
53
|
-
const row = [
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
43
|
+
const row = [escapeCsvField(testCase.question), responseTime];
|
|
44
|
+
for (let i = 0; i < maxFieldCount; i++) {
|
|
45
|
+
const field = testCase.expectedOutcome?.[i];
|
|
46
|
+
const fieldResult = testCase.evaluationResult?.fieldResults?.find(result => result.index === i);
|
|
47
|
+
const expectedKeywords = fieldResult?.expectedValue ??
|
|
48
|
+
(field
|
|
49
|
+
? field.type === 'chips-input'
|
|
50
|
+
? field.value.join(', ')
|
|
51
|
+
: field.value
|
|
52
|
+
: '');
|
|
53
|
+
const generatedKeywords = (fieldResult?.keywordMatches || [])
|
|
54
|
+
.filter(match => match.found)
|
|
55
|
+
.map(match => match.keyword)
|
|
56
|
+
.join('; ');
|
|
57
|
+
const matchedCount = (fieldResult?.keywordMatches || []).filter(match => match.found).length;
|
|
58
|
+
const totalMatches = fieldResult?.keywordMatches?.length || 0;
|
|
59
|
+
const keywordMatch = totalMatches > 0 ? `${matchedCount}/${totalMatches}` : '';
|
|
60
|
+
const score = fieldResult?.evaluationApproachResult?.score !== undefined
|
|
61
|
+
? fieldResult.evaluationApproachResult.score.toFixed(2)
|
|
62
|
+
: '';
|
|
63
|
+
row.push(escapeCsvField(field?.label || ''));
|
|
64
|
+
row.push(escapeCsvField(expectedKeywords || ''));
|
|
65
|
+
row.push(escapeCsvField(generatedKeywords));
|
|
66
|
+
row.push(escapeCsvField(fieldResult?.evaluationParameters.approach ||
|
|
67
|
+
field?.evaluationParameters?.approach ||
|
|
68
|
+
''));
|
|
69
|
+
row.push(fieldResult ? (fieldResult.passed ? 'TRUE' : 'FALSE') : '');
|
|
70
|
+
row.push(keywordMatch);
|
|
71
|
+
row.push(score);
|
|
72
|
+
if (i < maxFieldCount - 1) {
|
|
73
|
+
row.push('');
|
|
74
|
+
}
|
|
75
|
+
}
|
|
62
76
|
csvRows.push(row.join(','));
|
|
63
77
|
});
|
|
64
78
|
return csvRows.join('\n');
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"test-results-csv.js","sourceRoot":"","sources":["../../../src/lib/import-export/test-results-csv.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"test-results-csv.js","sourceRoot":"","sources":["../../../src/lib/import-export/test-results-csv.ts"],"names":[],"mappings":"AAEA;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,KAAa;IAC1C,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACvE,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC;IAC1C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,sBAAsB,CAAC,SAAqB;IAC1D,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,aAAa,GAAG,SAAS,CAAC,MAAM,CACpC,CAAC,GAAG,EAAE,QAAQ,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,EACzE,CAAC,CACF,CAAC;IAEF,iBAAiB;IACjB,MAAM,OAAO,GAAa;QACxB,UAAU;QACV,mBAAmB;KACpB,CAAC;IACF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,aAAa,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC3B,OAAO,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QAClC,OAAO,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;QACnC,OAAO,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QACpC,OAAO,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QAClC,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAC9B,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACtB,IAAI,CAAC,GAAG,aAAa,EAAE,CAAC;YACtB,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAEhC,wCAAwC;IACxC,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE;QAC3B,MAAM,YAAY,GAAG,QAAQ,CAAC,YAAY;YACxC,CAAC,CAAC,CAAC,QAAQ,CAAC,YAAY,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;YAC3C,CAAC,CAAC,KAAK,CAAC;QACV,MAAM,GAAG,GAAa,CAAC,cAAc,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,YAAY,CAAC,CAAC;QAExE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,KAAK,GAAG,QAAQ,CAAC,eAAe,EAAE,CAAC,CAAC,CAAC,CAAC;YAC5C,MAAM,WAAW,GAAG,QAAQ,CAAC,gBAAgB,EAAE,YAAY,EAAE,IAAI,CAC/D,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,KAAK,KAAK,CAAC,CAC7B,CAAC;YAEF,MAAM,gBAAgB,GACpB,WAAW,EAAE,aAAa;gBAC1B,CAAC,KAAK;oBACJ,CAAC,CAAC,KAAK,CAAC,IAAI,KAAK,aAAa;wBAC5B,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;wBACxB,CAAC,CAAC,KAAK,CAAC,KAAK;oBACf,CAAC,CAAC,EAAE,CAAC,CAAC;YACV,MAAM,iBAAiB,GAAG,CAAC,WAAW,EAAE,cAAc,IAAI,EAAE,CAAC;iBAC1D,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC;iBAC5B,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC;iBAC3B,IAAI,CAAC,IAAI,CAAC,CAAC;YACd,MAAM,YAAY,GAAG,CAAC,WAAW,EAAE,cAAc,IAAI,EAAE,CAAC,CAAC,MAAM,CAC7D,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CACrB,CAAC,MAAM,CAAC;YACT,MAAM,YAAY,GAAG,WAAW,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC,CAAC;YAC9D,MAAM,YAAY,GAAG,YAAY,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,YAAY,IAAI,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC/E,MAAM,KAAK,GACT,WAAW,EAAE,wBAAwB,EAAE,KAAK,KAAK,SAAS;gBACxD,CAAC,CAAC,WAAW,CAAC,wBAAwB,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;gBACvD,CAAC,CAAC,EAAE,CAAC;YAET,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC,CAAC;YAC7C,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,gBAAgB,IAAI,EAAE,CAAC,CAAC,CAAC;YACjD,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,iBAAiB,CAAC,CAAC,CAAC;YAC5C,GAAG,CAAC,IAAI,CACN,cAAc,CACZ,WAAW,EAAE,oBAAoB,CAAC,QAAQ;gBACxC,KAAK,EAAE,oBAAoB,EAAE,QAAQ;gBACrC,EAAE,CACL,CACF,CAAC;YACF,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACrE,GAAG,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YACvB,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAEhB,IAAI,CAAC,GAAG,aAAa,GAAG,CAAC,EAAE,CAAC;gBAC1B,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACf,CAAC;QACH,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9B,CAAC,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5B,CAAC","sourcesContent":["import { TestCase } from '../../types/llm-test-runner';\n\n/**\n * Escapes a CSV field by wrapping it in quotes if it contains special characters\n * @param field - The field to escape\n * @returns Escaped field string\n */\nexport function escapeCsvField(field: string): string {\n if (field.includes(',') || field.includes('\"') || field.includes('\\n')) {\n return `\"${field.replace(/\"/g, '\"\"')}\"`;\n }\n return field;\n}\n\n/**\n * Exports test results to a CSV string\n * @param testCases - Array of test cases with results to export\n * @returns CSV string representation of the test results\n */\nexport function exportTestResultsToCsv(testCases: TestCase[]): string {\n const csvRows: string[] = [];\n const maxFieldCount = testCases.reduce(\n (max, testCase) => Math.max(max, (testCase.expectedOutcome || []).length),\n 0,\n );\n\n // Add header row\n const headers: string[] = [\n 'Question',\n 'Response Time (s)',\n ];\n for (let i = 1; i <= maxFieldCount; i++) {\n headers.push('Field Name');\n headers.push('Expected Keywords');\n headers.push('Generated Keywords');\n headers.push('Evaluation Strategy');\n headers.push('Passed Evaluation');\n headers.push('Keyword Match');\n headers.push('Score');\n if (i < maxFieldCount) {\n headers.push('');\n }\n }\n csvRows.push(headers.join(','));\n\n // Add data rows (one row per test case)\n testCases.forEach(testCase => {\n const responseTime = testCase.responseTime\n ? (testCase.responseTime / 1000).toFixed(3)\n : 'N/A';\n const row: string[] = [escapeCsvField(testCase.question), responseTime];\n\n for (let i = 0; i < maxFieldCount; i++) {\n const field = testCase.expectedOutcome?.[i];\n const fieldResult = testCase.evaluationResult?.fieldResults?.find(\n result => result.index === i,\n );\n\n const expectedKeywords =\n fieldResult?.expectedValue ??\n (field\n ? field.type === 'chips-input'\n ? field.value.join(', ')\n : field.value\n : '');\n const generatedKeywords = (fieldResult?.keywordMatches || [])\n .filter(match => match.found)\n .map(match => match.keyword)\n .join('; ');\n const matchedCount = (fieldResult?.keywordMatches || []).filter(\n match => match.found,\n ).length;\n const totalMatches = fieldResult?.keywordMatches?.length || 0;\n const keywordMatch = totalMatches > 0 ? `${matchedCount}/${totalMatches}` : '';\n const score =\n fieldResult?.evaluationApproachResult?.score !== undefined\n ? fieldResult.evaluationApproachResult.score.toFixed(2)\n : '';\n\n row.push(escapeCsvField(field?.label || ''));\n row.push(escapeCsvField(expectedKeywords || ''));\n row.push(escapeCsvField(generatedKeywords));\n row.push(\n escapeCsvField(\n fieldResult?.evaluationParameters.approach ||\n field?.evaluationParameters?.approach ||\n '',\n ),\n );\n row.push(fieldResult ? (fieldResult.passed ? 'TRUE' : 'FALSE') : '');\n row.push(keywordMatch);\n row.push(score);\n\n if (i < maxFieldCount - 1) {\n row.push('');\n }\n }\n\n csvRows.push(row.join(','));\n });\n\n return csvRows.join('\\n');\n}\n\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"test-suite-exporter.js","sourceRoot":"","sources":["../../../src/lib/import-export/test-suite-exporter.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"test-suite-exporter.js","sourceRoot":"","sources":["../../../src/lib/import-export/test-suite-exporter.ts"],"names":[],"mappings":"AAQA;;;;GAIG;AACH,MAAM,UAAU,qBAAqB,CAAC,SAAqB;IACzD,MAAM,UAAU,GAA0B,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QACnE,EAAE,EAAE,QAAQ,CAAC,EAAE;QACf,QAAQ,EAAE,QAAQ,CAAC,QAAQ;QAC3B,eAAe,EAAE,QAAQ,CAAC,eAAe;KAC1C,CAAC,CAAC,CAAC;IAEJ,OAAO,IAAI,CAAC,SAAS,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;AAC7C,CAAC","sourcesContent":["import { ExpectedOutcomeField, TestCase } from '../../types/llm-test-runner';\n\nexport interface TestSuiteExportData {\n id: string;\n question: string;\n expectedOutcome: ExpectedOutcomeField[];\n}\n\n/**\n * Formats test cases as a JSON string suitable for saving as a test suite\n * @param testCases - Array of test cases to format\n * @returns JSON string representation of the test suite\n */\nexport function formatTestSuiteAsJson(testCases: TestCase[]): string {\n const exportData: TestSuiteExportData[] = testCases.map(testCase => ({\n id: testCase.id,\n question: testCase.question,\n expectedOutcome: testCase.expectedOutcome,\n }));\n\n return JSON.stringify(exportData, null, 2);\n}\n"]}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { v4 as uuidv4 } from "uuid";
|
|
2
|
-
import {
|
|
2
|
+
import { normalizeEvaluationParametersForField } from "../evaluation/field-evaluation-approach";
|
|
3
3
|
export const DEFAULT_EXPECTED_OUTCOME_SCHEMA = [
|
|
4
4
|
{
|
|
5
5
|
type: 'textarea',
|
|
@@ -8,6 +8,12 @@ export const DEFAULT_EXPECTED_OUTCOME_SCHEMA = [
|
|
|
8
8
|
rows: 2,
|
|
9
9
|
},
|
|
10
10
|
];
|
|
11
|
+
function normalizeExpectedOutcomeField(field) {
|
|
12
|
+
return {
|
|
13
|
+
...field,
|
|
14
|
+
evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
|
|
15
|
+
};
|
|
16
|
+
}
|
|
11
17
|
/**
|
|
12
18
|
* Creates a new test case with default values
|
|
13
19
|
* @returns A new TestCase object with a unique ID
|
|
@@ -17,9 +23,6 @@ export function createTestCase(expectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_
|
|
|
17
23
|
id: uuidv4(),
|
|
18
24
|
question: '',
|
|
19
25
|
expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),
|
|
20
|
-
evaluationParameters: {
|
|
21
|
-
approach: EvaluationApproach.EXACT,
|
|
22
|
-
},
|
|
23
26
|
isRunning: false,
|
|
24
27
|
};
|
|
25
28
|
}
|
|
@@ -29,35 +32,35 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
|
|
|
29
32
|
return {
|
|
30
33
|
type: 'text',
|
|
31
34
|
label: schemaField.label,
|
|
32
|
-
required: schemaField.required,
|
|
33
35
|
placeholder: schemaField.placeholder,
|
|
34
36
|
value: '',
|
|
37
|
+
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
35
38
|
};
|
|
36
39
|
case 'textarea':
|
|
37
40
|
return {
|
|
38
41
|
type: 'textarea',
|
|
39
42
|
label: schemaField.label,
|
|
40
|
-
required: schemaField.required,
|
|
41
43
|
placeholder: schemaField.placeholder,
|
|
42
44
|
rows: schemaField.rows,
|
|
43
45
|
value: '',
|
|
46
|
+
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
44
47
|
};
|
|
45
48
|
case 'chips-input':
|
|
46
49
|
return {
|
|
47
50
|
type: 'chips-input',
|
|
48
51
|
label: schemaField.label,
|
|
49
|
-
required: schemaField.required,
|
|
50
52
|
placeholder: schemaField.placeholder,
|
|
51
53
|
value: [],
|
|
54
|
+
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
52
55
|
};
|
|
53
56
|
case 'select':
|
|
54
57
|
return {
|
|
55
58
|
type: 'select',
|
|
56
59
|
label: schemaField.label,
|
|
57
|
-
required: schemaField.required,
|
|
58
60
|
placeholder: schemaField.placeholder,
|
|
59
61
|
value: '',
|
|
60
62
|
options: schemaField.options,
|
|
63
|
+
evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
|
|
61
64
|
};
|
|
62
65
|
default: {
|
|
63
66
|
const _exhaustiveCheck = schemaField;
|
|
@@ -68,31 +71,18 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
|
|
|
68
71
|
export function createExpectedOutcomeFromSchema(expectedOutcomeSchema) {
|
|
69
72
|
return expectedOutcomeSchema.map(createExpectedOutcomeFieldFromSchema);
|
|
70
73
|
}
|
|
71
|
-
export function migrateLegacyExpectedOutcomeString(value) {
|
|
72
|
-
return [
|
|
73
|
-
{
|
|
74
|
-
type: 'textarea',
|
|
75
|
-
label: 'Expected Outcome',
|
|
76
|
-
value,
|
|
77
|
-
},
|
|
78
|
-
];
|
|
79
|
-
}
|
|
80
74
|
/**
|
|
81
75
|
* Creates a runtime test case from validated input data.
|
|
82
|
-
* The input is expected to already satisfy `TestCaseInput
|
|
83
|
-
* and this function only performs normalization/defaulting
|
|
76
|
+
* The input is expected to already satisfy `TestCaseInput`,
|
|
77
|
+
* and this function only performs normalization/defaulting.
|
|
84
78
|
*
|
|
85
79
|
* @param data - Validated test case input
|
|
86
80
|
* @returns A normalized TestCase object with runtime defaults applied
|
|
87
81
|
*/
|
|
88
82
|
export function createTestCaseFromInput(data) {
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
expectedOutcome
|
|
92
|
-
}
|
|
93
|
-
else {
|
|
94
|
-
expectedOutcome = data.expectedOutcome;
|
|
95
|
-
}
|
|
96
|
-
return { ...data, expectedOutcome };
|
|
83
|
+
return {
|
|
84
|
+
...data,
|
|
85
|
+
expectedOutcome: data.expectedOutcome.map(normalizeExpectedOutcomeField),
|
|
86
|
+
};
|
|
97
87
|
}
|
|
98
88
|
//# sourceMappingURL=test-case-factory.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"test-case-factory.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-factory.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,EAAE,IAAI,MAAM,EAAE,MAAM,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"test-case-factory.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-factory.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,EAAE,IAAI,MAAM,EAAE,MAAM,MAAM,CAAC;AASpC,OAAO,EAAE,qCAAqC,EAAE,MAAM,yCAAyC,CAAC;AAEhG,MAAM,CAAC,MAAM,+BAA+B,GAA0B;IACpE;QACE,IAAI,EAAE,UAAU;QAChB,KAAK,EAAE,kBAAkB;QACzB,WAAW,EAAE,2BAA2B;QACxC,IAAI,EAAE,CAAC;KACR;CACF,CAAC;AAEF,SAAS,6BAA6B,CACpC,KAA2B;IAE3B,OAAO;QACL,GAAG,KAAK;QACR,oBAAoB,EAAE,qCAAqC,CACzD,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,oBAAoB,CAC3B;KACF,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC5B,wBAA+C,+BAA+B;IAE9E,OAAO;QACL,EAAE,EAAE,MAAM,EAAE;QACZ,QAAQ,EAAE,EAAE;QACZ,eAAe,EAAE,+BAA+B,CAAC,qBAAqB,CAAC;QACvE,SAAS,EAAE,KAAK;KACjB,CAAC;AACJ,CAAC;AAED,SAAS,oCAAoC,CAC3C,WAAuC;IAEvC,QAAQ,WAAW,CAAC,IAAI,EAAE,CAAC;QACzB,KAAK,MAAM;YACT,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,UAAU;YACb,OAAO;gBACL,IAAI,EAAE,UAAU;gBAChB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,IAAI,EAAE,WAAW,CAAC,IAAI;gBACtB,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,aAAa;YAChB,OAAO;gBACL,IAAI,EAAE,aAAa;gBACnB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,QAAQ;YACX,OAAO;gBACL,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,EAAE;gBACT,OAAO,EAAE,WAAW,CAAC,OAAO;gBAC5B,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CAC6B;aAChE,CAAC;QAEJ,OAAO,CAAC,CAAC,CAAC;YACR,MAAM,gBAAgB,GAAU,WAAW,CAAC;YAC5C,OAAO,gBAAgB,CAAC;QAC1B,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,UAAU,+BAA+B,CAC7C,qBAA4C;IAE5C,OAAO,qBAAqB,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;AACzE,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,uBAAuB,CAAC,IAAmB;IACzD,OAAO;QACL,GAAG,IAAI;QACP,eAAe,EAAE,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,6BAA6B,CAAC;KACzE,CAAC;AACJ,CAAC","sourcesContent":["import { v4 as uuidv4 } from 'uuid';\nimport {\n ExpectedOutcomeField,\n ExpectedOutcomeSchema,\n ExpectedOutcomeSchemaField,\n TestCase,\n TestCaseInput,\n} from '../../types/llm-test-runner';\nimport { EvaluationApproach } from '../evaluation/constants';\nimport { normalizeEvaluationParametersForField } from '../evaluation/field-evaluation-approach';\n\nexport const DEFAULT_EXPECTED_OUTCOME_SCHEMA: ExpectedOutcomeSchema = [\n {\n type: 'textarea',\n label: 'Expected Outcome',\n placeholder: 'Enter expected outcome...',\n rows: 2,\n },\n];\n\nfunction normalizeExpectedOutcomeField(\n field: ExpectedOutcomeField,\n): ExpectedOutcomeField {\n return {\n ...field,\n evaluationParameters: normalizeEvaluationParametersForField(\n field.type,\n field.evaluationParameters,\n ),\n };\n}\n\n/**\n * Creates a new test case with default values\n * @returns A new TestCase object with a unique ID\n */\nexport function createTestCase(\n expectedOutcomeSchema: ExpectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_SCHEMA,\n): TestCase {\n return {\n id: uuidv4(),\n question: '',\n expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),\n isRunning: false,\n };\n}\n\nfunction createExpectedOutcomeFieldFromSchema(\n schemaField: ExpectedOutcomeSchemaField,\n): ExpectedOutcomeField {\n switch (schemaField.type) {\n case 'text':\n return {\n type: 'text',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n value: '',\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'textarea':\n return {\n type: 'textarea',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n rows: schemaField.rows,\n value: '',\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'chips-input':\n return {\n type: 'chips-input',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n value: [],\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'select':\n return {\n type: 'select',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n value: '',\n options: schemaField.options,\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ) as { approach: EvaluationApproach.EXACT; threshold?: number },\n };\n\n default: {\n const _exhaustiveCheck: never = schemaField;\n return _exhaustiveCheck;\n }\n }\n}\n\nexport function createExpectedOutcomeFromSchema(\n expectedOutcomeSchema: ExpectedOutcomeSchema,\n): ExpectedOutcomeField[] {\n return expectedOutcomeSchema.map(createExpectedOutcomeFieldFromSchema);\n}\n\n/**\n * Creates a runtime test case from validated input data.\n * The input is expected to already satisfy `TestCaseInput`,\n * and this function only performs normalization/defaulting.\n *\n * @param data - Validated test case input\n * @returns A normalized TestCase object with runtime defaults applied\n */\nexport function createTestCaseFromInput(data: TestCaseInput): TestCase {\n return {\n ...data,\n expectedOutcome: data.expectedOutcome.map(normalizeExpectedOutcomeField),\n };\n}\n"]}
|
|
@@ -1,16 +1,67 @@
|
|
|
1
|
+
import { normalizeEvaluationParametersForField } from "../evaluation/field-evaluation-approach";
|
|
2
|
+
export function applyExpectedOutcomeChange(testCase, change) {
|
|
3
|
+
const { index } = change;
|
|
4
|
+
const expectedOutcome = [...(testCase.expectedOutcome || [])];
|
|
5
|
+
const target = expectedOutcome[index];
|
|
6
|
+
if (!target) {
|
|
7
|
+
return testCase;
|
|
8
|
+
}
|
|
9
|
+
switch (change.operation) {
|
|
10
|
+
case 'set-value': {
|
|
11
|
+
if (target.type === 'chips-input') {
|
|
12
|
+
return testCase;
|
|
13
|
+
}
|
|
14
|
+
expectedOutcome[index] = {
|
|
15
|
+
...target,
|
|
16
|
+
value: change.value,
|
|
17
|
+
};
|
|
18
|
+
return { ...testCase, expectedOutcome };
|
|
19
|
+
}
|
|
20
|
+
case 'add-chip': {
|
|
21
|
+
if (target.type !== 'chips-input') {
|
|
22
|
+
return testCase;
|
|
23
|
+
}
|
|
24
|
+
expectedOutcome[index] = {
|
|
25
|
+
...target,
|
|
26
|
+
value: [...target.value, change.value],
|
|
27
|
+
};
|
|
28
|
+
return { ...testCase, expectedOutcome };
|
|
29
|
+
}
|
|
30
|
+
case 'remove-chip': {
|
|
31
|
+
if (target.type !== 'chips-input') {
|
|
32
|
+
return testCase;
|
|
33
|
+
}
|
|
34
|
+
expectedOutcome[index] = {
|
|
35
|
+
...target,
|
|
36
|
+
value: target.value.filter(chip => chip !== change.value),
|
|
37
|
+
};
|
|
38
|
+
return { ...testCase, expectedOutcome };
|
|
39
|
+
}
|
|
40
|
+
case 'set-evaluation-approach':
|
|
41
|
+
return updateExpectedOutcomeFieldApproach(testCase, index, change.value);
|
|
42
|
+
}
|
|
43
|
+
}
|
|
1
44
|
/**
|
|
2
|
-
* Updates the evaluation approach for a
|
|
3
|
-
*
|
|
4
|
-
* @param approach - The new evaluation approach
|
|
5
|
-
* @returns Updated test case with the new evaluation approach
|
|
45
|
+
* Updates the evaluation approach for a specific expected outcome field.
|
|
46
|
+
* Select fields always use exact matching.
|
|
6
47
|
*/
|
|
7
|
-
export function
|
|
48
|
+
export function updateExpectedOutcomeFieldApproach(testCase, fieldIndex, approach) {
|
|
49
|
+
const expectedOutcome = [...(testCase.expectedOutcome || [])];
|
|
50
|
+
const target = expectedOutcome[fieldIndex];
|
|
51
|
+
if (!target) {
|
|
52
|
+
return testCase;
|
|
53
|
+
}
|
|
54
|
+
const currentEvaluationParameters = target.evaluationParameters;
|
|
55
|
+
expectedOutcome[fieldIndex] = {
|
|
56
|
+
...target,
|
|
57
|
+
evaluationParameters: normalizeEvaluationParametersForField(target.type, {
|
|
58
|
+
...currentEvaluationParameters,
|
|
59
|
+
approach,
|
|
60
|
+
}),
|
|
61
|
+
};
|
|
8
62
|
return {
|
|
9
63
|
...testCase,
|
|
10
|
-
|
|
11
|
-
...testCase.evaluationParameters,
|
|
12
|
-
approach: approach,
|
|
13
|
-
},
|
|
64
|
+
expectedOutcome,
|
|
14
65
|
};
|
|
15
66
|
}
|
|
16
67
|
//# sourceMappingURL=test-case-mutations.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"test-case-mutations.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-mutations.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"test-case-mutations.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-mutations.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,qCAAqC,EAAE,MAAM,yCAAyC,CAAC;AAwBhG,MAAM,UAAU,0BAA0B,CACxC,QAAkB,EAClB,MAA6B;IAE7B,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;IACzB,MAAM,eAAe,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;IAEtC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,QAAQ,MAAM,CAAC,SAAS,EAAE,CAAC;QACzB,KAAK,WAAW,CAAC,CAAC,CAAC;YACjB,IAAI,MAAM,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;gBAClC,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,eAAe,CAAC,KAAK,CAAC,GAAG;gBACvB,GAAG,MAAM;gBACT,KAAK,EAAE,MAAM,CAAC,KAAK;aACpB,CAAC;YACF,OAAO,EAAE,GAAG,QAAQ,EAAE,eAAe,EAAE,CAAC;QAC1C,CAAC;QACD,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,IAAI,MAAM,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;gBAClC,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,eAAe,CAAC,KAAK,CAAC,GAAG;gBACvB,GAAG,MAAM;gBACT,KAAK,EAAE,CAAC,GAAG,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC;aACvC,CAAC;YACF,OAAO,EAAE,GAAG,QAAQ,EAAE,eAAe,EAAE,CAAC;QAC1C,CAAC;QACD,KAAK,aAAa,CAAC,CAAC,CAAC;YACnB,IAAI,MAAM,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;gBAClC,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,eAAe,CAAC,KAAK,CAAC,GAAG;gBACvB,GAAG,MAAM;gBACT,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,KAAK,MAAM,CAAC,KAAK,CAAC;aAC1D,CAAC;YACF,OAAO,EAAE,GAAG,QAAQ,EAAE,eAAe,EAAE,CAAC;QAC1C,CAAC;QACD,KAAK,yBAAyB;YAC5B,OAAO,kCAAkC,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;IAC7E,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kCAAkC,CAChD,QAAkB,EAClB,UAAkB,EAClB,QAA4B;IAE5B,MAAM,eAAe,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;IAE3C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,2BAA2B,GAAG,MAAM,CAAC,oBAAoB,CAAC;IAChE,eAAe,CAAC,UAAU,CAAC,GAAG;QAC5B,GAAG,MAAM;QACT,oBAAoB,EAAE,qCAAqC,CAAC,MAAM,CAAC,IAAI,EAAE;YACvE,GAAG,2BAA2B;YAC9B,QAAQ;SACT,CAAC;KACH,CAAC;IAEF,OAAO;QACL,GAAG,QAAQ;QACX,eAAe;KAChB,CAAC;AACJ,CAAC","sourcesContent":["import { TestCase } from '../../types/llm-test-runner';\nimport { EvaluationApproach } from '../evaluation/constants';\nimport { normalizeEvaluationParametersForField } from '../evaluation/field-evaluation-approach';\n\nexport type ExpectedOutcomeChange =\n | {\n index: number;\n operation: 'set-value';\n value: string;\n }\n | {\n index: number;\n operation: 'add-chip';\n value: string;\n }\n | {\n index: number;\n operation: 'remove-chip';\n value: string;\n }\n | {\n index: number;\n operation: 'set-evaluation-approach';\n value: EvaluationApproach;\n };\n\nexport function applyExpectedOutcomeChange(\n testCase: TestCase,\n change: ExpectedOutcomeChange,\n): TestCase {\n const { index } = change;\n const expectedOutcome = [...(testCase.expectedOutcome || [])];\n const target = expectedOutcome[index];\n\n if (!target) {\n return testCase;\n }\n\n switch (change.operation) {\n case 'set-value': {\n if (target.type === 'chips-input') {\n return testCase;\n }\n expectedOutcome[index] = {\n ...target,\n value: change.value,\n };\n return { ...testCase, expectedOutcome };\n }\n case 'add-chip': {\n if (target.type !== 'chips-input') {\n return testCase;\n }\n expectedOutcome[index] = {\n ...target,\n value: [...target.value, change.value],\n };\n return { ...testCase, expectedOutcome };\n }\n case 'remove-chip': {\n if (target.type !== 'chips-input') {\n return testCase;\n }\n expectedOutcome[index] = {\n ...target,\n value: target.value.filter(chip => chip !== change.value),\n };\n return { ...testCase, expectedOutcome };\n }\n case 'set-evaluation-approach':\n return updateExpectedOutcomeFieldApproach(testCase, index, change.value);\n }\n}\n\n/**\n * Updates the evaluation approach for a specific expected outcome field.\n * Select fields always use exact matching.\n */\nexport function updateExpectedOutcomeFieldApproach(\n testCase: TestCase,\n fieldIndex: number,\n approach: EvaluationApproach,\n): TestCase {\n const expectedOutcome = [...(testCase.expectedOutcome || [])];\n const target = expectedOutcome[fieldIndex];\n\n if (!target) {\n return testCase;\n }\n\n const currentEvaluationParameters = target.evaluationParameters;\n expectedOutcome[fieldIndex] = {\n ...target,\n evaluationParameters: normalizeEvaluationParametersForField(target.type, {\n ...currentEvaluationParameters,\n approach,\n }),\n };\n\n return {\n ...testCase,\n expectedOutcome,\n };\n}\n"]}
|
|
@@ -1,28 +1,46 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
+
import { EvaluationApproach } from "../lib/evaluation/constants";
|
|
3
|
+
import { isApproachAllowedForFieldType } from "../lib/evaluation/field-evaluation-approach";
|
|
2
4
|
const nonEmptyString = z.string().trim().min(1);
|
|
3
5
|
const optionalPositiveInt = z.number().int().positive().optional();
|
|
4
6
|
const optionalString = z.string().optional();
|
|
5
|
-
const optionalBoolean = z.boolean().optional();
|
|
6
7
|
const selectOptionsSchema = z.array(nonEmptyString).min(1);
|
|
8
|
+
const optionalNumber = z.number().optional();
|
|
9
|
+
const evaluationParametersSchema = z.object({
|
|
10
|
+
approach: z.enum(EvaluationApproach),
|
|
11
|
+
threshold: optionalNumber,
|
|
12
|
+
});
|
|
13
|
+
const selectEvaluationParametersSchema = evaluationParametersSchema.superRefine((parameters, ctx) => {
|
|
14
|
+
if (!isApproachAllowedForFieldType('select', parameters.approach)) {
|
|
15
|
+
ctx.addIssue({
|
|
16
|
+
code: 'custom',
|
|
17
|
+
path: ['approach'],
|
|
18
|
+
message: `select fields only support "${EvaluationApproach.EXACT}" evaluation approach.`,
|
|
19
|
+
});
|
|
20
|
+
}
|
|
21
|
+
});
|
|
7
22
|
const defaultExpectedOutcomeBaseSchema = z.object({
|
|
8
23
|
label: nonEmptyString,
|
|
9
|
-
required: optionalBoolean,
|
|
10
24
|
placeholder: optionalString,
|
|
11
25
|
});
|
|
12
26
|
const createDefaultExpectedOutcomeFieldSchemas = (baseSchema) => ({
|
|
13
27
|
text: baseSchema.extend({
|
|
14
28
|
type: z.literal('text'),
|
|
29
|
+
evaluationParameters: evaluationParametersSchema.optional(),
|
|
15
30
|
}),
|
|
16
31
|
textarea: baseSchema.extend({
|
|
17
32
|
type: z.literal('textarea'),
|
|
18
33
|
rows: optionalPositiveInt,
|
|
34
|
+
evaluationParameters: evaluationParametersSchema.optional(),
|
|
19
35
|
}),
|
|
20
36
|
chipsInput: baseSchema.extend({
|
|
21
37
|
type: z.literal('chips-input'),
|
|
38
|
+
evaluationParameters: evaluationParametersSchema.optional(),
|
|
22
39
|
}),
|
|
23
40
|
select: baseSchema.extend({
|
|
24
41
|
type: z.literal('select'),
|
|
25
42
|
options: selectOptionsSchema,
|
|
43
|
+
evaluationParameters: selectEvaluationParametersSchema.optional(),
|
|
26
44
|
}),
|
|
27
45
|
});
|
|
28
46
|
function hasDuplicateChips(values) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"expected-outcome.js","sourceRoot":"","sources":["../../src/schemas/expected-outcome.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;
|
|
1
|
+
{"version":3,"file":"expected-outcome.js","sourceRoot":"","sources":["../../src/schemas/expected-outcome.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AACjE,OAAO,EAAE,6BAA6B,EAAE,MAAM,6CAA6C,CAAC;AAE5F,MAAM,cAAc,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;AAChD,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE,CAAC;AACnE,MAAM,cAAc,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC;AAC7C,MAAM,mBAAmB,GAAG,CAAC,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;AAC3D,MAAM,cAAc,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC;AAE7C,MAAM,0BAA0B,GAAG,CAAC,CAAC,MAAM,CAAC;IAC1C,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC;IACpC,SAAS,EAAE,cAAc;CAC1B,CAAC,CAAC;AAEH,MAAM,gCAAgC,GAAG,0BAA0B,CAAC,WAAW,CAC7E,CAAC,UAAU,EAAE,GAAG,EAAE,EAAE;IAClB,IAAI,CAAC,6BAA6B,CAAC,QAAQ,EAAE,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QAClE,GAAG,CAAC,QAAQ,CAAC;YACX,IAAI,EAAE,QAAQ;YACd,IAAI,EAAE,CAAC,UAAU,CAAC;YAClB,OAAO,EAAE,+BAA+B,kBAAkB,CAAC,KAAK,wBAAwB;SACzF,CAAC,CAAC;IACL,CAAC;AACH,CAAC,CACF,CAAC;AAEF,MAAM,gCAAgC,GAAG,CAAC,CAAC,MAAM,CAAC;IAChD,KAAK,EAAE,cAAc;IACrB,WAAW,EAAE,cAAc;CAC5B,CAAC,CAAC;AAEH,MAAM,wCAAwC,GAAG,CAC/C,UAAmD,EACnD,EAAE,CAAC,CAAC;IACJ,IAAI,EAAE,UAAU,CAAC,MAAM,CAAC;QACtB,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC;QACvB,oBAAoB,EAAE,0BAA0B,CAAC,QAAQ,EAAE;KAC5D,CAAC;IACF,QAAQ,EAAE,UAAU,CAAC,MAAM,CAAC;QAC1B,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC;QAC3B,IAAI,EAAE,mBAAmB;QACzB,oBAAoB,EAAE,0BAA0B,CAAC,QAAQ,EAAE;KAC5D,CAAC;IACF,UAAU,EAAE,UAAU,CAAC,MAAM,CAAC;QAC5B,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,aAAa,CAAC;QAC9B,oBAAoB,EAAE,0BAA0B,CAAC,QAAQ,EAAE;KAC5D,CAAC;IACF,MAAM,EAAE,UAAU,CAAC,MAAM,CAAC;QACxB,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC;QACzB,OAAO,EAAE,mBAAmB;QAC5B,oBAAoB,EAAE,gCAAgC,CAAC,QAAQ,EAAE;KAClE,CAAC;CACH,CAAC,CAAC;AAEH,SAAS,iBAAiB,CAAC,MAAgB;IACzC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC9C,IAAI,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC;QACd,CAAC;QACD,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IACvB,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,uBAAuB,GAC3B,wCAAwC,CAAC,gCAAgC,CAAC,CAAC;AAE7E,MAAM,CAAC,MAAM,gCAAgC,GAAG,CAAC,CAAC,kBAAkB,CAAC,MAAM,EAAE;IAC3E,uBAAuB,CAAC,IAAI;IAC5B,uBAAuB,CAAC,QAAQ;IAChC,uBAAuB,CAAC,UAAU;IAClC,uBAAuB,CAAC,MAAM;CAC/B,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,2BAA2B,GAAG,CAAC;KACzC,KAAK,CAAC,gCAAgC,CAAC;KACvC,GAAG,CAAC,CAAC,CAAC,CAAC;AAEV,MAAM,CAAC,MAAM,0BAA0B,GAAG,CAAC,CAAC,kBAAkB,CAAC,MAAM,EAAE;IACrE,uBAAuB,CAAC,IAAI,CAAC,MAAM,CAAC;QAClC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;KAClB,CAAC;IACF,uBAAuB,CAAC,QAAQ,CAAC,MAAM,CAAC;QACtC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;KAClB,CAAC;IACF,uBAAuB,CAAC,UAAU,CAAC,MAAM,CAAC;QACxC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,WAAW,CAAC,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE;YACrD,IAAI,iBAAiB,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC9B,GAAG,CAAC,QAAQ,CAAC;oBACX,IAAI,EAAE,QAAQ;oBACd,OAAO,EACL,gEAAgE;iBACnE,CAAC,CAAC;YACL,CAAC;QACH,CAAC,CAAC;KACH,CAAC;IACF,uBAAuB,CAAC,MAAM;SAC3B,MAAM,CAAC;QACN,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;KAClB,CAAC;SACD,WAAW,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;QAC1B,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;YACzC,GAAG,CAAC,QAAQ,CAAC;gBACX,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,CAAC,OAAO,CAAC;gBACf,OAAO,EAAE,mDAAmD;aAC7D,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC;CACL,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,0BAA0B,GAAG,CAAC,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;AA4CrF,MAAM,UAAU,6BAA6B,CAC3C,MAAe;IAEf,MAAM,MAAM,GAAG,2BAA2B,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IAC7D,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,IAAI,KAAK,CACb,kCAAkC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,CACnE,CAAC;IACJ,CAAC;AACH,CAAC;AAED,MAAM,UAAU,4BAA4B,CAC1C,eAAwB;IAExB,MAAM,MAAM,GAAG,0BAA0B,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;IACrE,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,IAAI,KAAK,CAAC,4BAA4B,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;IAChF,CAAC;AACH,CAAC","sourcesContent":["import { z } from 'zod';\nimport { EvaluationApproach } from '../lib/evaluation/constants';\nimport { isApproachAllowedForFieldType } from '../lib/evaluation/field-evaluation-approach';\n\nconst nonEmptyString = z.string().trim().min(1);\nconst optionalPositiveInt = z.number().int().positive().optional();\nconst optionalString = z.string().optional();\nconst selectOptionsSchema = z.array(nonEmptyString).min(1);\nconst optionalNumber = z.number().optional();\n\nconst evaluationParametersSchema = z.object({\n approach: z.enum(EvaluationApproach),\n threshold: optionalNumber,\n});\n\nconst selectEvaluationParametersSchema = evaluationParametersSchema.superRefine(\n (parameters, ctx) => {\n if (!isApproachAllowedForFieldType('select', parameters.approach)) {\n ctx.addIssue({\n code: 'custom',\n path: ['approach'],\n message: `select fields only support \"${EvaluationApproach.EXACT}\" evaluation approach.`,\n });\n }\n },\n);\n\nconst defaultExpectedOutcomeBaseSchema = z.object({\n label: nonEmptyString,\n placeholder: optionalString,\n});\n\nconst createDefaultExpectedOutcomeFieldSchemas = (\n baseSchema: typeof defaultExpectedOutcomeBaseSchema,\n) => ({\n text: baseSchema.extend({\n type: z.literal('text'),\n evaluationParameters: evaluationParametersSchema.optional(),\n }),\n textarea: baseSchema.extend({\n type: z.literal('textarea'),\n rows: optionalPositiveInt,\n evaluationParameters: evaluationParametersSchema.optional(),\n }),\n chipsInput: baseSchema.extend({\n type: z.literal('chips-input'),\n evaluationParameters: evaluationParametersSchema.optional(),\n }),\n select: baseSchema.extend({\n type: z.literal('select'),\n options: selectOptionsSchema,\n evaluationParameters: selectEvaluationParametersSchema.optional(),\n }),\n});\n\nfunction hasDuplicateChips(values: string[]): boolean {\n const seen = new Set<string>();\n for (const value of values) {\n const normalized = value.trim().toLowerCase();\n if (seen.has(normalized)) {\n return true;\n }\n seen.add(normalized);\n }\n return false;\n}\n\nconst defaultFieldDefinitions =\n createDefaultExpectedOutcomeFieldSchemas(defaultExpectedOutcomeBaseSchema);\n\nexport const expectedOutcomeSchemaFieldSchema = z.discriminatedUnion('type', [\n defaultFieldDefinitions.text,\n defaultFieldDefinitions.textarea,\n defaultFieldDefinitions.chipsInput,\n defaultFieldDefinitions.select,\n]);\n\nexport const expectedOutcomeSchemaSchema = z\n .array(expectedOutcomeSchemaFieldSchema)\n .min(1);\n\nexport const expectedOutcomeFieldSchema = z.discriminatedUnion('type', [\n defaultFieldDefinitions.text.extend({\n value: z.string(),\n }),\n defaultFieldDefinitions.textarea.extend({\n value: z.string(),\n }),\n defaultFieldDefinitions.chipsInput.extend({\n value: z.array(z.string()).superRefine((values, ctx) => {\n if (hasDuplicateChips(values)) {\n ctx.addIssue({\n code: 'custom',\n message:\n 'chips-input values must be unique (case-insensitive, trimmed).',\n });\n }\n }),\n }),\n defaultFieldDefinitions.select\n .extend({\n value: z.string(),\n })\n .superRefine((field, ctx) => {\n if (!field.options.includes(field.value)) {\n ctx.addIssue({\n code: 'custom',\n path: ['value'],\n message: 'select value must be one of the provided options.',\n });\n }\n }),\n]);\n\nexport const expectedOutcomeArraySchema = z.array(expectedOutcomeFieldSchema).min(1);\n\nexport type ExpectedOutcomeSchemaField = z.infer<\n typeof expectedOutcomeSchemaFieldSchema\n>;\nexport type ExpectedOutcomeSchema = z.infer<typeof expectedOutcomeSchemaSchema>;\nexport type ExpectedOutcomeField = z.infer<typeof expectedOutcomeFieldSchema>;\nexport type ExpectedOutcomeFieldType = ExpectedOutcomeField['type'];\nexport type ExpectedOutcomeBase = z.infer<typeof defaultExpectedOutcomeBaseSchema>;\n\nexport type TextExpectedOutcomeSchemaField = Extract<\n ExpectedOutcomeSchemaField,\n { type: 'text' }\n>;\nexport type TextareaExpectedOutcomeSchemaField = Extract<\n ExpectedOutcomeSchemaField,\n { type: 'textarea' }\n>;\nexport type ChipsExpectedOutcomeSchemaField = Extract<\n ExpectedOutcomeSchemaField,\n { type: 'chips-input' }\n>;\nexport type SelectExpectedOutcomeSchemaField = Extract<\n ExpectedOutcomeSchemaField,\n { type: 'select' }\n>;\n\nexport type TextExpectedOutcomeField = Extract<\n ExpectedOutcomeField,\n { type: 'text' }\n>;\nexport type TextareaExpectedOutcomeField = Extract<\n ExpectedOutcomeField,\n { type: 'textarea' }\n>;\nexport type ChipsExpectedOutcomeField = Extract<\n ExpectedOutcomeField,\n { type: 'chips-input' }\n>;\nexport type SelectExpectedOutcomeField = Extract<\n ExpectedOutcomeField,\n { type: 'select' }\n>;\n\nexport function validateExpectedOutcomeSchema(\n schema: unknown,\n): asserts schema is ExpectedOutcomeSchema {\n const parsed = expectedOutcomeSchemaSchema.safeParse(schema);\n if (!parsed.success) {\n throw new Error(\n `Invalid expectedOutcomeSchema: ${parsed.error.issues[0].message}`,\n );\n }\n}\n\nexport function validateExpectedOutcomeArray(\n expectedOutcome: unknown,\n): asserts expectedOutcome is ExpectedOutcomeField[] {\n const parsed = expectedOutcomeArraySchema.safeParse(expectedOutcome);\n if (!parsed.success) {\n throw new Error(`Invalid expectedOutcome: ${parsed.error.issues[0].message}`);\n }\n}\n"]}
|
|
@@ -1,33 +1,15 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
-
import { EvaluationApproach } from "../lib/evaluation/constants";
|
|
3
2
|
import { expectedOutcomeArraySchema } from "./expected-outcome";
|
|
4
|
-
export const
|
|
5
|
-
approach: z.enum(EvaluationApproach),
|
|
6
|
-
threshold: z.number().optional(),
|
|
7
|
-
});
|
|
8
|
-
const baseTestCaseInputSchema = z.object({
|
|
3
|
+
export const testCaseInputSchema = z.object({
|
|
9
4
|
id: z.string(),
|
|
10
5
|
question: z.string(),
|
|
11
|
-
evaluationParameters: evaluationParametersSchema.optional(),
|
|
12
|
-
});
|
|
13
|
-
export const legacyTestCaseInputSchema = baseTestCaseInputSchema.extend({
|
|
14
|
-
expectedOutcome: z.string(),
|
|
15
|
-
});
|
|
16
|
-
export const v2TestCaseInputSchema = baseTestCaseInputSchema.extend({
|
|
17
6
|
expectedOutcome: expectedOutcomeArraySchema,
|
|
18
7
|
});
|
|
19
|
-
export const
|
|
20
|
-
legacyTestCaseInputSchema,
|
|
21
|
-
v2TestCaseInputSchema,
|
|
22
|
-
]);
|
|
23
|
-
export const testCaseInputArraySchema = z.array(testCaseInputSchema).min(1, {
|
|
24
|
-
message: 'The test suite is empty. Please provide at least one test case.',
|
|
25
|
-
});
|
|
8
|
+
export const testCaseInputArraySchema = z.array(testCaseInputSchema);
|
|
26
9
|
export const testCaseSchema = z.object({
|
|
27
10
|
id: z.string(),
|
|
28
11
|
question: z.string(),
|
|
29
12
|
expectedOutcome: expectedOutcomeArraySchema,
|
|
30
|
-
evaluationParameters: evaluationParametersSchema.optional(),
|
|
31
13
|
output: z.string().optional(),
|
|
32
14
|
isRunning: z.boolean().optional(),
|
|
33
15
|
error: z.string().optional(),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"test-case.js","sourceRoot":"","sources":["../../src/schemas/test-case.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;
|
|
1
|
+
{"version":3,"file":"test-case.js","sourceRoot":"","sources":["../../src/schemas/test-case.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,0BAA0B,EAAE,MAAM,oBAAoB,CAAC;AAEhE,MAAM,CAAC,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC1C,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE;IACd,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE;IACpB,eAAe,EAAE,0BAA0B;CAC5C,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,wBAAwB,GAAG,CAAC,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;AAErE,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,CAAC,MAAM,CAAC;IACrC,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE;IACd,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE;IACpB,eAAe,EAAE,0BAA0B;IAC3C,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC7B,SAAS,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;IACjC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC5B,gBAAgB,EAAE,CAAC,CAAC,MAAM,EAAoB,CAAC,QAAQ,EAAE;IACzD,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;CACpC,CAAC,CAAC;AAKH,MAAM,UAAU,qBAAqB,CACnC,IAAa;IAEb,MAAM,MAAM,GAAG,mBAAmB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACnD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,IAAI,KAAK,CAAC,4BAA4B,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;IAChF,CAAC;AACH,CAAC;AAED,MAAM,UAAU,0BAA0B,CACxC,IAAa;IAEb,MAAM,MAAM,GAAG,wBAAwB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACxD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,OAAO,GACX,UAAU,CAAC,IAAI,KAAK,cAAc;YAChC,CAAC,CAAC,gDAAgD;YAClD,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC;IAC3B,CAAC;AACH,CAAC","sourcesContent":["import { z } from 'zod';\nimport type { EvaluationResult } from '../lib/evaluation/types';\nimport { expectedOutcomeArraySchema } from './expected-outcome';\n\nexport const testCaseInputSchema = z.object({\n id: z.string(),\n question: z.string(),\n expectedOutcome: expectedOutcomeArraySchema,\n});\n\nexport const testCaseInputArraySchema = z.array(testCaseInputSchema);\n\nexport const testCaseSchema = z.object({\n id: z.string(),\n question: z.string(),\n expectedOutcome: expectedOutcomeArraySchema,\n output: z.string().optional(),\n isRunning: z.boolean().optional(),\n error: z.string().optional(),\n evaluationResult: z.custom<EvaluationResult>().optional(),\n responseTime: z.number().optional(),\n});\n\nexport type TestCaseInput = z.infer<typeof testCaseInputSchema>;\nexport type TestCase = z.infer<typeof testCaseSchema>;\n\nexport function validateTestCaseInput(\n data: unknown,\n): asserts data is TestCaseInput {\n const parsed = testCaseInputSchema.safeParse(data);\n if (!parsed.success) {\n throw new Error(`Invalid test case input: ${parsed.error.issues[0].message}`);\n }\n}\n\nexport function validateTestCaseInputArray(\n data: unknown,\n): asserts data is TestCaseInput[] {\n const parsed = testCaseInputArraySchema.safeParse(data);\n if (!parsed.success) {\n const firstIssue = parsed.error.issues[0];\n const message =\n firstIssue.code === 'invalid_type'\n ? 'Invalid JSON structure. Expected a JSON array.'\n : firstIssue.message;\n throw new Error(message);\n }\n}\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llm-test-runner.js","sourceRoot":"","sources":["../../src/types/llm-test-runner.ts"],"names":[],"mappings":"","sourcesContent":["import type { TestCase } from './test-case';\n\nexport type {\n ExpectedOutcomeFieldType,\n ExpectedOutcomeBase,\n ExpectedOutcomeSchema,\n ExpectedOutcomeSchemaField,\n ExpectedOutcomeField,\n TextExpectedOutcomeSchemaField,\n TextareaExpectedOutcomeSchemaField,\n ChipsExpectedOutcomeSchemaField,\n SelectExpectedOutcomeSchemaField,\n TextExpectedOutcomeField,\n TextareaExpectedOutcomeField,\n ChipsExpectedOutcomeField,\n SelectExpectedOutcomeField,\n} from './expected-outcome';\nexport type {\n TestCase,\n TestCaseInput,\n
|
|
1
|
+
{"version":3,"file":"llm-test-runner.js","sourceRoot":"","sources":["../../src/types/llm-test-runner.ts"],"names":[],"mappings":"","sourcesContent":["import type { TestCase } from './test-case';\n\nexport type {\n ExpectedOutcomeFieldType,\n ExpectedOutcomeBase,\n ExpectedOutcomeSchema,\n ExpectedOutcomeSchemaField,\n ExpectedOutcomeField,\n TextExpectedOutcomeSchemaField,\n TextareaExpectedOutcomeSchemaField,\n ChipsExpectedOutcomeSchemaField,\n SelectExpectedOutcomeSchemaField,\n TextExpectedOutcomeField,\n TextareaExpectedOutcomeField,\n ChipsExpectedOutcomeField,\n SelectExpectedOutcomeField,\n} from './expected-outcome';\nexport type {\n TestCase,\n TestCaseInput,\n} from './test-case';\n\nexport interface LLMRequestPayload {\n prompt: string;\n resolve: (result: string) => void;\n reject: (err: Error | unknown) => void;\n}\n\nexport interface SavePayload {\n timestamp: string;\n testCases: TestCase[];\n}\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"test-case.js","sourceRoot":"","sources":["../../src/types/test-case.ts"],"names":[],"mappings":"","sourcesContent":["export type {\n
|
|
1
|
+
{"version":3,"file":"test-case.js","sourceRoot":"","sources":["../../src/types/test-case.ts"],"names":[],"mappings":"","sourcesContent":["export type {\n TestCaseInput,\n TestCase,\n} from '../schemas/test-case';\n"]}
|
package/dist/components/index.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import{G as t}from"./p-D9BrlHdP.js";export{g as getAssetPath,r as render,s as setAssetPath,a as setNonce,b as setPlatformOptions}from"./p-D9BrlHdP.js";export{L as LLMTestRunner}from"./p-
|
|
1
|
+
import{G as t}from"./p-D9BrlHdP.js";export{g as getAssetPath,r as render,s as setAssetPath,a as setNonce,b as setPlatformOptions}from"./p-D9BrlHdP.js";export{L as LLMTestRunner}from"./p-Bb89MYYu.js";class e{sdk;constructor(s){this.sdk=new t({apiKey:s})}async invoke(s){const t=await this.sdk.models.generateContent({model:"gemini-3-flash-preview",contents:s});return t.text}}function n(){window.env={API_KEY:""};window.GeminiAdapter=e}const o=n||(()=>{});const i=o;i();
|
|
2
2
|
//# sourceMappingURL=index.js.map
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import{L as o,d as s}from"./p-
|
|
1
|
+
import{L as o,d as s}from"./p-Bb89MYYu.js";const t=o;const p=s;export{t as LlmTestRunner,p as defineCustomElement};
|
|
2
2
|
//# sourceMappingURL=llm-test-runner.js.map
|