npm - llm-testrunner-components - Versions diffs - 1.1.0 → 1.2.1 - Mend

llm-testrunner-components 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82) hide show

package/dist/collection/lib/evaluation/{rouge1-evaluator.test.js → evaluators/rouge1-evaluator.test.js} RENAMED Viewed

@@ -1,8 +1,8 @@
 import { describe, it, expect } from "@jest/globals";
-import { DEFAULT_ROUGE_PASS_SCORE, EvaluationApproach } from "./constants";
+import { DEFAULT_ROUGE_PASS_SCORE, EvaluationApproach } from "../constants";
 // Using integration tests with actual js-rouge library (no mocks).
 // This approach tests the real ROUGE-1 scoring behavior rather than just orchestration logic.
-import { performRouge1Evaluation } from "./evaluators/rouge1-evaluator";
+import { performRouge1Evaluation } from "./rouge1-evaluator";
 const mockRequest = {
     testCaseId: 'test-000',
     question: 'What is your name?',

package/dist/collection/lib/evaluation/evaluators/rouge1-evaluator.test.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"rouge1-evaluator.test.js","sourceRoot":"","sources":["../../../../src/lib/evaluation/evaluators/rouge1-evaluator.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAErD,OAAO,EAAE,wBAAwB,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAC;AAC5E,mEAAmE;AACnE,8FAA8F;AAC9F,OAAO,EAAE,uBAAuB,EAAE,MAAM,oBAAoB,CAAC;AAE7D,MAAM,WAAW,GAAsB;IACrC,UAAU,EAAE,UAAU;IACtB,QAAQ,EAAE,oBAAoB;IAC9B,cAAc,EAAE,6BAA6B;IAC7C,eAAe,EAAE,iBAAiB;IAClC,oBAAoB,EAAE;QACpB,QAAQ,EAAE,kBAAkB,CAAC,OAAO;QACpC,SAAS,EAAE,GAAG;KACf;CACF,CAAC;AAEF,MAAM,sBAAsB,GAAsB;IAChD,GAAG,WAAW;IACd,oBAAoB,EAAE;QACpB,QAAQ,EAAE,kBAAkB,CAAC,OAAO;QACpC,SAAS,EAAE,SAAS;KACrB;CACF,CAAC;AAEF,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;QACnC,EAAE,CAAC,0DAA0D,EAAE,KAAK,IAAI,EAAE;YACxE,MAAM,OAAO,GAAsB;gBACjC,GAAG,WAAW;gBACd,cAAc,EAAE,iCAAiC;gBACjD,eAAe,EAAE,iBAAiB;aACnC,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACjC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAC7C,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClD,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;YACvB,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClD,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QACzB,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wDAAwD,EAAE,KAAK,IAAI,EAAE;YACtE,MAAM,OAAO,GAAsB;gBACjC,GAAG,WAAW;gBACd,cAAc,EAAE,oDAAoD;gBACpE,eAAe,EAAE,2CAA2C;aAC7D,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAClC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;YACpB,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CA
AC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8DAA8D,EAAE,KAAK,IAAI,EAAE;YAC5E,MAAM,OAAO,GAAsB;gBACjC,GAAG,WAAW;gBACd,cAAc,EAAE,iCAAiC;gBACjD,eAAe,EAAE,oCAAoC;aACtD,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAClC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClD,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC;YAC9B,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAClC,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;YAC9D,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,sBAAsB,CAAC,CAAC;YAErE,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,IAAI,CAChD,wBAAwB,CACzB,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;YAC3D,MAAM,OAAO,GAAsB;gBACjC,GAAG,WAAW;gBACd,cAAc,EAAE,yCAAyC;gBACzD,eAAe,EAAE,8BAA8B;gBAC/C,oBAAoB,EAAE;oBACpB,QAAQ,EAAE,kBAAkB,CAAC,OAAO;oBACpC,SAAS,EAAE,GAAG;iBACf;aACF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACjC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7D,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC1D,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4DAA4D,EAAE,KAAK,IAAI,EAAE;YAC1E,MAAM,OAAO,GAAsB;gBACjC,GAAG,WAAW;gBACd,cAAc,EAAE,iCAAiC;gBACjD,eAAe,EAAE,kBAAkB;gBACnC,oBAAoB,EAAE;oBACpB,QAAQ,EAAE,kBAAkB,CAAC,OAAO;oBACpC,SAAS,EAAE,GAAG;iBACf;aACF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC
;YACxD,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC1B,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;YAClD,MAAM,OAAO,GAAG,EAAE,GAAG,WAAW,EAAE,cAAc,EAAE,EAAE,EAAE,CAAC;YAEvD,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAClC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACxE,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC1E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;YAC1D,MAAM,OAAO,GAAG,EAAE,GAAG,WAAW,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;YAExD,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACjC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC/C,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC","sourcesContent":["import { describe, it, expect } from '@jest/globals';\nimport { EvaluationRequest } from '../types';\nimport { DEFAULT_ROUGE_PASS_SCORE, EvaluationApproach } from '../constants';\n// Using integration tests with actual js-rouge library (no mocks).\n// This approach tests the real ROUGE-1 scoring behavior rather than just orchestration logic.\nimport { performRouge1Evaluation } from './rouge1-evaluator';\n\nconst mockRequest: EvaluationRequest = {\n testCaseId: 'test-000',\n question: 'What is your name?',\n actualResponse: 'I am a large language model',\n expectedOutcome: 'model\\nlanguage',\n evaluationParameters: {\n approach: EvaluationApproach.ROUGE_1,\n threshold: 0.5,\n },\n};\n\nconst mockRequestNoThreshold: EvaluationRequest = {\n ...mockRequest,\n evaluationParameters: {\n approach: EvaluationApproach.ROUGE_1,\n threshold: undefined,\n },\n};\n\ndescribe('performRouge1Evaluation', () => {\n describe('Basic functionality', () => {\n it('should pass when 
response contains exact keyword matches', async () => {\n const request: EvaluationRequest = {\n ...mockRequest,\n actualResponse: 'This is a language model system',\n expectedOutcome: 'language\\nmodel',\n };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.passed).toBe(true);\n expect(result.keywordMatches.length).toBe(2);\n expect(result.keywordMatches[0].found).toBe(true);\n expect(\n result.keywordMatches[0].evaluationApproachResult.score,\n ).toBeGreaterThan(0.5);\n expect(result.keywordMatches[1].found).toBe(true);\n expect(\n result.keywordMatches[1].evaluationApproachResult.score,\n ).toBeGreaterThan(0.5);\n });\n\n it('should fail when keywords are not sufficiently present', async () => {\n const request: EvaluationRequest = {\n ...mockRequest,\n actualResponse: 'This is completely unrelated content about cooking',\n expectedOutcome: 'machine learning\\nartificial intelligence',\n };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.passed).toBe(false);\n expect(result.keywordMatches[0].found).toBe(false);\n expect(\n result.keywordMatches[0].evaluationApproachResult.score,\n ).toBeLessThan(0.5);\n expect(result.keywordMatches[1].found).toBe(false);\n expect(\n result.keywordMatches[1].evaluationApproachResult.score,\n ).toBeLessThan(0.5);\n });\n\n it('should partially pass when only some keywords meet threshold', async () => {\n const request: EvaluationRequest = {\n ...mockRequest,\n actualResponse: 'Machine learning is fascinating',\n expectedOutcome: 'machine learning\\ndatabase systems',\n };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.passed).toBe(false);\n expect(result.keywordMatches[0].found).toBe(true);\n expect(\n result.keywordMatches[0].evaluationApproachResult.score,\n ).toBeGreaterThanOrEqual(0.5);\n expect(result.keywordMatches[1].found).toBe(false);\n expect(\n result.keywordMatches[1].evaluationApproachResult.score,\n ).toBeLessThan(0.5);\n });\n 
});\n\n describe('Threshold handling', () => {\n it('should use default threshold when not provided', async () => {\n const result = await performRouge1Evaluation(mockRequestNoThreshold);\n\n expect(result.evaluationParameters.threshold).toBe(\n DEFAULT_ROUGE_PASS_SCORE,\n );\n });\n\n it('should pass all keywords with threshold 0.0', async () => {\n const request: EvaluationRequest = {\n ...mockRequest,\n actualResponse: 'completely unrelated text about cooking',\n expectedOutcome: 'quantum physics\\nmathematics',\n evaluationParameters: {\n approach: EvaluationApproach.ROUGE_1,\n threshold: 0.0,\n },\n };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.passed).toBe(true);\n expect(result.keywordMatches.every(m => m.found)).toBe(true);\n expect(result.evaluationParameters.threshold).toBe(0.0);\n });\n\n it('should fail when threshold is 1.0 and match is not perfect', async () => {\n const request: EvaluationRequest = {\n ...mockRequest,\n actualResponse: 'This is about learning concepts',\n expectedOutcome: 'machine learning',\n evaluationParameters: {\n approach: EvaluationApproach.ROUGE_1,\n threshold: 1.0,\n },\n };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.evaluationParameters.threshold).toBe(1.0);\n expect(\n result.keywordMatches[0].evaluationApproachResult.score,\n ).toBeLessThan(1.0);\n });\n });\n\n describe('Edge cases', () => {\n it('should handle empty actualResponse', async () => {\n const request = { ...mockRequest, actualResponse: '' };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.passed).toBe(false);\n expect(result.keywordMatches[0].evaluationApproachResult.score).toBe(0);\n expect(result.keywordMatches[1].evaluationApproachResult.score).toBe(0);\n });\n\n it('should handle empty expectedOutcome string', async () => {\n const request = { ...mockRequest, expectedOutcome: '' };\n\n const result = await performRouge1Evaluation(request);\n\n 
expect(result.passed).toBe(true);\n expect(result.keywordMatches.length).toBe(0);\n });\n });\n});\n"]}

package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js CHANGED Viewed

@@ -16,6 +16,7 @@ export class SemanticEvaluator {
         }
     }
     async performEvaluation(request) {
+        const threshold = request.evaluationParameters?.threshold ?? DEFAULT_SEMANTIC_PASS_SCORE;
         try {
             await this.initialize();
             // Split expectedOutcome by newlines to create keywords array
@@ -25,7 +26,7 @@ export class SemanticEvaluator {
                     .map(k => k.trim())
                     .filter(k => k.length > 0)
                 : [];
-            const keywordMatches = await evaluateKeywordsSemantically(SemanticEvaluator.extractor, request.actualResponse, expectedKeywords, DEFAULT_SEMANTIC_PASS_SCORE);
+            const keywordMatches = await evaluateKeywordsSemantically(SemanticEvaluator.extractor, request.actualResponse, expectedKeywords, threshold);
             const totalItems = keywordMatches.length;
             // calculate the overall score by averaging the score of the keyword matches
             const keywordScore = keywordMatches.reduce((acc, curr) => acc + curr.evaluationApproachResult.score, 0);
@@ -33,7 +34,7 @@ export class SemanticEvaluator {
             const passed = keywordMatches.every(match => match.found);
             const evaluationParameters = {
                 approach: EvaluationApproach.SEMANTIC,
-                threshold: DEFAULT_SEMANTIC_PASS_SCORE,
+                threshold,
             };
             return {
                 testCaseId: request.testCaseId,
@@ -55,7 +56,7 @@ export class SemanticEvaluator {
                 keywordMatches: [],
                 evaluationParameters: {
                     approach: EvaluationApproach.SEMANTIC,
-                    threshold: DEFAULT_SEMANTIC_PASS_SCORE,
+                    threshold,
                 },
                 evaluationApproachResult: {
                     score: 0,

package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"SemanticEvaluator.js","sourceRoot":"","sources":["../../../../../src/lib/evaluation/evaluators/semantic/SemanticEvaluator.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,EAAE,4BAA4B,EAAE,MAAM,qBAAqB,CAAC;AAGnE,OAAO,EACL,2BAA2B,EAC3B,kBAAkB,GACnB,MAAM,iBAAiB,CAAC;AAEzB,MAAM,OAAO,iBAAiB;IAC5B,qEAAqE;IAC7D,MAAM,CAAC,SAAS,GAA8B,IAAI,CAAC;IAE3D,KAAK,CAAC,UAAU;QACd,IAAI,iBAAiB,CAAC,SAAS;YAAE,OAAO;QACxC,IAAI,CAAC;YACH,iBAAiB,CAAC,SAAS,GAAG,MAAM,iBAAiB,EAAE,CAAC;QAC1D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,2CAA2C,EAAE,KAAK,CAAC,CAAC;YAClE,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED,KAAK,CAAC,iBAAiB,CACrB,OAA0B;QAE1B,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;YAExB,6DAA6D;YAC7D,MAAM,gBAAgB,GAAG,OAAO,CAAC,eAAe;gBAC9C,CAAC,CAAC,OAAO,CAAC,eAAe;qBACpB,KAAK,CAAC,QAAQ,CAAC;qBACf,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;qBAClB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;gBAC9B,CAAC,CAAC,EAAE,CAAC;YAEP,MAAM,cAAc,GAAG,MAAM,4BAA4B,CACvD,iBAAiB,CAAC,SAAS,EAC3B,OAAO,CAAC,cAAc,EACtB,gBAAgB,EAChB,~~2BAA2B~~,~~CAC5B~~,CAAC;YAEF,MAAM,UAAU,GAAG,cAAc,CAAC,MAAM,CAAC;YACzC,4EAA4E;YAC5E,MAAM,YAAY,GAAG,cAAc,CAAC,MAAM,CACxC,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,CAAC,wBAAwB,CAAC,KAAK,EACxD,CAAC,CACF,CAAC;YACF,MAAM,YAAY,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,4BAA4B;YACjG,MAAM,MAAM,GAAG,cAAc,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAE1D,MAAM,oBAAoB,GAAG;gBAC3B,QAAQ,EAAE,kBAAkB,CAAC,QAAQ;gBACrC,SAAS~~,EAAE,2BAA2B~~;~~aACf~~,CAAC;YAE1B,OAAO;gBACL,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,MAAM;gBACN,cAAc;gBACd,oBAAoB;gBACpB,wBAAwB,EAAE;oBACxB,KAAK,EAAE,YAAY;oBACnB,YAAY,EAAE,kBAAkB,CAAC,QAAQ;iBAC1C;gBACD,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,wCAAwC,EAAE,KAAK,CAAC,CAAC;YAC/D,OAAO;gBACL,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,MAAM,EAAE,KAAK;gBACb,cAAc,EAAE,EAAE;gBAClB,oBAAoB,EAA
E;oBACpB,QAAQ,EAAE,kBAAkB,CAAC,QAAQ;oBACrC,SAAS~~,EAAE,2BAA2B~~;~~iBACvC~~;gBACD,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;oBACR,YAAY,EAAE,kBAAkB,CAAC,QAAQ;iBAC1C;gBACD,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC,CAAC;QACJ,CAAC;IACH,CAAC","sourcesContent":["import { EvaluationResult, EvaluationRequest } from '../../types';\nimport { loadSemanticModel } from './model-loader';\nimport { evaluateKeywordsSemantically } from './evaluate-keywords';\nimport { FeatureExtractionPipeline } from '@xenova/transformers';\nimport { EvaluationParameters } from '../../../../types/evaluation';\nimport {\n DEFAULT_SEMANTIC_PASS_SCORE,\n EvaluationApproach,\n} from '../../constants';\n\nexport class SemanticEvaluator {\n // TODO(LLM-39): Refactor SemanticEvaluator into a singleton pattern.\n private static extractor: FeatureExtractionPipeline = null;\n\n async initialize(): Promise<void> {\n if (SemanticEvaluator.extractor) return;\n try {\n SemanticEvaluator.extractor = await loadSemanticModel();\n } catch (error) {\n console.error('Failed to load semantic evaluation model:', error);\n throw error;\n }\n }\n\n async performEvaluation(\n request: EvaluationRequest,\n ): Promise<EvaluationResult> {\n try {\n await this.initialize();\n\n // Split expectedOutcome by newlines to create keywords array\n const expectedKeywords = request.expectedOutcome\n ? request.expectedOutcome\n .split(/[\\n,]+/)\n .map(k => k.trim())\n .filter(k => k.length > 0)\n : [];\n\n const keywordMatches = await evaluateKeywordsSemantically(\n SemanticEvaluator.extractor,\n request.actualResponse,\n expectedKeywords,\n ~~DEFAULT_SEMANTIC_PASS_SCORE~~,\n );\n\n const totalItems = keywordMatches.length;\n // calculate the overall score by averaging the score of the keyword matches\n const keywordScore = keywordMatches.reduce(\n (acc, curr) => acc + curr.evaluationApproachResult.score,\n 0,\n );\n const overallScore = totalItems > 0 ? 
keywordScore / totalItems : 0; // to avoid division by zero\n const passed = keywordMatches.every(match => match.found);\n\n const evaluationParameters = {\n approach: EvaluationApproach.SEMANTIC,\n threshold~~: DEFAULT_SEMANTIC_PASS_SCORE~~,\n } as EvaluationParameters;\n\n return {\n testCaseId: request.testCaseId,\n passed,\n keywordMatches,\n evaluationParameters,\n evaluationApproachResult: {\n score: overallScore,\n approachUsed: EvaluationApproach.SEMANTIC,\n },\n timestamp: new Date().toISOString(),\n };\n } catch (error) {\n console.error('Failed to perform semantic evaluation:', error);\n return {\n testCaseId: request.testCaseId,\n passed: false,\n keywordMatches: [],\n evaluationParameters: {\n approach: EvaluationApproach.SEMANTIC,\n threshold~~: DEFAULT_SEMANTIC_PASS_SCORE~~,\n },\n evaluationApproachResult: {\n score: 0,\n approachUsed: EvaluationApproach.SEMANTIC,\n },\n timestamp: new Date().toISOString(),\n };\n }\n }\n}\n"]}
1	+ {"version":3,"file":"SemanticEvaluator.js","sourceRoot":"","sources":["../../../../../src/lib/evaluation/evaluators/semantic/SemanticEvaluator.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,EAAE,4BAA4B,EAAE,MAAM,qBAAqB,CAAC;AAGnE,OAAO,EACL,2BAA2B,EAC3B,kBAAkB,GACnB,MAAM,iBAAiB,CAAC;AAEzB,MAAM,OAAO,iBAAiB;IAC5B,qEAAqE;IAC7D,MAAM,CAAC,SAAS,GAA8B,IAAI,CAAC;IAE3D,KAAK,CAAC,UAAU;QACd,IAAI,iBAAiB,CAAC,SAAS;YAAE,OAAO;QACxC,IAAI,CAAC;YACH,iBAAiB,CAAC,SAAS,GAAG,MAAM,iBAAiB,EAAE,CAAC;QAC1D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,2CAA2C,EAAE,KAAK,CAAC,CAAC;YAClE,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED,KAAK,CAAC,iBAAiB,CACrB,OAA0B;QAE1B,MAAM,SAAS,GACb,OAAO,CAAC,oBAAoB,EAAE,SAAS,IAAI,2BAA2B,CAAC;QAEzE,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;YAExB,6DAA6D;YAC7D,MAAM,gBAAgB,GAAG,OAAO,CAAC,eAAe;gBAC9C,CAAC,CAAC,OAAO,CAAC,eAAe;qBACpB,KAAK,CAAC,QAAQ,CAAC;qBACf,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;qBAClB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;gBAC9B,CAAC,CAAC,EAAE,CAAC;YAEP,MAAM,cAAc,GAAG,MAAM,4BAA4B,CACvD,iBAAiB,CAAC,SAAS,EAC3B,OAAO,CAAC,cAAc,EACtB,gBAAgB,EAChB,SAAS,CACV,CAAC;YAEF,MAAM,UAAU,GAAG,cAAc,CAAC,MAAM,CAAC;YACzC,4EAA4E;YAC5E,MAAM,YAAY,GAAG,cAAc,CAAC,MAAM,CACxC,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,CAAC,wBAAwB,CAAC,KAAK,EACxD,CAAC,CACF,CAAC;YACF,MAAM,YAAY,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,4BAA4B;YACjG,MAAM,MAAM,GAAG,cAAc,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAE1D,MAAM,oBAAoB,GAAG;gBAC3B,QAAQ,EAAE,kBAAkB,CAAC,QAAQ;gBACrC,SAAS;aACc,CAAC;YAE1B,OAAO;gBACL,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,MAAM;gBACN,cAAc;gBACd,oBAAoB;gBACpB,wBAAwB,EAAE;oBACxB,KAAK,EAAE,YAAY;oBACnB,YAAY,EAAE,kBAAkB,CAAC,QAAQ;iBAC1C;gBACD,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,wCAAwC,EAAE,KAAK,CAAC,CAAC;YAC/D,OAAO;gBACL,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,MAAM,EAAE,KAAK;gBAC
b,cAAc,EAAE,EAAE;gBAClB,oBAAoB,EAAE;oBACpB,QAAQ,EAAE,kBAAkB,CAAC,QAAQ;oBACrC,SAAS;iBACV;gBACD,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;oBACR,YAAY,EAAE,kBAAkB,CAAC,QAAQ;iBAC1C;gBACD,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC,CAAC;QACJ,CAAC;IACH,CAAC","sourcesContent":["import { EvaluationResult, EvaluationRequest } from '../../types';\nimport { loadSemanticModel } from './model-loader';\nimport { evaluateKeywordsSemantically } from './evaluate-keywords';\nimport { FeatureExtractionPipeline } from '@xenova/transformers';\nimport { EvaluationParameters } from '../../../../types/evaluation';\nimport {\n DEFAULT_SEMANTIC_PASS_SCORE,\n EvaluationApproach,\n} from '../../constants';\n\nexport class SemanticEvaluator {\n // TODO(LLM-39): Refactor SemanticEvaluator into a singleton pattern.\n private static extractor: FeatureExtractionPipeline = null;\n\n async initialize(): Promise<void> {\n if (SemanticEvaluator.extractor) return;\n try {\n SemanticEvaluator.extractor = await loadSemanticModel();\n } catch (error) {\n console.error('Failed to load semantic evaluation model:', error);\n throw error;\n }\n }\n\n async performEvaluation(\n request: EvaluationRequest,\n ): Promise<EvaluationResult> {\n const threshold =\n request.evaluationParameters?.threshold ?? DEFAULT_SEMANTIC_PASS_SCORE;\n\n try {\n await this.initialize();\n\n // Split expectedOutcome by newlines to create keywords array\n const expectedKeywords = request.expectedOutcome\n ? request.expectedOutcome\n .split(/[\\n,]+/)\n .map(k => k.trim())\n .filter(k => k.length > 0)\n : [];\n\n const keywordMatches = await evaluateKeywordsSemantically(\n SemanticEvaluator.extractor,\n request.actualResponse,\n expectedKeywords,\n threshold,\n );\n\n const totalItems = keywordMatches.length;\n // calculate the overall score by averaging the score of the keyword matches\n const keywordScore = keywordMatches.reduce(\n (acc, curr) => acc + curr.evaluationApproachResult.score,\n 0,\n );\n const overallScore = totalItems > 0 ? 
keywordScore / totalItems : 0; // to avoid division by zero\n const passed = keywordMatches.every(match => match.found);\n\n const evaluationParameters = {\n approach: EvaluationApproach.SEMANTIC,\n threshold,\n } as EvaluationParameters;\n\n return {\n testCaseId: request.testCaseId,\n passed,\n keywordMatches,\n evaluationParameters,\n evaluationApproachResult: {\n score: overallScore,\n approachUsed: EvaluationApproach.SEMANTIC,\n },\n timestamp: new Date().toISOString(),\n };\n } catch (error) {\n console.error('Failed to perform semantic evaluation:', error);\n return {\n testCaseId: request.testCaseId,\n passed: false,\n keywordMatches: [],\n evaluationParameters: {\n approach: EvaluationApproach.SEMANTIC,\n threshold,\n },\n evaluationApproachResult: {\n score: 0,\n approachUsed: EvaluationApproach.SEMANTIC,\n },\n timestamp: new Date().toISOString(),\n };\n }\n }\n}\n"]}

package/dist/collection/lib/evaluation/field-evaluation-approach.js ADDED Viewed

@@ -0,0 +1,24 @@
+import { EvaluationApproach, EvaluationApproachValues } from "./constants";
+const SELECT_ONLY_APPROACHES = [EvaluationApproach.EXACT];
+export function getAllowedApproachesForFieldType(fieldType) {
+    if (fieldType === 'select') {
+        return SELECT_ONLY_APPROACHES;
+    }
+    return EvaluationApproachValues;
+}
+export function isApproachAllowedForFieldType(fieldType, approach) {
+    return getAllowedApproachesForFieldType(fieldType).includes(approach);
+}
+export function normalizeEvaluationParametersForField(fieldType, evaluationParameters) {
+    const allowedApproaches = getAllowedApproachesForFieldType(fieldType);
+    const fallbackApproach = allowedApproaches[0];
+    const rawApproach = evaluationParameters?.approach;
+    const approach = rawApproach && allowedApproaches.includes(rawApproach)
+        ? rawApproach
+        : fallbackApproach;
+    return {
+        ...evaluationParameters,
+        approach,
+    };
+}
+//# sourceMappingURL=field-evaluation-approach.js.map

package/dist/collection/lib/evaluation/field-evaluation-approach.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"field-evaluation-approach.js","sourceRoot":"","sources":["../../../src/lib/evaluation/field-evaluation-approach.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,wBAAwB,EAAE,MAAM,aAAa,CAAC;AAK3E,MAAM,sBAAsB,GAAyB,CAAC,kBAAkB,CAAC,KAAK,CAAC,CAAC;AAEhF,MAAM,UAAU,gCAAgC,CAC9C,SAA8B;IAE9B,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;QAC3B,OAAO,sBAAsB,CAAC;IAChC,CAAC;IACD,OAAO,wBAAwB,CAAC;AAClC,CAAC;AAED,MAAM,UAAU,6BAA6B,CAC3C,SAA8B,EAC9B,QAA4B;IAE5B,OAAO,gCAAgC,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;AACxE,CAAC;AAED,MAAM,UAAU,qCAAqC,CACnD,SAA8B,EAC9B,oBAA2C;IAE3C,MAAM,iBAAiB,GAAG,gCAAgC,CAAC,SAAS,CAAC,CAAC;IACtE,MAAM,gBAAgB,GAAG,iBAAiB,CAAC,CAAC,CAAC,CAAC;IAC9C,MAAM,WAAW,GAAG,oBAAoB,EAAE,QAAQ,CAAC;IACnD,MAAM,QAAQ,GACZ,WAAW,IAAI,iBAAiB,CAAC,QAAQ,CAAC,WAAW,CAAC;QACpD,CAAC,CAAC,WAAW;QACb,CAAC,CAAC,gBAAgB,CAAC;IAEvB,OAAO;QACL,GAAG,oBAAoB;QACvB,QAAQ;KACT,CAAC;AACJ,CAAC","sourcesContent":["import { EvaluationApproach, EvaluationApproachValues } from './constants';\nimport type { EvaluationParameters } from '../../types/evaluation';\n\nexport type EvaluationFieldType = 'text' | 'textarea' | 'chips-input' | 'select';\n\nconst SELECT_ONLY_APPROACHES: EvaluationApproach[] = [EvaluationApproach.EXACT];\n\nexport function getAllowedApproachesForFieldType(\n fieldType: EvaluationFieldType,\n): EvaluationApproach[] {\n if (fieldType === 'select') {\n return SELECT_ONLY_APPROACHES;\n }\n return EvaluationApproachValues;\n}\n\nexport function isApproachAllowedForFieldType(\n fieldType: EvaluationFieldType,\n approach: EvaluationApproach,\n): boolean {\n return getAllowedApproachesForFieldType(fieldType).includes(approach);\n}\n\nexport function normalizeEvaluationParametersForField(\n fieldType: EvaluationFieldType,\n evaluationParameters?: EvaluationParameters,\n): EvaluationParameters {\n const allowedApproaches = getAllowedApproachesForFieldType(fieldType);\n const fallbackApproach = allowedApproaches[0];\n const rawApproach = evaluationParameters?.approach;\n 
const approach =\n rawApproach && allowedApproaches.includes(rawApproach)\n ? rawApproach\n : fallbackApproach;\n\n return {\n ...evaluationParameters,\n approach,\n };\n}\n\n"]}

package/dist/collection/lib/evaluation/index.js CHANGED Viewed

@@ -1,7 +1,3 @@
 import { LLMEvaluationEngine } from "./evaluation-engine";
 export { LLMEvaluationEngine };
-export async function evaluateLLMResponse(request, callback) {
-    const engine = new LLMEvaluationEngine();
-    await engine.evaluateResponse(request, callback);
-}
 //# sourceMappingURL=index.js.map

package/dist/collection/lib/evaluation/index.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/lib/evaluation/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAQ1D,OAAO,EAAE,mBAAmB,EAAE,CAAC~~;AAQ/B,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,OAA0B,EAC1B,QAA4B;IAE5B,MAAM,MAAM,GAAG,IAAI,mBAAmB,EAAE,CAAC;IACzC,MAAM,MAAM,CAAC,gBAAgB,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;AACnD,CAAC~~","sourcesContent":["import { LLMEvaluationEngine } from './evaluation-engine';\nimport type {\n EvaluationRequest,\n EvaluationResult,\n KeywordMatch,\n EvaluationCallback,\n} from './types';\n\nexport { LLMEvaluationEngine };\nexport type {\n EvaluationRequest,\n EvaluationResult,\n KeywordMatch,\n EvaluationCallback,\n};\n\nexport async function evaluateLLMResponse(\n request: EvaluationRequest,\n callback: EvaluationCallback,\n): Promise<void> {\n const engine = new LLMEvaluationEngine();\n await engine.evaluateResponse(request, callback);\n}\n"]}
1	+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/lib/evaluation/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAQ1D,OAAO,EAAE,mBAAmB,EAAE,CAAC","sourcesContent":["import { LLMEvaluationEngine } from './evaluation-engine';\nimport type {\n EvaluationRequest,\n EvaluationResult,\n KeywordMatch,\n EvaluationCallback,\n} from './types';\n\nexport { LLMEvaluationEngine };\nexport type {\n EvaluationRequest,\n EvaluationResult,\n KeywordMatch,\n EvaluationCallback,\n};"]}

package/dist/collection/lib/evaluation/types.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/lib/evaluation/types.ts"],"names":[],"mappings":"","sourcesContent":["import {\n EvaluationParameters,\n EvaluationApproachResult,\n} from '../../types/evaluation';\n\nexport interface EvaluationRequest {\n testCaseId: string;\n question: string;\n expectedOutcome: string;\n actualResponse: string;\n evaluationParameters: EvaluationParameters;\n}\n\nexport interface EvaluationResult {\n testCaseId: string;\n passed: boolean;\n keywordMatches: KeywordMatch[];\n timestamp?: string;\n evaluationParameters: EvaluationParameters;\n evaluationApproachResult: EvaluationApproachResult;\n}\n\nexport interface KeywordMatch {\n keyword: string;\n found: boolean;\n evaluationApproachResult: EvaluationApproachResult;\n}\n\nexport type EvaluationCallback = (result: EvaluationResult) => void;\n\nexport interface RougeKeywordDetails {\n rouge1: number;\n rougeL: number;\n scoreUsed: string;\n approach: string;\n}\n\nexport interface Rouge1OverallDetails {\n keywordsPassed: number;\n totalKeywords: number;\n passRate: string;\n thresholdUsed: number;\n approach: string;\n}\n"]}
1	+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/lib/evaluation/types.ts"],"names":[],"mappings":"","sourcesContent":["import {\n EvaluationParameters,\n EvaluationApproachResult,\n} from '../../types/evaluation';\nimport type { ExpectedOutcomeFieldType } from '../../types/llm-test-runner';\n\nexport interface EvaluationRequest {\n testCaseId: string;\n question: string;\n expectedOutcome: string;\n actualResponse: string;\n evaluationParameters: EvaluationParameters;\n}\n\nexport interface FieldEvaluationInput {\n index: number;\n label: string;\n type: ExpectedOutcomeFieldType;\n expectedValue: string;\n evaluationParameters: EvaluationParameters;\n}\n\nexport interface EvaluationRequestV2 {\n testCaseId: string;\n question: string;\n actualResponse: string;\n fields: FieldEvaluationInput[];\n}\n\nexport interface EvaluationResult {\n testCaseId: string;\n passed: boolean;\n keywordMatches: KeywordMatch[];\n fieldResults?: FieldEvaluationResult[];\n timestamp?: string;\n evaluationParameters?: EvaluationParameters;\n evaluationApproachResult?: EvaluationApproachResult;\n}\n\nexport interface FieldEvaluationResult {\n index: number;\n label: string;\n type: ExpectedOutcomeFieldType;\n expectedValue: string;\n passed: boolean;\n keywordMatches: KeywordMatch[];\n evaluationParameters: EvaluationParameters;\n evaluationApproachResult: EvaluationApproachResult;\n error?: string;\n}\n\nexport interface KeywordMatch {\n keyword: string;\n found: boolean;\n evaluationApproachResult: EvaluationApproachResult;\n}\n\nexport type EvaluationCallback = (result: EvaluationResult) => void;\n\nexport interface RougeKeywordDetails {\n rouge1: number;\n rougeL: number;\n scoreUsed: string;\n approach: string;\n}\n\nexport interface Rouge1OverallDetails {\n keywordsPassed: number;\n totalKeywords: number;\n passRate: string;\n thresholdUsed: number;\n approach: string;\n}\n"]}

package/dist/collection/lib/import-export/test-results-csv.js CHANGED Viewed

@@ -1,4 +1,3 @@
-import { serializeExpectedOutcome } from "../expected-outcome-serializer";
 /**
  * Escapes a CSV field by wrapping it in quotes if it contains special characters
  * @param field - The field to escape
@@ -17,48 +16,63 @@ export function escapeCsvField(field) {
  */
 export function exportTestResultsToCsv(testCases) {
     const csvRows = [];
+    const maxFieldCount = testCases.reduce((max, testCase) => Math.max(max, (testCase.expectedOutcome || []).length), 0);
     // Add header row
     const headers = [
         'Question',
-        'Expected Keywords',
-        'Generated Keywords',
-        'Keywords Match',
         'Response Time (s)',
-        'Evaluation Approach',
-        'Evaluation Score',
     ];
+    for (let i = 1; i <= maxFieldCount; i++) {
+        headers.push('Field Name');
+        headers.push('Expected Keywords');
+        headers.push('Generated Keywords');
+        headers.push('Evaluation Strategy');
+        headers.push('Passed Evaluation');
+        headers.push('Keyword Match');
+        headers.push('Score');
+        if (i < maxFieldCount) {
+            headers.push('');
+        }
+    }
     csvRows.push(headers.join(','));
-    // Add data rows
+    // Add data rows (one row per test case)
     testCases.forEach(testCase => {
-        const expectedOutcome = serializeExpectedOutcome(testCase.expectedOutcome || [], ' | ');
-        const evaluationApproach = testCase.evaluationParameters?.approach || '';
-        const score = testCase.evaluationResult?.evaluationApproachResult?.score;
-        const evaluationScore = score !== undefined ? score.toString() : '';
-        let generatedKeywords = '';
-        let keywordsMatch = '';
-        if (testCase.evaluationResult) {
-            const foundKeywords = testCase.evaluationResult.keywordMatches
-                .filter(match => match.found)
-                .map(match => match.keyword);
-            generatedKeywords = foundKeywords.join('; ');
-            // Calculate match percentages
-            const keywordMatchCount = testCase.evaluationResult.keywordMatches.filter(m => m.found).length;
-            const totalKeywords = testCase.evaluationResult.keywordMatches.length;
-            keywordsMatch =
-                totalKeywords > 0 ? `${keywordMatchCount}/${totalKeywords}` : 'N/A';
-        }
         const responseTime = testCase.responseTime
             ? (testCase.responseTime / 1000).toFixed(3)
             : 'N/A';
-        const row = [
-            escapeCsvField(testCase.question),
-            escapeCsvField(expectedOutcome),
-            escapeCsvField(generatedKeywords),
-            keywordsMatch,
-            responseTime,
-            escapeCsvField(evaluationApproach),
-            escapeCsvField(evaluationScore),
-        ];
+        const row = [escapeCsvField(testCase.question), responseTime];
+        for (let i = 0; i < maxFieldCount; i++) {
+            const field = testCase.expectedOutcome?.[i];
+            const fieldResult = testCase.evaluationResult?.fieldResults?.find(result => result.index === i);
+            const expectedKeywords = fieldResult?.expectedValue ??
+                (field
+                    ? field.type === 'chips-input'
+                        ? field.value.join(', ')
+                        : field.value
+                    : '');
+            const generatedKeywords = (fieldResult?.keywordMatches || [])
+                .filter(match => match.found)
+                .map(match => match.keyword)
+                .join('; ');
+            const matchedCount = (fieldResult?.keywordMatches || []).filter(match => match.found).length;
+            const totalMatches = fieldResult?.keywordMatches?.length || 0;
+            const keywordMatch = totalMatches > 0 ? `${matchedCount}/${totalMatches}` : '';
+            const score = fieldResult?.evaluationApproachResult?.score !== undefined
+                ? fieldResult.evaluationApproachResult.score.toFixed(2)
+                : '';
+            row.push(escapeCsvField(field?.label || ''));
+            row.push(escapeCsvField(expectedKeywords || ''));
+            row.push(escapeCsvField(generatedKeywords));
+            row.push(escapeCsvField(fieldResult?.evaluationParameters.approach ||
+                field?.evaluationParameters?.approach ||
+                ''));
+            row.push(fieldResult ? (fieldResult.passed ? 'TRUE' : 'FALSE') : '');
+            row.push(keywordMatch);
+            row.push(score);
+            if (i < maxFieldCount - 1) {
+                row.push('');
+            }
+        }
         csvRows.push(row.join(','));
     });
     return csvRows.join('\n');

package/dist/collection/lib/import-export/test-results-csv.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"test-results-csv.js","sourceRoot":"","sources":["../../../src/lib/import-export/test-results-csv.ts"],"names":[],"mappings":"~~AACA,OAAO,EAAE,wBAAwB,EAAE,MAAM,gCAAgC,CAAC;AAE1E~~;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,KAAa;IAC1C,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACvE,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC;IAC1C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,sBAAsB,CAAC,SAAqB;IAC1D,MAAM,OAAO,GAAa,EAAE,CAAC;~~IAE7B~~,iBAAiB;IACjB,MAAM,OAAO,~~GAAG~~;~~QACd~~,UAAU;QACV,mBAAmB;~~QACnB~~,~~oBAAoB~~;~~QACpB~~,~~gBAAgB~~;~~QAChB~~,mBAAmB;~~QACnB~~,qBAAqB;~~QACrB~~,~~kBAAkB~~;~~KACnB~~,CAAC;~~IACF~~,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAEhC,~~gBAAgB~~;~~IAChB~~,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE;QAC3B,MAAM,~~eAAe~~,GAAG,~~wBAAwB~~,~~CAC9C~~,QAAQ,CAAC,~~eAAe~~,IAAI,~~EAAE~~,~~EAC9B~~,~~KAAK~~,~~CACN~~,CAAC;~~QAEF~~,MAAM,~~kBAAkB,~~GAAG,QAAQ,CAAC,~~oBAAoB~~,EAAE,~~QAAQ~~,IAAI,EAAE,CAAC;~~QACzE~~,MAAM,KAAK,GAAG,QAAQ,CAAC,~~gBAAgB~~,EAAE,~~wBAAwB~~,~~EAAE~~,~~KAAK~~,CAAC;~~QACzE~~,MAAM,~~eAAe~~,GAAG,~~KAAK~~,~~KAAK~~,~~SAAS~~,CAAC,CAAC,CAAC,KAAK,CAAC,~~QAAQ~~,EAAE,CAAC,CAAC,CAAC,~~EAAE~~,CAAC~~;QAEpE~~,IAAI,~~iBAAiB~~,~~GAAG~~,~~EAAE~~,CAAC~~;QAC3B~~,~~IAAI~~,~~aAAa~~,~~GAAG~~,~~EAAE~~,CAAC~~;QAEvB~~,IAAI,~~QAAQ~~,CAAC,~~gBAAgB~~,EAAE,CAAC;~~YAC9B~~,MAAM,~~aAAa~~,GAAG,~~QAAQ,~~CAAC,~~gBAAgB~~,~~CAAC~~,cAAc;~~iBAC3D~~,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC;iBAC5B,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC~~,CAAC~~;~~YAE/B~~,~~iBAAiB,GAAG,aAAa,CAAC,~~IAAI,CAAC,IAAI,CAAC,CAAC;~~YAE7C~~,~~8BAA8B;YAC9B,~~MAAM,~~iBAAiB~~,GAAG,~~QAAQ,~~CAAC,~~gBAAgB~~,~~CAAC~~,cAAc,CAAC,MAAM,~~CACvE~~,~~CAAC~~,CAAC,EAAE,CAAC,~~CAAC~~,CAAC,KAAK,~~CACb~~,CAAC,MAAM,CAAC;YACT,MAAM,~~aAAa~~,GAAG,~~QAAQ~~,~~CAAC~~,~~gBAAgB,CAAC,~~cAAc,~~CAAC~~,MAAM,CAAC~~;YAEtE~~,~~aAAa~~;~~gBACX~~,~~aAAa~~,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,~~iBAAiB~~,
IAAI,~~aAAa~~,EAAE,CAAC,CAAC,CAAC,~~KAAK~~,CAAC;~~QACxE~~,CAAC;~~QAED~~,~~MAAM~~,~~YAAY~~,GAAG,~~QAAQ~~,CAAC,~~YAAY~~;~~YACxC~~,CAAC,CAAC,CAAC,~~QAAQ~~,CAAC,~~YAAY~~,GAAG,IAAI,CAAC,CAAC,~~OAAO~~,CAAC,CAAC,CAAC;~~YAC3C~~,CAAC,CAAC,KAAK,CAAC;~~QAEV~~,MAAM,~~GAAG~~,~~GAAG;YACV~~,~~cAAc~~,CAAC,~~QAAQ~~,CAAC,~~QAAQ~~,CAAC;~~YACjC~~,~~cAAc~~,CAAC,~~eAAe~~,CAAC;~~YAC/B~~,~~cAAc~~,CAAC,~~iBAAiB~~,CAAC;~~YACjC~~,aAAa~~;YACb~~,~~YAAY~~;~~YACZ~~,~~cAAc~~,CAAC,~~kBAAkB~~,CAAC~~;YAClC~~,~~cAAc~~,CAAC,~~eAAe~~,CAAC;~~SAChC~~,CAAC;~~QAEF~~,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9B,CAAC,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5B,CAAC","sourcesContent":["import { TestCase } from '../../types/llm-test-runner';\~~nimport { serializeExpectedOutcome } from '../expected-outcome-serializer';\~~n\n/*\n Escapes a CSV field by wrapping it in quotes if it contains special characters\n * @param field - The field to escape\n * @returns Escaped field string\n /\nexport function escapeCsvField(field: string): string {\n if (field.includes(',') \|\| field.includes('\"') \|\| field.includes('\\n')) {\n return `\"${field.replace(/\"/g, '\"\"')}\"`;\n }\n return field;\n}\n\n/\n Exports test results to a CSV string\n * @param testCases - Array of test cases with results to export\n * @returns CSV string representation of the test results\n */\nexport function exportTestResultsToCsv(testCases: TestCase[]): string {\n const csvRows: string[] = [];\n\n // Add header row\n const headers = [\n 'Question',\n 'Expected Keywords',\n 'Generated Keywords',\n '~~Keywords~~ ~~Match~~',\n '~~Response~~ ~~Time (s~~)~~',\~~n '~~Evaluation~~ ~~Approach~~',\n '~~Evaluation~~ Score',\n ];\n csvRows.push(headers.join(','));\n\n // Add data rows\n testCases.forEach(testCase => {\n const ~~expectedOutcome~~ = ~~serializeExpectedOutcome(~~\n testCase.~~expectedOutcome~~ \|\| ~~[],\~~n ' \| ',\n );\n\n ~~const~~ ~~evaluationApproach~~ = ~~testCase.evaluationParameters?.approach~~ 
\|\| ~~'';\~~n const ~~score~~ = testCase.~~evaluationResult~~?.~~evaluationApproachResult?.score~~;\n const ~~evaluationScore~~ = ~~score~~ ~~!==~~ ~~undefined~~ ? ~~score.toString(~~) ~~: ''~~;\n \n ~~let~~ ~~generatedKeywords~~ = ~~'';\~~n ~~let~~ ~~keywordsMatch~~ = ''~~;\n~~\n if (~~testCase.evaluationResult~~) {\n const ~~foundKeywords~~ = ~~testCase.evaluationResult.~~keywordMatches\n .filter(match => match.found)\n .map(match => match.keyword)~~;\n~~\n ~~generatedKeywords = foundKeywords~~.join('; ');\n\n ~~// Calculate match percentages\n~~ const ~~keywordMatchCount~~ = ~~testCase.evaluationResult.~~keywordMatches.filter(\n m => m.found,\n ).length;\n const ~~totalKeywords~~ = ~~testCase.evaluationResult.~~keywordMatches.length;\n\n ~~keywordsMatch~~ ~~=\n totalKeywords~~ > 0 ? `${~~keywordMatchCount~~}/${~~totalKeywords~~}` : '~~N/A~~';\n ~~}\n\n~~ const ~~responseTime~~ = ~~testCase.responseTime~~\n ? ~~(testCase~~.~~responseTime / 1000)~~.toFixed(3)\n : '~~N/A~~';\n\n ~~const~~ row = [\n escapeCsvField(~~testCase.question~~),\n escapeCsvField(~~expectedOutcome~~),\n escapeCsvField(~~generatedKeywords~~),\n ~~keywordsMatch,\~~n ~~responseTime,\~~n ~~escapeCsvField~~(~~evaluationApproach~~),\n ~~escapeCsvField~~(~~evaluationScore~~),\n ];\n\n csvRows.push(row.join(','));\n });\n\n return csvRows.join('\\n');\n}\n\n"]}
1	+ {"version":3,"file":"test-results-csv.js","sourceRoot":"","sources":["../../../src/lib/import-export/test-results-csv.ts"],"names":[],"mappings":"AAEA;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAAC,KAAa;IAC1C,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACvE,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC;IAC1C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,sBAAsB,CAAC,SAAqB;IAC1D,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,aAAa,GAAG,SAAS,CAAC,MAAM,CACpC,CAAC,GAAG,EAAE,QAAQ,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,EACzE,CAAC,CACF,CAAC;IAEF,iBAAiB;IACjB,MAAM,OAAO,GAAa;QACxB,UAAU;QACV,mBAAmB;KACpB,CAAC;IACF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,aAAa,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC3B,OAAO,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QAClC,OAAO,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;QACnC,OAAO,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QACpC,OAAO,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QAClC,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAC9B,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACtB,IAAI,CAAC,GAAG,aAAa,EAAE,CAAC;YACtB,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAEhC,wCAAwC;IACxC,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE;QAC3B,MAAM,YAAY,GAAG,QAAQ,CAAC,YAAY;YACxC,CAAC,CAAC,CAAC,QAAQ,CAAC,YAAY,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;YAC3C,CAAC,CAAC,KAAK,CAAC;QACV,MAAM,GAAG,GAAa,CAAC,cAAc,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,YAAY,CAAC,CAAC;QAExE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,KAAK,GAAG,QAAQ,CAAC,eAAe,EAAE,CAAC,CAAC,CAAC,CAAC;YAC5C,MAAM,WAAW,GAAG,QAAQ,CAAC,gBAAgB,EAAE,YAAY,EAAE,IAAI,CAC/D,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,KAAK,KAAK,CAAC,CAC7B,CAAC;YAEF,MAAM,gBAAgB,GACpB,WAAW,EAAE,aAAa;gBAC1B,CAAC,KAAK;oBACJ,CAAC,CAAC,KAAK,CAAC,IAAI,KAAK,aAAa;wBAC5B,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;wBACxB,CAAC,CAAC,KAAK,CAAC,KAAK;oBACf,CAAC,CAAC
,EAAE,CAAC,CAAC;YACV,MAAM,iBAAiB,GAAG,CAAC,WAAW,EAAE,cAAc,IAAI,EAAE,CAAC;iBAC1D,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC;iBAC5B,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC;iBAC3B,IAAI,CAAC,IAAI,CAAC,CAAC;YACd,MAAM,YAAY,GAAG,CAAC,WAAW,EAAE,cAAc,IAAI,EAAE,CAAC,CAAC,MAAM,CAC7D,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CACrB,CAAC,MAAM,CAAC;YACT,MAAM,YAAY,GAAG,WAAW,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC,CAAC;YAC9D,MAAM,YAAY,GAAG,YAAY,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,YAAY,IAAI,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC/E,MAAM,KAAK,GACT,WAAW,EAAE,wBAAwB,EAAE,KAAK,KAAK,SAAS;gBACxD,CAAC,CAAC,WAAW,CAAC,wBAAwB,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;gBACvD,CAAC,CAAC,EAAE,CAAC;YAET,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC,CAAC;YAC7C,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,gBAAgB,IAAI,EAAE,CAAC,CAAC,CAAC;YACjD,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,iBAAiB,CAAC,CAAC,CAAC;YAC5C,GAAG,CAAC,IAAI,CACN,cAAc,CACZ,WAAW,EAAE,oBAAoB,CAAC,QAAQ;gBACxC,KAAK,EAAE,oBAAoB,EAAE,QAAQ;gBACrC,EAAE,CACL,CACF,CAAC;YACF,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACrE,GAAG,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YACvB,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAEhB,IAAI,CAAC,GAAG,aAAa,GAAG,CAAC,EAAE,CAAC;gBAC1B,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACf,CAAC;QACH,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9B,CAAC,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5B,CAAC","sourcesContent":["import { TestCase } from '../../types/llm-test-runner';\n\n/*\n Escapes a CSV field by wrapping it in quotes if it contains special characters\n * @param field - The field to escape\n * @returns Escaped field string\n /\nexport function escapeCsvField(field: string): string {\n if (field.includes(',') \|\| field.includes('\"') \|\| field.includes('\\n')) {\n return `\"${field.replace(/\"/g, '\"\"')}\"`;\n }\n return field;\n}\n\n/\n Exports test results to a CSV string\n * @param testCases - Array of test cases with 
results to export\n * @returns CSV string representation of the test results\n */\nexport function exportTestResultsToCsv(testCases: TestCase[]): string {\n const csvRows: string[] = [];\n const maxFieldCount = testCases.reduce(\n (max, testCase) => Math.max(max, (testCase.expectedOutcome \|\| []).length),\n 0,\n );\n\n // Add header row\n const headers: string[] = [\n 'Question',\n 'Response Time (s)',\n ];\n for (let i = 1; i <= maxFieldCount; i++) {\n headers.push('Field Name');\n headers.push('Expected Keywords');\n headers.push('Generated Keywords');\n headers.push('Evaluation Strategy');\n headers.push('Passed Evaluation');\n headers.push('Keyword Match');\n headers.push('Score');\n if (i < maxFieldCount) {\n headers.push('');\n }\n }\n csvRows.push(headers.join(','));\n\n // Add data rows (one row per test case)\n testCases.forEach(testCase => {\n const responseTime = testCase.responseTime\n ? (testCase.responseTime / 1000).toFixed(3)\n : 'N/A';\n const row: string[] = [escapeCsvField(testCase.question), responseTime];\n\n for (let i = 0; i < maxFieldCount; i++) {\n const field = testCase.expectedOutcome?.[i];\n const fieldResult = testCase.evaluationResult?.fieldResults?.find(\n result => result.index === i,\n );\n\n const expectedKeywords =\n fieldResult?.expectedValue ??\n (field\n ? field.type === 'chips-input'\n ? field.value.join(', ')\n : field.value\n : '');\n const generatedKeywords = (fieldResult?.keywordMatches \|\| [])\n .filter(match => match.found)\n .map(match => match.keyword)\n .join('; ');\n const matchedCount = (fieldResult?.keywordMatches \|\| []).filter(\n match => match.found,\n ).length;\n const totalMatches = fieldResult?.keywordMatches?.length \|\| 0;\n const keywordMatch = totalMatches > 0 ? `${matchedCount}/${totalMatches}` : '';\n const score =\n fieldResult?.evaluationApproachResult?.score !== undefined\n ? 
fieldResult.evaluationApproachResult.score.toFixed(2)\n : '';\n\n row.push(escapeCsvField(field?.label \|\| ''));\n row.push(escapeCsvField(expectedKeywords \|\| ''));\n row.push(escapeCsvField(generatedKeywords));\n row.push(\n escapeCsvField(\n fieldResult?.evaluationParameters.approach \|\|\n field?.evaluationParameters?.approach \|\|\n '',\n ),\n );\n row.push(fieldResult ? (fieldResult.passed ? 'TRUE' : 'FALSE') : '');\n row.push(keywordMatch);\n row.push(score);\n\n if (i < maxFieldCount - 1) {\n row.push('');\n }\n }\n\n csvRows.push(row.join(','));\n });\n\n return csvRows.join('\\n');\n}\n\n"]}

package/dist/collection/lib/import-export/test-suite-exporter.js CHANGED Viewed

@@ -8,7 +8,6 @@ export function formatTestSuiteAsJson(testCases) {
         id: testCase.id,
         question: testCase.question,
         expectedOutcome: testCase.expectedOutcome,
-        evaluationParameters: testCase.evaluationParameters,
     }));
     return JSON.stringify(exportData, null, 2);
 }

package/dist/collection/lib/import-export/test-suite-exporter.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"test-suite-exporter.js","sourceRoot":"","sources":["../../../src/lib/import-export/test-suite-exporter.ts"],"names":[],"mappings":"~~AAYA~~;;;;GAIG;AACH,MAAM,UAAU,qBAAqB,CAAC,SAAqB;IACzD,MAAM,UAAU,GAA0B,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QACnE,EAAE,EAAE,QAAQ,CAAC,EAAE;QACf,QAAQ,EAAE,QAAQ,CAAC,QAAQ;QAC3B,eAAe,EAAE,QAAQ,CAAC,eAAe;~~QAEzC~~,~~oBAAoB,EAAE,QAAQ,~~CAAC,~~oBAAoB;KACpD,~~CAAC,CAAC~~,CAAC~~;IAEJ,OAAO,IAAI,CAAC,SAAS,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;AAC7C,CAAC","sourcesContent":["import { ExpectedOutcomeField, TestCase } from '../../types/llm-test-runner';\n\nexport interface TestSuiteExportData {\n id: string;\n question: string;\n expectedOutcome: ExpectedOutcomeField[];\n ~~evaluationParameters?: {\n approach: string;\n threshold?: number;\n~~ }~~;\n}~~\n\n/*\n Formats test cases as a JSON string suitable for saving as a test suite\n * @param testCases - Array of test cases to format\n * @returns JSON string representation of the test suite\n */\nexport function formatTestSuiteAsJson(testCases: TestCase[]): string {\n const exportData: TestSuiteExportData[] = testCases.map(testCase => ({\n id: testCase.id,\n question: testCase.question,\n expectedOutcome: testCase.expectedOutcome,\n~~\n evaluationParameters: testCase.evaluationParameters,\n~~ }));\n\n return JSON.stringify(exportData, null, 2);\n}\n"]}
1	+ {"version":3,"file":"test-suite-exporter.js","sourceRoot":"","sources":["../../../src/lib/import-export/test-suite-exporter.ts"],"names":[],"mappings":"AAQA;;;;GAIG;AACH,MAAM,UAAU,qBAAqB,CAAC,SAAqB;IACzD,MAAM,UAAU,GAA0B,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QACnE,EAAE,EAAE,QAAQ,CAAC,EAAE;QACf,QAAQ,EAAE,QAAQ,CAAC,QAAQ;QAC3B,eAAe,EAAE,QAAQ,CAAC,eAAe;KAC1C,CAAC,CAAC,CAAC;IAEJ,OAAO,IAAI,CAAC,SAAS,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;AAC7C,CAAC","sourcesContent":["import { ExpectedOutcomeField, TestCase } from '../../types/llm-test-runner';\n\nexport interface TestSuiteExportData {\n id: string;\n question: string;\n expectedOutcome: ExpectedOutcomeField[];\n}\n\n/*\n Formats test cases as a JSON string suitable for saving as a test suite\n * @param testCases - Array of test cases to format\n * @returns JSON string representation of the test suite\n */\nexport function formatTestSuiteAsJson(testCases: TestCase[]): string {\n const exportData: TestSuiteExportData[] = testCases.map(testCase => ({\n id: testCase.id,\n question: testCase.question,\n expectedOutcome: testCase.expectedOutcome,\n }));\n\n return JSON.stringify(exportData, null, 2);\n}\n"]}

package/dist/collection/lib/test-cases/test-case-factory.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { v4 as uuidv4 } from "uuid";
-import { EvaluationApproach } from "../evaluation/constants";
+import { normalizeEvaluationParametersForField } from "../evaluation/field-evaluation-approach";
 export const DEFAULT_EXPECTED_OUTCOME_SCHEMA = [
     {
         type: 'textarea',
@@ -8,6 +8,12 @@ export const DEFAULT_EXPECTED_OUTCOME_SCHEMA = [
         rows: 2,
     },
 ];
+function normalizeExpectedOutcomeField(field) {
+    return {
+        ...field,
+        evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
+    };
+}
 /**
  * Creates a new test case with default values
  * @returns A new TestCase object with a unique ID
@@ -17,9 +23,6 @@ export function createTestCase(expectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_
         id: uuidv4(),
         question: '',
         expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),
-        evaluationParameters: {
-            approach: EvaluationApproach.EXACT,
-        },
         isRunning: false,
     };
 }
@@ -29,35 +32,35 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
             return {
                 type: 'text',
                 label: schemaField.label,
-                required: schemaField.required,
                 placeholder: schemaField.placeholder,
                 value: '',
+                evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
             };
         case 'textarea':
             return {
                 type: 'textarea',
                 label: schemaField.label,
-                required: schemaField.required,
                 placeholder: schemaField.placeholder,
                 rows: schemaField.rows,
                 value: '',
+                evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
             };
         case 'chips-input':
             return {
                 type: 'chips-input',
                 label: schemaField.label,
-                required: schemaField.required,
                 placeholder: schemaField.placeholder,
                 value: [],
+                evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
             };
         case 'select':
             return {
                 type: 'select',
                 label: schemaField.label,
-                required: schemaField.required,
                 placeholder: schemaField.placeholder,
                 value: '',
                 options: schemaField.options,
+                evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
             };
         default: {
             const _exhaustiveCheck = schemaField;
@@ -68,31 +71,18 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
 export function createExpectedOutcomeFromSchema(expectedOutcomeSchema) {
     return expectedOutcomeSchema.map(createExpectedOutcomeFieldFromSchema);
 }
-export function migrateLegacyExpectedOutcomeString(value) {
-    return [
-        {
-            type: 'textarea',
-            label: 'Expected Outcome',
-            value,
-        },
-    ];
-}
 /**
  * Creates a runtime test case from validated input data.
- * The input is expected to already satisfy `TestCaseInput` (legacy string or v2 shape),
- * and this function only performs normalization/defaulting (including legacy migration).
+ * The input is expected to already satisfy `TestCaseInput`,
+ * and this function only performs normalization/defaulting.
  *
  * @param data - Validated test case input
  * @returns A normalized TestCase object with runtime defaults applied
  */
 export function createTestCaseFromInput(data) {
-    let expectedOutcome;
-    if (typeof data.expectedOutcome === 'string') {
-        expectedOutcome = migrateLegacyExpectedOutcomeString(data.expectedOutcome);
-    }
-    else {
-        expectedOutcome = data.expectedOutcome;
-    }
-    return { ...data, expectedOutcome };
+    return {
+        ...data,
+        expectedOutcome: data.expectedOutcome.map(normalizeExpectedOutcomeField),
+    };
 }
 //# sourceMappingURL=test-case-factory.js.map

package/dist/collection/lib/test-cases/test-case-factory.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"test-case-factory.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-factory.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,EAAE,IAAI,MAAM,EAAE,MAAM,MAAM,CAAC;~~AAQpC~~,OAAO,EAAE,~~kBAAkB~~,EAAE,MAAM,~~yBAAyB~~,CAAC;~~AAE7D~~,MAAM,CAAC,MAAM,+BAA+B,GAA0B;IACpE;QACE,IAAI,EAAE,UAAU;QAChB,KAAK,EAAE,kBAAkB;QACzB,WAAW,EAAE,2BAA2B;QACxC,IAAI,EAAE,CAAC;KACR;CACF,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC5B,wBAA+C,+BAA+B;IAE9E,OAAO;QACL,EAAE,EAAE,MAAM,EAAE;QACZ,QAAQ,EAAE,EAAE;QACZ,eAAe,EAAE,+BAA+B,CAAC,qBAAqB,CAAC;QACvE,~~oBAAoB,EAAE;YACpB,QAAQ,EAAE,kBAAkB,CAAC,KAAK;SACnC;QACD,~~SAAS,EAAE,KAAK;KACjB,CAAC;AACJ,CAAC;AAED,SAAS,oCAAoC,CAC3C,WAAuC;IAEvC,QAAQ,WAAW,CAAC,IAAI,EAAE,CAAC;QACzB,KAAK,MAAM;YACT,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,~~QAAQ~~,EAAE,WAAW,CAAC,~~QAAQ~~;~~gBAC9B~~,~~WAAW~~,EAAE,WAAW,CAAC,WAAW~~;gBACpC~~,~~KAAK~~,~~EAAE~~,~~EAAE~~;~~aACV~~,CAAC;QAEJ,KAAK,UAAU;YACb,OAAO;gBACL,IAAI,EAAE,UAAU;gBAChB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,~~QAAQ,EAAE,~~WAAW,~~CAAC,QAAQ;gBAC9B,WAAW,~~EAAE,WAAW,CAAC,WAAW;gBACpC,IAAI,EAAE,WAAW,CAAC,IAAI;gBACtB,KAAK,EAAE,EAAE;~~aACV~~,CAAC;QAEJ,KAAK,aAAa;YAChB,OAAO;gBACL,IAAI,EAAE,aAAa;gBACnB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,~~QAAQ~~,EAAE,WAAW,CAAC,~~QAAQ~~;~~gBAC9B~~,~~WAAW~~,EAAE,WAAW,CAAC,WAAW~~;gBACpC~~,~~KAAK~~,~~EAAE~~,~~EAAE~~;~~aACV~~,CAAC;QAEJ,KAAK,QAAQ;YACX,OAAO;gBACL,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,~~QAAQ,EAAE,~~WAAW,~~CAAC,QAAQ;gBAC9B,WAAW,~~EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,EAAE;gBACT,OAAO,EAAE,WAAW,CAAC,OAAO;~~aAC7B~~,CAAC;QAEJ,OAAO,CAAC,CAAC,CAAC;YACR,MAAM,gBAAgB,GAAU,WAAW,CAAC;YAC5C,OAAO,gBAAgB,CAAC;QAC1B,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,UAAU,+BAA+B,CAC7C,qBAA4C;IAE5C,OAAO,qBAAqB,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;AACzE,CAAC;AAED~~,MAAM,UAAU,kCAAkC,CAChD,KAAa;IAEb,OAAO;QACL;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,kBAAkB;YACzB,KAAK;SACN;KACF,CAAC;AACJ,CAAC;AAED~~;;;;;;;GAOG;AACH,MAAM,UAAU,uBAAuB,CAAC,IAAmB;IACzD,~~IAAI,eAAuC,CAAC~~;~~IAC5C~~,~~IAA
I~~,~~OAAO,~~IAAI,~~CAAC,~~eAAe,~~KAAK,QAAQ,~~EAAE,~~CAAC;QAC7C,eAAe,GAAG,kCAAkC,CAAC,~~IAAI,CAAC,eAAe,CAAC,~~CAAC;IAC7E,CAAC;SAAM,CAAC;QACN,eAAe,~~GAAG,~~IAAI,~~CAAC,~~eAAe~~,CAAC;~~IACzC~~,CAAC;~~IAED~~,~~OAAO,EAAE,GAAG,IAAI,EAAE,eAAe,EAAE,~~CAAC~~;AACtC,CAAC~~","sourcesContent":["import { v4 as uuidv4 } from 'uuid';\nimport {\n ExpectedOutcomeField,\n ExpectedOutcomeSchema,\n ExpectedOutcomeSchemaField,\n TestCase,\n TestCaseInput,\n} from '../../types/llm-test-runner';\nimport { EvaluationApproach } from '../evaluation/constants';\n\nexport const DEFAULT_EXPECTED_OUTCOME_SCHEMA: ExpectedOutcomeSchema = [\n {\n type: 'textarea',\n label: 'Expected Outcome',\n placeholder: 'Enter expected outcome...',\n rows: 2,\n },\n];\n\n/*\n Creates a new test case with default values\n * @returns A new TestCase object with a unique ID\n /\nexport function createTestCase(\n expectedOutcomeSchema: ExpectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_SCHEMA,\n): TestCase {\n return {\n id: uuidv4(),\n question: '',\n expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),\n ~~evaluationParameters: {\n approach: EvaluationApproach.EXACT,\n },\n~~ isRunning: false,\n };\n}\n\nfunction createExpectedOutcomeFieldFromSchema(\n schemaField: ExpectedOutcomeSchemaField,\n): ExpectedOutcomeField {\n switch (schemaField.type) {\n case 'text':\n return {\n type: 'text',\n label: schemaField.label,\n ~~required: schemaField.required,\n~~ placeholder: schemaField.placeholder,\n value: '',\n };\n\n case 'textarea':\n return {\n type: 'textarea',\n label: schemaField.label,\n ~~required: schemaField.required,\n~~ placeholder: schemaField.placeholder,\n rows: schemaField.rows,\n value: '',\n };\n\n case 'chips-input':\n return {\n type: 'chips-input',\n label: schemaField.label,\n ~~required: schemaField.required,\n~~ placeholder: schemaField.placeholder,\n value: [],\n };\n\n case 'select':\n return {\n type: 'select',\n label: schemaField.label,\n ~~required: 
schemaField.required,\n~~ placeholder: schemaField.placeholder,\n value: '',\n options: schemaField.options,\n };\n\n default: {\n const _exhaustiveCheck: never = schemaField;\n return _exhaustiveCheck;\n }\n }\n}\n\nexport function createExpectedOutcomeFromSchema(\n expectedOutcomeSchema: ExpectedOutcomeSchema,\n): ExpectedOutcomeField[] {\n return expectedOutcomeSchema.map(createExpectedOutcomeFieldFromSchema);\n}\n\~~nexport function migrateLegacyExpectedOutcomeString(\~~n ~~value: string,\n): ExpectedOutcomeField[] {\n return [\n {\n type: 'textarea',\n label: 'Expected Outcome',\n value,\n },\n ];\n}\n\n~~/\n Creates a runtime test case from validated input data.\n * The input is expected to already satisfy `TestCaseInput~~` (legacy string or v2 shape),\~~n * and this function only performs normalization/defaulting ~~(including legacy migration)~~.\n \n @param data - Validated test case input\n * @returns A normalized TestCase object with runtime defaults applied\n */\nexport function createTestCaseFromInput(data: TestCaseInput): TestCase {\n ~~let~~ ~~expectedOutcome: ExpectedOutcomeField[];\n if (typeof data.expectedOutcome === 'string')~~ {\n ~~expectedOutcome = migrateLegacyExpectedOutcomeString(~~data~~.expectedOutcome);\~~n ~~} else {\n~~ expectedOutcome = data.expectedOutcome;\n }~~\n\n return { ...data, expectedOutcome }~~;\n}\n"]}
1	+ {"version":3,"file":"test-case-factory.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-factory.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,EAAE,IAAI,MAAM,EAAE,MAAM,MAAM,CAAC;AASpC,OAAO,EAAE,qCAAqC,EAAE,MAAM,yCAAyC,CAAC;AAEhG,MAAM,CAAC,MAAM,+BAA+B,GAA0B;IACpE;QACE,IAAI,EAAE,UAAU;QAChB,KAAK,EAAE,kBAAkB;QACzB,WAAW,EAAE,2BAA2B;QACxC,IAAI,EAAE,CAAC;KACR;CACF,CAAC;AAEF,SAAS,6BAA6B,CACpC,KAA2B;IAE3B,OAAO;QACL,GAAG,KAAK;QACR,oBAAoB,EAAE,qCAAqC,CACzD,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,oBAAoB,CAC3B;KACF,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC5B,wBAA+C,+BAA+B;IAE9E,OAAO;QACL,EAAE,EAAE,MAAM,EAAE;QACZ,QAAQ,EAAE,EAAE;QACZ,eAAe,EAAE,+BAA+B,CAAC,qBAAqB,CAAC;QACvE,SAAS,EAAE,KAAK;KACjB,CAAC;AACJ,CAAC;AAED,SAAS,oCAAoC,CAC3C,WAAuC;IAEvC,QAAQ,WAAW,CAAC,IAAI,EAAE,CAAC;QACzB,KAAK,MAAM;YACT,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,UAAU;YACb,OAAO;gBACL,IAAI,EAAE,UAAU;gBAChB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,IAAI,EAAE,WAAW,CAAC,IAAI;gBACtB,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,aAAa;YAChB,OAAO;gBACL,IAAI,EAAE,aAAa;gBACnB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,QAAQ;YACX,OAAO;gBACL,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,EAAE;gBACT,OAAO,EAAE,WAAW,CAAC,OAAO;gBAC5B,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CAC6B;aAChE,CAAC;QAEJ,OAAO,CAAC,CAAC,CAAC;YACR,MAAM,gBAAgB,GAAU,WAAW,CAAC;YAC5C,OAAO,gBAAgB,CAAC;QAC1B,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,UAAU,+BAA+B,CAC7C,qBAA4C;IAE5C,OAAO,qBAAqB,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;AACzE,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,uBAAuB,CAAC,IAAmB;IACzD,OAAO;QACL,GAAG,IA
AI;QACP,eAAe,EAAE,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,6BAA6B,CAAC;KACzE,CAAC;AACJ,CAAC","sourcesContent":["import { v4 as uuidv4 } from 'uuid';\nimport {\n ExpectedOutcomeField,\n ExpectedOutcomeSchema,\n ExpectedOutcomeSchemaField,\n TestCase,\n TestCaseInput,\n} from '../../types/llm-test-runner';\nimport { EvaluationApproach } from '../evaluation/constants';\nimport { normalizeEvaluationParametersForField } from '../evaluation/field-evaluation-approach';\n\nexport const DEFAULT_EXPECTED_OUTCOME_SCHEMA: ExpectedOutcomeSchema = [\n {\n type: 'textarea',\n label: 'Expected Outcome',\n placeholder: 'Enter expected outcome...',\n rows: 2,\n },\n];\n\nfunction normalizeExpectedOutcomeField(\n field: ExpectedOutcomeField,\n): ExpectedOutcomeField {\n return {\n ...field,\n evaluationParameters: normalizeEvaluationParametersForField(\n field.type,\n field.evaluationParameters,\n ),\n };\n}\n\n/*\n Creates a new test case with default values\n * @returns A new TestCase object with a unique ID\n /\nexport function createTestCase(\n expectedOutcomeSchema: ExpectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_SCHEMA,\n): TestCase {\n return {\n id: uuidv4(),\n question: '',\n expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),\n isRunning: false,\n };\n}\n\nfunction createExpectedOutcomeFieldFromSchema(\n schemaField: ExpectedOutcomeSchemaField,\n): ExpectedOutcomeField {\n switch (schemaField.type) {\n case 'text':\n return {\n type: 'text',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n value: '',\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'textarea':\n return {\n type: 'textarea',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n rows: schemaField.rows,\n value: '',\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'chips-input':\n return 
{\n type: 'chips-input',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n value: [],\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'select':\n return {\n type: 'select',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n value: '',\n options: schemaField.options,\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ) as { approach: EvaluationApproach.EXACT; threshold?: number },\n };\n\n default: {\n const _exhaustiveCheck: never = schemaField;\n return _exhaustiveCheck;\n }\n }\n}\n\nexport function createExpectedOutcomeFromSchema(\n expectedOutcomeSchema: ExpectedOutcomeSchema,\n): ExpectedOutcomeField[] {\n return expectedOutcomeSchema.map(createExpectedOutcomeFieldFromSchema);\n}\n\n/\n Creates a runtime test case from validated input data.\n * The input is expected to already satisfy `TestCaseInput`,\n * and this function only performs normalization/defaulting.\n \n @param data - Validated test case input\n * @returns A normalized TestCase object with runtime defaults applied\n */\nexport function createTestCaseFromInput(data: TestCaseInput): TestCase {\n return {\n ...data,\n expectedOutcome: data.expectedOutcome.map(normalizeExpectedOutcomeField),\n };\n}\n"]}

package/dist/collection/lib/test-cases/test-case-mutations.js CHANGED Viewed

@@ -1,16 +1,67 @@
+import { normalizeEvaluationParametersForField } from "../evaluation/field-evaluation-approach";
+export function applyExpectedOutcomeChange(testCase, change) {
+    const { index } = change;
+    const expectedOutcome = [...(testCase.expectedOutcome || [])];
+    const target = expectedOutcome[index];
+    if (!target) {
+        return testCase;
+    }
+    switch (change.operation) {
+        case 'set-value': {
+            if (target.type === 'chips-input') {
+                return testCase;
+            }
+            expectedOutcome[index] = {
+                ...target,
+                value: change.value,
+            };
+            return { ...testCase, expectedOutcome };
+        }
+        case 'add-chip': {
+            if (target.type !== 'chips-input') {
+                return testCase;
+            }
+            expectedOutcome[index] = {
+                ...target,
+                value: [...target.value, change.value],
+            };
+            return { ...testCase, expectedOutcome };
+        }
+        case 'remove-chip': {
+            if (target.type !== 'chips-input') {
+                return testCase;
+            }
+            expectedOutcome[index] = {
+                ...target,
+                value: target.value.filter(chip => chip !== change.value),
+            };
+            return { ...testCase, expectedOutcome };
+        }
+        case 'set-evaluation-approach':
+            return updateExpectedOutcomeFieldApproach(testCase, index, change.value);
+    }
+}
 /**
- * Updates the evaluation approach for a test case
- * @param testCase - The test case to update
- * @param approach - The new evaluation approach
- * @returns Updated test case with the new evaluation approach
+ * Updates the evaluation approach for a specific expected outcome field.
+ * Select fields always use exact matching.
  */
-export function updateApproach(testCase, approach) {
+export function updateExpectedOutcomeFieldApproach(testCase, fieldIndex, approach) {
+    const expectedOutcome = [...(testCase.expectedOutcome || [])];
+    const target = expectedOutcome[fieldIndex];
+    if (!target) {
+        return testCase;
+    }
+    const currentEvaluationParameters = target.evaluationParameters;
+    expectedOutcome[fieldIndex] = {
+        ...target,
+        evaluationParameters: normalizeEvaluationParametersForField(target.type, {
+            ...currentEvaluationParameters,
+            approach,
+        }),
+    };
     return {
         ...testCase,
-        evaluationParameters: {
-            ...testCase.evaluationParameters,
-            approach: approach,
-        },
+        expectedOutcome,
     };
 }
 //# sourceMappingURL=test-case-mutations.js.map

package/dist/collection/lib/test-cases/test-case-mutations.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"test-case-mutations.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-mutations.ts"],"names":[],"mappings":"~~AAGA;;;;;GAKG~~;AACH,MAAM,UAAU,~~cAAc~~,~~CAC5B~~,QAAkB,EAClB,QAA4B;IAE5B,~~OAAO~~;~~QACL~~,GAAG,QAAQ;~~QACX~~,oBAAoB,EAAE;~~YACpB~~,GAAG,QAAQ,CAAC,~~oBAAoB~~;~~YAChC~~,~~QAAQ~~,~~EAAE~~,QAAQ;~~SACnB~~;~~KACF~~,CAAC;AACJ,CAAC","sourcesContent":["import { TestCase } from '../../types/llm-test-runner';\nimport { EvaluationApproach } from '../evaluation/constants';\n\n~~/*\~~n ~~Updates~~ ~~the~~ evaluation approach ~~for~~ a ~~test~~ ~~case~~\n * ~~@param~~ testCase - ~~The~~ ~~test~~ case to ~~update~~\n * ~~@param~~ ~~approach~~ - ~~The~~ ~~new~~ ~~evaluation~~ ~~approach~~\n * ~~@returns~~ ~~Updated~~ ~~test~~ case ~~with~~ the ~~new~~ evaluation approach\n */\nexport function ~~updateApproach~~(\n testCase: TestCase,\n approach: EvaluationApproach,\n): TestCase {\n return {\n ...~~testCase~~,\n evaluationParameters: {\n ...~~testCase.evaluationParameters~~,\n approach~~: approach~~,\n },\n };\n}\n"]}
1	+ {"version":3,"file":"test-case-mutations.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-mutations.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,qCAAqC,EAAE,MAAM,yCAAyC,CAAC;AAwBhG,MAAM,UAAU,0BAA0B,CACxC,QAAkB,EAClB,MAA6B;IAE7B,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;IACzB,MAAM,eAAe,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;IAEtC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,QAAQ,MAAM,CAAC,SAAS,EAAE,CAAC;QACzB,KAAK,WAAW,CAAC,CAAC,CAAC;YACjB,IAAI,MAAM,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;gBAClC,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,eAAe,CAAC,KAAK,CAAC,GAAG;gBACvB,GAAG,MAAM;gBACT,KAAK,EAAE,MAAM,CAAC,KAAK;aACpB,CAAC;YACF,OAAO,EAAE,GAAG,QAAQ,EAAE,eAAe,EAAE,CAAC;QAC1C,CAAC;QACD,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,IAAI,MAAM,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;gBAClC,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,eAAe,CAAC,KAAK,CAAC,GAAG;gBACvB,GAAG,MAAM;gBACT,KAAK,EAAE,CAAC,GAAG,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC;aACvC,CAAC;YACF,OAAO,EAAE,GAAG,QAAQ,EAAE,eAAe,EAAE,CAAC;QAC1C,CAAC;QACD,KAAK,aAAa,CAAC,CAAC,CAAC;YACnB,IAAI,MAAM,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;gBAClC,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,eAAe,CAAC,KAAK,CAAC,GAAG;gBACvB,GAAG,MAAM;gBACT,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,KAAK,MAAM,CAAC,KAAK,CAAC;aAC1D,CAAC;YACF,OAAO,EAAE,GAAG,QAAQ,EAAE,eAAe,EAAE,CAAC;QAC1C,CAAC;QACD,KAAK,yBAAyB;YAC5B,OAAO,kCAAkC,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;IAC7E,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kCAAkC,CAChD,QAAkB,EAClB,UAAkB,EAClB,QAA4B;IAE5B,MAAM,eAAe,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;IAE3C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,2BAA2B,GAAG,MAAM,CAAC,oBAAoB,CAAC;IAChE,eAAe,CAAC,UAAU,CAAC,GAAG;QAC5B,GAAG,MAAM;QACT,oBAAoB,EAAE,qCAAqC,CAAC,MAAM,CAAC,IAAI,EAAE;YACvE,GAAG,2BAA2B;YAC9B,QAAQ;SACT,CAAC;KACH,CAAC;IAEF,OAAO;QACL,GAAG,QAAQ;QACX,eAAe;KAChB,CAAC;AACJ,CAAC","sourcesContent":["import 
{ TestCase } from '../../types/llm-test-runner';\nimport { EvaluationApproach } from '../evaluation/constants';\nimport { normalizeEvaluationParametersForField } from '../evaluation/field-evaluation-approach';\n\nexport type ExpectedOutcomeChange =\n | {\n index: number;\n operation: 'set-value';\n value: string;\n }\n | {\n index: number;\n operation: 'add-chip';\n value: string;\n }\n | {\n index: number;\n operation: 'remove-chip';\n value: string;\n }\n | {\n index: number;\n operation: 'set-evaluation-approach';\n value: EvaluationApproach;\n };\n\nexport function applyExpectedOutcomeChange(\n testCase: TestCase,\n change: ExpectedOutcomeChange,\n): TestCase {\n const { index } = change;\n const expectedOutcome = [...(testCase.expectedOutcome || [])];\n const target = expectedOutcome[index];\n\n if (!target) {\n return testCase;\n }\n\n switch (change.operation) {\n case 'set-value': {\n if (target.type === 'chips-input') {\n return testCase;\n }\n expectedOutcome[index] = {\n ...target,\n value: change.value,\n };\n return { ...testCase, expectedOutcome };\n }\n case 'add-chip': {\n if (target.type !== 'chips-input') {\n return testCase;\n }\n expectedOutcome[index] = {\n ...target,\n value: [...target.value, change.value],\n };\n return { ...testCase, expectedOutcome };\n }\n case 'remove-chip': {\n if (target.type !== 'chips-input') {\n return testCase;\n }\n expectedOutcome[index] = {\n ...target,\n value: target.value.filter(chip => chip !== change.value),\n };\n return { ...testCase, expectedOutcome };\n }\n case 'set-evaluation-approach':\n return updateExpectedOutcomeFieldApproach(testCase, index, change.value);\n }\n}\n\n/**\n * Updates the evaluation approach for a specific expected outcome field.\n * Select fields always use exact matching.\n */\nexport function updateExpectedOutcomeFieldApproach(\n testCase: TestCase,\n fieldIndex: number,\n approach: EvaluationApproach,\n): TestCase {\n const expectedOutcome = [...(testCase.expectedOutcome || 
[])];\n const target = expectedOutcome[fieldIndex];\n\n if (!target) {\n return testCase;\n }\n\n const currentEvaluationParameters = target.evaluationParameters;\n expectedOutcome[fieldIndex] = {\n ...target,\n evaluationParameters: normalizeEvaluationParametersForField(target.type, {\n ...currentEvaluationParameters,\n approach,\n }),\n };\n\n return {\n ...testCase,\n expectedOutcome,\n };\n}\n"]}

package/dist/collection/schemas/expected-outcome.js CHANGED Viewed

@@ -1,28 +1,46 @@
 import { z } from "zod";
+import { EvaluationApproach } from "../lib/evaluation/constants";
+import { isApproachAllowedForFieldType } from "../lib/evaluation/field-evaluation-approach";
 const nonEmptyString = z.string().trim().min(1);
 const optionalPositiveInt = z.number().int().positive().optional();
 const optionalString = z.string().optional();
-const optionalBoolean = z.boolean().optional();
 const selectOptionsSchema = z.array(nonEmptyString).min(1);
+const optionalNumber = z.number().optional();
+const evaluationParametersSchema = z.object({
+    approach: z.enum(EvaluationApproach),
+    threshold: optionalNumber,
+});
+const selectEvaluationParametersSchema = evaluationParametersSchema.superRefine((parameters, ctx) => {
+    if (!isApproachAllowedForFieldType('select', parameters.approach)) {
+        ctx.addIssue({
+            code: 'custom',
+            path: ['approach'],
+            message: `select fields only support "${EvaluationApproach.EXACT}" evaluation approach.`,
+        });
+    }
+});
 const defaultExpectedOutcomeBaseSchema = z.object({
     label: nonEmptyString,
-    required: optionalBoolean,
     placeholder: optionalString,
 });
 const createDefaultExpectedOutcomeFieldSchemas = (baseSchema) => ({
     text: baseSchema.extend({
         type: z.literal('text'),
+        evaluationParameters: evaluationParametersSchema.optional(),
     }),
     textarea: baseSchema.extend({
         type: z.literal('textarea'),
         rows: optionalPositiveInt,
+        evaluationParameters: evaluationParametersSchema.optional(),
     }),
     chipsInput: baseSchema.extend({
         type: z.literal('chips-input'),
+        evaluationParameters: evaluationParametersSchema.optional(),
     }),
     select: baseSchema.extend({
         type: z.literal('select'),
         options: selectOptionsSchema,
+        evaluationParameters: selectEvaluationParametersSchema.optional(),
     }),
 });
 function hasDuplicateChips(values) {