npm - llm-testrunner-components - Versions diffs - 1.0.6 → 1.0.9 - Mend

llm-testrunner-components 1.0.6 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (266) hide show

package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"rougeL-evaluator.js","sourceRoot":"","sources":["../../../../src/lib/evaluation/evaluators/rougeL-evaluator.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,KAAK,MAAM,UAAU,CAAC;AAGlC,OAAO,EAAE,wBAAwB,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAC;AAE5E,SAAS,eAAe,CACtB,OAAe,EACf,SAAiB,EACjB,cAAsB;IAEtB,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,IAAI,CAAC;QACH,MAAM,cAAc,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtD,MAAM,eAAe,GAAG,cAAc,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAClE,MAAM,eAAe,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAE7D,IACE,eAAe,CAAC,MAAM,KAAK,CAAC;gBAC5B,eAAe,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,EAC5C,CAAC;gBACD,WAAW,GAAG,CAAC,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,MAAM,SAAS,GAAG,KAAK,CAAC,GAAG,CAAC,eAAe,EAAE,eAAe,CAAC,CAAC;gBAC9D,MAAM,SAAS,GACb,OAAO,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,MAAM,IAAI,CAAC,CAAC,CAAC;gBAEvE,MAAM,MAAM,GACV,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;gBACtE,MAAM,SAAS,GACb,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;gBACtE,MAAM,WAAW,GAAG,SAAS,GAAG,MAAM,CAAC;gBAEvC,MAAM,OAAO,GACX,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,SAAS,GAAG,MAAM,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC/D,WAAW,GAAG,OAAO,CAAC;YACxB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,IAAI,CACV,qCAAqC,OAAO,kCAAkC,CAC/E,CAAC;QACJ,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,2CAA2C,OAAO,IAAI,EAAE,GAAG,CAAC,CAAC;IAC7E,CAAC;IAED,MAAM,aAAa,GAAG,WAAW,IAAI,cAAc,CAAC;IAEpD,MAAM,qBAAqB,GAA6B;QACtD,KAAK,EAAE,WAAW;QAClB,YAAY,EAAE,kBAAkB,CAAC,OAAO;KACzC,CAAC;IAEF,OAAO;QACL,OAAO;QACP,KAAK,EAAE,aAAa;QACpB,wBAAwB,EAAE,qBAAqB;KAChD,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,uBAAuB,CACrC,OAA0B;IAE1B,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE,eAAe,EAAE,oBAAoB,EAAE,GACzE,OAAO,CAAC;IAEV,kFAAkF;IAClF,IAAI,gBAAgB,GAAG,eAAe;QACpC,CAAC,CAAC,eAAe;aACZ,KAAK,CAAC,SAAS,CAAC;aAChB,GAAG,CAAC,CAAC,CAAC,EAAE,CA
AC,CAAC,CAAC,IAAI,EAAE,CAAC;aAClB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;QAC9B,CAAC,CAAC,EAAE,CAAC;IAEP,6GAA6G;IAC7G,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,IAAI,eAAe,EAAE,CAAC;QACrD,gBAAgB,GAAG,CAAC,eAAe,CAAC,CAAC;IACvC,CAAC;IAED,MAAM,SAAS,GAAG,CAAC,cAAc,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAChD,MAAM,cAAc,GAClB,oBAAoB,CAAC,SAAS,IAAI,wBAAwB,CAAC;IAE7D,IAAI,cAAc,GAAG,CAAC,CAAC;IACvB,MAAM,aAAa,GAAG,gBAAgB,CAAC,MAAM,CAAC;IAE9C,MAAM,cAAc,GAAmB,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE;QACpE,MAAM,KAAK,GAAG,eAAe,CAAC,OAAO,EAAE,SAAS,EAAE,cAAc,CAAC,CAAC;QAClE,IAAI,KAAK,CAAC,KAAK;YAAE,cAAc,EAAE,CAAC;QAClC,OAAO,KAAK,CAAC;IACf,CAAC,CAAC,CAAC;IAEH,MAAM,aAAa,GAAG,cAAc,KAAK,aAAa,CAAC;IAEvD,MAAM,qBAAqB,GAA6B;QACtD,KAAK,EAAE,aAAa,GAAG,CAAC,CAAC,CAAC,CAAC,cAAc,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;QAC7D,YAAY,EAAE,kBAAkB,CAAC,OAAO;KACzC,CAAC;IAEF,OAAO;QACL,UAAU;QACV,MAAM,EAAE,aAAa;QACrB,cAAc;QACd,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,oBAAoB,EAAE;YACpB,GAAG,oBAAoB;YACvB,SAAS,EAAE,cAAc;SAC1B;QACD,wBAAwB,EAAE,qBAAqB;KAChD,CAAC;AACJ,CAAC","sourcesContent":["import * as rouge from 'js-rouge';\nimport { EvaluationApproachResult } from '../../../types/evaluation';\nimport { EvaluationRequest, EvaluationResult, KeywordMatch } from '../types';\nimport { DEFAULT_ROUGE_PASS_SCORE, EvaluationApproach } from '../constants';\n\nfunction evaluateKeyword(\n keyword: string,\n candidate: string,\n rougeThreshold: number,\n): KeywordMatch {\n let rougeLScore = 0;\n\n try {\n const trimmedKeyword = keyword.trim();\n if (trimmedKeyword.length > 0 && candidate.length > 0) {\n const referenceTokens = trimmedKeyword.toLowerCase().split(/\\s+/);\n const candidateTokens = candidate.toLowerCase().split(/\\s+/);\n\n if (\n referenceTokens.length === 1 &&\n candidateTokens.includes(referenceTokens[0])\n ) {\n rougeLScore = 1;\n } else {\n const lcsResult = rouge.lcs(candidateTokens, referenceTokens);\n const lcsLength =\n typeof lcsResult === 'number' ? lcsResult : (lcsResult?.length ?? 
0);\n\n const recall =\n referenceTokens.length > 0 ? lcsLength / referenceTokens.length : 0;\n const precision =\n candidateTokens.length > 0 ? lcsLength / candidateTokens.length : 0;\n const denominator = precision + recall;\n\n const f1Score =\n denominator > 0 ? (2 * precision * recall) / denominator : 0;\n rougeLScore = f1Score;\n }\n } else {\n console.warn(\n `ROUGE-L not computed for keyword \"${keyword}\": Keyword or candidate missing.`,\n );\n }\n } catch (err) {\n console.error(`ROUGE-L computation failed for keyword \"${keyword}\":`, err);\n }\n\n const keywordPassed = rougeLScore >= rougeThreshold;\n\n const keywordApproachResult: EvaluationApproachResult = {\n score: rougeLScore,\n approachUsed: EvaluationApproach.ROUGE_L,\n };\n\n return {\n keyword,\n found: keywordPassed,\n evaluationApproachResult: keywordApproachResult,\n };\n}\n\nexport function performRougeLEvaluation(\n request: EvaluationRequest,\n): EvaluationResult {\n const { testCaseId, actualResponse, expectedOutcome, evaluationParameters } =\n request;\n\n // Split expectedOutcome by newlines, commas, and periods to create keywords array\n let expectedKeywords = expectedOutcome\n ? expectedOutcome\n .split(/[\\n,.]+/)\n .map(k => k.trim())\n .filter(k => k.length > 0)\n : [];\n\n // If no keywords after filtering (e.g., whitespace-only input), treat the original input as a single keyword\n if (expectedKeywords.length === 0 && expectedOutcome) {\n expectedKeywords = [expectedOutcome];\n }\n\n const candidate = (actualResponse || '').trim();\n const rougeThreshold =\n evaluationParameters.threshold ?? 
DEFAULT_ROUGE_PASS_SCORE;\n\n let keywordsPassed = 0;\n const totalKeywords = expectedKeywords.length;\n\n const keywordMatches: KeywordMatch[] = expectedKeywords.map(keyword => {\n const match = evaluateKeyword(keyword, candidate, rougeThreshold);\n if (match.found) keywordsPassed++;\n return match;\n });\n\n const overallPassed = keywordsPassed === totalKeywords;\n\n const overallApproachResult: EvaluationApproachResult = {\n score: totalKeywords > 0 ? keywordsPassed / totalKeywords : 1,\n approachUsed: EvaluationApproach.ROUGE_L,\n };\n\n return {\n testCaseId,\n passed: overallPassed,\n keywordMatches,\n timestamp: new Date().toISOString(),\n evaluationParameters: {\n ...evaluationParameters,\n threshold: rougeThreshold,\n },\n evaluationApproachResult: overallApproachResult,\n };\n}\n"]}

package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.test.js ADDED Viewed

@@ -0,0 +1,326 @@
+import { jest, describe, it, expect } from "@jest/globals";
+import { performRougeLEvaluation } from "./rougeL-evaluator";
+import { DEFAULT_ROUGE_PASS_SCORE, EvaluationApproach } from "../constants";
+describe('performRougeLEvaluation', () => {
+    // Helper function to create a base request with optional overrides
+    const createRequest = (overrides = {}) => {
+        const defaults = {
+            testCaseId: 'test-001',
+            question: 'Test question',
+            expectedOutcome: 'keyword',
+            actualResponse: 'response with keyword',
+            evaluationParameters: {
+                approach: EvaluationApproach.ROUGE_L,
+                threshold: DEFAULT_ROUGE_PASS_SCORE,
+            },
+        };
+        return {
+            ...defaults,
+            ...overrides,
+            evaluationParameters: {
+                ...defaults.evaluationParameters,
+                ...overrides.evaluationParameters,
+            },
+        };
+    };
+    describe('basic functionality', () => {
+        it('should return a valid EvaluationResult structure', () => {
+            const request = createRequest({
+                actualResponse: 'AI stands for artificial intelligence',
+                expectedOutcome: 'artificial intelligence',
+            });
+            const result = performRougeLEvaluation(request);
+            expect(result).toMatchObject({
+                testCaseId: 'test-001',
+                passed: expect.any(Boolean),
+                keywordMatches: expect.any(Array),
+                timestamp: expect.any(String),
+                evaluationParameters: expect.any(Object),
+                evaluationApproachResult: expect.any(Object),
+            });
+        });
+        it('should use default threshold when not provided', () => {
+            const request = createRequest({
+                evaluationParameters: { approach: EvaluationApproach.ROUGE_L },
+            });
+            const result = performRougeLEvaluation(request);
+            expect(result.evaluationParameters.threshold).toBe(DEFAULT_ROUGE_PASS_SCORE);
+        });
+        it('should use provided threshold when specified', () => {
+            const customThreshold = 0.85;
+            const request = createRequest({
+                actualResponse: 'response',
+                evaluationParameters: {
+                    approach: EvaluationApproach.ROUGE_L,
+                    threshold: customThreshold,
+                },
+            });
+            const result = performRougeLEvaluation(request);
+            expect(result.evaluationParameters.threshold).toBe(customThreshold);
+        });
+    });
+    describe('single keyword evaluation', () => {
+        it('should pass when single-word keyword is found in candidate', () => {
+            const request = createRequest({
+                expectedOutcome: 'machine',
+                actualResponse: 'This is about machine learning',
+            });
+            const result = performRougeLEvaluation(request);
+            expect(result).toMatchObject({
+                passed: true,
+                keywordMatches: [
+                    {
+                        keyword: 'machine',
+                        found: true,
+                        evaluationApproachResult: {
+                            score: 1,
+                            approachUsed: EvaluationApproach.ROUGE_L,
+                        },
+                    },
+                ],
+            });
+        });
+        it('should fail when single-word keyword is not found in candidate', () => {
+            const request = createRequest({
+                expectedOutcome: 'quantum',
+                actualResponse: 'This is about machine learning',
+            });
+            const result = performRougeLEvaluation(request);
+            expect(result).toMatchObject({
+                passed: false,
+                keywordMatches: [
+                    {
+                        found: false,
+                        evaluationApproachResult: {
+                            score: 0,
+                        },
+                    },
+                ],
+            });
+        });
+        it('should calculate ROUGE-L score for multi-word keywords', () => {
+            const request = createRequest({
+                expectedOutcome: 'machine learning',
+                actualResponse: 'AI and machine learning are related',
+                evaluationParameters: {
+                    approach: EvaluationApproach.ROUGE_L,
+                    threshold: 0.5,
+                },
+            });
+            const result = performRougeLEvaluation(request);
+            expect(result).toMatchObject({
+                keywordMatches: [
+                    {
+                        found: true,
+                        evaluationApproachResult: {
+                            score: expect.closeTo(0.5),
+                            approachUsed: EvaluationApproach.ROUGE_L,
+                        },
+                    },
+                ],
+            });
+        });
+        it('should handle LCS result as object with length property', () => {
+            const request = createRequest({
+                expectedOutcome: 'deep learning',
+                actualResponse: 'Deep learning is a subset of machine learning',
+            });
+            const result = performRougeLEvaluation(request);
+            expect(result.keywordMatches[0].evaluationApproachResult.score).toBeGreaterThan(0);
+        });
+    });
+    describe('multiple keywords evaluation', () => {
+        it('should pass when all keywords meet threshold', () => {
+            const request = createRequest({
+                expectedOutcome: 'machine\nlearning\nAI',
+                actualResponse: 'Machine learning and AI are transformative technologies',
+            });
+            const result = performRougeLEvaluation(request);
+            expect(result).toMatchObject({
+                passed: true,
+                evaluationApproachResult: {
+                    score: 1,
+                    approachUsed: EvaluationApproach.ROUGE_L,
+                },
+            });
+            expect(result.keywordMatches).toHaveLength(3);
+            expect(result.keywordMatches.every(match => match.found)).toBe(true);
+        });
+        it('should fail when not all keywords meet threshold', () => {
+            const request = createRequest({
+                expectedOutcome: 'machine\nquantum\nAI',
+                actualResponse: 'Machine learning and AI are transformative',
+            });
+            const result = performRougeLEvaluation(request);
+            expect(result.passed).toBe(false);
+            expect(result.keywordMatches).toHaveLength(3);
+            expect(result.keywordMatches.filter(match => match.found)).toHaveLength(2);
+            expect(result.evaluationApproachResult.score).toBeCloseTo(2 / 3);
+        });
+        it('should calculate overall score as ratio of passed keywords', () => {
+            const request = createRequest({
+                expectedOutcome: 'alpha\nbeta\ngamma\ndelta',
+                actualResponse: 'alpha and beta are here',
+            });
+            const result = performRougeLEvaluation(request);
+            expect(result).toMatchObject({
+                passed: false,
+                evaluationApproachResult: {
+                    score: 0.5, // 2 out of 4
+                },
+            });
+        });
+    });
+    describe('edge cases', () => {
+        it('should handle empty keywords array', () => {
+            const request = createRequest({
+                expectedOutcome: '',
+                actualResponse: 'Some response',
+            });
+            const result = performRougeLEvaluation(request);
+            expect(result).toMatchObject({
+                passed: true,
+                keywordMatches: [],
+                evaluationApproachResult: {
+                    score: 1,
+                },
+            });
+        });
+        it('should handle empty actual response', () => {
+            const request = createRequest({
+                expectedOutcome: 'machine',
+                actualResponse: '',
+            });
+            // Suppress expected warning
+            const consoleWarnSpy = jest
+                .spyOn(console, 'warn')
+                .mockImplementation(() => { });
+            const result = performRougeLEvaluation(request);
+            expect(result).toMatchObject({
+                passed: false,
+                keywordMatches: [
+                    {
+                        found: false,
+                        evaluationApproachResult: {
+                            score: 0,
+                        },
+                    },
+                ],
+            });
+            consoleWarnSpy.mockRestore();
+        });
+        it('should handle whitespace-only keyword', () => {
+            const request = createRequest({
+                expectedOutcome: '   ',
+                actualResponse: 'Some response',
+            });
+            // Suppress expected warning
+            const consoleWarnSpy = jest
+                .spyOn(console, 'warn')
+                .mockImplementation(() => { });
+            const result = performRougeLEvaluation(request);
+            expect(result.keywordMatches[0]).toMatchObject({
+                found: false,
+                evaluationApproachResult: {
+                    score: 0,
+                },
+            });
+            consoleWarnSpy.mockRestore();
+        });
+        it('should handle null/undefined actualResponse gracefully', () => {
+            const request = createRequest({
+                expectedOutcome: 'machine',
+                actualResponse: null,
+            });
+            // Suppress expected warning
+            const consoleWarnSpy = jest
+                .spyOn(console, 'warn')
+                .mockImplementation(() => { });
+            const result = performRougeLEvaluation(request);
+            expect(result).toMatchObject({
+                passed: false,
+                keywordMatches: [
+                    {
+                        found: false,
+                    },
+                ],
+            });
+            consoleWarnSpy.mockRestore();
+        });
+    });
+    describe('case insensitivity', () => {
+        it('should perform case-insensitive matching', () => {
+            const request = createRequest({
+                expectedOutcome: 'MACHINE',
+                actualResponse: 'machine learning is important',
+            });
+            const result = performRougeLEvaluation(request);
+            expect(result.keywordMatches[0]).toMatchObject({
+                found: true,
+                evaluationApproachResult: {
+                    score: 1,
+                },
+            });
+        });
+        it('should match keywords with mixed case', () => {
+            const request = createRequest({
+                expectedOutcome: 'MaChInE LeArNiNg',
+                actualResponse: 'MACHINE LEARNING is a field of AI',
+                evaluationParameters: {
+                    approach: EvaluationApproach.ROUGE_L,
+                    threshold: 0.4,
+                }, // Lower threshold for real ROUGE-L behavior
+            });
+            const result = performRougeLEvaluation(request);
+            expect(result.keywordMatches[0]).toMatchObject({
+                found: true,
+            });
+            expect(result.keywordMatches[0].evaluationApproachResult.score).toBeGreaterThanOrEqual(0.4);
+        });
+    });
+    describe('ROUGE-L score calculation', () => {
+        it('should calculate correct F-score from precision and recall', () => {
+            const request = createRequest({
+                expectedOutcome: 'neural network',
+                actualResponse: 'A neural network processes data',
+            });
+            const result = performRougeLEvaluation(request);
+            // With actual ROUGE-L: both words 'neural' and 'network' are found
+            // LCS length = 2, reference length = 2, candidate length = 5
+            // recall = 2/2 = 1.0, precision = 2/5 = 0.4
+            // F-score = 2 * (1.0 * 0.4) / (1.0 + 0.4) ≈ 0.571
+            const expectedFScore = (2 * 1.0 * 0.4) / (1.0 + 0.4);
+            expect(result.keywordMatches[0].evaluationApproachResult.score).toBeCloseTo(expectedFScore, 2);
+        });
+        it('should handle partial matches', () => {
+            const request = createRequest({
+                expectedOutcome: 'artificial intelligence systems',
+                actualResponse: 'Artificial intelligence is growing',
+                evaluationParameters: {
+                    approach: EvaluationApproach.ROUGE_L,
+                    threshold: 0.5,
+                },
+            });
+            const result = performRougeLEvaluation(request);
+            // With actual ROUGE-L: 'artificial' and 'intelligence' are found, 'systems' is not
+            // LCS length = 2, reference length = 3, candidate length = 4
+            // recall = 2/3, precision = 2/4 = 0.5
+            const recall = 2 / 3;
+            const precision = 2 / 4;
+            const expectedFScore = (2 * precision * recall) / (precision + recall);
+            expect(result.keywordMatches[0].evaluationApproachResult.score).toBeCloseTo(expectedFScore, 2);
+        });
+    });
+    describe('timestamp', () => {
+        it('should include a valid ISO timestamp', () => {
+            const request = createRequest({
+                expectedOutcome: 'test',
+                actualResponse: 'test response',
+            });
+            const result = performRougeLEvaluation(request);
+            expect(result.timestamp).toBeDefined();
+            expect(new Date(result.timestamp).toISOString()).toBe(result.timestamp);
+        });
+    });
+});
+//# sourceMappingURL=rougeL-evaluator.test.js.map

package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.test.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"rougeL-evaluator.test.js","sourceRoot":"","sources":["../../../../src/lib/evaluation/evaluators/rougeL-evaluator.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAC3D,OAAO,EAAE,uBAAuB,EAAE,MAAM,oBAAoB,CAAC;AAE7D,OAAO,EAAE,wBAAwB,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAC;AAE5E,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,mEAAmE;IACnE,MAAM,aAAa,GAAG,CACpB,YAAwC,EAAE,EACvB,EAAE;QACrB,MAAM,QAAQ,GAAsB;YAClC,UAAU,EAAE,UAAU;YACtB,QAAQ,EAAE,eAAe;YACzB,eAAe,EAAE,SAAS;YAC1B,cAAc,EAAE,uBAAuB;YACvC,oBAAoB,EAAE;gBACpB,QAAQ,EAAE,kBAAkB,CAAC,OAAO;gBACpC,SAAS,EAAE,wBAAwB;aACpC;SACF,CAAC;QAEF,OAAO;YACL,GAAG,QAAQ;YACX,GAAG,SAAS;YACZ,oBAAoB,EAAE;gBACpB,GAAG,QAAQ,CAAC,oBAAoB;gBAChC,GAAG,SAAS,CAAC,oBAAoB;aAClC;SACF,CAAC;IACJ,CAAC,CAAC;IAEF,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;QACnC,EAAE,CAAC,kDAAkD,EAAE,GAAG,EAAE;YAC1D,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,cAAc,EAAE,uCAAuC;gBACvD,eAAe,EAAE,yBAAyB;aAC3C,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC;gBAC3B,UAAU,EAAE,UAAU;gBACtB,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC;gBAC3B,cAAc,EAAE,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC;gBACjC,SAAS,EAAE,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC;gBAC7B,oBAAoB,EAAE,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC;gBACxC,wBAAwB,EAAE,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC;aAC7C,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;YACxD,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,oBAAoB,EAAE,EAAE,QAAQ,EAAE,kBAAkB,CAAC,OAAO,EAAE;aAC/D,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,IAAI,CAChD,wBAAwB,CACzB,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8CAA8C,EAAE,GAAG,EAAE;YACtD,MAAM,eAAe,GAAG,IAAI,CAAC;YAC7B,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,cAAc,EAAE,UAAU;gBAC1B,oBAAoB,EAAE;oBACpB,QAAQ,EAAE,kBAAkB,CAAC,OAAO;oBACpC,SAAS,EAAE,eAAe;iBAC3B;aACF,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QACtE,CAAC,CAAC,CAAC;IACL,CA
AC,CAAC,CAAC;IAEH,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACzC,EAAE,CAAC,4DAA4D,EAAE,GAAG,EAAE;YACpE,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,eAAe,EAAE,SAAS;gBAC1B,cAAc,EAAE,gCAAgC;aACjD,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC;gBAC3B,MAAM,EAAE,IAAI;gBACZ,cAAc,EAAE;oBACd;wBACE,OAAO,EAAE,SAAS;wBAClB,KAAK,EAAE,IAAI;wBACX,wBAAwB,EAAE;4BACxB,KAAK,EAAE,CAAC;4BACR,YAAY,EAAE,kBAAkB,CAAC,OAAO;yBACzC;qBACF;iBACF;aACF,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gEAAgE,EAAE,GAAG,EAAE;YACxE,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,eAAe,EAAE,SAAS;gBAC1B,cAAc,EAAE,gCAAgC;aACjD,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC;gBAC3B,MAAM,EAAE,KAAK;gBACb,cAAc,EAAE;oBACd;wBACE,KAAK,EAAE,KAAK;wBACZ,wBAAwB,EAAE;4BACxB,KAAK,EAAE,CAAC;yBACT;qBACF;iBACF;aACF,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wDAAwD,EAAE,GAAG,EAAE;YAChE,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,eAAe,EAAE,kBAAkB;gBACnC,cAAc,EAAE,qCAAqC;gBACrD,oBAAoB,EAAE;oBACpB,QAAQ,EAAE,kBAAkB,CAAC,OAAO;oBACpC,SAAS,EAAE,GAAG;iBACf;aACF,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC;gBAC3B,cAAc,EAAE;oBACd;wBACE,KAAK,EAAE,IAAI;wBACX,wBAAwB,EAAE;4BACxB,KAAK,EAAE,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC;4BAC1B,YAAY,EAAE,kBAAkB,CAAC,OAAO;yBACzC;qBACF;iBACF;aACF,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,yDAAyD,EAAE,GAAG,EAAE;YACjE,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,eAAe,EAAE,eAAe;gBAChC,cAAc,EAAE,+CAA+C;aAChE,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QACvB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,8BAA8B,EAAE,GAAG,EAAE;QAC5C,EAAE,CAAC,8CAA8C,EAAE,GAAG,EAAE;YACtD,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,eAAe,EAAE,uBAAuB;gBACxC,cAAc,EACZ,yDAAyD;aAC5D,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC;gBAC3B,MAAM,EAAE,IAAI;gBACZ,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;oB
ACR,YAAY,EAAE,kBAAkB,CAAC,OAAO;iBACzC;aACF,CAAC,CAAC;YACH,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC9C,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,kDAAkD,EAAE,GAAG,EAAE;YAC1D,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,eAAe,EAAE,sBAAsB;gBACvC,cAAc,EAAE,4CAA4C;aAC7D,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAClC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC9C,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,YAAY,CACrE,CAAC,CACF,CAAC;YACF,MAAM,CAAC,MAAM,CAAC,wBAAwB,CAAC,KAAK,CAAC,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACnE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4DAA4D,EAAE,GAAG,EAAE;YACpE,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,eAAe,EAAE,2BAA2B;gBAC5C,cAAc,EAAE,yBAAyB;aAC1C,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC;gBAC3B,MAAM,EAAE,KAAK;gBACb,wBAAwB,EAAE;oBACxB,KAAK,EAAE,GAAG,EAAE,aAAa;iBAC1B;aACF,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC1B,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;YAC5C,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,eAAe,EAAE,EAAE;gBACnB,cAAc,EAAE,eAAe;aAChC,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC;gBAC3B,MAAM,EAAE,IAAI;gBACZ,cAAc,EAAE,EAAE;gBAClB,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;iBACT;aACF,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;YAC7C,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,eAAe,EAAE,SAAS;gBAC1B,cAAc,EAAE,EAAE;aACnB,CAAC,CAAC;YAEH,4BAA4B;YAC5B,MAAM,cAAc,GAAG,IAAI;iBACxB,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC;iBACtB,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;YAEhC,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC;gBAC3B,MAAM,EAAE,KAAK;gBACb,cAAc,EAAE;oBACd;wBACE,KAAK,EAAE,KAAK;wBACZ,wBAAwB,EAAE;4BACxB,KAAK,EAAE,CAAC;yBACT;qBACF;iBACF;aACF
,CAAC,CAAC;YAEH,cAAc,CAAC,WAAW,EAAE,CAAC;QAC/B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;YAC/C,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,eAAe,EAAE,KAAK;gBACtB,cAAc,EAAE,eAAe;aAChC,CAAC,CAAC;YAEH,4BAA4B;YAC5B,MAAM,cAAc,GAAG,IAAI;iBACxB,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC;iBACtB,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;YAEhC,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC;gBAC7C,KAAK,EAAE,KAAK;gBACZ,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;iBACT;aACF,CAAC,CAAC;YAEH,cAAc,CAAC,WAAW,EAAE,CAAC;QAC/B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wDAAwD,EAAE,GAAG,EAAE;YAChE,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,eAAe,EAAE,SAAS;gBAC1B,cAAc,EAAE,IAAyB;aAC1C,CAAC,CAAC;YAEH,4BAA4B;YAC5B,MAAM,cAAc,GAAG,IAAI;iBACxB,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC;iBACtB,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;YAEhC,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC;gBAC3B,MAAM,EAAE,KAAK;gBACb,cAAc,EAAE;oBACd;wBACE,KAAK,EAAE,KAAK;qBACb;iBACF;aACF,CAAC,CAAC;YAEH,cAAc,CAAC,WAAW,EAAE,CAAC;QAC/B,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAClC,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;YAClD,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,eAAe,EAAE,SAAS;gBAC1B,cAAc,EAAE,+BAA+B;aAChD,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC;gBAC7C,KAAK,EAAE,IAAI;gBACX,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;iBACT;aACF,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;YAC/C,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,eAAe,EAAE,kBAAkB;gBACnC,cAAc,EAAE,mCAAmC;gBACnD,oBAAoB,EAAE;oBACpB,QAAQ,EAAE,kBAAkB,CAAC,OAAO;oBACpC,SAAS,EAAE,GAAG;iBACf,EAAE,4CAA4C;aAChD,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC;gBAC7C,KAAK,EAAE,IAAI;aACZ,CAAC,CAAC;YACH,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC;QAChC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,2BAA2B,EAAE,G
AAG,EAAE;QACzC,EAAE,CAAC,4DAA4D,EAAE,GAAG,EAAE;YACpE,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,eAAe,EAAE,gBAAgB;gBACjC,cAAc,EAAE,iCAAiC;aAClD,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,mEAAmE;YACnE,6DAA6D;YAC7D,4CAA4C;YAC5C,kDAAkD;YAClD,MAAM,cAAc,GAAG,CAAC,CAAC,GAAG,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC;YACrD,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,WAAW,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC;QACnC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACvC,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,eAAe,EAAE,iCAAiC;gBAClD,cAAc,EAAE,oCAAoC;gBACpD,oBAAoB,EAAE;oBACpB,QAAQ,EAAE,kBAAkB,CAAC,OAAO;oBACpC,SAAS,EAAE,GAAG;iBACf;aACF,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,mFAAmF;YACnF,6DAA6D;YAC7D,sCAAsC;YACtC,MAAM,MAAM,GAAG,CAAC,GAAG,CAAC,CAAC;YACrB,MAAM,SAAS,GAAG,CAAC,GAAG,CAAC,CAAC;YACxB,MAAM,cAAc,GAAG,CAAC,CAAC,GAAG,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,SAAS,GAAG,MAAM,CAAC,CAAC;YAEvE,MAAM,CACJ,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CACxD,CAAC,WAAW,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC;QACnC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,WAAW,EAAE,GAAG,EAAE;QACzB,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;YAC9C,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,eAAe,EAAE,MAAM;gBACvB,cAAc,EAAE,eAAe;aAChC,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC;YACvC,MAAM,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,SAAU,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAC3E,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC","sourcesContent":["import { jest, describe, it, expect } from '@jest/globals';\nimport { performRougeLEvaluation } from './rougeL-evaluator';\nimport { EvaluationRequest } from '../types';\nimport { DEFAULT_ROUGE_PASS_SCORE, EvaluationApproach } from '../constants';\n\ndescribe('performRougeLEvaluation', () => {\n // Helper function to create a base request with optional overrides\n const createRequest = (\n overrides: Partial<EvaluationRequest> = {},\n ): 
EvaluationRequest => {\n const defaults: EvaluationRequest = {\n testCaseId: 'test-001',\n question: 'Test question',\n expectedOutcome: 'keyword',\n actualResponse: 'response with keyword',\n evaluationParameters: {\n approach: EvaluationApproach.ROUGE_L,\n threshold: DEFAULT_ROUGE_PASS_SCORE,\n },\n };\n\n return {\n ...defaults,\n ...overrides,\n evaluationParameters: {\n ...defaults.evaluationParameters,\n ...overrides.evaluationParameters,\n },\n };\n };\n\n describe('basic functionality', () => {\n it('should return a valid EvaluationResult structure', () => {\n const request = createRequest({\n actualResponse: 'AI stands for artificial intelligence',\n expectedOutcome: 'artificial intelligence',\n });\n\n const result = performRougeLEvaluation(request);\n\n expect(result).toMatchObject({\n testCaseId: 'test-001',\n passed: expect.any(Boolean),\n keywordMatches: expect.any(Array),\n timestamp: expect.any(String),\n evaluationParameters: expect.any(Object),\n evaluationApproachResult: expect.any(Object),\n });\n });\n\n it('should use default threshold when not provided', () => {\n const request = createRequest({\n evaluationParameters: { approach: EvaluationApproach.ROUGE_L },\n });\n\n const result = performRougeLEvaluation(request);\n\n expect(result.evaluationParameters.threshold).toBe(\n DEFAULT_ROUGE_PASS_SCORE,\n );\n });\n\n it('should use provided threshold when specified', () => {\n const customThreshold = 0.85;\n const request = createRequest({\n actualResponse: 'response',\n evaluationParameters: {\n approach: EvaluationApproach.ROUGE_L,\n threshold: customThreshold,\n },\n });\n\n const result = performRougeLEvaluation(request);\n\n expect(result.evaluationParameters.threshold).toBe(customThreshold);\n });\n });\n\n describe('single keyword evaluation', () => {\n it('should pass when single-word keyword is found in candidate', () => {\n const request = createRequest({\n expectedOutcome: 'machine',\n actualResponse: 'This is about machine 
learning',\n });\n\n const result = performRougeLEvaluation(request);\n\n expect(result).toMatchObject({\n passed: true,\n keywordMatches: [\n {\n keyword: 'machine',\n found: true,\n evaluationApproachResult: {\n score: 1,\n approachUsed: EvaluationApproach.ROUGE_L,\n },\n },\n ],\n });\n });\n\n it('should fail when single-word keyword is not found in candidate', () => {\n const request = createRequest({\n expectedOutcome: 'quantum',\n actualResponse: 'This is about machine learning',\n });\n\n const result = performRougeLEvaluation(request);\n\n expect(result).toMatchObject({\n passed: false,\n keywordMatches: [\n {\n found: false,\n evaluationApproachResult: {\n score: 0,\n },\n },\n ],\n });\n });\n\n it('should calculate ROUGE-L score for multi-word keywords', () => {\n const request = createRequest({\n expectedOutcome: 'machine learning',\n actualResponse: 'AI and machine learning are related',\n evaluationParameters: {\n approach: EvaluationApproach.ROUGE_L,\n threshold: 0.5,\n },\n });\n\n const result = performRougeLEvaluation(request);\n\n expect(result).toMatchObject({\n keywordMatches: [\n {\n found: true,\n evaluationApproachResult: {\n score: expect.closeTo(0.5),\n approachUsed: EvaluationApproach.ROUGE_L,\n },\n },\n ],\n });\n });\n\n it('should handle LCS result as object with length property', () => {\n const request = createRequest({\n expectedOutcome: 'deep learning',\n actualResponse: 'Deep learning is a subset of machine learning',\n });\n\n const result = performRougeLEvaluation(request);\n\n expect(\n result.keywordMatches[0].evaluationApproachResult.score,\n ).toBeGreaterThan(0);\n });\n });\n\n describe('multiple keywords evaluation', () => {\n it('should pass when all keywords meet threshold', () => {\n const request = createRequest({\n expectedOutcome: 'machine\\nlearning\\nAI',\n actualResponse:\n 'Machine learning and AI are transformative technologies',\n });\n\n const result = performRougeLEvaluation(request);\n\n 
expect(result).toMatchObject({\n passed: true,\n evaluationApproachResult: {\n score: 1,\n approachUsed: EvaluationApproach.ROUGE_L,\n },\n });\n expect(result.keywordMatches).toHaveLength(3);\n expect(result.keywordMatches.every(match => match.found)).toBe(true);\n });\n\n it('should fail when not all keywords meet threshold', () => {\n const request = createRequest({\n expectedOutcome: 'machine\\nquantum\\nAI',\n actualResponse: 'Machine learning and AI are transformative',\n });\n\n const result = performRougeLEvaluation(request);\n\n expect(result.passed).toBe(false);\n expect(result.keywordMatches).toHaveLength(3);\n expect(result.keywordMatches.filter(match => match.found)).toHaveLength(\n 2,\n );\n expect(result.evaluationApproachResult.score).toBeCloseTo(2 / 3);\n });\n\n it('should calculate overall score as ratio of passed keywords', () => {\n const request = createRequest({\n expectedOutcome: 'alpha\\nbeta\\ngamma\\ndelta',\n actualResponse: 'alpha and beta are here',\n });\n\n const result = performRougeLEvaluation(request);\n\n expect(result).toMatchObject({\n passed: false,\n evaluationApproachResult: {\n score: 0.5, // 2 out of 4\n },\n });\n });\n });\n\n describe('edge cases', () => {\n it('should handle empty keywords array', () => {\n const request = createRequest({\n expectedOutcome: '',\n actualResponse: 'Some response',\n });\n\n const result = performRougeLEvaluation(request);\n\n expect(result).toMatchObject({\n passed: true,\n keywordMatches: [],\n evaluationApproachResult: {\n score: 1,\n },\n });\n });\n\n it('should handle empty actual response', () => {\n const request = createRequest({\n expectedOutcome: 'machine',\n actualResponse: '',\n });\n\n // Suppress expected warning\n const consoleWarnSpy = jest\n .spyOn(console, 'warn')\n .mockImplementation(() => {});\n\n const result = performRougeLEvaluation(request);\n\n expect(result).toMatchObject({\n passed: false,\n keywordMatches: [\n {\n found: false,\n evaluationApproachResult: {\n 
score: 0,\n },\n },\n ],\n });\n\n consoleWarnSpy.mockRestore();\n });\n\n it('should handle whitespace-only keyword', () => {\n const request = createRequest({\n expectedOutcome: ' ',\n actualResponse: 'Some response',\n });\n\n // Suppress expected warning\n const consoleWarnSpy = jest\n .spyOn(console, 'warn')\n .mockImplementation(() => {});\n\n const result = performRougeLEvaluation(request);\n\n expect(result.keywordMatches[0]).toMatchObject({\n found: false,\n evaluationApproachResult: {\n score: 0,\n },\n });\n\n consoleWarnSpy.mockRestore();\n });\n\n it('should handle null/undefined actualResponse gracefully', () => {\n const request = createRequest({\n expectedOutcome: 'machine',\n actualResponse: null as unknown as string,\n });\n\n // Suppress expected warning\n const consoleWarnSpy = jest\n .spyOn(console, 'warn')\n .mockImplementation(() => {});\n\n const result = performRougeLEvaluation(request);\n\n expect(result).toMatchObject({\n passed: false,\n keywordMatches: [\n {\n found: false,\n },\n ],\n });\n\n consoleWarnSpy.mockRestore();\n });\n });\n\n describe('case insensitivity', () => {\n it('should perform case-insensitive matching', () => {\n const request = createRequest({\n expectedOutcome: 'MACHINE',\n actualResponse: 'machine learning is important',\n });\n\n const result = performRougeLEvaluation(request);\n\n expect(result.keywordMatches[0]).toMatchObject({\n found: true,\n evaluationApproachResult: {\n score: 1,\n },\n });\n });\n\n it('should match keywords with mixed case', () => {\n const request = createRequest({\n expectedOutcome: 'MaChInE LeArNiNg',\n actualResponse: 'MACHINE LEARNING is a field of AI',\n evaluationParameters: {\n approach: EvaluationApproach.ROUGE_L,\n threshold: 0.4,\n }, // Lower threshold for real ROUGE-L behavior\n });\n\n const result = performRougeLEvaluation(request);\n\n expect(result.keywordMatches[0]).toMatchObject({\n found: true,\n });\n expect(\n 
result.keywordMatches[0].evaluationApproachResult.score,\n ).toBeGreaterThanOrEqual(0.4);\n });\n });\n\n describe('ROUGE-L score calculation', () => {\n it('should calculate correct F-score from precision and recall', () => {\n const request = createRequest({\n expectedOutcome: 'neural network',\n actualResponse: 'A neural network processes data',\n });\n\n const result = performRougeLEvaluation(request);\n\n // With actual ROUGE-L: both words 'neural' and 'network' are found\n // LCS length = 2, reference length = 2, candidate length = 5\n // recall = 2/2 = 1.0, precision = 2/5 = 0.4\n // F-score = 2 * (1.0 * 0.4) / (1.0 + 0.4) ≈ 0.571\n const expectedFScore = (2 * 1.0 * 0.4) / (1.0 + 0.4);\n expect(\n result.keywordMatches[0].evaluationApproachResult.score,\n ).toBeCloseTo(expectedFScore, 2);\n });\n\n it('should handle partial matches', () => {\n const request = createRequest({\n expectedOutcome: 'artificial intelligence systems',\n actualResponse: 'Artificial intelligence is growing',\n evaluationParameters: {\n approach: EvaluationApproach.ROUGE_L,\n threshold: 0.5,\n },\n });\n\n const result = performRougeLEvaluation(request);\n\n // With actual ROUGE-L: 'artificial' and 'intelligence' are found, 'systems' is not\n // LCS length = 2, reference length = 3, candidate length = 4\n // recall = 2/3, precision = 2/4 = 0.5\n const recall = 2 / 3;\n const precision = 2 / 4;\n const expectedFScore = (2 * precision * recall) / (precision + recall);\n\n expect(\n result.keywordMatches[0].evaluationApproachResult.score,\n ).toBeCloseTo(expectedFScore, 2);\n });\n });\n\n describe('timestamp', () => {\n it('should include a valid ISO timestamp', () => {\n const request = createRequest({\n expectedOutcome: 'test',\n actualResponse: 'test response',\n });\n\n const result = performRougeLEvaluation(request);\n\n expect(result.timestamp).toBeDefined();\n expect(new Date(result.timestamp!).toISOString()).toBe(result.timestamp);\n });\n });\n});\n"]}

package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js ADDED Viewed

@@ -0,0 +1,69 @@
+import { loadSemanticModel } from "./model-loader";
+import { evaluateKeywordsSemantically } from "./evaluate-keywords";
+import { DEFAULT_SEMANTIC_PASS_SCORE, EvaluationApproach, } from "../../constants";
+export class SemanticEvaluator { // Scores a response against expected keywords using the embedding model returned by loadSemanticModel().
+    // TODO(LLM-39): Refactor SemanticEvaluator into a singleton pattern.
+    static extractor = null; // shared by every instance; remains null until initialize() has loaded the model
+    async initialize() { // Loads the model on first use; returns immediately once the static extractor is set.
+        if (SemanticEvaluator.extractor)
+            return;
+        try {
+            SemanticEvaluator.extractor = await loadSemanticModel(); // NOTE(review): the extractor is only assigned after this await, so overlapping first calls can each start a load
+        }
+        catch (error) {
+            console.error('Failed to load semantic evaluation model:', error);
+            throw error; // logged above, then propagated to the caller
+        }
+    }
+    async performEvaluation(request) { // Resolves to the evaluation result for `request`; reads testCaseId, expectedOutcome and actualResponse from it.
+        try {
+            await this.initialize();
+            // Split expectedOutcome on newlines and/or commas, trim each entry and drop empty ones to create the keywords array
+            const expectedKeywords = request.expectedOutcome
+                ? request.expectedOutcome
+                    .split(/[\n,]+/)
+                    .map(k => k.trim())
+                    .filter(k => k.length > 0)
+                : [];
+            const keywordMatches = await evaluateKeywordsSemantically(SemanticEvaluator.extractor, request.actualResponse, expectedKeywords, DEFAULT_SEMANTIC_PASS_SCORE); // NOTE(review): always uses DEFAULT_SEMANTIC_PASS_SCORE; no threshold is read from the request here - confirm this is intended
+            const totalItems = keywordMatches.length;
+            // calculate the overall score as the mean of the per-keyword scores
+            const keywordScore = keywordMatches.reduce((acc, curr) => acc + curr.evaluationApproachResult.score, 0);
+            const overallScore = totalItems > 0 ? keywordScore / totalItems : 0; // 0 when there are no keyword matches, to avoid division by zero
+            const passed = keywordMatches.every(match => match.found); // every() is true for an empty array, so no keywords yields passed === true together with score 0
+            const evaluationParameters = {
+                approach: EvaluationApproach.SEMANTIC,
+                threshold: DEFAULT_SEMANTIC_PASS_SCORE,
+            };
+            return {
+                testCaseId: request.testCaseId,
+                passed,
+                keywordMatches,
+                evaluationParameters,
+                evaluationApproachResult: {
+                    score: overallScore,
+                    approachUsed: EvaluationApproach.SEMANTIC,
+                },
+                timestamp: new Date().toISOString(),
+            };
+        }
+        catch (error) { // any failure above, including model loading, is logged and reported as a failed result instead of being thrown
+            console.error('Failed to perform semantic evaluation:', error);
+            return {
+                testCaseId: request.testCaseId,
+                passed: false,
+                keywordMatches: [],
+                evaluationParameters: {
+                    approach: EvaluationApproach.SEMANTIC,
+                    threshold: DEFAULT_SEMANTIC_PASS_SCORE,
+                },
+                evaluationApproachResult: {
+                    score: 0,
+                    approachUsed: EvaluationApproach.SEMANTIC,
+                },
+                timestamp: new Date().toISOString(),
+            };
+        }
+    }
+}
+//# sourceMappingURL=SemanticEvaluator.js.map

package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"SemanticEvaluator.js","sourceRoot":"","sources":["../../../../../src/lib/evaluation/evaluators/semantic/SemanticEvaluator.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,EAAE,4BAA4B,EAAE,MAAM,qBAAqB,CAAC;AAGnE,OAAO,EACL,2BAA2B,EAC3B,kBAAkB,GACnB,MAAM,iBAAiB,CAAC;AAEzB,MAAM,OAAO,iBAAiB;IAC5B,qEAAqE;IAC7D,MAAM,CAAC,SAAS,GAA8B,IAAI,CAAC;IAE3D,KAAK,CAAC,UAAU;QACd,IAAI,iBAAiB,CAAC,SAAS;YAAE,OAAO;QACxC,IAAI,CAAC;YACH,iBAAiB,CAAC,SAAS,GAAG,MAAM,iBAAiB,EAAE,CAAC;QAC1D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,2CAA2C,EAAE,KAAK,CAAC,CAAC;YAClE,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED,KAAK,CAAC,iBAAiB,CACrB,OAA0B;QAE1B,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;YAExB,6DAA6D;YAC7D,MAAM,gBAAgB,GAAG,OAAO,CAAC,eAAe;gBAC9C,CAAC,CAAC,OAAO,CAAC,eAAe;qBACpB,KAAK,CAAC,QAAQ,CAAC;qBACf,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;qBAClB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;gBAC9B,CAAC,CAAC,EAAE,CAAC;YAEP,MAAM,cAAc,GAAG,MAAM,4BAA4B,CACvD,iBAAiB,CAAC,SAAS,EAC3B,OAAO,CAAC,cAAc,EACtB,gBAAgB,EAChB,2BAA2B,CAC5B,CAAC;YAEF,MAAM,UAAU,GAAG,cAAc,CAAC,MAAM,CAAC;YACzC,4EAA4E;YAC5E,MAAM,YAAY,GAAG,cAAc,CAAC,MAAM,CACxC,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,CAAC,wBAAwB,CAAC,KAAK,EACxD,CAAC,CACF,CAAC;YACF,MAAM,YAAY,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,4BAA4B;YACjG,MAAM,MAAM,GAAG,cAAc,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAE1D,MAAM,oBAAoB,GAAG;gBAC3B,QAAQ,EAAE,kBAAkB,CAAC,QAAQ;gBACrC,SAAS,EAAE,2BAA2B;aACf,CAAC;YAE1B,OAAO;gBACL,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,MAAM;gBACN,cAAc;gBACd,oBAAoB;gBACpB,wBAAwB,EAAE;oBACxB,KAAK,EAAE,YAAY;oBACnB,YAAY,EAAE,kBAAkB,CAAC,QAAQ;iBAC1C;gBACD,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,wCAAwC,EAAE,KAAK,CAAC,CAAC;YAC/D,OAAO;gBACL,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,MAAM,EAAE,KAAK;gBACb,cAAc,EAAE,EAAE;gBAClB,oBAAoB,EAAE;oBACpB,QAAQ,EAAE
,kBAAkB,CAAC,QAAQ;oBACrC,SAAS,EAAE,2BAA2B;iBACvC;gBACD,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;oBACR,YAAY,EAAE,kBAAkB,CAAC,QAAQ;iBAC1C;gBACD,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC,CAAC;QACJ,CAAC;IACH,CAAC","sourcesContent":["import { EvaluationResult, EvaluationRequest } from '../../types';\nimport { loadSemanticModel } from './model-loader';\nimport { evaluateKeywordsSemantically } from './evaluate-keywords';\nimport { FeatureExtractionPipeline } from '@xenova/transformers';\nimport { EvaluationParameters } from '../../../../types/evaluation';\nimport {\n DEFAULT_SEMANTIC_PASS_SCORE,\n EvaluationApproach,\n} from '../../constants';\n\nexport class SemanticEvaluator {\n // TODO(LLM-39): Refactor SemanticEvaluator into a singleton pattern.\n private static extractor: FeatureExtractionPipeline = null;\n\n async initialize(): Promise<void> {\n if (SemanticEvaluator.extractor) return;\n try {\n SemanticEvaluator.extractor = await loadSemanticModel();\n } catch (error) {\n console.error('Failed to load semantic evaluation model:', error);\n throw error;\n }\n }\n\n async performEvaluation(\n request: EvaluationRequest,\n ): Promise<EvaluationResult> {\n try {\n await this.initialize();\n\n // Split expectedOutcome by newlines to create keywords array\n const expectedKeywords = request.expectedOutcome\n ? request.expectedOutcome\n .split(/[\\n,]+/)\n .map(k => k.trim())\n .filter(k => k.length > 0)\n : [];\n\n const keywordMatches = await evaluateKeywordsSemantically(\n SemanticEvaluator.extractor,\n request.actualResponse,\n expectedKeywords,\n DEFAULT_SEMANTIC_PASS_SCORE,\n );\n\n const totalItems = keywordMatches.length;\n // calculate the overall score by averaging the score of the keyword matches\n const keywordScore = keywordMatches.reduce(\n (acc, curr) => acc + curr.evaluationApproachResult.score,\n 0,\n );\n const overallScore = totalItems > 0 ? 
keywordScore / totalItems : 0; // to avoid division by zero\n const passed = keywordMatches.every(match => match.found);\n\n const evaluationParameters = {\n approach: EvaluationApproach.SEMANTIC,\n threshold: DEFAULT_SEMANTIC_PASS_SCORE,\n } as EvaluationParameters;\n\n return {\n testCaseId: request.testCaseId,\n passed,\n keywordMatches,\n evaluationParameters,\n evaluationApproachResult: {\n score: overallScore,\n approachUsed: EvaluationApproach.SEMANTIC,\n },\n timestamp: new Date().toISOString(),\n };\n } catch (error) {\n console.error('Failed to perform semantic evaluation:', error);\n return {\n testCaseId: request.testCaseId,\n passed: false,\n keywordMatches: [],\n evaluationParameters: {\n approach: EvaluationApproach.SEMANTIC,\n threshold: DEFAULT_SEMANTIC_PASS_SCORE,\n },\n evaluationApproachResult: {\n score: 0,\n approachUsed: EvaluationApproach.SEMANTIC,\n },\n timestamp: new Date().toISOString(),\n };\n }\n }\n}\n"]}

package/dist/collection/lib/evaluation/evaluators/semantic/evaluate-keywords.js ADDED Viewed

@@ -0,0 +1,56 @@
+import { splitIntoWords } from "./text-utils";
+import { cosineSimilarity } from "./similarity-utils";
+import { EvaluationApproach } from "../../constants";
+/**
+ * Evaluates whether each keyword is semantically present in the response text: the response is split with splitIntoWords, every resulting word and every keyword is embedded with `extractor`, and a keyword's score is its highest cosine similarity to any response word.
+ * Uses embeddings and cosine similarity instead of direct string matching. Resolves to one { keyword, found, evaluationApproachResult } entry per keyword (or [] when `keywords` is empty); `found` is true when the score is >= `threshold`.
+ */
+export async function evaluateKeywordsSemantically(extractor, response, keywords, threshold) {
+    if (keywords.length === 0)
+        return [];
+    const words = splitIntoWords(response);
+    // Generate embeddings for both response words and keywords in parallel
+    const [wordsEmbeddings, keywordsEmbeddings] = await Promise.all([ // not wrapped in a try block: a rejected extractor call rejects this function
+        Promise.all(words.map(async (word) => ({
+            word,
+            emb: await extractor(word, { pooling: 'mean', normalize: true }),
+        }))),
+        Promise.all(keywords.map(async (keyword) => ({
+            keyword,
+            emb: await extractor(keyword, { pooling: 'mean', normalize: true }),
+        }))),
+    ]);
+    // For each keyword, find the most semantically similar word in the response
+    const matches = keywordsEmbeddings.map(({ keyword, emb: keywordEmb }) => {
+        let bestSimilarity = 0; // starts at 0, so the reported score is never negative and stays 0 when the response yields no words
+        try {
+            for (const { emb: wordEmb } of wordsEmbeddings) {
+                const similarity = cosineSimilarity(Array.from(keywordEmb.data), Array.from(wordEmb.data)); // Array.from turns each embedding's `data` into a plain array
+                if (similarity > bestSimilarity)
+                    bestSimilarity = similarity;
+            }
+            // Consider the keyword "found" if the best similarity meets or exceeds the threshold
+            return {
+                keyword,
+                found: bestSimilarity >= threshold,
+                evaluationApproachResult: {
+                    score: bestSimilarity,
+                    approachUsed: EvaluationApproach.SEMANTIC,
+                },
+            };
+        }
+        catch (err) { // errors thrown while comparing (e.g. cosineSimilarity's length-mismatch Error) are logged and the keyword is reported as not found
+            console.error(`Error evaluating "${keyword}":`, err);
+            return {
+                keyword,
+                found: false,
+                evaluationApproachResult: {
+                    score: 0,
+                    approachUsed: EvaluationApproach.SEMANTIC,
+                },
+            };
+        }
+    });
+    return matches;
+}
+//# sourceMappingURL=evaluate-keywords.js.map

package/dist/collection/lib/evaluation/evaluators/semantic/evaluate-keywords.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"evaluate-keywords.js","sourceRoot":"","sources":["../../../../../src/lib/evaluation/evaluators/semantic/evaluate-keywords.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAEtD,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AAErD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,4BAA4B,CAChD,SAAoC,EACpC,QAAgB,EAChB,QAAkB,EAClB,SAAiB;IAEjB,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAErC,MAAM,KAAK,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;IAEvC,uEAAuE;IACvE,MAAM,CAAC,eAAe,EAAE,kBAAkB,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QAC9D,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,GAAG,CAAC,KAAK,EAAC,IAAI,EAAC,EAAE,CAAC,CAAC;YACvB,IAAI;YACJ,GAAG,EAAE,MAAM,SAAS,CAAC,IAAI,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;SACjE,CAAC,CAAC,CACJ;QAED,OAAO,CAAC,GAAG,CACT,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAC,OAAO,EAAC,EAAE,CAAC,CAAC;YAC7B,OAAO;YACP,GAAG,EAAE,MAAM,SAAS,CAAC,OAAO,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;SACpE,CAAC,CAAC,CACJ;KACF,CAAC,CAAC;IAEH,4EAA4E;IAC5E,MAAM,OAAO,GAAmB,kBAAkB,CAAC,GAAG,CACpD,CAAC,EAAE,OAAO,EAAE,GAAG,EAAE,UAAU,EAAE,EAAE,EAAE;QAC/B,IAAI,cAAc,GAAG,CAAC,CAAC;QAEvB,IAAI,CAAC;YACH,KAAK,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,eAAe,EAAE,CAAC;gBAC/C,MAAM,UAAU,GAAG,gBAAgB,CACjC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAC3B,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CACzB,CAAC;gBACF,IAAI,UAAU,GAAG,cAAc;oBAAE,cAAc,GAAG,UAAU,CAAC;YAC/D,CAAC;YAED,mEAAmE;YACnE,OAAO;gBACL,OAAO;gBACP,KAAK,EAAE,cAAc,IAAI,SAAS;gBAClC,wBAAwB,EAAE;oBACxB,KAAK,EAAE,cAAc;oBACrB,YAAY,EAAE,kBAAkB,CAAC,QAAQ;iBAC1C;aACF,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,KAAK,CAAC,qBAAqB,OAAO,IAAI,EAAE,GAAG,CAAC,CAAC;YACrD,OAAO;gBACL,OAAO;gBACP,KAAK,EAAE,KAAK;gBACZ,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;oBACR,YAAY,EAAE,kBAAkB,CAAC,QAAQ;iBAC1C;aACF,CAAC;QACJ,CAAC;IACH,CAAC,CACF,CAAC;IAEF,OAAO,OAAO,CAAC;AACjB,CAAC","sourcesContent":["import { KeywordMatch } from '../../types';\nimport { splitIntoWords } from './text-utils';\nimport { cosineSimilarity } 
from './similarity-utils';\nimport { FeatureExtractionPipeline } from '@xenova/transformers';\nimport { EvaluationApproach } from '../../constants';\n\n/**\n * Evaluates whether each keyword is semantically present in the response text.\n * Uses embeddings and cosine similarity instead of direct string matching.\n */\nexport async function evaluateKeywordsSemantically(\n extractor: FeatureExtractionPipeline,\n response: string,\n keywords: string[],\n threshold: number,\n): Promise<KeywordMatch[]> {\n if (keywords.length === 0) return [];\n\n const words = splitIntoWords(response);\n\n // Generate embeddings for both response words and keywords in parallel\n const [wordsEmbeddings, keywordsEmbeddings] = await Promise.all([\n Promise.all(\n words.map(async word => ({\n word,\n emb: await extractor(word, { pooling: 'mean', normalize: true }),\n })),\n ),\n\n Promise.all(\n keywords.map(async keyword => ({\n keyword,\n emb: await extractor(keyword, { pooling: 'mean', normalize: true }),\n })),\n ),\n ]);\n\n // For each keyword, find the most semantically similar word in the response\n const matches: KeywordMatch[] = keywordsEmbeddings.map(\n ({ keyword, emb: keywordEmb }) => {\n let bestSimilarity = 0;\n\n try {\n for (const { emb: wordEmb } of wordsEmbeddings) {\n const similarity = cosineSimilarity(\n Array.from(keywordEmb.data),\n Array.from(wordEmb.data),\n );\n if (similarity > bestSimilarity) bestSimilarity = similarity;\n }\n\n // Consider the keyword \"found\" if similarity exceeds the threshold\n return {\n keyword,\n found: bestSimilarity >= threshold,\n evaluationApproachResult: {\n score: bestSimilarity,\n approachUsed: EvaluationApproach.SEMANTIC,\n },\n };\n } catch (err) {\n console.error(`Error evaluating \"${keyword}\":`, err);\n return {\n keyword,\n found: false,\n evaluationApproachResult: {\n score: 0,\n approachUsed: EvaluationApproach.SEMANTIC,\n },\n };\n }\n },\n );\n\n return matches;\n}"]}

package/dist/collection/lib/evaluation/evaluators/semantic/index.js ADDED Viewed

@@ -0,0 +1,7 @@
+import { SemanticEvaluator } from "./SemanticEvaluator";
+const semanticEvaluator = new SemanticEvaluator(); // module-level instance reused by every performSemanticEvaluation call
+export async function performSemanticEvaluation(request) { // Initializes the evaluator, then resolves to its evaluation result for `request`.
+    await semanticEvaluator.initialize(); // runs outside performEvaluation's try/catch, so an error rethrown by initialize() rejects this call
+    return semanticEvaluator.performEvaluation(request);
+}
+//# sourceMappingURL=index.js.map

package/dist/collection/lib/evaluation/evaluators/semantic/index.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../../src/lib/evaluation/evaluators/semantic/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AAGxD,MAAM,iBAAiB,GAAG,IAAI,iBAAiB,EAAE,CAAC;AAElD,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,OAA0B;IAE1B,MAAM,iBAAiB,CAAC,UAAU,EAAE,CAAC;IACrC,OAAO,iBAAiB,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;AACtD,CAAC","sourcesContent":["import { SemanticEvaluator } from './SemanticEvaluator';\nimport { EvaluationRequest, EvaluationResult } from '../../types';\n\nconst semanticEvaluator = new SemanticEvaluator();\n\nexport async function performSemanticEvaluation(\n request: EvaluationRequest,\n): Promise<EvaluationResult> {\n await semanticEvaluator.initialize();\n return semanticEvaluator.performEvaluation(request);\n}\n"]}

package/dist/collection/lib/evaluation/evaluators/semantic/model-loader.js ADDED Viewed

@@ -0,0 +1,19 @@
+import { env, pipeline } from "@xenova/transformers";
+// Force remote loads so Vite dev server does not serve index.html in place of model metadata (these assignments run once, when this module is first imported)
+// TODO: LLM-52 Revisit this workaround
+env.useBrowserCache = false; // presumably stops the library from storing/reading model files in the browser cache - confirm against the transformers.js env docs
+env.allowLocalModels = false; // presumably stops the library from looking for model files at a local path - confirm against the transformers.js env docs
+// Loads the 'Xenova/all-MiniLM-L6-v2' feature-extraction pipeline used to generate embeddings; resolves to the pipeline, and logs then rethrows any error raised while loading it
+export async function loadSemanticModel() {
+    try {
+        const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2', {
+            quantized: true, // use quantized model to reduce memory usage
+        });
+        return extractor;
+    }
+    catch (error) {
+        console.error('Failed to load semantic evaluation model:', error);
+        throw error; // propagate after logging so callers can handle the failure
+    }
+}
+//# sourceMappingURL=model-loader.js.map

package/dist/collection/lib/evaluation/evaluators/semantic/model-loader.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"model-loader.js","sourceRoot":"","sources":["../../../../../src/lib/evaluation/evaluators/semantic/model-loader.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAErD,6FAA6F;AAC7F,uCAAuC;AACvC,GAAG,CAAC,eAAe,GAAG,KAAK,CAAC;AAC5B,GAAG,CAAC,gBAAgB,GAAG,KAAK,CAAC;AAE7B,mEAAmE;AACnE,MAAM,CAAC,KAAK,UAAU,iBAAiB;IACrC,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,MAAM,QAAQ,CAC9B,oBAAoB,EACpB,yBAAyB,EACzB;YACE,SAAS,EAAE,IAAI,EAAE,6CAA6C;SAC/D,CACF,CAAC;QACF,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,2CAA2C,EAAE,KAAK,CAAC,CAAC;QAClE,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC","sourcesContent":["import { env, pipeline } from '@xenova/transformers';\n\n// Force remote loads so Vite dev server does not serve index.html in place of model metadata\n// TODO: LLM-52 Revisit this workaround\nenv.useBrowserCache = false;\nenv.allowLocalModels = false;\n\n// Loads a semantic feature extraction model to generate embeddings\nexport async function loadSemanticModel() {\n try {\n const extractor = await pipeline(\n 'feature-extraction',\n 'Xenova/all-MiniLM-L6-v2',\n {\n quantized: true, // use quantized model to reduce memory usage\n },\n );\n return extractor;\n } catch (error) {\n console.error('Failed to load semantic evaluation model:', error);\n throw error;\n }\n}\n"]}

package/dist/collection/lib/evaluation/evaluators/semantic/similarity-utils.js ADDED Viewed

@@ -0,0 +1,16 @@
+// Computes cosine similarity between two numeric vectors: dot(vecA, vecB) / (|vecA| * |vecB|). Throws an Error when the vectors differ in length; returns 0 when either vector has zero norm (this includes two empty vectors).
+// Returns a value between -1 and 1 indicating similarity (1 means the vectors point in the same direction, 0 means they are orthogonal, and negative values mean opposite directions).
+export function cosineSimilarity(vecA, vecB) {
+    if (vecA.length !== vecB.length)
+        throw new Error('Vectors must have the same length');
+    let dot = 0, normA = 0, normB = 0; // normA and normB accumulate squared magnitudes; square roots are taken only at the end
+    for (let i = 0; i < vecA.length; i++) {
+        dot += vecA[i] * vecB[i];
+        normA += vecA[i] ** 2;
+        normB += vecB[i] ** 2;
+    }
+    if (normA === 0 || normB === 0) // a zero vector has no direction; return 0 instead of dividing by zero
+        return 0;
+    return dot / (Math.sqrt(normA) * Math.sqrt(normB));
+}
+//# sourceMappingURL=similarity-utils.js.map

package/dist/collection/lib/evaluation/evaluators/semantic/similarity-utils.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"similarity-utils.js","sourceRoot":"","sources":["../../../../../src/lib/evaluation/evaluators/semantic/similarity-utils.ts"],"names":[],"mappings":"AAAA,0DAA0D;AAC1D,wJAAwJ;AACxJ,MAAM,UAAU,gBAAgB,CAAC,IAAc,EAAE,IAAc;IAC7D,IAAI,IAAI,CAAC,MAAM,KAAK,IAAI,CAAC,MAAM;QAC7B,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;IAEvD,IAAI,GAAG,GAAG,CAAC,EACT,KAAK,GAAG,CAAC,EACT,KAAK,GAAG,CAAC,CAAC;IAEZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACzB,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACtB,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IACxB,CAAC;IACD,IAAI,KAAK,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACzC,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;AACrD,CAAC","sourcesContent":["// Computes cosine similarity between two numeric vectors.\n// Returns a value between -1 and 1 indicating similarity.(1 means identical, 0 means completely different and negative values mean opposite directions)\nexport function cosineSimilarity(vecA: number[], vecB: number[]): number {\n if (vecA.length !== vecB.length)\n throw new Error('Vectors must have the same length');\n\n let dot = 0,\n normA = 0,\n normB = 0;\n\n for (let i = 0; i < vecA.length; i++) {\n dot += vecA[i] * vecB[i];\n normA += vecA[i] ** 2;\n normB += vecB[i] ** 2;\n }\n if (normA === 0 || normB === 0) return 0;\n return dot / (Math.sqrt(normA) * Math.sqrt(normB));\n}\n"]}