llm-testrunner-components 1.0.5 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -48
- package/dist/cjs/index.cjs.js +24610 -60
- package/dist/cjs/index.cjs.js.map +1 -1
- package/dist/collection/components/llm-test-runner/llm-test-runner.css +14 -2
- package/dist/collection/components/llm-test-runner/llm-test-runner.js +38 -9
- package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
- package/dist/collection/lib/evaluation/constant.js +4 -0
- package/dist/collection/lib/evaluation/constant.js.map +1 -0
- package/dist/collection/lib/evaluation/constants/evaluation-approach.js +6 -0
- package/dist/collection/lib/evaluation/constants/evaluation-approach.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluation-engine.js +28 -44
- package/dist/collection/lib/evaluation/evaluation-engine.js.map +1 -1
- package/dist/collection/lib/evaluation/evaluators/exact/exact.js +51 -0
- package/dist/collection/lib/evaluation/evaluators/exact/exact.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/rouge1-evaluator.js +82 -0
- package/dist/collection/lib/evaluation/evaluators/rouge1-evaluator.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.js +73 -0
- package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.test.js +313 -0
- package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.test.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js +63 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/evaluate-keywords.js +56 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/evaluate-keywords.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/index.js +7 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/index.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/model-loader.js +15 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/model-loader.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/similarity-utils.js +16 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/similarity-utils.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/tests/evaluate-keywords.test.js +65 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/tests/evaluate-keywords.test.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/text-utils.js +5 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/text-utils.js.map +1 -0
- package/dist/collection/lib/evaluation/rouge1-evaluator.test.js +118 -0
- package/dist/collection/lib/evaluation/rouge1-evaluator.test.js.map +1 -0
- package/dist/collection/lib/evaluation/types.js.map +1 -1
- package/dist/collection/lib/rate-limited-fetcher/rate-limited-fetcher.js +6 -6
- package/dist/collection/lib/rate-limited-fetcher/rate-limited-fetcher.js.map +1 -1
- package/dist/collection/types/evaluation.js +6 -0
- package/dist/collection/types/evaluation.js.map +1 -0
- package/dist/components/index.js +1 -1
- package/dist/components/llm-test-runner.js +1 -1
- package/dist/components/p-lpWX1sHl.js +26319 -0
- package/dist/components/p-lpWX1sHl.js.map +1 -0
- package/dist/esm/index.js +24609 -60
- package/dist/esm/index.js.map +1 -1
- package/dist/llm-testrunner/index.esm.js +6 -1
- package/dist/llm-testrunner/index.esm.js.map +1 -1
- package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +4 -1
- package/dist/types/lib/evaluation/constant.d.ts +3 -0
- package/dist/types/lib/evaluation/constants/evaluation-approach.d.ts +4 -0
- package/dist/types/lib/evaluation/evaluation-engine.d.ts +0 -4
- package/dist/types/lib/evaluation/evaluators/exact/exact.d.ts +3 -0
- package/dist/types/lib/evaluation/evaluators/rouge1-evaluator.d.ts +17 -0
- package/dist/types/lib/evaluation/evaluators/rougeL-evaluator.d.ts +2 -0
- package/dist/types/lib/evaluation/evaluators/rougeL-evaluator.test.d.ts +1 -0
- package/dist/types/lib/evaluation/evaluators/semantic/SemanticEvaluator.d.ts +6 -0
- package/dist/types/lib/evaluation/evaluators/semantic/evaluate-keywords.d.ts +7 -0
- package/dist/types/lib/evaluation/evaluators/semantic/index.d.ts +2 -0
- package/dist/types/lib/evaluation/evaluators/semantic/model-loader.d.ts +1 -0
- package/dist/types/lib/evaluation/evaluators/semantic/similarity-utils.d.ts +1 -0
- package/dist/types/lib/evaluation/evaluators/semantic/tests/evaluate-keywords.test.d.ts +1 -0
- package/dist/types/lib/evaluation/evaluators/semantic/text-utils.d.ts +1 -0
- package/dist/types/lib/evaluation/rouge1-evaluator.test.d.ts +1 -0
- package/dist/types/lib/evaluation/types.d.ts +19 -0
- package/dist/types/lib/rate-limited-fetcher/rate-limited-fetcher.d.ts +1 -1
- package/dist/types/types/evaluation.d.ts +10 -0
- package/package.json +10 -6
- package/dist/components/p-CYUbsbxt.js +0 -1770
- package/dist/components/p-CYUbsbxt.js.map +0 -1
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { splitIntoWords } from "./text-utils";
import { cosineSimilarity } from "./similarity-utils";
import { EvaluationApproach } from "../../constants/evaluation-approach";
/**
 * Evaluates whether each keyword is semantically present in the response text.
 * Uses embeddings and cosine similarity instead of direct string matching.
 *
 * @param extractor  feature-extraction pipeline; called per text and returns a
 *                   tensor-like object exposing a numeric `data` buffer
 * @param response   response text whose words are compared against keywords
 * @param keywords   keywords to locate semantically in the response
 * @param threshold  minimum best cosine similarity for a keyword to count as found
 * @returns one match record per keyword; empty array when no keywords are given
 */
export async function evaluateKeywordsSemantically(extractor, response, keywords, threshold) {
    if (keywords.length === 0)
        return [];
    const words = splitIntoWords(response);
    // Generate embeddings for both response words and keywords in parallel
    const [wordsEmbeddings, keywordsEmbeddings] = await Promise.all([
        Promise.all(words.map(async (word) => ({
            word,
            emb: await extractor(word, { pooling: 'mean', normalize: true }),
        }))),
        Promise.all(keywords.map(async (keyword) => ({
            keyword,
            emb: await extractor(keyword, { pooling: 'mean', normalize: true }),
        }))),
    ]);
    // Convert each word embedding to a plain array once up front; previously
    // this conversion was repeated for every keyword inside the loop below.
    const wordVectors = wordsEmbeddings.map(({ emb }) => Array.from(emb.data));
    // For each keyword, find the most semantically similar word in the response
    const matches = keywordsEmbeddings.map(({ keyword, emb: keywordEmb }) => {
        let bestSimilarity = 0;
        try {
            // Loop-invariant: convert the keyword embedding once per keyword,
            // not once per (keyword, word) pair as before.
            const keywordVector = Array.from(keywordEmb.data);
            for (const wordVector of wordVectors) {
                const similarity = cosineSimilarity(keywordVector, wordVector);
                if (similarity > bestSimilarity)
                    bestSimilarity = similarity;
            }
            // Consider the keyword "found" if similarity exceeds the threshold
            return {
                keyword,
                found: bestSimilarity >= threshold,
                evaluationApproachResult: {
                    score: bestSimilarity,
                    approachUsed: EvaluationApproach.SEMANTIC,
                },
            };
        }
        catch (err) {
            // Fail closed: any per-keyword evaluation error is logged and the
            // keyword is reported as not found with score 0.
            console.error(`Error evaluating "${keyword}":`, err);
            return {
                keyword,
                found: false,
                evaluationApproachResult: {
                    score: 0,
                    approachUsed: EvaluationApproach.SEMANTIC,
                },
            };
        }
    });
    return matches;
}
//# sourceMappingURL=evaluate-keywords.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluate-keywords.js","sourceRoot":"","sources":["../../../../../src/lib/evaluation/evaluators/semantic/evaluate-keywords.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAEtD,OAAO,EAAE,kBAAkB,EAAE,MAAM,qCAAqC,CAAC;AAEzE;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,4BAA4B,CAAC,SAAoC,EAAE,QAAgB,EAAE,QAAkB,EAAE,SAAiB;IAC9I,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAErC,MAAM,KAAK,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;IAEvC,uEAAuE;IACvE,MAAM,CAAC,eAAe,EAAE,kBAAkB,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QAC9D,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,GAAG,CAAC,KAAK,EAAC,IAAI,EAAC,EAAE,CAAC,CAAC;YACvB,IAAI;YACJ,GAAG,EAAE,MAAM,SAAS,CAAC,IAAI,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;SACjE,CAAC,CAAC,CACJ;QAED,OAAO,CAAC,GAAG,CACT,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAC,OAAO,EAAC,EAAE,CAAC,CAAC;YAC7B,OAAO;YACP,GAAG,EAAE,MAAM,SAAS,CAAC,OAAO,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;SACpE,CAAC,CAAC,CACJ;KACF,CAAC,CAAC;IAEH,4EAA4E;IAC5E,MAAM,OAAO,GAAmB,kBAAkB,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,EAAE,GAAG,EAAE,UAAU,EAAE,EAAE,EAAE;QACtF,IAAI,cAAc,GAAG,CAAC,CAAC;QAEvB,IAAI,CAAC;YACH,KAAK,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,IAAI,eAAe,EAAE,CAAC;gBAC/C,MAAM,UAAU,GAAG,gBAAgB,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;gBAC3F,IAAI,UAAU,GAAG,cAAc;oBAAE,cAAc,GAAG,UAAU,CAAC;YAC/D,CAAC;YAED,mEAAmE;YACnE,OAAO;gBACL,OAAO;gBACP,KAAK,EAAE,cAAc,IAAI,SAAS;gBAClC,wBAAwB,EAAE;oBACxB,KAAK,EAAE,cAAc;oBACrB,YAAY,EAAE,kBAAkB,CAAC,QAAQ;iBAC1C;aACF,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,KAAK,CAAC,qBAAqB,OAAO,IAAI,EAAE,GAAG,CAAC,CAAC;YACrD,OAAO;gBACL,OAAO;gBACP,KAAK,EAAE,KAAK;gBACZ,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;oBACR,YAAY,EAAE,kBAAkB,CAAC,QAAQ;iBAC1C;aACF,CAAC;QACJ,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC;AACjB,CAAC","sourcesContent":["import { KeywordMatch } from '../../types';\nimport { splitIntoWords } from './text-utils';\nimport { cosineSimilarity } from 
'./similarity-utils';\nimport { FeatureExtractionPipeline } from '@xenova/transformers';\nimport { EvaluationApproach } from '../../constants/evaluation-approach';\n\n/**\n * Evaluates whether each keyword is semantically present in the response text.\n * Uses embeddings and cosine similarity instead of direct string matching.\n */\nexport async function evaluateKeywordsSemantically(extractor: FeatureExtractionPipeline, response: string, keywords: string[], threshold: number): Promise<KeywordMatch[]> {\n if (keywords.length === 0) return [];\n\n const words = splitIntoWords(response);\n\n // Generate embeddings for both response words and keywords in parallel\n const [wordsEmbeddings, keywordsEmbeddings] = await Promise.all([\n Promise.all(\n words.map(async word => ({\n word,\n emb: await extractor(word, { pooling: 'mean', normalize: true }),\n })),\n ),\n\n Promise.all(\n keywords.map(async keyword => ({\n keyword,\n emb: await extractor(keyword, { pooling: 'mean', normalize: true }),\n })),\n ),\n ]);\n\n // For each keyword, find the most semantically similar word in the response\n const matches: KeywordMatch[] = keywordsEmbeddings.map(({ keyword, emb: keywordEmb }) => {\n let bestSimilarity = 0;\n\n try {\n for (const { emb: wordEmb } of wordsEmbeddings) {\n const similarity = cosineSimilarity(Array.from(keywordEmb.data), Array.from(wordEmb.data));\n if (similarity > bestSimilarity) bestSimilarity = similarity;\n }\n\n // Consider the keyword \"found\" if similarity exceeds the threshold\n return {\n keyword,\n found: bestSimilarity >= threshold,\n evaluationApproachResult: {\n score: bestSimilarity,\n approachUsed: EvaluationApproach.SEMANTIC,\n },\n };\n } catch (err) {\n console.error(`Error evaluating \"${keyword}\":`, err);\n return {\n keyword,\n found: false,\n evaluationApproachResult: {\n score: 0,\n approachUsed: EvaluationApproach.SEMANTIC,\n },\n };\n }\n });\n\n return matches;\n}\n"]}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { SemanticEvaluator } from "./SemanticEvaluator";
// Shared module-level evaluator instance, reused across invocations so the
// underlying model is loaded at most once per process.
const sharedEvaluator = new SemanticEvaluator();
/**
 * Ensures the shared semantic evaluator is initialized, then delegates
 * evaluation of the given request to it.
 */
export async function performSemanticEvaluation(request) {
    await sharedEvaluator.initialize();
    return sharedEvaluator.performEvaluation(request);
}
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../../src/lib/evaluation/evaluators/semantic/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AAGxD,MAAM,iBAAiB,GAAG,IAAI,iBAAiB,EAAE,CAAC;AAElD,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,OAA0B;IAE1B,MAAM,iBAAiB,CAAC,UAAU,EAAE,CAAC;IACrC,OAAO,iBAAiB,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;AACtD,CAAC","sourcesContent":["import { SemanticEvaluator } from './SemanticEvaluator';\nimport { EvaluationRequest, EvaluationResult } from '../../types';\n\nconst semanticEvaluator = new SemanticEvaluator();\n\nexport async function performSemanticEvaluation(\n request: EvaluationRequest\n): Promise<EvaluationResult> {\n await semanticEvaluator.initialize();\n return semanticEvaluator.performEvaluation(request);\n}"]}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { pipeline } from "@xenova/transformers";
/**
 * Loads the feature-extraction model used to generate embeddings for
 * semantic evaluation. Logs and rethrows if the model cannot be loaded.
 */
export async function loadSemanticModel() {
    try {
        // Quantized variant is requested to reduce memory usage.
        return await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2', { quantized: true });
    }
    catch (error) {
        console.error('Failed to load semantic evaluation model:', error);
        throw error;
    }
}
//# sourceMappingURL=model-loader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"model-loader.js","sourceRoot":"","sources":["../../../../../src/lib/evaluation/evaluators/semantic/model-loader.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAEhD,mEAAmE;AACnE,MAAM,CAAC,KAAK,UAAU,iBAAiB;IACrC,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE,yBAAyB,EAAE;YAChF,SAAS,EAAE,IAAI,EAAE,6CAA6C;SAC/D,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,2CAA2C,EAAE,KAAK,CAAC,CAAC;QAClE,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC","sourcesContent":["import { pipeline } from '@xenova/transformers';\n\n// Loads a semantic feature extraction model to generate embeddings\nexport async function loadSemanticModel() {\n try {\n const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2', {\n quantized: true, // use quantized model to reduce memory usage\n });\n return extractor;\n } catch (error) {\n console.error('Failed to load semantic evaluation model:', error);\n throw error;\n }\n}"]}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
 * Computes the cosine similarity of two equal-length numeric vectors.
 * The result lies in [-1, 1]: 1 means identical direction, 0 means
 * orthogonal, and negative values mean opposite directions. Vectors with
 * zero magnitude yield 0 rather than dividing by zero.
 *
 * Throws an Error when the vectors differ in length.
 */
export function cosineSimilarity(vecA, vecB) {
    if (vecA.length !== vecB.length)
        throw new Error('Vectors must have the same length');
    let dotProduct = 0;
    let magASquared = 0;
    let magBSquared = 0;
    vecA.forEach((a, i) => {
        const b = vecB[i];
        dotProduct += a * b;
        magASquared += a * a;
        magBSquared += b * b;
    });
    // Guard: a zero-magnitude vector has no direction, so similarity is 0.
    if (magASquared === 0 || magBSquared === 0)
        return 0;
    return dotProduct / (Math.sqrt(magASquared) * Math.sqrt(magBSquared));
}
//# sourceMappingURL=similarity-utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"similarity-utils.js","sourceRoot":"","sources":["../../../../../src/lib/evaluation/evaluators/semantic/similarity-utils.ts"],"names":[],"mappings":"AAAA,0DAA0D;AAC1D,wJAAwJ;AACxJ,MAAM,UAAU,gBAAgB,CAAC,IAAc,EAAE,IAAc;IAC3D,IAAI,IAAI,CAAC,MAAM,KAAK,IAAI,CAAC,MAAM;QAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;IAEtF,IAAI,GAAG,GAAG,CAAC,EACP,KAAK,GAAG,CAAC,EACT,KAAK,GAAG,CAAC,CAAC;IAEd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACnC,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACzB,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACtB,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IACD,IAAI,KAAK,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACzC,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;AACvD,CAAC","sourcesContent":["// Computes cosine similarity between two numeric vectors.\n// Returns a value between -1 and 1 indicating similarity.(1 means identical, 0 means completely different and negative values mean opposite directions)\nexport function cosineSimilarity(vecA: number[], vecB: number[]): number {\n if (vecA.length !== vecB.length) throw new Error('Vectors must have the same length');\n\n let dot = 0,\n normA = 0,\n normB = 0;\n\n for (let i = 0; i < vecA.length; i++) {\n dot += vecA[i] * vecB[i];\n normA += vecA[i] ** 2;\n normB += vecB[i] ** 2;\n }\n if (normA === 0 || normB === 0) return 0; \n return dot / (Math.sqrt(normA) * Math.sqrt(normB));\n}"]}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { jest, describe, it, expect, afterEach } from "@jest/globals";
import { evaluateKeywordsSemantically } from "../evaluate-keywords";
import { DEFAULT_SEMANTIC_PASS_SCORE as DEFAULT_SEMANTIC_THRESHOLD } from "../../../constant";
describe('evaluateKeywordsSemantically (only extractor mocked)', () => {
    // Fix: spies created with jest.spyOn were never restored, so each test
    // wrapped the already-spied cosineSimilarity again and mock state leaked
    // across tests. Restoring after each test keeps them isolated.
    afterEach(() => {
        jest.restoreAllMocks();
    });
    it('should return empty array when no keywords provided', async () => {
        const mockExtractor = jest.fn();
        const result = await evaluateKeywordsSemantically(mockExtractor, 'some response', [], DEFAULT_SEMANTIC_THRESHOLD);
        expect(result).toEqual([]);
    });
    it('should return matches above threshold', async () => {
        const response = 'The quick brown fox';
        const keywords = ['fast', 'animal'];
        // Extractor returns a deterministic dummy embedding per text; the actual
        // similarity values are supplied by the cosineSimilarity spy below.
        const mockExtractor = jest.fn();
        mockExtractor.mockImplementation(async (text) => {
            const data = new Float32Array(text.length).fill(1);
            return { data };
        });
        const cosSpy = jest.spyOn(require('../similarity-utils'), 'cosineSimilarity');
        cosSpy
            .mockReturnValueOnce(0.91) // these are the similarity scores for the keyword 'fast' in the response.
            .mockReturnValueOnce(0.4)
            .mockReturnValueOnce(0.3)
            .mockReturnValueOnce(0.85)
            .mockReturnValueOnce(0.6) // these are the similarity scores for the keyword 'animal' in the response.
            .mockReturnValueOnce(0.5)
            .mockReturnValueOnce(0.7)
            .mockReturnValueOnce(0.8);
        const result = await evaluateKeywordsSemantically(mockExtractor, response, keywords, DEFAULT_SEMANTIC_THRESHOLD);
        expect(result).toHaveLength(2);
        expect(result).toEqual([
            {
                keyword: 'fast',
                found: true,
                evaluationApproachResult: { score: 0.91, approachUsed: 'semantic' }
            },
            {
                keyword: 'animal',
                found: true,
                evaluationApproachResult: { score: 0.8, approachUsed: 'semantic' }
            }
        ]);
    });
    it('should mark below-threshold as not found', async () => {
        const response = 'A sunny day';
        const keywords = ['rain'];
        const mockExtractor = jest.fn();
        mockExtractor.mockImplementation(async (text) => {
            return { data: new Float32Array(text.length).fill(1) };
        });
        const cosSpy = jest.spyOn(require('../similarity-utils'), 'cosineSimilarity');
        cosSpy
            .mockReturnValueOnce(0.5) // this is the similarity score for the keyword 'rain' in the response.
            .mockReturnValueOnce(0.49)
            .mockReturnValueOnce(0.4);
        const result = await evaluateKeywordsSemantically(mockExtractor, response, keywords, DEFAULT_SEMANTIC_THRESHOLD);
        expect(result).toEqual([
            {
                keyword: 'rain',
                found: false,
                evaluationApproachResult: { score: 0.5, approachUsed: 'semantic' }
            }
        ]);
    });
});
//# sourceMappingURL=evaluate-keywords.test.js.map
|
package/dist/collection/lib/evaluation/evaluators/semantic/tests/evaluate-keywords.test.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluate-keywords.test.js","sourceRoot":"","sources":["../../../../../../src/lib/evaluation/evaluators/semantic/tests/evaluate-keywords.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAC3D,OAAO,EAAE,4BAA4B,EAAE,MAAM,sBAAsB,CAAC;AAEpE,OAAO,EAAE,2BAA2B,IAAI,0BAA0B,EAAE,MAAM,mBAAmB,CAAC;AAG9F,QAAQ,CAAC,sDAAsD,EAAE,GAAG,EAAE;IAEpE,EAAE,CAAC,qDAAqD,EAAE,KAAK,IAAI,EAAE;QACnE,MAAM,aAAa,GAAI,IAAI,CAAC,EAAE,EAAe,CAAC;QAC9C,MAAM,MAAM,GAAG,MAAM,4BAA4B,CAC/C,aAAqD,EACrD,eAAe,EACf,EAAE,EACF,0BAA0B,CAC3B,CAAC;QAEF,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;QACrD,MAAM,QAAQ,GAAG,qBAAqB,CAAC;QACvC,MAAM,QAAQ,GAAG,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QACpC,MAAM,aAAa,GAAI,IAAI,CAAC,EAAE,EAAe,CAAC;QAC9C,aAAa,CAAC,kBAAkB,CAAC,KAAK,EAAE,IAAY,EAAE,EAAE;YACtD,MAAM,IAAI,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACnD,OAAO,EAAE,IAAI,EAAE,CAAC;QAClB,CAAC,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,qBAAqB,CAAC,EAAE,kBAAkB,CAAC,CAAC;QAC9E,MAAM;aACH,mBAAmB,CAAC,IAAI,CAAC,CAAC,0EAA0E;aACpG,mBAAmB,CAAC,GAAG,CAAC;aACxB,mBAAmB,CAAC,GAAG,CAAC;aACxB,mBAAmB,CAAC,IAAI,CAAC;aACzB,mBAAmB,CAAC,GAAG,CAAC,CAAC,4EAA4E;aACrG,mBAAmB,CAAC,GAAG,CAAC;aACxB,mBAAmB,CAAC,GAAG,CAAC;aACxB,mBAAmB,CAAC,GAAG,CAAC,CAAC;QAE5B,MAAM,MAAM,GAAG,MAAM,4BAA4B,CAC/C,aAAqD,EACrD,QAAQ,EACR,QAAQ,EACR,0BAA0B,CAC3B,CAAC;QAEF,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC;YACrB;gBACE,OAAO,EAAE,MAAM;gBACf,KAAK,EAAE,IAAI;gBACX,wBAAwB,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,YAAY,EAAE,UAAU,EAAE;aACpE;YACD;gBACE,OAAO,EAAE,QAAQ;gBACjB,KAAK,EAAE,IAAI;gBACX,wBAAwB,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,YAAY,EAAE,UAAU,EAAE;aACnE;SACF,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,QAAQ,GAAG,aAAa,CAAC;QAC/B,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,CAAC;QAC1B,MAAM,aAAa,GAAI,IAAI,CAAC,EAAE,EAAe,CAAC;QAC9C,aAAa,CAAC,kBAAkB
,CAAC,KAAK,EAAE,IAAY,EAAE,EAAE;YACtD,OAAO,EAAE,IAAI,EAAE,IAAI,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;QACzD,CAAC,CAAC,CAAC;QAGH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,qBAAqB,CAAC,EAAE,kBAAkB,CAAC,CAAC;QAC9E,MAAM;aACH,mBAAmB,CAAC,GAAG,CAAC,CAAC,uEAAuE;aAChG,mBAAmB,CAAC,IAAI,CAAC;aACzB,mBAAmB,CAAC,GAAG,CAAC,CAAC;QAE5B,MAAM,MAAM,GAAG,MAAM,4BAA4B,CAC/C,aAAqD,EACrD,QAAQ,EACR,QAAQ,EACR,0BAA0B,CAC3B,CAAC;QAEF,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC;YACrB;gBACE,OAAO,EAAE,MAAM;gBACf,KAAK,EAAE,KAAK;gBACZ,wBAAwB,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,YAAY,EAAE,UAAU,EAAE;aACnE;SACF,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC","sourcesContent":["import { jest, describe, it, expect } from '@jest/globals';\nimport { evaluateKeywordsSemantically } from '../evaluate-keywords';\nimport { FeatureExtractionPipeline } from '@xenova/transformers';\nimport { DEFAULT_SEMANTIC_PASS_SCORE as DEFAULT_SEMANTIC_THRESHOLD } from '../../../constant';\n\n\ndescribe('evaluateKeywordsSemantically (only extractor mocked)', () => {\n\n it('should return empty array when no keywords provided', async () => {\n const mockExtractor = jest.fn() as jest.Mock;\n const result = await evaluateKeywordsSemantically(\n mockExtractor as unknown as FeatureExtractionPipeline,\n 'some response',\n [],\n DEFAULT_SEMANTIC_THRESHOLD\n );\n\n expect(result).toEqual([]);\n });\n\n it('should return matches above threshold', async () => {\n const response = 'The quick brown fox';\n const keywords = ['fast', 'animal'];\n const mockExtractor = jest.fn() as jest.Mock;\n mockExtractor.mockImplementation(async (text: string) => {\n const data = new Float32Array(text.length).fill(1);\n return { data };\n });\n\n const cosSpy = jest.spyOn(require('../similarity-utils'), 'cosineSimilarity');\n cosSpy\n .mockReturnValueOnce(0.91) // these are the similarity scores for the keyword 'fast' in the response.\n .mockReturnValueOnce(0.4) \n .mockReturnValueOnce(0.3) \n .mockReturnValueOnce(0.85)\n 
.mockReturnValueOnce(0.6) // these are the similarity scores for the keyword 'animal' in the response.\n .mockReturnValueOnce(0.5)\n .mockReturnValueOnce(0.7)\n .mockReturnValueOnce(0.8);\n\n const result = await evaluateKeywordsSemantically(\n mockExtractor as unknown as FeatureExtractionPipeline,\n response,\n keywords,\n DEFAULT_SEMANTIC_THRESHOLD\n );\n\n expect(result).toHaveLength(2);\n expect(result).toEqual([\n {\n keyword: 'fast',\n found: true,\n evaluationApproachResult: { score: 0.91, approachUsed: 'semantic' }\n },\n {\n keyword: 'animal',\n found: true,\n evaluationApproachResult: { score: 0.8, approachUsed: 'semantic' }\n }\n ]);\n });\n\n it('should mark below-threshold as not found', async () => {\n const response = 'A sunny day';\n const keywords = ['rain'];\n const mockExtractor = jest.fn() as jest.Mock;\n mockExtractor.mockImplementation(async (text: string) => {\n return { data: new Float32Array(text.length).fill(1) };\n });\n\n \n const cosSpy = jest.spyOn(require('../similarity-utils'), 'cosineSimilarity');\n cosSpy\n .mockReturnValueOnce(0.5) // this is the similarity score for the keyword 'rain' in the response.\n .mockReturnValueOnce(0.49)\n .mockReturnValueOnce(0.4); \n\n const result = await evaluateKeywordsSemantically(\n mockExtractor as unknown as FeatureExtractionPipeline,\n response,\n keywords,\n DEFAULT_SEMANTIC_THRESHOLD\n );\n \n expect(result).toEqual([\n {\n keyword: 'rain',\n found: false,\n evaluationApproachResult: { score: 0.5, approachUsed: 'semantic' }\n }\n ]);\n });\n});"]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"text-utils.js","sourceRoot":"","sources":["../../../../../src/lib/evaluation/evaluators/semantic/text-utils.ts"],"names":[],"mappings":"AAAA,mDAAmD;AACnD,MAAM,UAAU,cAAc,CAAC,IAAY;IACvC,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;AAC5D,CAAC","sourcesContent":["// Splits the response into unique lowercase words.\nexport function splitIntoWords(text: string): string[] {\n return [...new Set(text.toLowerCase().split(/[^\\w]+/))];\n}"]}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { describe, it, expect } from "@jest/globals";
import { DEFAULT_ROUGE_PASS_SCORE } from "./constant";
// Using integration tests with actual js-rouge library (no mocks).
// This approach tests the real ROUGE-1 scoring behavior rather than just orchestration logic.
import { performRouge1Evaluation } from "./evaluators/rouge1-evaluator";
// Baseline request; individual tests spread-override the fields they exercise.
const mockRequest = {
    testCaseId: 'test-000',
    question: 'What is your name?',
    actualResponse: 'I am a large language model',
    expectedKeywords: ['model', 'language'],
    expectedSourceLinks: [],
    evaluationParameters: {
        approach: 'rouge',
        threshold: 0.5,
    }
};
const mockRequestNoThreshold = {
    ...mockRequest,
    evaluationParameters: {
        approach: 'rouge',
        threshold: undefined,
    }
};
describe('performRouge1Evaluation', () => {
    describe('Basic functionality', () => {
        it('should pass when response contains exact keyword matches', async () => {
            const request = {
                ...mockRequest,
                actualResponse: 'This is a language model system',
                expectedKeywords: ['language', 'model'],
            };
            const result = await performRouge1Evaluation(request);
            expect(result.passed).toBe(true);
            expect(result.keywordMatches.length).toBe(2);
            expect(result.keywordMatches[0].found).toBe(true);
            expect(result.keywordMatches[0].evaluationApproachResult.score).toBeGreaterThan(0.5);
            expect(result.keywordMatches[1].found).toBe(true);
            expect(result.keywordMatches[1].evaluationApproachResult.score).toBeGreaterThan(0.5);
        });
        it('should fail when keywords are not sufficiently present', async () => {
            const request = {
                ...mockRequest,
                actualResponse: 'This is completely unrelated content about cooking',
                expectedKeywords: ['machine learning', 'artificial intelligence'],
            };
            const result = await performRouge1Evaluation(request);
            expect(result.passed).toBe(false);
            expect(result.keywordMatches[0].found).toBe(false);
            expect(result.keywordMatches[0].evaluationApproachResult.score).toBeLessThan(0.5);
            expect(result.keywordMatches[1].found).toBe(false);
            expect(result.keywordMatches[1].evaluationApproachResult.score).toBeLessThan(0.5);
        });
        it('should partially pass when only some keywords meet threshold', async () => {
            const request = {
                ...mockRequest,
                actualResponse: 'Machine learning is fascinating',
                expectedKeywords: ['machine learning', 'database systems'],
            };
            const result = await performRouge1Evaluation(request);
            // Overall result fails because not every keyword meets the threshold.
            expect(result.passed).toBe(false);
            expect(result.keywordMatches[0].found).toBe(true);
            expect(result.keywordMatches[0].evaluationApproachResult.score).toBeGreaterThanOrEqual(0.5);
            expect(result.keywordMatches[1].found).toBe(false);
            expect(result.keywordMatches[1].evaluationApproachResult.score).toBeLessThan(0.5);
        });
    });
    describe('Threshold handling', () => {
        it('should use default threshold when not provided', async () => {
            const result = await performRouge1Evaluation(mockRequestNoThreshold);
            expect(result.evaluationParameters.threshold).toBe(DEFAULT_ROUGE_PASS_SCORE);
        });
        it('should pass all keywords with threshold 0.0', async () => {
            const request = {
                ...mockRequest,
                actualResponse: 'completely unrelated text about cooking',
                expectedKeywords: ['quantum physics', 'mathematics'],
                evaluationParameters: {
                    approach: 'rouge',
                    threshold: 0.0,
                },
            };
            const result = await performRouge1Evaluation(request);
            expect(result.passed).toBe(true);
            expect(result.keywordMatches.every(m => m.found)).toBe(true);
            expect(result.evaluationParameters.threshold).toBe(0.0);
        });
        it('should fail when threshold is 1.0 and match is not perfect', async () => {
            const request = {
                ...mockRequest,
                actualResponse: 'This is about learning concepts',
                expectedKeywords: ['machine learning'],
                evaluationParameters: {
                    approach: 'rouge',
                    threshold: 1.0,
                },
            };
            const result = await performRouge1Evaluation(request);
            expect(result.evaluationParameters.threshold).toBe(1.0);
            expect(result.keywordMatches[0].evaluationApproachResult.score).toBeLessThan(1.0);
            // Fix: the test previously never asserted the failing outcome it
            // describes; a sub-1.0 score at threshold 1.0 must mean not found.
            expect(result.keywordMatches[0].found).toBe(false);
            expect(result.passed).toBe(false);
        });
    });
    describe('Edge cases', () => {
        it('should handle empty actualResponse', async () => {
            const request = { ...mockRequest, actualResponse: '' };
            const result = await performRouge1Evaluation(request);
            expect(result.passed).toBe(false);
            expect(result.keywordMatches[0].evaluationApproachResult.score).toBe(0);
            expect(result.keywordMatches[1].evaluationApproachResult.score).toBe(0);
        });
        it('should handle empty expectedKeywords array', async () => {
            const request = { ...mockRequest, expectedKeywords: [] };
            const result = await performRouge1Evaluation(request);
            // No keywords to check: evaluation passes vacuously.
            expect(result.passed).toBe(true);
            expect(result.keywordMatches.length).toBe(0);
        });
    });
});
//# sourceMappingURL=rouge1-evaluator.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rouge1-evaluator.test.js","sourceRoot":"","sources":["../../../src/lib/evaluation/rouge1-evaluator.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAC,MAAM,eAAe,CAAC;AAEpD,OAAO,EAAE,wBAAwB,EAAE,MAAM,YAAY,CAAC;AACtD,mEAAmE;AACnE,8FAA8F;AAC9F,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AAExE,MAAM,WAAW,GAAsB;IACnC,UAAU,EAAE,UAAU;IACtB,QAAQ,EAAE,oBAAoB;IAC9B,cAAc,EAAE,6BAA6B;IAC7C,gBAAgB,EAAE,CAAC,OAAO,EAAE,UAAU,CAAC;IACvC,mBAAmB,EAAE,EAAE;IACvB,oBAAoB,EAAE;QAClB,QAAQ,EAAE,OAAO;QACjB,SAAS,EAAE,GAAG;KACjB;CACJ,CAAC;AAEF,MAAM,sBAAsB,GAAsB;IAC9C,GAAG,WAAW;IACd,oBAAoB,EAAE;QAClB,QAAQ,EAAE,OAAO;QACjB,SAAS,EAAE,SAAS;KACvB;CACJ,CAAC;AAEF,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IAErC,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;QACjC,EAAE,CAAC,0DAA0D,EAAE,KAAK,IAAI,EAAE;YACtE,MAAM,OAAO,GAAsB;gBAC/B,GAAG,WAAW;gBACd,cAAc,EAAE,iCAAiC;gBACjD,gBAAgB,EAAE,CAAC,UAAU,EAAE,OAAO,CAAC;aAC1C,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACjC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAC7C,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;YACrF,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QACzF,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wDAAwD,EAAE,KAAK,IAAI,EAAE;YACpE,MAAM,OAAO,GAAsB;gBAC/B,GAAG,WAAW;gBACd,cAAc,EAAE,oDAAoD;gBACpE,gBAAgB,EAAE,CAAC,kBAAkB,EAAE,yBAAyB,CAAC;aACpE,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAClC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;YAClF,MAAM,CAAC
,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QACtF,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8DAA8D,EAAE,KAAK,IAAI,EAAE;YAC1E,MAAM,OAAO,GAAsB;gBAC/B,GAAG,WAAW;gBACd,cAAc,EAAE,iCAAiC;gBACjD,gBAAgB,EAAE,CAAC,kBAAkB,EAAE,kBAAkB,CAAC;aAC7D,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAClC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CAAC,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC;YAC5F,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QACtF,CAAC,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAChC,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;YAC5D,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,sBAAsB,CAAC,CAAC;YAErE,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;QACjF,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;YACzD,MAAM,OAAO,GAAsB;gBAC/B,GAAG,WAAW;gBACd,cAAc,EAAE,yCAAyC;gBACzD,gBAAgB,EAAE,CAAC,iBAAiB,EAAE,aAAa,CAAC;gBACpD,oBAAoB,EAAE;oBAClB,QAAQ,EAAE,OAAO;oBACjB,SAAS,EAAE,GAAG;iBACjB;aACJ,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACjC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7D,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC5D,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4DAA4D,EAAE,KAAK,IAAI,EAAE;YACxE,MAAM,OAAO,GAAsB;gBAC/B,GAAG,WAAW;gBACd,cAAc,EAAE,iCAAiC;gBACjD,gBAAgB,EAAE,CAAC,kBAAkB,CAAC;gBACtC,oBAAoB,EAAE;oBAClB,QAAQ,EAAE,OAAO;oBACjB,SAAS,EAAE,GAAG;iBACjB;aACJ,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,C
AAC,MAAM,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACxD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QACtF,CAAC,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QACxB,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;YAChD,MAAM,OAAO,GAAG,EAAE,GAAG,WAAW,EAAE,cAAc,EAAE,EAAE,EAAE,CAAC;YAEvD,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAClC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACxE,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;YACxD,MAAM,OAAO,GAAG,EAAE,GAAG,WAAW,EAAE,gBAAgB,EAAE,EAAE,EAAE,CAAC;YAEzD,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAEtD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACjC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;AACP,CAAC,CAAC,CAAC","sourcesContent":["import { describe, it, expect} from '@jest/globals';\nimport { EvaluationRequest } from './types';\nimport { DEFAULT_ROUGE_PASS_SCORE } from './constant';\n// Using integration tests with actual js-rouge library (no mocks).\n// This approach tests the real ROUGE-1 scoring behavior rather than just orchestration logic.\nimport { performRouge1Evaluation } from './evaluators/rouge1-evaluator';\n\nconst mockRequest: EvaluationRequest = {\n testCaseId: 'test-000',\n question: 'What is your name?',\n actualResponse: 'I am a large language model',\n expectedKeywords: ['model', 'language'],\n expectedSourceLinks: [],\n evaluationParameters: {\n approach: 'rouge',\n threshold: 0.5,\n }\n};\n\nconst mockRequestNoThreshold: EvaluationRequest = {\n ...mockRequest,\n evaluationParameters: {\n approach: 'rouge',\n threshold: undefined, \n }\n};\n\ndescribe('performRouge1Evaluation', () => {\n\n describe('Basic 
functionality', () => {\n it('should pass when response contains exact keyword matches', async () => {\n const request: EvaluationRequest = {\n ...mockRequest,\n actualResponse: 'This is a language model system',\n expectedKeywords: ['language', 'model'],\n };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.passed).toBe(true);\n expect(result.keywordMatches.length).toBe(2);\n expect(result.keywordMatches[0].found).toBe(true);\n expect(result.keywordMatches[0].evaluationApproachResult.score).toBeGreaterThan(0.5);\n expect(result.keywordMatches[1].found).toBe(true);\n expect(result.keywordMatches[1].evaluationApproachResult.score).toBeGreaterThan(0.5);\n });\n\n it('should fail when keywords are not sufficiently present', async () => {\n const request: EvaluationRequest = {\n ...mockRequest,\n actualResponse: 'This is completely unrelated content about cooking',\n expectedKeywords: ['machine learning', 'artificial intelligence'],\n };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.passed).toBe(false);\n expect(result.keywordMatches[0].found).toBe(false);\n expect(result.keywordMatches[0].evaluationApproachResult.score).toBeLessThan(0.5);\n expect(result.keywordMatches[1].found).toBe(false);\n expect(result.keywordMatches[1].evaluationApproachResult.score).toBeLessThan(0.5);\n });\n\n it('should partially pass when only some keywords meet threshold', async () => {\n const request: EvaluationRequest = {\n ...mockRequest,\n actualResponse: 'Machine learning is fascinating',\n expectedKeywords: ['machine learning', 'database systems'],\n };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.passed).toBe(false);\n expect(result.keywordMatches[0].found).toBe(true);\n expect(result.keywordMatches[0].evaluationApproachResult.score).toBeGreaterThanOrEqual(0.5);\n expect(result.keywordMatches[1].found).toBe(false);\n 
expect(result.keywordMatches[1].evaluationApproachResult.score).toBeLessThan(0.5);\n });\n });\n\n describe('Threshold handling', () => {\n it('should use default threshold when not provided', async () => {\n const result = await performRouge1Evaluation(mockRequestNoThreshold);\n\n expect(result.evaluationParameters.threshold).toBe(DEFAULT_ROUGE_PASS_SCORE);\n });\n\n it('should pass all keywords with threshold 0.0', async () => {\n const request: EvaluationRequest = {\n ...mockRequest,\n actualResponse: 'completely unrelated text about cooking',\n expectedKeywords: ['quantum physics', 'mathematics'],\n evaluationParameters: {\n approach: 'rouge',\n threshold: 0.0,\n },\n };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.passed).toBe(true);\n expect(result.keywordMatches.every(m => m.found)).toBe(true);\n expect(result.evaluationParameters.threshold).toBe(0.0);\n });\n\n it('should fail when threshold is 1.0 and match is not perfect', async () => {\n const request: EvaluationRequest = {\n ...mockRequest,\n actualResponse: 'This is about learning concepts', \n expectedKeywords: ['machine learning'],\n evaluationParameters: {\n approach: 'rouge',\n threshold: 1.0,\n },\n };\n\n const result = await performRouge1Evaluation(request);\n\n expect(result.evaluationParameters.threshold).toBe(1.0);\n expect(result.keywordMatches[0].evaluationApproachResult.score).toBeLessThan(1.0);\n });\n });\n\n describe('Edge cases', () => {\n it('should handle empty actualResponse', async () => {\n const request = { ...mockRequest, actualResponse: '' };\n\n const result = await performRouge1Evaluation(request);\n \n expect(result.passed).toBe(false);\n expect(result.keywordMatches[0].evaluationApproachResult.score).toBe(0);\n expect(result.keywordMatches[1].evaluationApproachResult.score).toBe(0);\n });\n\n it('should handle empty expectedKeywords array', async () => {\n const request = { ...mockRequest, expectedKeywords: [] };\n\n const result = await 
performRouge1Evaluation(request);\n\n expect(result.passed).toBe(true);\n expect(result.keywordMatches.length).toBe(0);\n });\n });\n});"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/lib/evaluation/types.ts"],"names":[],"mappings":"","sourcesContent":["
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/lib/evaluation/types.ts"],"names":[],"mappings":"","sourcesContent":["import {\n EvaluationParameters,\n EvaluationApproachResult\n} from '../../types/evaluation';\n\nexport interface EvaluationRequest {\n testCaseId: string;\n question: string;\n expectedKeywords: string[];\n expectedSourceLinks: string[];\n actualResponse: string;\n evaluationParameters: EvaluationParameters;\n}\n\nexport interface EvaluationResult {\n testCaseId: string;\n passed: boolean;\n keywordMatches: KeywordMatch[];\n sourceLinkMatches: SourceLinkMatch[];\n timestamp?: string;\n evaluationParameters: EvaluationParameters;\n evaluationApproachResult: EvaluationApproachResult;\n}\n\nexport interface KeywordMatch {\n keyword: string;\n found: boolean;\n evaluationApproachResult: EvaluationApproachResult;\n}\n\nexport interface SourceLinkMatch {\n link: string;\n found: boolean;\n evaluationApproachResult: EvaluationApproachResult;\n}\n\nexport type EvaluationCallback = (result: EvaluationResult) => void;\n\nexport interface RougeKeywordDetails {\n rouge1: number;\n rougeL: number;\n scoreUsed: string;\n approach: string;\n}\n\nexport interface Rouge1OverallDetails {\n keywordsPassed: number;\n totalKeywords: number;\n passRate: string;\n thresholdUsed: number;\n approach: string;\n}"]}
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
export class RateLimitedFetcher {
|
|
2
2
|
queue = [];
|
|
3
3
|
delay; // delay in milliseconds
|
|
4
|
-
|
|
4
|
+
timer;
|
|
5
5
|
constructor(delayMs) {
|
|
6
6
|
this.delay = delayMs;
|
|
7
7
|
}
|
|
8
8
|
startQueue() {
|
|
9
|
-
if (this.
|
|
9
|
+
if (this.timer)
|
|
10
10
|
return;
|
|
11
|
-
this.
|
|
11
|
+
this.timer = setInterval(() => {
|
|
12
12
|
const task = this.queue.shift();
|
|
13
13
|
if (task)
|
|
14
14
|
task();
|
|
@@ -26,9 +26,9 @@ export class RateLimitedFetcher {
|
|
|
26
26
|
});
|
|
27
27
|
}
|
|
28
28
|
stop() {
|
|
29
|
-
if (this.
|
|
30
|
-
clearInterval(this.
|
|
31
|
-
this.
|
|
29
|
+
if (this.timer) {
|
|
30
|
+
clearInterval(this.timer);
|
|
31
|
+
this.timer = undefined;
|
|
32
32
|
}
|
|
33
33
|
}
|
|
34
34
|
async runAll(tasks) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"rate-limited-fetcher.js","sourceRoot":"","sources":["../../../src/lib/rate-limited-fetcher/rate-limited-fetcher.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,kBAAkB;IACrB,KAAK,GAAmB,EAAE,CAAC;IAC3B,KAAK,CAAS,CAAC,wBAAwB;IACvC,
|
|
1
|
+
{"version":3,"file":"rate-limited-fetcher.js","sourceRoot":"","sources":["../../../src/lib/rate-limited-fetcher/rate-limited-fetcher.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,kBAAkB;IACrB,KAAK,GAAmB,EAAE,CAAC;IAC3B,KAAK,CAAS,CAAC,wBAAwB;IACvC,KAAK,CAAkC;IAE/C,YAAY,OAAe;QACzB,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC;IACvB,CAAC;IAEO,UAAU;QAChB,IAAI,IAAI,CAAC,KAAK;YAAE,OAAO;QACvB,IAAI,CAAC,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE;YAC5B,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YAChC,IAAI,IAAI;gBAAE,IAAI,EAAE,CAAC;YACjB,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC5B,IAAI,CAAC,IAAI,EAAE,CAAC;YACd,CAAC;QACH,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;IACjB,CAAC;IAEM,QAAQ,CAAI,IAAsB;QACvC,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YACrC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE;gBACnB,IAAI,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YACrC,CAAC,CAAC,CAAC;YACH,IAAI,CAAC,UAAU,EAAE,CAAC;QACpB,CAAC,CAAC,CAAC;IACL,CAAC;IAEM,IAAI;QACT,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAC1B,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC;QACzB,CAAC;IACH,CAAC;IAEM,KAAK,CAAC,MAAM,CAAI,KAA8B;QACnD,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;QACxD,OAAO,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAC/B,CAAC;CACF","sourcesContent":["export class RateLimitedFetcher {\n private queue: (() => void)[] = [];\n private delay: number; // delay in milliseconds\n private timer?: ReturnType<typeof setInterval>;\n\n constructor(delayMs: number) {\n this.delay = delayMs;\n }\n\n private startQueue() {\n if (this.timer) return;\n this.timer = setInterval(() => {\n const task = this.queue.shift();\n if (task) task();\n if (this.queue.length === 0) {\n this.stop();\n }\n }, this.delay);\n }\n\n public schedule<T>(task: () => Promise<T>): Promise<T> {\n return new Promise((resolve, reject) => {\n this.queue.push(() => {\n task().then(resolve).catch(reject);\n });\n this.startQueue(); \n });\n }\n\n public stop() {\n if (this.timer) {\n 
clearInterval(this.timer);\n this.timer = undefined;\n }\n }\n\n public async runAll<T>(tasks: Array<() => Promise<T>>): Promise<T[]> {\n const promises = tasks.map(task => this.schedule(task));\n return Promise.all(promises);\n }\n}\n"]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluation.js","sourceRoot":"","sources":["../../src/types/evaluation.ts"],"names":[],"mappings":"AAKA,MAAM,CAAC,MAAM,wBAAwB,GAAG;IACpC,OAAO;IACP,UAAU;IACV,OAAO;CACD,CAAC","sourcesContent":["export interface EvaluationParameters {\n approach: EvaluationApproach;\n threshold?: number; \n}\n\nexport const EvaluationApproachValues = [\n 'exact',\n 'semantic',\n 'rouge'\n] as const;\n\nexport type EvaluationApproach = typeof EvaluationApproachValues[number];\n\nexport interface EvaluationApproachResult {\n score: number; // 0-1\n approachUsed: EvaluationApproach;\n}"]}
|
package/dist/components/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export { L as LLMTestRunner, g as getAssetPath, r as render, s as setAssetPath, a as setNonce, b as setPlatformOptions } from './p-
|
|
1
|
+
export { L as LLMTestRunner, g as getAssetPath, r as render, s as setAssetPath, a as setNonce, b as setPlatformOptions } from './p-lpWX1sHl.js';
|
|
2
2
|
|
|
3
3
|
function appGlobalScript () {
|
|
4
4
|
window.env = {
|