npm - llm-testrunner-components - Versions diffs - 1.0.6 → 1.0.7 - Mend

llm-testrunner-components 1.0.6 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

package/dist/types/components/llm-test-runner/llm-test-runner.d.ts CHANGED Viewed

@@ -1,10 +1,12 @@
 import { EventEmitter } from '../../stencil-public-runtime';
 import { EvaluationResult } from '../../lib/evaluation/types';
+import { EvaluationParameters } from '../../types/evaluation';
 export interface TestCase {
     id: string;
     question: string;
     expectedKeywords: string[];
     expectedSourceLinks: string[];
+    evaluationParameters?: EvaluationParameters;
     output?: string;
     isRunning?: boolean;
     error?: string;
@@ -14,7 +16,7 @@ export interface TestCase {
 export interface LLMRequestPayload {
     prompt: string;
     resolve: (result: string) => void;
-    reject: (err: any) => void;
+    reject: (err: Error | unknown) => void;
 }
 export declare class LLMTestRunner {
     llmRequest: EventEmitter<LLMRequestPayload>;
@@ -33,6 +35,7 @@ export declare class LLMTestRunner {
     private updateTestCase;
     private runSingleTest;
     private deleteTestCase;
+    private updateApproach;
     private addKeyword;
     private removeKeyword;
     private addSourceLink;

package/dist/types/lib/evaluation/constant.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+export declare const DEFAULT_ROUGE_PASS_SCORE = 0.7;
+export declare const DEFAULT_SEMANTIC_PASS_SCORE = 0.7;
+export declare const ROUGE = "rouge";

package/dist/types/lib/evaluation/constants/evaluation-approach.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+export declare enum EvaluationApproach {
+    EXACT = "exact",
+    SEMANTIC = "semantic"
+}

package/dist/types/lib/evaluation/evaluation-engine.d.ts CHANGED Viewed

@@ -1,8 +1,4 @@
 import { EvaluationRequest, EvaluationCallback } from './types';
 export declare class LLMEvaluationEngine {
-    constructor();
     evaluateResponse(request: EvaluationRequest, callback: EvaluationCallback): Promise<void>;
-    private performEvaluation;
-    private evaluateKeywords;
-    private evaluateSourceLinks;
 }

package/dist/types/lib/evaluation/evaluators/exact/exact.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import { EvaluationRequest, EvaluationResult, SourceLinkMatch } from '../../types';
+export declare function performEvaluation(request: EvaluationRequest): Promise<EvaluationResult>;
+export declare function evaluateSourceLinks(expectedSourceLinks: string[], actualResponse: string): SourceLinkMatch[];

package/dist/types/lib/evaluation/evaluators/rouge1-evaluator.d.ts ADDED Viewed

@@ -0,0 +1,17 @@
+import { EvaluationRequest, EvaluationResult } from '../types';
+/**
+ * Computes the ROUGE-1 score for a single keyword against the candidate text.
+ *
+ * ROUGE-1 measures the overlap of unigrams (single words) between the candidate
+ * and reference text. A score of 1.0 indicates perfect overlap.
+ *
+ * @example
+ * const match = evaluateSingleKeyword(
+ *   "The quick brown fox",
+ *   "quick fox",
+ *   0.5
+ * );
+ * // Returns: { keyword: "quick fox", found: true, score: 0.67, ... }
+ * //general idea , here we are doing it. by word to word comparison
+ */
+export declare function performRouge1Evaluation(request: EvaluationRequest): Promise<EvaluationResult>;

package/dist/types/lib/evaluation/evaluators/rougeL-evaluator.d.ts ADDED Viewed

@@ -0,0 +1,2 @@
+import { EvaluationRequest, EvaluationResult } from '../types';
+export declare function performRougeLEvaluation(request: EvaluationRequest): EvaluationResult;

package/dist/types/lib/evaluation/evaluators/rougeL-evaluator.test.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/types/lib/evaluation/evaluators/semantic/SemanticEvaluator.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+import { EvaluationResult, EvaluationRequest } from '../../types';
+export declare class SemanticEvaluator {
+    private static extractor;
+    initialize(): Promise<void>;
+    performEvaluation(request: EvaluationRequest): Promise<EvaluationResult>;
+}

package/dist/types/lib/evaluation/evaluators/semantic/evaluate-keywords.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+import { KeywordMatch } from '../../types';
+import { FeatureExtractionPipeline } from '@xenova/transformers';
+/**
+ * Evaluates whether each keyword is semantically present in the response text.
+ * Uses embeddings and cosine similarity instead of direct string matching.
+ */
+export declare function evaluateKeywordsSemantically(extractor: FeatureExtractionPipeline, response: string, keywords: string[], threshold: number): Promise<KeywordMatch[]>;

package/dist/types/lib/evaluation/evaluators/semantic/index.d.ts ADDED Viewed

@@ -0,0 +1,2 @@
+import { EvaluationRequest, EvaluationResult } from '../../types';
+export declare function performSemanticEvaluation(request: EvaluationRequest): Promise<EvaluationResult>;

package/dist/types/lib/evaluation/evaluators/semantic/model-loader.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export declare function loadSemanticModel(): Promise<import("@xenova/transformers").FeatureExtractionPipeline>;

package/dist/types/lib/evaluation/evaluators/semantic/similarity-utils.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export declare function cosineSimilarity(vecA: number[], vecB: number[]): number;

package/dist/types/lib/evaluation/evaluators/semantic/tests/evaluate-keywords.test.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/types/lib/evaluation/evaluators/semantic/text-utils.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export declare function splitIntoWords(text: string): string[];

package/dist/types/lib/evaluation/rouge1-evaluator.test.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/types/lib/evaluation/types.d.ts CHANGED Viewed

@@ -1,9 +1,11 @@
+import { EvaluationParameters, EvaluationApproachResult } from '../../types/evaluation';
 export interface EvaluationRequest {
     testCaseId: string;
     question: string;
     expectedKeywords: string[];
     expectedSourceLinks: string[];
     actualResponse: string;
+    evaluationParameters: EvaluationParameters;
 }
 export interface EvaluationResult {
     testCaseId: string;
@@ -11,13 +13,30 @@ export interface EvaluationResult {
     keywordMatches: KeywordMatch[];
     sourceLinkMatches: SourceLinkMatch[];
     timestamp?: string;
+    evaluationParameters: EvaluationParameters;
+    evaluationApproachResult: EvaluationApproachResult;
 }
 export interface KeywordMatch {
     keyword: string;
     found: boolean;
+    evaluationApproachResult: EvaluationApproachResult;
 }
 export interface SourceLinkMatch {
     link: string;
     found: boolean;
+    evaluationApproachResult: EvaluationApproachResult;
 }
 export type EvaluationCallback = (result: EvaluationResult) => void;
+export interface RougeKeywordDetails {
+    rouge1: number;
+    rougeL: number;
+    scoreUsed: string;
+    approach: string;
+}
+export interface Rouge1OverallDetails {
+    keywordsPassed: number;
+    totalKeywords: number;
+    passRate: string;
+    thresholdUsed: number;
+    approach: string;
+}

package/dist/types/lib/rate-limited-fetcher/rate-limited-fetcher.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 export declare class RateLimitedFetcher {
     private queue;
     private delay;
-    private intervalId?;
+    private timer?;
     constructor(delayMs: number);
     private startQueue;
     schedule<T>(task: () => Promise<T>): Promise<T>;

package/dist/types/types/evaluation.d.ts ADDED Viewed

@@ -0,0 +1,10 @@
+export interface EvaluationParameters {
+    approach: EvaluationApproach;
+    threshold?: number;
+}
+export declare const EvaluationApproachValues: readonly ["exact", "semantic", "rouge"];
+export type EvaluationApproach = typeof EvaluationApproachValues[number];
+export interface EvaluationApproachResult {
+    score: number;
+    approachUsed: EvaluationApproach;
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "llm-testrunner-components",
-  "version": "1.0.6",
+  "version": "1.0.7",
   "description": "A Stencil web component library for LLM test runner functionality",
   "main": "dist/index.cjs.js",
   "module": "dist/index.js",
@@ -34,17 +34,21 @@
   ],
   "scripts": {
     "start": "stencil build --dev --watch --serve",
-    "test": "stencil test --spec --e2e",
+    "test": "stencil test --spec --e2e --passWithNoTests",
     "test.watch": "stencil test --spec --e2e --watchAll",
     "generate": "stencil generate",
     "build": "stencil build",
     "build:react": "tsc --project tsconfig.react.json",
     "build:all": "npm run build && npm run build:react",
     "build-publish": "npm run build:all && npm run just-publish",
-    "just-publish": "npm publish --access=public"
+    "just-publish": "npm publish --access=public",
+    "lint": "eslint src --ext .ts,.tsx",
+    "lint:fix": "eslint src --ext .ts,.tsx --fix"
   },
   "dependencies": {
     "@types/uuid": "^10.0.0",
+    "@xenova/transformers": "^2.17.2",
+    "js-rouge": "^3.0.0",
     "uuid": "^10.0.0"
   },
   "devDependencies": {