llm-testrunner-components 1.0.6 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -48
- package/dist/cjs/index.cjs.js +24610 -60
- package/dist/cjs/index.cjs.js.map +1 -1
- package/dist/collection/components/llm-test-runner/llm-test-runner.css +14 -2
- package/dist/collection/components/llm-test-runner/llm-test-runner.js +38 -9
- package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
- package/dist/collection/lib/evaluation/constant.js +4 -0
- package/dist/collection/lib/evaluation/constant.js.map +1 -0
- package/dist/collection/lib/evaluation/constants/evaluation-approach.js +6 -0
- package/dist/collection/lib/evaluation/constants/evaluation-approach.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluation-engine.js +28 -44
- package/dist/collection/lib/evaluation/evaluation-engine.js.map +1 -1
- package/dist/collection/lib/evaluation/evaluators/exact/exact.js +51 -0
- package/dist/collection/lib/evaluation/evaluators/exact/exact.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/rouge1-evaluator.js +82 -0
- package/dist/collection/lib/evaluation/evaluators/rouge1-evaluator.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.js +73 -0
- package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.test.js +313 -0
- package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.test.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js +63 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/evaluate-keywords.js +56 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/evaluate-keywords.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/index.js +7 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/index.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/model-loader.js +15 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/model-loader.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/similarity-utils.js +16 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/similarity-utils.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/tests/evaluate-keywords.test.js +65 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/tests/evaluate-keywords.test.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/text-utils.js +5 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/text-utils.js.map +1 -0
- package/dist/collection/lib/evaluation/rouge1-evaluator.test.js +118 -0
- package/dist/collection/lib/evaluation/rouge1-evaluator.test.js.map +1 -0
- package/dist/collection/lib/evaluation/types.js.map +1 -1
- package/dist/collection/lib/rate-limited-fetcher/rate-limited-fetcher.js +6 -6
- package/dist/collection/lib/rate-limited-fetcher/rate-limited-fetcher.js.map +1 -1
- package/dist/collection/types/evaluation.js +6 -0
- package/dist/collection/types/evaluation.js.map +1 -0
- package/dist/components/index.js +1 -1
- package/dist/components/llm-test-runner.js +1 -1
- package/dist/components/p-lpWX1sHl.js +26319 -0
- package/dist/components/p-lpWX1sHl.js.map +1 -0
- package/dist/esm/index.js +24609 -60
- package/dist/esm/index.js.map +1 -1
- package/dist/llm-testrunner/index.esm.js +6 -1
- package/dist/llm-testrunner/index.esm.js.map +1 -1
- package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +4 -1
- package/dist/types/lib/evaluation/constant.d.ts +3 -0
- package/dist/types/lib/evaluation/constants/evaluation-approach.d.ts +4 -0
- package/dist/types/lib/evaluation/evaluation-engine.d.ts +0 -4
- package/dist/types/lib/evaluation/evaluators/exact/exact.d.ts +3 -0
- package/dist/types/lib/evaluation/evaluators/rouge1-evaluator.d.ts +17 -0
- package/dist/types/lib/evaluation/evaluators/rougeL-evaluator.d.ts +2 -0
- package/dist/types/lib/evaluation/evaluators/rougeL-evaluator.test.d.ts +1 -0
- package/dist/types/lib/evaluation/evaluators/semantic/SemanticEvaluator.d.ts +6 -0
- package/dist/types/lib/evaluation/evaluators/semantic/evaluate-keywords.d.ts +7 -0
- package/dist/types/lib/evaluation/evaluators/semantic/index.d.ts +2 -0
- package/dist/types/lib/evaluation/evaluators/semantic/model-loader.d.ts +1 -0
- package/dist/types/lib/evaluation/evaluators/semantic/similarity-utils.d.ts +1 -0
- package/dist/types/lib/evaluation/evaluators/semantic/tests/evaluate-keywords.test.d.ts +1 -0
- package/dist/types/lib/evaluation/evaluators/semantic/text-utils.d.ts +1 -0
- package/dist/types/lib/evaluation/rouge1-evaluator.test.d.ts +1 -0
- package/dist/types/lib/evaluation/types.d.ts +19 -0
- package/dist/types/lib/rate-limited-fetcher/rate-limited-fetcher.d.ts +1 -1
- package/dist/types/types/evaluation.d.ts +10 -0
- package/package.json +7 -3
- package/dist/components/p-CYUbsbxt.js +0 -1770
- package/dist/components/p-CYUbsbxt.js.map +0 -1
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import { EventEmitter } from '../../stencil-public-runtime';
|
|
2
2
|
import { EvaluationResult } from '../../lib/evaluation/types';
|
|
3
|
+
import { EvaluationParameters } from '../../types/evaluation';
|
|
3
4
|
export interface TestCase {
|
|
4
5
|
id: string;
|
|
5
6
|
question: string;
|
|
6
7
|
expectedKeywords: string[];
|
|
7
8
|
expectedSourceLinks: string[];
|
|
9
|
+
evaluationParameters?: EvaluationParameters;
|
|
8
10
|
output?: string;
|
|
9
11
|
isRunning?: boolean;
|
|
10
12
|
error?: string;
|
|
@@ -14,7 +16,7 @@ export interface TestCase {
|
|
|
14
16
|
export interface LLMRequestPayload {
|
|
15
17
|
prompt: string;
|
|
16
18
|
resolve: (result: string) => void;
|
|
17
|
-
reject: (err:
|
|
19
|
+
reject: (err: Error | unknown) => void;
|
|
18
20
|
}
|
|
19
21
|
export declare class LLMTestRunner {
|
|
20
22
|
llmRequest: EventEmitter<LLMRequestPayload>;
|
|
@@ -33,6 +35,7 @@ export declare class LLMTestRunner {
|
|
|
33
35
|
private updateTestCase;
|
|
34
36
|
private runSingleTest;
|
|
35
37
|
private deleteTestCase;
|
|
38
|
+
private updateApproach;
|
|
36
39
|
private addKeyword;
|
|
37
40
|
private removeKeyword;
|
|
38
41
|
private addSourceLink;
|
|
@@ -1,8 +1,4 @@
|
|
|
1
1
|
import { EvaluationRequest, EvaluationCallback } from './types';
|
|
2
2
|
export declare class LLMEvaluationEngine {
|
|
3
|
-
constructor();
|
|
4
3
|
evaluateResponse(request: EvaluationRequest, callback: EvaluationCallback): Promise<void>;
|
|
5
|
-
private performEvaluation;
|
|
6
|
-
private evaluateKeywords;
|
|
7
|
-
private evaluateSourceLinks;
|
|
8
4
|
}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import { EvaluationRequest, EvaluationResult, SourceLinkMatch } from '../../types';
|
|
2
|
+
export declare function performEvaluation(request: EvaluationRequest): Promise<EvaluationResult>;
|
|
3
|
+
export declare function evaluateSourceLinks(expectedSourceLinks: string[], actualResponse: string): SourceLinkMatch[];
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { EvaluationRequest, EvaluationResult } from '../types';
|
|
2
|
+
/**
|
|
3
|
+
* Computes the ROUGE-1 score for a single keyword against the candidate text.
|
|
4
|
+
*
|
|
5
|
+
* ROUGE-1 measures the overlap of unigrams (single words) between the candidate
|
|
6
|
+
* and reference text. A score of 1.0 indicates perfect overlap.
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* const match = evaluateSingleKeyword(
|
|
10
|
+
* "The quick brown fox",
|
|
11
|
+
* "quick fox",
|
|
12
|
+
* 0.5
|
|
13
|
+
* );
|
|
14
|
+
* // Returns: { keyword: "quick fox", found: true, score: 0.67, ... }
|
|
15
|
+
* // General idea: the keyword and the candidate text are compared word by word.
|
|
16
|
+
*/
|
|
17
|
+
export declare function performRouge1Evaluation(request: EvaluationRequest): Promise<EvaluationResult>;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { KeywordMatch } from '../../types';
|
|
2
|
+
import { FeatureExtractionPipeline } from '@xenova/transformers';
|
|
3
|
+
/**
|
|
4
|
+
* Evaluates whether each keyword is semantically present in the response text.
|
|
5
|
+
* Uses embeddings and cosine similarity instead of direct string matching.
|
|
6
|
+
*/
|
|
7
|
+
export declare function evaluateKeywordsSemantically(extractor: FeatureExtractionPipeline, response: string, keywords: string[], threshold: number): Promise<KeywordMatch[]>;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function loadSemanticModel(): Promise<import("@xenova/transformers").FeatureExtractionPipeline>;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function cosineSimilarity(vecA: number[], vecB: number[]): number;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function splitIntoWords(text: string): string[];
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -1,9 +1,11 @@
|
|
|
1
|
+
import { EvaluationParameters, EvaluationApproachResult } from '../../types/evaluation';
|
|
1
2
|
export interface EvaluationRequest {
|
|
2
3
|
testCaseId: string;
|
|
3
4
|
question: string;
|
|
4
5
|
expectedKeywords: string[];
|
|
5
6
|
expectedSourceLinks: string[];
|
|
6
7
|
actualResponse: string;
|
|
8
|
+
evaluationParameters: EvaluationParameters;
|
|
7
9
|
}
|
|
8
10
|
export interface EvaluationResult {
|
|
9
11
|
testCaseId: string;
|
|
@@ -11,13 +13,30 @@ export interface EvaluationResult {
|
|
|
11
13
|
keywordMatches: KeywordMatch[];
|
|
12
14
|
sourceLinkMatches: SourceLinkMatch[];
|
|
13
15
|
timestamp?: string;
|
|
16
|
+
evaluationParameters: EvaluationParameters;
|
|
17
|
+
evaluationApproachResult: EvaluationApproachResult;
|
|
14
18
|
}
|
|
15
19
|
export interface KeywordMatch {
|
|
16
20
|
keyword: string;
|
|
17
21
|
found: boolean;
|
|
22
|
+
evaluationApproachResult: EvaluationApproachResult;
|
|
18
23
|
}
|
|
19
24
|
export interface SourceLinkMatch {
|
|
20
25
|
link: string;
|
|
21
26
|
found: boolean;
|
|
27
|
+
evaluationApproachResult: EvaluationApproachResult;
|
|
22
28
|
}
|
|
23
29
|
export type EvaluationCallback = (result: EvaluationResult) => void;
|
|
30
|
+
export interface RougeKeywordDetails {
|
|
31
|
+
rouge1: number;
|
|
32
|
+
rougeL: number;
|
|
33
|
+
scoreUsed: string;
|
|
34
|
+
approach: string;
|
|
35
|
+
}
|
|
36
|
+
export interface Rouge1OverallDetails {
|
|
37
|
+
keywordsPassed: number;
|
|
38
|
+
totalKeywords: number;
|
|
39
|
+
passRate: string;
|
|
40
|
+
thresholdUsed: number;
|
|
41
|
+
approach: string;
|
|
42
|
+
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export interface EvaluationParameters {
|
|
2
|
+
approach: EvaluationApproach;
|
|
3
|
+
threshold?: number;
|
|
4
|
+
}
|
|
5
|
+
export declare const EvaluationApproachValues: readonly ["exact", "semantic", "rouge"];
|
|
6
|
+
export type EvaluationApproach = typeof EvaluationApproachValues[number];
|
|
7
|
+
export interface EvaluationApproachResult {
|
|
8
|
+
score: number;
|
|
9
|
+
approachUsed: EvaluationApproach;
|
|
10
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "llm-testrunner-components",
|
|
3
|
-
"version": "1.0.6",
|
|
3
|
+
"version": "1.0.7",
|
|
4
4
|
"description": "A Stencil web component library for LLM test runner functionality",
|
|
5
5
|
"main": "dist/index.cjs.js",
|
|
6
6
|
"module": "dist/index.js",
|
|
@@ -34,17 +34,21 @@
|
|
|
34
34
|
],
|
|
35
35
|
"scripts": {
|
|
36
36
|
"start": "stencil build --dev --watch --serve",
|
|
37
|
-
"test": "stencil test --spec --e2e",
|
|
37
|
+
"test": "stencil test --spec --e2e --passWithNoTests",
|
|
38
38
|
"test.watch": "stencil test --spec --e2e --watchAll",
|
|
39
39
|
"generate": "stencil generate",
|
|
40
40
|
"build": "stencil build",
|
|
41
41
|
"build:react": "tsc --project tsconfig.react.json",
|
|
42
42
|
"build:all": "npm run build && npm run build:react",
|
|
43
43
|
"build-publish": "npm run build:all && npm run just-publish",
|
|
44
|
-
"just-publish": "npm publish --access=public"
|
|
44
|
+
"just-publish": "npm publish --access=public",
|
|
45
|
+
"lint": "eslint src --ext .ts,.tsx",
|
|
46
|
+
"lint:fix": "eslint src --ext .ts,.tsx --fix"
|
|
45
47
|
},
|
|
46
48
|
"dependencies": {
|
|
47
49
|
"@types/uuid": "^10.0.0",
|
|
50
|
+
"@xenova/transformers": "^2.17.2",
|
|
51
|
+
"js-rouge": "^3.0.0",
|
|
48
52
|
"uuid": "^10.0.0"
|
|
49
53
|
},
|
|
50
54
|
"devDependencies": {
|