llm-testrunner-components 1.0.6 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +3 -48
  2. package/dist/cjs/index.cjs.js +24610 -60
  3. package/dist/cjs/index.cjs.js.map +1 -1
  4. package/dist/collection/components/llm-test-runner/llm-test-runner.css +14 -2
  5. package/dist/collection/components/llm-test-runner/llm-test-runner.js +38 -9
  6. package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
  7. package/dist/collection/lib/evaluation/constant.js +4 -0
  8. package/dist/collection/lib/evaluation/constant.js.map +1 -0
  9. package/dist/collection/lib/evaluation/constants/evaluation-approach.js +6 -0
  10. package/dist/collection/lib/evaluation/constants/evaluation-approach.js.map +1 -0
  11. package/dist/collection/lib/evaluation/evaluation-engine.js +28 -44
  12. package/dist/collection/lib/evaluation/evaluation-engine.js.map +1 -1
  13. package/dist/collection/lib/evaluation/evaluators/exact/exact.js +51 -0
  14. package/dist/collection/lib/evaluation/evaluators/exact/exact.js.map +1 -0
  15. package/dist/collection/lib/evaluation/evaluators/rouge1-evaluator.js +82 -0
  16. package/dist/collection/lib/evaluation/evaluators/rouge1-evaluator.js.map +1 -0
  17. package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.js +73 -0
  18. package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.js.map +1 -0
  19. package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.test.js +313 -0
  20. package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.test.js.map +1 -0
  21. package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js +63 -0
  22. package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js.map +1 -0
  23. package/dist/collection/lib/evaluation/evaluators/semantic/evaluate-keywords.js +56 -0
  24. package/dist/collection/lib/evaluation/evaluators/semantic/evaluate-keywords.js.map +1 -0
  25. package/dist/collection/lib/evaluation/evaluators/semantic/index.js +7 -0
  26. package/dist/collection/lib/evaluation/evaluators/semantic/index.js.map +1 -0
  27. package/dist/collection/lib/evaluation/evaluators/semantic/model-loader.js +15 -0
  28. package/dist/collection/lib/evaluation/evaluators/semantic/model-loader.js.map +1 -0
  29. package/dist/collection/lib/evaluation/evaluators/semantic/similarity-utils.js +16 -0
  30. package/dist/collection/lib/evaluation/evaluators/semantic/similarity-utils.js.map +1 -0
  31. package/dist/collection/lib/evaluation/evaluators/semantic/tests/evaluate-keywords.test.js +65 -0
  32. package/dist/collection/lib/evaluation/evaluators/semantic/tests/evaluate-keywords.test.js.map +1 -0
  33. package/dist/collection/lib/evaluation/evaluators/semantic/text-utils.js +5 -0
  34. package/dist/collection/lib/evaluation/evaluators/semantic/text-utils.js.map +1 -0
  35. package/dist/collection/lib/evaluation/rouge1-evaluator.test.js +118 -0
  36. package/dist/collection/lib/evaluation/rouge1-evaluator.test.js.map +1 -0
  37. package/dist/collection/lib/evaluation/types.js.map +1 -1
  38. package/dist/collection/lib/rate-limited-fetcher/rate-limited-fetcher.js +6 -6
  39. package/dist/collection/lib/rate-limited-fetcher/rate-limited-fetcher.js.map +1 -1
  40. package/dist/collection/types/evaluation.js +6 -0
  41. package/dist/collection/types/evaluation.js.map +1 -0
  42. package/dist/components/index.js +1 -1
  43. package/dist/components/llm-test-runner.js +1 -1
  44. package/dist/components/p-lpWX1sHl.js +26319 -0
  45. package/dist/components/p-lpWX1sHl.js.map +1 -0
  46. package/dist/esm/index.js +24609 -60
  47. package/dist/esm/index.js.map +1 -1
  48. package/dist/llm-testrunner/index.esm.js +6 -1
  49. package/dist/llm-testrunner/index.esm.js.map +1 -1
  50. package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +4 -1
  51. package/dist/types/lib/evaluation/constant.d.ts +3 -0
  52. package/dist/types/lib/evaluation/constants/evaluation-approach.d.ts +4 -0
  53. package/dist/types/lib/evaluation/evaluation-engine.d.ts +0 -4
  54. package/dist/types/lib/evaluation/evaluators/exact/exact.d.ts +3 -0
  55. package/dist/types/lib/evaluation/evaluators/rouge1-evaluator.d.ts +17 -0
  56. package/dist/types/lib/evaluation/evaluators/rougeL-evaluator.d.ts +2 -0
  57. package/dist/types/lib/evaluation/evaluators/rougeL-evaluator.test.d.ts +1 -0
  58. package/dist/types/lib/evaluation/evaluators/semantic/SemanticEvaluator.d.ts +6 -0
  59. package/dist/types/lib/evaluation/evaluators/semantic/evaluate-keywords.d.ts +7 -0
  60. package/dist/types/lib/evaluation/evaluators/semantic/index.d.ts +2 -0
  61. package/dist/types/lib/evaluation/evaluators/semantic/model-loader.d.ts +1 -0
  62. package/dist/types/lib/evaluation/evaluators/semantic/similarity-utils.d.ts +1 -0
  63. package/dist/types/lib/evaluation/evaluators/semantic/tests/evaluate-keywords.test.d.ts +1 -0
  64. package/dist/types/lib/evaluation/evaluators/semantic/text-utils.d.ts +1 -0
  65. package/dist/types/lib/evaluation/rouge1-evaluator.test.d.ts +1 -0
  66. package/dist/types/lib/evaluation/types.d.ts +19 -0
  67. package/dist/types/lib/rate-limited-fetcher/rate-limited-fetcher.d.ts +1 -1
  68. package/dist/types/types/evaluation.d.ts +10 -0
  69. package/package.json +7 -3
  70. package/dist/components/p-CYUbsbxt.js +0 -1770
  71. package/dist/components/p-CYUbsbxt.js.map +0 -1
@@ -1,10 +1,12 @@
1
1
  import { EventEmitter } from '../../stencil-public-runtime';
2
2
  import { EvaluationResult } from '../../lib/evaluation/types';
3
+ import { EvaluationParameters } from '../../types/evaluation';
3
4
  export interface TestCase {
4
5
  id: string;
5
6
  question: string;
6
7
  expectedKeywords: string[];
7
8
  expectedSourceLinks: string[];
9
+ evaluationParameters?: EvaluationParameters;
8
10
  output?: string;
9
11
  isRunning?: boolean;
10
12
  error?: string;
@@ -14,7 +16,7 @@ export interface TestCase {
14
16
  export interface LLMRequestPayload {
15
17
  prompt: string;
16
18
  resolve: (result: string) => void;
17
- reject: (err: any) => void;
19
+ reject: (err: Error | unknown) => void;
18
20
  }
19
21
  export declare class LLMTestRunner {
20
22
  llmRequest: EventEmitter<LLMRequestPayload>;
@@ -33,6 +35,7 @@ export declare class LLMTestRunner {
33
35
  private updateTestCase;
34
36
  private runSingleTest;
35
37
  private deleteTestCase;
38
+ private updateApproach;
36
39
  private addKeyword;
37
40
  private removeKeyword;
38
41
  private addSourceLink;
@@ -0,0 +1,3 @@
1
+ export declare const DEFAULT_ROUGE_PASS_SCORE = 0.7;
2
+ export declare const DEFAULT_SEMANTIC_PASS_SCORE = 0.7;
3
+ export declare const ROUGE = "rouge";
@@ -0,0 +1,4 @@
1
+ export declare enum EvaluationApproach {
2
+ EXACT = "exact",
3
+ SEMANTIC = "semantic"
4
+ }
@@ -1,8 +1,4 @@
1
1
  import { EvaluationRequest, EvaluationCallback } from './types';
2
2
  export declare class LLMEvaluationEngine {
3
- constructor();
4
3
  evaluateResponse(request: EvaluationRequest, callback: EvaluationCallback): Promise<void>;
5
- private performEvaluation;
6
- private evaluateKeywords;
7
- private evaluateSourceLinks;
8
4
  }
@@ -0,0 +1,3 @@
1
+ import { EvaluationRequest, EvaluationResult, SourceLinkMatch } from '../../types';
2
+ export declare function performEvaluation(request: EvaluationRequest): Promise<EvaluationResult>;
3
+ export declare function evaluateSourceLinks(expectedSourceLinks: string[], actualResponse: string): SourceLinkMatch[];
@@ -0,0 +1,17 @@
1
+ import { EvaluationRequest, EvaluationResult } from '../types';
2
+ /**
3
+ * Computes the ROUGE-1 score for a single keyword against the candidate text.
4
+ *
5
+ * ROUGE-1 measures the overlap of unigrams (single words) between the candidate
6
+ * and reference text. A score of 1.0 indicates perfect overlap.
7
+ *
8
+ * @example
9
+ * const match = evaluateSingleKeyword(
10
+ * "The quick brown fox",
11
+ * "quick fox",
12
+ * 0.5
13
+ * );
14
+ * // Returns: { keyword: "quick fox", found: true, score: 0.67, ... }
15
+ * General idea: the comparison is performed word by word.
16
+ */
17
+ export declare function performRouge1Evaluation(request: EvaluationRequest): Promise<EvaluationResult>;
@@ -0,0 +1,2 @@
1
+ import { EvaluationRequest, EvaluationResult } from '../types';
2
+ export declare function performRougeLEvaluation(request: EvaluationRequest): EvaluationResult;
@@ -0,0 +1,6 @@
1
+ import { EvaluationResult, EvaluationRequest } from '../../types';
2
+ export declare class SemanticEvaluator {
3
+ private static extractor;
4
+ initialize(): Promise<void>;
5
+ performEvaluation(request: EvaluationRequest): Promise<EvaluationResult>;
6
+ }
@@ -0,0 +1,7 @@
1
+ import { KeywordMatch } from '../../types';
2
+ import { FeatureExtractionPipeline } from '@xenova/transformers';
3
+ /**
4
+ * Evaluates whether each keyword is semantically present in the response text.
5
+ * Uses embeddings and cosine similarity instead of direct string matching.
6
+ */
7
+ export declare function evaluateKeywordsSemantically(extractor: FeatureExtractionPipeline, response: string, keywords: string[], threshold: number): Promise<KeywordMatch[]>;
@@ -0,0 +1,2 @@
1
+ import { EvaluationRequest, EvaluationResult } from '../../types';
2
+ export declare function performSemanticEvaluation(request: EvaluationRequest): Promise<EvaluationResult>;
@@ -0,0 +1 @@
1
+ export declare function loadSemanticModel(): Promise<import("@xenova/transformers").FeatureExtractionPipeline>;
@@ -0,0 +1 @@
1
+ export declare function cosineSimilarity(vecA: number[], vecB: number[]): number;
@@ -0,0 +1 @@
1
+ export declare function splitIntoWords(text: string): string[];
@@ -1,9 +1,11 @@
1
+ import { EvaluationParameters, EvaluationApproachResult } from '../../types/evaluation';
1
2
  export interface EvaluationRequest {
2
3
  testCaseId: string;
3
4
  question: string;
4
5
  expectedKeywords: string[];
5
6
  expectedSourceLinks: string[];
6
7
  actualResponse: string;
8
+ evaluationParameters: EvaluationParameters;
7
9
  }
8
10
  export interface EvaluationResult {
9
11
  testCaseId: string;
@@ -11,13 +13,30 @@ export interface EvaluationResult {
11
13
  keywordMatches: KeywordMatch[];
12
14
  sourceLinkMatches: SourceLinkMatch[];
13
15
  timestamp?: string;
16
+ evaluationParameters: EvaluationParameters;
17
+ evaluationApproachResult: EvaluationApproachResult;
14
18
  }
15
19
  export interface KeywordMatch {
16
20
  keyword: string;
17
21
  found: boolean;
22
+ evaluationApproachResult: EvaluationApproachResult;
18
23
  }
19
24
  export interface SourceLinkMatch {
20
25
  link: string;
21
26
  found: boolean;
27
+ evaluationApproachResult: EvaluationApproachResult;
22
28
  }
23
29
  export type EvaluationCallback = (result: EvaluationResult) => void;
30
+ export interface RougeKeywordDetails {
31
+ rouge1: number;
32
+ rougeL: number;
33
+ scoreUsed: string;
34
+ approach: string;
35
+ }
36
+ export interface Rouge1OverallDetails {
37
+ keywordsPassed: number;
38
+ totalKeywords: number;
39
+ passRate: string;
40
+ thresholdUsed: number;
41
+ approach: string;
42
+ }
@@ -1,7 +1,7 @@
1
1
  export declare class RateLimitedFetcher {
2
2
  private queue;
3
3
  private delay;
4
- private intervalId?;
4
+ private timer?;
5
5
  constructor(delayMs: number);
6
6
  private startQueue;
7
7
  schedule<T>(task: () => Promise<T>): Promise<T>;
@@ -0,0 +1,10 @@
1
+ export interface EvaluationParameters {
2
+ approach: EvaluationApproach;
3
+ threshold?: number;
4
+ }
5
+ export declare const EvaluationApproachValues: readonly ["exact", "semantic", "rouge"];
6
+ export type EvaluationApproach = typeof EvaluationApproachValues[number];
7
+ export interface EvaluationApproachResult {
8
+ score: number;
9
+ approachUsed: EvaluationApproach;
10
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-testrunner-components",
3
- "version": "1.0.6",
3
+ "version": "1.0.7",
4
4
  "description": "A Stencil web component library for LLM test runner functionality",
5
5
  "main": "dist/index.cjs.js",
6
6
  "module": "dist/index.js",
@@ -34,17 +34,21 @@
34
34
  ],
35
35
  "scripts": {
36
36
  "start": "stencil build --dev --watch --serve",
37
- "test": "stencil test --spec --e2e",
37
+ "test": "stencil test --spec --e2e --passWithNoTests",
38
38
  "test.watch": "stencil test --spec --e2e --watchAll",
39
39
  "generate": "stencil generate",
40
40
  "build": "stencil build",
41
41
  "build:react": "tsc --project tsconfig.react.json",
42
42
  "build:all": "npm run build && npm run build:react",
43
43
  "build-publish": "npm run build:all && npm run just-publish",
44
- "just-publish": "npm publish --access=public"
44
+ "just-publish": "npm publish --access=public",
45
+ "lint": "eslint src --ext .ts,.tsx",
46
+ "lint:fix": "eslint src --ext .ts,.tsx --fix"
45
47
  },
46
48
  "dependencies": {
47
49
  "@types/uuid": "^10.0.0",
50
+ "@xenova/transformers": "^2.17.2",
51
+ "js-rouge": "^3.0.0",
48
52
  "uuid": "^10.0.0"
49
53
  },
50
54
  "devDependencies": {