judgeval 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +95 -68
  2. package/dist/cjs/common/logger-instance.js +17 -19
  3. package/dist/cjs/common/logger-instance.js.map +1 -1
  4. package/dist/cjs/common/tracer.js +210 -126
  5. package/dist/cjs/common/tracer.js.map +1 -1
  6. package/dist/cjs/constants.js +3 -2
  7. package/dist/cjs/constants.js.map +1 -1
  8. package/dist/cjs/index.js +1 -3
  9. package/dist/cjs/index.js.map +1 -1
  10. package/dist/cjs/judgment-client.js +20 -114
  11. package/dist/cjs/judgment-client.js.map +1 -1
  12. package/dist/cjs/scorers/api-scorer.js +56 -48
  13. package/dist/cjs/scorers/api-scorer.js.map +1 -1
  14. package/dist/cjs/scorers/base-scorer.js +66 -11
  15. package/dist/cjs/scorers/base-scorer.js.map +1 -1
  16. package/dist/esm/common/logger-instance.js +17 -19
  17. package/dist/esm/common/logger-instance.js.map +1 -1
  18. package/dist/esm/common/tracer.js +211 -127
  19. package/dist/esm/common/tracer.js.map +1 -1
  20. package/dist/esm/constants.js +2 -1
  21. package/dist/esm/constants.js.map +1 -1
  22. package/dist/esm/index.js +0 -1
  23. package/dist/esm/index.js.map +1 -1
  24. package/dist/esm/judgment-client.js +20 -114
  25. package/dist/esm/judgment-client.js.map +1 -1
  26. package/dist/esm/scorers/api-scorer.js +56 -48
  27. package/dist/esm/scorers/api-scorer.js.map +1 -1
  28. package/dist/esm/scorers/base-scorer.js +66 -11
  29. package/dist/esm/scorers/base-scorer.js.map +1 -1
  30. package/dist/types/common/tracer.d.ts +27 -13
  31. package/dist/types/constants.d.ts +2 -1
  32. package/dist/types/index.d.ts +0 -1
  33. package/dist/types/judgment-client.d.ts +0 -22
  34. package/dist/types/scorers/api-scorer.d.ts +15 -15
  35. package/dist/types/scorers/base-scorer.d.ts +53 -10
  36. package/package.json +10 -3
  37. package/dist/cjs/scorers/exact-match-scorer.js +0 -84
  38. package/dist/cjs/scorers/exact-match-scorer.js.map +0 -1
  39. package/dist/esm/scorers/exact-match-scorer.js +0 -80
  40. package/dist/esm/scorers/exact-match-scorer.js.map +0 -1
  41. package/dist/types/scorers/exact-match-scorer.d.ts +0 -10
@@ -5,67 +5,67 @@ import { ScorerData } from '../data/result.js';
5
5
  * Implementation of API-based scorers
6
6
  */
7
7
  export declare class AnswerCorrectnessScorer extends APIJudgmentScorer {
8
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
8
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
9
9
  a_score_example(example: Example): Promise<ScorerData>;
10
10
  }
11
11
  export declare class AnswerRelevancyScorer extends APIJudgmentScorer {
12
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
12
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
13
13
  a_score_example(example: Example): Promise<ScorerData>;
14
14
  }
15
15
  export declare class ComparisonScorer extends APIJudgmentScorer {
16
16
  criteria: string[];
17
17
  description: string;
18
- constructor(threshold?: number, criteria?: string[], description?: string, additional_metadata?: Record<string, any>, verbose?: boolean);
18
+ constructor(threshold?: number, criteria?: string[], description?: string, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
19
19
  toJSON(): Record<string, any>;
20
20
  a_score_example(example: Example): Promise<ScorerData>;
21
21
  }
22
22
  export declare class ContextualPrecisionScorer extends APIJudgmentScorer {
23
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
23
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
24
24
  a_score_example(example: Example): Promise<ScorerData>;
25
25
  }
26
26
  export declare class ContextualRecallScorer extends APIJudgmentScorer {
27
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
27
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
28
28
  a_score_example(example: Example): Promise<ScorerData>;
29
29
  }
30
30
  export declare class ContextualRelevancyScorer extends APIJudgmentScorer {
31
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
31
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
32
32
  a_score_example(example: Example): Promise<ScorerData>;
33
33
  }
34
34
  export declare class ExecutionOrderScorer extends APIJudgmentScorer {
35
35
  strictMode: boolean;
36
36
  expectedTools?: string[];
37
- constructor(threshold?: number, strictMode?: boolean, expectedTools?: string[], additional_metadata?: Record<string, any>, verbose?: boolean);
37
+ constructor(threshold?: number, expectedTools?: string[], additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
38
38
  toJSON(): Record<string, any>;
39
39
  a_score_example(example: Example): Promise<ScorerData>;
40
40
  }
41
41
  export declare class FaithfulnessScorer extends APIJudgmentScorer {
42
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
42
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
43
43
  a_score_example(example: Example): Promise<ScorerData>;
44
44
  }
45
45
  export declare class GroundednessScorer extends APIJudgmentScorer {
46
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
46
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
47
47
  a_score_example(example: Example): Promise<ScorerData>;
48
48
  }
49
49
  export declare class HallucinationScorer extends APIJudgmentScorer {
50
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
50
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
51
51
  a_score_example(example: Example): Promise<ScorerData>;
52
52
  }
53
53
  export declare class InstructionAdherenceScorer extends APIJudgmentScorer {
54
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
54
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
55
55
  a_score_example(example: Example): Promise<ScorerData>;
56
56
  }
57
57
  export declare class JsonCorrectnessScorer extends APIJudgmentScorer {
58
58
  jsonSchema?: Record<string, any>;
59
- constructor(threshold?: number, jsonSchema?: Record<string, any>, additional_metadata?: Record<string, any>, verbose?: boolean);
59
+ constructor(threshold?: number, jsonSchema?: Record<string, any>, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
60
60
  toJSON(): Record<string, any>;
61
61
  a_score_example(example: Example): Promise<ScorerData>;
62
62
  }
63
63
  export declare class SummarizationScorer extends APIJudgmentScorer {
64
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
64
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
65
65
  a_score_example(example: Example): Promise<ScorerData>;
66
66
  }
67
67
  export declare class Text2SQLScorer extends APIJudgmentScorer {
68
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
68
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
69
69
  a_score_example(example: Example): Promise<ScorerData>;
70
70
  }
71
71
  export declare class ScorerWrapper {
@@ -75,5 +75,5 @@ export declare class ScorerWrapper {
75
75
  get threshold(): number;
76
76
  get additional_metadata(): Record<string, any> | undefined;
77
77
  toJSON(): Record<string, any>;
78
- static fromType(type: string, threshold: number, additional_metadata?: Record<string, any>, verbose?: boolean): APIJudgmentScorer;
78
+ static fromType(type: string, threshold: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean): APIJudgmentScorer;
79
79
  }
@@ -8,8 +8,18 @@ export interface Scorer {
8
8
  scoreType: string;
9
9
  threshold: number;
10
10
  score?: number;
11
+ score_breakdown?: Record<string, any>;
12
+ reason?: string;
13
+ success?: boolean;
14
+ evaluation_model?: string;
15
+ strict_mode: boolean;
16
+ async_mode: boolean;
17
+ verbose_mode: boolean;
18
+ include_reason: boolean;
19
+ error?: string;
20
+ evaluation_cost?: number;
21
+ verbose_logs?: string;
11
22
  additional_metadata?: Record<string, any>;
12
- verbose: boolean;
13
23
  validateThreshold(): void;
14
24
  toJSON(): Record<string, any>;
15
25
  successCheck(): boolean;
@@ -22,9 +32,13 @@ export declare abstract class APIJudgmentScorer implements Scorer {
22
32
  get scoreType(): string;
23
33
  readonly threshold: number;
24
34
  score?: number;
35
+ score_breakdown?: Record<string, any>;
25
36
  additional_metadata?: Record<string, any>;
26
- verbose: boolean;
27
- constructor(type: string, threshold: number, additional_metadata?: Record<string, any>, verbose?: boolean);
37
+ strict_mode: boolean;
38
+ async_mode: boolean;
39
+ verbose_mode: boolean;
40
+ include_reason: boolean;
41
+ constructor(type: string, threshold: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
28
42
  /**
29
43
  * Check if the score meets the threshold
30
44
  */
@@ -47,27 +61,46 @@ export declare abstract class JudgevalScorer implements Scorer {
47
61
  scoreType: string;
48
62
  threshold: number;
49
63
  score?: number;
64
+ score_breakdown?: Record<string, any>;
65
+ reason?: string;
66
+ success?: boolean;
67
+ evaluation_model?: string;
68
+ strict_mode: boolean;
69
+ async_mode: boolean;
70
+ verbose_mode: boolean;
71
+ include_reason: boolean;
72
+ error?: string;
73
+ evaluation_cost?: number;
74
+ verbose_logs?: string;
50
75
  additional_metadata?: Record<string, any>;
51
- verbose: boolean;
52
- constructor(type: string, threshold: number, additional_metadata?: Record<string, any>, verbose?: boolean);
76
+ constructor(type: string, threshold: number, additional_metadata?: Record<string, any>, include_reason?: boolean, async_mode?: boolean, strict_mode?: boolean, verbose_mode?: boolean);
53
77
  /**
54
78
  * Check if the score meets the threshold
55
79
  */
56
80
  successCheck(): boolean;
81
+ /**
82
+ * Internal method to check success
83
+ * This is equivalent to Python's _success_check method
84
+ */
85
+ protected _successCheck(): boolean;
57
86
  /**
58
87
  * Validate that the threshold is within the allowed range
59
88
  */
60
89
  validateThreshold(): void;
90
+ /**
91
+ * Convert the scorer to a plain object
92
+ */
93
+ toJSON(): Record<string, any>;
61
94
  /**
62
95
  * Score an example
63
- * @param example The example to score
64
- * @returns A ScorerData object with the score
96
+ * This must be implemented by subclasses
65
97
  */
66
98
  abstract scoreExample(example: Example): Promise<ScorerData>;
67
99
  /**
68
- * Convert the scorer to a plain object
100
+ * Get the name of the scorer
101
+ * This is equivalent to Python's __name__ property
69
102
  */
70
- toJSON(): Record<string, any>;
103
+ get name(): string;
71
104
  }
72
105
  /**
73
106
  * Wrapper for scorers to allow dynamic loading of implementations
@@ -77,8 +110,18 @@ export declare class ScorerWrapper implements Scorer {
77
110
  scoreType: string;
78
111
  threshold: number;
79
112
  score?: number;
113
+ score_breakdown?: Record<string, any>;
114
+ reason?: string;
115
+ success?: boolean;
116
+ evaluation_model?: string;
117
+ strict_mode: boolean;
118
+ async_mode: boolean;
119
+ verbose_mode: boolean;
120
+ include_reason: boolean;
121
+ error?: string;
122
+ evaluation_cost?: number;
123
+ verbose_logs?: string;
80
124
  additional_metadata?: Record<string, any>;
81
- verbose: boolean;
82
125
  scorer: any;
83
126
  constructor(scorer: any);
84
127
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "judgeval",
3
- "version": "0.2.0",
3
+ "version": "0.2.1",
4
4
  "description": "Judgment SDK for TypeScript/JavaScript",
5
5
  "main": "./dist/cjs/index.js",
6
6
  "module": "./dist/esm/index.js",
@@ -49,6 +49,7 @@
49
49
  "@types/node": "^20.12.12",
50
50
  "@typescript-eslint/eslint-plugin": "^7.10.0",
51
51
  "@typescript-eslint/parser": "^7.10.0",
52
+ "cross-env": "^7.0.3",
52
53
  "eslint": "^8.57.0",
53
54
  "eslint-config-prettier": "^9.1.0",
54
55
  "eslint-plugin-prettier": "^5.1.3",
@@ -59,12 +60,18 @@
59
60
  "typescript": "^5.4.5"
60
61
  },
61
62
  "scripts": {
62
- "build": "rm -rf dist && tsc -p tsconfig.cjs.json && tsc -p tsconfig.esm.json",
63
+ "build:dev": "rm -rf dist && tsc -p tsconfig.cjs.json && tsc -p tsconfig.esm.json",
64
+ "build:prod": "cross-env NODE_ENV=production rm -rf dist && tsc -p tsconfig.cjs.json && tsc -p tsconfig.esm.json",
65
+ "build": "npm run build:prod",
63
66
  "build:examples": "tsc -p tsconfig.examples.json",
67
+ "build:e2etests": "tsc -p tsconfig.e2etests.json",
64
68
  "test": "jest",
69
+ "test:e2e": "jest e2etests",
70
+ "test:e2e:eval": "jest --config jest.config.js src/e2etests/eval-operations.test.ts",
71
+ "test:e2e:traces": "jest --config jest.config.js src/e2etests/judgee-traces.test.ts",
65
72
  "lint": "eslint . --ext .ts",
66
73
  "format": "prettier --write \"src/**/*.ts\" \"tests/**/*.ts\"",
67
- "prepublishOnly": "npm run build",
74
+ "prepublishOnly": "npm run build:prod",
68
75
  "docs": "typedoc --out docs src/index.ts",
69
76
  "demo:basic": "npx ts-node src/demo/basic-bot.ts",
70
77
  "demo:llm-wrap": "npx ts-node src/demo/llm-wrap-demo.ts",
@@ -1,84 +0,0 @@
1
- "use strict";
2
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
- return new (P || (P = Promise))(function (resolve, reject) {
5
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
- step((generator = generator.apply(thisArg, _arguments || [])).next());
9
- });
10
- };
11
- Object.defineProperty(exports, "__esModule", { value: true });
12
- exports.ExactMatchScorer = void 0;
13
- const base_scorer_js_1 = require("./base-scorer.js");
14
- class ExactMatchScorer extends base_scorer_js_1.JudgevalScorer {
15
- constructor(threshold = 1.0, additionalMetadata, verbose = false) {
16
- super('exact_match', threshold, additionalMetadata, verbose);
17
- }
18
- scoreExample(example) {
19
- return __awaiter(this, void 0, void 0, function* () {
20
- var _a;
21
- try {
22
- // Check if the example has expected output
23
- if (!example.expectedOutput) {
24
- return {
25
- name: this.type,
26
- threshold: this.threshold,
27
- success: false,
28
- score: 0,
29
- reason: "Expected output is required for exact match scoring",
30
- strict_mode: null,
31
- evaluation_model: "exact-match",
32
- error: "Missing expected output",
33
- evaluation_cost: null,
34
- verbose_logs: null,
35
- additional_metadata: this.additional_metadata || {}
36
- };
37
- }
38
- // Compare the actual output with the expected output
39
- const actualOutput = ((_a = example.actualOutput) === null || _a === void 0 ? void 0 : _a.trim()) || '';
40
- const expectedOutput = example.expectedOutput.trim();
41
- // Calculate the score (1 for exact match, 0 otherwise)
42
- const isMatch = actualOutput === expectedOutput;
43
- this.score = isMatch ? 1 : 0;
44
- // Generate a reason for the score
45
- const reason = isMatch
46
- ? "The actual output exactly matches the expected output."
47
- : `The actual output "${actualOutput}" does not match the expected output "${expectedOutput}".`;
48
- // Return the scorer data
49
- return {
50
- name: this.type,
51
- threshold: this.threshold,
52
- success: this.successCheck(),
53
- score: this.score,
54
- reason: reason,
55
- strict_mode: null,
56
- evaluation_model: "exact-match",
57
- error: null,
58
- evaluation_cost: null,
59
- verbose_logs: this.verbose ? `Comparing: "${actualOutput}" with "${expectedOutput}"` : null,
60
- additional_metadata: this.additional_metadata || {}
61
- };
62
- }
63
- catch (error) {
64
- // Handle any errors during scoring
65
- const errorMessage = error instanceof Error ? error.message : String(error);
66
- return {
67
- name: this.type,
68
- threshold: this.threshold,
69
- success: false,
70
- score: 0,
71
- reason: `Error during scoring: ${errorMessage}`,
72
- strict_mode: null,
73
- evaluation_model: "exact-match",
74
- error: errorMessage,
75
- evaluation_cost: null,
76
- verbose_logs: null,
77
- additional_metadata: this.additional_metadata || {}
78
- };
79
- }
80
- });
81
- }
82
- }
83
- exports.ExactMatchScorer = ExactMatchScorer;
84
- //# sourceMappingURL=exact-match-scorer.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"exact-match-scorer.js","sourceRoot":"","sources":["../../../src/scorers/exact-match-scorer.ts"],"names":[],"mappings":";;;;;;;;;;;;AAIA,qDAAkD;AAGlD,MAAa,gBAAiB,SAAQ,+BAAc;IAClD,YAAY,YAAoB,GAAG,EAAE,kBAAwC,EAAE,UAAmB,KAAK;QACrG,KAAK,CAAC,aAAa,EAAE,SAAS,EAAE,kBAAkB,EAAE,OAAO,CAAC,CAAC;IAC/D,CAAC;IAEK,YAAY,CAAC,OAAgB;;;YACjC,IAAI,CAAC;gBACH,2CAA2C;gBAC3C,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,CAAC;oBAC5B,OAAO;wBACL,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,SAAS,EAAE,IAAI,CAAC,SAAS;wBACzB,OAAO,EAAE,KAAK;wBACd,KAAK,EAAE,CAAC;wBACR,MAAM,EAAE,qDAAqD;wBAC7D,WAAW,EAAE,IAAI;wBACjB,gBAAgB,EAAE,aAAa;wBAC/B,KAAK,EAAE,yBAAyB;wBAChC,eAAe,EAAE,IAAI;wBACrB,YAAY,EAAE,IAAI;wBAClB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;qBACpD,CAAC;gBACJ,CAAC;gBAED,qDAAqD;gBACrD,MAAM,YAAY,GAAG,CAAA,MAAA,OAAO,CAAC,YAAY,0CAAE,IAAI,EAAE,KAAI,EAAE,CAAC;gBACxD,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;gBAErD,uDAAuD;gBACvD,MAAM,OAAO,GAAG,YAAY,KAAK,cAAc,CAAC;gBAChD,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAE7B,kCAAkC;gBAClC,MAAM,MAAM,GAAG,OAAO;oBACpB,CAAC,CAAC,wDAAwD;oBAC1D,CAAC,CAAC,sBAAsB,YAAY,yCAAyC,cAAc,IAAI,CAAC;gBAElG,yBAAyB;gBACzB,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,IAAI,CAAC,YAAY,EAAE;oBAC5B,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,MAAM,EAAE,MAAM;oBACd,WAAW,EAAE,IAAI;oBACjB,gBAAgB,EAAE,aAAa;oBAC/B,KAAK,EAAE,IAAI;oBACX,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,YAAY,WAAW,cAAc,GAAG,CAAC,CAAC,CAAC,IAAI;oBAC3F,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;iBACpD,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,mCAAmC;gBACnC,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAE5E,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAC;oBACR,MAAM,EAAE,yBAAyB,YAAY,EAAE;oBAC/C,WAAW,EAAE,IAAI;oBACjB,gBAAgB,EAAE,aAAa;oBAC/B,KAAK,EAAE,YAAY;oBACnB,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI;oBAClB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;iBACpD,CAAC;YACJ,CAAC;QACH,CAAC;KAAA;CACF;AAtED,4CAsEC"}
@@ -1,80 +0,0 @@
1
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
- return new (P || (P = Promise))(function (resolve, reject) {
4
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
- step((generator = generator.apply(thisArg, _arguments || [])).next());
8
- });
9
- };
10
- import { JudgevalScorer } from './base-scorer.js';
11
- export class ExactMatchScorer extends JudgevalScorer {
12
- constructor(threshold = 1.0, additionalMetadata, verbose = false) {
13
- super('exact_match', threshold, additionalMetadata, verbose);
14
- }
15
- scoreExample(example) {
16
- return __awaiter(this, void 0, void 0, function* () {
17
- var _a;
18
- try {
19
- // Check if the example has expected output
20
- if (!example.expectedOutput) {
21
- return {
22
- name: this.type,
23
- threshold: this.threshold,
24
- success: false,
25
- score: 0,
26
- reason: "Expected output is required for exact match scoring",
27
- strict_mode: null,
28
- evaluation_model: "exact-match",
29
- error: "Missing expected output",
30
- evaluation_cost: null,
31
- verbose_logs: null,
32
- additional_metadata: this.additional_metadata || {}
33
- };
34
- }
35
- // Compare the actual output with the expected output
36
- const actualOutput = ((_a = example.actualOutput) === null || _a === void 0 ? void 0 : _a.trim()) || '';
37
- const expectedOutput = example.expectedOutput.trim();
38
- // Calculate the score (1 for exact match, 0 otherwise)
39
- const isMatch = actualOutput === expectedOutput;
40
- this.score = isMatch ? 1 : 0;
41
- // Generate a reason for the score
42
- const reason = isMatch
43
- ? "The actual output exactly matches the expected output."
44
- : `The actual output "${actualOutput}" does not match the expected output "${expectedOutput}".`;
45
- // Return the scorer data
46
- return {
47
- name: this.type,
48
- threshold: this.threshold,
49
- success: this.successCheck(),
50
- score: this.score,
51
- reason: reason,
52
- strict_mode: null,
53
- evaluation_model: "exact-match",
54
- error: null,
55
- evaluation_cost: null,
56
- verbose_logs: this.verbose ? `Comparing: "${actualOutput}" with "${expectedOutput}"` : null,
57
- additional_metadata: this.additional_metadata || {}
58
- };
59
- }
60
- catch (error) {
61
- // Handle any errors during scoring
62
- const errorMessage = error instanceof Error ? error.message : String(error);
63
- return {
64
- name: this.type,
65
- threshold: this.threshold,
66
- success: false,
67
- score: 0,
68
- reason: `Error during scoring: ${errorMessage}`,
69
- strict_mode: null,
70
- evaluation_model: "exact-match",
71
- error: errorMessage,
72
- evaluation_cost: null,
73
- verbose_logs: null,
74
- additional_metadata: this.additional_metadata || {}
75
- };
76
- }
77
- });
78
- }
79
- }
80
- //# sourceMappingURL=exact-match-scorer.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"exact-match-scorer.js","sourceRoot":"","sources":["../../../src/scorers/exact-match-scorer.ts"],"names":[],"mappings":";;;;;;;;;AAIA,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAGlD,MAAM,OAAO,gBAAiB,SAAQ,cAAc;IAClD,YAAY,YAAoB,GAAG,EAAE,kBAAwC,EAAE,UAAmB,KAAK;QACrG,KAAK,CAAC,aAAa,EAAE,SAAS,EAAE,kBAAkB,EAAE,OAAO,CAAC,CAAC;IAC/D,CAAC;IAEK,YAAY,CAAC,OAAgB;;;YACjC,IAAI,CAAC;gBACH,2CAA2C;gBAC3C,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,CAAC;oBAC5B,OAAO;wBACL,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,SAAS,EAAE,IAAI,CAAC,SAAS;wBACzB,OAAO,EAAE,KAAK;wBACd,KAAK,EAAE,CAAC;wBACR,MAAM,EAAE,qDAAqD;wBAC7D,WAAW,EAAE,IAAI;wBACjB,gBAAgB,EAAE,aAAa;wBAC/B,KAAK,EAAE,yBAAyB;wBAChC,eAAe,EAAE,IAAI;wBACrB,YAAY,EAAE,IAAI;wBAClB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;qBACpD,CAAC;gBACJ,CAAC;gBAED,qDAAqD;gBACrD,MAAM,YAAY,GAAG,CAAA,MAAA,OAAO,CAAC,YAAY,0CAAE,IAAI,EAAE,KAAI,EAAE,CAAC;gBACxD,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;gBAErD,uDAAuD;gBACvD,MAAM,OAAO,GAAG,YAAY,KAAK,cAAc,CAAC;gBAChD,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAE7B,kCAAkC;gBAClC,MAAM,MAAM,GAAG,OAAO;oBACpB,CAAC,CAAC,wDAAwD;oBAC1D,CAAC,CAAC,sBAAsB,YAAY,yCAAyC,cAAc,IAAI,CAAC;gBAElG,yBAAyB;gBACzB,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,IAAI,CAAC,YAAY,EAAE;oBAC5B,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,MAAM,EAAE,MAAM;oBACd,WAAW,EAAE,IAAI;oBACjB,gBAAgB,EAAE,aAAa;oBAC/B,KAAK,EAAE,IAAI;oBACX,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,YAAY,WAAW,cAAc,GAAG,CAAC,CAAC,CAAC,IAAI;oBAC3F,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;iBACpD,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,mCAAmC;gBACnC,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAE5E,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAC;oBACR,MAAM,EAAE,yBAAyB,YAAY,EAAE;oBAC/C,WAAW,EAAE,IAAI;oBACjB,gBAAgB,EAAE,aAAa;oBAC/B,KAAK,EAAE,YAAY;oBACnB,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI;oBAClB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;iBACpD,CAAC;YACJ,CAAC;QACH,CAAC;KAAA;CACF"}
@@ -1,10 +0,0 @@
1
- /**
2
- * ExactMatchScorer - A custom scorer that checks if the actual output exactly matches the expected output
3
- */
4
- import { Example } from '../data/example.js';
5
- import { JudgevalScorer } from './base-scorer.js';
6
- import { ScorerData } from '../data/result.js';
7
- export declare class ExactMatchScorer extends JudgevalScorer {
8
- constructor(threshold?: number, additionalMetadata?: Record<string, any>, verbose?: boolean);
9
- scoreExample(example: Example): Promise<ScorerData>;
10
- }