judgeval 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -68
- package/dist/cjs/common/logger-instance.js +17 -19
- package/dist/cjs/common/logger-instance.js.map +1 -1
- package/dist/cjs/common/tracer.js +210 -126
- package/dist/cjs/common/tracer.js.map +1 -1
- package/dist/cjs/constants.js +3 -2
- package/dist/cjs/constants.js.map +1 -1
- package/dist/cjs/index.js +1 -3
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/judgment-client.js +20 -114
- package/dist/cjs/judgment-client.js.map +1 -1
- package/dist/cjs/scorers/api-scorer.js +56 -48
- package/dist/cjs/scorers/api-scorer.js.map +1 -1
- package/dist/cjs/scorers/base-scorer.js +66 -11
- package/dist/cjs/scorers/base-scorer.js.map +1 -1
- package/dist/esm/common/logger-instance.js +17 -19
- package/dist/esm/common/logger-instance.js.map +1 -1
- package/dist/esm/common/tracer.js +211 -127
- package/dist/esm/common/tracer.js.map +1 -1
- package/dist/esm/constants.js +2 -1
- package/dist/esm/constants.js.map +1 -1
- package/dist/esm/index.js +0 -1
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/judgment-client.js +20 -114
- package/dist/esm/judgment-client.js.map +1 -1
- package/dist/esm/scorers/api-scorer.js +56 -48
- package/dist/esm/scorers/api-scorer.js.map +1 -1
- package/dist/esm/scorers/base-scorer.js +66 -11
- package/dist/esm/scorers/base-scorer.js.map +1 -1
- package/dist/types/common/tracer.d.ts +27 -13
- package/dist/types/constants.d.ts +2 -1
- package/dist/types/index.d.ts +0 -1
- package/dist/types/judgment-client.d.ts +0 -22
- package/dist/types/scorers/api-scorer.d.ts +15 -15
- package/dist/types/scorers/base-scorer.d.ts +53 -10
- package/package.json +10 -3
- package/dist/cjs/scorers/exact-match-scorer.js +0 -84
- package/dist/cjs/scorers/exact-match-scorer.js.map +0 -1
- package/dist/esm/scorers/exact-match-scorer.js +0 -80
- package/dist/esm/scorers/exact-match-scorer.js.map +0 -1
- package/dist/types/scorers/exact-match-scorer.d.ts +0 -10
|
@@ -5,67 +5,67 @@ import { ScorerData } from '../data/result.js';
|
|
|
5
5
|
* Implementation of API-based scorers
|
|
6
6
|
*/
|
|
7
7
|
export declare class AnswerCorrectnessScorer extends APIJudgmentScorer {
|
|
8
|
-
constructor(threshold?: number, additional_metadata?: Record<string, any>,
|
|
8
|
+
constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
|
|
9
9
|
a_score_example(example: Example): Promise<ScorerData>;
|
|
10
10
|
}
|
|
11
11
|
export declare class AnswerRelevancyScorer extends APIJudgmentScorer {
|
|
12
|
-
constructor(threshold?: number, additional_metadata?: Record<string, any>,
|
|
12
|
+
constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
|
|
13
13
|
a_score_example(example: Example): Promise<ScorerData>;
|
|
14
14
|
}
|
|
15
15
|
export declare class ComparisonScorer extends APIJudgmentScorer {
|
|
16
16
|
criteria: string[];
|
|
17
17
|
description: string;
|
|
18
|
-
constructor(threshold?: number, criteria?: string[], description?: string, additional_metadata?: Record<string, any>,
|
|
18
|
+
constructor(threshold?: number, criteria?: string[], description?: string, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
|
|
19
19
|
toJSON(): Record<string, any>;
|
|
20
20
|
a_score_example(example: Example): Promise<ScorerData>;
|
|
21
21
|
}
|
|
22
22
|
export declare class ContextualPrecisionScorer extends APIJudgmentScorer {
|
|
23
|
-
constructor(threshold?: number, additional_metadata?: Record<string, any>,
|
|
23
|
+
constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
|
|
24
24
|
a_score_example(example: Example): Promise<ScorerData>;
|
|
25
25
|
}
|
|
26
26
|
export declare class ContextualRecallScorer extends APIJudgmentScorer {
|
|
27
|
-
constructor(threshold?: number, additional_metadata?: Record<string, any>,
|
|
27
|
+
constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
|
|
28
28
|
a_score_example(example: Example): Promise<ScorerData>;
|
|
29
29
|
}
|
|
30
30
|
export declare class ContextualRelevancyScorer extends APIJudgmentScorer {
|
|
31
|
-
constructor(threshold?: number, additional_metadata?: Record<string, any>,
|
|
31
|
+
constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
|
|
32
32
|
a_score_example(example: Example): Promise<ScorerData>;
|
|
33
33
|
}
|
|
34
34
|
export declare class ExecutionOrderScorer extends APIJudgmentScorer {
|
|
35
35
|
strictMode: boolean;
|
|
36
36
|
expectedTools?: string[];
|
|
37
|
-
constructor(threshold?: number,
|
|
37
|
+
constructor(threshold?: number, expectedTools?: string[], additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
|
|
38
38
|
toJSON(): Record<string, any>;
|
|
39
39
|
a_score_example(example: Example): Promise<ScorerData>;
|
|
40
40
|
}
|
|
41
41
|
export declare class FaithfulnessScorer extends APIJudgmentScorer {
|
|
42
|
-
constructor(threshold?: number, additional_metadata?: Record<string, any>,
|
|
42
|
+
constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
|
|
43
43
|
a_score_example(example: Example): Promise<ScorerData>;
|
|
44
44
|
}
|
|
45
45
|
export declare class GroundednessScorer extends APIJudgmentScorer {
|
|
46
|
-
constructor(threshold?: number, additional_metadata?: Record<string, any>,
|
|
46
|
+
constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
|
|
47
47
|
a_score_example(example: Example): Promise<ScorerData>;
|
|
48
48
|
}
|
|
49
49
|
export declare class HallucinationScorer extends APIJudgmentScorer {
|
|
50
|
-
constructor(threshold?: number, additional_metadata?: Record<string, any>,
|
|
50
|
+
constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
|
|
51
51
|
a_score_example(example: Example): Promise<ScorerData>;
|
|
52
52
|
}
|
|
53
53
|
export declare class InstructionAdherenceScorer extends APIJudgmentScorer {
|
|
54
|
-
constructor(threshold?: number, additional_metadata?: Record<string, any>,
|
|
54
|
+
constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
|
|
55
55
|
a_score_example(example: Example): Promise<ScorerData>;
|
|
56
56
|
}
|
|
57
57
|
export declare class JsonCorrectnessScorer extends APIJudgmentScorer {
|
|
58
58
|
jsonSchema?: Record<string, any>;
|
|
59
|
-
constructor(threshold?: number, jsonSchema?: Record<string, any>, additional_metadata?: Record<string, any>,
|
|
59
|
+
constructor(threshold?: number, jsonSchema?: Record<string, any>, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
|
|
60
60
|
toJSON(): Record<string, any>;
|
|
61
61
|
a_score_example(example: Example): Promise<ScorerData>;
|
|
62
62
|
}
|
|
63
63
|
export declare class SummarizationScorer extends APIJudgmentScorer {
|
|
64
|
-
constructor(threshold?: number, additional_metadata?: Record<string, any>,
|
|
64
|
+
constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
|
|
65
65
|
a_score_example(example: Example): Promise<ScorerData>;
|
|
66
66
|
}
|
|
67
67
|
export declare class Text2SQLScorer extends APIJudgmentScorer {
|
|
68
|
-
constructor(threshold?: number, additional_metadata?: Record<string, any>,
|
|
68
|
+
constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
|
|
69
69
|
a_score_example(example: Example): Promise<ScorerData>;
|
|
70
70
|
}
|
|
71
71
|
export declare class ScorerWrapper {
|
|
@@ -75,5 +75,5 @@ export declare class ScorerWrapper {
|
|
|
75
75
|
get threshold(): number;
|
|
76
76
|
get additional_metadata(): Record<string, any> | undefined;
|
|
77
77
|
toJSON(): Record<string, any>;
|
|
78
|
-
static fromType(type: string, threshold: number, additional_metadata?: Record<string, any>,
|
|
78
|
+
static fromType(type: string, threshold: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean): APIJudgmentScorer;
|
|
79
79
|
}
|
|
@@ -8,8 +8,18 @@ export interface Scorer {
|
|
|
8
8
|
scoreType: string;
|
|
9
9
|
threshold: number;
|
|
10
10
|
score?: number;
|
|
11
|
+
score_breakdown?: Record<string, any>;
|
|
12
|
+
reason?: string;
|
|
13
|
+
success?: boolean;
|
|
14
|
+
evaluation_model?: string;
|
|
15
|
+
strict_mode: boolean;
|
|
16
|
+
async_mode: boolean;
|
|
17
|
+
verbose_mode: boolean;
|
|
18
|
+
include_reason: boolean;
|
|
19
|
+
error?: string;
|
|
20
|
+
evaluation_cost?: number;
|
|
21
|
+
verbose_logs?: string;
|
|
11
22
|
additional_metadata?: Record<string, any>;
|
|
12
|
-
verbose: boolean;
|
|
13
23
|
validateThreshold(): void;
|
|
14
24
|
toJSON(): Record<string, any>;
|
|
15
25
|
successCheck(): boolean;
|
|
@@ -22,9 +32,13 @@ export declare abstract class APIJudgmentScorer implements Scorer {
|
|
|
22
32
|
get scoreType(): string;
|
|
23
33
|
readonly threshold: number;
|
|
24
34
|
score?: number;
|
|
35
|
+
score_breakdown?: Record<string, any>;
|
|
25
36
|
additional_metadata?: Record<string, any>;
|
|
26
|
-
|
|
27
|
-
|
|
37
|
+
strict_mode: boolean;
|
|
38
|
+
async_mode: boolean;
|
|
39
|
+
verbose_mode: boolean;
|
|
40
|
+
include_reason: boolean;
|
|
41
|
+
constructor(type: string, threshold: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
|
|
28
42
|
/**
|
|
29
43
|
* Check if the score meets the threshold
|
|
30
44
|
*/
|
|
@@ -47,27 +61,46 @@ export declare abstract class JudgevalScorer implements Scorer {
|
|
|
47
61
|
scoreType: string;
|
|
48
62
|
threshold: number;
|
|
49
63
|
score?: number;
|
|
64
|
+
score_breakdown?: Record<string, any>;
|
|
65
|
+
reason?: string;
|
|
66
|
+
success?: boolean;
|
|
67
|
+
evaluation_model?: string;
|
|
68
|
+
strict_mode: boolean;
|
|
69
|
+
async_mode: boolean;
|
|
70
|
+
verbose_mode: boolean;
|
|
71
|
+
include_reason: boolean;
|
|
72
|
+
error?: string;
|
|
73
|
+
evaluation_cost?: number;
|
|
74
|
+
verbose_logs?: string;
|
|
50
75
|
additional_metadata?: Record<string, any>;
|
|
51
|
-
|
|
52
|
-
constructor(type: string, threshold: number, additional_metadata?: Record<string, any>, verbose?: boolean);
|
|
76
|
+
constructor(type: string, threshold: number, additional_metadata?: Record<string, any>, include_reason?: boolean, async_mode?: boolean, strict_mode?: boolean, verbose_mode?: boolean);
|
|
53
77
|
/**
|
|
54
78
|
* Check if the score meets the threshold
|
|
55
79
|
*/
|
|
56
80
|
successCheck(): boolean;
|
|
81
|
+
/**
|
|
82
|
+
* Internal method to check success
|
|
83
|
+
* This is equivalent to Python's _success_check method
|
|
84
|
+
*/
|
|
85
|
+
protected _successCheck(): boolean;
|
|
57
86
|
/**
|
|
58
87
|
* Validate that the threshold is within the allowed range
|
|
59
88
|
*/
|
|
60
89
|
validateThreshold(): void;
|
|
90
|
+
/**
|
|
91
|
+
* Convert the scorer to a plain object
|
|
92
|
+
*/
|
|
93
|
+
toJSON(): Record<string, any>;
|
|
61
94
|
/**
|
|
62
95
|
* Score an example
|
|
63
|
-
*
|
|
64
|
-
* @returns A ScorerData object with the score
|
|
96
|
+
* This must be implemented by subclasses
|
|
65
97
|
*/
|
|
66
98
|
abstract scoreExample(example: Example): Promise<ScorerData>;
|
|
67
99
|
/**
|
|
68
|
-
*
|
|
100
|
+
* Get the name of the scorer
|
|
101
|
+
* This is equivalent to Python's __name__ property
|
|
69
102
|
*/
|
|
70
|
-
|
|
103
|
+
get name(): string;
|
|
71
104
|
}
|
|
72
105
|
/**
|
|
73
106
|
* Wrapper for scorers to allow dynamic loading of implementations
|
|
@@ -77,8 +110,18 @@ export declare class ScorerWrapper implements Scorer {
|
|
|
77
110
|
scoreType: string;
|
|
78
111
|
threshold: number;
|
|
79
112
|
score?: number;
|
|
113
|
+
score_breakdown?: Record<string, any>;
|
|
114
|
+
reason?: string;
|
|
115
|
+
success?: boolean;
|
|
116
|
+
evaluation_model?: string;
|
|
117
|
+
strict_mode: boolean;
|
|
118
|
+
async_mode: boolean;
|
|
119
|
+
verbose_mode: boolean;
|
|
120
|
+
include_reason: boolean;
|
|
121
|
+
error?: string;
|
|
122
|
+
evaluation_cost?: number;
|
|
123
|
+
verbose_logs?: string;
|
|
80
124
|
additional_metadata?: Record<string, any>;
|
|
81
|
-
verbose: boolean;
|
|
82
125
|
scorer: any;
|
|
83
126
|
constructor(scorer: any);
|
|
84
127
|
/**
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "judgeval",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.2.1",
|
|
4
4
|
"description": "Judgment SDK for TypeScript/JavaScript",
|
|
5
5
|
"main": "./dist/cjs/index.js",
|
|
6
6
|
"module": "./dist/esm/index.js",
|
|
@@ -49,6 +49,7 @@
|
|
|
49
49
|
"@types/node": "^20.12.12",
|
|
50
50
|
"@typescript-eslint/eslint-plugin": "^7.10.0",
|
|
51
51
|
"@typescript-eslint/parser": "^7.10.0",
|
|
52
|
+
"cross-env": "^7.0.3",
|
|
52
53
|
"eslint": "^8.57.0",
|
|
53
54
|
"eslint-config-prettier": "^9.1.0",
|
|
54
55
|
"eslint-plugin-prettier": "^5.1.3",
|
|
@@ -59,12 +60,18 @@
|
|
|
59
60
|
"typescript": "^5.4.5"
|
|
60
61
|
},
|
|
61
62
|
"scripts": {
|
|
62
|
-
"build": "rm -rf dist && tsc -p tsconfig.cjs.json && tsc -p tsconfig.esm.json",
|
|
63
|
+
"build:dev": "rm -rf dist && tsc -p tsconfig.cjs.json && tsc -p tsconfig.esm.json",
|
|
64
|
+
"build:prod": "cross-env NODE_ENV=production rm -rf dist && tsc -p tsconfig.cjs.json && tsc -p tsconfig.esm.json",
|
|
65
|
+
"build": "npm run build:prod",
|
|
63
66
|
"build:examples": "tsc -p tsconfig.examples.json",
|
|
67
|
+
"build:e2etests": "tsc -p tsconfig.e2etests.json",
|
|
64
68
|
"test": "jest",
|
|
69
|
+
"test:e2e": "jest e2etests",
|
|
70
|
+
"test:e2e:eval": "jest --config jest.config.js src/e2etests/eval-operations.test.ts",
|
|
71
|
+
"test:e2e:traces": "jest --config jest.config.js src/e2etests/judgee-traces.test.ts",
|
|
65
72
|
"lint": "eslint . --ext .ts",
|
|
66
73
|
"format": "prettier --write \"src/**/*.ts\" \"tests/**/*.ts\"",
|
|
67
|
-
"prepublishOnly": "npm run build",
|
|
74
|
+
"prepublishOnly": "npm run build:prod",
|
|
68
75
|
"docs": "typedoc --out docs src/index.ts",
|
|
69
76
|
"demo:basic": "npx ts-node src/demo/basic-bot.ts",
|
|
70
77
|
"demo:llm-wrap": "npx ts-node src/demo/llm-wrap-demo.ts",
|
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
-
});
|
|
10
|
-
};
|
|
11
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
-
exports.ExactMatchScorer = void 0;
|
|
13
|
-
const base_scorer_js_1 = require("./base-scorer.js");
|
|
14
|
-
class ExactMatchScorer extends base_scorer_js_1.JudgevalScorer {
|
|
15
|
-
constructor(threshold = 1.0, additionalMetadata, verbose = false) {
|
|
16
|
-
super('exact_match', threshold, additionalMetadata, verbose);
|
|
17
|
-
}
|
|
18
|
-
scoreExample(example) {
|
|
19
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
20
|
-
var _a;
|
|
21
|
-
try {
|
|
22
|
-
// Check if the example has expected output
|
|
23
|
-
if (!example.expectedOutput) {
|
|
24
|
-
return {
|
|
25
|
-
name: this.type,
|
|
26
|
-
threshold: this.threshold,
|
|
27
|
-
success: false,
|
|
28
|
-
score: 0,
|
|
29
|
-
reason: "Expected output is required for exact match scoring",
|
|
30
|
-
strict_mode: null,
|
|
31
|
-
evaluation_model: "exact-match",
|
|
32
|
-
error: "Missing expected output",
|
|
33
|
-
evaluation_cost: null,
|
|
34
|
-
verbose_logs: null,
|
|
35
|
-
additional_metadata: this.additional_metadata || {}
|
|
36
|
-
};
|
|
37
|
-
}
|
|
38
|
-
// Compare the actual output with the expected output
|
|
39
|
-
const actualOutput = ((_a = example.actualOutput) === null || _a === void 0 ? void 0 : _a.trim()) || '';
|
|
40
|
-
const expectedOutput = example.expectedOutput.trim();
|
|
41
|
-
// Calculate the score (1 for exact match, 0 otherwise)
|
|
42
|
-
const isMatch = actualOutput === expectedOutput;
|
|
43
|
-
this.score = isMatch ? 1 : 0;
|
|
44
|
-
// Generate a reason for the score
|
|
45
|
-
const reason = isMatch
|
|
46
|
-
? "The actual output exactly matches the expected output."
|
|
47
|
-
: `The actual output "${actualOutput}" does not match the expected output "${expectedOutput}".`;
|
|
48
|
-
// Return the scorer data
|
|
49
|
-
return {
|
|
50
|
-
name: this.type,
|
|
51
|
-
threshold: this.threshold,
|
|
52
|
-
success: this.successCheck(),
|
|
53
|
-
score: this.score,
|
|
54
|
-
reason: reason,
|
|
55
|
-
strict_mode: null,
|
|
56
|
-
evaluation_model: "exact-match",
|
|
57
|
-
error: null,
|
|
58
|
-
evaluation_cost: null,
|
|
59
|
-
verbose_logs: this.verbose ? `Comparing: "${actualOutput}" with "${expectedOutput}"` : null,
|
|
60
|
-
additional_metadata: this.additional_metadata || {}
|
|
61
|
-
};
|
|
62
|
-
}
|
|
63
|
-
catch (error) {
|
|
64
|
-
// Handle any errors during scoring
|
|
65
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
66
|
-
return {
|
|
67
|
-
name: this.type,
|
|
68
|
-
threshold: this.threshold,
|
|
69
|
-
success: false,
|
|
70
|
-
score: 0,
|
|
71
|
-
reason: `Error during scoring: ${errorMessage}`,
|
|
72
|
-
strict_mode: null,
|
|
73
|
-
evaluation_model: "exact-match",
|
|
74
|
-
error: errorMessage,
|
|
75
|
-
evaluation_cost: null,
|
|
76
|
-
verbose_logs: null,
|
|
77
|
-
additional_metadata: this.additional_metadata || {}
|
|
78
|
-
};
|
|
79
|
-
}
|
|
80
|
-
});
|
|
81
|
-
}
|
|
82
|
-
}
|
|
83
|
-
exports.ExactMatchScorer = ExactMatchScorer;
|
|
84
|
-
//# sourceMappingURL=exact-match-scorer.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"exact-match-scorer.js","sourceRoot":"","sources":["../../../src/scorers/exact-match-scorer.ts"],"names":[],"mappings":";;;;;;;;;;;;AAIA,qDAAkD;AAGlD,MAAa,gBAAiB,SAAQ,+BAAc;IAClD,YAAY,YAAoB,GAAG,EAAE,kBAAwC,EAAE,UAAmB,KAAK;QACrG,KAAK,CAAC,aAAa,EAAE,SAAS,EAAE,kBAAkB,EAAE,OAAO,CAAC,CAAC;IAC/D,CAAC;IAEK,YAAY,CAAC,OAAgB;;;YACjC,IAAI,CAAC;gBACH,2CAA2C;gBAC3C,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,CAAC;oBAC5B,OAAO;wBACL,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,SAAS,EAAE,IAAI,CAAC,SAAS;wBACzB,OAAO,EAAE,KAAK;wBACd,KAAK,EAAE,CAAC;wBACR,MAAM,EAAE,qDAAqD;wBAC7D,WAAW,EAAE,IAAI;wBACjB,gBAAgB,EAAE,aAAa;wBAC/B,KAAK,EAAE,yBAAyB;wBAChC,eAAe,EAAE,IAAI;wBACrB,YAAY,EAAE,IAAI;wBAClB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;qBACpD,CAAC;gBACJ,CAAC;gBAED,qDAAqD;gBACrD,MAAM,YAAY,GAAG,CAAA,MAAA,OAAO,CAAC,YAAY,0CAAE,IAAI,EAAE,KAAI,EAAE,CAAC;gBACxD,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;gBAErD,uDAAuD;gBACvD,MAAM,OAAO,GAAG,YAAY,KAAK,cAAc,CAAC;gBAChD,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAE7B,kCAAkC;gBAClC,MAAM,MAAM,GAAG,OAAO;oBACpB,CAAC,CAAC,wDAAwD;oBAC1D,CAAC,CAAC,sBAAsB,YAAY,yCAAyC,cAAc,IAAI,CAAC;gBAElG,yBAAyB;gBACzB,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,IAAI,CAAC,YAAY,EAAE;oBAC5B,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,MAAM,EAAE,MAAM;oBACd,WAAW,EAAE,IAAI;oBACjB,gBAAgB,EAAE,aAAa;oBAC/B,KAAK,EAAE,IAAI;oBACX,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,YAAY,WAAW,cAAc,GAAG,CAAC,CAAC,CAAC,IAAI;oBAC3F,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;iBACpD,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,mCAAmC;gBACnC,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAE5E,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAC;oBACR,MAAM,EAAE,yBAAyB,YAAY,EAAE;oBAC/C,WAAW,EAAE,IAAI;oBACjB,gBAAgB,EAAE,aAAa;oBAC/B,KAAK,EAAE,YAAY;oBACnB,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI;oBAClB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB
,IAAI,EAAE;iBACpD,CAAC;YACJ,CAAC;QACH,CAAC;KAAA;CACF;AAtED,4CAsEC"}
|
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
-
});
|
|
9
|
-
};
|
|
10
|
-
import { JudgevalScorer } from './base-scorer.js';
|
|
11
|
-
export class ExactMatchScorer extends JudgevalScorer {
|
|
12
|
-
constructor(threshold = 1.0, additionalMetadata, verbose = false) {
|
|
13
|
-
super('exact_match', threshold, additionalMetadata, verbose);
|
|
14
|
-
}
|
|
15
|
-
scoreExample(example) {
|
|
16
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
17
|
-
var _a;
|
|
18
|
-
try {
|
|
19
|
-
// Check if the example has expected output
|
|
20
|
-
if (!example.expectedOutput) {
|
|
21
|
-
return {
|
|
22
|
-
name: this.type,
|
|
23
|
-
threshold: this.threshold,
|
|
24
|
-
success: false,
|
|
25
|
-
score: 0,
|
|
26
|
-
reason: "Expected output is required for exact match scoring",
|
|
27
|
-
strict_mode: null,
|
|
28
|
-
evaluation_model: "exact-match",
|
|
29
|
-
error: "Missing expected output",
|
|
30
|
-
evaluation_cost: null,
|
|
31
|
-
verbose_logs: null,
|
|
32
|
-
additional_metadata: this.additional_metadata || {}
|
|
33
|
-
};
|
|
34
|
-
}
|
|
35
|
-
// Compare the actual output with the expected output
|
|
36
|
-
const actualOutput = ((_a = example.actualOutput) === null || _a === void 0 ? void 0 : _a.trim()) || '';
|
|
37
|
-
const expectedOutput = example.expectedOutput.trim();
|
|
38
|
-
// Calculate the score (1 for exact match, 0 otherwise)
|
|
39
|
-
const isMatch = actualOutput === expectedOutput;
|
|
40
|
-
this.score = isMatch ? 1 : 0;
|
|
41
|
-
// Generate a reason for the score
|
|
42
|
-
const reason = isMatch
|
|
43
|
-
? "The actual output exactly matches the expected output."
|
|
44
|
-
: `The actual output "${actualOutput}" does not match the expected output "${expectedOutput}".`;
|
|
45
|
-
// Return the scorer data
|
|
46
|
-
return {
|
|
47
|
-
name: this.type,
|
|
48
|
-
threshold: this.threshold,
|
|
49
|
-
success: this.successCheck(),
|
|
50
|
-
score: this.score,
|
|
51
|
-
reason: reason,
|
|
52
|
-
strict_mode: null,
|
|
53
|
-
evaluation_model: "exact-match",
|
|
54
|
-
error: null,
|
|
55
|
-
evaluation_cost: null,
|
|
56
|
-
verbose_logs: this.verbose ? `Comparing: "${actualOutput}" with "${expectedOutput}"` : null,
|
|
57
|
-
additional_metadata: this.additional_metadata || {}
|
|
58
|
-
};
|
|
59
|
-
}
|
|
60
|
-
catch (error) {
|
|
61
|
-
// Handle any errors during scoring
|
|
62
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
63
|
-
return {
|
|
64
|
-
name: this.type,
|
|
65
|
-
threshold: this.threshold,
|
|
66
|
-
success: false,
|
|
67
|
-
score: 0,
|
|
68
|
-
reason: `Error during scoring: ${errorMessage}`,
|
|
69
|
-
strict_mode: null,
|
|
70
|
-
evaluation_model: "exact-match",
|
|
71
|
-
error: errorMessage,
|
|
72
|
-
evaluation_cost: null,
|
|
73
|
-
verbose_logs: null,
|
|
74
|
-
additional_metadata: this.additional_metadata || {}
|
|
75
|
-
};
|
|
76
|
-
}
|
|
77
|
-
});
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
//# sourceMappingURL=exact-match-scorer.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"exact-match-scorer.js","sourceRoot":"","sources":["../../../src/scorers/exact-match-scorer.ts"],"names":[],"mappings":";;;;;;;;;AAIA,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAGlD,MAAM,OAAO,gBAAiB,SAAQ,cAAc;IAClD,YAAY,YAAoB,GAAG,EAAE,kBAAwC,EAAE,UAAmB,KAAK;QACrG,KAAK,CAAC,aAAa,EAAE,SAAS,EAAE,kBAAkB,EAAE,OAAO,CAAC,CAAC;IAC/D,CAAC;IAEK,YAAY,CAAC,OAAgB;;;YACjC,IAAI,CAAC;gBACH,2CAA2C;gBAC3C,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,CAAC;oBAC5B,OAAO;wBACL,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,SAAS,EAAE,IAAI,CAAC,SAAS;wBACzB,OAAO,EAAE,KAAK;wBACd,KAAK,EAAE,CAAC;wBACR,MAAM,EAAE,qDAAqD;wBAC7D,WAAW,EAAE,IAAI;wBACjB,gBAAgB,EAAE,aAAa;wBAC/B,KAAK,EAAE,yBAAyB;wBAChC,eAAe,EAAE,IAAI;wBACrB,YAAY,EAAE,IAAI;wBAClB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;qBACpD,CAAC;gBACJ,CAAC;gBAED,qDAAqD;gBACrD,MAAM,YAAY,GAAG,CAAA,MAAA,OAAO,CAAC,YAAY,0CAAE,IAAI,EAAE,KAAI,EAAE,CAAC;gBACxD,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;gBAErD,uDAAuD;gBACvD,MAAM,OAAO,GAAG,YAAY,KAAK,cAAc,CAAC;gBAChD,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAE7B,kCAAkC;gBAClC,MAAM,MAAM,GAAG,OAAO;oBACpB,CAAC,CAAC,wDAAwD;oBAC1D,CAAC,CAAC,sBAAsB,YAAY,yCAAyC,cAAc,IAAI,CAAC;gBAElG,yBAAyB;gBACzB,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,IAAI,CAAC,YAAY,EAAE;oBAC5B,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,MAAM,EAAE,MAAM;oBACd,WAAW,EAAE,IAAI;oBACjB,gBAAgB,EAAE,aAAa;oBAC/B,KAAK,EAAE,IAAI;oBACX,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,YAAY,WAAW,cAAc,GAAG,CAAC,CAAC,CAAC,IAAI;oBAC3F,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;iBACpD,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,mCAAmC;gBACnC,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAE5E,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAC;oBACR,MAAM,EAAE,yBAAyB,YAAY,EAAE;oBAC/C,WAAW,EAAE,IAAI;oBACjB,gBAAgB,EAAE,aAAa;oBAC/B,KAAK,EAAE,YAAY;oBACnB,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI;oBAC
lB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;iBACpD,CAAC;YACJ,CAAC;QACH,CAAC;KAAA;CACF"}
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* ExactMatchScorer - A custom scorer that checks if the actual output exactly matches the expected output
|
|
3
|
-
*/
|
|
4
|
-
import { Example } from '../data/example.js';
|
|
5
|
-
import { JudgevalScorer } from './base-scorer.js';
|
|
6
|
-
import { ScorerData } from '../data/result.js';
|
|
7
|
-
export declare class ExactMatchScorer extends JudgevalScorer {
|
|
8
|
-
constructor(threshold?: number, additionalMetadata?: Record<string, any>, verbose?: boolean);
|
|
9
|
-
scoreExample(example: Example): Promise<ScorerData>;
|
|
10
|
-
}
|