judgeval 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -68
- package/dist/cjs/common/tracer.js +235 -143
- package/dist/cjs/common/tracer.js.map +1 -1
- package/dist/cjs/constants.js +8 -5
- package/dist/cjs/constants.js.map +1 -1
- package/dist/cjs/data/datasets/eval-dataset-client.js +349 -0
- package/dist/cjs/data/datasets/eval-dataset-client.js.map +1 -0
- package/dist/cjs/data/datasets/eval-dataset.js +405 -0
- package/dist/cjs/data/datasets/eval-dataset.js.map +1 -0
- package/dist/cjs/data/example.js +22 -1
- package/dist/cjs/data/example.js.map +1 -1
- package/dist/cjs/e2etests/eval-operations.test.js +282 -0
- package/dist/cjs/e2etests/eval-operations.test.js.map +1 -0
- package/dist/cjs/e2etests/judgee-traces.test.js +278 -0
- package/dist/cjs/e2etests/judgee-traces.test.js.map +1 -0
- package/dist/cjs/index.js +1 -3
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/judgment-client.js +326 -645
- package/dist/cjs/judgment-client.js.map +1 -1
- package/dist/cjs/scorers/api-scorer.js +56 -48
- package/dist/cjs/scorers/api-scorer.js.map +1 -1
- package/dist/cjs/scorers/base-scorer.js +66 -11
- package/dist/cjs/scorers/base-scorer.js.map +1 -1
- package/dist/esm/common/tracer.js +236 -144
- package/dist/esm/common/tracer.js.map +1 -1
- package/dist/esm/constants.js +7 -4
- package/dist/esm/constants.js.map +1 -1
- package/dist/esm/data/datasets/eval-dataset-client.js +342 -0
- package/dist/esm/data/datasets/eval-dataset-client.js.map +1 -0
- package/dist/esm/data/datasets/eval-dataset.js +375 -0
- package/dist/esm/data/datasets/eval-dataset.js.map +1 -0
- package/dist/esm/data/example.js +22 -1
- package/dist/esm/data/example.js.map +1 -1
- package/dist/esm/e2etests/eval-operations.test.js +254 -0
- package/dist/esm/e2etests/eval-operations.test.js.map +1 -0
- package/dist/esm/e2etests/judgee-traces.test.js +253 -0
- package/dist/esm/e2etests/judgee-traces.test.js.map +1 -0
- package/dist/esm/index.js +0 -1
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/judgment-client.js +328 -647
- package/dist/esm/judgment-client.js.map +1 -1
- package/dist/esm/scorers/api-scorer.js +56 -48
- package/dist/esm/scorers/api-scorer.js.map +1 -1
- package/dist/esm/scorers/base-scorer.js +66 -11
- package/dist/esm/scorers/base-scorer.js.map +1 -1
- package/dist/types/common/tracer.d.ts +27 -14
- package/dist/types/constants.d.ts +4 -4
- package/dist/types/data/datasets/eval-dataset-client.d.ts +39 -0
- package/dist/types/data/datasets/eval-dataset.d.ts +45 -0
- package/dist/types/data/example.d.ts +24 -12
- package/dist/types/e2etests/eval-operations.test.d.ts +5 -0
- package/dist/types/e2etests/judgee-traces.test.d.ts +5 -0
- package/dist/types/index.d.ts +0 -1
- package/dist/types/judgment-client.d.ts +3 -47
- package/dist/types/scorers/api-scorer.d.ts +15 -15
- package/dist/types/scorers/base-scorer.d.ts +53 -10
- package/package.json +2 -1
- package/dist/cjs/scorers/exact-match-scorer.js +0 -84
- package/dist/cjs/scorers/exact-match-scorer.js.map +0 -1
- package/dist/esm/scorers/exact-match-scorer.js +0 -80
- package/dist/esm/scorers/exact-match-scorer.js.map +0 -1
- package/dist/types/scorers/exact-match-scorer.d.ts +0 -10
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
-
});
|
|
10
|
-
};
|
|
11
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
-
exports.ExactMatchScorer = void 0;
|
|
13
|
-
const base_scorer_js_1 = require("./base-scorer.js");
|
|
14
|
-
class ExactMatchScorer extends base_scorer_js_1.JudgevalScorer {
|
|
15
|
-
constructor(threshold = 1.0, additionalMetadata, verbose = false) {
|
|
16
|
-
super('exact_match', threshold, additionalMetadata, verbose);
|
|
17
|
-
}
|
|
18
|
-
scoreExample(example) {
|
|
19
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
20
|
-
var _a;
|
|
21
|
-
try {
|
|
22
|
-
// Check if the example has expected output
|
|
23
|
-
if (!example.expectedOutput) {
|
|
24
|
-
return {
|
|
25
|
-
name: this.type,
|
|
26
|
-
threshold: this.threshold,
|
|
27
|
-
success: false,
|
|
28
|
-
score: 0,
|
|
29
|
-
reason: "Expected output is required for exact match scoring",
|
|
30
|
-
strict_mode: null,
|
|
31
|
-
evaluation_model: "exact-match",
|
|
32
|
-
error: "Missing expected output",
|
|
33
|
-
evaluation_cost: null,
|
|
34
|
-
verbose_logs: null,
|
|
35
|
-
additional_metadata: this.additional_metadata || {}
|
|
36
|
-
};
|
|
37
|
-
}
|
|
38
|
-
// Compare the actual output with the expected output
|
|
39
|
-
const actualOutput = ((_a = example.actualOutput) === null || _a === void 0 ? void 0 : _a.trim()) || '';
|
|
40
|
-
const expectedOutput = example.expectedOutput.trim();
|
|
41
|
-
// Calculate the score (1 for exact match, 0 otherwise)
|
|
42
|
-
const isMatch = actualOutput === expectedOutput;
|
|
43
|
-
this.score = isMatch ? 1 : 0;
|
|
44
|
-
// Generate a reason for the score
|
|
45
|
-
const reason = isMatch
|
|
46
|
-
? "The actual output exactly matches the expected output."
|
|
47
|
-
: `The actual output "${actualOutput}" does not match the expected output "${expectedOutput}".`;
|
|
48
|
-
// Return the scorer data
|
|
49
|
-
return {
|
|
50
|
-
name: this.type,
|
|
51
|
-
threshold: this.threshold,
|
|
52
|
-
success: this.successCheck(),
|
|
53
|
-
score: this.score,
|
|
54
|
-
reason: reason,
|
|
55
|
-
strict_mode: null,
|
|
56
|
-
evaluation_model: "exact-match",
|
|
57
|
-
error: null,
|
|
58
|
-
evaluation_cost: null,
|
|
59
|
-
verbose_logs: this.verbose ? `Comparing: "${actualOutput}" with "${expectedOutput}"` : null,
|
|
60
|
-
additional_metadata: this.additional_metadata || {}
|
|
61
|
-
};
|
|
62
|
-
}
|
|
63
|
-
catch (error) {
|
|
64
|
-
// Handle any errors during scoring
|
|
65
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
66
|
-
return {
|
|
67
|
-
name: this.type,
|
|
68
|
-
threshold: this.threshold,
|
|
69
|
-
success: false,
|
|
70
|
-
score: 0,
|
|
71
|
-
reason: `Error during scoring: ${errorMessage}`,
|
|
72
|
-
strict_mode: null,
|
|
73
|
-
evaluation_model: "exact-match",
|
|
74
|
-
error: errorMessage,
|
|
75
|
-
evaluation_cost: null,
|
|
76
|
-
verbose_logs: null,
|
|
77
|
-
additional_metadata: this.additional_metadata || {}
|
|
78
|
-
};
|
|
79
|
-
}
|
|
80
|
-
});
|
|
81
|
-
}
|
|
82
|
-
}
|
|
83
|
-
exports.ExactMatchScorer = ExactMatchScorer;
|
|
84
|
-
//# sourceMappingURL=exact-match-scorer.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"exact-match-scorer.js","sourceRoot":"","sources":["../../../src/scorers/exact-match-scorer.ts"],"names":[],"mappings":";;;;;;;;;;;;AAIA,qDAAkD;AAGlD,MAAa,gBAAiB,SAAQ,+BAAc;IAClD,YAAY,YAAoB,GAAG,EAAE,kBAAwC,EAAE,UAAmB,KAAK;QACrG,KAAK,CAAC,aAAa,EAAE,SAAS,EAAE,kBAAkB,EAAE,OAAO,CAAC,CAAC;IAC/D,CAAC;IAEK,YAAY,CAAC,OAAgB;;;YACjC,IAAI,CAAC;gBACH,2CAA2C;gBAC3C,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,CAAC;oBAC5B,OAAO;wBACL,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,SAAS,EAAE,IAAI,CAAC,SAAS;wBACzB,OAAO,EAAE,KAAK;wBACd,KAAK,EAAE,CAAC;wBACR,MAAM,EAAE,qDAAqD;wBAC7D,WAAW,EAAE,IAAI;wBACjB,gBAAgB,EAAE,aAAa;wBAC/B,KAAK,EAAE,yBAAyB;wBAChC,eAAe,EAAE,IAAI;wBACrB,YAAY,EAAE,IAAI;wBAClB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;qBACpD,CAAC;gBACJ,CAAC;gBAED,qDAAqD;gBACrD,MAAM,YAAY,GAAG,CAAA,MAAA,OAAO,CAAC,YAAY,0CAAE,IAAI,EAAE,KAAI,EAAE,CAAC;gBACxD,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;gBAErD,uDAAuD;gBACvD,MAAM,OAAO,GAAG,YAAY,KAAK,cAAc,CAAC;gBAChD,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAE7B,kCAAkC;gBAClC,MAAM,MAAM,GAAG,OAAO;oBACpB,CAAC,CAAC,wDAAwD;oBAC1D,CAAC,CAAC,sBAAsB,YAAY,yCAAyC,cAAc,IAAI,CAAC;gBAElG,yBAAyB;gBACzB,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,IAAI,CAAC,YAAY,EAAE;oBAC5B,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,MAAM,EAAE,MAAM;oBACd,WAAW,EAAE,IAAI;oBACjB,gBAAgB,EAAE,aAAa;oBAC/B,KAAK,EAAE,IAAI;oBACX,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,YAAY,WAAW,cAAc,GAAG,CAAC,CAAC,CAAC,IAAI;oBAC3F,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;iBACpD,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,mCAAmC;gBACnC,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAE5E,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAC;oBACR,MAAM,EAAE,yBAAyB,YAAY,EAAE;oBAC/C,WAAW,EAAE,IAAI;oBACjB,gBAAgB,EAAE,aAAa;oBAC/B,KAAK,EAAE,YAAY;oBACnB,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI;oBAClB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;iBACpD,CAAC;YACJ,CAAC;QACH,CAAC;KAAA;CACF;AAtED,4CAsEC"}
|
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
-
});
|
|
9
|
-
};
|
|
10
|
-
import { JudgevalScorer } from './base-scorer.js';
|
|
11
|
-
export class ExactMatchScorer extends JudgevalScorer {
|
|
12
|
-
constructor(threshold = 1.0, additionalMetadata, verbose = false) {
|
|
13
|
-
super('exact_match', threshold, additionalMetadata, verbose);
|
|
14
|
-
}
|
|
15
|
-
scoreExample(example) {
|
|
16
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
17
|
-
var _a;
|
|
18
|
-
try {
|
|
19
|
-
// Check if the example has expected output
|
|
20
|
-
if (!example.expectedOutput) {
|
|
21
|
-
return {
|
|
22
|
-
name: this.type,
|
|
23
|
-
threshold: this.threshold,
|
|
24
|
-
success: false,
|
|
25
|
-
score: 0,
|
|
26
|
-
reason: "Expected output is required for exact match scoring",
|
|
27
|
-
strict_mode: null,
|
|
28
|
-
evaluation_model: "exact-match",
|
|
29
|
-
error: "Missing expected output",
|
|
30
|
-
evaluation_cost: null,
|
|
31
|
-
verbose_logs: null,
|
|
32
|
-
additional_metadata: this.additional_metadata || {}
|
|
33
|
-
};
|
|
34
|
-
}
|
|
35
|
-
// Compare the actual output with the expected output
|
|
36
|
-
const actualOutput = ((_a = example.actualOutput) === null || _a === void 0 ? void 0 : _a.trim()) || '';
|
|
37
|
-
const expectedOutput = example.expectedOutput.trim();
|
|
38
|
-
// Calculate the score (1 for exact match, 0 otherwise)
|
|
39
|
-
const isMatch = actualOutput === expectedOutput;
|
|
40
|
-
this.score = isMatch ? 1 : 0;
|
|
41
|
-
// Generate a reason for the score
|
|
42
|
-
const reason = isMatch
|
|
43
|
-
? "The actual output exactly matches the expected output."
|
|
44
|
-
: `The actual output "${actualOutput}" does not match the expected output "${expectedOutput}".`;
|
|
45
|
-
// Return the scorer data
|
|
46
|
-
return {
|
|
47
|
-
name: this.type,
|
|
48
|
-
threshold: this.threshold,
|
|
49
|
-
success: this.successCheck(),
|
|
50
|
-
score: this.score,
|
|
51
|
-
reason: reason,
|
|
52
|
-
strict_mode: null,
|
|
53
|
-
evaluation_model: "exact-match",
|
|
54
|
-
error: null,
|
|
55
|
-
evaluation_cost: null,
|
|
56
|
-
verbose_logs: this.verbose ? `Comparing: "${actualOutput}" with "${expectedOutput}"` : null,
|
|
57
|
-
additional_metadata: this.additional_metadata || {}
|
|
58
|
-
};
|
|
59
|
-
}
|
|
60
|
-
catch (error) {
|
|
61
|
-
// Handle any errors during scoring
|
|
62
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
63
|
-
return {
|
|
64
|
-
name: this.type,
|
|
65
|
-
threshold: this.threshold,
|
|
66
|
-
success: false,
|
|
67
|
-
score: 0,
|
|
68
|
-
reason: `Error during scoring: ${errorMessage}`,
|
|
69
|
-
strict_mode: null,
|
|
70
|
-
evaluation_model: "exact-match",
|
|
71
|
-
error: errorMessage,
|
|
72
|
-
evaluation_cost: null,
|
|
73
|
-
verbose_logs: null,
|
|
74
|
-
additional_metadata: this.additional_metadata || {}
|
|
75
|
-
};
|
|
76
|
-
}
|
|
77
|
-
});
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
//# sourceMappingURL=exact-match-scorer.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"exact-match-scorer.js","sourceRoot":"","sources":["../../../src/scorers/exact-match-scorer.ts"],"names":[],"mappings":";;;;;;;;;AAIA,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAGlD,MAAM,OAAO,gBAAiB,SAAQ,cAAc;IAClD,YAAY,YAAoB,GAAG,EAAE,kBAAwC,EAAE,UAAmB,KAAK;QACrG,KAAK,CAAC,aAAa,EAAE,SAAS,EAAE,kBAAkB,EAAE,OAAO,CAAC,CAAC;IAC/D,CAAC;IAEK,YAAY,CAAC,OAAgB;;;YACjC,IAAI,CAAC;gBACH,2CAA2C;gBAC3C,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,CAAC;oBAC5B,OAAO;wBACL,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,SAAS,EAAE,IAAI,CAAC,SAAS;wBACzB,OAAO,EAAE,KAAK;wBACd,KAAK,EAAE,CAAC;wBACR,MAAM,EAAE,qDAAqD;wBAC7D,WAAW,EAAE,IAAI;wBACjB,gBAAgB,EAAE,aAAa;wBAC/B,KAAK,EAAE,yBAAyB;wBAChC,eAAe,EAAE,IAAI;wBACrB,YAAY,EAAE,IAAI;wBAClB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;qBACpD,CAAC;gBACJ,CAAC;gBAED,qDAAqD;gBACrD,MAAM,YAAY,GAAG,CAAA,MAAA,OAAO,CAAC,YAAY,0CAAE,IAAI,EAAE,KAAI,EAAE,CAAC;gBACxD,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;gBAErD,uDAAuD;gBACvD,MAAM,OAAO,GAAG,YAAY,KAAK,cAAc,CAAC;gBAChD,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAE7B,kCAAkC;gBAClC,MAAM,MAAM,GAAG,OAAO;oBACpB,CAAC,CAAC,wDAAwD;oBAC1D,CAAC,CAAC,sBAAsB,YAAY,yCAAyC,cAAc,IAAI,CAAC;gBAElG,yBAAyB;gBACzB,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,IAAI,CAAC,YAAY,EAAE;oBAC5B,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,MAAM,EAAE,MAAM;oBACd,WAAW,EAAE,IAAI;oBACjB,gBAAgB,EAAE,aAAa;oBAC/B,KAAK,EAAE,IAAI;oBACX,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,YAAY,WAAW,cAAc,GAAG,CAAC,CAAC,CAAC,IAAI;oBAC3F,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;iBACpD,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,mCAAmC;gBACnC,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAE5E,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAC;oBACR,MAAM,EAAE,yBAAyB,YAAY,EAAE;oBAC/C,WAAW,EAAE,IAAI;oBACjB,gBAAgB,EAAE,aAAa;oBAC/B,KAAK,EAAE,YAAY;oBACnB,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI;oBAClB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;iBACpD,CAAC;YACJ,CAAC;QACH,CAAC;KAAA;CACF"}
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* ExactMatchScorer - A custom scorer that checks if the actual output exactly matches the expected output
|
|
3
|
-
*/
|
|
4
|
-
import { Example } from '../data/example.js';
|
|
5
|
-
import { JudgevalScorer } from './base-scorer.js';
|
|
6
|
-
import { ScorerData } from '../data/result.js';
|
|
7
|
-
export declare class ExactMatchScorer extends JudgevalScorer {
|
|
8
|
-
constructor(threshold?: number, additionalMetadata?: Record<string, any>, verbose?: boolean);
|
|
9
|
-
scoreExample(example: Example): Promise<ScorerData>;
|
|
10
|
-
}
|