judgeval 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/README.md +95 -68
  2. package/dist/cjs/common/tracer.js +235 -143
  3. package/dist/cjs/common/tracer.js.map +1 -1
  4. package/dist/cjs/constants.js +8 -5
  5. package/dist/cjs/constants.js.map +1 -1
  6. package/dist/cjs/data/datasets/eval-dataset-client.js +349 -0
  7. package/dist/cjs/data/datasets/eval-dataset-client.js.map +1 -0
  8. package/dist/cjs/data/datasets/eval-dataset.js +405 -0
  9. package/dist/cjs/data/datasets/eval-dataset.js.map +1 -0
  10. package/dist/cjs/data/example.js +22 -1
  11. package/dist/cjs/data/example.js.map +1 -1
  12. package/dist/cjs/e2etests/eval-operations.test.js +282 -0
  13. package/dist/cjs/e2etests/eval-operations.test.js.map +1 -0
  14. package/dist/cjs/e2etests/judgee-traces.test.js +278 -0
  15. package/dist/cjs/e2etests/judgee-traces.test.js.map +1 -0
  16. package/dist/cjs/index.js +1 -3
  17. package/dist/cjs/index.js.map +1 -1
  18. package/dist/cjs/judgment-client.js +326 -645
  19. package/dist/cjs/judgment-client.js.map +1 -1
  20. package/dist/cjs/scorers/api-scorer.js +56 -48
  21. package/dist/cjs/scorers/api-scorer.js.map +1 -1
  22. package/dist/cjs/scorers/base-scorer.js +66 -11
  23. package/dist/cjs/scorers/base-scorer.js.map +1 -1
  24. package/dist/esm/common/tracer.js +236 -144
  25. package/dist/esm/common/tracer.js.map +1 -1
  26. package/dist/esm/constants.js +7 -4
  27. package/dist/esm/constants.js.map +1 -1
  28. package/dist/esm/data/datasets/eval-dataset-client.js +342 -0
  29. package/dist/esm/data/datasets/eval-dataset-client.js.map +1 -0
  30. package/dist/esm/data/datasets/eval-dataset.js +375 -0
  31. package/dist/esm/data/datasets/eval-dataset.js.map +1 -0
  32. package/dist/esm/data/example.js +22 -1
  33. package/dist/esm/data/example.js.map +1 -1
  34. package/dist/esm/e2etests/eval-operations.test.js +254 -0
  35. package/dist/esm/e2etests/eval-operations.test.js.map +1 -0
  36. package/dist/esm/e2etests/judgee-traces.test.js +253 -0
  37. package/dist/esm/e2etests/judgee-traces.test.js.map +1 -0
  38. package/dist/esm/index.js +0 -1
  39. package/dist/esm/index.js.map +1 -1
  40. package/dist/esm/judgment-client.js +328 -647
  41. package/dist/esm/judgment-client.js.map +1 -1
  42. package/dist/esm/scorers/api-scorer.js +56 -48
  43. package/dist/esm/scorers/api-scorer.js.map +1 -1
  44. package/dist/esm/scorers/base-scorer.js +66 -11
  45. package/dist/esm/scorers/base-scorer.js.map +1 -1
  46. package/dist/types/common/tracer.d.ts +27 -14
  47. package/dist/types/constants.d.ts +4 -4
  48. package/dist/types/data/datasets/eval-dataset-client.d.ts +39 -0
  49. package/dist/types/data/datasets/eval-dataset.d.ts +45 -0
  50. package/dist/types/data/example.d.ts +24 -12
  51. package/dist/types/e2etests/eval-operations.test.d.ts +5 -0
  52. package/dist/types/e2etests/judgee-traces.test.d.ts +5 -0
  53. package/dist/types/index.d.ts +0 -1
  54. package/dist/types/judgment-client.d.ts +3 -47
  55. package/dist/types/scorers/api-scorer.d.ts +15 -15
  56. package/dist/types/scorers/base-scorer.d.ts +53 -10
  57. package/package.json +2 -1
  58. package/dist/cjs/scorers/exact-match-scorer.js +0 -84
  59. package/dist/cjs/scorers/exact-match-scorer.js.map +0 -1
  60. package/dist/esm/scorers/exact-match-scorer.js +0 -80
  61. package/dist/esm/scorers/exact-match-scorer.js.map +0 -1
  62. package/dist/types/scorers/exact-match-scorer.d.ts +0 -10
@@ -1,84 +0,0 @@
1
- "use strict";
2
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
- return new (P || (P = Promise))(function (resolve, reject) {
5
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
- step((generator = generator.apply(thisArg, _arguments || [])).next());
9
- });
10
- };
11
- Object.defineProperty(exports, "__esModule", { value: true });
12
- exports.ExactMatchScorer = void 0;
13
- const base_scorer_js_1 = require("./base-scorer.js");
14
- class ExactMatchScorer extends base_scorer_js_1.JudgevalScorer {
15
- constructor(threshold = 1.0, additionalMetadata, verbose = false) {
16
- super('exact_match', threshold, additionalMetadata, verbose);
17
- }
18
- scoreExample(example) {
19
- return __awaiter(this, void 0, void 0, function* () {
20
- var _a;
21
- try {
22
- // Check if the example has expected output
23
- if (!example.expectedOutput) {
24
- return {
25
- name: this.type,
26
- threshold: this.threshold,
27
- success: false,
28
- score: 0,
29
- reason: "Expected output is required for exact match scoring",
30
- strict_mode: null,
31
- evaluation_model: "exact-match",
32
- error: "Missing expected output",
33
- evaluation_cost: null,
34
- verbose_logs: null,
35
- additional_metadata: this.additional_metadata || {}
36
- };
37
- }
38
- // Compare the actual output with the expected output
39
- const actualOutput = ((_a = example.actualOutput) === null || _a === void 0 ? void 0 : _a.trim()) || '';
40
- const expectedOutput = example.expectedOutput.trim();
41
- // Calculate the score (1 for exact match, 0 otherwise)
42
- const isMatch = actualOutput === expectedOutput;
43
- this.score = isMatch ? 1 : 0;
44
- // Generate a reason for the score
45
- const reason = isMatch
46
- ? "The actual output exactly matches the expected output."
47
- : `The actual output "${actualOutput}" does not match the expected output "${expectedOutput}".`;
48
- // Return the scorer data
49
- return {
50
- name: this.type,
51
- threshold: this.threshold,
52
- success: this.successCheck(),
53
- score: this.score,
54
- reason: reason,
55
- strict_mode: null,
56
- evaluation_model: "exact-match",
57
- error: null,
58
- evaluation_cost: null,
59
- verbose_logs: this.verbose ? `Comparing: "${actualOutput}" with "${expectedOutput}"` : null,
60
- additional_metadata: this.additional_metadata || {}
61
- };
62
- }
63
- catch (error) {
64
- // Handle any errors during scoring
65
- const errorMessage = error instanceof Error ? error.message : String(error);
66
- return {
67
- name: this.type,
68
- threshold: this.threshold,
69
- success: false,
70
- score: 0,
71
- reason: `Error during scoring: ${errorMessage}`,
72
- strict_mode: null,
73
- evaluation_model: "exact-match",
74
- error: errorMessage,
75
- evaluation_cost: null,
76
- verbose_logs: null,
77
- additional_metadata: this.additional_metadata || {}
78
- };
79
- }
80
- });
81
- }
82
- }
83
- exports.ExactMatchScorer = ExactMatchScorer;
84
- //# sourceMappingURL=exact-match-scorer.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"exact-match-scorer.js","sourceRoot":"","sources":["../../../src/scorers/exact-match-scorer.ts"],"names":[],"mappings":";;;;;;;;;;;;AAIA,qDAAkD;AAGlD,MAAa,gBAAiB,SAAQ,+BAAc;IAClD,YAAY,YAAoB,GAAG,EAAE,kBAAwC,EAAE,UAAmB,KAAK;QACrG,KAAK,CAAC,aAAa,EAAE,SAAS,EAAE,kBAAkB,EAAE,OAAO,CAAC,CAAC;IAC/D,CAAC;IAEK,YAAY,CAAC,OAAgB;;;YACjC,IAAI,CAAC;gBACH,2CAA2C;gBAC3C,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,CAAC;oBAC5B,OAAO;wBACL,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,SAAS,EAAE,IAAI,CAAC,SAAS;wBACzB,OAAO,EAAE,KAAK;wBACd,KAAK,EAAE,CAAC;wBACR,MAAM,EAAE,qDAAqD;wBAC7D,WAAW,EAAE,IAAI;wBACjB,gBAAgB,EAAE,aAAa;wBAC/B,KAAK,EAAE,yBAAyB;wBAChC,eAAe,EAAE,IAAI;wBACrB,YAAY,EAAE,IAAI;wBAClB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;qBACpD,CAAC;gBACJ,CAAC;gBAED,qDAAqD;gBACrD,MAAM,YAAY,GAAG,CAAA,MAAA,OAAO,CAAC,YAAY,0CAAE,IAAI,EAAE,KAAI,EAAE,CAAC;gBACxD,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;gBAErD,uDAAuD;gBACvD,MAAM,OAAO,GAAG,YAAY,KAAK,cAAc,CAAC;gBAChD,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAE7B,kCAAkC;gBAClC,MAAM,MAAM,GAAG,OAAO;oBACpB,CAAC,CAAC,wDAAwD;oBAC1D,CAAC,CAAC,sBAAsB,YAAY,yCAAyC,cAAc,IAAI,CAAC;gBAElG,yBAAyB;gBACzB,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,IAAI,CAAC,YAAY,EAAE;oBAC5B,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,MAAM,EAAE,MAAM;oBACd,WAAW,EAAE,IAAI;oBACjB,gBAAgB,EAAE,aAAa;oBAC/B,KAAK,EAAE,IAAI;oBACX,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,YAAY,WAAW,cAAc,GAAG,CAAC,CAAC,CAAC,IAAI;oBAC3F,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;iBACpD,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,mCAAmC;gBACnC,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAE5E,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAC;oBACR,MAAM,EAAE,yBAAyB,YAAY,EAAE;oBAC/C,WAAW,EAAE,IAAI;oBACjB,gBAAgB,EAAE,aAAa;oBAC/B,KAAK,EAAE,YAAY;oBACnB,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI;oBAClB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;iBACpD,CAAC;YACJ,CAAC;QACH,CAAC;KAAA;CACF;AAtED,4CAsEC"}
@@ -1,80 +0,0 @@
1
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
- return new (P || (P = Promise))(function (resolve, reject) {
4
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
- step((generator = generator.apply(thisArg, _arguments || [])).next());
8
- });
9
- };
10
- import { JudgevalScorer } from './base-scorer.js';
11
- export class ExactMatchScorer extends JudgevalScorer {
12
- constructor(threshold = 1.0, additionalMetadata, verbose = false) {
13
- super('exact_match', threshold, additionalMetadata, verbose);
14
- }
15
- scoreExample(example) {
16
- return __awaiter(this, void 0, void 0, function* () {
17
- var _a;
18
- try {
19
- // Check if the example has expected output
20
- if (!example.expectedOutput) {
21
- return {
22
- name: this.type,
23
- threshold: this.threshold,
24
- success: false,
25
- score: 0,
26
- reason: "Expected output is required for exact match scoring",
27
- strict_mode: null,
28
- evaluation_model: "exact-match",
29
- error: "Missing expected output",
30
- evaluation_cost: null,
31
- verbose_logs: null,
32
- additional_metadata: this.additional_metadata || {}
33
- };
34
- }
35
- // Compare the actual output with the expected output
36
- const actualOutput = ((_a = example.actualOutput) === null || _a === void 0 ? void 0 : _a.trim()) || '';
37
- const expectedOutput = example.expectedOutput.trim();
38
- // Calculate the score (1 for exact match, 0 otherwise)
39
- const isMatch = actualOutput === expectedOutput;
40
- this.score = isMatch ? 1 : 0;
41
- // Generate a reason for the score
42
- const reason = isMatch
43
- ? "The actual output exactly matches the expected output."
44
- : `The actual output "${actualOutput}" does not match the expected output "${expectedOutput}".`;
45
- // Return the scorer data
46
- return {
47
- name: this.type,
48
- threshold: this.threshold,
49
- success: this.successCheck(),
50
- score: this.score,
51
- reason: reason,
52
- strict_mode: null,
53
- evaluation_model: "exact-match",
54
- error: null,
55
- evaluation_cost: null,
56
- verbose_logs: this.verbose ? `Comparing: "${actualOutput}" with "${expectedOutput}"` : null,
57
- additional_metadata: this.additional_metadata || {}
58
- };
59
- }
60
- catch (error) {
61
- // Handle any errors during scoring
62
- const errorMessage = error instanceof Error ? error.message : String(error);
63
- return {
64
- name: this.type,
65
- threshold: this.threshold,
66
- success: false,
67
- score: 0,
68
- reason: `Error during scoring: ${errorMessage}`,
69
- strict_mode: null,
70
- evaluation_model: "exact-match",
71
- error: errorMessage,
72
- evaluation_cost: null,
73
- verbose_logs: null,
74
- additional_metadata: this.additional_metadata || {}
75
- };
76
- }
77
- });
78
- }
79
- }
80
- //# sourceMappingURL=exact-match-scorer.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"exact-match-scorer.js","sourceRoot":"","sources":["../../../src/scorers/exact-match-scorer.ts"],"names":[],"mappings":";;;;;;;;;AAIA,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAGlD,MAAM,OAAO,gBAAiB,SAAQ,cAAc;IAClD,YAAY,YAAoB,GAAG,EAAE,kBAAwC,EAAE,UAAmB,KAAK;QACrG,KAAK,CAAC,aAAa,EAAE,SAAS,EAAE,kBAAkB,EAAE,OAAO,CAAC,CAAC;IAC/D,CAAC;IAEK,YAAY,CAAC,OAAgB;;;YACjC,IAAI,CAAC;gBACH,2CAA2C;gBAC3C,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,CAAC;oBAC5B,OAAO;wBACL,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,SAAS,EAAE,IAAI,CAAC,SAAS;wBACzB,OAAO,EAAE,KAAK;wBACd,KAAK,EAAE,CAAC;wBACR,MAAM,EAAE,qDAAqD;wBAC7D,WAAW,EAAE,IAAI;wBACjB,gBAAgB,EAAE,aAAa;wBAC/B,KAAK,EAAE,yBAAyB;wBAChC,eAAe,EAAE,IAAI;wBACrB,YAAY,EAAE,IAAI;wBAClB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;qBACpD,CAAC;gBACJ,CAAC;gBAED,qDAAqD;gBACrD,MAAM,YAAY,GAAG,CAAA,MAAA,OAAO,CAAC,YAAY,0CAAE,IAAI,EAAE,KAAI,EAAE,CAAC;gBACxD,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;gBAErD,uDAAuD;gBACvD,MAAM,OAAO,GAAG,YAAY,KAAK,cAAc,CAAC;gBAChD,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAE7B,kCAAkC;gBAClC,MAAM,MAAM,GAAG,OAAO;oBACpB,CAAC,CAAC,wDAAwD;oBAC1D,CAAC,CAAC,sBAAsB,YAAY,yCAAyC,cAAc,IAAI,CAAC;gBAElG,yBAAyB;gBACzB,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,IAAI,CAAC,YAAY,EAAE;oBAC5B,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,MAAM,EAAE,MAAM;oBACd,WAAW,EAAE,IAAI;oBACjB,gBAAgB,EAAE,aAAa;oBAC/B,KAAK,EAAE,IAAI;oBACX,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,YAAY,WAAW,cAAc,GAAG,CAAC,CAAC,CAAC,IAAI;oBAC3F,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;iBACpD,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,mCAAmC;gBACnC,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAE5E,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAC;oBACR,MAAM,EAAE,yBAAyB,YAAY,EAAE;oBAC/C,WAAW,EAAE,IAAI;oBACjB,gBAAgB,EAAE,aAAa;oBAC/B,KAAK,EAAE,YAAY;oBACnB,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI;oBAClB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,EAAE;iBACpD,CAAC;YACJ,CAAC;QACH,CAAC;KAAA;CACF"}
@@ -1,10 +0,0 @@
1
- /**
2
- * ExactMatchScorer - A custom scorer that checks if the actual output exactly matches the expected output
3
- */
4
- import { Example } from '../data/example.js';
5
- import { JudgevalScorer } from './base-scorer.js';
6
- import { ScorerData } from '../data/result.js';
7
- export declare class ExactMatchScorer extends JudgevalScorer {
8
- constructor(threshold?: number, additionalMetadata?: Record<string, any>, verbose?: boolean);
9
- scoreExample(example: Example): Promise<ScorerData>;
10
- }