judgeval 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/README.md +95 -68
  2. package/dist/cjs/common/tracer.js +235 -143
  3. package/dist/cjs/common/tracer.js.map +1 -1
  4. package/dist/cjs/constants.js +8 -5
  5. package/dist/cjs/constants.js.map +1 -1
  6. package/dist/cjs/data/datasets/eval-dataset-client.js +349 -0
  7. package/dist/cjs/data/datasets/eval-dataset-client.js.map +1 -0
  8. package/dist/cjs/data/datasets/eval-dataset.js +405 -0
  9. package/dist/cjs/data/datasets/eval-dataset.js.map +1 -0
  10. package/dist/cjs/data/example.js +22 -1
  11. package/dist/cjs/data/example.js.map +1 -1
  12. package/dist/cjs/e2etests/eval-operations.test.js +282 -0
  13. package/dist/cjs/e2etests/eval-operations.test.js.map +1 -0
  14. package/dist/cjs/e2etests/judgee-traces.test.js +278 -0
  15. package/dist/cjs/e2etests/judgee-traces.test.js.map +1 -0
  16. package/dist/cjs/index.js +1 -3
  17. package/dist/cjs/index.js.map +1 -1
  18. package/dist/cjs/judgment-client.js +326 -645
  19. package/dist/cjs/judgment-client.js.map +1 -1
  20. package/dist/cjs/scorers/api-scorer.js +56 -48
  21. package/dist/cjs/scorers/api-scorer.js.map +1 -1
  22. package/dist/cjs/scorers/base-scorer.js +66 -11
  23. package/dist/cjs/scorers/base-scorer.js.map +1 -1
  24. package/dist/esm/common/tracer.js +236 -144
  25. package/dist/esm/common/tracer.js.map +1 -1
  26. package/dist/esm/constants.js +7 -4
  27. package/dist/esm/constants.js.map +1 -1
  28. package/dist/esm/data/datasets/eval-dataset-client.js +342 -0
  29. package/dist/esm/data/datasets/eval-dataset-client.js.map +1 -0
  30. package/dist/esm/data/datasets/eval-dataset.js +375 -0
  31. package/dist/esm/data/datasets/eval-dataset.js.map +1 -0
  32. package/dist/esm/data/example.js +22 -1
  33. package/dist/esm/data/example.js.map +1 -1
  34. package/dist/esm/e2etests/eval-operations.test.js +254 -0
  35. package/dist/esm/e2etests/eval-operations.test.js.map +1 -0
  36. package/dist/esm/e2etests/judgee-traces.test.js +253 -0
  37. package/dist/esm/e2etests/judgee-traces.test.js.map +1 -0
  38. package/dist/esm/index.js +0 -1
  39. package/dist/esm/index.js.map +1 -1
  40. package/dist/esm/judgment-client.js +328 -647
  41. package/dist/esm/judgment-client.js.map +1 -1
  42. package/dist/esm/scorers/api-scorer.js +56 -48
  43. package/dist/esm/scorers/api-scorer.js.map +1 -1
  44. package/dist/esm/scorers/base-scorer.js +66 -11
  45. package/dist/esm/scorers/base-scorer.js.map +1 -1
  46. package/dist/types/common/tracer.d.ts +27 -14
  47. package/dist/types/constants.d.ts +4 -4
  48. package/dist/types/data/datasets/eval-dataset-client.d.ts +39 -0
  49. package/dist/types/data/datasets/eval-dataset.d.ts +45 -0
  50. package/dist/types/data/example.d.ts +24 -12
  51. package/dist/types/e2etests/eval-operations.test.d.ts +5 -0
  52. package/dist/types/e2etests/judgee-traces.test.d.ts +5 -0
  53. package/dist/types/index.d.ts +0 -1
  54. package/dist/types/judgment-client.d.ts +3 -47
  55. package/dist/types/scorers/api-scorer.d.ts +15 -15
  56. package/dist/types/scorers/base-scorer.d.ts +53 -10
  57. package/package.json +2 -1
  58. package/dist/cjs/scorers/exact-match-scorer.js +0 -84
  59. package/dist/cjs/scorers/exact-match-scorer.js.map +0 -1
  60. package/dist/esm/scorers/exact-match-scorer.js +0 -80
  61. package/dist/esm/scorers/exact-match-scorer.js.map +0 -1
  62. package/dist/types/scorers/exact-match-scorer.d.ts +0 -10
@@ -26,13 +26,15 @@ interface TraceEntry {
26
26
  function: string;
27
27
  span_id: string;
28
28
  depth: number;
29
- timestamp: number;
29
+ created_at: number;
30
30
  duration?: number;
31
31
  output?: any;
32
32
  inputs?: Record<string, any>;
33
33
  span_type: SpanType;
34
34
  parent_span_id?: string;
35
35
  evaluation_runs?: any[];
36
+ trace_id?: string;
37
+ message?: string;
36
38
  }
37
39
  interface TraceSavePayload {
38
40
  trace_id: string;
@@ -49,8 +51,7 @@ interface TraceSavePayload {
49
51
  total_cost_usd: number;
50
52
  };
51
53
  entries: CondensedSpanEntry[];
52
- rules?: Record<string, Rule>;
53
- empty_save: boolean;
54
+ evaluation_runs: any[];
54
55
  overwrite: boolean;
55
56
  parent_trace_id?: string | null;
56
57
  parent_name?: string | null;
@@ -59,15 +60,24 @@ interface CondensedSpanEntry {
59
60
  span_id: string;
60
61
  function: string;
61
62
  depth: number;
62
- timestamp: number;
63
+ created_at: string;
63
64
  parent_span_id?: string | null;
64
65
  span_type: SpanType;
65
66
  inputs: Record<string, any> | null;
66
67
  output: any | null;
67
- evaluation_runs: any[];
68
68
  duration: number | null;
69
+ trace_id?: string;
69
70
  children?: CondensedSpanEntry[];
70
71
  }
72
+ interface TokenCostResponse {
73
+ model: string;
74
+ prompt_tokens: number;
75
+ completion_tokens: number;
76
+ total_tokens: number;
77
+ prompt_tokens_cost_usd: number;
78
+ completion_tokens_cost_usd: number;
79
+ total_cost_usd: number;
80
+ }
71
81
  /**
72
82
  * Client for interacting with Judgment trace API endpoints.
73
83
  */
@@ -77,10 +87,20 @@ declare class TraceManagerClient {
77
87
  constructor(apiKey: string, organizationId: string);
78
88
  private _fetch;
79
89
  fetchTrace(traceId: string): Promise<any>;
80
- saveTrace(traceData: TraceSavePayload, emptySave: boolean): Promise<any>;
90
+ saveTrace(traceData: TraceSavePayload): Promise<any>;
81
91
  deleteTrace(traceId: string): Promise<any>;
82
92
  deleteTraces(traceIds: string[]): Promise<any>;
83
93
  addTraceToEvalQueue(traceData: TraceSavePayload): Promise<any>;
94
+ /**
95
+ * Calculate token costs directly using the API endpoint.
96
+ * This is more accurate than client-side calculation as it uses the most up-to-date pricing.
97
+ *
98
+ * @param model The model name (e.g. 'gpt-4', 'claude-3-opus-20240229')
99
+ * @param promptTokens Number of tokens in the prompt/input
100
+ * @param completionTokens Number of tokens in the completion/output
101
+ * @returns Object containing token counts and calculated costs in USD
102
+ */
103
+ calculateTokenCosts(model: string, promptTokens: number, completionTokens: number): Promise<TokenCostResponse | null>;
84
104
  }
85
105
  /**
86
106
  * Represents an ongoing trace context.
@@ -100,6 +120,7 @@ declare class TraceClient {
100
120
  private apiKey;
101
121
  private organizationId;
102
122
  private originalName;
123
+ private _spanDepths;
103
124
  constructor(config: {
104
125
  tracer: Tracer;
105
126
  traceId?: string;
@@ -154,14 +175,6 @@ declare class TraceClient {
154
175
  model?: string;
155
176
  logResults?: boolean;
156
177
  }): Promise<void>;
157
- /**
158
- * Private helper to add an evaluation entry to the trace.
159
- * This mirrors the structure of Python's add_eval_run.
160
- *
161
- * @param evalRunPayload The constructed payload for the evaluation.
162
- * @param startTime The start time (in seconds) of the evaluation process.
163
- */
164
- private _addEvalRun;
165
178
  getOriginalName(): string;
166
179
  }
167
180
  /**
@@ -21,15 +21,15 @@ export declare enum APIScorer {
21
21
  }
22
22
  export declare const UNBOUNDED_SCORERS: Set<APIScorer>;
23
23
  export declare const ROOT_API = "https://api.judgmentlabs.ai";
24
- export declare const JUDGMENT_EVAL_API_URL = "https://api.judgmentlabs.ai/evaluate/";
25
24
  export declare const JUDGMENT_DATASETS_PUSH_API_URL = "https://api.judgmentlabs.ai/datasets/push/";
26
25
  export declare const JUDGMENT_DATASETS_PULL_API_URL = "https://api.judgmentlabs.ai/datasets/pull/";
27
26
  export declare const JUDGMENT_DATASETS_DELETE_API_URL = "https://api.judgmentlabs.ai/datasets/delete/";
28
27
  export declare const JUDGMENT_DATASETS_EXPORT_JSONL_API_URL = "https://api.judgmentlabs.ai/datasets/export_jsonl/";
29
28
  export declare const JUDGMENT_DATASETS_PROJECT_STATS_API_URL = "https://api.judgmentlabs.ai/datasets/fetch_stats_by_project/";
30
29
  export declare const JUDGMENT_DATASETS_INSERT_API_URL = "https://api.judgmentlabs.ai/datasets/insert_examples/";
31
- export declare const JUDGMENT_EVAL_LOG_API_URL = "https://api.judgmentlabs.ai/log_eval_results/";
32
30
  export declare const JUDGMENT_EVAL_FETCH_API_URL = "https://api.judgmentlabs.ai/fetch_eval_results/";
31
+ export declare const JUDGMENT_EVAL_API_URL = "https://api.judgmentlabs.ai/evaluate/";
32
+ export declare const JUDGMENT_EVAL_LOG_API_URL = "https://api.judgmentlabs.ai/log_eval_results/";
33
33
  export declare const JUDGMENT_EVAL_DELETE_API_URL = "https://api.judgmentlabs.ai/delete_eval_results_by_project_and_run_names/";
34
34
  export declare const JUDGMENT_EVAL_DELETE_PROJECT_API_URL = "https://api.judgmentlabs.ai/delete_eval_results_by_project/";
35
35
  export declare const JUDGMENT_PROJECT_DELETE_API_URL = "https://api.judgmentlabs.ai/projects/delete/";
@@ -37,8 +37,8 @@ export declare const JUDGMENT_PROJECT_CREATE_API_URL = "https://api.judgmentlabs
37
37
  export declare const JUDGMENT_TRACES_FETCH_API_URL = "https://api.judgmentlabs.ai/traces/fetch/";
38
38
  export declare const JUDGMENT_TRACES_SAVE_API_URL = "https://api.judgmentlabs.ai/traces/save/";
39
39
  export declare const JUDGMENT_TRACES_DELETE_API_URL = "https://api.judgmentlabs.ai/traces/delete/";
40
- export declare const JUDGMENT_TRACES_ADD_TO_EVAL_QUEUE_API_URL = "https://api.judgmentlabs.ai/traces/add_to_trace_eval_queue/";
41
- export declare const JUDGMENT_WEBSOCKET_URL: string;
40
+ export declare const JUDGMENT_TRACES_ADD_TO_EVAL_QUEUE_API_URL = "https://api.judgmentlabs.ai/traces/add_to_eval_queue/";
41
+ export declare const JUDGMENT_CALCULATE_TOKEN_COSTS_API_URL = "https://api.judgmentlabs.ai/calculate-token-costs";
42
42
  export declare const JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL = "https://api.judgmentlabs.ai/add_to_run_eval_queue/";
43
43
  export declare const RABBITMQ_HOST: string;
44
44
  export declare const RABBITMQ_PORT: number;
@@ -0,0 +1,39 @@
1
+ import { Example } from '../example.js';
2
+ import { EvalDataset } from './eval-dataset.js';
3
+ import { AxiosResponse } from 'axios';
4
+ export declare class EvalDatasetClient {
5
+ private judgmentApiKey;
6
+ private organizationId;
7
+ constructor(judgmentApiKey: string, organizationId: string);
8
+ createDataset(examples?: Example[]): EvalDataset;
9
+ /**
10
+ * Pushes the dataset to the Judgment platform.
11
+ * @returns True if successful, false otherwise.
12
+ */
13
+ pushDataset(dataset: EvalDataset, alias: string, projectName: string, overwrite?: boolean): Promise<boolean>;
14
+ /**
15
+ * Pulls the dataset from the Judgment platform.
16
+ */
17
+ pullDataset(alias: string, projectName: string): Promise<EvalDataset>;
18
+ /**
19
+ * Deletes the dataset from the Judgment platform.
20
+ * @returns True if successful, false otherwise.
21
+ */
22
+ deleteDataset(alias: string, projectName: string): Promise<boolean>;
23
+ /**
24
+ * Pulls dataset statistics for a project from the Judgment platform.
25
+ */
26
+ pullProjectDatasetStats(projectName: string): Promise<Record<string, any>>;
27
+ /**
28
+ * Inserts new examples into an existing dataset on the Judgment platform.
29
+ * @returns True if successful, false otherwise.
30
+ */
31
+ insertDataset(alias: string, examples: Example[], projectName: string): Promise<boolean>;
32
+ /**
33
+ * Exports a dataset in JSONL format from the Judgment platform.
34
+ * @returns AxiosResponse containing the stream if successful.
35
+ */
36
+ exportJsonl(alias: string, projectName: string): Promise<AxiosResponse>;
37
+ private getAuthHeaders;
38
+ private handleApiError;
39
+ }
@@ -0,0 +1,45 @@
1
+ import { Example, ExampleOptions } from '../example.js';
2
+ type SaveFileType = 'json' | 'csv' | 'yaml';
3
+ export declare class EvalDataset {
4
+ examples: Example[];
5
+ private _alias;
6
+ private _id;
7
+ constructor(examples?: Example[]);
8
+ addExample(e: Example): void;
9
+ get length(): number;
10
+ get alias(): string | null;
11
+ set alias(value: string | null);
12
+ get id(): string | null;
13
+ set id(value: string | null);
14
+ /**
15
+ * Adds examples from a JSON file.
16
+ * Assumes the JSON file has a top-level key "examples" containing an array of example objects.
17
+ * @param filePath Path to the JSON file.
18
+ */
19
+ addFromJson(filePath: string): void;
20
+ /**
21
+ * Adds examples from a YAML file.
22
+ * Assumes the YAML file has a top-level key "examples" containing an array of example objects.
23
+ * @param filePath Path to the YAML file.
24
+ */
25
+ addFromYaml(filePath: string): void;
26
+ /**
27
+ * Adds examples from a CSV file.
28
+ * @param filePath Path to the CSV file.
29
+ * @param headerMapping Dictionary mapping Example headers (keys) to custom headers in the CSV (values).
30
+ * @param primaryDelimiter Main delimiter used in CSV file. Defaults to ",".
31
+ * @param secondaryDelimiter Secondary delimiter for list fields (context, retrieval_context, etc.). Defaults to ";".
32
+ */
33
+ addFromCsv(filePath: string, headerMapping: {
34
+ [key in keyof ExampleOptions]?: string;
35
+ }, primaryDelimiter?: string, secondaryDelimiter?: string): void;
36
+ /**
37
+ * Saves the dataset as a file.
38
+ * @param fileType The file type to save as ('json', 'csv', 'yaml').
39
+ * @param dirPath The directory path to save the file to.
40
+ * @param saveName Optional: The name of the file (without extension). Defaults to a timestamp.
41
+ * @param secondaryDelimiter Optional: The delimiter used for joining list fields in CSV output. Defaults to ";".
42
+ */
43
+ saveAs(fileType: SaveFileType, dirPath: string, saveName?: string, secondaryDelimiter?: string): void;
44
+ }
45
+ export {};
@@ -3,29 +3,35 @@
3
3
  */
4
4
  export interface ExampleOptions {
5
5
  input: string;
6
- actualOutput?: string;
7
- expectedOutput?: string;
6
+ actualOutput?: string | string[];
7
+ expectedOutput?: string | string[];
8
8
  context?: string[];
9
9
  retrievalContext?: string[];
10
10
  additionalMetadata?: Record<string, any>;
11
- toolsCalled?: any[];
12
- expectedTools?: any[];
11
+ toolsCalled?: string[];
12
+ expectedTools?: string[];
13
+ name?: string;
13
14
  exampleId?: string;
14
15
  exampleIndex?: number;
15
16
  timestamp?: string;
17
+ traceId?: string;
18
+ example?: boolean;
16
19
  }
17
20
  export declare class Example {
18
21
  input: string;
19
- actualOutput?: string;
20
- expectedOutput?: string;
22
+ actualOutput?: string | string[];
23
+ expectedOutput?: string | string[];
21
24
  context?: string[];
22
25
  retrievalContext?: string[];
23
26
  additionalMetadata?: Record<string, any>;
24
- toolsCalled?: any[];
25
- expectedTools?: any[];
27
+ toolsCalled?: string[];
28
+ expectedTools?: string[];
29
+ name?: string;
26
30
  exampleId: string;
27
31
  exampleIndex?: number;
28
32
  timestamp?: string;
33
+ traceId?: string;
34
+ example?: boolean;
29
35
  constructor(options: ExampleOptions);
30
36
  /**
31
37
  * Generate a UUID for the example ID
@@ -52,19 +58,25 @@ export declare class ExampleBuilder {
52
58
  private _additionalMetadata?;
53
59
  private _toolsCalled?;
54
60
  private _expectedTools?;
61
+ private _name?;
55
62
  private _exampleId?;
56
63
  private _exampleIndex?;
57
64
  private _timestamp?;
65
+ private _traceId?;
66
+ private _example?;
58
67
  input(input: string): ExampleBuilder;
59
- actualOutput(actualOutput: string): ExampleBuilder;
60
- expectedOutput(expectedOutput: string): ExampleBuilder;
68
+ actualOutput(actualOutput: string | string[]): ExampleBuilder;
69
+ expectedOutput(expectedOutput: string | string[]): ExampleBuilder;
61
70
  context(context: string[]): ExampleBuilder;
62
71
  retrievalContext(retrievalContext: string[]): ExampleBuilder;
63
72
  additionalMetadata(additionalMetadata: Record<string, any>): ExampleBuilder;
64
- toolsCalled(toolsCalled: any[]): ExampleBuilder;
65
- expectedTools(expectedTools: any[]): ExampleBuilder;
73
+ toolsCalled(toolsCalled: string[]): ExampleBuilder;
74
+ expectedTools(expectedTools: string[]): ExampleBuilder;
75
+ name(name: string): ExampleBuilder;
66
76
  exampleId(exampleId: string): ExampleBuilder;
67
77
  exampleIndex(exampleIndex: number): ExampleBuilder;
68
78
  timestamp(timestamp: string): ExampleBuilder;
79
+ traceId(traceId: string): ExampleBuilder;
80
+ example(example: boolean): ExampleBuilder;
69
81
  build(): Example;
70
82
  }
@@ -0,0 +1,5 @@
1
+ /**
2
+ * E2E tests for evaluation operations in the JudgmentClient.
3
+ * Migrated from the Python SDK's test_eval_operations.py
4
+ */
5
+ export {};
@@ -0,0 +1,5 @@
1
+ /**
2
+ * E2E tests for judgee traces operations in the Tracer API.
3
+ * Migrated from the Python SDK's test_judgee_traces_update.py
4
+ */
5
+ export {};
@@ -3,7 +3,6 @@ export { ScoringResult, ScoringResultBuilder, ScorerData, ScoringResultOptions }
3
3
  export { Tracer, SpanType, wrap, TraceClient } from './common/tracer.js';
4
4
  export { Scorer, APIJudgmentScorer, JudgevalScorer, ScorerWrapper } from './scorers/base-scorer.js';
5
5
  export { AnswerCorrectnessScorer, AnswerRelevancyScorer, ComparisonScorer, ContextualPrecisionScorer, ContextualRecallScorer, ContextualRelevancyScorer, ExecutionOrderScorer, FaithfulnessScorer, GroundednessScorer, HallucinationScorer, InstructionAdherenceScorer, JsonCorrectnessScorer, SummarizationScorer } from './scorers/api-scorer.js';
6
- export { ExactMatchScorer } from './scorers/exact-match-scorer.js';
7
6
  export { AlertStatus, Condition, NotificationConfig, Rule, AlertResult, RulesEngine } from './rules.js';
8
7
  export { EvaluationRun, EvaluationRunOptions } from './evaluation-run.js';
9
8
  export { runEval, assertTest, JudgmentAPIError, sendToRabbitMQ, executeApiEval, mergeResults, checkMissingScorerData, checkEvalRunNameExists, logEvaluationResults, checkExamples } from './run-evaluation.js';
@@ -50,32 +50,8 @@ export declare class JudgmentClient {
50
50
  /**
51
51
  * Evaluate a dataset
52
52
  */
53
- evaluateDataset(dataset: any, // EvalDataset would be implemented separately
53
+ evaluateDataset(dataset: any, // Keep type loose for stub
54
54
  scorers: Array<ScorerWrapper | JudgevalScorer>, model: string | string[] | any, aggregator?: string, metadata?: Record<string, any>, projectName?: string, evalRunName?: string, logResults?: boolean, useJudgment?: boolean, rules?: Rule[]): Promise<ScoringResult[]>;
55
- /**
56
- * Create a dataset
57
- */
58
- createDataset(): any;
59
- /**
60
- * Push a dataset to the Judgment platform
61
- */
62
- pushDataset(alias: string, dataset: any, projectName: string, overwrite?: boolean): Promise<boolean>;
63
- /**
64
- * Pull a dataset from the Judgment platform
65
- */
66
- pullDataset(alias: string, projectName: string): Promise<any>;
67
- /**
68
- * Delete a dataset from the Judgment platform
69
- */
70
- deleteDataset(alias: string, projectName: string): Promise<boolean>;
71
- /**
72
- * Pull project dataset stats from the Judgment platform
73
- */
74
- pullProjectDatasetStats(projectName: string): Promise<Record<string, any>>;
75
- /**
76
- * Insert examples into a dataset on the Judgment platform
77
- */
78
- insertDataset(alias: string, examples: Example[], projectName: string): Promise<boolean>;
79
55
  /**
80
56
  * Pull evaluation results from the server
81
57
  * @param projectName Name of the project
@@ -83,28 +59,6 @@ export declare class JudgmentClient {
83
59
  * @returns Array containing one object with 'id' and 'results' (list of ScoringResult)
84
60
  */
85
61
  pullEval(projectName: string, evalRunName: string): Promise<Array<Record<string, any | ScoringResult[]>>>;
86
- /**
87
- * Get evaluation run results (alias for pullEval with a more intuitive name)
88
- * @param projectName Name of the project
89
- * @param evalRunName Name of the evaluation run
90
- * @returns Array containing one object with 'id' and 'results' (list of ScoringResult)
91
- */
92
- getEvalRun(projectName: string, evalRunName: string): Promise<Array<Record<string, any | ScoringResult[]>>>;
93
- /**
94
- * List all evaluation runs for a project
95
- * @param projectName Name of the project
96
- * @param limit Maximum number of evaluation runs to return (default: 100)
97
- * @param offset Offset for pagination (default: 0)
98
- * @returns List of evaluation run metadata
99
- */
100
- listEvalRuns(projectName: string, limit?: number, offset?: number): Promise<Array<Record<string, any>>>;
101
- /**
102
- * Get evaluation run statistics
103
- * @param projectName Name of the project
104
- * @param evalRunName Name of the evaluation run
105
- * @returns Statistics for the evaluation run
106
- */
107
- getEvalRunStats(projectName: string, evalRunName: string): Promise<Record<string, any>>;
108
62
  /**
109
63
  * Export evaluation results to a file format
110
64
  * @param projectName Name of the project
@@ -176,4 +130,6 @@ export declare class JudgmentClient {
176
130
  * @returns A string representing the progress bar
177
131
  */
178
132
  private _createProgressBar;
133
+ private getAuthHeaders;
134
+ private handleApiError;
179
135
  }
@@ -5,67 +5,67 @@ import { ScorerData } from '../data/result.js';
5
5
  * Implementation of API-based scorers
6
6
  */
7
7
  export declare class AnswerCorrectnessScorer extends APIJudgmentScorer {
8
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
8
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
9
9
  a_score_example(example: Example): Promise<ScorerData>;
10
10
  }
11
11
  export declare class AnswerRelevancyScorer extends APIJudgmentScorer {
12
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
12
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
13
13
  a_score_example(example: Example): Promise<ScorerData>;
14
14
  }
15
15
  export declare class ComparisonScorer extends APIJudgmentScorer {
16
16
  criteria: string[];
17
17
  description: string;
18
- constructor(threshold?: number, criteria?: string[], description?: string, additional_metadata?: Record<string, any>, verbose?: boolean);
18
+ constructor(threshold?: number, criteria?: string[], description?: string, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
19
19
  toJSON(): Record<string, any>;
20
20
  a_score_example(example: Example): Promise<ScorerData>;
21
21
  }
22
22
  export declare class ContextualPrecisionScorer extends APIJudgmentScorer {
23
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
23
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
24
24
  a_score_example(example: Example): Promise<ScorerData>;
25
25
  }
26
26
  export declare class ContextualRecallScorer extends APIJudgmentScorer {
27
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
27
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
28
28
  a_score_example(example: Example): Promise<ScorerData>;
29
29
  }
30
30
  export declare class ContextualRelevancyScorer extends APIJudgmentScorer {
31
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
31
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
32
32
  a_score_example(example: Example): Promise<ScorerData>;
33
33
  }
34
34
  export declare class ExecutionOrderScorer extends APIJudgmentScorer {
35
35
  strictMode: boolean;
36
36
  expectedTools?: string[];
37
- constructor(threshold?: number, strictMode?: boolean, expectedTools?: string[], additional_metadata?: Record<string, any>, verbose?: boolean);
37
+ constructor(threshold?: number, expectedTools?: string[], additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
38
38
  toJSON(): Record<string, any>;
39
39
  a_score_example(example: Example): Promise<ScorerData>;
40
40
  }
41
41
  export declare class FaithfulnessScorer extends APIJudgmentScorer {
42
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
42
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
43
43
  a_score_example(example: Example): Promise<ScorerData>;
44
44
  }
45
45
  export declare class GroundednessScorer extends APIJudgmentScorer {
46
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
46
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
47
47
  a_score_example(example: Example): Promise<ScorerData>;
48
48
  }
49
49
  export declare class HallucinationScorer extends APIJudgmentScorer {
50
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
50
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
51
51
  a_score_example(example: Example): Promise<ScorerData>;
52
52
  }
53
53
  export declare class InstructionAdherenceScorer extends APIJudgmentScorer {
54
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
54
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
55
55
  a_score_example(example: Example): Promise<ScorerData>;
56
56
  }
57
57
  export declare class JsonCorrectnessScorer extends APIJudgmentScorer {
58
58
  jsonSchema?: Record<string, any>;
59
- constructor(threshold?: number, jsonSchema?: Record<string, any>, additional_metadata?: Record<string, any>, verbose?: boolean);
59
+ constructor(threshold?: number, jsonSchema?: Record<string, any>, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
60
60
  toJSON(): Record<string, any>;
61
61
  a_score_example(example: Example): Promise<ScorerData>;
62
62
  }
63
63
  export declare class SummarizationScorer extends APIJudgmentScorer {
64
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
64
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
65
65
  a_score_example(example: Example): Promise<ScorerData>;
66
66
  }
67
67
  export declare class Text2SQLScorer extends APIJudgmentScorer {
68
- constructor(threshold?: number, additional_metadata?: Record<string, any>, verbose?: boolean);
68
+ constructor(threshold?: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
69
69
  a_score_example(example: Example): Promise<ScorerData>;
70
70
  }
71
71
  export declare class ScorerWrapper {
@@ -75,5 +75,5 @@ export declare class ScorerWrapper {
75
75
  get threshold(): number;
76
76
  get additional_metadata(): Record<string, any> | undefined;
77
77
  toJSON(): Record<string, any>;
78
- static fromType(type: string, threshold: number, additional_metadata?: Record<string, any>, verbose?: boolean): APIJudgmentScorer;
78
+ static fromType(type: string, threshold: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean): APIJudgmentScorer;
79
79
  }
@@ -8,8 +8,18 @@ export interface Scorer {
8
8
  scoreType: string;
9
9
  threshold: number;
10
10
  score?: number;
11
+ score_breakdown?: Record<string, any>;
12
+ reason?: string;
13
+ success?: boolean;
14
+ evaluation_model?: string;
15
+ strict_mode: boolean;
16
+ async_mode: boolean;
17
+ verbose_mode: boolean;
18
+ include_reason: boolean;
19
+ error?: string;
20
+ evaluation_cost?: number;
21
+ verbose_logs?: string;
11
22
  additional_metadata?: Record<string, any>;
12
- verbose: boolean;
13
23
  validateThreshold(): void;
14
24
  toJSON(): Record<string, any>;
15
25
  successCheck(): boolean;
@@ -22,9 +32,13 @@ export declare abstract class APIJudgmentScorer implements Scorer {
22
32
  get scoreType(): string;
23
33
  readonly threshold: number;
24
34
  score?: number;
35
+ score_breakdown?: Record<string, any>;
25
36
  additional_metadata?: Record<string, any>;
26
- verbose: boolean;
27
- constructor(type: string, threshold: number, additional_metadata?: Record<string, any>, verbose?: boolean);
37
+ strict_mode: boolean;
38
+ async_mode: boolean;
39
+ verbose_mode: boolean;
40
+ include_reason: boolean;
41
+ constructor(type: string, threshold: number, additional_metadata?: Record<string, any>, strict_mode?: boolean, async_mode?: boolean, verbose_mode?: boolean, include_reason?: boolean);
28
42
  /**
29
43
  * Check if the score meets the threshold
30
44
  */
@@ -47,27 +61,46 @@ export declare abstract class JudgevalScorer implements Scorer {
47
61
  scoreType: string;
48
62
  threshold: number;
49
63
  score?: number;
64
+ score_breakdown?: Record<string, any>;
65
+ reason?: string;
66
+ success?: boolean;
67
+ evaluation_model?: string;
68
+ strict_mode: boolean;
69
+ async_mode: boolean;
70
+ verbose_mode: boolean;
71
+ include_reason: boolean;
72
+ error?: string;
73
+ evaluation_cost?: number;
74
+ verbose_logs?: string;
50
75
  additional_metadata?: Record<string, any>;
51
- verbose: boolean;
52
- constructor(type: string, threshold: number, additional_metadata?: Record<string, any>, verbose?: boolean);
76
+ constructor(type: string, threshold: number, additional_metadata?: Record<string, any>, include_reason?: boolean, async_mode?: boolean, strict_mode?: boolean, verbose_mode?: boolean);
53
77
  /**
54
78
  * Check if the score meets the threshold
55
79
  */
56
80
  successCheck(): boolean;
81
+ /**
82
+ * Internal method to check success
83
+ * This is equivalent to Python's _success_check method
84
+ */
85
+ protected _successCheck(): boolean;
57
86
  /**
58
87
  * Validate that the threshold is within the allowed range
59
88
  */
60
89
  validateThreshold(): void;
90
+ /**
91
+ * Convert the scorer to a plain object
92
+ */
93
+ toJSON(): Record<string, any>;
61
94
  /**
62
95
  * Score an example
63
- * @param example The example to score
64
- * @returns A ScorerData object with the score
96
+ * This must be implemented by subclasses
65
97
  */
66
98
  abstract scoreExample(example: Example): Promise<ScorerData>;
67
99
  /**
68
- * Convert the scorer to a plain object
100
+ * Get the name of the scorer
101
+ * This is equivalent to Python's __name__ property
69
102
  */
70
- toJSON(): Record<string, any>;
103
+ get name(): string;
71
104
  }
72
105
  /**
73
106
  * Wrapper for scorers to allow dynamic loading of implementations
@@ -77,8 +110,18 @@ export declare class ScorerWrapper implements Scorer {
77
110
  scoreType: string;
78
111
  threshold: number;
79
112
  score?: number;
113
+ score_breakdown?: Record<string, any>;
114
+ reason?: string;
115
+ success?: boolean;
116
+ evaluation_model?: string;
117
+ strict_mode: boolean;
118
+ async_mode: boolean;
119
+ verbose_mode: boolean;
120
+ include_reason: boolean;
121
+ error?: string;
122
+ evaluation_cost?: number;
123
+ verbose_logs?: string;
80
124
  additional_metadata?: Record<string, any>;
81
- verbose: boolean;
82
125
  scorer: any;
83
126
  constructor(scorer: any);
84
127
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "judgeval",
3
- "version": "0.2.0",
3
+ "version": "0.2.2",
4
4
  "description": "Judgment SDK for TypeScript/JavaScript",
5
5
  "main": "./dist/cjs/index.js",
6
6
  "module": "./dist/esm/index.js",
@@ -49,6 +49,7 @@
49
49
  "@types/node": "^20.12.12",
50
50
  "@typescript-eslint/eslint-plugin": "^7.10.0",
51
51
  "@typescript-eslint/parser": "^7.10.0",
52
+ "cross-env": "^7.0.3",
52
53
  "eslint": "^8.57.0",
53
54
  "eslint-config-prettier": "^9.1.0",
54
55
  "eslint-plugin-prettier": "^5.1.3",