judgeval 0.1.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/LICENSE.md +202 -0
  2. package/README.md +340 -0
  3. package/dist/clients.d.ts +7 -0
  4. package/dist/clients.js +78 -0
  5. package/dist/clients.js.map +1 -0
  6. package/dist/common/integrations/langgraph.d.ts +40 -0
  7. package/dist/common/integrations/langgraph.js +444 -0
  8. package/dist/common/integrations/langgraph.js.map +1 -0
  9. package/dist/common/logger-instance.d.ts +3 -0
  10. package/dist/common/logger-instance.js +64 -0
  11. package/dist/common/logger-instance.js.map +1 -0
  12. package/dist/common/logger.d.ts +54 -0
  13. package/dist/common/logger.js +221 -0
  14. package/dist/common/logger.js.map +1 -0
  15. package/dist/common/tracer.d.ts +205 -0
  16. package/dist/common/tracer.js +1035 -0
  17. package/dist/common/tracer.js.map +1 -0
  18. package/dist/constants.d.ts +51 -0
  19. package/dist/constants.js +344 -0
  20. package/dist/constants.js.map +1 -0
  21. package/dist/data/example.d.ts +70 -0
  22. package/dist/data/example.js +125 -0
  23. package/dist/data/example.js.map +1 -0
  24. package/dist/data/result.d.ts +51 -0
  25. package/dist/data/result.js +83 -0
  26. package/dist/data/result.js.map +1 -0
  27. package/dist/evaluation-run.d.ts +44 -0
  28. package/dist/evaluation-run.js +136 -0
  29. package/dist/evaluation-run.js.map +1 -0
  30. package/dist/index.d.ts +10 -0
  31. package/dist/index.js +73 -0
  32. package/dist/index.js.map +1 -0
  33. package/dist/judgment-client.d.ts +179 -0
  34. package/dist/judgment-client.js +1038 -0
  35. package/dist/judgment-client.js.map +1 -0
  36. package/dist/rules.d.ts +120 -0
  37. package/dist/rules.js +322 -0
  38. package/dist/rules.js.map +1 -0
  39. package/dist/run-evaluation.d.ts +78 -0
  40. package/dist/run-evaluation.js +618 -0
  41. package/dist/run-evaluation.js.map +1 -0
  42. package/dist/scorers/api-scorer.d.ts +79 -0
  43. package/dist/scorers/api-scorer.js +291 -0
  44. package/dist/scorers/api-scorer.js.map +1 -0
  45. package/dist/scorers/base-scorer.d.ts +100 -0
  46. package/dist/scorers/base-scorer.js +190 -0
  47. package/dist/scorers/base-scorer.js.map +1 -0
  48. package/dist/scorers/exact-match-scorer.d.ts +10 -0
  49. package/dist/scorers/exact-match-scorer.js +84 -0
  50. package/dist/scorers/exact-match-scorer.js.map +1 -0
  51. package/package.json +88 -0
@@ -0,0 +1,179 @@
1
+ import { Example } from './data/example';
2
+ import { ScoringResult } from './data/result';
3
+ import { APIJudgmentScorer, JudgevalScorer, ScorerWrapper } from './scorers/base-scorer';
4
+ import { Rule } from './rules';
5
+ /**
6
+ * Client for the Judgment platform; a shared singleton instance is available via getInstance()
7
+ */
8
+ export declare class JudgmentClient {
9
+ private static instance;
10
+ private judgmentApiKey;
11
+ private organizationId;
12
+ /**
13
+ * Get the singleton instance of JudgmentClient
14
+ */
15
+ static getInstance(judgmentApiKey?: string, organizationId?: string): JudgmentClient;
16
+ /**
17
+ * Constructor for JudgmentClient
18
+ * @param judgmentApiKey The Judgment API key
19
+ * @param organizationId The organization ID
20
+ */
21
+ constructor(judgmentApiKey?: string, organizationId?: string);
22
+ /**
23
+ * Run an evaluation asynchronously
24
+ */
25
+ aRunEvaluation(examples: Example[], scorers: Array<ScorerWrapper | JudgevalScorer | APIJudgmentScorer>, model: string | string[] | any, aggregator?: string, metadata?: Record<string, any>, logResults?: boolean, projectName?: string, evalRunName?: string, override?: boolean, useJudgment?: boolean, ignoreErrors?: boolean, rules?: Rule[]): Promise<ScoringResult[]>;
26
+ /**
27
+ * Run an evaluation using positional arguments (see evaluate for the config-object form)
28
+ */
29
+ runEvaluation(examples: Example[], scorers: Array<ScorerWrapper | JudgevalScorer | APIJudgmentScorer>, model: string | string[] | any, aggregator?: string, metadata?: Record<string, any>, logResults?: boolean, projectName?: string, evalRunName?: string, override?: boolean, useJudgment?: boolean, ignoreErrors?: boolean, asyncExecution?: boolean, rules?: Rule[]): Promise<ScoringResult[]>;
30
+ /**
31
+ * Run an evaluation with a simplified interface (recommended)
32
+ * @param config Configuration object for the evaluation
33
+ * @returns Promise<ScoringResult[]> The evaluation results
34
+ */
35
+ evaluate(config: {
36
+ examples: Example[];
37
+ scorers: Array<ScorerWrapper | JudgevalScorer | APIJudgmentScorer>;
38
+ model?: string | string[] | any;
39
+ aggregator?: string;
40
+ metadata?: Record<string, any>;
41
+ projectName?: string;
42
+ evalName?: string;
43
+ logResults?: boolean;
44
+ useJudgment?: boolean;
45
+ ignoreErrors?: boolean;
46
+ asyncExecution?: boolean;
47
+ rules?: Rule[];
48
+ override?: boolean;
49
+ }): Promise<ScoringResult[]>;
50
+ /**
51
+ * Evaluate a dataset with the given scorers
52
+ */
53
+ evaluateDataset(dataset: any, // typed as any; no EvalDataset type is declared or imported in this file
54
+ scorers: Array<ScorerWrapper | JudgevalScorer>, model: string | string[] | any, aggregator?: string, metadata?: Record<string, any>, projectName?: string, evalRunName?: string, logResults?: boolean, useJudgment?: boolean, rules?: Rule[]): Promise<ScoringResult[]>;
55
+ /**
56
+ * Create a dataset
57
+ */
58
+ createDataset(): any;
59
+ /**
60
+ * Push a dataset to the Judgment platform
61
+ */
62
+ pushDataset(alias: string, dataset: any, projectName: string, overwrite?: boolean): Promise<boolean>;
63
+ /**
64
+ * Pull a dataset from the Judgment platform
65
+ */
66
+ pullDataset(alias: string, projectName: string): Promise<any>;
67
+ /**
68
+ * Delete a dataset from the Judgment platform
69
+ */
70
+ deleteDataset(alias: string, projectName: string): Promise<boolean>;
71
+ /**
72
+ * Pull project dataset stats from the Judgment platform
73
+ */
74
+ pullProjectDatasetStats(projectName: string): Promise<Record<string, any>>;
75
+ /**
76
+ * Insert examples into a dataset on the Judgment platform
77
+ */
78
+ insertDataset(alias: string, examples: Example[], projectName: string): Promise<boolean>;
79
+ /**
80
+ * Pull evaluation results from the server
81
+ * @param projectName Name of the project
82
+ * @param evalRunName Name of the evaluation run
83
+ * @returns Array containing one object with 'id' and 'results' (list of ScoringResult)
84
+ */
85
+ pullEval(projectName: string, evalRunName: string): Promise<Array<Record<string, any | ScoringResult[]>>>;
86
+ /**
87
+ * Get evaluation run results (alias for pullEval with a more intuitive name)
88
+ * @param projectName Name of the project
89
+ * @param evalRunName Name of the evaluation run
90
+ * @returns Array containing one object with 'id' and 'results' (list of ScoringResult)
91
+ */
92
+ getEvalRun(projectName: string, evalRunName: string): Promise<Array<Record<string, any | ScoringResult[]>>>;
93
+ /**
94
+ * List all evaluation runs for a project
95
+ * @param projectName Name of the project
96
+ * @param limit Maximum number of evaluation runs to return (default: 100)
97
+ * @param offset Offset for pagination (default: 0)
98
+ * @returns List of evaluation run metadata
99
+ */
100
+ listEvalRuns(projectName: string, limit?: number, offset?: number): Promise<Array<Record<string, any>>>;
101
+ /**
102
+ * Get evaluation run statistics
103
+ * @param projectName Name of the project
104
+ * @param evalRunName Name of the evaluation run
105
+ * @returns Statistics for the evaluation run
106
+ */
107
+ getEvalRunStats(projectName: string, evalRunName: string): Promise<Record<string, any>>;
108
+ /**
109
+ * Export evaluation results to a file format
110
+ * @param projectName Name of the project
111
+ * @param evalRunName Name of the evaluation run
112
+ * @param format Export format ('json' or 'csv')
113
+ * @returns The exported data as a string
114
+ */
115
+ exportEvalResults(projectName: string, evalRunName: string, format?: 'json' | 'csv'): Promise<string>;
116
+ /**
117
+ * Delete the evaluation runs named in evalRunNames from the server
118
+ */
119
+ deleteEval(projectName: string, evalRunNames: string[]): Promise<boolean>;
120
+ /**
121
+ * Delete all evaluations from the server for a given project
122
+ */
123
+ deleteProjectEvals(projectName: string): Promise<boolean>;
124
+ /**
125
+ * Create a project on the server
126
+ */
127
+ createProject(projectName: string): Promise<boolean>;
128
+ /**
129
+ * Delete a project from the server
130
+ */
131
+ deleteProject(projectName: string): Promise<boolean>;
132
+ /**
133
+ * Validate the user's API key
134
+ */
135
+ private validateApiKey;
136
+ /**
137
+ * Assert a test by running the evaluation and checking the results for success
138
+ */
139
+ assertTest(examples: Example[], scorers: Array<APIJudgmentScorer | JudgevalScorer>, // NOTE(review): ScorerWrapper is not accepted here, presumably to mirror the Python SDK - confirm
140
+ model: string | string[] | any, aggregator?: string, metadata?: Record<string, any>, logResults?: boolean, projectName?: string, evalRunName?: string, override?: boolean, rules?: Rule[]): Promise<void>;
141
+ /**
142
+ * Pull the results of an evaluation run. Matches `pullEval` logic but returns only the ScoringResult array.
143
+ * @param projectName The name of the project
144
+ * @param evalRunName The name of the evaluation run
145
+ * @returns The results of the evaluation run as ScoringResult[] or empty array on error/no results.
146
+ */
147
+ pullEvalResults(projectName: string, evalRunName: string): Promise<ScoringResult[]>;
148
+ /**
149
+ * Check the status of an evaluation run using the fetch endpoint.
150
+ * This is a heuristic approach as the endpoint might return full results or status info.
151
+ * @param projectName The name of the project
152
+ * @param evalRunName The name of the evaluation run
153
+ * @returns An object representing the status { status: string, progress: number, message: string, error?: string }
154
+ */
155
+ checkEvalStatus(projectName: string, evalRunName: string): Promise<{
156
+ status: string;
157
+ progress: number;
158
+ message: string;
159
+ error?: string;
160
+ }>;
161
+ /**
162
+ * Wait for an async evaluation to complete and return the results
163
+ * @param projectName The name of the project
164
+ * @param evalRunName The name of the evaluation run
165
+ * @param options Optional configuration for polling: intervalMs, maxAttempts, showProgress
166
+ * @returns The evaluation results as ScoringResult[] or empty array on timeout/failure.
167
+ */
168
+ waitForEvaluation(projectName: string, evalRunName: string, options?: {
169
+ intervalMs?: number;
170
+ maxAttempts?: number;
171
+ showProgress?: boolean;
172
+ }): Promise<ScoringResult[]>;
173
+ /**
174
+ * Create a simple ASCII progress bar
175
+ * @param percent The percentage to display (0-100)
176
+ * @returns A string representing the progress bar
177
+ */
178
+ private _createProgressBar;
179
+ }