judgeval 0.1.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +202 -0
- package/README.md +340 -0
- package/dist/clients.d.ts +7 -0
- package/dist/clients.js +78 -0
- package/dist/clients.js.map +1 -0
- package/dist/common/integrations/langgraph.d.ts +40 -0
- package/dist/common/integrations/langgraph.js +444 -0
- package/dist/common/integrations/langgraph.js.map +1 -0
- package/dist/common/logger-instance.d.ts +3 -0
- package/dist/common/logger-instance.js +64 -0
- package/dist/common/logger-instance.js.map +1 -0
- package/dist/common/logger.d.ts +54 -0
- package/dist/common/logger.js +221 -0
- package/dist/common/logger.js.map +1 -0
- package/dist/common/tracer.d.ts +205 -0
- package/dist/common/tracer.js +1035 -0
- package/dist/common/tracer.js.map +1 -0
- package/dist/constants.d.ts +51 -0
- package/dist/constants.js +344 -0
- package/dist/constants.js.map +1 -0
- package/dist/data/example.d.ts +70 -0
- package/dist/data/example.js +125 -0
- package/dist/data/example.js.map +1 -0
- package/dist/data/result.d.ts +51 -0
- package/dist/data/result.js +83 -0
- package/dist/data/result.js.map +1 -0
- package/dist/evaluation-run.d.ts +44 -0
- package/dist/evaluation-run.js +136 -0
- package/dist/evaluation-run.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +73 -0
- package/dist/index.js.map +1 -0
- package/dist/judgment-client.d.ts +179 -0
- package/dist/judgment-client.js +1038 -0
- package/dist/judgment-client.js.map +1 -0
- package/dist/rules.d.ts +120 -0
- package/dist/rules.js +322 -0
- package/dist/rules.js.map +1 -0
- package/dist/run-evaluation.d.ts +78 -0
- package/dist/run-evaluation.js +618 -0
- package/dist/run-evaluation.js.map +1 -0
- package/dist/scorers/api-scorer.d.ts +79 -0
- package/dist/scorers/api-scorer.js +291 -0
- package/dist/scorers/api-scorer.js.map +1 -0
- package/dist/scorers/base-scorer.d.ts +100 -0
- package/dist/scorers/base-scorer.js +190 -0
- package/dist/scorers/base-scorer.js.map +1 -0
- package/dist/scorers/exact-match-scorer.d.ts +10 -0
- package/dist/scorers/exact-match-scorer.js +84 -0
- package/dist/scorers/exact-match-scorer.js.map +1 -0
- package/package.json +88 -0
|
@@ -0,0 +1,1038 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// TypeScript module-interop helper: exposes property `key` of `source` on
// `target` under the name `alias` (defaults to `key`).
// Reuses an already-installed helper found on `this` when there is one.
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy engines: a plain copy of the current value.
        return function (target, source, key, alias) {
            if (alias === undefined) {
                alias = key;
            }
            target[alias] = source[key];
        };
    }
    return function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            // An accessor is reused as-is only when the source is an ES module.
            needsGetter = !source.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            // Forward reads to the source so later changes stay visible.
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, alias, descriptor);
    };
})();
|
|
13
|
+
// TypeScript module-interop helper: stores `value` as the `default` member of
// `target`. Reuses an already-installed helper found on `this` when present.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (target, value) {
            Object.defineProperty(target, "default", { enumerable: true, value: value });
        };
    }
    // Legacy engines: plain assignment.
    return function (target, value) {
        target["default"] = value;
    };
})();
|
|
18
|
+
// TypeScript helper behind `import * as ns from "..."`.
// ES modules are returned untouched; anything else is wrapped in a fresh
// namespace object that re-exposes the module's own enumerable members
// (except "default") and carries the module itself as `default`.
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        for (var key in mod) {
            if (key === "default") {
                continue;
            }
            if (Object.prototype.hasOwnProperty.call(mod, key)) {
                __createBinding(namespace, mod, key);
            }
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
|
|
25
|
+
// TypeScript helper that drives a generator as an async function.
// `generator` is invoked with `thisArg` / `_arguments`; each yielded value is
// adopted into a promise of type `P` (defaults to Promise) and its outcome is
// fed back into the generator until it finishes or throws.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    if (!P) {
        P = Promise;
    }
    // Wrap plain values so that `.then` is always available.
    const toPromise = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((settle) => { settle(value); });
    };
    return new P((resolve, reject) => {
        const onFulfilled = (value) => {
            try {
                advance(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        const onRejected = (reason) => {
            try {
                advance(generator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
            }
            else {
                toPromise(result.value).then(onFulfilled, onRejected);
            }
        };
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
|
|
34
|
+
// TypeScript helper behind `import x from "..."`.
// ES modules pass through unchanged; any other value is wrapped so that it is
// reachable as `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
37
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
38
|
+
exports.JudgmentClient = void 0;
|
|
39
|
+
const dotenv = __importStar(require("dotenv"));
|
|
40
|
+
const axios_1 = __importDefault(require("axios"));
|
|
41
|
+
const example_1 = require("./data/example");
|
|
42
|
+
const result_1 = require("./data/result");
|
|
43
|
+
const base_scorer_1 = require("./scorers/base-scorer");
|
|
44
|
+
const evaluation_run_1 = require("./evaluation-run");
|
|
45
|
+
const rules_1 = require("./rules");
|
|
46
|
+
const run_evaluation_1 = require("./run-evaluation");
|
|
47
|
+
const constants_1 = require("./constants");
|
|
48
|
+
const logger_instance_1 = __importDefault(require("./common/logger-instance"));
|
|
49
|
+
// Load environment variables
|
|
50
|
+
dotenv.config();
|
|
51
|
+
/**
|
|
52
|
+
* Singleton implementation for JudgmentClient
|
|
53
|
+
*/
|
|
54
|
+
class JudgmentClient {
|
|
55
|
+
/**
|
|
56
|
+
* Get the singleton instance of JudgmentClient
|
|
57
|
+
*/
|
|
58
|
+
static getInstance(judgmentApiKey, organizationId) {
|
|
59
|
+
if (!JudgmentClient.instance) {
|
|
60
|
+
JudgmentClient.instance = new JudgmentClient(judgmentApiKey, organizationId);
|
|
61
|
+
}
|
|
62
|
+
return JudgmentClient.instance;
|
|
63
|
+
}
|
|
64
|
+
/**
|
|
65
|
+
* Constructor for JudgmentClient
|
|
66
|
+
* @param judgmentApiKey The Judgment API key
|
|
67
|
+
* @param organizationId The organization ID
|
|
68
|
+
*/
|
|
69
|
+
constructor(judgmentApiKey, organizationId) {
|
|
70
|
+
this.judgmentApiKey = judgmentApiKey || process.env.JUDGMENT_API_KEY || '';
|
|
71
|
+
this.organizationId = organizationId || process.env.JUDGMENT_ORG_ID || '';
|
|
72
|
+
// Keep this as direct output
|
|
73
|
+
console.log('Successfully initialized JudgmentClient!');
|
|
74
|
+
if (!this.judgmentApiKey) {
|
|
75
|
+
// Use logger for internal error, but throw for user
|
|
76
|
+
logger_instance_1.default.error('JUDGMENT_API_KEY is not set.');
|
|
77
|
+
throw new Error('Judgment API key is required. Set it in the constructor or as an environment variable JUDGMENT_API_KEY.');
|
|
78
|
+
}
|
|
79
|
+
if (!this.organizationId) {
|
|
80
|
+
throw new Error('Organization ID is required. Set it in the constructor or as an environment variable JUDGMENT_ORG_ID.');
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
/**
|
|
84
|
+
* Run an evaluation asynchronously
|
|
85
|
+
*/
|
|
86
|
+
aRunEvaluation(examples_1, scorers_1, model_1, aggregator_1, metadata_1) {
|
|
87
|
+
return __awaiter(this, arguments, void 0, function* (examples, scorers, model, aggregator, metadata, logResults = true, projectName = 'default_project', evalRunName = 'default_eval_run', override = false, useJudgment = true, ignoreErrors = true, rules) {
|
|
88
|
+
// Simply call runEvaluation with asyncExecution=true
|
|
89
|
+
return this.runEvaluation(examples, scorers, model, aggregator, metadata, logResults, projectName, evalRunName, override, useJudgment, ignoreErrors, true, // Set asyncExecution to true
|
|
90
|
+
rules);
|
|
91
|
+
});
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Run an evaluation
|
|
95
|
+
*/
|
|
96
|
+
runEvaluation(examples_1, scorers_1, model_1, aggregator_1, metadata_1) {
|
|
97
|
+
return __awaiter(this, arguments, void 0, function* (examples, scorers, model, aggregator, metadata, logResults = true, projectName = 'default_project', evalRunName = 'default_eval_run', override = false, useJudgment = true, ignoreErrors = true, asyncExecution = false, rules) {
|
|
98
|
+
try {
|
|
99
|
+
// Load appropriate implementations for all scorers
|
|
100
|
+
const loadedScorers = [];
|
|
101
|
+
for (const scorer of scorers) {
|
|
102
|
+
try {
|
|
103
|
+
if (scorer instanceof base_scorer_1.ScorerWrapper) {
|
|
104
|
+
loadedScorers.push(scorer.loadImplementation(useJudgment));
|
|
105
|
+
}
|
|
106
|
+
else {
|
|
107
|
+
// Assume scorers passed are already JudgevalScorer or APIJudgmentScorer
|
|
108
|
+
loadedScorers.push(scorer);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
catch (error) {
|
|
112
|
+
throw new Error(`Failed to load implementation for scorer ${scorer.constructor.name}: ${error instanceof Error ? error.message : String(error)}`);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
// Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules
|
|
116
|
+
if (rules && loadedScorers.some(scorer => scorer instanceof base_scorer_1.JudgevalScorer)) {
|
|
117
|
+
throw new Error('Cannot use Judgeval scorers (only API scorers) when using rules. Please either remove rules or use only APIJudgmentScorer types.');
|
|
118
|
+
}
|
|
119
|
+
// Convert ScorerWrapper in rules to their implementations
|
|
120
|
+
let loadedRules;
|
|
121
|
+
if (rules) {
|
|
122
|
+
loadedRules = [];
|
|
123
|
+
for (const rule of rules) {
|
|
124
|
+
try {
|
|
125
|
+
const processedConditions = [];
|
|
126
|
+
for (const condition of rule.conditions) {
|
|
127
|
+
// Convert metric if it's a ScorerWrapper
|
|
128
|
+
if (condition.metric instanceof base_scorer_1.ScorerWrapper) {
|
|
129
|
+
try {
|
|
130
|
+
// Create a new Condition object with the loaded implementation
|
|
131
|
+
const loadedMetric = condition.metric.loadImplementation(useJudgment);
|
|
132
|
+
const newCondition = new rules_1.Condition(loadedMetric);
|
|
133
|
+
// Copy other properties from the original condition if necessary
|
|
134
|
+
// Example: newCondition.threshold = condition.threshold;
|
|
135
|
+
Object.assign(newCondition, Object.assign(Object.assign({}, condition), { metric: loadedMetric })); // Copy all properties, overriding metric
|
|
136
|
+
processedConditions.push(newCondition);
|
|
137
|
+
}
|
|
138
|
+
catch (error) {
|
|
139
|
+
throw new Error(`Failed to convert ScorerWrapper to implementation in rule '${rule.name}', condition metric '${condition.metric.constructor.name}': ${error instanceof Error ? error.message : String(error)}`);
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
else {
|
|
143
|
+
processedConditions.push(condition);
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
// Create new rule with processed conditions
|
|
147
|
+
const newRule = new rules_1.Rule(rule.name, processedConditions, rule.combineType, rule.description, rule.notification, rule.ruleId);
|
|
148
|
+
loadedRules.push(newRule);
|
|
149
|
+
}
|
|
150
|
+
catch (error) {
|
|
151
|
+
throw new Error(`Failed to process rule '${rule.name}': ${error instanceof Error ? error.message : String(error)}`);
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
const evaluationRun = new evaluation_run_1.EvaluationRun({
|
|
156
|
+
logResults,
|
|
157
|
+
projectName,
|
|
158
|
+
evalName: evalRunName,
|
|
159
|
+
examples,
|
|
160
|
+
scorers: loadedScorers,
|
|
161
|
+
model,
|
|
162
|
+
aggregator,
|
|
163
|
+
metadata,
|
|
164
|
+
judgmentApiKey: this.judgmentApiKey,
|
|
165
|
+
rules: loadedRules,
|
|
166
|
+
organizationId: this.organizationId
|
|
167
|
+
});
|
|
168
|
+
return (0, run_evaluation_1.runEval)(evaluationRun, override, ignoreErrors, asyncExecution);
|
|
169
|
+
}
|
|
170
|
+
catch (error) {
|
|
171
|
+
if (error instanceof Error) {
|
|
172
|
+
if (error.message.includes('one or more fields are invalid')) {
|
|
173
|
+
throw new Error(`Please check your EvaluationRun object, one or more fields are invalid: \n${error.message}`);
|
|
174
|
+
}
|
|
175
|
+
else {
|
|
176
|
+
throw new Error(`An unexpected error occurred during evaluation: ${error.message}`);
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
else {
|
|
180
|
+
throw new Error(`An unexpected error occurred during evaluation: ${String(error)}`);
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
});
|
|
184
|
+
}
|
|
185
|
+
/**
|
|
186
|
+
* Run an evaluation with a simplified interface (recommended)
|
|
187
|
+
* @param config Configuration object for the evaluation
|
|
188
|
+
* @returns Promise<ScoringResult[]> The evaluation results
|
|
189
|
+
*/
|
|
190
|
+
evaluate(config) {
|
|
191
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
192
|
+
// Set default values
|
|
193
|
+
const { examples, scorers, model = 'meta-llama/Meta-Llama-3-8B-Instruct-Turbo', aggregator = undefined, metadata = {}, projectName = 'default_project', evalName = `eval-run-${Date.now()}`, logResults = true, useJudgment = true, ignoreErrors = true, asyncExecution = false, rules = undefined, override = false } = config;
|
|
194
|
+
// Call the original runEvaluation method with the extracted parameters
|
|
195
|
+
return this.runEvaluation(examples, scorers, model, aggregator, metadata, logResults, projectName, evalName, override, useJudgment, ignoreErrors, asyncExecution, rules);
|
|
196
|
+
});
|
|
197
|
+
}
|
|
198
|
+
/**
|
|
199
|
+
* Evaluate a dataset
|
|
200
|
+
*/
|
|
201
|
+
evaluateDataset(dataset_1, scorers_1, model_1, aggregator_1, metadata_1) {
|
|
202
|
+
return __awaiter(this, arguments, void 0, function* (dataset, // EvalDataset would be implemented separately
|
|
203
|
+
scorers, model, aggregator, metadata, projectName = '', evalRunName = '', logResults = true, useJudgment = true, rules) {
|
|
204
|
+
try {
|
|
205
|
+
// Load appropriate implementations for all scorers
|
|
206
|
+
const loadedScorers = [];
|
|
207
|
+
for (const scorer of scorers) {
|
|
208
|
+
try {
|
|
209
|
+
if (scorer instanceof base_scorer_1.ScorerWrapper) {
|
|
210
|
+
loadedScorers.push(scorer.loadImplementation(useJudgment));
|
|
211
|
+
}
|
|
212
|
+
else {
|
|
213
|
+
// Assuming scorers passed are already JudgevalScorer or APIJudgmentScorer
|
|
214
|
+
loadedScorers.push(scorer);
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
catch (error) {
|
|
218
|
+
throw new Error(`Failed to load implementation for scorer ${scorer.constructor.name}: ${error instanceof Error ? error.message : String(error)}`);
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
// Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules
|
|
222
|
+
if (rules && loadedScorers.some(scorer => scorer instanceof base_scorer_1.JudgevalScorer)) {
|
|
223
|
+
throw new Error('Cannot use Judgeval scorers (only API scorers) when using rules. Please either remove rules or use only APIJudgmentScorer types.');
|
|
224
|
+
}
|
|
225
|
+
// Convert ScorerWrapper in rules to their implementations
|
|
226
|
+
let loadedRules;
|
|
227
|
+
if (rules) {
|
|
228
|
+
loadedRules = [];
|
|
229
|
+
for (const rule of rules) {
|
|
230
|
+
try {
|
|
231
|
+
const processedConditions = [];
|
|
232
|
+
for (const condition of rule.conditions) {
|
|
233
|
+
// Convert metric if it's a ScorerWrapper
|
|
234
|
+
if (condition.metric instanceof base_scorer_1.ScorerWrapper) {
|
|
235
|
+
try {
|
|
236
|
+
const loadedMetric = condition.metric.loadImplementation(useJudgment);
|
|
237
|
+
const newCondition = new rules_1.Condition(loadedMetric);
|
|
238
|
+
Object.assign(newCondition, Object.assign(Object.assign({}, condition), { metric: loadedMetric }));
|
|
239
|
+
processedConditions.push(newCondition);
|
|
240
|
+
}
|
|
241
|
+
catch (error) {
|
|
242
|
+
throw new Error(`Failed to convert ScorerWrapper to implementation in rule '${rule.name}', condition metric '${condition.metric.constructor.name}': ${error instanceof Error ? error.message : String(error)}`);
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
else {
|
|
246
|
+
processedConditions.push(condition);
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
// Create new rule with processed conditions
|
|
250
|
+
const newRule = new rules_1.Rule(rule.name, processedConditions, rule.combineType, rule.description, rule.notification, rule.ruleId);
|
|
251
|
+
loadedRules.push(newRule);
|
|
252
|
+
}
|
|
253
|
+
catch (error) {
|
|
254
|
+
throw new Error(`Failed to process rule '${rule.name}': ${error instanceof Error ? error.message : String(error)}`);
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
const evaluationRun = new evaluation_run_1.EvaluationRun({
|
|
259
|
+
logResults,
|
|
260
|
+
projectName,
|
|
261
|
+
evalName: evalRunName,
|
|
262
|
+
examples: dataset.examples, // Assuming dataset has an 'examples' property
|
|
263
|
+
scorers: loadedScorers,
|
|
264
|
+
model,
|
|
265
|
+
aggregator,
|
|
266
|
+
metadata,
|
|
267
|
+
judgmentApiKey: this.judgmentApiKey,
|
|
268
|
+
rules: loadedRules,
|
|
269
|
+
organizationId: this.organizationId
|
|
270
|
+
});
|
|
271
|
+
// Assuming override=false, ignoreErrors=true, asyncExecution=false as defaults for evaluateDataset
|
|
272
|
+
return (0, run_evaluation_1.runEval)(evaluationRun, false, true, false);
|
|
273
|
+
}
|
|
274
|
+
catch (error) {
|
|
275
|
+
if (error instanceof Error) {
|
|
276
|
+
if (error.message.includes('one or more fields are invalid')) {
|
|
277
|
+
throw new Error(`Please check your EvaluationRun object, one or more fields are invalid: \n${error.message}`);
|
|
278
|
+
}
|
|
279
|
+
else {
|
|
280
|
+
throw new Error(`An unexpected error occurred during evaluation: ${error.message}`);
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
else {
|
|
284
|
+
throw new Error(`An unexpected error occurred during evaluation: ${String(error)}`);
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
});
|
|
288
|
+
}
|
|
289
|
+
/**
|
|
290
|
+
* Create a dataset
|
|
291
|
+
*/
|
|
292
|
+
createDataset() {
|
|
293
|
+
// This would be implemented with EvalDataset
|
|
294
|
+
throw new Error('Not implemented yet');
|
|
295
|
+
}
|
|
296
|
+
/**
|
|
297
|
+
* Push a dataset to the Judgment platform
|
|
298
|
+
*/
|
|
299
|
+
pushDataset(alias_1, dataset_1, projectName_1) {
|
|
300
|
+
return __awaiter(this, arguments, void 0, function* (alias, dataset, projectName, overwrite = false) {
|
|
301
|
+
// This would be implemented with EvalDataset
|
|
302
|
+
throw new Error('Not implemented yet');
|
|
303
|
+
});
|
|
304
|
+
}
|
|
305
|
+
/**
|
|
306
|
+
* Pull a dataset from the Judgment platform
|
|
307
|
+
*/
|
|
308
|
+
pullDataset(alias, projectName) {
|
|
309
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
310
|
+
// This would be implemented with EvalDataset
|
|
311
|
+
throw new Error('Not implemented yet');
|
|
312
|
+
});
|
|
313
|
+
}
|
|
314
|
+
/**
|
|
315
|
+
* Delete a dataset from the Judgment platform
|
|
316
|
+
*/
|
|
317
|
+
deleteDataset(alias, projectName) {
|
|
318
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
319
|
+
// This would be implemented with EvalDataset
|
|
320
|
+
throw new Error('Not implemented yet');
|
|
321
|
+
});
|
|
322
|
+
}
|
|
323
|
+
/**
|
|
324
|
+
* Pull project dataset stats from the Judgment platform
|
|
325
|
+
*/
|
|
326
|
+
pullProjectDatasetStats(projectName) {
|
|
327
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
328
|
+
// This would be implemented with EvalDataset
|
|
329
|
+
throw new Error('Not implemented yet');
|
|
330
|
+
});
|
|
331
|
+
}
|
|
332
|
+
/**
|
|
333
|
+
* Insert examples into a dataset on the Judgment platform
|
|
334
|
+
*/
|
|
335
|
+
insertDataset(alias, examples, projectName) {
|
|
336
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
337
|
+
// This would be implemented with EvalDataset
|
|
338
|
+
throw new Error('Not implemented yet');
|
|
339
|
+
});
|
|
340
|
+
}
|
|
341
|
+
/**
|
|
342
|
+
* Pull evaluation results from the server
|
|
343
|
+
* @param projectName Name of the project
|
|
344
|
+
* @param evalRunName Name of the evaluation run
|
|
345
|
+
* @returns Array containing one object with 'id' and 'results' (list of ScoringResult)
|
|
346
|
+
*/
|
|
347
|
+
pullEval(projectName, evalRunName // Consistent parameter name, but API uses eval_name
|
|
348
|
+
) {
|
|
349
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
350
|
+
var _a, _b, _c, _d;
|
|
351
|
+
// Body matches Python's structure for this endpoint
|
|
352
|
+
const evalRunRequestBody = {
|
|
353
|
+
project_name: projectName,
|
|
354
|
+
eval_name: evalRunName, // Use eval_name in the body for the API
|
|
355
|
+
judgment_api_key: this.judgmentApiKey
|
|
356
|
+
};
|
|
357
|
+
try {
|
|
358
|
+
const response = yield axios_1.default.post(constants_1.JUDGMENT_EVAL_FETCH_API_URL, // Use constant
|
|
359
|
+
evalRunRequestBody, {
|
|
360
|
+
headers: {
|
|
361
|
+
'Content-Type': 'application/json',
|
|
362
|
+
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
363
|
+
'X-Organization-Id': this.organizationId
|
|
364
|
+
}
|
|
365
|
+
});
|
|
366
|
+
// Process the response to match the Python SDK's format
|
|
367
|
+
// Python returns [{ 'id': ..., 'results': [ScoringResult, ...]}]
|
|
368
|
+
// The API response is a list of results, each with an 'id' and 'result'
|
|
369
|
+
if (!Array.isArray(response.data) || response.data.length === 0) {
|
|
370
|
+
return [{ id: '', results: [] }]; // Return empty structure if no data
|
|
371
|
+
}
|
|
372
|
+
const evalRunResult = { id: '', results: [] };
|
|
373
|
+
evalRunResult.id = ((_a = response.data[0]) === null || _a === void 0 ? void 0 : _a.id) || ''; // Assume ID is same for all results in run
|
|
374
|
+
for (const result of response.data) {
|
|
375
|
+
const resultData = result.result || {};
|
|
376
|
+
const dataObject = resultData.data_object || {};
|
|
377
|
+
// Create Example from data_object
|
|
378
|
+
const example = new example_1.Example({
|
|
379
|
+
input: dataObject.input,
|
|
380
|
+
actualOutput: dataObject.actual_output,
|
|
381
|
+
expectedOutput: dataObject.expected_output,
|
|
382
|
+
context: dataObject.context,
|
|
383
|
+
retrievalContext: dataObject.retrieval_context,
|
|
384
|
+
additionalMetadata: dataObject.additional_metadata,
|
|
385
|
+
toolsCalled: dataObject.tools_called,
|
|
386
|
+
expectedTools: dataObject.expected_tools,
|
|
387
|
+
exampleId: dataObject.example_id,
|
|
388
|
+
exampleIndex: dataObject.example_index,
|
|
389
|
+
timestamp: dataObject.timestamp
|
|
390
|
+
});
|
|
391
|
+
// Create ScoringResult
|
|
392
|
+
const scoringResult = new result_1.ScoringResult({
|
|
393
|
+
dataObject: example,
|
|
394
|
+
scorersData: resultData.scorers_data || [],
|
|
395
|
+
error: resultData.error
|
|
396
|
+
});
|
|
397
|
+
evalRunResult.results.push(scoringResult);
|
|
398
|
+
}
|
|
399
|
+
return [evalRunResult]; // Wrap in array to match Python return type [{...}]
|
|
400
|
+
}
|
|
401
|
+
catch (error) {
|
|
402
|
+
if (axios_1.default.isAxiosError(error)) {
|
|
403
|
+
const statusCode = (_b = error.response) === null || _b === void 0 ? void 0 : _b.status;
|
|
404
|
+
const errorMessage = ((_d = (_c = error.response) === null || _c === void 0 ? void 0 : _c.data) === null || _d === void 0 ? void 0 : _d.detail) || error.message;
|
|
405
|
+
throw new Error(`Failed to pull evaluation results: ${statusCode} - ${errorMessage}`);
|
|
406
|
+
}
|
|
407
|
+
if (error instanceof Error) {
|
|
408
|
+
throw new Error(`Failed to pull evaluation results: ${error.message}`);
|
|
409
|
+
}
|
|
410
|
+
throw new Error(`Failed to pull evaluation results: ${String(error)}`);
|
|
411
|
+
}
|
|
412
|
+
});
|
|
413
|
+
}
|
|
414
|
+
/**
|
|
415
|
+
* Get evaluation run results (alias for pullEval with a more intuitive name)
|
|
416
|
+
* @param projectName Name of the project
|
|
417
|
+
* @param evalRunName Name of the evaluation run
|
|
418
|
+
* @returns Array containing one object with 'id' and 'results' (list of ScoringResult)
|
|
419
|
+
*/
|
|
420
|
+
getEvalRun(projectName, evalRunName) {
|
|
421
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
422
|
+
return this.pullEval(projectName, evalRunName);
|
|
423
|
+
});
|
|
424
|
+
}
|
|
425
|
+
/**
|
|
426
|
+
* List all evaluation runs for a project
|
|
427
|
+
* @param projectName Name of the project
|
|
428
|
+
* @param limit Maximum number of evaluation runs to return (default: 100)
|
|
429
|
+
* @param offset Offset for pagination (default: 0)
|
|
430
|
+
* @returns List of evaluation run metadata
|
|
431
|
+
*/
|
|
432
|
+
listEvalRuns(projectName_1) {
|
|
433
|
+
return __awaiter(this, arguments, void 0, function* (projectName, limit = 100, offset = 0) {
|
|
434
|
+
var _a, _b, _c;
|
|
435
|
+
try {
|
|
436
|
+
// Use ROOT_API for the base URL
|
|
437
|
+
const url = `${constants_1.ROOT_API}/projects/${projectName}/eval-runs`;
|
|
438
|
+
const response = yield axios_1.default.get(url, {
|
|
439
|
+
params: {
|
|
440
|
+
limit,
|
|
441
|
+
offset
|
|
442
|
+
},
|
|
443
|
+
headers: {
|
|
444
|
+
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
445
|
+
'X-Organization-Id': this.organizationId
|
|
446
|
+
}
|
|
447
|
+
});
|
|
448
|
+
return response.data || [];
|
|
449
|
+
}
|
|
450
|
+
catch (error) {
|
|
451
|
+
if (axios_1.default.isAxiosError(error)) {
|
|
452
|
+
const statusCode = (_a = error.response) === null || _a === void 0 ? void 0 : _a.status;
|
|
453
|
+
const errorMessage = ((_c = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data) === null || _c === void 0 ? void 0 : _c.detail) || error.message;
|
|
454
|
+
throw new Error(`Failed to list evaluation runs: ${statusCode} - ${errorMessage}`);
|
|
455
|
+
}
|
|
456
|
+
if (error instanceof Error) {
|
|
457
|
+
throw new Error(`Failed to list evaluation runs: ${error.message}`);
|
|
458
|
+
}
|
|
459
|
+
throw new Error(`Failed to list evaluation runs: ${String(error)}`);
|
|
460
|
+
}
|
|
461
|
+
});
|
|
462
|
+
}
|
|
463
|
+
/**
|
|
464
|
+
* Get evaluation run statistics
|
|
465
|
+
* @param projectName Name of the project
|
|
466
|
+
* @param evalRunName Name of the evaluation run
|
|
467
|
+
* @returns Statistics for the evaluation run
|
|
468
|
+
*/
|
|
469
|
+
getEvalRunStats(projectName, evalRunName) {
|
|
470
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
471
|
+
var _a, _b, _c;
|
|
472
|
+
try {
|
|
473
|
+
// Use ROOT_API for the base URL
|
|
474
|
+
const url = `${constants_1.ROOT_API}/projects/${projectName}/eval-runs/${evalRunName}/stats`;
|
|
475
|
+
const response = yield axios_1.default.get(url, {
|
|
476
|
+
headers: {
|
|
477
|
+
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
478
|
+
'X-Organization-Id': this.organizationId
|
|
479
|
+
}
|
|
480
|
+
});
|
|
481
|
+
return response.data || {};
|
|
482
|
+
}
|
|
483
|
+
catch (error) {
|
|
484
|
+
if (axios_1.default.isAxiosError(error)) {
|
|
485
|
+
const statusCode = (_a = error.response) === null || _a === void 0 ? void 0 : _a.status;
|
|
486
|
+
const errorMessage = ((_c = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data) === null || _c === void 0 ? void 0 : _c.detail) || error.message;
|
|
487
|
+
throw new Error(`Failed to get evaluation run statistics: ${statusCode} - ${errorMessage}`);
|
|
488
|
+
}
|
|
489
|
+
if (error instanceof Error) {
|
|
490
|
+
throw new Error(`Failed to get evaluation run statistics: ${error.message}`);
|
|
491
|
+
}
|
|
492
|
+
throw new Error(`Failed to get evaluation run statistics: ${String(error)}`);
|
|
493
|
+
}
|
|
494
|
+
});
|
|
495
|
+
}
|
|
496
|
+
/**
|
|
497
|
+
* Export evaluation results to a file format
|
|
498
|
+
* @param projectName Name of the project
|
|
499
|
+
* @param evalRunName Name of the evaluation run
|
|
500
|
+
* @param format Export format ('json' or 'csv')
|
|
501
|
+
* @returns The exported data as a string
|
|
502
|
+
*/
|
|
503
|
+
exportEvalResults(projectName_1, evalRunName_1) {
|
|
504
|
+
return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, format = 'json') {
|
|
505
|
+
try {
|
|
506
|
+
const evalRunArray = yield this.pullEval(projectName, evalRunName);
|
|
507
|
+
const evalRunData = evalRunArray[0]; // Get the first element containing id and results
|
|
508
|
+
if (!evalRunData || !evalRunData.results) {
|
|
509
|
+
return format === 'json' ? JSON.stringify([], null, 2) : 'No results found';
|
|
510
|
+
}
|
|
511
|
+
if (format === 'json') {
|
|
512
|
+
// Return the whole structure including ID and results array
|
|
513
|
+
return JSON.stringify(evalRunData, null, 2);
|
|
514
|
+
}
|
|
515
|
+
else if (format === 'csv') {
|
|
516
|
+
const results = evalRunData.results;
|
|
517
|
+
if (!Array.isArray(results) || results.length === 0) {
|
|
518
|
+
return 'No results found';
|
|
519
|
+
}
|
|
520
|
+
// Dynamically require json2csv only when needed
|
|
521
|
+
let Parser;
|
|
522
|
+
try {
|
|
523
|
+
Parser = require('json2csv').Parser;
|
|
524
|
+
}
|
|
525
|
+
catch (e) {
|
|
526
|
+
throw new Error("The 'json2csv' package is required for CSV export. Please install it (`npm install json2csv`).");
|
|
527
|
+
}
|
|
528
|
+
try {
|
|
529
|
+
// Flatten the structure slightly for better CSV output
|
|
530
|
+
const processedResults = results.map((result) => {
|
|
531
|
+
// Flatten dataObject properties and scorersData
|
|
532
|
+
const flatResult = {};
|
|
533
|
+
flatResult.eval_run_id = evalRunData.id; // Add eval run ID
|
|
534
|
+
// Flatten dataObject
|
|
535
|
+
if (result.dataObject) {
|
|
536
|
+
for (const [key, value] of Object.entries(result.dataObject)) {
|
|
537
|
+
// Prefix with 'data_' to avoid potential clashes
|
|
538
|
+
flatResult[`data_${key}`] = (typeof value === 'object' && value !== null) ? JSON.stringify(value) : value;
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
// Flatten scorersData - creates columns like scorer_0_name, scorer_0_score, etc.
|
|
542
|
+
if (Array.isArray(result.scorersData)) {
|
|
543
|
+
result.scorersData.forEach((scorerData, index) => {
|
|
544
|
+
flatResult[`scorer_${index}_name`] = scorerData.name;
|
|
545
|
+
flatResult[`scorer_${index}_score`] = (typeof scorerData.score === 'object' && scorerData.score !== null) ? JSON.stringify(scorerData.score) : scorerData.score;
|
|
546
|
+
flatResult[`scorer_${index}_error`] = scorerData.error;
|
|
547
|
+
// Add other scorer fields if necessary, e.g., metadata
|
|
548
|
+
if (scorerData.additional_metadata) {
|
|
549
|
+
flatResult[`scorer_${index}_metadata`] = JSON.stringify(scorerData.additional_metadata);
|
|
550
|
+
}
|
|
551
|
+
});
|
|
552
|
+
}
|
|
553
|
+
flatResult.error = result.error; // Top-level error for the example processing
|
|
554
|
+
return flatResult;
|
|
555
|
+
});
|
|
556
|
+
const parser = new Parser();
|
|
557
|
+
return parser.parse(processedResults);
|
|
558
|
+
}
|
|
559
|
+
catch (error) {
|
|
560
|
+
console.error('Error converting to CSV:', error);
|
|
561
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
562
|
+
return `Error generating CSV: ${errorMessage}`;
|
|
563
|
+
}
|
|
564
|
+
}
|
|
565
|
+
else {
|
|
566
|
+
throw new Error(`Unsupported export format: ${format}`);
|
|
567
|
+
}
|
|
568
|
+
}
|
|
569
|
+
catch (error) {
|
|
570
|
+
if (error instanceof Error) {
|
|
571
|
+
throw new Error(`Failed to export evaluation results: ${error.message}`);
|
|
572
|
+
}
|
|
573
|
+
throw new Error(`Failed to export evaluation results: ${String(error)}`);
|
|
574
|
+
}
|
|
575
|
+
});
|
|
576
|
+
}
|
|
577
|
+
/**
|
|
578
|
+
* Delete an evaluation from the server
|
|
579
|
+
*/
|
|
580
|
+
deleteEval(projectName, evalRunNames) {
|
|
581
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
582
|
+
var _a, _b;
|
|
583
|
+
if (!evalRunNames || evalRunNames.length === 0) {
|
|
584
|
+
throw new Error('No evaluation run names provided');
|
|
585
|
+
}
|
|
586
|
+
// Body matches Python's structure for this endpoint
|
|
587
|
+
const evalRunRequestBody = {
|
|
588
|
+
project_name: projectName,
|
|
589
|
+
eval_names: evalRunNames,
|
|
590
|
+
judgment_api_key: this.judgmentApiKey // Required by this specific API endpoint
|
|
591
|
+
};
|
|
592
|
+
try {
|
|
593
|
+
const response = yield axios_1.default.delete(constants_1.JUDGMENT_EVAL_DELETE_API_URL, // Use constant
|
|
594
|
+
{
|
|
595
|
+
data: evalRunRequestBody,
|
|
596
|
+
headers: {
|
|
597
|
+
'Content-Type': 'application/json',
|
|
598
|
+
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
599
|
+
'X-Organization-Id': this.organizationId
|
|
600
|
+
}
|
|
601
|
+
});
|
|
602
|
+
return Boolean(response.data);
|
|
603
|
+
}
|
|
604
|
+
catch (error) {
|
|
605
|
+
if (axios_1.default.isAxiosError(error)) {
|
|
606
|
+
const status = (_a = error.response) === null || _a === void 0 ? void 0 : _a.status;
|
|
607
|
+
const data = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data;
|
|
608
|
+
if (status === 404) {
|
|
609
|
+
throw new Error(`Eval results not found: ${JSON.stringify(data)}`);
|
|
610
|
+
}
|
|
611
|
+
else if (status === 500) {
|
|
612
|
+
throw new Error(`Error deleting eval results: ${JSON.stringify(data)}`);
|
|
613
|
+
}
|
|
614
|
+
else {
|
|
615
|
+
throw new Error(`Error deleting eval results (${status}): ${JSON.stringify(data)}`);
|
|
616
|
+
}
|
|
617
|
+
}
|
|
618
|
+
// Rethrow original or wrapped error
|
|
619
|
+
if (error instanceof Error) {
|
|
620
|
+
throw new Error(`Error deleting eval results: ${error.message}`);
|
|
621
|
+
}
|
|
622
|
+
throw new Error(`Error deleting eval results: ${String(error)}`);
|
|
623
|
+
}
|
|
624
|
+
});
|
|
625
|
+
}
|
|
626
|
+
/**
|
|
627
|
+
* Delete all evaluations from the server for a given project
|
|
628
|
+
*/
|
|
629
|
+
deleteProjectEvals(projectName) {
|
|
630
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
631
|
+
var _a, _b;
|
|
632
|
+
try {
|
|
633
|
+
const response = yield axios_1.default.delete(constants_1.JUDGMENT_EVAL_DELETE_PROJECT_API_URL, // Use constant
|
|
634
|
+
{
|
|
635
|
+
// Remove judgment_api_key from body to match Python (uses header auth)
|
|
636
|
+
data: {
|
|
637
|
+
project_name: projectName,
|
|
638
|
+
},
|
|
639
|
+
headers: {
|
|
640
|
+
'Content-Type': 'application/json',
|
|
641
|
+
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
642
|
+
'X-Organization-Id': this.organizationId
|
|
643
|
+
}
|
|
644
|
+
});
|
|
645
|
+
// Python returns response.json(), check if TS response needs similar handling
|
|
646
|
+
return Boolean(response.data); // Assuming response.data indicates success
|
|
647
|
+
}
|
|
648
|
+
catch (error) {
|
|
649
|
+
if (axios_1.default.isAxiosError(error)) {
|
|
650
|
+
const status = (_a = error.response) === null || _a === void 0 ? void 0 : _a.status;
|
|
651
|
+
const data = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data;
|
|
652
|
+
if (status === 404) {
|
|
653
|
+
// Assuming 404 might mean project not found or no evals to delete
|
|
654
|
+
console.warn(`Project '${projectName}' not found or no evals to delete.`);
|
|
655
|
+
return false; // Or true depending on desired idempotency behavior
|
|
656
|
+
}
|
|
657
|
+
else if (status === 500) {
|
|
658
|
+
throw new Error(`Error deleting project evals: ${JSON.stringify(data)}`);
|
|
659
|
+
}
|
|
660
|
+
else {
|
|
661
|
+
throw new Error(`Error deleting project evals (${status}): ${JSON.stringify(data)}`);
|
|
662
|
+
}
|
|
663
|
+
}
|
|
664
|
+
if (error instanceof Error) {
|
|
665
|
+
throw new Error(`Error deleting project evals: ${error.message}`);
|
|
666
|
+
}
|
|
667
|
+
throw new Error(`Error deleting project evals: ${String(error)}`);
|
|
668
|
+
}
|
|
669
|
+
});
|
|
670
|
+
}
|
|
671
|
+
/**
|
|
672
|
+
* Create a project on the server
|
|
673
|
+
*/
|
|
674
|
+
createProject(projectName) {
|
|
675
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
676
|
+
try {
|
|
677
|
+
const response = yield axios_1.default.post(constants_1.JUDGMENT_PROJECT_CREATE_API_URL, // Use constant
|
|
678
|
+
// Remove judgment_api_key from body to match Python (uses header auth)
|
|
679
|
+
{
|
|
680
|
+
project_name: projectName,
|
|
681
|
+
}, {
|
|
682
|
+
headers: {
|
|
683
|
+
'Content-Type': 'application/json',
|
|
684
|
+
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
685
|
+
'X-Organization-Id': this.organizationId
|
|
686
|
+
}
|
|
687
|
+
});
|
|
688
|
+
// Python returns response.json(), check if TS response needs similar handling
|
|
689
|
+
return Boolean(response.data); // Assuming response.data indicates success
|
|
690
|
+
}
|
|
691
|
+
catch (error) {
|
|
692
|
+
if (axios_1.default.isAxiosError(error) && error.response) {
|
|
693
|
+
// Check for specific conflict error (e.g., 409) if API provides it
|
|
694
|
+
if (error.response.status === 409) {
|
|
695
|
+
console.warn(`Project '${projectName}' already exists.`);
|
|
696
|
+
return false; // Or true if idempotent creation is desired
|
|
697
|
+
}
|
|
698
|
+
throw new Error(`Error creating project (${error.response.status}): ${JSON.stringify(error.response.data)}`);
|
|
699
|
+
}
|
|
700
|
+
else if (error instanceof Error) {
|
|
701
|
+
throw new Error(`Error creating project: ${error.message}`);
|
|
702
|
+
}
|
|
703
|
+
else {
|
|
704
|
+
throw new Error(`Error creating project: ${String(error)}`);
|
|
705
|
+
}
|
|
706
|
+
}
|
|
707
|
+
});
|
|
708
|
+
}
|
|
709
|
+
/**
|
|
710
|
+
* Delete a project from the server
|
|
711
|
+
*/
|
|
712
|
+
deleteProject(projectName) {
|
|
713
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
714
|
+
try {
|
|
715
|
+
const response = yield axios_1.default.delete(constants_1.JUDGMENT_PROJECT_DELETE_API_URL, // Use constant
|
|
716
|
+
{
|
|
717
|
+
// Remove judgment_api_key from body to match Python (uses header auth)
|
|
718
|
+
data: {
|
|
719
|
+
project_name: projectName,
|
|
720
|
+
},
|
|
721
|
+
headers: {
|
|
722
|
+
'Content-Type': 'application/json',
|
|
723
|
+
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
724
|
+
'X-Organization-Id': this.organizationId
|
|
725
|
+
}
|
|
726
|
+
});
|
|
727
|
+
// Python returns response.json(), check if TS response needs similar handling
|
|
728
|
+
return Boolean(response.data); // Assuming response.data indicates success
|
|
729
|
+
}
|
|
730
|
+
catch (error) {
|
|
731
|
+
if (axios_1.default.isAxiosError(error) && error.response) {
|
|
732
|
+
if (error.response.status === 404) {
|
|
733
|
+
console.warn(`Project '${projectName}' not found for deletion.`);
|
|
734
|
+
return false; // Or true depending on desired idempotency
|
|
735
|
+
}
|
|
736
|
+
throw new Error(`Error deleting project (${error.response.status}): ${JSON.stringify(error.response.data)}`);
|
|
737
|
+
}
|
|
738
|
+
else if (error instanceof Error) {
|
|
739
|
+
throw new Error(`Error deleting project: ${error.message}`);
|
|
740
|
+
}
|
|
741
|
+
else {
|
|
742
|
+
throw new Error(`Error deleting project: ${String(error)}`);
|
|
743
|
+
}
|
|
744
|
+
}
|
|
745
|
+
});
|
|
746
|
+
}
|
|
747
|
+
/**
|
|
748
|
+
* Validate that the user API key is valid
|
|
749
|
+
*/
|
|
750
|
+
validateApiKey() {
|
|
751
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
752
|
+
var _a, _b;
|
|
753
|
+
try {
|
|
754
|
+
const response = yield axios_1.default.post(`${constants_1.ROOT_API}/validate_api_key/`, // Use ROOT_API
|
|
755
|
+
{}, // Empty body
|
|
756
|
+
{
|
|
757
|
+
headers: {
|
|
758
|
+
'Content-Type': 'application/json',
|
|
759
|
+
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
760
|
+
// Removed 'X-Organization-Id' header to match Python for this specific endpoint
|
|
761
|
+
}
|
|
762
|
+
});
|
|
763
|
+
if (response.status === 200) {
|
|
764
|
+
return [true, JSON.stringify(response.data)];
|
|
765
|
+
}
|
|
766
|
+
else {
|
|
767
|
+
// Status might be non-200 but still valid JSON error response
|
|
768
|
+
return [false, ((_a = response.data) === null || _a === void 0 ? void 0 : _a.detail) || `Error validating API key (Status: ${response.status})`];
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
catch (error) {
|
|
772
|
+
if (axios_1.default.isAxiosError(error) && error.response) {
|
|
773
|
+
return [false, ((_b = error.response.data) === null || _b === void 0 ? void 0 : _b.detail) || `Error validating API key (Status: ${error.response.status})`];
|
|
774
|
+
}
|
|
775
|
+
else if (error instanceof Error) {
|
|
776
|
+
return [false, `Error validating API key: ${error.message}`];
|
|
777
|
+
}
|
|
778
|
+
else {
|
|
779
|
+
return [false, `Unknown error validating API key: ${String(error)}`];
|
|
780
|
+
}
|
|
781
|
+
}
|
|
782
|
+
});
|
|
783
|
+
}
|
|
784
|
+
/**
|
|
785
|
+
* Assert a test by running the evaluation and checking the results for success
|
|
786
|
+
*/
|
|
787
|
+
assertTest(examples_1, scorers_1, model_1, aggregator_1, metadata_1) {
|
|
788
|
+
return __awaiter(this, arguments, void 0, function* (examples, scorers, // Type matches Python's intent
|
|
789
|
+
model, aggregator, metadata, logResults = true, projectName = 'default_project', evalRunName = 'default_eval_run', override = false, rules) {
|
|
790
|
+
const results = yield this.runEvaluation(examples, scorers, model, aggregator, metadata, logResults, projectName, evalRunName, override, true, // useJudgment = true (necessary if API scorers or rules are involved)
|
|
791
|
+
false, // ignoreErrors = false for assert
|
|
792
|
+
false, // asyncExecution = false
|
|
793
|
+
rules);
|
|
794
|
+
(0, run_evaluation_1.assertTest)(results); // Assumes assertTest handles ScoringResult[]
|
|
795
|
+
});
|
|
796
|
+
}
|
|
797
|
+
/**
|
|
798
|
+
* Pull the results of an evaluation run. Matches `pullEval` logic but returns only the ScoringResult array.
|
|
799
|
+
* @param projectName The name of the project
|
|
800
|
+
* @param evalRunName The name of the evaluation run
|
|
801
|
+
* @returns The results of the evaluation run as ScoringResult[] or empty array on error/no results.
|
|
802
|
+
*/
|
|
803
|
+
pullEvalResults(projectName, evalRunName) {
|
|
804
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
805
|
+
var _a;
|
|
806
|
+
try {
|
|
807
|
+
const evalRunArray = yield this.pullEval(projectName, evalRunName);
|
|
808
|
+
// pullEval returns [{ id: ..., results: [...] }], extract results
|
|
809
|
+
return ((_a = evalRunArray[0]) === null || _a === void 0 ? void 0 : _a.results) || [];
|
|
810
|
+
}
|
|
811
|
+
catch (error) {
|
|
812
|
+
// Log error but return empty array to allow waitForEvaluation to potentially retry
|
|
813
|
+
logger_instance_1.default.error(`Failed to pull evaluation results for '${evalRunName}': ${error instanceof Error ? error.message : String(error)}`);
|
|
814
|
+
return [];
|
|
815
|
+
}
|
|
816
|
+
});
|
|
817
|
+
}
|
|
818
|
+
/**
|
|
819
|
+
* Check the status of an evaluation run using the fetch endpoint.
|
|
820
|
+
* This is a heuristic approach as the endpoint might return full results or status info.
|
|
821
|
+
* @param projectName The name of the project
|
|
822
|
+
* @param evalRunName The name of the evaluation run
|
|
823
|
+
* @returns An object representing the status { status: string, progress: number, message: string }
|
|
824
|
+
*/
|
|
825
|
+
checkEvalStatus(projectName, evalRunName) {
|
|
826
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
827
|
+
var _a, _b;
|
|
828
|
+
// Using 'eval_name' in body for consistency with pullEval/fetch endpoint.
|
|
829
|
+
const requestBody = {
|
|
830
|
+
project_name: projectName,
|
|
831
|
+
eval_name: evalRunName, // Use 'eval_name'
|
|
832
|
+
judgment_api_key: this.judgmentApiKey,
|
|
833
|
+
};
|
|
834
|
+
try {
|
|
835
|
+
const response = yield axios_1.default.post(constants_1.JUDGMENT_EVAL_FETCH_API_URL, // Use fetch URL
|
|
836
|
+
requestBody, {
|
|
837
|
+
headers: {
|
|
838
|
+
'Content-Type': 'application/json',
|
|
839
|
+
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
840
|
+
'X-Organization-Id': this.organizationId
|
|
841
|
+
},
|
|
842
|
+
timeout: 15000 // Slightly increased timeout for status checks
|
|
843
|
+
});
|
|
844
|
+
// Interpret response: API might return status object or full results array
|
|
845
|
+
let statusData = { status: 'unknown', progress: 0, message: '' };
|
|
846
|
+
if (Array.isArray(response.data)) {
|
|
847
|
+
// If it's an array, assume results are complete unless explicitly stated otherwise
|
|
848
|
+
if (response.data.length > 0 && ((_b = (_a = response.data[0]) === null || _a === void 0 ? void 0 : _a.result) === null || _b === void 0 ? void 0 : _b.status)) {
|
|
849
|
+
// Check if the first result object contains status info
|
|
850
|
+
statusData = response.data[0].result; // Assuming status is within the 'result' field
|
|
851
|
+
}
|
|
852
|
+
else if (response.data.length > 0) {
|
|
853
|
+
// Assume complete if we get results array without specific status fields
|
|
854
|
+
statusData = { status: 'complete', progress: 1.0, message: 'Results received' };
|
|
855
|
+
}
|
|
856
|
+
else {
|
|
857
|
+
// Empty array might mean still processing or no results yet
|
|
858
|
+
statusData = { status: 'processing', progress: 0, message: 'Waiting for results...' };
|
|
859
|
+
}
|
|
860
|
+
}
|
|
861
|
+
else if (typeof response.data === 'object' && response.data !== null && response.data.status) {
|
|
862
|
+
// Might be a direct status object from the API
|
|
863
|
+
statusData = response.data;
|
|
864
|
+
}
|
|
865
|
+
else {
|
|
866
|
+
// Unexpected response format
|
|
867
|
+
statusData = { status: 'unknown', progress: 0, message: `Unexpected response format: ${JSON.stringify(response.data)}` };
|
|
868
|
+
}
|
|
869
|
+
// Normalize the progress value
|
|
870
|
+
let progress = 0;
|
|
871
|
+
if (statusData.progress !== undefined && statusData.progress !== null) {
|
|
872
|
+
const parsedProgress = parseFloat(statusData.progress);
|
|
873
|
+
if (!isNaN(parsedProgress)) {
|
|
874
|
+
progress = Math.max(0, Math.min(1, parsedProgress)); // Ensure progress is between 0 and 1
|
|
875
|
+
}
|
|
876
|
+
}
|
|
877
|
+
const normalizedStatus = {
|
|
878
|
+
status: statusData.status || 'unknown',
|
|
879
|
+
progress: progress,
|
|
880
|
+
message: statusData.message || '',
|
|
881
|
+
error: statusData.error // Include error field if present
|
|
882
|
+
};
|
|
883
|
+
// Only log status if it's not being called from waitForEvaluation
|
|
884
|
+
// Check stack trace for caller function name
|
|
885
|
+
const stack = new Error().stack;
|
|
886
|
+
const isCalledByWaitForEvaluation = stack === null || stack === void 0 ? void 0 : stack.includes('waitForEvaluation');
|
|
887
|
+
if (!isCalledByWaitForEvaluation) {
|
|
888
|
+
// Use logger for status updates when called directly
|
|
889
|
+
logger_instance_1.default.info(`Evaluation Status: ${normalizedStatus.status}`);
|
|
890
|
+
logger_instance_1.default.info(`Progress: ${Math.round(normalizedStatus.progress * 100)}%`);
|
|
891
|
+
if (normalizedStatus.message) {
|
|
892
|
+
logger_instance_1.default.info(`Message: ${normalizedStatus.message}`);
|
|
893
|
+
}
|
|
894
|
+
if (normalizedStatus.error) {
|
|
895
|
+
logger_instance_1.default.error(`Error in status: ${normalizedStatus.error}`);
|
|
896
|
+
}
|
|
897
|
+
}
|
|
898
|
+
return normalizedStatus;
|
|
899
|
+
}
|
|
900
|
+
catch (error) {
|
|
901
|
+
// Don't throw errors from status check, just return default 'unknown' status
|
|
902
|
+
// This allows waitForEvaluation to continue polling even on transient network issues
|
|
903
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
904
|
+
logger_instance_1.default.error(`Error checking evaluation status for '${evalRunName}': ${errorMessage}`);
|
|
905
|
+
return {
|
|
906
|
+
status: 'unknown',
|
|
907
|
+
progress: 0,
|
|
908
|
+
message: `Error checking status: ${errorMessage}`
|
|
909
|
+
};
|
|
910
|
+
}
|
|
911
|
+
});
|
|
912
|
+
}
|
|
913
|
+
/**
|
|
914
|
+
* Wait for an async evaluation to complete and return the results
|
|
915
|
+
* @param projectName The name of the project
|
|
916
|
+
* @param evalRunName The name of the evaluation run
|
|
917
|
+
* @param options Optional configuration for polling: intervalMs, maxAttempts, showProgress
|
|
918
|
+
* @returns The evaluation results as ScoringResult[] or empty array on timeout/failure.
|
|
919
|
+
*/
|
|
920
|
+
waitForEvaluation(projectName_1, evalRunName_1) {
|
|
921
|
+
return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, options = {}) {
|
|
922
|
+
const { intervalMs = 3000, // Slightly longer interval
|
|
923
|
+
maxAttempts = 200, // ~10 minutes total wait time (200 * 3s)
|
|
924
|
+
showProgress = true } = options;
|
|
925
|
+
let attempts = 0;
|
|
926
|
+
let lastProgressPercent = -1;
|
|
927
|
+
let lastStatus = '';
|
|
928
|
+
if (showProgress) {
|
|
929
|
+
// Use logger for initial message
|
|
930
|
+
logger_instance_1.default.info(`Waiting for evaluation "${evalRunName}" in project "${projectName}" to complete...`);
|
|
931
|
+
}
|
|
932
|
+
while (attempts < maxAttempts) {
|
|
933
|
+
attempts++;
|
|
934
|
+
try {
|
|
935
|
+
const status = yield this.checkEvalStatus(projectName, evalRunName); // Call internal status check
|
|
936
|
+
const currentProgressPercent = Math.round(status.progress * 100);
|
|
937
|
+
// Show progress/status updates only when they change significantly
|
|
938
|
+
if (showProgress && (currentProgressPercent !== lastProgressPercent || status.status !== lastStatus)) {
|
|
939
|
+
const progressBar = this._createProgressBar(currentProgressPercent >= 0 ? currentProgressPercent : 0);
|
|
940
|
+
// Use process.stdout.write to potentially overwrite the line (works best in standard terminals)
|
|
941
|
+
process.stdout.write('\rAttempt ' + attempts + '/' + maxAttempts + ' | Status: ' + status.status + ' | Progress: ' + progressBar + ' ' + currentProgressPercent + '% ');
|
|
942
|
+
lastProgressPercent = currentProgressPercent;
|
|
943
|
+
lastStatus = status.status;
|
|
944
|
+
}
|
|
945
|
+
// Check evaluation status
|
|
946
|
+
if (status.status === 'complete') {
|
|
947
|
+
if (showProgress) {
|
|
948
|
+
process.stdout.write('\n'); // Keep direct console output for progress bar newline
|
|
949
|
+
// Use logger for status update
|
|
950
|
+
logger_instance_1.default.info('Evaluation complete! Fetching results...');
|
|
951
|
+
}
|
|
952
|
+
try {
|
|
953
|
+
// Use the dedicated results fetching method
|
|
954
|
+
const results = yield this.pullEvalResults(projectName, evalRunName);
|
|
955
|
+
if (results.length > 0) {
|
|
956
|
+
// Use logger for status update
|
|
957
|
+
logger_instance_1.default.info(`Successfully fetched ${results.length} results.`);
|
|
958
|
+
return results;
|
|
959
|
+
}
|
|
960
|
+
else {
|
|
961
|
+
// If complete status but no results, might be an issue. Log and return empty.
|
|
962
|
+
logger_instance_1.default.warn(`Evaluation reported complete, but no results were fetched for '${evalRunName}'.`);
|
|
963
|
+
return [];
|
|
964
|
+
}
|
|
965
|
+
}
|
|
966
|
+
catch (fetchError) {
|
|
967
|
+
if (showProgress)
|
|
968
|
+
process.stdout.write('\n'); // Keep direct console output
|
|
969
|
+
logger_instance_1.default.error(`Error fetching results after completion for '${evalRunName}': ${fetchError instanceof Error ? fetchError.message : String(fetchError)}`);
|
|
970
|
+
return []; // Return empty array on error
|
|
971
|
+
}
|
|
972
|
+
}
|
|
973
|
+
else if (status.status === 'failed') {
|
|
974
|
+
if (showProgress)
|
|
975
|
+
process.stdout.write('\n'); // Keep direct console output
|
|
976
|
+
logger_instance_1.default.error(`Evaluation failed for '${evalRunName}': ${status.error || status.message || 'Unknown error'}`);
|
|
977
|
+
return []; // Return empty array on failure
|
|
978
|
+
}
|
|
979
|
+
else if (status.status === 'unknown') {
|
|
980
|
+
// Log unknown status but continue polling
|
|
981
|
+
// Avoid flooding logs if status remains unknown
|
|
982
|
+
if (lastStatus !== 'unknown') {
|
|
983
|
+
if (showProgress)
|
|
984
|
+
process.stdout.write('\n'); // Keep direct console output
|
|
985
|
+
logger_instance_1.default.warn(`Evaluation status unknown for '${evalRunName}' (attempt ${attempts}). Retrying...`);
|
|
986
|
+
lastProgressPercent = -1; // Reset progress display
|
|
987
|
+
}
|
|
988
|
+
lastStatus = 'unknown';
|
|
989
|
+
}
|
|
990
|
+
else {
|
|
991
|
+
// Still processing (e.g., 'processing', 'running', 'pending')
|
|
992
|
+
lastStatus = status.status;
|
|
993
|
+
}
|
|
994
|
+
}
|
|
995
|
+
catch (error) {
|
|
996
|
+
// Log the error but continue polling (checkEvalStatus should handle internal errors gracefully)
|
|
997
|
+
if (showProgress)
|
|
998
|
+
process.stdout.write('\n'); // Keep direct console output
|
|
999
|
+
logger_instance_1.default.error(`Error during status check loop (attempt ${attempts}/${maxAttempts}): ${error instanceof Error ? error.message : String(error)}`);
|
|
1000
|
+
lastProgressPercent = -1; // Reset progress display
|
|
1001
|
+
lastStatus = 'error_in_loop'; // Indicate issue in the loop itself
|
|
1002
|
+
}
|
|
1003
|
+
// Wait before next poll only if not completed/failed
|
|
1004
|
+
if (lastStatus !== 'complete' && lastStatus !== 'failed') {
|
|
1005
|
+
yield new Promise(resolve => setTimeout(resolve, intervalMs));
|
|
1006
|
+
}
|
|
1007
|
+
else {
|
|
1008
|
+
// Break loop if already completed or failed to avoid unnecessary delay
|
|
1009
|
+
break;
|
|
1010
|
+
}
|
|
1011
|
+
} // End while loop
|
|
1012
|
+
// If loop finished without completing/failing
|
|
1013
|
+
if (lastStatus !== 'complete' && lastStatus !== 'failed') {
|
|
1014
|
+
if (showProgress)
|
|
1015
|
+
process.stdout.write('\n'); // Keep direct console output
|
|
1016
|
+
logger_instance_1.default.error(`Evaluation polling timed out after ${attempts} attempts for "${evalRunName}". Last known status: ${lastStatus}`);
|
|
1017
|
+
return []; // Return empty array on timeout
|
|
1018
|
+
}
|
|
1019
|
+
// Should technically be unreachable if break conditions work, but safeguard return
|
|
1020
|
+
return [];
|
|
1021
|
+
});
|
|
1022
|
+
}
|
|
1023
|
+
/**
|
|
1024
|
+
* Create a simple ASCII progress bar
|
|
1025
|
+
* @param percent The percentage to display (0-100)
|
|
1026
|
+
* @returns A string representing the progress bar
|
|
1027
|
+
*/
|
|
1028
|
+
_createProgressBar(percent) {
|
|
1029
|
+
const width = 25; // Slightly wider bar
|
|
1030
|
+
// Clamp percent between 0 and 100
|
|
1031
|
+
const clampedPercent = Math.max(0, Math.min(100, percent));
|
|
1032
|
+
const completed = Math.round(width * (clampedPercent / 100)); // Use round for potentially smoother look
|
|
1033
|
+
const remaining = width - completed;
|
|
1034
|
+
return '[' + '#'.repeat(completed) + '-'.repeat(remaining) + ']'; // Use different chars
|
|
1035
|
+
}
|
|
1036
|
+
}
|
|
1037
|
+
exports.JudgmentClient = JudgmentClient;
|
|
1038
|
+
//# sourceMappingURL=judgment-client.js.map
|