judgeval 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -68
- package/dist/cjs/common/tracer.js +235 -143
- package/dist/cjs/common/tracer.js.map +1 -1
- package/dist/cjs/constants.js +8 -5
- package/dist/cjs/constants.js.map +1 -1
- package/dist/cjs/data/datasets/eval-dataset-client.js +349 -0
- package/dist/cjs/data/datasets/eval-dataset-client.js.map +1 -0
- package/dist/cjs/data/datasets/eval-dataset.js +405 -0
- package/dist/cjs/data/datasets/eval-dataset.js.map +1 -0
- package/dist/cjs/data/example.js +22 -1
- package/dist/cjs/data/example.js.map +1 -1
- package/dist/cjs/e2etests/eval-operations.test.js +282 -0
- package/dist/cjs/e2etests/eval-operations.test.js.map +1 -0
- package/dist/cjs/e2etests/judgee-traces.test.js +278 -0
- package/dist/cjs/e2etests/judgee-traces.test.js.map +1 -0
- package/dist/cjs/index.js +1 -3
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/judgment-client.js +326 -645
- package/dist/cjs/judgment-client.js.map +1 -1
- package/dist/cjs/scorers/api-scorer.js +56 -48
- package/dist/cjs/scorers/api-scorer.js.map +1 -1
- package/dist/cjs/scorers/base-scorer.js +66 -11
- package/dist/cjs/scorers/base-scorer.js.map +1 -1
- package/dist/esm/common/tracer.js +236 -144
- package/dist/esm/common/tracer.js.map +1 -1
- package/dist/esm/constants.js +7 -4
- package/dist/esm/constants.js.map +1 -1
- package/dist/esm/data/datasets/eval-dataset-client.js +342 -0
- package/dist/esm/data/datasets/eval-dataset-client.js.map +1 -0
- package/dist/esm/data/datasets/eval-dataset.js +375 -0
- package/dist/esm/data/datasets/eval-dataset.js.map +1 -0
- package/dist/esm/data/example.js +22 -1
- package/dist/esm/data/example.js.map +1 -1
- package/dist/esm/e2etests/eval-operations.test.js +254 -0
- package/dist/esm/e2etests/eval-operations.test.js.map +1 -0
- package/dist/esm/e2etests/judgee-traces.test.js +253 -0
- package/dist/esm/e2etests/judgee-traces.test.js.map +1 -0
- package/dist/esm/index.js +0 -1
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/judgment-client.js +328 -647
- package/dist/esm/judgment-client.js.map +1 -1
- package/dist/esm/scorers/api-scorer.js +56 -48
- package/dist/esm/scorers/api-scorer.js.map +1 -1
- package/dist/esm/scorers/base-scorer.js +66 -11
- package/dist/esm/scorers/base-scorer.js.map +1 -1
- package/dist/types/common/tracer.d.ts +27 -14
- package/dist/types/constants.d.ts +4 -4
- package/dist/types/data/datasets/eval-dataset-client.d.ts +39 -0
- package/dist/types/data/datasets/eval-dataset.d.ts +45 -0
- package/dist/types/data/example.d.ts +24 -12
- package/dist/types/e2etests/eval-operations.test.d.ts +5 -0
- package/dist/types/e2etests/judgee-traces.test.d.ts +5 -0
- package/dist/types/index.d.ts +0 -1
- package/dist/types/judgment-client.d.ts +3 -47
- package/dist/types/scorers/api-scorer.d.ts +15 -15
- package/dist/types/scorers/base-scorer.d.ts +53 -10
- package/package.json +2 -1
- package/dist/cjs/scorers/exact-match-scorer.js +0 -84
- package/dist/cjs/scorers/exact-match-scorer.js.map +0 -1
- package/dist/esm/scorers/exact-match-scorer.js +0 -80
- package/dist/esm/scorers/exact-match-scorer.js.map +0 -1
- package/dist/types/scorers/exact-match-scorer.d.ts +0 -10
|
@@ -46,6 +46,10 @@ const rules_js_1 = require("./rules.js");
|
|
|
46
46
|
const run_evaluation_js_1 = require("./run-evaluation.js");
|
|
47
47
|
const constants_js_1 = require("./constants.js");
|
|
48
48
|
const logger_instance_js_1 = __importDefault(require("./common/logger-instance.js"));
|
|
49
|
+
// Keep progress bar imports if used elsewhere (e.g., waitForEvaluation)
|
|
50
|
+
const cli_progress_1 = __importDefault(require("cli-progress"));
|
|
51
|
+
const ansi_colors_1 = __importDefault(require("ansi-colors"));
|
|
52
|
+
const eval_dataset_client_js_1 = require("./data/datasets/eval-dataset-client.js");
|
|
49
53
|
// Load environment variables
|
|
50
54
|
dotenv.config();
|
|
51
55
|
/**
|
|
@@ -199,143 +203,10 @@ class JudgmentClient {
|
|
|
199
203
|
* Evaluate a dataset
|
|
200
204
|
*/
|
|
201
205
|
evaluateDataset(dataset_1, scorers_1, model_1, aggregator_1, metadata_1) {
|
|
202
|
-
return __awaiter(this, arguments, void 0, function* (dataset, //
|
|
203
|
-
scorers, model, aggregator, metadata, projectName = '', evalRunName = '', logResults = true, useJudgment = true, rules) {
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
const loadedScorers = [];
|
|
207
|
-
for (const scorer of scorers) {
|
|
208
|
-
try {
|
|
209
|
-
if (scorer instanceof base_scorer_js_1.ScorerWrapper) {
|
|
210
|
-
loadedScorers.push(scorer.loadImplementation(useJudgment));
|
|
211
|
-
}
|
|
212
|
-
else {
|
|
213
|
-
// Assuming scorers passed are already JudgevalScorer or APIJudgmentScorer
|
|
214
|
-
loadedScorers.push(scorer);
|
|
215
|
-
}
|
|
216
|
-
}
|
|
217
|
-
catch (error) {
|
|
218
|
-
throw new Error(`Failed to load implementation for scorer ${scorer.constructor.name}: ${error instanceof Error ? error.message : String(error)}`);
|
|
219
|
-
}
|
|
220
|
-
}
|
|
221
|
-
// Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules
|
|
222
|
-
if (rules && loadedScorers.some(scorer => scorer instanceof base_scorer_js_1.JudgevalScorer)) {
|
|
223
|
-
throw new Error('Cannot use Judgeval scorers (only API scorers) when using rules. Please either remove rules or use only APIJudgmentScorer types.');
|
|
224
|
-
}
|
|
225
|
-
// Convert ScorerWrapper in rules to their implementations
|
|
226
|
-
let loadedRules;
|
|
227
|
-
if (rules) {
|
|
228
|
-
loadedRules = [];
|
|
229
|
-
for (const rule of rules) {
|
|
230
|
-
try {
|
|
231
|
-
const processedConditions = [];
|
|
232
|
-
for (const condition of rule.conditions) {
|
|
233
|
-
// Convert metric if it's a ScorerWrapper
|
|
234
|
-
if (condition.metric instanceof base_scorer_js_1.ScorerWrapper) {
|
|
235
|
-
try {
|
|
236
|
-
const loadedMetric = condition.metric.loadImplementation(useJudgment);
|
|
237
|
-
const newCondition = new rules_js_1.Condition(loadedMetric);
|
|
238
|
-
Object.assign(newCondition, Object.assign(Object.assign({}, condition), { metric: loadedMetric }));
|
|
239
|
-
processedConditions.push(newCondition);
|
|
240
|
-
}
|
|
241
|
-
catch (error) {
|
|
242
|
-
throw new Error(`Failed to convert ScorerWrapper to implementation in rule '${rule.name}', condition metric '${condition.metric.constructor.name}': ${error instanceof Error ? error.message : String(error)}`);
|
|
243
|
-
}
|
|
244
|
-
}
|
|
245
|
-
else {
|
|
246
|
-
processedConditions.push(condition);
|
|
247
|
-
}
|
|
248
|
-
}
|
|
249
|
-
// Create new rule with processed conditions
|
|
250
|
-
const newRule = new rules_js_1.Rule(rule.name, processedConditions, rule.combine_type, rule.description, rule.notification, rule.ruleId);
|
|
251
|
-
loadedRules.push(newRule);
|
|
252
|
-
}
|
|
253
|
-
catch (error) {
|
|
254
|
-
throw new Error(`Failed to process rule '${rule.name}': ${error instanceof Error ? error.message : String(error)}`);
|
|
255
|
-
}
|
|
256
|
-
}
|
|
257
|
-
}
|
|
258
|
-
const evaluationRun = new evaluation_run_js_1.EvaluationRun({
|
|
259
|
-
logResults,
|
|
260
|
-
projectName,
|
|
261
|
-
evalName: evalRunName,
|
|
262
|
-
examples: dataset.examples, // Assuming dataset has an 'examples' property
|
|
263
|
-
scorers: loadedScorers,
|
|
264
|
-
model,
|
|
265
|
-
aggregator,
|
|
266
|
-
metadata,
|
|
267
|
-
judgmentApiKey: this.judgmentApiKey,
|
|
268
|
-
rules: loadedRules,
|
|
269
|
-
organizationId: this.organizationId
|
|
270
|
-
});
|
|
271
|
-
// Assuming override=false, ignoreErrors=true, asyncExecution=false as defaults for evaluateDataset
|
|
272
|
-
return (0, run_evaluation_js_1.runEval)(evaluationRun, false, true, false);
|
|
273
|
-
}
|
|
274
|
-
catch (error) {
|
|
275
|
-
if (error instanceof Error) {
|
|
276
|
-
if (error.message.includes('one or more fields are invalid')) {
|
|
277
|
-
throw new Error(`Please check your EvaluationRun object, one or more fields are invalid: \n${error.message}`);
|
|
278
|
-
}
|
|
279
|
-
else {
|
|
280
|
-
throw new Error(`An unexpected error occurred during evaluation: ${error.message}`);
|
|
281
|
-
}
|
|
282
|
-
}
|
|
283
|
-
else {
|
|
284
|
-
throw new Error(`An unexpected error occurred during evaluation: ${String(error)}`);
|
|
285
|
-
}
|
|
286
|
-
}
|
|
287
|
-
});
|
|
288
|
-
}
|
|
289
|
-
/**
|
|
290
|
-
* Create a dataset
|
|
291
|
-
*/
|
|
292
|
-
createDataset() {
|
|
293
|
-
// This would be implemented with EvalDataset
|
|
294
|
-
throw new Error('Not implemented yet');
|
|
295
|
-
}
|
|
296
|
-
/**
|
|
297
|
-
* Push a dataset to the Judgment platform
|
|
298
|
-
*/
|
|
299
|
-
pushDataset(alias_1, dataset_1, projectName_1) {
|
|
300
|
-
return __awaiter(this, arguments, void 0, function* (alias, dataset, projectName, overwrite = false) {
|
|
301
|
-
// This would be implemented with EvalDataset
|
|
302
|
-
throw new Error('Not implemented yet');
|
|
303
|
-
});
|
|
304
|
-
}
|
|
305
|
-
/**
|
|
306
|
-
* Pull a dataset from the Judgment platform
|
|
307
|
-
*/
|
|
308
|
-
pullDataset(alias, projectName) {
|
|
309
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
310
|
-
// This would be implemented with EvalDataset
|
|
311
|
-
throw new Error('Not implemented yet');
|
|
312
|
-
});
|
|
313
|
-
}
|
|
314
|
-
/**
|
|
315
|
-
* Delete a dataset from the Judgment platform
|
|
316
|
-
*/
|
|
317
|
-
deleteDataset(alias, projectName) {
|
|
318
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
319
|
-
// This would be implemented with EvalDataset
|
|
320
|
-
throw new Error('Not implemented yet');
|
|
321
|
-
});
|
|
322
|
-
}
|
|
323
|
-
/**
|
|
324
|
-
* Pull project dataset stats from the Judgment platform
|
|
325
|
-
*/
|
|
326
|
-
pullProjectDatasetStats(projectName) {
|
|
327
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
328
|
-
// This would be implemented with EvalDataset
|
|
329
|
-
throw new Error('Not implemented yet');
|
|
330
|
-
});
|
|
331
|
-
}
|
|
332
|
-
/**
|
|
333
|
-
* Insert examples into a dataset on the Judgment platform
|
|
334
|
-
*/
|
|
335
|
-
insertDataset(alias, examples, projectName) {
|
|
336
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
337
|
-
// This would be implemented with EvalDataset
|
|
338
|
-
throw new Error('Not implemented yet');
|
|
206
|
+
return __awaiter(this, arguments, void 0, function* (dataset, // Keep type loose for stub
|
|
207
|
+
scorers, model, aggregator, metadata, projectName = 'default_project', evalRunName = 'default_eval_run', logResults = true, useJudgment = true, rules) {
|
|
208
|
+
// Keep type loose for stub
|
|
209
|
+
throw new Error('Not implemented in JudgmentClient. Use EvalDatasetClient.');
|
|
339
210
|
});
|
|
340
211
|
}
|
|
341
212
|
/**
|
|
@@ -344,39 +215,29 @@ class JudgmentClient {
|
|
|
344
215
|
* @param evalRunName Name of the evaluation run
|
|
345
216
|
* @returns Array containing one object with 'id' and 'results' (list of ScoringResult)
|
|
346
217
|
*/
|
|
347
|
-
pullEval(projectName, evalRunName
|
|
348
|
-
) {
|
|
218
|
+
pullEval(projectName, evalRunName) {
|
|
349
219
|
return __awaiter(this, void 0, void 0, function* () {
|
|
350
|
-
var _a, _b, _c, _d;
|
|
351
|
-
// Body matches Python's structure for this endpoint
|
|
352
220
|
const evalRunRequestBody = {
|
|
353
221
|
project_name: projectName,
|
|
354
|
-
eval_name: evalRunName,
|
|
222
|
+
eval_name: evalRunName,
|
|
355
223
|
judgment_api_key: this.judgmentApiKey
|
|
356
224
|
};
|
|
357
225
|
try {
|
|
358
|
-
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL,
|
|
359
|
-
|
|
360
|
-
headers: {
|
|
361
|
-
'Content-Type': 'application/json',
|
|
362
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
363
|
-
'X-Organization-Id': this.organizationId
|
|
364
|
-
}
|
|
226
|
+
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL, evalRunRequestBody, {
|
|
227
|
+
headers: this.getAuthHeaders()
|
|
365
228
|
});
|
|
366
|
-
// Process the response to match the Python SDK's format
|
|
367
|
-
// Python returns [{ 'id': ..., 'results': [ScoringResult, ...]}]
|
|
368
|
-
// The API response is a list of results, each with an 'id' and 'result'
|
|
369
229
|
if (!Array.isArray(response.data) || response.data.length === 0) {
|
|
370
|
-
return [{ id: '', results: [] }];
|
|
230
|
+
return [{ id: '', results: [] }];
|
|
371
231
|
}
|
|
372
|
-
const evalRunResult = {
|
|
373
|
-
evalRunResult.id = ((_a = response.data[0]) === null || _a === void 0 ? void 0 : _a.id) || ''; // Assume ID is same for all results in run
|
|
232
|
+
const evalRunResult = [{}];
|
|
374
233
|
for (const result of response.data) {
|
|
234
|
+
const resultId = result.id || '';
|
|
375
235
|
const resultData = result.result || {};
|
|
236
|
+
// Extract data object from result data
|
|
376
237
|
const dataObject = resultData.data_object || {};
|
|
377
|
-
// Create Example
|
|
238
|
+
// Create Example with required input field
|
|
378
239
|
const example = new example_js_1.Example({
|
|
379
|
-
input: dataObject.input,
|
|
240
|
+
input: dataObject.input || '',
|
|
380
241
|
actualOutput: dataObject.actual_output,
|
|
381
242
|
expectedOutput: dataObject.expected_output,
|
|
382
243
|
context: dataObject.context,
|
|
@@ -386,110 +247,21 @@ class JudgmentClient {
|
|
|
386
247
|
expectedTools: dataObject.expected_tools,
|
|
387
248
|
exampleId: dataObject.example_id,
|
|
388
249
|
exampleIndex: dataObject.example_index,
|
|
389
|
-
timestamp: dataObject.timestamp
|
|
390
|
-
|
|
391
|
-
// Create ScoringResult
|
|
392
|
-
const scoringResult = new result_js_1.ScoringResult({
|
|
393
|
-
dataObject: example,
|
|
394
|
-
scorersData: resultData.scorers_data || [],
|
|
395
|
-
error: resultData.error
|
|
250
|
+
timestamp: dataObject.timestamp,
|
|
251
|
+
example: dataObject.example // Include example boolean
|
|
396
252
|
});
|
|
397
|
-
evalRunResult.
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
throw new Error(`Failed to pull evaluation results: ${statusCode} - ${errorMessage}`);
|
|
406
|
-
}
|
|
407
|
-
if (error instanceof Error) {
|
|
408
|
-
throw new Error(`Failed to pull evaluation results: ${error.message}`);
|
|
409
|
-
}
|
|
410
|
-
throw new Error(`Failed to pull evaluation results: ${String(error)}`);
|
|
411
|
-
}
|
|
412
|
-
});
|
|
413
|
-
}
|
|
414
|
-
/**
|
|
415
|
-
* Get evaluation run results (alias for pullEval with a more intuitive name)
|
|
416
|
-
* @param projectName Name of the project
|
|
417
|
-
* @param evalRunName Name of the evaluation run
|
|
418
|
-
* @returns Array containing one object with 'id' and 'results' (list of ScoringResult)
|
|
419
|
-
*/
|
|
420
|
-
getEvalRun(projectName, evalRunName) {
|
|
421
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
422
|
-
return this.pullEval(projectName, evalRunName);
|
|
423
|
-
});
|
|
424
|
-
}
|
|
425
|
-
/**
|
|
426
|
-
* List all evaluation runs for a project
|
|
427
|
-
* @param projectName Name of the project
|
|
428
|
-
* @param limit Maximum number of evaluation runs to return (default: 100)
|
|
429
|
-
* @param offset Offset for pagination (default: 0)
|
|
430
|
-
* @returns List of evaluation run metadata
|
|
431
|
-
*/
|
|
432
|
-
listEvalRuns(projectName_1) {
|
|
433
|
-
return __awaiter(this, arguments, void 0, function* (projectName, limit = 100, offset = 0) {
|
|
434
|
-
var _a, _b, _c;
|
|
435
|
-
try {
|
|
436
|
-
// Use ROOT_API for the base URL
|
|
437
|
-
const url = `${constants_js_1.ROOT_API}/projects/${projectName}/eval-runs`;
|
|
438
|
-
const response = yield axios_1.default.get(url, {
|
|
439
|
-
params: {
|
|
440
|
-
limit,
|
|
441
|
-
offset
|
|
442
|
-
},
|
|
443
|
-
headers: {
|
|
444
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
445
|
-
'X-Organization-Id': this.organizationId
|
|
446
|
-
}
|
|
447
|
-
});
|
|
448
|
-
return response.data || [];
|
|
449
|
-
}
|
|
450
|
-
catch (error) {
|
|
451
|
-
if (axios_1.default.isAxiosError(error)) {
|
|
452
|
-
const statusCode = (_a = error.response) === null || _a === void 0 ? void 0 : _a.status;
|
|
453
|
-
const errorMessage = ((_c = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data) === null || _c === void 0 ? void 0 : _c.detail) || error.message;
|
|
454
|
-
throw new Error(`Failed to list evaluation runs: ${statusCode} - ${errorMessage}`);
|
|
455
|
-
}
|
|
456
|
-
if (error instanceof Error) {
|
|
457
|
-
throw new Error(`Failed to list evaluation runs: ${error.message}`);
|
|
458
|
-
}
|
|
459
|
-
throw new Error(`Failed to list evaluation runs: ${String(error)}`);
|
|
460
|
-
}
|
|
461
|
-
});
|
|
462
|
-
}
|
|
463
|
-
/**
|
|
464
|
-
* Get evaluation run statistics
|
|
465
|
-
* @param projectName Name of the project
|
|
466
|
-
* @param evalRunName Name of the evaluation run
|
|
467
|
-
* @returns Statistics for the evaluation run
|
|
468
|
-
*/
|
|
469
|
-
getEvalRunStats(projectName, evalRunName) {
|
|
470
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
471
|
-
var _a, _b, _c;
|
|
472
|
-
try {
|
|
473
|
-
// Use ROOT_API for the base URL
|
|
474
|
-
const url = `${constants_js_1.ROOT_API}/projects/${projectName}/eval-runs/${evalRunName}/stats`;
|
|
475
|
-
const response = yield axios_1.default.get(url, {
|
|
476
|
-
headers: {
|
|
477
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
478
|
-
'X-Organization-Id': this.organizationId
|
|
479
|
-
}
|
|
480
|
-
});
|
|
481
|
-
return response.data || {};
|
|
253
|
+
evalRunResult[0].id = resultId;
|
|
254
|
+
evalRunResult[0].results = [new result_js_1.ScoringResult({
|
|
255
|
+
dataObject: example,
|
|
256
|
+
scorersData: resultData.scorers_data || [],
|
|
257
|
+
error: resultData.error
|
|
258
|
+
})];
|
|
259
|
+
}
|
|
260
|
+
return evalRunResult;
|
|
482
261
|
}
|
|
483
262
|
catch (error) {
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
const errorMessage = ((_c = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data) === null || _c === void 0 ? void 0 : _c.detail) || error.message;
|
|
487
|
-
throw new Error(`Failed to get evaluation run statistics: ${statusCode} - ${errorMessage}`);
|
|
488
|
-
}
|
|
489
|
-
if (error instanceof Error) {
|
|
490
|
-
throw new Error(`Failed to get evaluation run statistics: ${error.message}`);
|
|
491
|
-
}
|
|
492
|
-
throw new Error(`Failed to get evaluation run statistics: ${String(error)}`);
|
|
263
|
+
this.handleApiError(error, 'pullEval');
|
|
264
|
+
throw error;
|
|
493
265
|
}
|
|
494
266
|
});
|
|
495
267
|
}
|
|
@@ -502,92 +274,67 @@ class JudgmentClient {
|
|
|
502
274
|
*/
|
|
503
275
|
exportEvalResults(projectName_1, evalRunName_1) {
|
|
504
276
|
return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, format = 'json') {
|
|
277
|
+
logger_instance_js_1.default.info(`Exporting eval results for project '${projectName}', run '${evalRunName}' as ${format}`);
|
|
505
278
|
try {
|
|
506
|
-
const
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
return
|
|
279
|
+
const resultsData = yield this.pullEval(projectName, evalRunName);
|
|
280
|
+
if (!resultsData || resultsData.length === 0 || !resultsData[0].results) {
|
|
281
|
+
logger_instance_js_1.default.warn('No results found to export.');
|
|
282
|
+
return '';
|
|
510
283
|
}
|
|
284
|
+
const results = resultsData[0].results;
|
|
511
285
|
if (format === 'json') {
|
|
512
|
-
//
|
|
513
|
-
return JSON.stringify(
|
|
286
|
+
// Pretty print JSON
|
|
287
|
+
return JSON.stringify(results.map(r => r.toJSON()), null, 2);
|
|
514
288
|
}
|
|
515
289
|
else if (format === 'csv') {
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
const
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
290
|
+
if (results.length === 0)
|
|
291
|
+
return ''; // No data to export
|
|
292
|
+
// Dynamically determine headers from the first result object
|
|
293
|
+
// Flatten the structure for CSV
|
|
294
|
+
const flatResults = results.map(result => {
|
|
295
|
+
var _a, _b, _c;
|
|
296
|
+
const flat = {};
|
|
297
|
+
const exampleData = (_b = (_a = result.dataObject) === null || _a === void 0 ? void 0 : _a.toJSON()) !== null && _b !== void 0 ? _b : {}; // Use toJSON which gives snake_case
|
|
298
|
+
const scorersData = (_c = result.scorersData) !== null && _c !== void 0 ? _c : [];
|
|
299
|
+
// Add example data fields (snake_case)
|
|
300
|
+
for (const key in exampleData) {
|
|
301
|
+
// Prefix example fields to avoid collision, e.g., example_input
|
|
302
|
+
flat[`example_${key}`] = exampleData[key];
|
|
528
303
|
}
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
console.error(`Failed to dynamically import 'csv-writer': ${errorMsg}. Ensure it's installed (\`npm install csv-writer\`).`);
|
|
535
|
-
throw new Error("The 'csv-writer' package is required for CSV export but failed to load dynamically.");
|
|
536
|
-
}
|
|
537
|
-
try {
|
|
538
|
-
// Flatten the structure slightly for better CSV output
|
|
539
|
-
const processedResults = results.map((result) => {
|
|
540
|
-
// Flatten dataObject properties and scorersData
|
|
541
|
-
const flatResult = {};
|
|
542
|
-
flatResult.eval_run_id = evalRunData.id; // Add eval run ID
|
|
543
|
-
// Flatten dataObject
|
|
544
|
-
if (result.dataObject) {
|
|
545
|
-
for (const [key, value] of Object.entries(result.dataObject)) {
|
|
546
|
-
// Prefix with 'data_' to avoid potential clashes
|
|
547
|
-
flatResult[`data_${key}`] = (typeof value === 'object' && value !== null) ? JSON.stringify(value) : value;
|
|
548
|
-
}
|
|
549
|
-
}
|
|
550
|
-
// Flatten scorersData - creates columns like scorer_0_name, scorer_0_score, etc.
|
|
551
|
-
if (Array.isArray(result.scorersData)) {
|
|
552
|
-
result.scorersData.forEach((scorerData, index) => {
|
|
553
|
-
flatResult[`scorer_${index}_name`] = scorerData.name;
|
|
554
|
-
flatResult[`scorer_${index}_score`] = (typeof scorerData.score === 'object' && scorerData.score !== null) ? JSON.stringify(scorerData.score) : scorerData.score;
|
|
555
|
-
flatResult[`scorer_${index}_error`] = scorerData.error;
|
|
556
|
-
// Add other scorer fields if necessary, e.g., metadata
|
|
557
|
-
if (scorerData.additional_metadata) {
|
|
558
|
-
flatResult[`scorer_${index}_metadata`] = JSON.stringify(scorerData.additional_metadata);
|
|
559
|
-
}
|
|
560
|
-
});
|
|
561
|
-
}
|
|
562
|
-
flatResult.error = result.error; // Top-level error for the example processing
|
|
563
|
-
return flatResult;
|
|
564
|
-
});
|
|
565
|
-
// Define headers dynamically based on the keys of the first processed result
|
|
566
|
-
if (processedResults.length === 0) {
|
|
567
|
-
return 'No data to export after processing.'; // Handle case with no valid results after processing
|
|
568
|
-
}
|
|
569
|
-
const headers = Object.keys(processedResults[0]).map(key => ({ id: key, title: key }));
|
|
570
|
-
const csvStringifier = createObjectCsvStringifier({
|
|
571
|
-
header: headers
|
|
304
|
+
// Add scorers data
|
|
305
|
+
scorersData.forEach(scorer => {
|
|
306
|
+
flat[`scorer_${scorer.name}_score`] = scorer.score;
|
|
307
|
+
flat[`scorer_${scorer.name}_additional_metadata`] = JSON.stringify(scorer.additional_metadata);
|
|
308
|
+
flat[`scorer_${scorer.name}_error`] = scorer.error;
|
|
572
309
|
});
|
|
573
|
-
//
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
310
|
+
// Add top-level error if present
|
|
311
|
+
flat['top_level_error'] = result.error;
|
|
312
|
+
return flat;
|
|
313
|
+
});
|
|
314
|
+
// Get all unique keys from the flattened results for headers
|
|
315
|
+
const headers = Array.from(new Set(flatResults.flatMap(Object.keys)));
|
|
316
|
+
// Use papaparse for robust CSV generation
|
|
317
|
+
const Papa = require('papaparse'); // Use require here if not imported at top
|
|
318
|
+
const csv = Papa.unparse({
|
|
319
|
+
fields: headers,
|
|
320
|
+
data: flatResults
|
|
321
|
+
}, {
|
|
322
|
+
header: true,
|
|
323
|
+
quotes: true, // Ensure fields with commas/newlines are quoted
|
|
324
|
+
quoteChar: '"',
|
|
325
|
+
escapeChar: '"',
|
|
326
|
+
delimiter: ','
|
|
327
|
+
});
|
|
328
|
+
return csv;
|
|
581
329
|
}
|
|
582
330
|
else {
|
|
583
331
|
throw new Error(`Unsupported export format: ${format}`);
|
|
584
332
|
}
|
|
585
333
|
}
|
|
586
334
|
catch (error) {
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
throw new Error(`Failed to export evaluation results: ${String(error)}`);
|
|
335
|
+
logger_instance_js_1.default.error(`Error exporting eval results: ${error}`);
|
|
336
|
+
this.handleApiError(error, 'exportEvalResults');
|
|
337
|
+
throw error;
|
|
591
338
|
}
|
|
592
339
|
});
|
|
593
340
|
}
|
|
@@ -596,47 +343,23 @@ class JudgmentClient {
|
|
|
596
343
|
*/
|
|
597
344
|
deleteEval(projectName, evalRunNames) {
|
|
598
345
|
return __awaiter(this, void 0, void 0, function* () {
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
throw new Error('No evaluation run names provided');
|
|
602
|
-
}
|
|
603
|
-
// Body matches Python's structure for this endpoint
|
|
604
|
-
const evalRunRequestBody = {
|
|
346
|
+
logger_instance_js_1.default.info(`Deleting eval runs: ${evalRunNames.join(', ')} from project: ${projectName}`);
|
|
347
|
+
const requestBody = {
|
|
605
348
|
project_name: projectName,
|
|
606
349
|
eval_names: evalRunNames,
|
|
607
|
-
judgment_api_key: this.judgmentApiKey
|
|
350
|
+
judgment_api_key: this.judgmentApiKey,
|
|
608
351
|
};
|
|
609
352
|
try {
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
data: evalRunRequestBody,
|
|
613
|
-
headers: {
|
|
614
|
-
'Content-Type': 'application/json',
|
|
615
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
616
|
-
'X-Organization-Id': this.organizationId
|
|
617
|
-
}
|
|
353
|
+
yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_DELETE_API_URL, requestBody, {
|
|
354
|
+
headers: this.getAuthHeaders()
|
|
618
355
|
});
|
|
619
|
-
|
|
356
|
+
logger_instance_js_1.default.info('Successfully deleted eval runs.');
|
|
357
|
+
return true;
|
|
620
358
|
}
|
|
621
359
|
catch (error) {
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
if (status === 404) {
|
|
626
|
-
throw new Error(`Eval results not found: ${JSON.stringify(data)}`);
|
|
627
|
-
}
|
|
628
|
-
else if (status === 500) {
|
|
629
|
-
throw new Error(`Error deleting eval results: ${JSON.stringify(data)}`);
|
|
630
|
-
}
|
|
631
|
-
else {
|
|
632
|
-
throw new Error(`Error deleting eval results (${status}): ${JSON.stringify(data)}`);
|
|
633
|
-
}
|
|
634
|
-
}
|
|
635
|
-
// Rethrow original or wrapped error
|
|
636
|
-
if (error instanceof Error) {
|
|
637
|
-
throw new Error(`Error deleting eval results: ${error.message}`);
|
|
638
|
-
}
|
|
639
|
-
throw new Error(`Error deleting eval results: ${String(error)}`);
|
|
360
|
+
logger_instance_js_1.default.error(`Error deleting eval runs: ${error}`);
|
|
361
|
+
this.handleApiError(error, 'deleteEval');
|
|
362
|
+
return false;
|
|
640
363
|
}
|
|
641
364
|
});
|
|
642
365
|
}
|
|
@@ -645,43 +368,22 @@ class JudgmentClient {
|
|
|
645
368
|
*/
|
|
646
369
|
deleteProjectEvals(projectName) {
|
|
647
370
|
return __awaiter(this, void 0, void 0, function* () {
|
|
648
|
-
|
|
371
|
+
logger_instance_js_1.default.info(`Deleting ALL eval runs for project: ${projectName}`);
|
|
372
|
+
const requestBody = {
|
|
373
|
+
project_name: projectName,
|
|
374
|
+
judgment_api_key: this.judgmentApiKey,
|
|
375
|
+
};
|
|
649
376
|
try {
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
// Remove judgment_api_key from body to match Python (uses header auth)
|
|
653
|
-
data: {
|
|
654
|
-
project_name: projectName,
|
|
655
|
-
},
|
|
656
|
-
headers: {
|
|
657
|
-
'Content-Type': 'application/json',
|
|
658
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
659
|
-
'X-Organization-Id': this.organizationId
|
|
660
|
-
}
|
|
377
|
+
yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_DELETE_PROJECT_API_URL, requestBody, {
|
|
378
|
+
headers: this.getAuthHeaders()
|
|
661
379
|
});
|
|
662
|
-
|
|
663
|
-
return
|
|
380
|
+
logger_instance_js_1.default.info(`Successfully deleted all eval runs for project ${projectName}.`);
|
|
381
|
+
return true;
|
|
664
382
|
}
|
|
665
383
|
catch (error) {
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
if (status === 404) {
|
|
670
|
-
// Assuming 404 might mean project not found or no evals to delete
|
|
671
|
-
console.warn(`Project '${projectName}' not found or no evals to delete.`);
|
|
672
|
-
return false; // Or true depending on desired idempotency behavior
|
|
673
|
-
}
|
|
674
|
-
else if (status === 500) {
|
|
675
|
-
throw new Error(`Error deleting project evals: ${JSON.stringify(data)}`);
|
|
676
|
-
}
|
|
677
|
-
else {
|
|
678
|
-
throw new Error(`Error deleting project evals (${status}): ${JSON.stringify(data)}`);
|
|
679
|
-
}
|
|
680
|
-
}
|
|
681
|
-
if (error instanceof Error) {
|
|
682
|
-
throw new Error(`Error deleting project evals: ${error.message}`);
|
|
683
|
-
}
|
|
684
|
-
throw new Error(`Error deleting project evals: ${String(error)}`);
|
|
384
|
+
logger_instance_js_1.default.error(`Error deleting project evals: ${error}`);
|
|
385
|
+
this.handleApiError(error, 'deleteProjectEvals');
|
|
386
|
+
return false;
|
|
685
387
|
}
|
|
686
388
|
});
|
|
687
389
|
}
|
|
@@ -690,37 +392,34 @@ class JudgmentClient {
|
|
|
690
392
|
*/
|
|
691
393
|
createProject(projectName) {
|
|
692
394
|
return __awaiter(this, void 0, void 0, function* () {
|
|
395
|
+
logger_instance_js_1.default.info(`Creating project: ${projectName}`);
|
|
396
|
+
const requestBody = {
|
|
397
|
+
project_name: projectName,
|
|
398
|
+
judgment_api_key: this.judgmentApiKey,
|
|
399
|
+
};
|
|
693
400
|
try {
|
|
694
|
-
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_PROJECT_CREATE_API_URL,
|
|
695
|
-
|
|
696
|
-
{
|
|
697
|
-
project_name: projectName,
|
|
698
|
-
}, {
|
|
699
|
-
headers: {
|
|
700
|
-
'Content-Type': 'application/json',
|
|
701
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
702
|
-
'X-Organization-Id': this.organizationId
|
|
703
|
-
}
|
|
401
|
+
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_PROJECT_CREATE_API_URL, requestBody, {
|
|
402
|
+
headers: this.getAuthHeaders()
|
|
704
403
|
});
|
|
705
|
-
//
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
if (axios_1.default.isAxiosError(error) && error.response) {
|
|
710
|
-
// Check for specific conflict error (e.g., 409) if API provides it
|
|
711
|
-
if (error.response.status === 409) {
|
|
712
|
-
console.warn(`Project '${projectName}' already exists.`);
|
|
713
|
-
return false; // Or true if idempotent creation is desired
|
|
714
|
-
}
|
|
715
|
-
throw new Error(`Error creating project (${error.response.status}): ${JSON.stringify(error.response.data)}`);
|
|
404
|
+
// Check for specific success message or status if API provides one
|
|
405
|
+
if (response.data && response.data.message === 'Project added successfully') {
|
|
406
|
+
logger_instance_js_1.default.info(`Successfully created project: ${projectName}`);
|
|
407
|
+
return true;
|
|
716
408
|
}
|
|
717
|
-
else if (
|
|
718
|
-
|
|
409
|
+
else if (response.data && response.data.message === 'Project already exists') {
|
|
410
|
+
logger_instance_js_1.default.warn(`Project '${projectName}' already exists.`);
|
|
411
|
+
return true; // Or false, depending on desired behavior for existing projects
|
|
719
412
|
}
|
|
720
413
|
else {
|
|
721
|
-
|
|
414
|
+
logger_instance_js_1.default.error(`Failed to create project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
|
|
415
|
+
return false;
|
|
722
416
|
}
|
|
723
417
|
}
|
|
418
|
+
catch (error) {
|
|
419
|
+
logger_instance_js_1.default.error(`Error creating project: ${error}`);
|
|
420
|
+
this.handleApiError(error, 'createProject');
|
|
421
|
+
return false;
|
|
422
|
+
}
|
|
724
423
|
});
|
|
725
424
|
}
|
|
726
425
|
/**
|
|
@@ -728,37 +427,29 @@ class JudgmentClient {
|
|
|
728
427
|
*/
|
|
729
428
|
deleteProject(projectName) {
|
|
730
429
|
return __awaiter(this, void 0, void 0, function* () {
|
|
430
|
+
logger_instance_js_1.default.info(`Deleting project: ${projectName}`);
|
|
431
|
+
const requestBody = {
|
|
432
|
+
project_name: projectName,
|
|
433
|
+
judgment_api_key: this.judgmentApiKey,
|
|
434
|
+
};
|
|
731
435
|
try {
|
|
732
|
-
const response = yield axios_1.default.
|
|
733
|
-
|
|
734
|
-
// Remove judgment_api_key from body to match Python (uses header auth)
|
|
735
|
-
data: {
|
|
736
|
-
project_name: projectName,
|
|
737
|
-
},
|
|
738
|
-
headers: {
|
|
739
|
-
'Content-Type': 'application/json',
|
|
740
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
741
|
-
'X-Organization-Id': this.organizationId
|
|
742
|
-
}
|
|
436
|
+
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_PROJECT_DELETE_API_URL, requestBody, {
|
|
437
|
+
headers: this.getAuthHeaders()
|
|
743
438
|
});
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
catch (error) {
|
|
748
|
-
if (axios_1.default.isAxiosError(error) && error.response) {
|
|
749
|
-
if (error.response.status === 404) {
|
|
750
|
-
console.warn(`Project '${projectName}' not found for deletion.`);
|
|
751
|
-
return false; // Or true depending on desired idempotency
|
|
752
|
-
}
|
|
753
|
-
throw new Error(`Error deleting project (${error.response.status}): ${JSON.stringify(error.response.data)}`);
|
|
754
|
-
}
|
|
755
|
-
else if (error instanceof Error) {
|
|
756
|
-
throw new Error(`Error deleting project: ${error.message}`);
|
|
439
|
+
if (response.data && response.data.message === 'Project deleted successfully') {
|
|
440
|
+
logger_instance_js_1.default.info(`Successfully deleted project: ${projectName}`);
|
|
441
|
+
return true;
|
|
757
442
|
}
|
|
758
443
|
else {
|
|
759
|
-
|
|
444
|
+
logger_instance_js_1.default.error(`Failed to delete project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
|
|
445
|
+
return false;
|
|
760
446
|
}
|
|
761
447
|
}
|
|
448
|
+
catch (error) {
|
|
449
|
+
logger_instance_js_1.default.error(`Error deleting project: ${error}`);
|
|
450
|
+
this.handleApiError(error, 'deleteProject');
|
|
451
|
+
return false;
|
|
452
|
+
}
|
|
762
453
|
});
|
|
763
454
|
}
|
|
764
455
|
/**
|
|
@@ -766,35 +457,36 @@ class JudgmentClient {
|
|
|
766
457
|
*/
|
|
767
458
|
validateApiKey() {
|
|
768
459
|
return __awaiter(this, void 0, void 0, function* () {
|
|
769
|
-
var _a, _b;
|
|
460
|
+
var _a, _b, _c, _d;
|
|
461
|
+
logger_instance_js_1.default.debug('Validating API Key...');
|
|
770
462
|
try {
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
// Removed 'X-Organization-Id' header to match Python for this specific endpoint
|
|
778
|
-
}
|
|
779
|
-
});
|
|
780
|
-
if (response.status === 200) {
|
|
781
|
-
return [true, JSON.stringify(response.data)];
|
|
782
|
-
}
|
|
783
|
-
else {
|
|
784
|
-
// Status might be non-200 but still valid JSON error response
|
|
785
|
-
return [false, ((_a = response.data) === null || _a === void 0 ? void 0 : _a.detail) || `Error validating API key (Status: ${response.status})`];
|
|
786
|
-
}
|
|
463
|
+
// Instantiate EvalDatasetClient to perform the validation call
|
|
464
|
+
const datasetClient = new eval_dataset_client_js_1.EvalDatasetClient(this.judgmentApiKey, this.organizationId);
|
|
465
|
+
// Use the dataset client to make the call
|
|
466
|
+
yield datasetClient.pullProjectDatasetStats('__api_key_validation__');
|
|
467
|
+
logger_instance_js_1.default.debug('API Key appears valid.');
|
|
468
|
+
return [true, 'API Key is valid.'];
|
|
787
469
|
}
|
|
788
470
|
catch (error) {
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
471
|
+
let message = 'API Key validation failed.';
|
|
472
|
+
if (axios_1.default.isAxiosError(error)) {
|
|
473
|
+
if (((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 401 || ((_b = error.response) === null || _b === void 0 ? void 0 : _b.status) === 403) {
|
|
474
|
+
message = 'API Key is invalid or expired.';
|
|
475
|
+
}
|
|
476
|
+
else if (((_c = error.response) === null || _c === void 0 ? void 0 : _c.status) === 404) {
|
|
477
|
+
// If validation endpoint returns 404, key might be valid but endpoint wrong/project not found
|
|
478
|
+
// This depends on the specific validation endpoint behavior
|
|
479
|
+
message = 'API Key might be valid, but validation endpoint returned 404.';
|
|
480
|
+
}
|
|
481
|
+
else {
|
|
482
|
+
message = `API Key validation failed with status ${(_d = error.response) === null || _d === void 0 ? void 0 : _d.status}: ${error.message}`;
|
|
483
|
+
}
|
|
794
484
|
}
|
|
795
485
|
else {
|
|
796
|
-
|
|
486
|
+
message = `API Key validation failed: ${String(error)}`;
|
|
797
487
|
}
|
|
488
|
+
logger_instance_js_1.default.error(message);
|
|
489
|
+
return [false, message];
|
|
798
490
|
}
|
|
799
491
|
});
|
|
800
492
|
}
|
|
@@ -819,17 +511,12 @@ class JudgmentClient {
|
|
|
819
511
|
*/
|
|
820
512
|
pullEvalResults(projectName, evalRunName) {
|
|
821
513
|
return __awaiter(this, void 0, void 0, function* () {
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
const evalRunArray = yield this.pullEval(projectName, evalRunName);
|
|
825
|
-
// pullEval returns [{ id: ..., results: [...] }], extract results
|
|
826
|
-
return ((_a = evalRunArray[0]) === null || _a === void 0 ? void 0 : _a.results) || [];
|
|
827
|
-
}
|
|
828
|
-
catch (error) {
|
|
829
|
-
// Log error but return empty array to allow waitForEvaluation to potentially retry
|
|
830
|
-
logger_instance_js_1.default.error(`Failed to pull evaluation results for '${evalRunName}': ${error instanceof Error ? error.message : String(error)}`);
|
|
514
|
+
const rawResults = yield this.pullEval(projectName, evalRunName);
|
|
515
|
+
if (!rawResults || rawResults.length === 0 || !rawResults[0].results) {
|
|
831
516
|
return [];
|
|
832
517
|
}
|
|
518
|
+
// Assuming pullEval correctly returns results in the expected format
|
|
519
|
+
return rawResults[0].results;
|
|
833
520
|
});
|
|
834
521
|
}
|
|
835
522
|
/**
|
|
@@ -841,88 +528,72 @@ class JudgmentClient {
|
|
|
841
528
|
*/
|
|
842
529
|
checkEvalStatus(projectName, evalRunName) {
|
|
843
530
|
return __awaiter(this, void 0, void 0, function* () {
|
|
844
|
-
var _a
|
|
845
|
-
// Using 'eval_name' in body for consistency with pullEval/fetch endpoint.
|
|
531
|
+
var _a;
|
|
846
532
|
const requestBody = {
|
|
847
533
|
project_name: projectName,
|
|
848
|
-
eval_name: evalRunName,
|
|
534
|
+
eval_name: evalRunName,
|
|
849
535
|
judgment_api_key: this.judgmentApiKey,
|
|
850
536
|
};
|
|
851
537
|
try {
|
|
852
|
-
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL,
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
857
|
-
'X-Organization-Id': this.organizationId
|
|
858
|
-
},
|
|
859
|
-
timeout: 15000 // Slightly increased timeout for status checks
|
|
538
|
+
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL, requestBody, {
|
|
539
|
+
headers: this.getAuthHeaders(),
|
|
540
|
+
// Add a shorter timeout for status checks?
|
|
541
|
+
// timeout: 5000
|
|
860
542
|
});
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
if (
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
543
|
+
const data = response.data;
|
|
544
|
+
// Check if the response looks like a status object
|
|
545
|
+
if (data && typeof data.status === 'string') {
|
|
546
|
+
return {
|
|
547
|
+
status: data.status || 'unknown',
|
|
548
|
+
progress: typeof data.progress === 'number' ? data.progress : 0,
|
|
549
|
+
message: data.message || '',
|
|
550
|
+
error: data.error
|
|
551
|
+
};
|
|
552
|
+
}
|
|
553
|
+
// Check if the response looks like completed results (array format from pullEval)
|
|
554
|
+
else if (Array.isArray(data) && data.length > 0 && data[0].results) {
|
|
555
|
+
return {
|
|
556
|
+
status: 'completed',
|
|
557
|
+
progress: 100,
|
|
558
|
+
message: 'Evaluation completed.'
|
|
559
|
+
};
|
|
560
|
+
}
|
|
561
|
+
// Check if response looks like completed results (single object format)
|
|
562
|
+
else if (data && typeof data.id === 'string' && Array.isArray(data.results)) { // Adjust based on actual API response for single result fetch
|
|
563
|
+
return {
|
|
564
|
+
status: 'completed',
|
|
565
|
+
progress: 100,
|
|
566
|
+
message: 'Evaluation completed.'
|
|
567
|
+
};
|
|
568
|
+
}
|
|
569
|
+
// Handle other potential responses or assume pending/unknown
|
|
882
570
|
else {
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
const parsedProgress = parseFloat(statusData.progress);
|
|
890
|
-
if (!isNaN(parsedProgress)) {
|
|
891
|
-
progress = Math.max(0, Math.min(1, parsedProgress)); // Ensure progress is between 0 and 1
|
|
892
|
-
}
|
|
571
|
+
logger_instance_js_1.default.warn(`Unexpected response format when checking status for ${evalRunName}:`, data);
|
|
572
|
+
return {
|
|
573
|
+
status: 'unknown',
|
|
574
|
+
progress: 0,
|
|
575
|
+
message: 'Could not determine status from API response.'
|
|
576
|
+
};
|
|
893
577
|
}
|
|
894
|
-
const normalizedStatus = {
|
|
895
|
-
status: statusData.status || 'unknown',
|
|
896
|
-
progress: progress,
|
|
897
|
-
message: statusData.message || '',
|
|
898
|
-
error: statusData.error // Include error field if present
|
|
899
|
-
};
|
|
900
|
-
// Only log status if it's not being called from waitForEvaluation
|
|
901
|
-
// Check stack trace for caller function name
|
|
902
|
-
const stack = new Error().stack;
|
|
903
|
-
const isCalledByWaitForEvaluation = stack === null || stack === void 0 ? void 0 : stack.includes('waitForEvaluation');
|
|
904
|
-
if (!isCalledByWaitForEvaluation) {
|
|
905
|
-
// Use logger for status updates when called directly
|
|
906
|
-
logger_instance_js_1.default.info(`Evaluation Status: ${normalizedStatus.status}`);
|
|
907
|
-
logger_instance_js_1.default.info(`Progress: ${Math.round(normalizedStatus.progress * 100)}%`);
|
|
908
|
-
if (normalizedStatus.message) {
|
|
909
|
-
logger_instance_js_1.default.info(`Message: ${normalizedStatus.message}`);
|
|
910
|
-
}
|
|
911
|
-
if (normalizedStatus.error) {
|
|
912
|
-
logger_instance_js_1.default.error(`Error in status: ${normalizedStatus.error}`);
|
|
913
|
-
}
|
|
914
|
-
}
|
|
915
|
-
return normalizedStatus;
|
|
916
578
|
}
|
|
917
579
|
catch (error) {
|
|
918
|
-
// Don't throw
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
580
|
+
// Don't throw here, return status indicating error
|
|
581
|
+
let errorMessage = 'Failed to fetch evaluation status.';
|
|
582
|
+
let status = 'error';
|
|
583
|
+
if (axios_1.default.isAxiosError(error) && ((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 404) {
|
|
584
|
+
status = 'not_found';
|
|
585
|
+
errorMessage = 'Evaluation run not found.';
|
|
586
|
+
logger_instance_js_1.default.warn(`Evaluation run ${evalRunName} not found.`);
|
|
587
|
+
}
|
|
588
|
+
else {
|
|
589
|
+
this.handleApiError(error, 'checkEvalStatus');
|
|
590
|
+
errorMessage = `Error fetching status: ${String(error)}`;
|
|
591
|
+
}
|
|
922
592
|
return {
|
|
923
|
-
status:
|
|
593
|
+
status: status,
|
|
924
594
|
progress: 0,
|
|
925
|
-
message:
|
|
595
|
+
message: errorMessage,
|
|
596
|
+
error: String(error) // Include error string
|
|
926
597
|
};
|
|
927
598
|
}
|
|
928
599
|
});
|
|
@@ -936,105 +607,79 @@ class JudgmentClient {
|
|
|
936
607
|
*/
|
|
937
608
|
waitForEvaluation(projectName_1, evalRunName_1) {
|
|
938
609
|
return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, options = {}) {
|
|
939
|
-
const { intervalMs =
|
|
940
|
-
|
|
941
|
-
showProgress = true } = options;
|
|
942
|
-
let attempts = 0;
|
|
943
|
-
let lastProgressPercent = -1;
|
|
944
|
-
let lastStatus = '';
|
|
610
|
+
const { intervalMs = 5000, maxAttempts = 120, showProgress = true } = options; // Default: check every 5s for 10 mins
|
|
611
|
+
let progressBar;
|
|
945
612
|
if (showProgress) {
|
|
946
|
-
|
|
947
|
-
|
|
613
|
+
progressBar = new cli_progress_1.default.SingleBar({
|
|
614
|
+
format: `Waiting for ${ansi_colors_1.default.magenta(evalRunName)}... | ${ansi_colors_1.default.cyan('{bar}')} | {percentage}% || {status}`,
|
|
615
|
+
barCompleteChar: '\u2588',
|
|
616
|
+
barIncompleteChar: '\u2591',
|
|
617
|
+
hideCursor: true,
|
|
618
|
+
clearOnComplete: false,
|
|
619
|
+
stopOnComplete: true,
|
|
620
|
+
}, cli_progress_1.default.Presets.shades_classic);
|
|
621
|
+
progressBar.start(100, 0, { status: 'Initiating...' });
|
|
948
622
|
}
|
|
949
|
-
|
|
950
|
-
attempts++;
|
|
623
|
+
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
|
951
624
|
try {
|
|
952
|
-
const
|
|
953
|
-
const
|
|
954
|
-
|
|
955
|
-
if (
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
}
|
|
962
|
-
// Check evaluation status
|
|
963
|
-
if (status.status === 'complete') {
|
|
964
|
-
if (showProgress) {
|
|
965
|
-
process.stdout.write('\n'); // Keep direct console output for progress bar newline
|
|
966
|
-
// Use logger for status update
|
|
967
|
-
logger_instance_js_1.default.info('Evaluation complete! Fetching results...');
|
|
968
|
-
}
|
|
969
|
-
try {
|
|
970
|
-
// Use the dedicated results fetching method
|
|
971
|
-
const results = yield this.pullEvalResults(projectName, evalRunName);
|
|
972
|
-
if (results.length > 0) {
|
|
973
|
-
// Use logger for status update
|
|
974
|
-
logger_instance_js_1.default.info(`Successfully fetched ${results.length} results.`);
|
|
975
|
-
return results;
|
|
976
|
-
}
|
|
977
|
-
else {
|
|
978
|
-
// If complete status but no results, might be an issue. Log and return empty.
|
|
979
|
-
logger_instance_js_1.default.warn(`Evaluation reported complete, but no results were fetched for '${evalRunName}'.`);
|
|
980
|
-
return [];
|
|
981
|
-
}
|
|
982
|
-
}
|
|
983
|
-
catch (fetchError) {
|
|
984
|
-
if (showProgress)
|
|
985
|
-
process.stdout.write('\n'); // Keep direct console output
|
|
986
|
-
logger_instance_js_1.default.error(`Error fetching results after completion for '${evalRunName}': ${fetchError instanceof Error ? fetchError.message : String(fetchError)}`);
|
|
987
|
-
return []; // Return empty array on error
|
|
988
|
-
}
|
|
989
|
-
}
|
|
990
|
-
else if (status.status === 'failed') {
|
|
991
|
-
if (showProgress)
|
|
992
|
-
process.stdout.write('\n'); // Keep direct console output
|
|
993
|
-
logger_instance_js_1.default.error(`Evaluation failed for '${evalRunName}': ${status.error || status.message || 'Unknown error'}`);
|
|
994
|
-
return []; // Return empty array on failure
|
|
995
|
-
}
|
|
996
|
-
else if (status.status === 'unknown') {
|
|
997
|
-
// Log unknown status but continue polling
|
|
998
|
-
// Avoid flooding logs if status remains unknown
|
|
999
|
-
if (lastStatus !== 'unknown') {
|
|
1000
|
-
if (showProgress)
|
|
1001
|
-
process.stdout.write('\n'); // Keep direct console output
|
|
1002
|
-
logger_instance_js_1.default.warn(`Evaluation status unknown for '${evalRunName}' (attempt ${attempts}). Retrying...`);
|
|
1003
|
-
lastProgressPercent = -1; // Reset progress display
|
|
625
|
+
const statusResult = yield this.checkEvalStatus(projectName, evalRunName);
|
|
626
|
+
const progress = Math.max(0, Math.min(100, statusResult.progress || 0)); // Clamp progress
|
|
627
|
+
const statusText = statusResult.message || statusResult.status;
|
|
628
|
+
if (progressBar) {
|
|
629
|
+
progressBar.update(progress, { status: statusText });
|
|
630
|
+
}
|
|
631
|
+
if (statusResult.status === 'completed') {
|
|
632
|
+
if (progressBar) {
|
|
633
|
+
progressBar.update(100, { status: ansi_colors_1.default.green('Completed! Fetching results...') });
|
|
1004
634
|
}
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
635
|
+
// Fetch final results using pullEval
|
|
636
|
+
const finalResults = yield this.pullEvalResults(projectName, evalRunName);
|
|
637
|
+
logger_instance_js_1.default.info(`Evaluation run ${evalRunName} completed successfully.`);
|
|
638
|
+
return finalResults;
|
|
639
|
+
}
|
|
640
|
+
else if (statusResult.status === 'error' || statusResult.status === 'failed') {
|
|
641
|
+
// Concatenate error details into a single message string
|
|
642
|
+
const errorMsg = `Evaluation run ${evalRunName} failed or encountered an error: ${statusResult.error ? String(statusResult.error) : statusResult.message}`;
|
|
643
|
+
logger_instance_js_1.default.error(errorMsg);
|
|
644
|
+
if (progressBar)
|
|
645
|
+
progressBar.stop();
|
|
646
|
+
// Pass only the combined message to the constructor
|
|
647
|
+
throw new run_evaluation_js_1.JudgmentAPIError(errorMsg);
|
|
648
|
+
}
|
|
649
|
+
else if (statusResult.status === 'not_found') {
|
|
650
|
+
const errorMsg = `Evaluation run ${evalRunName} not found.`;
|
|
651
|
+
logger_instance_js_1.default.error(errorMsg);
|
|
652
|
+
if (progressBar)
|
|
653
|
+
progressBar.stop();
|
|
654
|
+
// Pass only the message to the constructor
|
|
655
|
+
throw new run_evaluation_js_1.JudgmentAPIError(errorMsg);
|
|
656
|
+
}
|
|
657
|
+
// Wait for the next interval
|
|
658
|
+
yield new Promise(resolve => setTimeout(resolve, intervalMs));
|
|
1011
659
|
}
|
|
1012
660
|
catch (error) {
|
|
1013
|
-
//
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
661
|
+
// Handle errors during the wait loop (e.g., network issues during checkEvalStatus)
|
|
662
|
+
logger_instance_js_1.default.error(`Error during waitForEvaluation loop (attempt ${attempt}): ${error}`);
|
|
663
|
+
// Option: Rethrow immediately vs. retry vs. specific handling
|
|
664
|
+
if (error instanceof run_evaluation_js_1.JudgmentAPIError) { // If it was already a processed API error, rethrow
|
|
665
|
+
if (progressBar)
|
|
666
|
+
progressBar.stop();
|
|
667
|
+
throw error;
|
|
668
|
+
}
|
|
669
|
+
// For other errors, wait and retry (up to maxAttempts)
|
|
670
|
+
if (attempt === maxAttempts) {
|
|
671
|
+
if (progressBar)
|
|
672
|
+
progressBar.stop();
|
|
673
|
+
throw new Error(`waitForEvaluation failed after ${maxAttempts} attempts: ${error}`);
|
|
674
|
+
}
|
|
675
|
+
// Still retryable, wait for interval
|
|
1022
676
|
yield new Promise(resolve => setTimeout(resolve, intervalMs));
|
|
1023
677
|
}
|
|
1024
|
-
else {
|
|
1025
|
-
// Break loop if already completed or failed to avoid unnecessary delay
|
|
1026
|
-
break;
|
|
1027
|
-
}
|
|
1028
|
-
} // End while loop
|
|
1029
|
-
// If loop finished without completing/failing
|
|
1030
|
-
if (lastStatus !== 'complete' && lastStatus !== 'failed') {
|
|
1031
|
-
if (showProgress)
|
|
1032
|
-
process.stdout.write('\n'); // Keep direct console output
|
|
1033
|
-
logger_instance_js_1.default.error(`Evaluation polling timed out after ${attempts} attempts for "${evalRunName}". Last known status: ${lastStatus}`);
|
|
1034
|
-
return []; // Return empty array on timeout
|
|
1035
678
|
}
|
|
1036
|
-
//
|
|
1037
|
-
|
|
679
|
+
// If loop finishes without completion or error
|
|
680
|
+
if (progressBar)
|
|
681
|
+
progressBar.stop();
|
|
682
|
+
throw new Error(`Evaluation run ${evalRunName} did not complete after ${maxAttempts} attempts.`);
|
|
1038
683
|
});
|
|
1039
684
|
}
|
|
1040
685
|
/**
|
|
@@ -1043,12 +688,48 @@ class JudgmentClient {
|
|
|
1043
688
|
* @returns A string representing the progress bar
|
|
1044
689
|
*/
|
|
1045
690
|
_createProgressBar(percent) {
|
|
1046
|
-
const width =
|
|
1047
|
-
|
|
1048
|
-
const
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
691
|
+
const width = 20; // Width of the progress bar
|
|
692
|
+
const filled = Math.round(width * (percent / 100));
|
|
693
|
+
const empty = width - filled;
|
|
694
|
+
return `[${'#'.repeat(filled)}${'.'.repeat(empty)}] ${percent.toFixed(1)}%`;
|
|
695
|
+
}
|
|
696
|
+
// Keep helper methods private
|
|
697
|
+
getAuthHeaders() {
|
|
698
|
+
return {
|
|
699
|
+
'Content-Type': 'application/json',
|
|
700
|
+
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
701
|
+
'X-Organization-Id': this.organizationId,
|
|
702
|
+
};
|
|
703
|
+
}
|
|
704
|
+
// Ensure this handles errors from Eval/Project API calls correctly
|
|
705
|
+
handleApiError(error, context) {
|
|
706
|
+
logger_instance_js_1.default.error(`API Error during ${context}:`);
|
|
707
|
+
if (axios_1.default.isAxiosError(error)) {
|
|
708
|
+
const axiosError = error;
|
|
709
|
+
const response = axiosError.response;
|
|
710
|
+
if (response) {
|
|
711
|
+
logger_instance_js_1.default.error(`Status: ${response.status} ${response.statusText}`);
|
|
712
|
+
logger_instance_js_1.default.debug('Response Data:', response.data);
|
|
713
|
+
if (response.status === 422) {
|
|
714
|
+
logger_instance_js_1.default.error('Validation Error Detail:', response.data);
|
|
715
|
+
}
|
|
716
|
+
else if (context === 'pullEval' && response.status === 404) { // Keep eval-specific handling
|
|
717
|
+
logger_instance_js_1.default.error(`Evaluation run not found.`);
|
|
718
|
+
}
|
|
719
|
+
else if (context.startsWith('delete') && response.status === 404) { // Keep generic delete handling
|
|
720
|
+
logger_instance_js_1.default.warn(`${context}: Resource not found, may have already been deleted.`);
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
else if (axiosError.request) {
|
|
724
|
+
logger_instance_js_1.default.error('No response received from server.');
|
|
725
|
+
}
|
|
726
|
+
else {
|
|
727
|
+
logger_instance_js_1.default.error(`Error setting up API request for ${context}`);
|
|
728
|
+
}
|
|
729
|
+
}
|
|
730
|
+
else {
|
|
731
|
+
logger_instance_js_1.default.error(`Unexpected error during ${context}`);
|
|
732
|
+
}
|
|
1052
733
|
}
|
|
1053
734
|
}
|
|
1054
735
|
exports.JudgmentClient = JudgmentClient;
|