judgeval 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/common/logger-instance.js +19 -17
- package/dist/cjs/common/logger-instance.js.map +1 -1
- package/dist/cjs/common/tracer.js +58 -50
- package/dist/cjs/common/tracer.js.map +1 -1
- package/dist/cjs/constants.js +6 -4
- package/dist/cjs/constants.js.map +1 -1
- package/dist/cjs/data/datasets/eval-dataset-client.js +349 -0
- package/dist/cjs/data/datasets/eval-dataset-client.js.map +1 -0
- package/dist/cjs/data/datasets/eval-dataset.js +405 -0
- package/dist/cjs/data/datasets/eval-dataset.js.map +1 -0
- package/dist/cjs/data/example.js +22 -1
- package/dist/cjs/data/example.js.map +1 -1
- package/dist/cjs/e2etests/eval-operations.test.js +282 -0
- package/dist/cjs/e2etests/eval-operations.test.js.map +1 -0
- package/dist/cjs/e2etests/judgee-traces.test.js +278 -0
- package/dist/cjs/e2etests/judgee-traces.test.js.map +1 -0
- package/dist/cjs/judgment-client.js +309 -534
- package/dist/cjs/judgment-client.js.map +1 -1
- package/dist/esm/common/logger-instance.js +19 -17
- package/dist/esm/common/logger-instance.js.map +1 -1
- package/dist/esm/common/tracer.js +58 -50
- package/dist/esm/common/tracer.js.map +1 -1
- package/dist/esm/constants.js +5 -3
- package/dist/esm/constants.js.map +1 -1
- package/dist/esm/data/datasets/eval-dataset-client.js +342 -0
- package/dist/esm/data/datasets/eval-dataset-client.js.map +1 -0
- package/dist/esm/data/datasets/eval-dataset.js +375 -0
- package/dist/esm/data/datasets/eval-dataset.js.map +1 -0
- package/dist/esm/data/example.js +22 -1
- package/dist/esm/data/example.js.map +1 -1
- package/dist/esm/e2etests/eval-operations.test.js +254 -0
- package/dist/esm/e2etests/eval-operations.test.js.map +1 -0
- package/dist/esm/e2etests/judgee-traces.test.js +253 -0
- package/dist/esm/e2etests/judgee-traces.test.js.map +1 -0
- package/dist/esm/judgment-client.js +311 -536
- package/dist/esm/judgment-client.js.map +1 -1
- package/dist/types/common/tracer.d.ts +0 -1
- package/dist/types/constants.d.ts +2 -3
- package/dist/types/data/datasets/eval-dataset-client.d.ts +39 -0
- package/dist/types/data/datasets/eval-dataset.d.ts +45 -0
- package/dist/types/data/example.d.ts +24 -12
- package/dist/types/e2etests/eval-operations.test.d.ts +5 -0
- package/dist/types/e2etests/judgee-traces.test.d.ts +5 -0
- package/dist/types/judgment-client.d.ts +3 -25
- package/package.json +3 -9
|
@@ -46,6 +46,10 @@ const rules_js_1 = require("./rules.js");
|
|
|
46
46
|
const run_evaluation_js_1 = require("./run-evaluation.js");
|
|
47
47
|
const constants_js_1 = require("./constants.js");
|
|
48
48
|
const logger_instance_js_1 = __importDefault(require("./common/logger-instance.js"));
|
|
49
|
+
// Keep progress bar imports if used elsewhere (e.g., waitForEvaluation)
|
|
50
|
+
const cli_progress_1 = __importDefault(require("cli-progress"));
|
|
51
|
+
const ansi_colors_1 = __importDefault(require("ansi-colors"));
|
|
52
|
+
const eval_dataset_client_js_1 = require("./data/datasets/eval-dataset-client.js");
|
|
49
53
|
// Load environment variables
|
|
50
54
|
dotenv.config();
|
|
51
55
|
/**
|
|
@@ -199,143 +203,10 @@ class JudgmentClient {
|
|
|
199
203
|
* Evaluate a dataset
|
|
200
204
|
*/
|
|
201
205
|
evaluateDataset(dataset_1, scorers_1, model_1, aggregator_1, metadata_1) {
|
|
202
|
-
return __awaiter(this, arguments, void 0, function* (dataset, //
|
|
203
|
-
scorers, model, aggregator, metadata, projectName = '', evalRunName = '', logResults = true, useJudgment = true, rules) {
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
const loadedScorers = [];
|
|
207
|
-
for (const scorer of scorers) {
|
|
208
|
-
try {
|
|
209
|
-
if (scorer instanceof base_scorer_js_1.ScorerWrapper) {
|
|
210
|
-
loadedScorers.push(scorer.loadImplementation(useJudgment));
|
|
211
|
-
}
|
|
212
|
-
else {
|
|
213
|
-
// Assuming scorers passed are already JudgevalScorer or APIJudgmentScorer
|
|
214
|
-
loadedScorers.push(scorer);
|
|
215
|
-
}
|
|
216
|
-
}
|
|
217
|
-
catch (error) {
|
|
218
|
-
throw new Error(`Failed to load implementation for scorer ${scorer.constructor.name}: ${error instanceof Error ? error.message : String(error)}`);
|
|
219
|
-
}
|
|
220
|
-
}
|
|
221
|
-
// Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules
|
|
222
|
-
if (rules && loadedScorers.some(scorer => scorer instanceof base_scorer_js_1.JudgevalScorer)) {
|
|
223
|
-
throw new Error('Cannot use Judgeval scorers (only API scorers) when using rules. Please either remove rules or use only APIJudgmentScorer types.');
|
|
224
|
-
}
|
|
225
|
-
// Convert ScorerWrapper in rules to their implementations
|
|
226
|
-
let loadedRules;
|
|
227
|
-
if (rules) {
|
|
228
|
-
loadedRules = [];
|
|
229
|
-
for (const rule of rules) {
|
|
230
|
-
try {
|
|
231
|
-
const processedConditions = [];
|
|
232
|
-
for (const condition of rule.conditions) {
|
|
233
|
-
// Convert metric if it's a ScorerWrapper
|
|
234
|
-
if (condition.metric instanceof base_scorer_js_1.ScorerWrapper) {
|
|
235
|
-
try {
|
|
236
|
-
const loadedMetric = condition.metric.loadImplementation(useJudgment);
|
|
237
|
-
const newCondition = new rules_js_1.Condition(loadedMetric);
|
|
238
|
-
Object.assign(newCondition, Object.assign(Object.assign({}, condition), { metric: loadedMetric }));
|
|
239
|
-
processedConditions.push(newCondition);
|
|
240
|
-
}
|
|
241
|
-
catch (error) {
|
|
242
|
-
throw new Error(`Failed to convert ScorerWrapper to implementation in rule '${rule.name}', condition metric '${condition.metric.constructor.name}': ${error instanceof Error ? error.message : String(error)}`);
|
|
243
|
-
}
|
|
244
|
-
}
|
|
245
|
-
else {
|
|
246
|
-
processedConditions.push(condition);
|
|
247
|
-
}
|
|
248
|
-
}
|
|
249
|
-
// Create new rule with processed conditions
|
|
250
|
-
const newRule = new rules_js_1.Rule(rule.name, processedConditions, rule.combine_type, rule.description, rule.notification, rule.ruleId);
|
|
251
|
-
loadedRules.push(newRule);
|
|
252
|
-
}
|
|
253
|
-
catch (error) {
|
|
254
|
-
throw new Error(`Failed to process rule '${rule.name}': ${error instanceof Error ? error.message : String(error)}`);
|
|
255
|
-
}
|
|
256
|
-
}
|
|
257
|
-
}
|
|
258
|
-
const evaluationRun = new evaluation_run_js_1.EvaluationRun({
|
|
259
|
-
logResults,
|
|
260
|
-
projectName,
|
|
261
|
-
evalName: evalRunName,
|
|
262
|
-
examples: dataset.examples, // Assuming dataset has an 'examples' property
|
|
263
|
-
scorers: loadedScorers,
|
|
264
|
-
model,
|
|
265
|
-
aggregator,
|
|
266
|
-
metadata,
|
|
267
|
-
judgmentApiKey: this.judgmentApiKey,
|
|
268
|
-
rules: loadedRules,
|
|
269
|
-
organizationId: this.organizationId
|
|
270
|
-
});
|
|
271
|
-
// Assuming override=false, ignoreErrors=true, asyncExecution=false as defaults for evaluateDataset
|
|
272
|
-
return (0, run_evaluation_js_1.runEval)(evaluationRun, false, true, false);
|
|
273
|
-
}
|
|
274
|
-
catch (error) {
|
|
275
|
-
if (error instanceof Error) {
|
|
276
|
-
if (error.message.includes('one or more fields are invalid')) {
|
|
277
|
-
throw new Error(`Please check your EvaluationRun object, one or more fields are invalid: \n${error.message}`);
|
|
278
|
-
}
|
|
279
|
-
else {
|
|
280
|
-
throw new Error(`An unexpected error occurred during evaluation: ${error.message}`);
|
|
281
|
-
}
|
|
282
|
-
}
|
|
283
|
-
else {
|
|
284
|
-
throw new Error(`An unexpected error occurred during evaluation: ${String(error)}`);
|
|
285
|
-
}
|
|
286
|
-
}
|
|
287
|
-
});
|
|
288
|
-
}
|
|
289
|
-
/**
|
|
290
|
-
* Create a dataset
|
|
291
|
-
*/
|
|
292
|
-
createDataset() {
|
|
293
|
-
// This would be implemented with EvalDataset
|
|
294
|
-
throw new Error('Not implemented yet');
|
|
295
|
-
}
|
|
296
|
-
/**
|
|
297
|
-
* Push a dataset to the Judgment platform
|
|
298
|
-
*/
|
|
299
|
-
pushDataset(alias_1, dataset_1, projectName_1) {
|
|
300
|
-
return __awaiter(this, arguments, void 0, function* (alias, dataset, projectName, overwrite = false) {
|
|
301
|
-
// This would be implemented with EvalDataset
|
|
302
|
-
throw new Error('Not implemented yet');
|
|
303
|
-
});
|
|
304
|
-
}
|
|
305
|
-
/**
|
|
306
|
-
* Pull a dataset from the Judgment platform
|
|
307
|
-
*/
|
|
308
|
-
pullDataset(alias, projectName) {
|
|
309
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
310
|
-
// This would be implemented with EvalDataset
|
|
311
|
-
throw new Error('Not implemented yet');
|
|
312
|
-
});
|
|
313
|
-
}
|
|
314
|
-
/**
|
|
315
|
-
* Delete a dataset from the Judgment platform
|
|
316
|
-
*/
|
|
317
|
-
deleteDataset(alias, projectName) {
|
|
318
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
319
|
-
// This would be implemented with EvalDataset
|
|
320
|
-
throw new Error('Not implemented yet');
|
|
321
|
-
});
|
|
322
|
-
}
|
|
323
|
-
/**
|
|
324
|
-
* Pull project dataset stats from the Judgment platform
|
|
325
|
-
*/
|
|
326
|
-
pullProjectDatasetStats(projectName) {
|
|
327
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
328
|
-
// This would be implemented with EvalDataset
|
|
329
|
-
throw new Error('Not implemented yet');
|
|
330
|
-
});
|
|
331
|
-
}
|
|
332
|
-
/**
|
|
333
|
-
* Insert examples into a dataset on the Judgment platform
|
|
334
|
-
*/
|
|
335
|
-
insertDataset(alias, examples, projectName) {
|
|
336
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
337
|
-
// This would be implemented with EvalDataset
|
|
338
|
-
throw new Error('Not implemented yet');
|
|
206
|
+
return __awaiter(this, arguments, void 0, function* (dataset, // Keep type loose for stub
|
|
207
|
+
scorers, model, aggregator, metadata, projectName = 'default_project', evalRunName = 'default_eval_run', logResults = true, useJudgment = true, rules) {
|
|
208
|
+
// Keep type loose for stub
|
|
209
|
+
throw new Error('Not implemented in JudgmentClient. Use EvalDatasetClient.');
|
|
339
210
|
});
|
|
340
211
|
}
|
|
341
212
|
/**
|
|
@@ -353,11 +224,7 @@ class JudgmentClient {
|
|
|
353
224
|
};
|
|
354
225
|
try {
|
|
355
226
|
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL, evalRunRequestBody, {
|
|
356
|
-
headers:
|
|
357
|
-
'Content-Type': 'application/json',
|
|
358
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
359
|
-
'X-Organization-Id': this.organizationId
|
|
360
|
-
}
|
|
227
|
+
headers: this.getAuthHeaders()
|
|
361
228
|
});
|
|
362
229
|
if (!Array.isArray(response.data) || response.data.length === 0) {
|
|
363
230
|
return [{ id: '', results: [] }];
|
|
@@ -380,7 +247,8 @@ class JudgmentClient {
|
|
|
380
247
|
expectedTools: dataObject.expected_tools,
|
|
381
248
|
exampleId: dataObject.example_id,
|
|
382
249
|
exampleIndex: dataObject.example_index,
|
|
383
|
-
timestamp: dataObject.timestamp
|
|
250
|
+
timestamp: dataObject.timestamp,
|
|
251
|
+
example: dataObject.example // Include example boolean
|
|
384
252
|
});
|
|
385
253
|
evalRunResult[0].id = resultId;
|
|
386
254
|
evalRunResult[0].results = [new result_js_1.ScoringResult({
|
|
@@ -392,10 +260,8 @@ class JudgmentClient {
|
|
|
392
260
|
return evalRunResult;
|
|
393
261
|
}
|
|
394
262
|
catch (error) {
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
}
|
|
398
|
-
throw new Error(`Error fetching eval results: ${String(error)}`);
|
|
263
|
+
this.handleApiError(error, 'pullEval');
|
|
264
|
+
throw error;
|
|
399
265
|
}
|
|
400
266
|
});
|
|
401
267
|
}
|
|
@@ -408,92 +274,67 @@ class JudgmentClient {
|
|
|
408
274
|
*/
|
|
409
275
|
exportEvalResults(projectName_1, evalRunName_1) {
|
|
410
276
|
return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, format = 'json') {
|
|
277
|
+
logger_instance_js_1.default.info(`Exporting eval results for project '${projectName}', run '${evalRunName}' as ${format}`);
|
|
411
278
|
try {
|
|
412
|
-
const
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
return
|
|
279
|
+
const resultsData = yield this.pullEval(projectName, evalRunName);
|
|
280
|
+
if (!resultsData || resultsData.length === 0 || !resultsData[0].results) {
|
|
281
|
+
logger_instance_js_1.default.warn('No results found to export.');
|
|
282
|
+
return '';
|
|
416
283
|
}
|
|
284
|
+
const results = resultsData[0].results;
|
|
417
285
|
if (format === 'json') {
|
|
418
|
-
//
|
|
419
|
-
return JSON.stringify(
|
|
286
|
+
// Pretty print JSON
|
|
287
|
+
return JSON.stringify(results.map(r => r.toJSON()), null, 2);
|
|
420
288
|
}
|
|
421
289
|
else if (format === 'csv') {
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
const
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
}
|
|
436
|
-
catch (e) {
|
|
437
|
-
// Provide a more helpful error message
|
|
438
|
-
const errorMsg = e instanceof Error ? e.message : String(e);
|
|
439
|
-
// Update error message to reflect import() failure
|
|
440
|
-
console.error(`Failed to dynamically import 'csv-writer': ${errorMsg}. Ensure it's installed (\`npm install csv-writer\`).`);
|
|
441
|
-
throw new Error("The 'csv-writer' package is required for CSV export but failed to load dynamically.");
|
|
442
|
-
}
|
|
443
|
-
try {
|
|
444
|
-
// Flatten the structure slightly for better CSV output
|
|
445
|
-
const processedResults = results.map((result) => {
|
|
446
|
-
// Flatten dataObject properties and scorersData
|
|
447
|
-
const flatResult = {};
|
|
448
|
-
flatResult.eval_run_id = evalRunData.id; // Add eval run ID
|
|
449
|
-
// Flatten dataObject
|
|
450
|
-
if (result.dataObject) {
|
|
451
|
-
for (const [key, value] of Object.entries(result.dataObject)) {
|
|
452
|
-
// Prefix with 'data_' to avoid potential clashes
|
|
453
|
-
flatResult[`data_${key}`] = (typeof value === 'object' && value !== null) ? JSON.stringify(value) : value;
|
|
454
|
-
}
|
|
455
|
-
}
|
|
456
|
-
// Flatten scorersData - creates columns like scorer_0_name, scorer_0_score, etc.
|
|
457
|
-
if (Array.isArray(result.scorersData)) {
|
|
458
|
-
result.scorersData.forEach((scorerData, index) => {
|
|
459
|
-
flatResult[`scorer_${index}_name`] = scorerData.name;
|
|
460
|
-
flatResult[`scorer_${index}_score`] = (typeof scorerData.score === 'object' && scorerData.score !== null) ? JSON.stringify(scorerData.score) : scorerData.score;
|
|
461
|
-
flatResult[`scorer_${index}_error`] = scorerData.error;
|
|
462
|
-
// Add other scorer fields if necessary, e.g., metadata
|
|
463
|
-
if (scorerData.additional_metadata) {
|
|
464
|
-
flatResult[`scorer_${index}_metadata`] = JSON.stringify(scorerData.additional_metadata);
|
|
465
|
-
}
|
|
466
|
-
});
|
|
467
|
-
}
|
|
468
|
-
flatResult.error = result.error; // Top-level error for the example processing
|
|
469
|
-
return flatResult;
|
|
470
|
-
});
|
|
471
|
-
// Define headers dynamically based on the keys of the first processed result
|
|
472
|
-
if (processedResults.length === 0) {
|
|
473
|
-
return 'No data to export after processing.'; // Handle case with no valid results after processing
|
|
290
|
+
if (results.length === 0)
|
|
291
|
+
return ''; // No data to export
|
|
292
|
+
// Dynamically determine headers from the first result object
|
|
293
|
+
// Flatten the structure for CSV
|
|
294
|
+
const flatResults = results.map(result => {
|
|
295
|
+
var _a, _b, _c;
|
|
296
|
+
const flat = {};
|
|
297
|
+
const exampleData = (_b = (_a = result.dataObject) === null || _a === void 0 ? void 0 : _a.toJSON()) !== null && _b !== void 0 ? _b : {}; // Use toJSON which gives snake_case
|
|
298
|
+
const scorersData = (_c = result.scorersData) !== null && _c !== void 0 ? _c : [];
|
|
299
|
+
// Add example data fields (snake_case)
|
|
300
|
+
for (const key in exampleData) {
|
|
301
|
+
// Prefix example fields to avoid collision, e.g., example_input
|
|
302
|
+
flat[`example_${key}`] = exampleData[key];
|
|
474
303
|
}
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
304
|
+
// Add scorers data
|
|
305
|
+
scorersData.forEach(scorer => {
|
|
306
|
+
flat[`scorer_${scorer.name}_score`] = scorer.score;
|
|
307
|
+
flat[`scorer_${scorer.name}_additional_metadata`] = JSON.stringify(scorer.additional_metadata);
|
|
308
|
+
flat[`scorer_${scorer.name}_error`] = scorer.error;
|
|
478
309
|
});
|
|
479
|
-
//
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
310
|
+
// Add top-level error if present
|
|
311
|
+
flat['top_level_error'] = result.error;
|
|
312
|
+
return flat;
|
|
313
|
+
});
|
|
314
|
+
// Get all unique keys from the flattened results for headers
|
|
315
|
+
const headers = Array.from(new Set(flatResults.flatMap(Object.keys)));
|
|
316
|
+
// Use papaparse for robust CSV generation
|
|
317
|
+
const Papa = require('papaparse'); // Use require here if not imported at top
|
|
318
|
+
const csv = Papa.unparse({
|
|
319
|
+
fields: headers,
|
|
320
|
+
data: flatResults
|
|
321
|
+
}, {
|
|
322
|
+
header: true,
|
|
323
|
+
quotes: true, // Ensure fields with commas/newlines are quoted
|
|
324
|
+
quoteChar: '"',
|
|
325
|
+
escapeChar: '"',
|
|
326
|
+
delimiter: ','
|
|
327
|
+
});
|
|
328
|
+
return csv;
|
|
487
329
|
}
|
|
488
330
|
else {
|
|
489
331
|
throw new Error(`Unsupported export format: ${format}`);
|
|
490
332
|
}
|
|
491
333
|
}
|
|
492
334
|
catch (error) {
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
throw new Error(`Failed to export evaluation results: ${String(error)}`);
|
|
335
|
+
logger_instance_js_1.default.error(`Error exporting eval results: ${error}`);
|
|
336
|
+
this.handleApiError(error, 'exportEvalResults');
|
|
337
|
+
throw error;
|
|
497
338
|
}
|
|
498
339
|
});
|
|
499
340
|
}
|
|
@@ -502,47 +343,23 @@ class JudgmentClient {
|
|
|
502
343
|
*/
|
|
503
344
|
deleteEval(projectName, evalRunNames) {
|
|
504
345
|
return __awaiter(this, void 0, void 0, function* () {
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
throw new Error('No evaluation run names provided');
|
|
508
|
-
}
|
|
509
|
-
// Body matches Python's structure for this endpoint
|
|
510
|
-
const evalRunRequestBody = {
|
|
346
|
+
logger_instance_js_1.default.info(`Deleting eval runs: ${evalRunNames.join(', ')} from project: ${projectName}`);
|
|
347
|
+
const requestBody = {
|
|
511
348
|
project_name: projectName,
|
|
512
349
|
eval_names: evalRunNames,
|
|
513
|
-
judgment_api_key: this.judgmentApiKey
|
|
350
|
+
judgment_api_key: this.judgmentApiKey,
|
|
514
351
|
};
|
|
515
352
|
try {
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
data: evalRunRequestBody,
|
|
519
|
-
headers: {
|
|
520
|
-
'Content-Type': 'application/json',
|
|
521
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
522
|
-
'X-Organization-Id': this.organizationId
|
|
523
|
-
}
|
|
353
|
+
yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_DELETE_API_URL, requestBody, {
|
|
354
|
+
headers: this.getAuthHeaders()
|
|
524
355
|
});
|
|
525
|
-
|
|
356
|
+
logger_instance_js_1.default.info('Successfully deleted eval runs.');
|
|
357
|
+
return true;
|
|
526
358
|
}
|
|
527
359
|
catch (error) {
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
if (status === 404) {
|
|
532
|
-
throw new Error(`Eval results not found: ${JSON.stringify(data)}`);
|
|
533
|
-
}
|
|
534
|
-
else if (status === 500) {
|
|
535
|
-
throw new Error(`Error deleting eval results: ${JSON.stringify(data)}`);
|
|
536
|
-
}
|
|
537
|
-
else {
|
|
538
|
-
throw new Error(`Error deleting eval results (${status}): ${JSON.stringify(data)}`);
|
|
539
|
-
}
|
|
540
|
-
}
|
|
541
|
-
// Rethrow original or wrapped error
|
|
542
|
-
if (error instanceof Error) {
|
|
543
|
-
throw new Error(`Error deleting eval results: ${error.message}`);
|
|
544
|
-
}
|
|
545
|
-
throw new Error(`Error deleting eval results: ${String(error)}`);
|
|
360
|
+
logger_instance_js_1.default.error(`Error deleting eval runs: ${error}`);
|
|
361
|
+
this.handleApiError(error, 'deleteEval');
|
|
362
|
+
return false;
|
|
546
363
|
}
|
|
547
364
|
});
|
|
548
365
|
}
|
|
@@ -551,43 +368,22 @@ class JudgmentClient {
|
|
|
551
368
|
*/
|
|
552
369
|
deleteProjectEvals(projectName) {
|
|
553
370
|
return __awaiter(this, void 0, void 0, function* () {
|
|
554
|
-
|
|
371
|
+
logger_instance_js_1.default.info(`Deleting ALL eval runs for project: ${projectName}`);
|
|
372
|
+
const requestBody = {
|
|
373
|
+
project_name: projectName,
|
|
374
|
+
judgment_api_key: this.judgmentApiKey,
|
|
375
|
+
};
|
|
555
376
|
try {
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
// Remove judgment_api_key from body to match Python (uses header auth)
|
|
559
|
-
data: {
|
|
560
|
-
project_name: projectName,
|
|
561
|
-
},
|
|
562
|
-
headers: {
|
|
563
|
-
'Content-Type': 'application/json',
|
|
564
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
565
|
-
'X-Organization-Id': this.organizationId
|
|
566
|
-
}
|
|
377
|
+
yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_DELETE_PROJECT_API_URL, requestBody, {
|
|
378
|
+
headers: this.getAuthHeaders()
|
|
567
379
|
});
|
|
568
|
-
|
|
569
|
-
return
|
|
380
|
+
logger_instance_js_1.default.info(`Successfully deleted all eval runs for project ${projectName}.`);
|
|
381
|
+
return true;
|
|
570
382
|
}
|
|
571
383
|
catch (error) {
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
if (status === 404) {
|
|
576
|
-
// Assuming 404 might mean project not found or no evals to delete
|
|
577
|
-
console.warn(`Project '${projectName}' not found or no evals to delete.`);
|
|
578
|
-
return false; // Or true depending on desired idempotency behavior
|
|
579
|
-
}
|
|
580
|
-
else if (status === 500) {
|
|
581
|
-
throw new Error(`Error deleting project evals: ${JSON.stringify(data)}`);
|
|
582
|
-
}
|
|
583
|
-
else {
|
|
584
|
-
throw new Error(`Error deleting project evals (${status}): ${JSON.stringify(data)}`);
|
|
585
|
-
}
|
|
586
|
-
}
|
|
587
|
-
if (error instanceof Error) {
|
|
588
|
-
throw new Error(`Error deleting project evals: ${error.message}`);
|
|
589
|
-
}
|
|
590
|
-
throw new Error(`Error deleting project evals: ${String(error)}`);
|
|
384
|
+
logger_instance_js_1.default.error(`Error deleting project evals: ${error}`);
|
|
385
|
+
this.handleApiError(error, 'deleteProjectEvals');
|
|
386
|
+
return false;
|
|
591
387
|
}
|
|
592
388
|
});
|
|
593
389
|
}
|
|
@@ -596,37 +392,34 @@ class JudgmentClient {
|
|
|
596
392
|
*/
|
|
597
393
|
createProject(projectName) {
|
|
598
394
|
return __awaiter(this, void 0, void 0, function* () {
|
|
395
|
+
logger_instance_js_1.default.info(`Creating project: ${projectName}`);
|
|
396
|
+
const requestBody = {
|
|
397
|
+
project_name: projectName,
|
|
398
|
+
judgment_api_key: this.judgmentApiKey,
|
|
399
|
+
};
|
|
599
400
|
try {
|
|
600
|
-
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_PROJECT_CREATE_API_URL,
|
|
601
|
-
|
|
602
|
-
{
|
|
603
|
-
project_name: projectName,
|
|
604
|
-
}, {
|
|
605
|
-
headers: {
|
|
606
|
-
'Content-Type': 'application/json',
|
|
607
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
608
|
-
'X-Organization-Id': this.organizationId
|
|
609
|
-
}
|
|
401
|
+
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_PROJECT_CREATE_API_URL, requestBody, {
|
|
402
|
+
headers: this.getAuthHeaders()
|
|
610
403
|
});
|
|
611
|
-
//
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
if (axios_1.default.isAxiosError(error) && error.response) {
|
|
616
|
-
// Check for specific conflict error (e.g., 409) if API provides it
|
|
617
|
-
if (error.response.status === 409) {
|
|
618
|
-
console.warn(`Project '${projectName}' already exists.`);
|
|
619
|
-
return false; // Or true if idempotent creation is desired
|
|
620
|
-
}
|
|
621
|
-
throw new Error(`Error creating project (${error.response.status}): ${JSON.stringify(error.response.data)}`);
|
|
404
|
+
// Check for specific success message or status if API provides one
|
|
405
|
+
if (response.data && response.data.message === 'Project added successfully') {
|
|
406
|
+
logger_instance_js_1.default.info(`Successfully created project: ${projectName}`);
|
|
407
|
+
return true;
|
|
622
408
|
}
|
|
623
|
-
else if (
|
|
624
|
-
|
|
409
|
+
else if (response.data && response.data.message === 'Project already exists') {
|
|
410
|
+
logger_instance_js_1.default.warn(`Project '${projectName}' already exists.`);
|
|
411
|
+
return true; // Or false, depending on desired behavior for existing projects
|
|
625
412
|
}
|
|
626
413
|
else {
|
|
627
|
-
|
|
414
|
+
logger_instance_js_1.default.error(`Failed to create project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
|
|
415
|
+
return false;
|
|
628
416
|
}
|
|
629
417
|
}
|
|
418
|
+
catch (error) {
|
|
419
|
+
logger_instance_js_1.default.error(`Error creating project: ${error}`);
|
|
420
|
+
this.handleApiError(error, 'createProject');
|
|
421
|
+
return false;
|
|
422
|
+
}
|
|
630
423
|
});
|
|
631
424
|
}
|
|
632
425
|
/**
|
|
@@ -634,37 +427,29 @@ class JudgmentClient {
|
|
|
634
427
|
*/
|
|
635
428
|
deleteProject(projectName) {
|
|
636
429
|
return __awaiter(this, void 0, void 0, function* () {
|
|
430
|
+
logger_instance_js_1.default.info(`Deleting project: ${projectName}`);
|
|
431
|
+
const requestBody = {
|
|
432
|
+
project_name: projectName,
|
|
433
|
+
judgment_api_key: this.judgmentApiKey,
|
|
434
|
+
};
|
|
637
435
|
try {
|
|
638
|
-
const response = yield axios_1.default.
|
|
639
|
-
|
|
640
|
-
// Remove judgment_api_key from body to match Python (uses header auth)
|
|
641
|
-
data: {
|
|
642
|
-
project_name: projectName,
|
|
643
|
-
},
|
|
644
|
-
headers: {
|
|
645
|
-
'Content-Type': 'application/json',
|
|
646
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
647
|
-
'X-Organization-Id': this.organizationId
|
|
648
|
-
}
|
|
436
|
+
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_PROJECT_DELETE_API_URL, requestBody, {
|
|
437
|
+
headers: this.getAuthHeaders()
|
|
649
438
|
});
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
catch (error) {
|
|
654
|
-
if (axios_1.default.isAxiosError(error) && error.response) {
|
|
655
|
-
if (error.response.status === 404) {
|
|
656
|
-
console.warn(`Project '${projectName}' not found for deletion.`);
|
|
657
|
-
return false; // Or true depending on desired idempotency
|
|
658
|
-
}
|
|
659
|
-
throw new Error(`Error deleting project (${error.response.status}): ${JSON.stringify(error.response.data)}`);
|
|
660
|
-
}
|
|
661
|
-
else if (error instanceof Error) {
|
|
662
|
-
throw new Error(`Error deleting project: ${error.message}`);
|
|
439
|
+
if (response.data && response.data.message === 'Project deleted successfully') {
|
|
440
|
+
logger_instance_js_1.default.info(`Successfully deleted project: ${projectName}`);
|
|
441
|
+
return true;
|
|
663
442
|
}
|
|
664
443
|
else {
|
|
665
|
-
|
|
444
|
+
logger_instance_js_1.default.error(`Failed to delete project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
|
|
445
|
+
return false;
|
|
666
446
|
}
|
|
667
447
|
}
|
|
448
|
+
catch (error) {
|
|
449
|
+
logger_instance_js_1.default.error(`Error deleting project: ${error}`);
|
|
450
|
+
this.handleApiError(error, 'deleteProject');
|
|
451
|
+
return false;
|
|
452
|
+
}
|
|
668
453
|
});
|
|
669
454
|
}
|
|
670
455
|
/**
|
|
@@ -672,35 +457,36 @@ class JudgmentClient {
|
|
|
672
457
|
*/
|
|
673
458
|
validateApiKey() {
|
|
674
459
|
return __awaiter(this, void 0, void 0, function* () {
|
|
675
|
-
var _a, _b;
|
|
460
|
+
var _a, _b, _c, _d;
|
|
461
|
+
logger_instance_js_1.default.debug('Validating API Key...');
|
|
676
462
|
try {
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
// Removed 'X-Organization-Id' header to match Python for this specific endpoint
|
|
684
|
-
}
|
|
685
|
-
});
|
|
686
|
-
if (response.status === 200) {
|
|
687
|
-
return [true, JSON.stringify(response.data)];
|
|
688
|
-
}
|
|
689
|
-
else {
|
|
690
|
-
// Status might be non-200 but still valid JSON error response
|
|
691
|
-
return [false, ((_a = response.data) === null || _a === void 0 ? void 0 : _a.detail) || `Error validating API key (Status: ${response.status})`];
|
|
692
|
-
}
|
|
463
|
+
// Instantiate EvalDatasetClient to perform the validation call
|
|
464
|
+
const datasetClient = new eval_dataset_client_js_1.EvalDatasetClient(this.judgmentApiKey, this.organizationId);
|
|
465
|
+
// Use the dataset client to make the call
|
|
466
|
+
yield datasetClient.pullProjectDatasetStats('__api_key_validation__');
|
|
467
|
+
logger_instance_js_1.default.debug('API Key appears valid.');
|
|
468
|
+
return [true, 'API Key is valid.'];
|
|
693
469
|
}
|
|
694
470
|
catch (error) {
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
471
|
+
let message = 'API Key validation failed.';
|
|
472
|
+
if (axios_1.default.isAxiosError(error)) {
|
|
473
|
+
if (((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 401 || ((_b = error.response) === null || _b === void 0 ? void 0 : _b.status) === 403) {
|
|
474
|
+
message = 'API Key is invalid or expired.';
|
|
475
|
+
}
|
|
476
|
+
else if (((_c = error.response) === null || _c === void 0 ? void 0 : _c.status) === 404) {
|
|
477
|
+
// If validation endpoint returns 404, key might be valid but endpoint wrong/project not found
|
|
478
|
+
// This depends on the specific validation endpoint behavior
|
|
479
|
+
message = 'API Key might be valid, but validation endpoint returned 404.';
|
|
480
|
+
}
|
|
481
|
+
else {
|
|
482
|
+
message = `API Key validation failed with status ${(_d = error.response) === null || _d === void 0 ? void 0 : _d.status}: ${error.message}`;
|
|
483
|
+
}
|
|
700
484
|
}
|
|
701
485
|
else {
|
|
702
|
-
|
|
486
|
+
message = `API Key validation failed: ${String(error)}`;
|
|
703
487
|
}
|
|
488
|
+
logger_instance_js_1.default.error(message);
|
|
489
|
+
return [false, message];
|
|
704
490
|
}
|
|
705
491
|
});
|
|
706
492
|
}
|
|
@@ -725,17 +511,12 @@ class JudgmentClient {
|
|
|
725
511
|
*/
|
|
726
512
|
pullEvalResults(projectName, evalRunName) {
|
|
727
513
|
return __awaiter(this, void 0, void 0, function* () {
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
const evalRunArray = yield this.pullEval(projectName, evalRunName);
|
|
731
|
-
// pullEval returns [{ id: ..., results: [...] }], extract results
|
|
732
|
-
return ((_a = evalRunArray[0]) === null || _a === void 0 ? void 0 : _a.results) || [];
|
|
733
|
-
}
|
|
734
|
-
catch (error) {
|
|
735
|
-
// Log error but return empty array to allow waitForEvaluation to potentially retry
|
|
736
|
-
logger_instance_js_1.default.error(`Failed to pull evaluation results for '${evalRunName}': ${error instanceof Error ? error.message : String(error)}`);
|
|
514
|
+
const rawResults = yield this.pullEval(projectName, evalRunName);
|
|
515
|
+
if (!rawResults || rawResults.length === 0 || !rawResults[0].results) {
|
|
737
516
|
return [];
|
|
738
517
|
}
|
|
518
|
+
// Assuming pullEval correctly returns results in the expected format
|
|
519
|
+
return rawResults[0].results;
|
|
739
520
|
});
|
|
740
521
|
}
|
|
741
522
|
/**
|
|
@@ -747,88 +528,72 @@ class JudgmentClient {
|
|
|
747
528
|
*/
|
|
748
529
|
checkEvalStatus(projectName, evalRunName) {
|
|
749
530
|
return __awaiter(this, void 0, void 0, function* () {
|
|
750
|
-
var _a
|
|
751
|
-
// Using 'eval_name' in body for consistency with pullEval/fetch endpoint.
|
|
531
|
+
var _a;
|
|
752
532
|
const requestBody = {
|
|
753
533
|
project_name: projectName,
|
|
754
|
-
eval_name: evalRunName,
|
|
534
|
+
eval_name: evalRunName,
|
|
755
535
|
judgment_api_key: this.judgmentApiKey,
|
|
756
536
|
};
|
|
757
537
|
try {
|
|
758
|
-
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL,
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
763
|
-
'X-Organization-Id': this.organizationId
|
|
764
|
-
},
|
|
765
|
-
timeout: 15000 // Slightly increased timeout for status checks
|
|
538
|
+
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL, requestBody, {
|
|
539
|
+
headers: this.getAuthHeaders(),
|
|
540
|
+
// Add a shorter timeout for status checks?
|
|
541
|
+
// timeout: 5000
|
|
766
542
|
});
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
if (
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
543
|
+
const data = response.data;
|
|
544
|
+
// Check if the response looks like a status object
|
|
545
|
+
if (data && typeof data.status === 'string') {
|
|
546
|
+
return {
|
|
547
|
+
status: data.status || 'unknown',
|
|
548
|
+
progress: typeof data.progress === 'number' ? data.progress : 0,
|
|
549
|
+
message: data.message || '',
|
|
550
|
+
error: data.error
|
|
551
|
+
};
|
|
552
|
+
}
|
|
553
|
+
// Check if the response looks like completed results (array format from pullEval)
|
|
554
|
+
else if (Array.isArray(data) && data.length > 0 && data[0].results) {
|
|
555
|
+
return {
|
|
556
|
+
status: 'completed',
|
|
557
|
+
progress: 100,
|
|
558
|
+
message: 'Evaluation completed.'
|
|
559
|
+
};
|
|
560
|
+
}
|
|
561
|
+
// Check if response looks like completed results (single object format)
|
|
562
|
+
else if (data && typeof data.id === 'string' && Array.isArray(data.results)) { // Adjust based on actual API response for single result fetch
|
|
563
|
+
return {
|
|
564
|
+
status: 'completed',
|
|
565
|
+
progress: 100,
|
|
566
|
+
message: 'Evaluation completed.'
|
|
567
|
+
};
|
|
568
|
+
}
|
|
569
|
+
// Handle other potential responses or assume pending/unknown
|
|
788
570
|
else {
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
const parsedProgress = parseFloat(statusData.progress);
|
|
796
|
-
if (!isNaN(parsedProgress)) {
|
|
797
|
-
progress = Math.max(0, Math.min(1, parsedProgress)); // Ensure progress is between 0 and 1
|
|
798
|
-
}
|
|
571
|
+
logger_instance_js_1.default.warn(`Unexpected response format when checking status for ${evalRunName}:`, data);
|
|
572
|
+
return {
|
|
573
|
+
status: 'unknown',
|
|
574
|
+
progress: 0,
|
|
575
|
+
message: 'Could not determine status from API response.'
|
|
576
|
+
};
|
|
799
577
|
}
|
|
800
|
-
const normalizedStatus = {
|
|
801
|
-
status: statusData.status || 'unknown',
|
|
802
|
-
progress: progress,
|
|
803
|
-
message: statusData.message || '',
|
|
804
|
-
error: statusData.error // Include error field if present
|
|
805
|
-
};
|
|
806
|
-
// Only log status if it's not being called from waitForEvaluation
|
|
807
|
-
// Check stack trace for caller function name
|
|
808
|
-
const stack = new Error().stack;
|
|
809
|
-
const isCalledByWaitForEvaluation = stack === null || stack === void 0 ? void 0 : stack.includes('waitForEvaluation');
|
|
810
|
-
if (!isCalledByWaitForEvaluation) {
|
|
811
|
-
// Use logger for status updates when called directly
|
|
812
|
-
logger_instance_js_1.default.info(`Evaluation Status: ${normalizedStatus.status}`);
|
|
813
|
-
logger_instance_js_1.default.info(`Progress: ${Math.round(normalizedStatus.progress * 100)}%`);
|
|
814
|
-
if (normalizedStatus.message) {
|
|
815
|
-
logger_instance_js_1.default.info(`Message: ${normalizedStatus.message}`);
|
|
816
|
-
}
|
|
817
|
-
if (normalizedStatus.error) {
|
|
818
|
-
logger_instance_js_1.default.error(`Error in status: ${normalizedStatus.error}`);
|
|
819
|
-
}
|
|
820
|
-
}
|
|
821
|
-
return normalizedStatus;
|
|
822
578
|
}
|
|
823
579
|
catch (error) {
|
|
824
|
-
// Don't throw
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
580
|
+
// Don't throw here, return status indicating error
|
|
581
|
+
let errorMessage = 'Failed to fetch evaluation status.';
|
|
582
|
+
let status = 'error';
|
|
583
|
+
if (axios_1.default.isAxiosError(error) && ((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 404) {
|
|
584
|
+
status = 'not_found';
|
|
585
|
+
errorMessage = 'Evaluation run not found.';
|
|
586
|
+
logger_instance_js_1.default.warn(`Evaluation run ${evalRunName} not found.`);
|
|
587
|
+
}
|
|
588
|
+
else {
|
|
589
|
+
this.handleApiError(error, 'checkEvalStatus');
|
|
590
|
+
errorMessage = `Error fetching status: ${String(error)}`;
|
|
591
|
+
}
|
|
828
592
|
return {
|
|
829
|
-
status:
|
|
593
|
+
status: status,
|
|
830
594
|
progress: 0,
|
|
831
|
-
message:
|
|
595
|
+
message: errorMessage,
|
|
596
|
+
error: String(error) // Include error string
|
|
832
597
|
};
|
|
833
598
|
}
|
|
834
599
|
});
|
|
@@ -842,105 +607,79 @@ class JudgmentClient {
|
|
|
842
607
|
*/
|
|
843
608
|
waitForEvaluation(projectName_1, evalRunName_1) {
|
|
844
609
|
return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, options = {}) {
|
|
845
|
-
const { intervalMs =
|
|
846
|
-
|
|
847
|
-
showProgress = true } = options;
|
|
848
|
-
let attempts = 0;
|
|
849
|
-
let lastProgressPercent = -1;
|
|
850
|
-
let lastStatus = '';
|
|
610
|
+
const { intervalMs = 5000, maxAttempts = 120, showProgress = true } = options; // Default: check every 5s for 10 mins
|
|
611
|
+
let progressBar;
|
|
851
612
|
if (showProgress) {
|
|
852
|
-
|
|
853
|
-
|
|
613
|
+
progressBar = new cli_progress_1.default.SingleBar({
|
|
614
|
+
format: `Waiting for ${ansi_colors_1.default.magenta(evalRunName)}... | ${ansi_colors_1.default.cyan('{bar}')} | {percentage}% || {status}`,
|
|
615
|
+
barCompleteChar: '\u2588',
|
|
616
|
+
barIncompleteChar: '\u2591',
|
|
617
|
+
hideCursor: true,
|
|
618
|
+
clearOnComplete: false,
|
|
619
|
+
stopOnComplete: true,
|
|
620
|
+
}, cli_progress_1.default.Presets.shades_classic);
|
|
621
|
+
progressBar.start(100, 0, { status: 'Initiating...' });
|
|
854
622
|
}
|
|
855
|
-
|
|
856
|
-
attempts++;
|
|
623
|
+
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
|
857
624
|
try {
|
|
858
|
-
const
|
|
859
|
-
const
|
|
860
|
-
|
|
861
|
-
if (
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
}
|
|
868
|
-
// Check evaluation status
|
|
869
|
-
if (status.status === 'complete') {
|
|
870
|
-
if (showProgress) {
|
|
871
|
-
process.stdout.write('\n'); // Keep direct console output for progress bar newline
|
|
872
|
-
// Use logger for status update
|
|
873
|
-
logger_instance_js_1.default.info('Evaluation complete! Fetching results...');
|
|
874
|
-
}
|
|
875
|
-
try {
|
|
876
|
-
// Use the dedicated results fetching method
|
|
877
|
-
const results = yield this.pullEvalResults(projectName, evalRunName);
|
|
878
|
-
if (results.length > 0) {
|
|
879
|
-
// Use logger for status update
|
|
880
|
-
logger_instance_js_1.default.info(`Successfully fetched ${results.length} results.`);
|
|
881
|
-
return results;
|
|
882
|
-
}
|
|
883
|
-
else {
|
|
884
|
-
// If complete status but no results, might be an issue. Log and return empty.
|
|
885
|
-
logger_instance_js_1.default.warn(`Evaluation reported complete, but no results were fetched for '${evalRunName}'.`);
|
|
886
|
-
return [];
|
|
887
|
-
}
|
|
625
|
+
const statusResult = yield this.checkEvalStatus(projectName, evalRunName);
|
|
626
|
+
const progress = Math.max(0, Math.min(100, statusResult.progress || 0)); // Clamp progress
|
|
627
|
+
const statusText = statusResult.message || statusResult.status;
|
|
628
|
+
if (progressBar) {
|
|
629
|
+
progressBar.update(progress, { status: statusText });
|
|
630
|
+
}
|
|
631
|
+
if (statusResult.status === 'completed') {
|
|
632
|
+
if (progressBar) {
|
|
633
|
+
progressBar.update(100, { status: ansi_colors_1.default.green('Completed! Fetching results...') });
|
|
888
634
|
}
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
else {
|
|
914
|
-
// Still processing (e.g., 'processing', 'running', 'pending')
|
|
915
|
-
lastStatus = status.status;
|
|
916
|
-
}
|
|
635
|
+
// Fetch final results using pullEval
|
|
636
|
+
const finalResults = yield this.pullEvalResults(projectName, evalRunName);
|
|
637
|
+
logger_instance_js_1.default.info(`Evaluation run ${evalRunName} completed successfully.`);
|
|
638
|
+
return finalResults;
|
|
639
|
+
}
|
|
640
|
+
else if (statusResult.status === 'error' || statusResult.status === 'failed') {
|
|
641
|
+
// Concatenate error details into a single message string
|
|
642
|
+
const errorMsg = `Evaluation run ${evalRunName} failed or encountered an error: ${statusResult.error ? String(statusResult.error) : statusResult.message}`;
|
|
643
|
+
logger_instance_js_1.default.error(errorMsg);
|
|
644
|
+
if (progressBar)
|
|
645
|
+
progressBar.stop();
|
|
646
|
+
// Pass only the combined message to the constructor
|
|
647
|
+
throw new run_evaluation_js_1.JudgmentAPIError(errorMsg);
|
|
648
|
+
}
|
|
649
|
+
else if (statusResult.status === 'not_found') {
|
|
650
|
+
const errorMsg = `Evaluation run ${evalRunName} not found.`;
|
|
651
|
+
logger_instance_js_1.default.error(errorMsg);
|
|
652
|
+
if (progressBar)
|
|
653
|
+
progressBar.stop();
|
|
654
|
+
// Pass only the message to the constructor
|
|
655
|
+
throw new run_evaluation_js_1.JudgmentAPIError(errorMsg);
|
|
656
|
+
}
|
|
657
|
+
// Wait for the next interval
|
|
658
|
+
yield new Promise(resolve => setTimeout(resolve, intervalMs));
|
|
917
659
|
}
|
|
918
660
|
catch (error) {
|
|
919
|
-
//
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
661
|
+
// Handle errors during the wait loop (e.g., network issues during checkEvalStatus)
|
|
662
|
+
logger_instance_js_1.default.error(`Error during waitForEvaluation loop (attempt ${attempt}): ${error}`);
|
|
663
|
+
// Option: Rethrow immediately vs. retry vs. specific handling
|
|
664
|
+
if (error instanceof run_evaluation_js_1.JudgmentAPIError) { // If it was already a processed API error, rethrow
|
|
665
|
+
if (progressBar)
|
|
666
|
+
progressBar.stop();
|
|
667
|
+
throw error;
|
|
668
|
+
}
|
|
669
|
+
// For other errors, wait and retry (up to maxAttempts)
|
|
670
|
+
if (attempt === maxAttempts) {
|
|
671
|
+
if (progressBar)
|
|
672
|
+
progressBar.stop();
|
|
673
|
+
throw new Error(`waitForEvaluation failed after ${maxAttempts} attempts: ${error}`);
|
|
674
|
+
}
|
|
675
|
+
// Still retryable, wait for interval
|
|
928
676
|
yield new Promise(resolve => setTimeout(resolve, intervalMs));
|
|
929
677
|
}
|
|
930
|
-
else {
|
|
931
|
-
// Break loop if already completed or failed to avoid unnecessary delay
|
|
932
|
-
break;
|
|
933
|
-
}
|
|
934
|
-
} // End while loop
|
|
935
|
-
// If loop finished without completing/failing
|
|
936
|
-
if (lastStatus !== 'complete' && lastStatus !== 'failed') {
|
|
937
|
-
if (showProgress)
|
|
938
|
-
process.stdout.write('\n'); // Keep direct console output
|
|
939
|
-
logger_instance_js_1.default.error(`Evaluation polling timed out after ${attempts} attempts for "${evalRunName}". Last known status: ${lastStatus}`);
|
|
940
|
-
return []; // Return empty array on timeout
|
|
941
678
|
}
|
|
942
|
-
//
|
|
943
|
-
|
|
679
|
+
// If loop finishes without completion or error
|
|
680
|
+
if (progressBar)
|
|
681
|
+
progressBar.stop();
|
|
682
|
+
throw new Error(`Evaluation run ${evalRunName} did not complete after ${maxAttempts} attempts.`);
|
|
944
683
|
});
|
|
945
684
|
}
|
|
946
685
|
/**
|
|
@@ -949,12 +688,48 @@ class JudgmentClient {
|
|
|
949
688
|
* @returns A string representing the progress bar
|
|
950
689
|
*/
|
|
951
690
|
_createProgressBar(percent) {
|
|
952
|
-
const width =
|
|
953
|
-
|
|
954
|
-
const
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
691
|
+
const width = 20; // Width of the progress bar
|
|
692
|
+
const filled = Math.round(width * (percent / 100));
|
|
693
|
+
const empty = width - filled;
|
|
694
|
+
return `[${'#'.repeat(filled)}${'.'.repeat(empty)}] ${percent.toFixed(1)}%`;
|
|
695
|
+
}
|
|
696
|
+
// Keep helper methods private
|
|
697
|
+
getAuthHeaders() {
|
|
698
|
+
return {
|
|
699
|
+
'Content-Type': 'application/json',
|
|
700
|
+
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
701
|
+
'X-Organization-Id': this.organizationId,
|
|
702
|
+
};
|
|
703
|
+
}
|
|
704
|
+
// Ensure this handles errors from Eval/Project API calls correctly
|
|
705
|
+
handleApiError(error, context) {
|
|
706
|
+
logger_instance_js_1.default.error(`API Error during ${context}:`);
|
|
707
|
+
if (axios_1.default.isAxiosError(error)) {
|
|
708
|
+
const axiosError = error;
|
|
709
|
+
const response = axiosError.response;
|
|
710
|
+
if (response) {
|
|
711
|
+
logger_instance_js_1.default.error(`Status: ${response.status} ${response.statusText}`);
|
|
712
|
+
logger_instance_js_1.default.debug('Response Data:', response.data);
|
|
713
|
+
if (response.status === 422) {
|
|
714
|
+
logger_instance_js_1.default.error('Validation Error Detail:', response.data);
|
|
715
|
+
}
|
|
716
|
+
else if (context === 'pullEval' && response.status === 404) { // Keep eval-specific handling
|
|
717
|
+
logger_instance_js_1.default.error(`Evaluation run not found.`);
|
|
718
|
+
}
|
|
719
|
+
else if (context.startsWith('delete') && response.status === 404) { // Keep generic delete handling
|
|
720
|
+
logger_instance_js_1.default.warn(`${context}: Resource not found, may have already been deleted.`);
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
else if (axiosError.request) {
|
|
724
|
+
logger_instance_js_1.default.error('No response received from server.');
|
|
725
|
+
}
|
|
726
|
+
else {
|
|
727
|
+
logger_instance_js_1.default.error(`Error setting up API request for ${context}`);
|
|
728
|
+
}
|
|
729
|
+
}
|
|
730
|
+
else {
|
|
731
|
+
logger_instance_js_1.default.error(`Unexpected error during ${context}`);
|
|
732
|
+
}
|
|
958
733
|
}
|
|
959
734
|
}
|
|
960
735
|
exports.JudgmentClient = JudgmentClient;
|