judgeval 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/common/logger-instance.js +19 -17
- package/dist/cjs/common/logger-instance.js.map +1 -1
- package/dist/cjs/common/tracer.js +58 -50
- package/dist/cjs/common/tracer.js.map +1 -1
- package/dist/cjs/constants.js +6 -4
- package/dist/cjs/constants.js.map +1 -1
- package/dist/cjs/data/datasets/eval-dataset-client.js +349 -0
- package/dist/cjs/data/datasets/eval-dataset-client.js.map +1 -0
- package/dist/cjs/data/datasets/eval-dataset.js +405 -0
- package/dist/cjs/data/datasets/eval-dataset.js.map +1 -0
- package/dist/cjs/data/example.js +22 -1
- package/dist/cjs/data/example.js.map +1 -1
- package/dist/cjs/e2etests/eval-operations.test.js +282 -0
- package/dist/cjs/e2etests/eval-operations.test.js.map +1 -0
- package/dist/cjs/e2etests/judgee-traces.test.js +278 -0
- package/dist/cjs/e2etests/judgee-traces.test.js.map +1 -0
- package/dist/cjs/judgment-client.js +309 -534
- package/dist/cjs/judgment-client.js.map +1 -1
- package/dist/esm/common/logger-instance.js +19 -17
- package/dist/esm/common/logger-instance.js.map +1 -1
- package/dist/esm/common/tracer.js +58 -50
- package/dist/esm/common/tracer.js.map +1 -1
- package/dist/esm/constants.js +5 -3
- package/dist/esm/constants.js.map +1 -1
- package/dist/esm/data/datasets/eval-dataset-client.js +342 -0
- package/dist/esm/data/datasets/eval-dataset-client.js.map +1 -0
- package/dist/esm/data/datasets/eval-dataset.js +375 -0
- package/dist/esm/data/datasets/eval-dataset.js.map +1 -0
- package/dist/esm/data/example.js +22 -1
- package/dist/esm/data/example.js.map +1 -1
- package/dist/esm/e2etests/eval-operations.test.js +254 -0
- package/dist/esm/e2etests/eval-operations.test.js.map +1 -0
- package/dist/esm/e2etests/judgee-traces.test.js +253 -0
- package/dist/esm/e2etests/judgee-traces.test.js.map +1 -0
- package/dist/esm/judgment-client.js +311 -536
- package/dist/esm/judgment-client.js.map +1 -1
- package/dist/types/common/tracer.d.ts +0 -1
- package/dist/types/constants.d.ts +2 -3
- package/dist/types/data/datasets/eval-dataset-client.d.ts +39 -0
- package/dist/types/data/datasets/eval-dataset.d.ts +45 -0
- package/dist/types/data/example.d.ts +24 -12
- package/dist/types/e2etests/eval-operations.test.d.ts +5 -0
- package/dist/types/e2etests/judgee-traces.test.d.ts +5 -0
- package/dist/types/judgment-client.d.ts +3 -25
- package/package.json +3 -9
|
@@ -14,9 +14,13 @@ import { ScoringResult } from './data/result.js';
|
|
|
14
14
|
import { JudgevalScorer, ScorerWrapper } from './scorers/base-scorer.js';
|
|
15
15
|
import { EvaluationRun } from './evaluation-run.js';
|
|
16
16
|
import { Rule, Condition } from './rules.js';
|
|
17
|
-
import { runEval, assertTest } from './run-evaluation.js';
|
|
18
|
-
import {
|
|
17
|
+
import { runEval, assertTest, JudgmentAPIError } from './run-evaluation.js';
|
|
18
|
+
import { JUDGMENT_EVAL_FETCH_API_URL, JUDGMENT_EVAL_DELETE_API_URL, JUDGMENT_EVAL_DELETE_PROJECT_API_URL, JUDGMENT_PROJECT_DELETE_API_URL, JUDGMENT_PROJECT_CREATE_API_URL, } from './constants.js';
|
|
19
19
|
import logger from './common/logger-instance.js';
|
|
20
|
+
// Keep progress bar imports if used elsewhere (e.g., waitForEvaluation)
|
|
21
|
+
import cliProgress from 'cli-progress';
|
|
22
|
+
import colors from 'ansi-colors';
|
|
23
|
+
import { EvalDatasetClient } from './data/datasets/eval-dataset-client.js';
|
|
20
24
|
// Load environment variables
|
|
21
25
|
dotenv.config();
|
|
22
26
|
/**
|
|
@@ -170,143 +174,10 @@ export class JudgmentClient {
|
|
|
170
174
|
* Evaluate a dataset
|
|
171
175
|
*/
|
|
172
176
|
evaluateDataset(dataset_1, scorers_1, model_1, aggregator_1, metadata_1) {
|
|
173
|
-
return __awaiter(this, arguments, void 0, function* (dataset, //
|
|
174
|
-
scorers, model, aggregator, metadata, projectName = '', evalRunName = '', logResults = true, useJudgment = true, rules) {
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
const loadedScorers = [];
|
|
178
|
-
for (const scorer of scorers) {
|
|
179
|
-
try {
|
|
180
|
-
if (scorer instanceof ScorerWrapper) {
|
|
181
|
-
loadedScorers.push(scorer.loadImplementation(useJudgment));
|
|
182
|
-
}
|
|
183
|
-
else {
|
|
184
|
-
// Assuming scorers passed are already JudgevalScorer or APIJudgmentScorer
|
|
185
|
-
loadedScorers.push(scorer);
|
|
186
|
-
}
|
|
187
|
-
}
|
|
188
|
-
catch (error) {
|
|
189
|
-
throw new Error(`Failed to load implementation for scorer ${scorer.constructor.name}: ${error instanceof Error ? error.message : String(error)}`);
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
// Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules
|
|
193
|
-
if (rules && loadedScorers.some(scorer => scorer instanceof JudgevalScorer)) {
|
|
194
|
-
throw new Error('Cannot use Judgeval scorers (only API scorers) when using rules. Please either remove rules or use only APIJudgmentScorer types.');
|
|
195
|
-
}
|
|
196
|
-
// Convert ScorerWrapper in rules to their implementations
|
|
197
|
-
let loadedRules;
|
|
198
|
-
if (rules) {
|
|
199
|
-
loadedRules = [];
|
|
200
|
-
for (const rule of rules) {
|
|
201
|
-
try {
|
|
202
|
-
const processedConditions = [];
|
|
203
|
-
for (const condition of rule.conditions) {
|
|
204
|
-
// Convert metric if it's a ScorerWrapper
|
|
205
|
-
if (condition.metric instanceof ScorerWrapper) {
|
|
206
|
-
try {
|
|
207
|
-
const loadedMetric = condition.metric.loadImplementation(useJudgment);
|
|
208
|
-
const newCondition = new Condition(loadedMetric);
|
|
209
|
-
Object.assign(newCondition, Object.assign(Object.assign({}, condition), { metric: loadedMetric }));
|
|
210
|
-
processedConditions.push(newCondition);
|
|
211
|
-
}
|
|
212
|
-
catch (error) {
|
|
213
|
-
throw new Error(`Failed to convert ScorerWrapper to implementation in rule '${rule.name}', condition metric '${condition.metric.constructor.name}': ${error instanceof Error ? error.message : String(error)}`);
|
|
214
|
-
}
|
|
215
|
-
}
|
|
216
|
-
else {
|
|
217
|
-
processedConditions.push(condition);
|
|
218
|
-
}
|
|
219
|
-
}
|
|
220
|
-
// Create new rule with processed conditions
|
|
221
|
-
const newRule = new Rule(rule.name, processedConditions, rule.combine_type, rule.description, rule.notification, rule.ruleId);
|
|
222
|
-
loadedRules.push(newRule);
|
|
223
|
-
}
|
|
224
|
-
catch (error) {
|
|
225
|
-
throw new Error(`Failed to process rule '${rule.name}': ${error instanceof Error ? error.message : String(error)}`);
|
|
226
|
-
}
|
|
227
|
-
}
|
|
228
|
-
}
|
|
229
|
-
const evaluationRun = new EvaluationRun({
|
|
230
|
-
logResults,
|
|
231
|
-
projectName,
|
|
232
|
-
evalName: evalRunName,
|
|
233
|
-
examples: dataset.examples, // Assuming dataset has an 'examples' property
|
|
234
|
-
scorers: loadedScorers,
|
|
235
|
-
model,
|
|
236
|
-
aggregator,
|
|
237
|
-
metadata,
|
|
238
|
-
judgmentApiKey: this.judgmentApiKey,
|
|
239
|
-
rules: loadedRules,
|
|
240
|
-
organizationId: this.organizationId
|
|
241
|
-
});
|
|
242
|
-
// Assuming override=false, ignoreErrors=true, asyncExecution=false as defaults for evaluateDataset
|
|
243
|
-
return runEval(evaluationRun, false, true, false);
|
|
244
|
-
}
|
|
245
|
-
catch (error) {
|
|
246
|
-
if (error instanceof Error) {
|
|
247
|
-
if (error.message.includes('one or more fields are invalid')) {
|
|
248
|
-
throw new Error(`Please check your EvaluationRun object, one or more fields are invalid: \n${error.message}`);
|
|
249
|
-
}
|
|
250
|
-
else {
|
|
251
|
-
throw new Error(`An unexpected error occurred during evaluation: ${error.message}`);
|
|
252
|
-
}
|
|
253
|
-
}
|
|
254
|
-
else {
|
|
255
|
-
throw new Error(`An unexpected error occurred during evaluation: ${String(error)}`);
|
|
256
|
-
}
|
|
257
|
-
}
|
|
258
|
-
});
|
|
259
|
-
}
|
|
260
|
-
/**
|
|
261
|
-
* Create a dataset
|
|
262
|
-
*/
|
|
263
|
-
createDataset() {
|
|
264
|
-
// This would be implemented with EvalDataset
|
|
265
|
-
throw new Error('Not implemented yet');
|
|
266
|
-
}
|
|
267
|
-
/**
|
|
268
|
-
* Push a dataset to the Judgment platform
|
|
269
|
-
*/
|
|
270
|
-
pushDataset(alias_1, dataset_1, projectName_1) {
|
|
271
|
-
return __awaiter(this, arguments, void 0, function* (alias, dataset, projectName, overwrite = false) {
|
|
272
|
-
// This would be implemented with EvalDataset
|
|
273
|
-
throw new Error('Not implemented yet');
|
|
274
|
-
});
|
|
275
|
-
}
|
|
276
|
-
/**
|
|
277
|
-
* Pull a dataset from the Judgment platform
|
|
278
|
-
*/
|
|
279
|
-
pullDataset(alias, projectName) {
|
|
280
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
281
|
-
// This would be implemented with EvalDataset
|
|
282
|
-
throw new Error('Not implemented yet');
|
|
283
|
-
});
|
|
284
|
-
}
|
|
285
|
-
/**
|
|
286
|
-
* Delete a dataset from the Judgment platform
|
|
287
|
-
*/
|
|
288
|
-
deleteDataset(alias, projectName) {
|
|
289
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
290
|
-
// This would be implemented with EvalDataset
|
|
291
|
-
throw new Error('Not implemented yet');
|
|
292
|
-
});
|
|
293
|
-
}
|
|
294
|
-
/**
|
|
295
|
-
* Pull project dataset stats from the Judgment platform
|
|
296
|
-
*/
|
|
297
|
-
pullProjectDatasetStats(projectName) {
|
|
298
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
299
|
-
// This would be implemented with EvalDataset
|
|
300
|
-
throw new Error('Not implemented yet');
|
|
301
|
-
});
|
|
302
|
-
}
|
|
303
|
-
/**
|
|
304
|
-
* Insert examples into a dataset on the Judgment platform
|
|
305
|
-
*/
|
|
306
|
-
insertDataset(alias, examples, projectName) {
|
|
307
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
308
|
-
// This would be implemented with EvalDataset
|
|
309
|
-
throw new Error('Not implemented yet');
|
|
177
|
+
return __awaiter(this, arguments, void 0, function* (dataset, // Keep type loose for stub
|
|
178
|
+
scorers, model, aggregator, metadata, projectName = 'default_project', evalRunName = 'default_eval_run', logResults = true, useJudgment = true, rules) {
|
|
179
|
+
// Keep type loose for stub
|
|
180
|
+
throw new Error('Not implemented in JudgmentClient. Use EvalDatasetClient.');
|
|
310
181
|
});
|
|
311
182
|
}
|
|
312
183
|
/**
|
|
@@ -324,11 +195,7 @@ export class JudgmentClient {
|
|
|
324
195
|
};
|
|
325
196
|
try {
|
|
326
197
|
const response = yield axios.post(JUDGMENT_EVAL_FETCH_API_URL, evalRunRequestBody, {
|
|
327
|
-
headers:
|
|
328
|
-
'Content-Type': 'application/json',
|
|
329
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
330
|
-
'X-Organization-Id': this.organizationId
|
|
331
|
-
}
|
|
198
|
+
headers: this.getAuthHeaders()
|
|
332
199
|
});
|
|
333
200
|
if (!Array.isArray(response.data) || response.data.length === 0) {
|
|
334
201
|
return [{ id: '', results: [] }];
|
|
@@ -351,7 +218,8 @@ export class JudgmentClient {
|
|
|
351
218
|
expectedTools: dataObject.expected_tools,
|
|
352
219
|
exampleId: dataObject.example_id,
|
|
353
220
|
exampleIndex: dataObject.example_index,
|
|
354
|
-
timestamp: dataObject.timestamp
|
|
221
|
+
timestamp: dataObject.timestamp,
|
|
222
|
+
example: dataObject.example // Include example boolean
|
|
355
223
|
});
|
|
356
224
|
evalRunResult[0].id = resultId;
|
|
357
225
|
evalRunResult[0].results = [new ScoringResult({
|
|
@@ -363,10 +231,8 @@ export class JudgmentClient {
|
|
|
363
231
|
return evalRunResult;
|
|
364
232
|
}
|
|
365
233
|
catch (error) {
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
}
|
|
369
|
-
throw new Error(`Error fetching eval results: ${String(error)}`);
|
|
234
|
+
this.handleApiError(error, 'pullEval');
|
|
235
|
+
throw error;
|
|
370
236
|
}
|
|
371
237
|
});
|
|
372
238
|
}
|
|
@@ -379,92 +245,67 @@ export class JudgmentClient {
|
|
|
379
245
|
*/
|
|
380
246
|
exportEvalResults(projectName_1, evalRunName_1) {
|
|
381
247
|
return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, format = 'json') {
|
|
248
|
+
logger.info(`Exporting eval results for project '${projectName}', run '${evalRunName}' as ${format}`);
|
|
382
249
|
try {
|
|
383
|
-
const
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
return
|
|
250
|
+
const resultsData = yield this.pullEval(projectName, evalRunName);
|
|
251
|
+
if (!resultsData || resultsData.length === 0 || !resultsData[0].results) {
|
|
252
|
+
logger.warn('No results found to export.');
|
|
253
|
+
return '';
|
|
387
254
|
}
|
|
255
|
+
const results = resultsData[0].results;
|
|
388
256
|
if (format === 'json') {
|
|
389
|
-
//
|
|
390
|
-
return JSON.stringify(
|
|
257
|
+
// Pretty print JSON
|
|
258
|
+
return JSON.stringify(results.map(r => r.toJSON()), null, 2);
|
|
391
259
|
}
|
|
392
260
|
else if (format === 'csv') {
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
const
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
}
|
|
407
|
-
catch (e) {
|
|
408
|
-
// Provide a more helpful error message
|
|
409
|
-
const errorMsg = e instanceof Error ? e.message : String(e);
|
|
410
|
-
// Update error message to reflect import() failure
|
|
411
|
-
console.error(`Failed to dynamically import 'csv-writer': ${errorMsg}. Ensure it's installed (\`npm install csv-writer\`).`);
|
|
412
|
-
throw new Error("The 'csv-writer' package is required for CSV export but failed to load dynamically.");
|
|
413
|
-
}
|
|
414
|
-
try {
|
|
415
|
-
// Flatten the structure slightly for better CSV output
|
|
416
|
-
const processedResults = results.map((result) => {
|
|
417
|
-
// Flatten dataObject properties and scorersData
|
|
418
|
-
const flatResult = {};
|
|
419
|
-
flatResult.eval_run_id = evalRunData.id; // Add eval run ID
|
|
420
|
-
// Flatten dataObject
|
|
421
|
-
if (result.dataObject) {
|
|
422
|
-
for (const [key, value] of Object.entries(result.dataObject)) {
|
|
423
|
-
// Prefix with 'data_' to avoid potential clashes
|
|
424
|
-
flatResult[`data_${key}`] = (typeof value === 'object' && value !== null) ? JSON.stringify(value) : value;
|
|
425
|
-
}
|
|
426
|
-
}
|
|
427
|
-
// Flatten scorersData - creates columns like scorer_0_name, scorer_0_score, etc.
|
|
428
|
-
if (Array.isArray(result.scorersData)) {
|
|
429
|
-
result.scorersData.forEach((scorerData, index) => {
|
|
430
|
-
flatResult[`scorer_${index}_name`] = scorerData.name;
|
|
431
|
-
flatResult[`scorer_${index}_score`] = (typeof scorerData.score === 'object' && scorerData.score !== null) ? JSON.stringify(scorerData.score) : scorerData.score;
|
|
432
|
-
flatResult[`scorer_${index}_error`] = scorerData.error;
|
|
433
|
-
// Add other scorer fields if necessary, e.g., metadata
|
|
434
|
-
if (scorerData.additional_metadata) {
|
|
435
|
-
flatResult[`scorer_${index}_metadata`] = JSON.stringify(scorerData.additional_metadata);
|
|
436
|
-
}
|
|
437
|
-
});
|
|
438
|
-
}
|
|
439
|
-
flatResult.error = result.error; // Top-level error for the example processing
|
|
440
|
-
return flatResult;
|
|
441
|
-
});
|
|
442
|
-
// Define headers dynamically based on the keys of the first processed result
|
|
443
|
-
if (processedResults.length === 0) {
|
|
444
|
-
return 'No data to export after processing.'; // Handle case with no valid results after processing
|
|
261
|
+
if (results.length === 0)
|
|
262
|
+
return ''; // No data to export
|
|
263
|
+
// Dynamically determine headers from the first result object
|
|
264
|
+
// Flatten the structure for CSV
|
|
265
|
+
const flatResults = results.map(result => {
|
|
266
|
+
var _a, _b, _c;
|
|
267
|
+
const flat = {};
|
|
268
|
+
const exampleData = (_b = (_a = result.dataObject) === null || _a === void 0 ? void 0 : _a.toJSON()) !== null && _b !== void 0 ? _b : {}; // Use toJSON which gives snake_case
|
|
269
|
+
const scorersData = (_c = result.scorersData) !== null && _c !== void 0 ? _c : [];
|
|
270
|
+
// Add example data fields (snake_case)
|
|
271
|
+
for (const key in exampleData) {
|
|
272
|
+
// Prefix example fields to avoid collision, e.g., example_input
|
|
273
|
+
flat[`example_${key}`] = exampleData[key];
|
|
445
274
|
}
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
275
|
+
// Add scorers data
|
|
276
|
+
scorersData.forEach(scorer => {
|
|
277
|
+
flat[`scorer_${scorer.name}_score`] = scorer.score;
|
|
278
|
+
flat[`scorer_${scorer.name}_additional_metadata`] = JSON.stringify(scorer.additional_metadata);
|
|
279
|
+
flat[`scorer_${scorer.name}_error`] = scorer.error;
|
|
449
280
|
});
|
|
450
|
-
//
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
281
|
+
// Add top-level error if present
|
|
282
|
+
flat['top_level_error'] = result.error;
|
|
283
|
+
return flat;
|
|
284
|
+
});
|
|
285
|
+
// Get all unique keys from the flattened results for headers
|
|
286
|
+
const headers = Array.from(new Set(flatResults.flatMap(Object.keys)));
|
|
287
|
+
// Use papaparse for robust CSV generation
|
|
288
|
+
const Papa = require('papaparse'); // Use require here if not imported at top
|
|
289
|
+
const csv = Papa.unparse({
|
|
290
|
+
fields: headers,
|
|
291
|
+
data: flatResults
|
|
292
|
+
}, {
|
|
293
|
+
header: true,
|
|
294
|
+
quotes: true, // Ensure fields with commas/newlines are quoted
|
|
295
|
+
quoteChar: '"',
|
|
296
|
+
escapeChar: '"',
|
|
297
|
+
delimiter: ','
|
|
298
|
+
});
|
|
299
|
+
return csv;
|
|
458
300
|
}
|
|
459
301
|
else {
|
|
460
302
|
throw new Error(`Unsupported export format: ${format}`);
|
|
461
303
|
}
|
|
462
304
|
}
|
|
463
305
|
catch (error) {
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
throw new Error(`Failed to export evaluation results: ${String(error)}`);
|
|
306
|
+
logger.error(`Error exporting eval results: ${error}`);
|
|
307
|
+
this.handleApiError(error, 'exportEvalResults');
|
|
308
|
+
throw error;
|
|
468
309
|
}
|
|
469
310
|
});
|
|
470
311
|
}
|
|
@@ -473,47 +314,23 @@ export class JudgmentClient {
|
|
|
473
314
|
*/
|
|
474
315
|
deleteEval(projectName, evalRunNames) {
|
|
475
316
|
return __awaiter(this, void 0, void 0, function* () {
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
throw new Error('No evaluation run names provided');
|
|
479
|
-
}
|
|
480
|
-
// Body matches Python's structure for this endpoint
|
|
481
|
-
const evalRunRequestBody = {
|
|
317
|
+
logger.info(`Deleting eval runs: ${evalRunNames.join(', ')} from project: ${projectName}`);
|
|
318
|
+
const requestBody = {
|
|
482
319
|
project_name: projectName,
|
|
483
320
|
eval_names: evalRunNames,
|
|
484
|
-
judgment_api_key: this.judgmentApiKey
|
|
321
|
+
judgment_api_key: this.judgmentApiKey,
|
|
485
322
|
};
|
|
486
323
|
try {
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
data: evalRunRequestBody,
|
|
490
|
-
headers: {
|
|
491
|
-
'Content-Type': 'application/json',
|
|
492
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
493
|
-
'X-Organization-Id': this.organizationId
|
|
494
|
-
}
|
|
324
|
+
yield axios.post(JUDGMENT_EVAL_DELETE_API_URL, requestBody, {
|
|
325
|
+
headers: this.getAuthHeaders()
|
|
495
326
|
});
|
|
496
|
-
|
|
327
|
+
logger.info('Successfully deleted eval runs.');
|
|
328
|
+
return true;
|
|
497
329
|
}
|
|
498
330
|
catch (error) {
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
if (status === 404) {
|
|
503
|
-
throw new Error(`Eval results not found: ${JSON.stringify(data)}`);
|
|
504
|
-
}
|
|
505
|
-
else if (status === 500) {
|
|
506
|
-
throw new Error(`Error deleting eval results: ${JSON.stringify(data)}`);
|
|
507
|
-
}
|
|
508
|
-
else {
|
|
509
|
-
throw new Error(`Error deleting eval results (${status}): ${JSON.stringify(data)}`);
|
|
510
|
-
}
|
|
511
|
-
}
|
|
512
|
-
// Rethrow original or wrapped error
|
|
513
|
-
if (error instanceof Error) {
|
|
514
|
-
throw new Error(`Error deleting eval results: ${error.message}`);
|
|
515
|
-
}
|
|
516
|
-
throw new Error(`Error deleting eval results: ${String(error)}`);
|
|
331
|
+
logger.error(`Error deleting eval runs: ${error}`);
|
|
332
|
+
this.handleApiError(error, 'deleteEval');
|
|
333
|
+
return false;
|
|
517
334
|
}
|
|
518
335
|
});
|
|
519
336
|
}
|
|
@@ -522,43 +339,22 @@ export class JudgmentClient {
|
|
|
522
339
|
*/
|
|
523
340
|
deleteProjectEvals(projectName) {
|
|
524
341
|
return __awaiter(this, void 0, void 0, function* () {
|
|
525
|
-
|
|
342
|
+
logger.info(`Deleting ALL eval runs for project: ${projectName}`);
|
|
343
|
+
const requestBody = {
|
|
344
|
+
project_name: projectName,
|
|
345
|
+
judgment_api_key: this.judgmentApiKey,
|
|
346
|
+
};
|
|
526
347
|
try {
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
// Remove judgment_api_key from body to match Python (uses header auth)
|
|
530
|
-
data: {
|
|
531
|
-
project_name: projectName,
|
|
532
|
-
},
|
|
533
|
-
headers: {
|
|
534
|
-
'Content-Type': 'application/json',
|
|
535
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
536
|
-
'X-Organization-Id': this.organizationId
|
|
537
|
-
}
|
|
348
|
+
yield axios.post(JUDGMENT_EVAL_DELETE_PROJECT_API_URL, requestBody, {
|
|
349
|
+
headers: this.getAuthHeaders()
|
|
538
350
|
});
|
|
539
|
-
|
|
540
|
-
return
|
|
351
|
+
logger.info(`Successfully deleted all eval runs for project ${projectName}.`);
|
|
352
|
+
return true;
|
|
541
353
|
}
|
|
542
354
|
catch (error) {
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
if (status === 404) {
|
|
547
|
-
// Assuming 404 might mean project not found or no evals to delete
|
|
548
|
-
console.warn(`Project '${projectName}' not found or no evals to delete.`);
|
|
549
|
-
return false; // Or true depending on desired idempotency behavior
|
|
550
|
-
}
|
|
551
|
-
else if (status === 500) {
|
|
552
|
-
throw new Error(`Error deleting project evals: ${JSON.stringify(data)}`);
|
|
553
|
-
}
|
|
554
|
-
else {
|
|
555
|
-
throw new Error(`Error deleting project evals (${status}): ${JSON.stringify(data)}`);
|
|
556
|
-
}
|
|
557
|
-
}
|
|
558
|
-
if (error instanceof Error) {
|
|
559
|
-
throw new Error(`Error deleting project evals: ${error.message}`);
|
|
560
|
-
}
|
|
561
|
-
throw new Error(`Error deleting project evals: ${String(error)}`);
|
|
355
|
+
logger.error(`Error deleting project evals: ${error}`);
|
|
356
|
+
this.handleApiError(error, 'deleteProjectEvals');
|
|
357
|
+
return false;
|
|
562
358
|
}
|
|
563
359
|
});
|
|
564
360
|
}
|
|
@@ -567,37 +363,34 @@ export class JudgmentClient {
|
|
|
567
363
|
*/
|
|
568
364
|
createProject(projectName) {
|
|
569
365
|
return __awaiter(this, void 0, void 0, function* () {
|
|
366
|
+
logger.info(`Creating project: ${projectName}`);
|
|
367
|
+
const requestBody = {
|
|
368
|
+
project_name: projectName,
|
|
369
|
+
judgment_api_key: this.judgmentApiKey,
|
|
370
|
+
};
|
|
570
371
|
try {
|
|
571
|
-
const response = yield axios.post(JUDGMENT_PROJECT_CREATE_API_URL,
|
|
572
|
-
|
|
573
|
-
{
|
|
574
|
-
project_name: projectName,
|
|
575
|
-
}, {
|
|
576
|
-
headers: {
|
|
577
|
-
'Content-Type': 'application/json',
|
|
578
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
579
|
-
'X-Organization-Id': this.organizationId
|
|
580
|
-
}
|
|
372
|
+
const response = yield axios.post(JUDGMENT_PROJECT_CREATE_API_URL, requestBody, {
|
|
373
|
+
headers: this.getAuthHeaders()
|
|
581
374
|
});
|
|
582
|
-
//
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
if (axios.isAxiosError(error) && error.response) {
|
|
587
|
-
// Check for specific conflict error (e.g., 409) if API provides it
|
|
588
|
-
if (error.response.status === 409) {
|
|
589
|
-
console.warn(`Project '${projectName}' already exists.`);
|
|
590
|
-
return false; // Or true if idempotent creation is desired
|
|
591
|
-
}
|
|
592
|
-
throw new Error(`Error creating project (${error.response.status}): ${JSON.stringify(error.response.data)}`);
|
|
375
|
+
// Check for specific success message or status if API provides one
|
|
376
|
+
if (response.data && response.data.message === 'Project added successfully') {
|
|
377
|
+
logger.info(`Successfully created project: ${projectName}`);
|
|
378
|
+
return true;
|
|
593
379
|
}
|
|
594
|
-
else if (
|
|
595
|
-
|
|
380
|
+
else if (response.data && response.data.message === 'Project already exists') {
|
|
381
|
+
logger.warn(`Project '${projectName}' already exists.`);
|
|
382
|
+
return true; // Or false, depending on desired behavior for existing projects
|
|
596
383
|
}
|
|
597
384
|
else {
|
|
598
|
-
|
|
385
|
+
logger.error(`Failed to create project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
|
|
386
|
+
return false;
|
|
599
387
|
}
|
|
600
388
|
}
|
|
389
|
+
catch (error) {
|
|
390
|
+
logger.error(`Error creating project: ${error}`);
|
|
391
|
+
this.handleApiError(error, 'createProject');
|
|
392
|
+
return false;
|
|
393
|
+
}
|
|
601
394
|
});
|
|
602
395
|
}
|
|
603
396
|
/**
|
|
@@ -605,37 +398,29 @@ export class JudgmentClient {
|
|
|
605
398
|
*/
|
|
606
399
|
deleteProject(projectName) {
|
|
607
400
|
return __awaiter(this, void 0, void 0, function* () {
|
|
401
|
+
logger.info(`Deleting project: ${projectName}`);
|
|
402
|
+
const requestBody = {
|
|
403
|
+
project_name: projectName,
|
|
404
|
+
judgment_api_key: this.judgmentApiKey,
|
|
405
|
+
};
|
|
608
406
|
try {
|
|
609
|
-
const response = yield axios.
|
|
610
|
-
|
|
611
|
-
// Remove judgment_api_key from body to match Python (uses header auth)
|
|
612
|
-
data: {
|
|
613
|
-
project_name: projectName,
|
|
614
|
-
},
|
|
615
|
-
headers: {
|
|
616
|
-
'Content-Type': 'application/json',
|
|
617
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
618
|
-
'X-Organization-Id': this.organizationId
|
|
619
|
-
}
|
|
407
|
+
const response = yield axios.post(JUDGMENT_PROJECT_DELETE_API_URL, requestBody, {
|
|
408
|
+
headers: this.getAuthHeaders()
|
|
620
409
|
});
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
catch (error) {
|
|
625
|
-
if (axios.isAxiosError(error) && error.response) {
|
|
626
|
-
if (error.response.status === 404) {
|
|
627
|
-
console.warn(`Project '${projectName}' not found for deletion.`);
|
|
628
|
-
return false; // Or true depending on desired idempotency
|
|
629
|
-
}
|
|
630
|
-
throw new Error(`Error deleting project (${error.response.status}): ${JSON.stringify(error.response.data)}`);
|
|
631
|
-
}
|
|
632
|
-
else if (error instanceof Error) {
|
|
633
|
-
throw new Error(`Error deleting project: ${error.message}`);
|
|
410
|
+
if (response.data && response.data.message === 'Project deleted successfully') {
|
|
411
|
+
logger.info(`Successfully deleted project: ${projectName}`);
|
|
412
|
+
return true;
|
|
634
413
|
}
|
|
635
414
|
else {
|
|
636
|
-
|
|
415
|
+
logger.error(`Failed to delete project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
|
|
416
|
+
return false;
|
|
637
417
|
}
|
|
638
418
|
}
|
|
419
|
+
catch (error) {
|
|
420
|
+
logger.error(`Error deleting project: ${error}`);
|
|
421
|
+
this.handleApiError(error, 'deleteProject');
|
|
422
|
+
return false;
|
|
423
|
+
}
|
|
639
424
|
});
|
|
640
425
|
}
|
|
641
426
|
/**
|
|
@@ -643,35 +428,36 @@ export class JudgmentClient {
|
|
|
643
428
|
*/
|
|
644
429
|
validateApiKey() {
|
|
645
430
|
return __awaiter(this, void 0, void 0, function* () {
|
|
646
|
-
var _a, _b;
|
|
431
|
+
var _a, _b, _c, _d;
|
|
432
|
+
logger.debug('Validating API Key...');
|
|
647
433
|
try {
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
// Removed 'X-Organization-Id' header to match Python for this specific endpoint
|
|
655
|
-
}
|
|
656
|
-
});
|
|
657
|
-
if (response.status === 200) {
|
|
658
|
-
return [true, JSON.stringify(response.data)];
|
|
659
|
-
}
|
|
660
|
-
else {
|
|
661
|
-
// Status might be non-200 but still valid JSON error response
|
|
662
|
-
return [false, ((_a = response.data) === null || _a === void 0 ? void 0 : _a.detail) || `Error validating API key (Status: ${response.status})`];
|
|
663
|
-
}
|
|
434
|
+
// Instantiate EvalDatasetClient to perform the validation call
|
|
435
|
+
const datasetClient = new EvalDatasetClient(this.judgmentApiKey, this.organizationId);
|
|
436
|
+
// Use the dataset client to make the call
|
|
437
|
+
yield datasetClient.pullProjectDatasetStats('__api_key_validation__');
|
|
438
|
+
logger.debug('API Key appears valid.');
|
|
439
|
+
return [true, 'API Key is valid.'];
|
|
664
440
|
}
|
|
665
441
|
catch (error) {
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
442
|
+
let message = 'API Key validation failed.';
|
|
443
|
+
if (axios.isAxiosError(error)) {
|
|
444
|
+
if (((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 401 || ((_b = error.response) === null || _b === void 0 ? void 0 : _b.status) === 403) {
|
|
445
|
+
message = 'API Key is invalid or expired.';
|
|
446
|
+
}
|
|
447
|
+
else if (((_c = error.response) === null || _c === void 0 ? void 0 : _c.status) === 404) {
|
|
448
|
+
// If validation endpoint returns 404, key might be valid but endpoint wrong/project not found
|
|
449
|
+
// This depends on the specific validation endpoint behavior
|
|
450
|
+
message = 'API Key might be valid, but validation endpoint returned 404.';
|
|
451
|
+
}
|
|
452
|
+
else {
|
|
453
|
+
message = `API Key validation failed with status ${(_d = error.response) === null || _d === void 0 ? void 0 : _d.status}: ${error.message}`;
|
|
454
|
+
}
|
|
671
455
|
}
|
|
672
456
|
else {
|
|
673
|
-
|
|
457
|
+
message = `API Key validation failed: ${String(error)}`;
|
|
674
458
|
}
|
|
459
|
+
logger.error(message);
|
|
460
|
+
return [false, message];
|
|
675
461
|
}
|
|
676
462
|
});
|
|
677
463
|
}
|
|
@@ -696,17 +482,12 @@ export class JudgmentClient {
|
|
|
696
482
|
*/
|
|
697
483
|
pullEvalResults(projectName, evalRunName) {
|
|
698
484
|
return __awaiter(this, void 0, void 0, function* () {
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
const evalRunArray = yield this.pullEval(projectName, evalRunName);
|
|
702
|
-
// pullEval returns [{ id: ..., results: [...] }], extract results
|
|
703
|
-
return ((_a = evalRunArray[0]) === null || _a === void 0 ? void 0 : _a.results) || [];
|
|
704
|
-
}
|
|
705
|
-
catch (error) {
|
|
706
|
-
// Log error but return empty array to allow waitForEvaluation to potentially retry
|
|
707
|
-
logger.error(`Failed to pull evaluation results for '${evalRunName}': ${error instanceof Error ? error.message : String(error)}`);
|
|
485
|
+
const rawResults = yield this.pullEval(projectName, evalRunName);
|
|
486
|
+
if (!rawResults || rawResults.length === 0 || !rawResults[0].results) {
|
|
708
487
|
return [];
|
|
709
488
|
}
|
|
489
|
+
// Assuming pullEval correctly returns results in the expected format
|
|
490
|
+
return rawResults[0].results;
|
|
710
491
|
});
|
|
711
492
|
}
|
|
712
493
|
/**
|
|
@@ -718,88 +499,72 @@ export class JudgmentClient {
|
|
|
718
499
|
*/
|
|
719
500
|
checkEvalStatus(projectName, evalRunName) {
|
|
720
501
|
return __awaiter(this, void 0, void 0, function* () {
|
|
721
|
-
var _a
|
|
722
|
-
// Using 'eval_name' in body for consistency with pullEval/fetch endpoint.
|
|
502
|
+
var _a;
|
|
723
503
|
const requestBody = {
|
|
724
504
|
project_name: projectName,
|
|
725
|
-
eval_name: evalRunName,
|
|
505
|
+
eval_name: evalRunName,
|
|
726
506
|
judgment_api_key: this.judgmentApiKey,
|
|
727
507
|
};
|
|
728
508
|
try {
|
|
729
|
-
const response = yield axios.post(JUDGMENT_EVAL_FETCH_API_URL,
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
734
|
-
'X-Organization-Id': this.organizationId
|
|
735
|
-
},
|
|
736
|
-
timeout: 15000 // Slightly increased timeout for status checks
|
|
509
|
+
const response = yield axios.post(JUDGMENT_EVAL_FETCH_API_URL, requestBody, {
|
|
510
|
+
headers: this.getAuthHeaders(),
|
|
511
|
+
// Add a shorter timeout for status checks?
|
|
512
|
+
// timeout: 5000
|
|
737
513
|
});
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
if (
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
514
|
+
const data = response.data;
|
|
515
|
+
// Check if the response looks like a status object
|
|
516
|
+
if (data && typeof data.status === 'string') {
|
|
517
|
+
return {
|
|
518
|
+
status: data.status || 'unknown',
|
|
519
|
+
progress: typeof data.progress === 'number' ? data.progress : 0,
|
|
520
|
+
message: data.message || '',
|
|
521
|
+
error: data.error
|
|
522
|
+
};
|
|
523
|
+
}
|
|
524
|
+
// Check if the response looks like completed results (array format from pullEval)
|
|
525
|
+
else if (Array.isArray(data) && data.length > 0 && data[0].results) {
|
|
526
|
+
return {
|
|
527
|
+
status: 'completed',
|
|
528
|
+
progress: 100,
|
|
529
|
+
message: 'Evaluation completed.'
|
|
530
|
+
};
|
|
531
|
+
}
|
|
532
|
+
// Check if response looks like completed results (single object format)
|
|
533
|
+
else if (data && typeof data.id === 'string' && Array.isArray(data.results)) { // Adjust based on actual API response for single result fetch
|
|
534
|
+
return {
|
|
535
|
+
status: 'completed',
|
|
536
|
+
progress: 100,
|
|
537
|
+
message: 'Evaluation completed.'
|
|
538
|
+
};
|
|
539
|
+
}
|
|
540
|
+
// Handle other potential responses or assume pending/unknown
|
|
759
541
|
else {
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
const parsedProgress = parseFloat(statusData.progress);
|
|
767
|
-
if (!isNaN(parsedProgress)) {
|
|
768
|
-
progress = Math.max(0, Math.min(1, parsedProgress)); // Ensure progress is between 0 and 1
|
|
769
|
-
}
|
|
542
|
+
logger.warn(`Unexpected response format when checking status for ${evalRunName}:`, data);
|
|
543
|
+
return {
|
|
544
|
+
status: 'unknown',
|
|
545
|
+
progress: 0,
|
|
546
|
+
message: 'Could not determine status from API response.'
|
|
547
|
+
};
|
|
770
548
|
}
|
|
771
|
-
const normalizedStatus = {
|
|
772
|
-
status: statusData.status || 'unknown',
|
|
773
|
-
progress: progress,
|
|
774
|
-
message: statusData.message || '',
|
|
775
|
-
error: statusData.error // Include error field if present
|
|
776
|
-
};
|
|
777
|
-
// Only log status if it's not being called from waitForEvaluation
|
|
778
|
-
// Check stack trace for caller function name
|
|
779
|
-
const stack = new Error().stack;
|
|
780
|
-
const isCalledByWaitForEvaluation = stack === null || stack === void 0 ? void 0 : stack.includes('waitForEvaluation');
|
|
781
|
-
if (!isCalledByWaitForEvaluation) {
|
|
782
|
-
// Use logger for status updates when called directly
|
|
783
|
-
logger.info(`Evaluation Status: ${normalizedStatus.status}`);
|
|
784
|
-
logger.info(`Progress: ${Math.round(normalizedStatus.progress * 100)}%`);
|
|
785
|
-
if (normalizedStatus.message) {
|
|
786
|
-
logger.info(`Message: ${normalizedStatus.message}`);
|
|
787
|
-
}
|
|
788
|
-
if (normalizedStatus.error) {
|
|
789
|
-
logger.error(`Error in status: ${normalizedStatus.error}`);
|
|
790
|
-
}
|
|
791
|
-
}
|
|
792
|
-
return normalizedStatus;
|
|
793
549
|
}
|
|
794
550
|
catch (error) {
|
|
795
|
-
// Don't throw
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
551
|
+
// Don't throw here, return status indicating error
|
|
552
|
+
let errorMessage = 'Failed to fetch evaluation status.';
|
|
553
|
+
let status = 'error';
|
|
554
|
+
if (axios.isAxiosError(error) && ((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 404) {
|
|
555
|
+
status = 'not_found';
|
|
556
|
+
errorMessage = 'Evaluation run not found.';
|
|
557
|
+
logger.warn(`Evaluation run ${evalRunName} not found.`);
|
|
558
|
+
}
|
|
559
|
+
else {
|
|
560
|
+
this.handleApiError(error, 'checkEvalStatus');
|
|
561
|
+
errorMessage = `Error fetching status: ${String(error)}`;
|
|
562
|
+
}
|
|
799
563
|
return {
|
|
800
|
-
status:
|
|
564
|
+
status: status,
|
|
801
565
|
progress: 0,
|
|
802
|
-
message:
|
|
566
|
+
message: errorMessage,
|
|
567
|
+
error: String(error) // Include error string
|
|
803
568
|
};
|
|
804
569
|
}
|
|
805
570
|
});
|
|
@@ -813,105 +578,79 @@ export class JudgmentClient {
|
|
|
813
578
|
*/
|
|
814
579
|
waitForEvaluation(projectName_1, evalRunName_1) {
|
|
815
580
|
return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, options = {}) {
|
|
816
|
-
const { intervalMs =
|
|
817
|
-
|
|
818
|
-
showProgress = true } = options;
|
|
819
|
-
let attempts = 0;
|
|
820
|
-
let lastProgressPercent = -1;
|
|
821
|
-
let lastStatus = '';
|
|
581
|
+
const { intervalMs = 5000, maxAttempts = 120, showProgress = true } = options; // Default: check every 5s for 10 mins
|
|
582
|
+
let progressBar;
|
|
822
583
|
if (showProgress) {
|
|
823
|
-
|
|
824
|
-
|
|
584
|
+
progressBar = new cliProgress.SingleBar({
|
|
585
|
+
format: `Waiting for ${colors.magenta(evalRunName)}... | ${colors.cyan('{bar}')} | {percentage}% || {status}`,
|
|
586
|
+
barCompleteChar: '\u2588',
|
|
587
|
+
barIncompleteChar: '\u2591',
|
|
588
|
+
hideCursor: true,
|
|
589
|
+
clearOnComplete: false,
|
|
590
|
+
stopOnComplete: true,
|
|
591
|
+
}, cliProgress.Presets.shades_classic);
|
|
592
|
+
progressBar.start(100, 0, { status: 'Initiating...' });
|
|
825
593
|
}
|
|
826
|
-
|
|
827
|
-
attempts++;
|
|
594
|
+
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
|
828
595
|
try {
|
|
829
|
-
const
|
|
830
|
-
const
|
|
831
|
-
|
|
832
|
-
if (
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
}
|
|
839
|
-
// Check evaluation status
|
|
840
|
-
if (status.status === 'complete') {
|
|
841
|
-
if (showProgress) {
|
|
842
|
-
process.stdout.write('\n'); // Keep direct console output for progress bar newline
|
|
843
|
-
// Use logger for status update
|
|
844
|
-
logger.info('Evaluation complete! Fetching results...');
|
|
845
|
-
}
|
|
846
|
-
try {
|
|
847
|
-
// Use the dedicated results fetching method
|
|
848
|
-
const results = yield this.pullEvalResults(projectName, evalRunName);
|
|
849
|
-
if (results.length > 0) {
|
|
850
|
-
// Use logger for status update
|
|
851
|
-
logger.info(`Successfully fetched ${results.length} results.`);
|
|
852
|
-
return results;
|
|
853
|
-
}
|
|
854
|
-
else {
|
|
855
|
-
// If complete status but no results, might be an issue. Log and return empty.
|
|
856
|
-
logger.warn(`Evaluation reported complete, but no results were fetched for '${evalRunName}'.`);
|
|
857
|
-
return [];
|
|
858
|
-
}
|
|
596
|
+
const statusResult = yield this.checkEvalStatus(projectName, evalRunName);
|
|
597
|
+
const progress = Math.max(0, Math.min(100, statusResult.progress || 0)); // Clamp progress
|
|
598
|
+
const statusText = statusResult.message || statusResult.status;
|
|
599
|
+
if (progressBar) {
|
|
600
|
+
progressBar.update(progress, { status: statusText });
|
|
601
|
+
}
|
|
602
|
+
if (statusResult.status === 'completed') {
|
|
603
|
+
if (progressBar) {
|
|
604
|
+
progressBar.update(100, { status: colors.green('Completed! Fetching results...') });
|
|
859
605
|
}
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
else {
|
|
885
|
-
// Still processing (e.g., 'processing', 'running', 'pending')
|
|
886
|
-
lastStatus = status.status;
|
|
887
|
-
}
|
|
606
|
+
// Fetch final results using pullEval
|
|
607
|
+
const finalResults = yield this.pullEvalResults(projectName, evalRunName);
|
|
608
|
+
logger.info(`Evaluation run ${evalRunName} completed successfully.`);
|
|
609
|
+
return finalResults;
|
|
610
|
+
}
|
|
611
|
+
else if (statusResult.status === 'error' || statusResult.status === 'failed') {
|
|
612
|
+
// Concatenate error details into a single message string
|
|
613
|
+
const errorMsg = `Evaluation run ${evalRunName} failed or encountered an error: ${statusResult.error ? String(statusResult.error) : statusResult.message}`;
|
|
614
|
+
logger.error(errorMsg);
|
|
615
|
+
if (progressBar)
|
|
616
|
+
progressBar.stop();
|
|
617
|
+
// Pass only the combined message to the constructor
|
|
618
|
+
throw new JudgmentAPIError(errorMsg);
|
|
619
|
+
}
|
|
620
|
+
else if (statusResult.status === 'not_found') {
|
|
621
|
+
const errorMsg = `Evaluation run ${evalRunName} not found.`;
|
|
622
|
+
logger.error(errorMsg);
|
|
623
|
+
if (progressBar)
|
|
624
|
+
progressBar.stop();
|
|
625
|
+
// Pass only the message to the constructor
|
|
626
|
+
throw new JudgmentAPIError(errorMsg);
|
|
627
|
+
}
|
|
628
|
+
// Wait for the next interval
|
|
629
|
+
yield new Promise(resolve => setTimeout(resolve, intervalMs));
|
|
888
630
|
}
|
|
889
631
|
catch (error) {
|
|
890
|
-
//
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
632
|
+
// Handle errors during the wait loop (e.g., network issues during checkEvalStatus)
|
|
633
|
+
logger.error(`Error during waitForEvaluation loop (attempt ${attempt}): ${error}`);
|
|
634
|
+
// Option: Rethrow immediately vs. retry vs. specific handling
|
|
635
|
+
if (error instanceof JudgmentAPIError) { // If it was already a processed API error, rethrow
|
|
636
|
+
if (progressBar)
|
|
637
|
+
progressBar.stop();
|
|
638
|
+
throw error;
|
|
639
|
+
}
|
|
640
|
+
// For other errors, wait and retry (up to maxAttempts)
|
|
641
|
+
if (attempt === maxAttempts) {
|
|
642
|
+
if (progressBar)
|
|
643
|
+
progressBar.stop();
|
|
644
|
+
throw new Error(`waitForEvaluation failed after ${maxAttempts} attempts: ${error}`);
|
|
645
|
+
}
|
|
646
|
+
// Still retryable, wait for interval
|
|
899
647
|
yield new Promise(resolve => setTimeout(resolve, intervalMs));
|
|
900
648
|
}
|
|
901
|
-
else {
|
|
902
|
-
// Break loop if already completed or failed to avoid unnecessary delay
|
|
903
|
-
break;
|
|
904
|
-
}
|
|
905
|
-
} // End while loop
|
|
906
|
-
// If loop finished without completing/failing
|
|
907
|
-
if (lastStatus !== 'complete' && lastStatus !== 'failed') {
|
|
908
|
-
if (showProgress)
|
|
909
|
-
process.stdout.write('\n'); // Keep direct console output
|
|
910
|
-
logger.error(`Evaluation polling timed out after ${attempts} attempts for "${evalRunName}". Last known status: ${lastStatus}`);
|
|
911
|
-
return []; // Return empty array on timeout
|
|
912
649
|
}
|
|
913
|
-
//
|
|
914
|
-
|
|
650
|
+
// If loop finishes without completion or error
|
|
651
|
+
if (progressBar)
|
|
652
|
+
progressBar.stop();
|
|
653
|
+
throw new Error(`Evaluation run ${evalRunName} did not complete after ${maxAttempts} attempts.`);
|
|
915
654
|
});
|
|
916
655
|
}
|
|
917
656
|
/**
|
|
@@ -920,12 +659,48 @@ export class JudgmentClient {
|
|
|
920
659
|
* @returns A string representing the progress bar
|
|
921
660
|
*/
|
|
922
661
|
_createProgressBar(percent) {
|
|
923
|
-
const width =
|
|
924
|
-
|
|
925
|
-
const
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
662
|
+
const width = 20; // Width of the progress bar
|
|
663
|
+
const filled = Math.round(width * (percent / 100));
|
|
664
|
+
const empty = width - filled;
|
|
665
|
+
return `[${'#'.repeat(filled)}${'.'.repeat(empty)}] ${percent.toFixed(1)}%`;
|
|
666
|
+
}
|
|
667
|
+
// Keep helper methods private
|
|
668
|
+
getAuthHeaders() {
|
|
669
|
+
return {
|
|
670
|
+
'Content-Type': 'application/json',
|
|
671
|
+
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
672
|
+
'X-Organization-Id': this.organizationId,
|
|
673
|
+
};
|
|
674
|
+
}
|
|
675
|
+
// Ensure this handles errors from Eval/Project API calls correctly
|
|
676
|
+
handleApiError(error, context) {
|
|
677
|
+
logger.error(`API Error during ${context}:`);
|
|
678
|
+
if (axios.isAxiosError(error)) {
|
|
679
|
+
const axiosError = error;
|
|
680
|
+
const response = axiosError.response;
|
|
681
|
+
if (response) {
|
|
682
|
+
logger.error(`Status: ${response.status} ${response.statusText}`);
|
|
683
|
+
logger.debug('Response Data:', response.data);
|
|
684
|
+
if (response.status === 422) {
|
|
685
|
+
logger.error('Validation Error Detail:', response.data);
|
|
686
|
+
}
|
|
687
|
+
else if (context === 'pullEval' && response.status === 404) { // Keep eval-specific handling
|
|
688
|
+
logger.error(`Evaluation run not found.`);
|
|
689
|
+
}
|
|
690
|
+
else if (context.startsWith('delete') && response.status === 404) { // Keep generic delete handling
|
|
691
|
+
logger.warn(`${context}: Resource not found, may have already been deleted.`);
|
|
692
|
+
}
|
|
693
|
+
}
|
|
694
|
+
else if (axiosError.request) {
|
|
695
|
+
logger.error('No response received from server.');
|
|
696
|
+
}
|
|
697
|
+
else {
|
|
698
|
+
logger.error(`Error setting up API request for ${context}`);
|
|
699
|
+
}
|
|
700
|
+
}
|
|
701
|
+
else {
|
|
702
|
+
logger.error(`Unexpected error during ${context}`);
|
|
703
|
+
}
|
|
929
704
|
}
|
|
930
705
|
}
|
|
931
706
|
//# sourceMappingURL=judgment-client.js.map
|