judgeval 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/README.md +95 -68
  2. package/dist/cjs/common/tracer.js +235 -143
  3. package/dist/cjs/common/tracer.js.map +1 -1
  4. package/dist/cjs/constants.js +8 -5
  5. package/dist/cjs/constants.js.map +1 -1
  6. package/dist/cjs/data/datasets/eval-dataset-client.js +349 -0
  7. package/dist/cjs/data/datasets/eval-dataset-client.js.map +1 -0
  8. package/dist/cjs/data/datasets/eval-dataset.js +405 -0
  9. package/dist/cjs/data/datasets/eval-dataset.js.map +1 -0
  10. package/dist/cjs/data/example.js +22 -1
  11. package/dist/cjs/data/example.js.map +1 -1
  12. package/dist/cjs/e2etests/eval-operations.test.js +282 -0
  13. package/dist/cjs/e2etests/eval-operations.test.js.map +1 -0
  14. package/dist/cjs/e2etests/judgee-traces.test.js +278 -0
  15. package/dist/cjs/e2etests/judgee-traces.test.js.map +1 -0
  16. package/dist/cjs/index.js +1 -3
  17. package/dist/cjs/index.js.map +1 -1
  18. package/dist/cjs/judgment-client.js +326 -645
  19. package/dist/cjs/judgment-client.js.map +1 -1
  20. package/dist/cjs/scorers/api-scorer.js +56 -48
  21. package/dist/cjs/scorers/api-scorer.js.map +1 -1
  22. package/dist/cjs/scorers/base-scorer.js +66 -11
  23. package/dist/cjs/scorers/base-scorer.js.map +1 -1
  24. package/dist/esm/common/tracer.js +236 -144
  25. package/dist/esm/common/tracer.js.map +1 -1
  26. package/dist/esm/constants.js +7 -4
  27. package/dist/esm/constants.js.map +1 -1
  28. package/dist/esm/data/datasets/eval-dataset-client.js +342 -0
  29. package/dist/esm/data/datasets/eval-dataset-client.js.map +1 -0
  30. package/dist/esm/data/datasets/eval-dataset.js +375 -0
  31. package/dist/esm/data/datasets/eval-dataset.js.map +1 -0
  32. package/dist/esm/data/example.js +22 -1
  33. package/dist/esm/data/example.js.map +1 -1
  34. package/dist/esm/e2etests/eval-operations.test.js +254 -0
  35. package/dist/esm/e2etests/eval-operations.test.js.map +1 -0
  36. package/dist/esm/e2etests/judgee-traces.test.js +253 -0
  37. package/dist/esm/e2etests/judgee-traces.test.js.map +1 -0
  38. package/dist/esm/index.js +0 -1
  39. package/dist/esm/index.js.map +1 -1
  40. package/dist/esm/judgment-client.js +328 -647
  41. package/dist/esm/judgment-client.js.map +1 -1
  42. package/dist/esm/scorers/api-scorer.js +56 -48
  43. package/dist/esm/scorers/api-scorer.js.map +1 -1
  44. package/dist/esm/scorers/base-scorer.js +66 -11
  45. package/dist/esm/scorers/base-scorer.js.map +1 -1
  46. package/dist/types/common/tracer.d.ts +27 -14
  47. package/dist/types/constants.d.ts +4 -4
  48. package/dist/types/data/datasets/eval-dataset-client.d.ts +39 -0
  49. package/dist/types/data/datasets/eval-dataset.d.ts +45 -0
  50. package/dist/types/data/example.d.ts +24 -12
  51. package/dist/types/e2etests/eval-operations.test.d.ts +5 -0
  52. package/dist/types/e2etests/judgee-traces.test.d.ts +5 -0
  53. package/dist/types/index.d.ts +0 -1
  54. package/dist/types/judgment-client.d.ts +3 -47
  55. package/dist/types/scorers/api-scorer.d.ts +15 -15
  56. package/dist/types/scorers/base-scorer.d.ts +53 -10
  57. package/package.json +2 -1
  58. package/dist/cjs/scorers/exact-match-scorer.js +0 -84
  59. package/dist/cjs/scorers/exact-match-scorer.js.map +0 -1
  60. package/dist/esm/scorers/exact-match-scorer.js +0 -80
  61. package/dist/esm/scorers/exact-match-scorer.js.map +0 -1
  62. package/dist/types/scorers/exact-match-scorer.d.ts +0 -10
@@ -46,6 +46,10 @@ const rules_js_1 = require("./rules.js");
46
46
  const run_evaluation_js_1 = require("./run-evaluation.js");
47
47
  const constants_js_1 = require("./constants.js");
48
48
  const logger_instance_js_1 = __importDefault(require("./common/logger-instance.js"));
49
+ // Keep progress bar imports if used elsewhere (e.g., waitForEvaluation)
50
+ const cli_progress_1 = __importDefault(require("cli-progress"));
51
+ const ansi_colors_1 = __importDefault(require("ansi-colors"));
52
+ const eval_dataset_client_js_1 = require("./data/datasets/eval-dataset-client.js");
49
53
  // Load environment variables
50
54
  dotenv.config();
51
55
  /**
@@ -199,143 +203,10 @@ class JudgmentClient {
199
203
  * Evaluate a dataset
200
204
  */
201
205
  evaluateDataset(dataset_1, scorers_1, model_1, aggregator_1, metadata_1) {
202
- return __awaiter(this, arguments, void 0, function* (dataset, // EvalDataset would be implemented separately
203
- scorers, model, aggregator, metadata, projectName = '', evalRunName = '', logResults = true, useJudgment = true, rules) {
204
- try {
205
- // Load appropriate implementations for all scorers
206
- const loadedScorers = [];
207
- for (const scorer of scorers) {
208
- try {
209
- if (scorer instanceof base_scorer_js_1.ScorerWrapper) {
210
- loadedScorers.push(scorer.loadImplementation(useJudgment));
211
- }
212
- else {
213
- // Assuming scorers passed are already JudgevalScorer or APIJudgmentScorer
214
- loadedScorers.push(scorer);
215
- }
216
- }
217
- catch (error) {
218
- throw new Error(`Failed to load implementation for scorer ${scorer.constructor.name}: ${error instanceof Error ? error.message : String(error)}`);
219
- }
220
- }
221
- // Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules
222
- if (rules && loadedScorers.some(scorer => scorer instanceof base_scorer_js_1.JudgevalScorer)) {
223
- throw new Error('Cannot use Judgeval scorers (only API scorers) when using rules. Please either remove rules or use only APIJudgmentScorer types.');
224
- }
225
- // Convert ScorerWrapper in rules to their implementations
226
- let loadedRules;
227
- if (rules) {
228
- loadedRules = [];
229
- for (const rule of rules) {
230
- try {
231
- const processedConditions = [];
232
- for (const condition of rule.conditions) {
233
- // Convert metric if it's a ScorerWrapper
234
- if (condition.metric instanceof base_scorer_js_1.ScorerWrapper) {
235
- try {
236
- const loadedMetric = condition.metric.loadImplementation(useJudgment);
237
- const newCondition = new rules_js_1.Condition(loadedMetric);
238
- Object.assign(newCondition, Object.assign(Object.assign({}, condition), { metric: loadedMetric }));
239
- processedConditions.push(newCondition);
240
- }
241
- catch (error) {
242
- throw new Error(`Failed to convert ScorerWrapper to implementation in rule '${rule.name}', condition metric '${condition.metric.constructor.name}': ${error instanceof Error ? error.message : String(error)}`);
243
- }
244
- }
245
- else {
246
- processedConditions.push(condition);
247
- }
248
- }
249
- // Create new rule with processed conditions
250
- const newRule = new rules_js_1.Rule(rule.name, processedConditions, rule.combine_type, rule.description, rule.notification, rule.ruleId);
251
- loadedRules.push(newRule);
252
- }
253
- catch (error) {
254
- throw new Error(`Failed to process rule '${rule.name}': ${error instanceof Error ? error.message : String(error)}`);
255
- }
256
- }
257
- }
258
- const evaluationRun = new evaluation_run_js_1.EvaluationRun({
259
- logResults,
260
- projectName,
261
- evalName: evalRunName,
262
- examples: dataset.examples, // Assuming dataset has an 'examples' property
263
- scorers: loadedScorers,
264
- model,
265
- aggregator,
266
- metadata,
267
- judgmentApiKey: this.judgmentApiKey,
268
- rules: loadedRules,
269
- organizationId: this.organizationId
270
- });
271
- // Assuming override=false, ignoreErrors=true, asyncExecution=false as defaults for evaluateDataset
272
- return (0, run_evaluation_js_1.runEval)(evaluationRun, false, true, false);
273
- }
274
- catch (error) {
275
- if (error instanceof Error) {
276
- if (error.message.includes('one or more fields are invalid')) {
277
- throw new Error(`Please check your EvaluationRun object, one or more fields are invalid: \n${error.message}`);
278
- }
279
- else {
280
- throw new Error(`An unexpected error occurred during evaluation: ${error.message}`);
281
- }
282
- }
283
- else {
284
- throw new Error(`An unexpected error occurred during evaluation: ${String(error)}`);
285
- }
286
- }
287
- });
288
- }
289
- /**
290
- * Create a dataset
291
- */
292
- createDataset() {
293
- // This would be implemented with EvalDataset
294
- throw new Error('Not implemented yet');
295
- }
296
- /**
297
- * Push a dataset to the Judgment platform
298
- */
299
- pushDataset(alias_1, dataset_1, projectName_1) {
300
- return __awaiter(this, arguments, void 0, function* (alias, dataset, projectName, overwrite = false) {
301
- // This would be implemented with EvalDataset
302
- throw new Error('Not implemented yet');
303
- });
304
- }
305
- /**
306
- * Pull a dataset from the Judgment platform
307
- */
308
- pullDataset(alias, projectName) {
309
- return __awaiter(this, void 0, void 0, function* () {
310
- // This would be implemented with EvalDataset
311
- throw new Error('Not implemented yet');
312
- });
313
- }
314
- /**
315
- * Delete a dataset from the Judgment platform
316
- */
317
- deleteDataset(alias, projectName) {
318
- return __awaiter(this, void 0, void 0, function* () {
319
- // This would be implemented with EvalDataset
320
- throw new Error('Not implemented yet');
321
- });
322
- }
323
- /**
324
- * Pull project dataset stats from the Judgment platform
325
- */
326
- pullProjectDatasetStats(projectName) {
327
- return __awaiter(this, void 0, void 0, function* () {
328
- // This would be implemented with EvalDataset
329
- throw new Error('Not implemented yet');
330
- });
331
- }
332
- /**
333
- * Insert examples into a dataset on the Judgment platform
334
- */
335
- insertDataset(alias, examples, projectName) {
336
- return __awaiter(this, void 0, void 0, function* () {
337
- // This would be implemented with EvalDataset
338
- throw new Error('Not implemented yet');
206
+ return __awaiter(this, arguments, void 0, function* (dataset, // Keep type loose for stub
207
+ scorers, model, aggregator, metadata, projectName = 'default_project', evalRunName = 'default_eval_run', logResults = true, useJudgment = true, rules) {
208
+ // Keep type loose for stub
209
+ throw new Error('Not implemented in JudgmentClient. Use EvalDatasetClient.');
339
210
  });
340
211
  }
341
212
  /**
@@ -344,39 +215,29 @@ class JudgmentClient {
344
215
  * @param evalRunName Name of the evaluation run
345
216
  * @returns Array containing one object with 'id' and 'results' (list of ScoringResult)
346
217
  */
347
- pullEval(projectName, evalRunName // Consistent parameter name, but API uses eval_name
348
- ) {
218
+ pullEval(projectName, evalRunName) {
349
219
  return __awaiter(this, void 0, void 0, function* () {
350
- var _a, _b, _c, _d;
351
- // Body matches Python's structure for this endpoint
352
220
  const evalRunRequestBody = {
353
221
  project_name: projectName,
354
- eval_name: evalRunName, // Use eval_name in the body for the API
222
+ eval_name: evalRunName,
355
223
  judgment_api_key: this.judgmentApiKey
356
224
  };
357
225
  try {
358
- const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL, // Use constant
359
- evalRunRequestBody, {
360
- headers: {
361
- 'Content-Type': 'application/json',
362
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
363
- 'X-Organization-Id': this.organizationId
364
- }
226
+ const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL, evalRunRequestBody, {
227
+ headers: this.getAuthHeaders()
365
228
  });
366
- // Process the response to match the Python SDK's format
367
- // Python returns [{ 'id': ..., 'results': [ScoringResult, ...]}]
368
- // The API response is a list of results, each with an 'id' and 'result'
369
229
  if (!Array.isArray(response.data) || response.data.length === 0) {
370
- return [{ id: '', results: [] }]; // Return empty structure if no data
230
+ return [{ id: '', results: [] }];
371
231
  }
372
- const evalRunResult = { id: '', results: [] };
373
- evalRunResult.id = ((_a = response.data[0]) === null || _a === void 0 ? void 0 : _a.id) || ''; // Assume ID is same for all results in run
232
+ const evalRunResult = [{}];
374
233
  for (const result of response.data) {
234
+ const resultId = result.id || '';
375
235
  const resultData = result.result || {};
236
+ // Extract data object from result data
376
237
  const dataObject = resultData.data_object || {};
377
- // Create Example from data_object
238
+ // Create Example with required input field
378
239
  const example = new example_js_1.Example({
379
- input: dataObject.input,
240
+ input: dataObject.input || '',
380
241
  actualOutput: dataObject.actual_output,
381
242
  expectedOutput: dataObject.expected_output,
382
243
  context: dataObject.context,
@@ -386,110 +247,21 @@ class JudgmentClient {
386
247
  expectedTools: dataObject.expected_tools,
387
248
  exampleId: dataObject.example_id,
388
249
  exampleIndex: dataObject.example_index,
389
- timestamp: dataObject.timestamp
390
- });
391
- // Create ScoringResult
392
- const scoringResult = new result_js_1.ScoringResult({
393
- dataObject: example,
394
- scorersData: resultData.scorers_data || [],
395
- error: resultData.error
250
+ timestamp: dataObject.timestamp,
251
+ example: dataObject.example // Include example boolean
396
252
  });
397
- evalRunResult.results.push(scoringResult);
398
- }
399
- return [evalRunResult]; // Wrap in array to match Python return type [{...}]
400
- }
401
- catch (error) {
402
- if (axios_1.default.isAxiosError(error)) {
403
- const statusCode = (_b = error.response) === null || _b === void 0 ? void 0 : _b.status;
404
- const errorMessage = ((_d = (_c = error.response) === null || _c === void 0 ? void 0 : _c.data) === null || _d === void 0 ? void 0 : _d.detail) || error.message;
405
- throw new Error(`Failed to pull evaluation results: ${statusCode} - ${errorMessage}`);
406
- }
407
- if (error instanceof Error) {
408
- throw new Error(`Failed to pull evaluation results: ${error.message}`);
409
- }
410
- throw new Error(`Failed to pull evaluation results: ${String(error)}`);
411
- }
412
- });
413
- }
414
- /**
415
- * Get evaluation run results (alias for pullEval with a more intuitive name)
416
- * @param projectName Name of the project
417
- * @param evalRunName Name of the evaluation run
418
- * @returns Array containing one object with 'id' and 'results' (list of ScoringResult)
419
- */
420
- getEvalRun(projectName, evalRunName) {
421
- return __awaiter(this, void 0, void 0, function* () {
422
- return this.pullEval(projectName, evalRunName);
423
- });
424
- }
425
- /**
426
- * List all evaluation runs for a project
427
- * @param projectName Name of the project
428
- * @param limit Maximum number of evaluation runs to return (default: 100)
429
- * @param offset Offset for pagination (default: 0)
430
- * @returns List of evaluation run metadata
431
- */
432
- listEvalRuns(projectName_1) {
433
- return __awaiter(this, arguments, void 0, function* (projectName, limit = 100, offset = 0) {
434
- var _a, _b, _c;
435
- try {
436
- // Use ROOT_API for the base URL
437
- const url = `${constants_js_1.ROOT_API}/projects/${projectName}/eval-runs`;
438
- const response = yield axios_1.default.get(url, {
439
- params: {
440
- limit,
441
- offset
442
- },
443
- headers: {
444
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
445
- 'X-Organization-Id': this.organizationId
446
- }
447
- });
448
- return response.data || [];
449
- }
450
- catch (error) {
451
- if (axios_1.default.isAxiosError(error)) {
452
- const statusCode = (_a = error.response) === null || _a === void 0 ? void 0 : _a.status;
453
- const errorMessage = ((_c = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data) === null || _c === void 0 ? void 0 : _c.detail) || error.message;
454
- throw new Error(`Failed to list evaluation runs: ${statusCode} - ${errorMessage}`);
455
- }
456
- if (error instanceof Error) {
457
- throw new Error(`Failed to list evaluation runs: ${error.message}`);
458
- }
459
- throw new Error(`Failed to list evaluation runs: ${String(error)}`);
460
- }
461
- });
462
- }
463
- /**
464
- * Get evaluation run statistics
465
- * @param projectName Name of the project
466
- * @param evalRunName Name of the evaluation run
467
- * @returns Statistics for the evaluation run
468
- */
469
- getEvalRunStats(projectName, evalRunName) {
470
- return __awaiter(this, void 0, void 0, function* () {
471
- var _a, _b, _c;
472
- try {
473
- // Use ROOT_API for the base URL
474
- const url = `${constants_js_1.ROOT_API}/projects/${projectName}/eval-runs/${evalRunName}/stats`;
475
- const response = yield axios_1.default.get(url, {
476
- headers: {
477
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
478
- 'X-Organization-Id': this.organizationId
479
- }
480
- });
481
- return response.data || {};
253
+ evalRunResult[0].id = resultId;
254
+ evalRunResult[0].results = [new result_js_1.ScoringResult({
255
+ dataObject: example,
256
+ scorersData: resultData.scorers_data || [],
257
+ error: resultData.error
258
+ })];
259
+ }
260
+ return evalRunResult;
482
261
  }
483
262
  catch (error) {
484
- if (axios_1.default.isAxiosError(error)) {
485
- const statusCode = (_a = error.response) === null || _a === void 0 ? void 0 : _a.status;
486
- const errorMessage = ((_c = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data) === null || _c === void 0 ? void 0 : _c.detail) || error.message;
487
- throw new Error(`Failed to get evaluation run statistics: ${statusCode} - ${errorMessage}`);
488
- }
489
- if (error instanceof Error) {
490
- throw new Error(`Failed to get evaluation run statistics: ${error.message}`);
491
- }
492
- throw new Error(`Failed to get evaluation run statistics: ${String(error)}`);
263
+ this.handleApiError(error, 'pullEval');
264
+ throw error;
493
265
  }
494
266
  });
495
267
  }
@@ -502,92 +274,67 @@ class JudgmentClient {
502
274
  */
503
275
  exportEvalResults(projectName_1, evalRunName_1) {
504
276
  return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, format = 'json') {
277
+ logger_instance_js_1.default.info(`Exporting eval results for project '${projectName}', run '${evalRunName}' as ${format}`);
505
278
  try {
506
- const evalRunArray = yield this.pullEval(projectName, evalRunName);
507
- const evalRunData = evalRunArray[0]; // Get the first element containing id and results
508
- if (!evalRunData || !evalRunData.results) {
509
- return format === 'json' ? JSON.stringify([], null, 2) : 'No results found';
279
+ const resultsData = yield this.pullEval(projectName, evalRunName);
280
+ if (!resultsData || resultsData.length === 0 || !resultsData[0].results) {
281
+ logger_instance_js_1.default.warn('No results found to export.');
282
+ return '';
510
283
  }
284
+ const results = resultsData[0].results;
511
285
  if (format === 'json') {
512
- // Return the whole structure including ID and results array
513
- return JSON.stringify(evalRunData, null, 2);
286
+ // Pretty print JSON
287
+ return JSON.stringify(results.map(r => r.toJSON()), null, 2);
514
288
  }
515
289
  else if (format === 'csv') {
516
- const results = evalRunData.results;
517
- if (!Array.isArray(results) || results.length === 0) {
518
- return 'No results found';
519
- }
520
- // Use csv-writer instead of json2csv
521
- let createObjectCsvStringifier;
522
- try {
523
- // Use dynamic import() for ES Modules
524
- const csvWriterModule = yield Promise.resolve().then(() => __importStar(require('csv-writer')));
525
- createObjectCsvStringifier = csvWriterModule.createObjectCsvStringifier;
526
- if (!createObjectCsvStringifier) { // Check if the function exists
527
- throw new Error("Could not load createObjectCsvStringifier from csv-writer");
290
+ if (results.length === 0)
291
+ return ''; // No data to export
292
+ // Dynamically determine headers from the first result object
293
+ // Flatten the structure for CSV
294
+ const flatResults = results.map(result => {
295
+ var _a, _b, _c;
296
+ const flat = {};
297
+ const exampleData = (_b = (_a = result.dataObject) === null || _a === void 0 ? void 0 : _a.toJSON()) !== null && _b !== void 0 ? _b : {}; // Use toJSON which gives snake_case
298
+ const scorersData = (_c = result.scorersData) !== null && _c !== void 0 ? _c : [];
299
+ // Add example data fields (snake_case)
300
+ for (const key in exampleData) {
301
+ // Prefix example fields to avoid collision, e.g., example_input
302
+ flat[`example_${key}`] = exampleData[key];
528
303
  }
529
- }
530
- catch (e) {
531
- // Provide a more helpful error message
532
- const errorMsg = e instanceof Error ? e.message : String(e);
533
- // Update error message to reflect import() failure
534
- console.error(`Failed to dynamically import 'csv-writer': ${errorMsg}. Ensure it's installed (\`npm install csv-writer\`).`);
535
- throw new Error("The 'csv-writer' package is required for CSV export but failed to load dynamically.");
536
- }
537
- try {
538
- // Flatten the structure slightly for better CSV output
539
- const processedResults = results.map((result) => {
540
- // Flatten dataObject properties and scorersData
541
- const flatResult = {};
542
- flatResult.eval_run_id = evalRunData.id; // Add eval run ID
543
- // Flatten dataObject
544
- if (result.dataObject) {
545
- for (const [key, value] of Object.entries(result.dataObject)) {
546
- // Prefix with 'data_' to avoid potential clashes
547
- flatResult[`data_${key}`] = (typeof value === 'object' && value !== null) ? JSON.stringify(value) : value;
548
- }
549
- }
550
- // Flatten scorersData - creates columns like scorer_0_name, scorer_0_score, etc.
551
- if (Array.isArray(result.scorersData)) {
552
- result.scorersData.forEach((scorerData, index) => {
553
- flatResult[`scorer_${index}_name`] = scorerData.name;
554
- flatResult[`scorer_${index}_score`] = (typeof scorerData.score === 'object' && scorerData.score !== null) ? JSON.stringify(scorerData.score) : scorerData.score;
555
- flatResult[`scorer_${index}_error`] = scorerData.error;
556
- // Add other scorer fields if necessary, e.g., metadata
557
- if (scorerData.additional_metadata) {
558
- flatResult[`scorer_${index}_metadata`] = JSON.stringify(scorerData.additional_metadata);
559
- }
560
- });
561
- }
562
- flatResult.error = result.error; // Top-level error for the example processing
563
- return flatResult;
564
- });
565
- // Define headers dynamically based on the keys of the first processed result
566
- if (processedResults.length === 0) {
567
- return 'No data to export after processing.'; // Handle case with no valid results after processing
568
- }
569
- const headers = Object.keys(processedResults[0]).map(key => ({ id: key, title: key }));
570
- const csvStringifier = createObjectCsvStringifier({
571
- header: headers
304
+ // Add scorers data
305
+ scorersData.forEach(scorer => {
306
+ flat[`scorer_${scorer.name}_score`] = scorer.score;
307
+ flat[`scorer_${scorer.name}_additional_metadata`] = JSON.stringify(scorer.additional_metadata);
308
+ flat[`scorer_${scorer.name}_error`] = scorer.error;
572
309
  });
573
- // Generate CSV string (header + records)
574
- return csvStringifier.getHeaderString() + csvStringifier.stringifyRecords(processedResults);
575
- }
576
- catch (error) {
577
- console.error('Error converting to CSV:', error);
578
- const errorMessage = error instanceof Error ? error.message : String(error);
579
- return `Error generating CSV: ${errorMessage}`;
580
- }
310
+ // Add top-level error if present
311
+ flat['top_level_error'] = result.error;
312
+ return flat;
313
+ });
314
+ // Get all unique keys from the flattened results for headers
315
+ const headers = Array.from(new Set(flatResults.flatMap(Object.keys)));
316
+ // Use papaparse for robust CSV generation
317
+ const Papa = require('papaparse'); // Use require here if not imported at top
318
+ const csv = Papa.unparse({
319
+ fields: headers,
320
+ data: flatResults
321
+ }, {
322
+ header: true,
323
+ quotes: true, // Ensure fields with commas/newlines are quoted
324
+ quoteChar: '"',
325
+ escapeChar: '"',
326
+ delimiter: ','
327
+ });
328
+ return csv;
581
329
  }
582
330
  else {
583
331
  throw new Error(`Unsupported export format: ${format}`);
584
332
  }
585
333
  }
586
334
  catch (error) {
587
- if (error instanceof Error) {
588
- throw new Error(`Failed to export evaluation results: ${error.message}`);
589
- }
590
- throw new Error(`Failed to export evaluation results: ${String(error)}`);
335
+ logger_instance_js_1.default.error(`Error exporting eval results: ${error}`);
336
+ this.handleApiError(error, 'exportEvalResults');
337
+ throw error;
591
338
  }
592
339
  });
593
340
  }
@@ -596,47 +343,23 @@ class JudgmentClient {
596
343
  */
597
344
  deleteEval(projectName, evalRunNames) {
598
345
  return __awaiter(this, void 0, void 0, function* () {
599
- var _a, _b;
600
- if (!evalRunNames || evalRunNames.length === 0) {
601
- throw new Error('No evaluation run names provided');
602
- }
603
- // Body matches Python's structure for this endpoint
604
- const evalRunRequestBody = {
346
+ logger_instance_js_1.default.info(`Deleting eval runs: ${evalRunNames.join(', ')} from project: ${projectName}`);
347
+ const requestBody = {
605
348
  project_name: projectName,
606
349
  eval_names: evalRunNames,
607
- judgment_api_key: this.judgmentApiKey // Required by this specific API endpoint
350
+ judgment_api_key: this.judgmentApiKey,
608
351
  };
609
352
  try {
610
- const response = yield axios_1.default.delete(constants_js_1.JUDGMENT_EVAL_DELETE_API_URL, // Use constant
611
- {
612
- data: evalRunRequestBody,
613
- headers: {
614
- 'Content-Type': 'application/json',
615
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
616
- 'X-Organization-Id': this.organizationId
617
- }
353
+ yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_DELETE_API_URL, requestBody, {
354
+ headers: this.getAuthHeaders()
618
355
  });
619
- return Boolean(response.data);
356
+ logger_instance_js_1.default.info('Successfully deleted eval runs.');
357
+ return true;
620
358
  }
621
359
  catch (error) {
622
- if (axios_1.default.isAxiosError(error)) {
623
- const status = (_a = error.response) === null || _a === void 0 ? void 0 : _a.status;
624
- const data = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data;
625
- if (status === 404) {
626
- throw new Error(`Eval results not found: ${JSON.stringify(data)}`);
627
- }
628
- else if (status === 500) {
629
- throw new Error(`Error deleting eval results: ${JSON.stringify(data)}`);
630
- }
631
- else {
632
- throw new Error(`Error deleting eval results (${status}): ${JSON.stringify(data)}`);
633
- }
634
- }
635
- // Rethrow original or wrapped error
636
- if (error instanceof Error) {
637
- throw new Error(`Error deleting eval results: ${error.message}`);
638
- }
639
- throw new Error(`Error deleting eval results: ${String(error)}`);
360
+ logger_instance_js_1.default.error(`Error deleting eval runs: ${error}`);
361
+ this.handleApiError(error, 'deleteEval');
362
+ return false;
640
363
  }
641
364
  });
642
365
  }
@@ -645,43 +368,22 @@ class JudgmentClient {
645
368
  */
646
369
  deleteProjectEvals(projectName) {
647
370
  return __awaiter(this, void 0, void 0, function* () {
648
- var _a, _b;
371
+ logger_instance_js_1.default.info(`Deleting ALL eval runs for project: ${projectName}`);
372
+ const requestBody = {
373
+ project_name: projectName,
374
+ judgment_api_key: this.judgmentApiKey,
375
+ };
649
376
  try {
650
- const response = yield axios_1.default.delete(constants_js_1.JUDGMENT_EVAL_DELETE_PROJECT_API_URL, // Use constant
651
- {
652
- // Remove judgment_api_key from body to match Python (uses header auth)
653
- data: {
654
- project_name: projectName,
655
- },
656
- headers: {
657
- 'Content-Type': 'application/json',
658
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
659
- 'X-Organization-Id': this.organizationId
660
- }
377
+ yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_DELETE_PROJECT_API_URL, requestBody, {
378
+ headers: this.getAuthHeaders()
661
379
  });
662
- // Python returns response.json(), check if TS response needs similar handling
663
- return Boolean(response.data); // Assuming response.data indicates success
380
+ logger_instance_js_1.default.info(`Successfully deleted all eval runs for project ${projectName}.`);
381
+ return true;
664
382
  }
665
383
  catch (error) {
666
- if (axios_1.default.isAxiosError(error)) {
667
- const status = (_a = error.response) === null || _a === void 0 ? void 0 : _a.status;
668
- const data = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data;
669
- if (status === 404) {
670
- // Assuming 404 might mean project not found or no evals to delete
671
- console.warn(`Project '${projectName}' not found or no evals to delete.`);
672
- return false; // Or true depending on desired idempotency behavior
673
- }
674
- else if (status === 500) {
675
- throw new Error(`Error deleting project evals: ${JSON.stringify(data)}`);
676
- }
677
- else {
678
- throw new Error(`Error deleting project evals (${status}): ${JSON.stringify(data)}`);
679
- }
680
- }
681
- if (error instanceof Error) {
682
- throw new Error(`Error deleting project evals: ${error.message}`);
683
- }
684
- throw new Error(`Error deleting project evals: ${String(error)}`);
384
+ logger_instance_js_1.default.error(`Error deleting project evals: ${error}`);
385
+ this.handleApiError(error, 'deleteProjectEvals');
386
+ return false;
685
387
  }
686
388
  });
687
389
  }
@@ -690,37 +392,34 @@ class JudgmentClient {
690
392
  */
691
393
  createProject(projectName) {
692
394
  return __awaiter(this, void 0, void 0, function* () {
395
+ logger_instance_js_1.default.info(`Creating project: ${projectName}`);
396
+ const requestBody = {
397
+ project_name: projectName,
398
+ judgment_api_key: this.judgmentApiKey,
399
+ };
693
400
  try {
694
- const response = yield axios_1.default.post(constants_js_1.JUDGMENT_PROJECT_CREATE_API_URL, // Use constant
695
- // Remove judgment_api_key from body to match Python (uses header auth)
696
- {
697
- project_name: projectName,
698
- }, {
699
- headers: {
700
- 'Content-Type': 'application/json',
701
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
702
- 'X-Organization-Id': this.organizationId
703
- }
401
+ const response = yield axios_1.default.post(constants_js_1.JUDGMENT_PROJECT_CREATE_API_URL, requestBody, {
402
+ headers: this.getAuthHeaders()
704
403
  });
705
- // Python returns response.json(), check if TS response needs similar handling
706
- return Boolean(response.data); // Assuming response.data indicates success
707
- }
708
- catch (error) {
709
- if (axios_1.default.isAxiosError(error) && error.response) {
710
- // Check for specific conflict error (e.g., 409) if API provides it
711
- if (error.response.status === 409) {
712
- console.warn(`Project '${projectName}' already exists.`);
713
- return false; // Or true if idempotent creation is desired
714
- }
715
- throw new Error(`Error creating project (${error.response.status}): ${JSON.stringify(error.response.data)}`);
404
+ // Check for specific success message or status if API provides one
405
+ if (response.data && response.data.message === 'Project added successfully') {
406
+ logger_instance_js_1.default.info(`Successfully created project: ${projectName}`);
407
+ return true;
716
408
  }
717
- else if (error instanceof Error) {
718
- throw new Error(`Error creating project: ${error.message}`);
409
+ else if (response.data && response.data.message === 'Project already exists') {
410
+ logger_instance_js_1.default.warn(`Project '${projectName}' already exists.`);
411
+ return true; // Or false, depending on desired behavior for existing projects
719
412
  }
720
413
  else {
721
- throw new Error(`Error creating project: ${String(error)}`);
414
+ logger_instance_js_1.default.error(`Failed to create project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
415
+ return false;
722
416
  }
723
417
  }
418
+ catch (error) {
419
+ logger_instance_js_1.default.error(`Error creating project: ${error}`);
420
+ this.handleApiError(error, 'createProject');
421
+ return false;
422
+ }
724
423
  });
725
424
  }
726
425
  /**
@@ -728,37 +427,29 @@ class JudgmentClient {
728
427
  */
729
428
  deleteProject(projectName) {
730
429
  return __awaiter(this, void 0, void 0, function* () {
430
+ logger_instance_js_1.default.info(`Deleting project: ${projectName}`);
431
+ const requestBody = {
432
+ project_name: projectName,
433
+ judgment_api_key: this.judgmentApiKey,
434
+ };
731
435
  try {
732
- const response = yield axios_1.default.delete(constants_js_1.JUDGMENT_PROJECT_DELETE_API_URL, // Use constant
733
- {
734
- // Remove judgment_api_key from body to match Python (uses header auth)
735
- data: {
736
- project_name: projectName,
737
- },
738
- headers: {
739
- 'Content-Type': 'application/json',
740
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
741
- 'X-Organization-Id': this.organizationId
742
- }
436
+ const response = yield axios_1.default.post(constants_js_1.JUDGMENT_PROJECT_DELETE_API_URL, requestBody, {
437
+ headers: this.getAuthHeaders()
743
438
  });
744
- // Python returns response.json(), check if TS response needs similar handling
745
- return Boolean(response.data); // Assuming response.data indicates success
746
- }
747
- catch (error) {
748
- if (axios_1.default.isAxiosError(error) && error.response) {
749
- if (error.response.status === 404) {
750
- console.warn(`Project '${projectName}' not found for deletion.`);
751
- return false; // Or true depending on desired idempotency
752
- }
753
- throw new Error(`Error deleting project (${error.response.status}): ${JSON.stringify(error.response.data)}`);
754
- }
755
- else if (error instanceof Error) {
756
- throw new Error(`Error deleting project: ${error.message}`);
439
+ if (response.data && response.data.message === 'Project deleted successfully') {
440
+ logger_instance_js_1.default.info(`Successfully deleted project: ${projectName}`);
441
+ return true;
757
442
  }
758
443
  else {
759
- throw new Error(`Error deleting project: ${String(error)}`);
444
+ logger_instance_js_1.default.error(`Failed to delete project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
445
+ return false;
760
446
  }
761
447
  }
448
+ catch (error) {
449
+ logger_instance_js_1.default.error(`Error deleting project: ${error}`);
450
+ this.handleApiError(error, 'deleteProject');
451
+ return false;
452
+ }
762
453
  });
763
454
  }
764
455
  /**
@@ -766,35 +457,36 @@ class JudgmentClient {
766
457
  */
767
458
  validateApiKey() {
768
459
  return __awaiter(this, void 0, void 0, function* () {
769
- var _a, _b;
460
+ var _a, _b, _c, _d;
461
+ logger_instance_js_1.default.debug('Validating API Key...');
770
462
  try {
771
- const response = yield axios_1.default.post(`${constants_js_1.ROOT_API}/validate_api_key/`, // Use ROOT_API
772
- {}, // Empty body
773
- {
774
- headers: {
775
- 'Content-Type': 'application/json',
776
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
777
- // Removed 'X-Organization-Id' header to match Python for this specific endpoint
778
- }
779
- });
780
- if (response.status === 200) {
781
- return [true, JSON.stringify(response.data)];
782
- }
783
- else {
784
- // Status might be non-200 but still valid JSON error response
785
- return [false, ((_a = response.data) === null || _a === void 0 ? void 0 : _a.detail) || `Error validating API key (Status: ${response.status})`];
786
- }
463
+ // Instantiate EvalDatasetClient to perform the validation call
464
+ const datasetClient = new eval_dataset_client_js_1.EvalDatasetClient(this.judgmentApiKey, this.organizationId);
465
+ // Use the dataset client to make the call
466
+ yield datasetClient.pullProjectDatasetStats('__api_key_validation__');
467
+ logger_instance_js_1.default.debug('API Key appears valid.');
468
+ return [true, 'API Key is valid.'];
787
469
  }
788
470
  catch (error) {
789
- if (axios_1.default.isAxiosError(error) && error.response) {
790
- return [false, ((_b = error.response.data) === null || _b === void 0 ? void 0 : _b.detail) || `Error validating API key (Status: ${error.response.status})`];
791
- }
792
- else if (error instanceof Error) {
793
- return [false, `Error validating API key: ${error.message}`];
471
+ let message = 'API Key validation failed.';
472
+ if (axios_1.default.isAxiosError(error)) {
473
+ if (((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 401 || ((_b = error.response) === null || _b === void 0 ? void 0 : _b.status) === 403) {
474
+ message = 'API Key is invalid or expired.';
475
+ }
476
+ else if (((_c = error.response) === null || _c === void 0 ? void 0 : _c.status) === 404) {
477
+ // If validation endpoint returns 404, key might be valid but endpoint wrong/project not found
478
+ // This depends on the specific validation endpoint behavior
479
+ message = 'API Key might be valid, but validation endpoint returned 404.';
480
+ }
481
+ else {
482
+ message = `API Key validation failed with status ${(_d = error.response) === null || _d === void 0 ? void 0 : _d.status}: ${error.message}`;
483
+ }
794
484
  }
795
485
  else {
796
- return [false, `Unknown error validating API key: ${String(error)}`];
486
+ message = `API Key validation failed: ${String(error)}`;
797
487
  }
488
+ logger_instance_js_1.default.error(message);
489
+ return [false, message];
798
490
  }
799
491
  });
800
492
  }
@@ -819,17 +511,12 @@ class JudgmentClient {
819
511
  */
820
512
  pullEvalResults(projectName, evalRunName) {
821
513
  return __awaiter(this, void 0, void 0, function* () {
822
- var _a;
823
- try {
824
- const evalRunArray = yield this.pullEval(projectName, evalRunName);
825
- // pullEval returns [{ id: ..., results: [...] }], extract results
826
- return ((_a = evalRunArray[0]) === null || _a === void 0 ? void 0 : _a.results) || [];
827
- }
828
- catch (error) {
829
- // Log error but return empty array to allow waitForEvaluation to potentially retry
830
- logger_instance_js_1.default.error(`Failed to pull evaluation results for '${evalRunName}': ${error instanceof Error ? error.message : String(error)}`);
514
+ const rawResults = yield this.pullEval(projectName, evalRunName);
515
+ if (!rawResults || rawResults.length === 0 || !rawResults[0].results) {
831
516
  return [];
832
517
  }
518
+ // Assuming pullEval correctly returns results in the expected format
519
+ return rawResults[0].results;
833
520
  });
834
521
  }
835
522
  /**
@@ -841,88 +528,72 @@ class JudgmentClient {
841
528
  */
842
529
  checkEvalStatus(projectName, evalRunName) {
843
530
  return __awaiter(this, void 0, void 0, function* () {
844
- var _a, _b;
845
- // Using 'eval_name' in body for consistency with pullEval/fetch endpoint.
531
+ var _a;
846
532
  const requestBody = {
847
533
  project_name: projectName,
848
- eval_name: evalRunName, // Use 'eval_name'
534
+ eval_name: evalRunName,
849
535
  judgment_api_key: this.judgmentApiKey,
850
536
  };
851
537
  try {
852
- const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL, // Use fetch URL
853
- requestBody, {
854
- headers: {
855
- 'Content-Type': 'application/json',
856
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
857
- 'X-Organization-Id': this.organizationId
858
- },
859
- timeout: 15000 // Slightly increased timeout for status checks
538
+ const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL, requestBody, {
539
+ headers: this.getAuthHeaders(),
540
+ // Add a shorter timeout for status checks?
541
+ // timeout: 5000
860
542
  });
861
- // Interpret response: API might return status object or full results array
862
- let statusData = { status: 'unknown', progress: 0, message: '' };
863
- if (Array.isArray(response.data)) {
864
- // If it's an array, assume results are complete unless explicitly stated otherwise
865
- if (response.data.length > 0 && ((_b = (_a = response.data[0]) === null || _a === void 0 ? void 0 : _a.result) === null || _b === void 0 ? void 0 : _b.status)) {
866
- // Check if the first result object contains status info
867
- statusData = response.data[0].result; // Assuming status is within the 'result' field
868
- }
869
- else if (response.data.length > 0) {
870
- // Assume complete if we get results array without specific status fields
871
- statusData = { status: 'complete', progress: 1.0, message: 'Results received' };
872
- }
873
- else {
874
- // Empty array might mean still processing or no results yet
875
- statusData = { status: 'processing', progress: 0, message: 'Waiting for results...' };
876
- }
877
- }
878
- else if (typeof response.data === 'object' && response.data !== null && response.data.status) {
879
- // Might be a direct status object from the API
880
- statusData = response.data;
881
- }
543
+ const data = response.data;
544
+ // Check if the response looks like a status object
545
+ if (data && typeof data.status === 'string') {
546
+ return {
547
+ status: data.status || 'unknown',
548
+ progress: typeof data.progress === 'number' ? data.progress : 0,
549
+ message: data.message || '',
550
+ error: data.error
551
+ };
552
+ }
553
+ // Check if the response looks like completed results (array format from pullEval)
554
+ else if (Array.isArray(data) && data.length > 0 && data[0].results) {
555
+ return {
556
+ status: 'completed',
557
+ progress: 100,
558
+ message: 'Evaluation completed.'
559
+ };
560
+ }
561
+ // Check if response looks like completed results (single object format)
562
+ else if (data && typeof data.id === 'string' && Array.isArray(data.results)) { // Adjust based on actual API response for single result fetch
563
+ return {
564
+ status: 'completed',
565
+ progress: 100,
566
+ message: 'Evaluation completed.'
567
+ };
568
+ }
569
+ // Handle other potential responses or assume pending/unknown
882
570
  else {
883
- // Unexpected response format
884
- statusData = { status: 'unknown', progress: 0, message: `Unexpected response format: ${JSON.stringify(response.data)}` };
885
- }
886
- // Normalize the progress value
887
- let progress = 0;
888
- if (statusData.progress !== undefined && statusData.progress !== null) {
889
- const parsedProgress = parseFloat(statusData.progress);
890
- if (!isNaN(parsedProgress)) {
891
- progress = Math.max(0, Math.min(1, parsedProgress)); // Ensure progress is between 0 and 1
892
- }
571
+ logger_instance_js_1.default.warn(`Unexpected response format when checking status for ${evalRunName}:`, data);
572
+ return {
573
+ status: 'unknown',
574
+ progress: 0,
575
+ message: 'Could not determine status from API response.'
576
+ };
893
577
  }
894
- const normalizedStatus = {
895
- status: statusData.status || 'unknown',
896
- progress: progress,
897
- message: statusData.message || '',
898
- error: statusData.error // Include error field if present
899
- };
900
- // Only log status if it's not being called from waitForEvaluation
901
- // Check stack trace for caller function name
902
- const stack = new Error().stack;
903
- const isCalledByWaitForEvaluation = stack === null || stack === void 0 ? void 0 : stack.includes('waitForEvaluation');
904
- if (!isCalledByWaitForEvaluation) {
905
- // Use logger for status updates when called directly
906
- logger_instance_js_1.default.info(`Evaluation Status: ${normalizedStatus.status}`);
907
- logger_instance_js_1.default.info(`Progress: ${Math.round(normalizedStatus.progress * 100)}%`);
908
- if (normalizedStatus.message) {
909
- logger_instance_js_1.default.info(`Message: ${normalizedStatus.message}`);
910
- }
911
- if (normalizedStatus.error) {
912
- logger_instance_js_1.default.error(`Error in status: ${normalizedStatus.error}`);
913
- }
914
- }
915
- return normalizedStatus;
916
578
  }
917
579
  catch (error) {
918
- // Don't throw errors from status check, just return default 'unknown' status
919
- // This allows waitForEvaluation to continue polling even on transient network issues
920
- const errorMessage = error instanceof Error ? error.message : String(error);
921
- logger_instance_js_1.default.error(`Error checking evaluation status for '${evalRunName}': ${errorMessage}`);
580
+ // Don't throw here, return status indicating error
581
+ let errorMessage = 'Failed to fetch evaluation status.';
582
+ let status = 'error';
583
+ if (axios_1.default.isAxiosError(error) && ((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 404) {
584
+ status = 'not_found';
585
+ errorMessage = 'Evaluation run not found.';
586
+ logger_instance_js_1.default.warn(`Evaluation run ${evalRunName} not found.`);
587
+ }
588
+ else {
589
+ this.handleApiError(error, 'checkEvalStatus');
590
+ errorMessage = `Error fetching status: ${String(error)}`;
591
+ }
922
592
  return {
923
- status: 'unknown',
593
+ status: status,
924
594
  progress: 0,
925
- message: `Error checking status: ${errorMessage}`
595
+ message: errorMessage,
596
+ error: String(error) // Include error string
926
597
  };
927
598
  }
928
599
  });
@@ -936,105 +607,79 @@ class JudgmentClient {
936
607
  */
937
608
  waitForEvaluation(projectName_1, evalRunName_1) {
938
609
  return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, options = {}) {
939
- const { intervalMs = 3000, // Slightly longer interval
940
- maxAttempts = 200, // ~10 minutes total wait time (200 * 3s)
941
- showProgress = true } = options;
942
- let attempts = 0;
943
- let lastProgressPercent = -1;
944
- let lastStatus = '';
610
+ const { intervalMs = 5000, maxAttempts = 120, showProgress = true } = options; // Default: check every 5s for 10 mins
611
+ let progressBar;
945
612
  if (showProgress) {
946
- // Use logger for initial message
947
- logger_instance_js_1.default.info(`Waiting for evaluation "${evalRunName}" in project "${projectName}" to complete...`);
613
+ progressBar = new cli_progress_1.default.SingleBar({
614
+ format: `Waiting for ${ansi_colors_1.default.magenta(evalRunName)}... | ${ansi_colors_1.default.cyan('{bar}')} | {percentage}% || {status}`,
615
+ barCompleteChar: '\u2588',
616
+ barIncompleteChar: '\u2591',
617
+ hideCursor: true,
618
+ clearOnComplete: false,
619
+ stopOnComplete: true,
620
+ }, cli_progress_1.default.Presets.shades_classic);
621
+ progressBar.start(100, 0, { status: 'Initiating...' });
948
622
  }
949
- while (attempts < maxAttempts) {
950
- attempts++;
623
+ for (let attempt = 1; attempt <= maxAttempts; attempt++) {
951
624
  try {
952
- const status = yield this.checkEvalStatus(projectName, evalRunName); // Call internal status check
953
- const currentProgressPercent = Math.round(status.progress * 100);
954
- // Show progress/status updates only when they change significantly
955
- if (showProgress && (currentProgressPercent !== lastProgressPercent || status.status !== lastStatus)) {
956
- const progressBar = this._createProgressBar(currentProgressPercent >= 0 ? currentProgressPercent : 0);
957
- // Use process.stdout.write to potentially overwrite the line (works best in standard terminals)
958
- process.stdout.write('\rAttempt ' + attempts + '/' + maxAttempts + ' | Status: ' + status.status + ' | Progress: ' + progressBar + ' ' + currentProgressPercent + '% ');
959
- lastProgressPercent = currentProgressPercent;
960
- lastStatus = status.status;
961
- }
962
- // Check evaluation status
963
- if (status.status === 'complete') {
964
- if (showProgress) {
965
- process.stdout.write('\n'); // Keep direct console output for progress bar newline
966
- // Use logger for status update
967
- logger_instance_js_1.default.info('Evaluation complete! Fetching results...');
968
- }
969
- try {
970
- // Use the dedicated results fetching method
971
- const results = yield this.pullEvalResults(projectName, evalRunName);
972
- if (results.length > 0) {
973
- // Use logger for status update
974
- logger_instance_js_1.default.info(`Successfully fetched ${results.length} results.`);
975
- return results;
976
- }
977
- else {
978
- // If complete status but no results, might be an issue. Log and return empty.
979
- logger_instance_js_1.default.warn(`Evaluation reported complete, but no results were fetched for '${evalRunName}'.`);
980
- return [];
981
- }
982
- }
983
- catch (fetchError) {
984
- if (showProgress)
985
- process.stdout.write('\n'); // Keep direct console output
986
- logger_instance_js_1.default.error(`Error fetching results after completion for '${evalRunName}': ${fetchError instanceof Error ? fetchError.message : String(fetchError)}`);
987
- return []; // Return empty array on error
988
- }
989
- }
990
- else if (status.status === 'failed') {
991
- if (showProgress)
992
- process.stdout.write('\n'); // Keep direct console output
993
- logger_instance_js_1.default.error(`Evaluation failed for '${evalRunName}': ${status.error || status.message || 'Unknown error'}`);
994
- return []; // Return empty array on failure
995
- }
996
- else if (status.status === 'unknown') {
997
- // Log unknown status but continue polling
998
- // Avoid flooding logs if status remains unknown
999
- if (lastStatus !== 'unknown') {
1000
- if (showProgress)
1001
- process.stdout.write('\n'); // Keep direct console output
1002
- logger_instance_js_1.default.warn(`Evaluation status unknown for '${evalRunName}' (attempt ${attempts}). Retrying...`);
1003
- lastProgressPercent = -1; // Reset progress display
625
+ const statusResult = yield this.checkEvalStatus(projectName, evalRunName);
626
+ const progress = Math.max(0, Math.min(100, statusResult.progress || 0)); // Clamp progress
627
+ const statusText = statusResult.message || statusResult.status;
628
+ if (progressBar) {
629
+ progressBar.update(progress, { status: statusText });
630
+ }
631
+ if (statusResult.status === 'completed') {
632
+ if (progressBar) {
633
+ progressBar.update(100, { status: ansi_colors_1.default.green('Completed! Fetching results...') });
1004
634
  }
1005
- lastStatus = 'unknown';
1006
- }
1007
- else {
1008
- // Still processing (e.g., 'processing', 'running', 'pending')
1009
- lastStatus = status.status;
1010
- }
635
+ // Fetch final results using pullEval
636
+ const finalResults = yield this.pullEvalResults(projectName, evalRunName);
637
+ logger_instance_js_1.default.info(`Evaluation run ${evalRunName} completed successfully.`);
638
+ return finalResults;
639
+ }
640
+ else if (statusResult.status === 'error' || statusResult.status === 'failed') {
641
+ // Concatenate error details into a single message string
642
+ const errorMsg = `Evaluation run ${evalRunName} failed or encountered an error: ${statusResult.error ? String(statusResult.error) : statusResult.message}`;
643
+ logger_instance_js_1.default.error(errorMsg);
644
+ if (progressBar)
645
+ progressBar.stop();
646
+ // Pass only the combined message to the constructor
647
+ throw new run_evaluation_js_1.JudgmentAPIError(errorMsg);
648
+ }
649
+ else if (statusResult.status === 'not_found') {
650
+ const errorMsg = `Evaluation run ${evalRunName} not found.`;
651
+ logger_instance_js_1.default.error(errorMsg);
652
+ if (progressBar)
653
+ progressBar.stop();
654
+ // Pass only the message to the constructor
655
+ throw new run_evaluation_js_1.JudgmentAPIError(errorMsg);
656
+ }
657
+ // Wait for the next interval
658
+ yield new Promise(resolve => setTimeout(resolve, intervalMs));
1011
659
  }
1012
660
  catch (error) {
1013
- // Log the error but continue polling (checkEvalStatus should handle internal errors gracefully)
1014
- if (showProgress)
1015
- process.stdout.write('\n'); // Keep direct console output
1016
- logger_instance_js_1.default.error(`Error during status check loop (attempt ${attempts}/${maxAttempts}): ${error instanceof Error ? error.message : String(error)}`);
1017
- lastProgressPercent = -1; // Reset progress display
1018
- lastStatus = 'error_in_loop'; // Indicate issue in the loop itself
1019
- }
1020
- // Wait before next poll only if not completed/failed
1021
- if (lastStatus !== 'complete' && lastStatus !== 'failed') {
661
+ // Handle errors during the wait loop (e.g., network issues during checkEvalStatus)
662
+ logger_instance_js_1.default.error(`Error during waitForEvaluation loop (attempt ${attempt}): ${error}`);
663
+ // Option: Rethrow immediately vs. retry vs. specific handling
664
+ if (error instanceof run_evaluation_js_1.JudgmentAPIError) { // If it was already a processed API error, rethrow
665
+ if (progressBar)
666
+ progressBar.stop();
667
+ throw error;
668
+ }
669
+ // For other errors, wait and retry (up to maxAttempts)
670
+ if (attempt === maxAttempts) {
671
+ if (progressBar)
672
+ progressBar.stop();
673
+ throw new Error(`waitForEvaluation failed after ${maxAttempts} attempts: ${error}`);
674
+ }
675
+ // Still retryable, wait for interval
1022
676
  yield new Promise(resolve => setTimeout(resolve, intervalMs));
1023
677
  }
1024
- else {
1025
- // Break loop if already completed or failed to avoid unnecessary delay
1026
- break;
1027
- }
1028
- } // End while loop
1029
- // If loop finished without completing/failing
1030
- if (lastStatus !== 'complete' && lastStatus !== 'failed') {
1031
- if (showProgress)
1032
- process.stdout.write('\n'); // Keep direct console output
1033
- logger_instance_js_1.default.error(`Evaluation polling timed out after ${attempts} attempts for "${evalRunName}". Last known status: ${lastStatus}`);
1034
- return []; // Return empty array on timeout
1035
678
  }
1036
- // Should technically be unreachable if break conditions work, but safeguard return
1037
- return [];
679
+ // If loop finishes without completion or error
680
+ if (progressBar)
681
+ progressBar.stop();
682
+ throw new Error(`Evaluation run ${evalRunName} did not complete after ${maxAttempts} attempts.`);
1038
683
  });
1039
684
  }
1040
685
  /**
@@ -1043,12 +688,48 @@ class JudgmentClient {
1043
688
  * @returns A string representing the progress bar
1044
689
  */
1045
690
  _createProgressBar(percent) {
1046
- const width = 25; // Slightly wider bar
1047
- // Clamp percent between 0 and 100
1048
- const clampedPercent = Math.max(0, Math.min(100, percent));
1049
- const completed = Math.round(width * (clampedPercent / 100)); // Use round for potentially smoother look
1050
- const remaining = width - completed;
1051
- return '[' + '#'.repeat(completed) + '-'.repeat(remaining) + ']'; // Use different chars
691
+ const width = 20; // Width of the progress bar
692
+ const filled = Math.round(width * (percent / 100));
693
+ const empty = width - filled;
694
+ return `[${'#'.repeat(filled)}${'.'.repeat(empty)}] ${percent.toFixed(1)}%`;
695
+ }
696
+ // Keep helper methods private
697
+ getAuthHeaders() {
698
+ return {
699
+ 'Content-Type': 'application/json',
700
+ 'Authorization': `Bearer ${this.judgmentApiKey}`,
701
+ 'X-Organization-Id': this.organizationId,
702
+ };
703
+ }
704
+ // Ensure this handles errors from Eval/Project API calls correctly
705
+ handleApiError(error, context) {
706
+ logger_instance_js_1.default.error(`API Error during ${context}:`);
707
+ if (axios_1.default.isAxiosError(error)) {
708
+ const axiosError = error;
709
+ const response = axiosError.response;
710
+ if (response) {
711
+ logger_instance_js_1.default.error(`Status: ${response.status} ${response.statusText}`);
712
+ logger_instance_js_1.default.debug('Response Data:', response.data);
713
+ if (response.status === 422) {
714
+ logger_instance_js_1.default.error('Validation Error Detail:', response.data);
715
+ }
716
+ else if (context === 'pullEval' && response.status === 404) { // Keep eval-specific handling
717
+ logger_instance_js_1.default.error(`Evaluation run not found.`);
718
+ }
719
+ else if (context.startsWith('delete') && response.status === 404) { // Keep generic delete handling
720
+ logger_instance_js_1.default.warn(`${context}: Resource not found, may have already been deleted.`);
721
+ }
722
+ }
723
+ else if (axiosError.request) {
724
+ logger_instance_js_1.default.error('No response received from server.');
725
+ }
726
+ else {
727
+ logger_instance_js_1.default.error(`Error setting up API request for ${context}`);
728
+ }
729
+ }
730
+ else {
731
+ logger_instance_js_1.default.error(`Unexpected error during ${context}`);
732
+ }
1052
733
  }
1053
734
  }
1054
735
  exports.JudgmentClient = JudgmentClient;