judgeval 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/README.md +95 -68
  2. package/dist/cjs/common/tracer.js +235 -143
  3. package/dist/cjs/common/tracer.js.map +1 -1
  4. package/dist/cjs/constants.js +8 -5
  5. package/dist/cjs/constants.js.map +1 -1
  6. package/dist/cjs/data/datasets/eval-dataset-client.js +349 -0
  7. package/dist/cjs/data/datasets/eval-dataset-client.js.map +1 -0
  8. package/dist/cjs/data/datasets/eval-dataset.js +405 -0
  9. package/dist/cjs/data/datasets/eval-dataset.js.map +1 -0
  10. package/dist/cjs/data/example.js +22 -1
  11. package/dist/cjs/data/example.js.map +1 -1
  12. package/dist/cjs/e2etests/eval-operations.test.js +282 -0
  13. package/dist/cjs/e2etests/eval-operations.test.js.map +1 -0
  14. package/dist/cjs/e2etests/judgee-traces.test.js +278 -0
  15. package/dist/cjs/e2etests/judgee-traces.test.js.map +1 -0
  16. package/dist/cjs/index.js +1 -3
  17. package/dist/cjs/index.js.map +1 -1
  18. package/dist/cjs/judgment-client.js +326 -645
  19. package/dist/cjs/judgment-client.js.map +1 -1
  20. package/dist/cjs/scorers/api-scorer.js +56 -48
  21. package/dist/cjs/scorers/api-scorer.js.map +1 -1
  22. package/dist/cjs/scorers/base-scorer.js +66 -11
  23. package/dist/cjs/scorers/base-scorer.js.map +1 -1
  24. package/dist/esm/common/tracer.js +236 -144
  25. package/dist/esm/common/tracer.js.map +1 -1
  26. package/dist/esm/constants.js +7 -4
  27. package/dist/esm/constants.js.map +1 -1
  28. package/dist/esm/data/datasets/eval-dataset-client.js +342 -0
  29. package/dist/esm/data/datasets/eval-dataset-client.js.map +1 -0
  30. package/dist/esm/data/datasets/eval-dataset.js +375 -0
  31. package/dist/esm/data/datasets/eval-dataset.js.map +1 -0
  32. package/dist/esm/data/example.js +22 -1
  33. package/dist/esm/data/example.js.map +1 -1
  34. package/dist/esm/e2etests/eval-operations.test.js +254 -0
  35. package/dist/esm/e2etests/eval-operations.test.js.map +1 -0
  36. package/dist/esm/e2etests/judgee-traces.test.js +253 -0
  37. package/dist/esm/e2etests/judgee-traces.test.js.map +1 -0
  38. package/dist/esm/index.js +0 -1
  39. package/dist/esm/index.js.map +1 -1
  40. package/dist/esm/judgment-client.js +328 -647
  41. package/dist/esm/judgment-client.js.map +1 -1
  42. package/dist/esm/scorers/api-scorer.js +56 -48
  43. package/dist/esm/scorers/api-scorer.js.map +1 -1
  44. package/dist/esm/scorers/base-scorer.js +66 -11
  45. package/dist/esm/scorers/base-scorer.js.map +1 -1
  46. package/dist/types/common/tracer.d.ts +27 -14
  47. package/dist/types/constants.d.ts +4 -4
  48. package/dist/types/data/datasets/eval-dataset-client.d.ts +39 -0
  49. package/dist/types/data/datasets/eval-dataset.d.ts +45 -0
  50. package/dist/types/data/example.d.ts +24 -12
  51. package/dist/types/e2etests/eval-operations.test.d.ts +5 -0
  52. package/dist/types/e2etests/judgee-traces.test.d.ts +5 -0
  53. package/dist/types/index.d.ts +0 -1
  54. package/dist/types/judgment-client.d.ts +3 -47
  55. package/dist/types/scorers/api-scorer.d.ts +15 -15
  56. package/dist/types/scorers/base-scorer.d.ts +53 -10
  57. package/package.json +2 -1
  58. package/dist/cjs/scorers/exact-match-scorer.js +0 -84
  59. package/dist/cjs/scorers/exact-match-scorer.js.map +0 -1
  60. package/dist/esm/scorers/exact-match-scorer.js +0 -80
  61. package/dist/esm/scorers/exact-match-scorer.js.map +0 -1
  62. package/dist/types/scorers/exact-match-scorer.d.ts +0 -10
@@ -14,9 +14,13 @@ import { ScoringResult } from './data/result.js';
14
14
  import { JudgevalScorer, ScorerWrapper } from './scorers/base-scorer.js';
15
15
  import { EvaluationRun } from './evaluation-run.js';
16
16
  import { Rule, Condition } from './rules.js';
17
- import { runEval, assertTest } from './run-evaluation.js';
18
- import { ROOT_API, JUDGMENT_EVAL_FETCH_API_URL, JUDGMENT_EVAL_DELETE_API_URL, JUDGMENT_EVAL_DELETE_PROJECT_API_URL, JUDGMENT_PROJECT_DELETE_API_URL, JUDGMENT_PROJECT_CREATE_API_URL } from './constants.js';
17
+ import { runEval, assertTest, JudgmentAPIError } from './run-evaluation.js';
18
+ import { JUDGMENT_EVAL_FETCH_API_URL, JUDGMENT_EVAL_DELETE_API_URL, JUDGMENT_EVAL_DELETE_PROJECT_API_URL, JUDGMENT_PROJECT_DELETE_API_URL, JUDGMENT_PROJECT_CREATE_API_URL, } from './constants.js';
19
19
  import logger from './common/logger-instance.js';
20
+ // Keep progress bar imports if used elsewhere (e.g., waitForEvaluation)
21
+ import cliProgress from 'cli-progress';
22
+ import colors from 'ansi-colors';
23
+ import { EvalDatasetClient } from './data/datasets/eval-dataset-client.js';
20
24
  // Load environment variables
21
25
  dotenv.config();
22
26
  /**
@@ -170,143 +174,10 @@ export class JudgmentClient {
170
174
  * Evaluate a dataset
171
175
  */
172
176
  evaluateDataset(dataset_1, scorers_1, model_1, aggregator_1, metadata_1) {
173
- return __awaiter(this, arguments, void 0, function* (dataset, // EvalDataset would be implemented separately
174
- scorers, model, aggregator, metadata, projectName = '', evalRunName = '', logResults = true, useJudgment = true, rules) {
175
- try {
176
- // Load appropriate implementations for all scorers
177
- const loadedScorers = [];
178
- for (const scorer of scorers) {
179
- try {
180
- if (scorer instanceof ScorerWrapper) {
181
- loadedScorers.push(scorer.loadImplementation(useJudgment));
182
- }
183
- else {
184
- // Assuming scorers passed are already JudgevalScorer or APIJudgmentScorer
185
- loadedScorers.push(scorer);
186
- }
187
- }
188
- catch (error) {
189
- throw new Error(`Failed to load implementation for scorer ${scorer.constructor.name}: ${error instanceof Error ? error.message : String(error)}`);
190
- }
191
- }
192
- // Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules
193
- if (rules && loadedScorers.some(scorer => scorer instanceof JudgevalScorer)) {
194
- throw new Error('Cannot use Judgeval scorers (only API scorers) when using rules. Please either remove rules or use only APIJudgmentScorer types.');
195
- }
196
- // Convert ScorerWrapper in rules to their implementations
197
- let loadedRules;
198
- if (rules) {
199
- loadedRules = [];
200
- for (const rule of rules) {
201
- try {
202
- const processedConditions = [];
203
- for (const condition of rule.conditions) {
204
- // Convert metric if it's a ScorerWrapper
205
- if (condition.metric instanceof ScorerWrapper) {
206
- try {
207
- const loadedMetric = condition.metric.loadImplementation(useJudgment);
208
- const newCondition = new Condition(loadedMetric);
209
- Object.assign(newCondition, Object.assign(Object.assign({}, condition), { metric: loadedMetric }));
210
- processedConditions.push(newCondition);
211
- }
212
- catch (error) {
213
- throw new Error(`Failed to convert ScorerWrapper to implementation in rule '${rule.name}', condition metric '${condition.metric.constructor.name}': ${error instanceof Error ? error.message : String(error)}`);
214
- }
215
- }
216
- else {
217
- processedConditions.push(condition);
218
- }
219
- }
220
- // Create new rule with processed conditions
221
- const newRule = new Rule(rule.name, processedConditions, rule.combine_type, rule.description, rule.notification, rule.ruleId);
222
- loadedRules.push(newRule);
223
- }
224
- catch (error) {
225
- throw new Error(`Failed to process rule '${rule.name}': ${error instanceof Error ? error.message : String(error)}`);
226
- }
227
- }
228
- }
229
- const evaluationRun = new EvaluationRun({
230
- logResults,
231
- projectName,
232
- evalName: evalRunName,
233
- examples: dataset.examples, // Assuming dataset has an 'examples' property
234
- scorers: loadedScorers,
235
- model,
236
- aggregator,
237
- metadata,
238
- judgmentApiKey: this.judgmentApiKey,
239
- rules: loadedRules,
240
- organizationId: this.organizationId
241
- });
242
- // Assuming override=false, ignoreErrors=true, asyncExecution=false as defaults for evaluateDataset
243
- return runEval(evaluationRun, false, true, false);
244
- }
245
- catch (error) {
246
- if (error instanceof Error) {
247
- if (error.message.includes('one or more fields are invalid')) {
248
- throw new Error(`Please check your EvaluationRun object, one or more fields are invalid: \n${error.message}`);
249
- }
250
- else {
251
- throw new Error(`An unexpected error occurred during evaluation: ${error.message}`);
252
- }
253
- }
254
- else {
255
- throw new Error(`An unexpected error occurred during evaluation: ${String(error)}`);
256
- }
257
- }
258
- });
259
- }
260
- /**
261
- * Create a dataset
262
- */
263
- createDataset() {
264
- // This would be implemented with EvalDataset
265
- throw new Error('Not implemented yet');
266
- }
267
- /**
268
- * Push a dataset to the Judgment platform
269
- */
270
- pushDataset(alias_1, dataset_1, projectName_1) {
271
- return __awaiter(this, arguments, void 0, function* (alias, dataset, projectName, overwrite = false) {
272
- // This would be implemented with EvalDataset
273
- throw new Error('Not implemented yet');
274
- });
275
- }
276
- /**
277
- * Pull a dataset from the Judgment platform
278
- */
279
- pullDataset(alias, projectName) {
280
- return __awaiter(this, void 0, void 0, function* () {
281
- // This would be implemented with EvalDataset
282
- throw new Error('Not implemented yet');
283
- });
284
- }
285
- /**
286
- * Delete a dataset from the Judgment platform
287
- */
288
- deleteDataset(alias, projectName) {
289
- return __awaiter(this, void 0, void 0, function* () {
290
- // This would be implemented with EvalDataset
291
- throw new Error('Not implemented yet');
292
- });
293
- }
294
- /**
295
- * Pull project dataset stats from the Judgment platform
296
- */
297
- pullProjectDatasetStats(projectName) {
298
- return __awaiter(this, void 0, void 0, function* () {
299
- // This would be implemented with EvalDataset
300
- throw new Error('Not implemented yet');
301
- });
302
- }
303
- /**
304
- * Insert examples into a dataset on the Judgment platform
305
- */
306
- insertDataset(alias, examples, projectName) {
307
- return __awaiter(this, void 0, void 0, function* () {
308
- // This would be implemented with EvalDataset
309
- throw new Error('Not implemented yet');
177
+ return __awaiter(this, arguments, void 0, function* (dataset, // Keep type loose for stub
178
+ scorers, model, aggregator, metadata, projectName = 'default_project', evalRunName = 'default_eval_run', logResults = true, useJudgment = true, rules) {
179
+ // Keep type loose for stub
180
+ throw new Error('Not implemented in JudgmentClient. Use EvalDatasetClient.');
310
181
  });
311
182
  }
312
183
  /**
@@ -315,39 +186,29 @@ export class JudgmentClient {
315
186
  * @param evalRunName Name of the evaluation run
316
187
  * @returns Array containing one object with 'id' and 'results' (list of ScoringResult)
317
188
  */
318
- pullEval(projectName, evalRunName // Consistent parameter name, but API uses eval_name
319
- ) {
189
+ pullEval(projectName, evalRunName) {
320
190
  return __awaiter(this, void 0, void 0, function* () {
321
- var _a, _b, _c, _d;
322
- // Body matches Python's structure for this endpoint
323
191
  const evalRunRequestBody = {
324
192
  project_name: projectName,
325
- eval_name: evalRunName, // Use eval_name in the body for the API
193
+ eval_name: evalRunName,
326
194
  judgment_api_key: this.judgmentApiKey
327
195
  };
328
196
  try {
329
- const response = yield axios.post(JUDGMENT_EVAL_FETCH_API_URL, // Use constant
330
- evalRunRequestBody, {
331
- headers: {
332
- 'Content-Type': 'application/json',
333
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
334
- 'X-Organization-Id': this.organizationId
335
- }
197
+ const response = yield axios.post(JUDGMENT_EVAL_FETCH_API_URL, evalRunRequestBody, {
198
+ headers: this.getAuthHeaders()
336
199
  });
337
- // Process the response to match the Python SDK's format
338
- // Python returns [{ 'id': ..., 'results': [ScoringResult, ...]}]
339
- // The API response is a list of results, each with an 'id' and 'result'
340
200
  if (!Array.isArray(response.data) || response.data.length === 0) {
341
- return [{ id: '', results: [] }]; // Return empty structure if no data
201
+ return [{ id: '', results: [] }];
342
202
  }
343
- const evalRunResult = { id: '', results: [] };
344
- evalRunResult.id = ((_a = response.data[0]) === null || _a === void 0 ? void 0 : _a.id) || ''; // Assume ID is same for all results in run
203
+ const evalRunResult = [{}];
345
204
  for (const result of response.data) {
205
+ const resultId = result.id || '';
346
206
  const resultData = result.result || {};
207
+ // Extract data object from result data
347
208
  const dataObject = resultData.data_object || {};
348
- // Create Example from data_object
209
+ // Create Example with required input field
349
210
  const example = new Example({
350
- input: dataObject.input,
211
+ input: dataObject.input || '',
351
212
  actualOutput: dataObject.actual_output,
352
213
  expectedOutput: dataObject.expected_output,
353
214
  context: dataObject.context,
@@ -357,110 +218,21 @@ export class JudgmentClient {
357
218
  expectedTools: dataObject.expected_tools,
358
219
  exampleId: dataObject.example_id,
359
220
  exampleIndex: dataObject.example_index,
360
- timestamp: dataObject.timestamp
361
- });
362
- // Create ScoringResult
363
- const scoringResult = new ScoringResult({
364
- dataObject: example,
365
- scorersData: resultData.scorers_data || [],
366
- error: resultData.error
221
+ timestamp: dataObject.timestamp,
222
+ example: dataObject.example // Include example boolean
367
223
  });
368
- evalRunResult.results.push(scoringResult);
369
- }
370
- return [evalRunResult]; // Wrap in array to match Python return type [{...}]
371
- }
372
- catch (error) {
373
- if (axios.isAxiosError(error)) {
374
- const statusCode = (_b = error.response) === null || _b === void 0 ? void 0 : _b.status;
375
- const errorMessage = ((_d = (_c = error.response) === null || _c === void 0 ? void 0 : _c.data) === null || _d === void 0 ? void 0 : _d.detail) || error.message;
376
- throw new Error(`Failed to pull evaluation results: ${statusCode} - ${errorMessage}`);
377
- }
378
- if (error instanceof Error) {
379
- throw new Error(`Failed to pull evaluation results: ${error.message}`);
380
- }
381
- throw new Error(`Failed to pull evaluation results: ${String(error)}`);
382
- }
383
- });
384
- }
385
- /**
386
- * Get evaluation run results (alias for pullEval with a more intuitive name)
387
- * @param projectName Name of the project
388
- * @param evalRunName Name of the evaluation run
389
- * @returns Array containing one object with 'id' and 'results' (list of ScoringResult)
390
- */
391
- getEvalRun(projectName, evalRunName) {
392
- return __awaiter(this, void 0, void 0, function* () {
393
- return this.pullEval(projectName, evalRunName);
394
- });
395
- }
396
- /**
397
- * List all evaluation runs for a project
398
- * @param projectName Name of the project
399
- * @param limit Maximum number of evaluation runs to return (default: 100)
400
- * @param offset Offset for pagination (default: 0)
401
- * @returns List of evaluation run metadata
402
- */
403
- listEvalRuns(projectName_1) {
404
- return __awaiter(this, arguments, void 0, function* (projectName, limit = 100, offset = 0) {
405
- var _a, _b, _c;
406
- try {
407
- // Use ROOT_API for the base URL
408
- const url = `${ROOT_API}/projects/${projectName}/eval-runs`;
409
- const response = yield axios.get(url, {
410
- params: {
411
- limit,
412
- offset
413
- },
414
- headers: {
415
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
416
- 'X-Organization-Id': this.organizationId
417
- }
418
- });
419
- return response.data || [];
420
- }
421
- catch (error) {
422
- if (axios.isAxiosError(error)) {
423
- const statusCode = (_a = error.response) === null || _a === void 0 ? void 0 : _a.status;
424
- const errorMessage = ((_c = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data) === null || _c === void 0 ? void 0 : _c.detail) || error.message;
425
- throw new Error(`Failed to list evaluation runs: ${statusCode} - ${errorMessage}`);
426
- }
427
- if (error instanceof Error) {
428
- throw new Error(`Failed to list evaluation runs: ${error.message}`);
429
- }
430
- throw new Error(`Failed to list evaluation runs: ${String(error)}`);
431
- }
432
- });
433
- }
434
- /**
435
- * Get evaluation run statistics
436
- * @param projectName Name of the project
437
- * @param evalRunName Name of the evaluation run
438
- * @returns Statistics for the evaluation run
439
- */
440
- getEvalRunStats(projectName, evalRunName) {
441
- return __awaiter(this, void 0, void 0, function* () {
442
- var _a, _b, _c;
443
- try {
444
- // Use ROOT_API for the base URL
445
- const url = `${ROOT_API}/projects/${projectName}/eval-runs/${evalRunName}/stats`;
446
- const response = yield axios.get(url, {
447
- headers: {
448
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
449
- 'X-Organization-Id': this.organizationId
450
- }
451
- });
452
- return response.data || {};
224
+ evalRunResult[0].id = resultId;
225
+ evalRunResult[0].results = [new ScoringResult({
226
+ dataObject: example,
227
+ scorersData: resultData.scorers_data || [],
228
+ error: resultData.error
229
+ })];
230
+ }
231
+ return evalRunResult;
453
232
  }
454
233
  catch (error) {
455
- if (axios.isAxiosError(error)) {
456
- const statusCode = (_a = error.response) === null || _a === void 0 ? void 0 : _a.status;
457
- const errorMessage = ((_c = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data) === null || _c === void 0 ? void 0 : _c.detail) || error.message;
458
- throw new Error(`Failed to get evaluation run statistics: ${statusCode} - ${errorMessage}`);
459
- }
460
- if (error instanceof Error) {
461
- throw new Error(`Failed to get evaluation run statistics: ${error.message}`);
462
- }
463
- throw new Error(`Failed to get evaluation run statistics: ${String(error)}`);
234
+ this.handleApiError(error, 'pullEval');
235
+ throw error;
464
236
  }
465
237
  });
466
238
  }
@@ -473,92 +245,67 @@ export class JudgmentClient {
473
245
  */
474
246
  exportEvalResults(projectName_1, evalRunName_1) {
475
247
  return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, format = 'json') {
248
+ logger.info(`Exporting eval results for project '${projectName}', run '${evalRunName}' as ${format}`);
476
249
  try {
477
- const evalRunArray = yield this.pullEval(projectName, evalRunName);
478
- const evalRunData = evalRunArray[0]; // Get the first element containing id and results
479
- if (!evalRunData || !evalRunData.results) {
480
- return format === 'json' ? JSON.stringify([], null, 2) : 'No results found';
250
+ const resultsData = yield this.pullEval(projectName, evalRunName);
251
+ if (!resultsData || resultsData.length === 0 || !resultsData[0].results) {
252
+ logger.warn('No results found to export.');
253
+ return '';
481
254
  }
255
+ const results = resultsData[0].results;
482
256
  if (format === 'json') {
483
- // Return the whole structure including ID and results array
484
- return JSON.stringify(evalRunData, null, 2);
257
+ // Pretty print JSON
258
+ return JSON.stringify(results.map(r => r.toJSON()), null, 2);
485
259
  }
486
260
  else if (format === 'csv') {
487
- const results = evalRunData.results;
488
- if (!Array.isArray(results) || results.length === 0) {
489
- return 'No results found';
490
- }
491
- // Use csv-writer instead of json2csv
492
- let createObjectCsvStringifier;
493
- try {
494
- // Use dynamic import() for ES Modules
495
- const csvWriterModule = yield import('csv-writer');
496
- createObjectCsvStringifier = csvWriterModule.createObjectCsvStringifier;
497
- if (!createObjectCsvStringifier) { // Check if the function exists
498
- throw new Error("Could not load createObjectCsvStringifier from csv-writer");
261
+ if (results.length === 0)
262
+ return ''; // No data to export
263
+ // Dynamically determine headers from the first result object
264
+ // Flatten the structure for CSV
265
+ const flatResults = results.map(result => {
266
+ var _a, _b, _c;
267
+ const flat = {};
268
+ const exampleData = (_b = (_a = result.dataObject) === null || _a === void 0 ? void 0 : _a.toJSON()) !== null && _b !== void 0 ? _b : {}; // Use toJSON which gives snake_case
269
+ const scorersData = (_c = result.scorersData) !== null && _c !== void 0 ? _c : [];
270
+ // Add example data fields (snake_case)
271
+ for (const key in exampleData) {
272
+ // Prefix example fields to avoid collision, e.g., example_input
273
+ flat[`example_${key}`] = exampleData[key];
499
274
  }
500
- }
501
- catch (e) {
502
- // Provide a more helpful error message
503
- const errorMsg = e instanceof Error ? e.message : String(e);
504
- // Update error message to reflect import() failure
505
- console.error(`Failed to dynamically import 'csv-writer': ${errorMsg}. Ensure it's installed (\`npm install csv-writer\`).`);
506
- throw new Error("The 'csv-writer' package is required for CSV export but failed to load dynamically.");
507
- }
508
- try {
509
- // Flatten the structure slightly for better CSV output
510
- const processedResults = results.map((result) => {
511
- // Flatten dataObject properties and scorersData
512
- const flatResult = {};
513
- flatResult.eval_run_id = evalRunData.id; // Add eval run ID
514
- // Flatten dataObject
515
- if (result.dataObject) {
516
- for (const [key, value] of Object.entries(result.dataObject)) {
517
- // Prefix with 'data_' to avoid potential clashes
518
- flatResult[`data_${key}`] = (typeof value === 'object' && value !== null) ? JSON.stringify(value) : value;
519
- }
520
- }
521
- // Flatten scorersData - creates columns like scorer_0_name, scorer_0_score, etc.
522
- if (Array.isArray(result.scorersData)) {
523
- result.scorersData.forEach((scorerData, index) => {
524
- flatResult[`scorer_${index}_name`] = scorerData.name;
525
- flatResult[`scorer_${index}_score`] = (typeof scorerData.score === 'object' && scorerData.score !== null) ? JSON.stringify(scorerData.score) : scorerData.score;
526
- flatResult[`scorer_${index}_error`] = scorerData.error;
527
- // Add other scorer fields if necessary, e.g., metadata
528
- if (scorerData.additional_metadata) {
529
- flatResult[`scorer_${index}_metadata`] = JSON.stringify(scorerData.additional_metadata);
530
- }
531
- });
532
- }
533
- flatResult.error = result.error; // Top-level error for the example processing
534
- return flatResult;
535
- });
536
- // Define headers dynamically based on the keys of the first processed result
537
- if (processedResults.length === 0) {
538
- return 'No data to export after processing.'; // Handle case with no valid results after processing
539
- }
540
- const headers = Object.keys(processedResults[0]).map(key => ({ id: key, title: key }));
541
- const csvStringifier = createObjectCsvStringifier({
542
- header: headers
275
+ // Add scorers data
276
+ scorersData.forEach(scorer => {
277
+ flat[`scorer_${scorer.name}_score`] = scorer.score;
278
+ flat[`scorer_${scorer.name}_additional_metadata`] = JSON.stringify(scorer.additional_metadata);
279
+ flat[`scorer_${scorer.name}_error`] = scorer.error;
543
280
  });
544
- // Generate CSV string (header + records)
545
- return csvStringifier.getHeaderString() + csvStringifier.stringifyRecords(processedResults);
546
- }
547
- catch (error) {
548
- console.error('Error converting to CSV:', error);
549
- const errorMessage = error instanceof Error ? error.message : String(error);
550
- return `Error generating CSV: ${errorMessage}`;
551
- }
281
+ // Add top-level error if present
282
+ flat['top_level_error'] = result.error;
283
+ return flat;
284
+ });
285
+ // Get all unique keys from the flattened results for headers
286
+ const headers = Array.from(new Set(flatResults.flatMap(Object.keys)));
287
+ // Use papaparse for robust CSV generation
288
+ const Papa = require('papaparse'); // Use require here if not imported at top
289
+ const csv = Papa.unparse({
290
+ fields: headers,
291
+ data: flatResults
292
+ }, {
293
+ header: true,
294
+ quotes: true, // Ensure fields with commas/newlines are quoted
295
+ quoteChar: '"',
296
+ escapeChar: '"',
297
+ delimiter: ','
298
+ });
299
+ return csv;
552
300
  }
553
301
  else {
554
302
  throw new Error(`Unsupported export format: ${format}`);
555
303
  }
556
304
  }
557
305
  catch (error) {
558
- if (error instanceof Error) {
559
- throw new Error(`Failed to export evaluation results: ${error.message}`);
560
- }
561
- throw new Error(`Failed to export evaluation results: ${String(error)}`);
306
+ logger.error(`Error exporting eval results: ${error}`);
307
+ this.handleApiError(error, 'exportEvalResults');
308
+ throw error;
562
309
  }
563
310
  });
564
311
  }
@@ -567,47 +314,23 @@ export class JudgmentClient {
567
314
  */
568
315
  deleteEval(projectName, evalRunNames) {
569
316
  return __awaiter(this, void 0, void 0, function* () {
570
- var _a, _b;
571
- if (!evalRunNames || evalRunNames.length === 0) {
572
- throw new Error('No evaluation run names provided');
573
- }
574
- // Body matches Python's structure for this endpoint
575
- const evalRunRequestBody = {
317
+ logger.info(`Deleting eval runs: ${evalRunNames.join(', ')} from project: ${projectName}`);
318
+ const requestBody = {
576
319
  project_name: projectName,
577
320
  eval_names: evalRunNames,
578
- judgment_api_key: this.judgmentApiKey // Required by this specific API endpoint
321
+ judgment_api_key: this.judgmentApiKey,
579
322
  };
580
323
  try {
581
- const response = yield axios.delete(JUDGMENT_EVAL_DELETE_API_URL, // Use constant
582
- {
583
- data: evalRunRequestBody,
584
- headers: {
585
- 'Content-Type': 'application/json',
586
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
587
- 'X-Organization-Id': this.organizationId
588
- }
324
+ yield axios.post(JUDGMENT_EVAL_DELETE_API_URL, requestBody, {
325
+ headers: this.getAuthHeaders()
589
326
  });
590
- return Boolean(response.data);
327
+ logger.info('Successfully deleted eval runs.');
328
+ return true;
591
329
  }
592
330
  catch (error) {
593
- if (axios.isAxiosError(error)) {
594
- const status = (_a = error.response) === null || _a === void 0 ? void 0 : _a.status;
595
- const data = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data;
596
- if (status === 404) {
597
- throw new Error(`Eval results not found: ${JSON.stringify(data)}`);
598
- }
599
- else if (status === 500) {
600
- throw new Error(`Error deleting eval results: ${JSON.stringify(data)}`);
601
- }
602
- else {
603
- throw new Error(`Error deleting eval results (${status}): ${JSON.stringify(data)}`);
604
- }
605
- }
606
- // Rethrow original or wrapped error
607
- if (error instanceof Error) {
608
- throw new Error(`Error deleting eval results: ${error.message}`);
609
- }
610
- throw new Error(`Error deleting eval results: ${String(error)}`);
331
+ logger.error(`Error deleting eval runs: ${error}`);
332
+ this.handleApiError(error, 'deleteEval');
333
+ return false;
611
334
  }
612
335
  });
613
336
  }
@@ -616,43 +339,22 @@ export class JudgmentClient {
616
339
  */
617
340
  deleteProjectEvals(projectName) {
618
341
  return __awaiter(this, void 0, void 0, function* () {
619
- var _a, _b;
342
+ logger.info(`Deleting ALL eval runs for project: ${projectName}`);
343
+ const requestBody = {
344
+ project_name: projectName,
345
+ judgment_api_key: this.judgmentApiKey,
346
+ };
620
347
  try {
621
- const response = yield axios.delete(JUDGMENT_EVAL_DELETE_PROJECT_API_URL, // Use constant
622
- {
623
- // Remove judgment_api_key from body to match Python (uses header auth)
624
- data: {
625
- project_name: projectName,
626
- },
627
- headers: {
628
- 'Content-Type': 'application/json',
629
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
630
- 'X-Organization-Id': this.organizationId
631
- }
348
+ yield axios.post(JUDGMENT_EVAL_DELETE_PROJECT_API_URL, requestBody, {
349
+ headers: this.getAuthHeaders()
632
350
  });
633
- // Python returns response.json(), check if TS response needs similar handling
634
- return Boolean(response.data); // Assuming response.data indicates success
351
+ logger.info(`Successfully deleted all eval runs for project ${projectName}.`);
352
+ return true;
635
353
  }
636
354
  catch (error) {
637
- if (axios.isAxiosError(error)) {
638
- const status = (_a = error.response) === null || _a === void 0 ? void 0 : _a.status;
639
- const data = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data;
640
- if (status === 404) {
641
- // Assuming 404 might mean project not found or no evals to delete
642
- console.warn(`Project '${projectName}' not found or no evals to delete.`);
643
- return false; // Or true depending on desired idempotency behavior
644
- }
645
- else if (status === 500) {
646
- throw new Error(`Error deleting project evals: ${JSON.stringify(data)}`);
647
- }
648
- else {
649
- throw new Error(`Error deleting project evals (${status}): ${JSON.stringify(data)}`);
650
- }
651
- }
652
- if (error instanceof Error) {
653
- throw new Error(`Error deleting project evals: ${error.message}`);
654
- }
655
- throw new Error(`Error deleting project evals: ${String(error)}`);
355
+ logger.error(`Error deleting project evals: ${error}`);
356
+ this.handleApiError(error, 'deleteProjectEvals');
357
+ return false;
656
358
  }
657
359
  });
658
360
  }
@@ -661,37 +363,34 @@ export class JudgmentClient {
661
363
  */
662
364
  createProject(projectName) {
663
365
  return __awaiter(this, void 0, void 0, function* () {
366
+ logger.info(`Creating project: ${projectName}`);
367
+ const requestBody = {
368
+ project_name: projectName,
369
+ judgment_api_key: this.judgmentApiKey,
370
+ };
664
371
  try {
665
- const response = yield axios.post(JUDGMENT_PROJECT_CREATE_API_URL, // Use constant
666
- // Remove judgment_api_key from body to match Python (uses header auth)
667
- {
668
- project_name: projectName,
669
- }, {
670
- headers: {
671
- 'Content-Type': 'application/json',
672
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
673
- 'X-Organization-Id': this.organizationId
674
- }
372
+ const response = yield axios.post(JUDGMENT_PROJECT_CREATE_API_URL, requestBody, {
373
+ headers: this.getAuthHeaders()
675
374
  });
676
- // Python returns response.json(), check if TS response needs similar handling
677
- return Boolean(response.data); // Assuming response.data indicates success
678
- }
679
- catch (error) {
680
- if (axios.isAxiosError(error) && error.response) {
681
- // Check for specific conflict error (e.g., 409) if API provides it
682
- if (error.response.status === 409) {
683
- console.warn(`Project '${projectName}' already exists.`);
684
- return false; // Or true if idempotent creation is desired
685
- }
686
- throw new Error(`Error creating project (${error.response.status}): ${JSON.stringify(error.response.data)}`);
375
+ // Check for specific success message or status if API provides one
376
+ if (response.data && response.data.message === 'Project added successfully') {
377
+ logger.info(`Successfully created project: ${projectName}`);
378
+ return true;
687
379
  }
688
- else if (error instanceof Error) {
689
- throw new Error(`Error creating project: ${error.message}`);
380
+ else if (response.data && response.data.message === 'Project already exists') {
381
+ logger.warn(`Project '${projectName}' already exists.`);
382
+ return true; // Or false, depending on desired behavior for existing projects
690
383
  }
691
384
  else {
692
- throw new Error(`Error creating project: ${String(error)}`);
385
+ logger.error(`Failed to create project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
386
+ return false;
693
387
  }
694
388
  }
389
+ catch (error) {
390
+ logger.error(`Error creating project: ${error}`);
391
+ this.handleApiError(error, 'createProject');
392
+ return false;
393
+ }
695
394
  });
696
395
  }
697
396
  /**
@@ -699,37 +398,29 @@ export class JudgmentClient {
699
398
  */
700
399
  deleteProject(projectName) {
701
400
  return __awaiter(this, void 0, void 0, function* () {
401
+ logger.info(`Deleting project: ${projectName}`);
402
+ const requestBody = {
403
+ project_name: projectName,
404
+ judgment_api_key: this.judgmentApiKey,
405
+ };
702
406
  try {
703
- const response = yield axios.delete(JUDGMENT_PROJECT_DELETE_API_URL, // Use constant
704
- {
705
- // Remove judgment_api_key from body to match Python (uses header auth)
706
- data: {
707
- project_name: projectName,
708
- },
709
- headers: {
710
- 'Content-Type': 'application/json',
711
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
712
- 'X-Organization-Id': this.organizationId
713
- }
407
+ const response = yield axios.post(JUDGMENT_PROJECT_DELETE_API_URL, requestBody, {
408
+ headers: this.getAuthHeaders()
714
409
  });
715
- // Python returns response.json(), check if TS response needs similar handling
716
- return Boolean(response.data); // Assuming response.data indicates success
717
- }
718
- catch (error) {
719
- if (axios.isAxiosError(error) && error.response) {
720
- if (error.response.status === 404) {
721
- console.warn(`Project '${projectName}' not found for deletion.`);
722
- return false; // Or true depending on desired idempotency
723
- }
724
- throw new Error(`Error deleting project (${error.response.status}): ${JSON.stringify(error.response.data)}`);
725
- }
726
- else if (error instanceof Error) {
727
- throw new Error(`Error deleting project: ${error.message}`);
410
+ if (response.data && response.data.message === 'Project deleted successfully') {
411
+ logger.info(`Successfully deleted project: ${projectName}`);
412
+ return true;
728
413
  }
729
414
  else {
730
- throw new Error(`Error deleting project: ${String(error)}`);
415
+ logger.error(`Failed to delete project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
416
+ return false;
731
417
  }
732
418
  }
419
+ catch (error) {
420
+ logger.error(`Error deleting project: ${error}`);
421
+ this.handleApiError(error, 'deleteProject');
422
+ return false;
423
+ }
733
424
  });
734
425
  }
735
426
  /**
@@ -737,35 +428,36 @@ export class JudgmentClient {
737
428
  */
738
429
  validateApiKey() {
739
430
  return __awaiter(this, void 0, void 0, function* () {
740
- var _a, _b;
431
+ var _a, _b, _c, _d;
432
+ logger.debug('Validating API Key...');
741
433
  try {
742
- const response = yield axios.post(`${ROOT_API}/validate_api_key/`, // Use ROOT_API
743
- {}, // Empty body
744
- {
745
- headers: {
746
- 'Content-Type': 'application/json',
747
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
748
- // Removed 'X-Organization-Id' header to match Python for this specific endpoint
749
- }
750
- });
751
- if (response.status === 200) {
752
- return [true, JSON.stringify(response.data)];
753
- }
754
- else {
755
- // Status might be non-200 but still valid JSON error response
756
- return [false, ((_a = response.data) === null || _a === void 0 ? void 0 : _a.detail) || `Error validating API key (Status: ${response.status})`];
757
- }
434
+ // Instantiate EvalDatasetClient to perform the validation call
435
+ const datasetClient = new EvalDatasetClient(this.judgmentApiKey, this.organizationId);
436
+ // Use the dataset client to make the call
437
+ yield datasetClient.pullProjectDatasetStats('__api_key_validation__');
438
+ logger.debug('API Key appears valid.');
439
+ return [true, 'API Key is valid.'];
758
440
  }
759
441
  catch (error) {
760
- if (axios.isAxiosError(error) && error.response) {
761
- return [false, ((_b = error.response.data) === null || _b === void 0 ? void 0 : _b.detail) || `Error validating API key (Status: ${error.response.status})`];
762
- }
763
- else if (error instanceof Error) {
764
- return [false, `Error validating API key: ${error.message}`];
442
+ let message = 'API Key validation failed.';
443
+ if (axios.isAxiosError(error)) {
444
+ if (((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 401 || ((_b = error.response) === null || _b === void 0 ? void 0 : _b.status) === 403) {
445
+ message = 'API Key is invalid or expired.';
446
+ }
447
+ else if (((_c = error.response) === null || _c === void 0 ? void 0 : _c.status) === 404) {
448
+ // If validation endpoint returns 404, key might be valid but endpoint wrong/project not found
449
+ // This depends on the specific validation endpoint behavior
450
+ message = 'API Key might be valid, but validation endpoint returned 404.';
451
+ }
452
+ else {
453
+ message = `API Key validation failed with status ${(_d = error.response) === null || _d === void 0 ? void 0 : _d.status}: ${error.message}`;
454
+ }
765
455
  }
766
456
  else {
767
- return [false, `Unknown error validating API key: ${String(error)}`];
457
+ message = `API Key validation failed: ${String(error)}`;
768
458
  }
459
+ logger.error(message);
460
+ return [false, message];
769
461
  }
770
462
  });
771
463
  }
@@ -790,17 +482,12 @@ export class JudgmentClient {
790
482
  */
791
483
  pullEvalResults(projectName, evalRunName) {
792
484
  return __awaiter(this, void 0, void 0, function* () {
793
- var _a;
794
- try {
795
- const evalRunArray = yield this.pullEval(projectName, evalRunName);
796
- // pullEval returns [{ id: ..., results: [...] }], extract results
797
- return ((_a = evalRunArray[0]) === null || _a === void 0 ? void 0 : _a.results) || [];
798
- }
799
- catch (error) {
800
- // Log error but return empty array to allow waitForEvaluation to potentially retry
801
- logger.error(`Failed to pull evaluation results for '${evalRunName}': ${error instanceof Error ? error.message : String(error)}`);
485
+ const rawResults = yield this.pullEval(projectName, evalRunName);
486
+ if (!rawResults || rawResults.length === 0 || !rawResults[0].results) {
802
487
  return [];
803
488
  }
489
+ // Assuming pullEval correctly returns results in the expected format
490
+ return rawResults[0].results;
804
491
  });
805
492
  }
806
493
  /**
@@ -812,88 +499,72 @@ export class JudgmentClient {
812
499
  */
813
500
  checkEvalStatus(projectName, evalRunName) {
814
501
  return __awaiter(this, void 0, void 0, function* () {
815
- var _a, _b;
816
- // Using 'eval_name' in body for consistency with pullEval/fetch endpoint.
502
+ var _a;
817
503
  const requestBody = {
818
504
  project_name: projectName,
819
- eval_name: evalRunName, // Use 'eval_name'
505
+ eval_name: evalRunName,
820
506
  judgment_api_key: this.judgmentApiKey,
821
507
  };
822
508
  try {
823
- const response = yield axios.post(JUDGMENT_EVAL_FETCH_API_URL, // Use fetch URL
824
- requestBody, {
825
- headers: {
826
- 'Content-Type': 'application/json',
827
- 'Authorization': `Bearer ${this.judgmentApiKey}`,
828
- 'X-Organization-Id': this.organizationId
829
- },
830
- timeout: 15000 // Slightly increased timeout for status checks
509
+ const response = yield axios.post(JUDGMENT_EVAL_FETCH_API_URL, requestBody, {
510
+ headers: this.getAuthHeaders(),
511
+ // Add a shorter timeout for status checks?
512
+ // timeout: 5000
831
513
  });
832
- // Interpret response: API might return status object or full results array
833
- let statusData = { status: 'unknown', progress: 0, message: '' };
834
- if (Array.isArray(response.data)) {
835
- // If it's an array, assume results are complete unless explicitly stated otherwise
836
- if (response.data.length > 0 && ((_b = (_a = response.data[0]) === null || _a === void 0 ? void 0 : _a.result) === null || _b === void 0 ? void 0 : _b.status)) {
837
- // Check if the first result object contains status info
838
- statusData = response.data[0].result; // Assuming status is within the 'result' field
839
- }
840
- else if (response.data.length > 0) {
841
- // Assume complete if we get results array without specific status fields
842
- statusData = { status: 'complete', progress: 1.0, message: 'Results received' };
843
- }
844
- else {
845
- // Empty array might mean still processing or no results yet
846
- statusData = { status: 'processing', progress: 0, message: 'Waiting for results...' };
847
- }
848
- }
849
- else if (typeof response.data === 'object' && response.data !== null && response.data.status) {
850
- // Might be a direct status object from the API
851
- statusData = response.data;
852
- }
514
+ const data = response.data;
515
+ // Check if the response looks like a status object
516
+ if (data && typeof data.status === 'string') {
517
+ return {
518
+ status: data.status || 'unknown',
519
+ progress: typeof data.progress === 'number' ? data.progress : 0,
520
+ message: data.message || '',
521
+ error: data.error
522
+ };
523
+ }
524
+ // Check if the response looks like completed results (array format from pullEval)
525
+ else if (Array.isArray(data) && data.length > 0 && data[0].results) {
526
+ return {
527
+ status: 'completed',
528
+ progress: 100,
529
+ message: 'Evaluation completed.'
530
+ };
531
+ }
532
+ // Check if response looks like completed results (single object format)
533
+ else if (data && typeof data.id === 'string' && Array.isArray(data.results)) { // Adjust based on actual API response for single result fetch
534
+ return {
535
+ status: 'completed',
536
+ progress: 100,
537
+ message: 'Evaluation completed.'
538
+ };
539
+ }
540
+ // Handle other potential responses or assume pending/unknown
853
541
  else {
854
- // Unexpected response format
855
- statusData = { status: 'unknown', progress: 0, message: `Unexpected response format: ${JSON.stringify(response.data)}` };
856
- }
857
- // Normalize the progress value
858
- let progress = 0;
859
- if (statusData.progress !== undefined && statusData.progress !== null) {
860
- const parsedProgress = parseFloat(statusData.progress);
861
- if (!isNaN(parsedProgress)) {
862
- progress = Math.max(0, Math.min(1, parsedProgress)); // Ensure progress is between 0 and 1
863
- }
542
+ logger.warn(`Unexpected response format when checking status for ${evalRunName}:`, data);
543
+ return {
544
+ status: 'unknown',
545
+ progress: 0,
546
+ message: 'Could not determine status from API response.'
547
+ };
864
548
  }
865
- const normalizedStatus = {
866
- status: statusData.status || 'unknown',
867
- progress: progress,
868
- message: statusData.message || '',
869
- error: statusData.error // Include error field if present
870
- };
871
- // Only log status if it's not being called from waitForEvaluation
872
- // Check stack trace for caller function name
873
- const stack = new Error().stack;
874
- const isCalledByWaitForEvaluation = stack === null || stack === void 0 ? void 0 : stack.includes('waitForEvaluation');
875
- if (!isCalledByWaitForEvaluation) {
876
- // Use logger for status updates when called directly
877
- logger.info(`Evaluation Status: ${normalizedStatus.status}`);
878
- logger.info(`Progress: ${Math.round(normalizedStatus.progress * 100)}%`);
879
- if (normalizedStatus.message) {
880
- logger.info(`Message: ${normalizedStatus.message}`);
881
- }
882
- if (normalizedStatus.error) {
883
- logger.error(`Error in status: ${normalizedStatus.error}`);
884
- }
885
- }
886
- return normalizedStatus;
887
549
  }
888
550
  catch (error) {
889
- // Don't throw errors from status check, just return default 'unknown' status
890
- // This allows waitForEvaluation to continue polling even on transient network issues
891
- const errorMessage = error instanceof Error ? error.message : String(error);
892
- logger.error(`Error checking evaluation status for '${evalRunName}': ${errorMessage}`);
551
+ // Don't throw here, return status indicating error
552
+ let errorMessage = 'Failed to fetch evaluation status.';
553
+ let status = 'error';
554
+ if (axios.isAxiosError(error) && ((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 404) {
555
+ status = 'not_found';
556
+ errorMessage = 'Evaluation run not found.';
557
+ logger.warn(`Evaluation run ${evalRunName} not found.`);
558
+ }
559
+ else {
560
+ this.handleApiError(error, 'checkEvalStatus');
561
+ errorMessage = `Error fetching status: ${String(error)}`;
562
+ }
893
563
  return {
894
- status: 'unknown',
564
+ status: status,
895
565
  progress: 0,
896
- message: `Error checking status: ${errorMessage}`
566
+ message: errorMessage,
567
+ error: String(error) // Include error string
897
568
  };
898
569
  }
899
570
  });
@@ -907,105 +578,79 @@ export class JudgmentClient {
907
578
  */
908
579
  waitForEvaluation(projectName_1, evalRunName_1) {
909
580
  return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, options = {}) {
910
- const { intervalMs = 3000, // Slightly longer interval
911
- maxAttempts = 200, // ~10 minutes total wait time (200 * 3s)
912
- showProgress = true } = options;
913
- let attempts = 0;
914
- let lastProgressPercent = -1;
915
- let lastStatus = '';
581
+ const { intervalMs = 5000, maxAttempts = 120, showProgress = true } = options; // Default: check every 5s for 10 mins
582
+ let progressBar;
916
583
  if (showProgress) {
917
- // Use logger for initial message
918
- logger.info(`Waiting for evaluation "${evalRunName}" in project "${projectName}" to complete...`);
584
+ progressBar = new cliProgress.SingleBar({
585
+ format: `Waiting for ${colors.magenta(evalRunName)}... | ${colors.cyan('{bar}')} | {percentage}% || {status}`,
586
+ barCompleteChar: '\u2588',
587
+ barIncompleteChar: '\u2591',
588
+ hideCursor: true,
589
+ clearOnComplete: false,
590
+ stopOnComplete: true,
591
+ }, cliProgress.Presets.shades_classic);
592
+ progressBar.start(100, 0, { status: 'Initiating...' });
919
593
  }
920
- while (attempts < maxAttempts) {
921
- attempts++;
594
+ for (let attempt = 1; attempt <= maxAttempts; attempt++) {
922
595
  try {
923
- const status = yield this.checkEvalStatus(projectName, evalRunName); // Call internal status check
924
- const currentProgressPercent = Math.round(status.progress * 100);
925
- // Show progress/status updates only when they change significantly
926
- if (showProgress && (currentProgressPercent !== lastProgressPercent || status.status !== lastStatus)) {
927
- const progressBar = this._createProgressBar(currentProgressPercent >= 0 ? currentProgressPercent : 0);
928
- // Use process.stdout.write to potentially overwrite the line (works best in standard terminals)
929
- process.stdout.write('\rAttempt ' + attempts + '/' + maxAttempts + ' | Status: ' + status.status + ' | Progress: ' + progressBar + ' ' + currentProgressPercent + '% ');
930
- lastProgressPercent = currentProgressPercent;
931
- lastStatus = status.status;
932
- }
933
- // Check evaluation status
934
- if (status.status === 'complete') {
935
- if (showProgress) {
936
- process.stdout.write('\n'); // Keep direct console output for progress bar newline
937
- // Use logger for status update
938
- logger.info('Evaluation complete! Fetching results...');
939
- }
940
- try {
941
- // Use the dedicated results fetching method
942
- const results = yield this.pullEvalResults(projectName, evalRunName);
943
- if (results.length > 0) {
944
- // Use logger for status update
945
- logger.info(`Successfully fetched ${results.length} results.`);
946
- return results;
947
- }
948
- else {
949
- // If complete status but no results, might be an issue. Log and return empty.
950
- logger.warn(`Evaluation reported complete, but no results were fetched for '${evalRunName}'.`);
951
- return [];
952
- }
953
- }
954
- catch (fetchError) {
955
- if (showProgress)
956
- process.stdout.write('\n'); // Keep direct console output
957
- logger.error(`Error fetching results after completion for '${evalRunName}': ${fetchError instanceof Error ? fetchError.message : String(fetchError)}`);
958
- return []; // Return empty array on error
959
- }
960
- }
961
- else if (status.status === 'failed') {
962
- if (showProgress)
963
- process.stdout.write('\n'); // Keep direct console output
964
- logger.error(`Evaluation failed for '${evalRunName}': ${status.error || status.message || 'Unknown error'}`);
965
- return []; // Return empty array on failure
966
- }
967
- else if (status.status === 'unknown') {
968
- // Log unknown status but continue polling
969
- // Avoid flooding logs if status remains unknown
970
- if (lastStatus !== 'unknown') {
971
- if (showProgress)
972
- process.stdout.write('\n'); // Keep direct console output
973
- logger.warn(`Evaluation status unknown for '${evalRunName}' (attempt ${attempts}). Retrying...`);
974
- lastProgressPercent = -1; // Reset progress display
596
+ const statusResult = yield this.checkEvalStatus(projectName, evalRunName);
597
+ const progress = Math.max(0, Math.min(100, statusResult.progress || 0)); // Clamp progress
598
+ const statusText = statusResult.message || statusResult.status;
599
+ if (progressBar) {
600
+ progressBar.update(progress, { status: statusText });
601
+ }
602
+ if (statusResult.status === 'completed') {
603
+ if (progressBar) {
604
+ progressBar.update(100, { status: colors.green('Completed! Fetching results...') });
975
605
  }
976
- lastStatus = 'unknown';
977
- }
978
- else {
979
- // Still processing (e.g., 'processing', 'running', 'pending')
980
- lastStatus = status.status;
981
- }
606
+ // Fetch final results using pullEval
607
+ const finalResults = yield this.pullEvalResults(projectName, evalRunName);
608
+ logger.info(`Evaluation run ${evalRunName} completed successfully.`);
609
+ return finalResults;
610
+ }
611
+ else if (statusResult.status === 'error' || statusResult.status === 'failed') {
612
+ // Concatenate error details into a single message string
613
+ const errorMsg = `Evaluation run ${evalRunName} failed or encountered an error: ${statusResult.error ? String(statusResult.error) : statusResult.message}`;
614
+ logger.error(errorMsg);
615
+ if (progressBar)
616
+ progressBar.stop();
617
+ // Pass only the combined message to the constructor
618
+ throw new JudgmentAPIError(errorMsg);
619
+ }
620
+ else if (statusResult.status === 'not_found') {
621
+ const errorMsg = `Evaluation run ${evalRunName} not found.`;
622
+ logger.error(errorMsg);
623
+ if (progressBar)
624
+ progressBar.stop();
625
+ // Pass only the message to the constructor
626
+ throw new JudgmentAPIError(errorMsg);
627
+ }
628
+ // Wait for the next interval
629
+ yield new Promise(resolve => setTimeout(resolve, intervalMs));
982
630
  }
983
631
  catch (error) {
984
- // Log the error but continue polling (checkEvalStatus should handle internal errors gracefully)
985
- if (showProgress)
986
- process.stdout.write('\n'); // Keep direct console output
987
- logger.error(`Error during status check loop (attempt ${attempts}/${maxAttempts}): ${error instanceof Error ? error.message : String(error)}`);
988
- lastProgressPercent = -1; // Reset progress display
989
- lastStatus = 'error_in_loop'; // Indicate issue in the loop itself
990
- }
991
- // Wait before next poll only if not completed/failed
992
- if (lastStatus !== 'complete' && lastStatus !== 'failed') {
632
+ // Handle errors during the wait loop (e.g., network issues during checkEvalStatus)
633
+ logger.error(`Error during waitForEvaluation loop (attempt ${attempt}): ${error}`);
634
+ // Option: Rethrow immediately vs. retry vs. specific handling
635
+ if (error instanceof JudgmentAPIError) { // If it was already a processed API error, rethrow
636
+ if (progressBar)
637
+ progressBar.stop();
638
+ throw error;
639
+ }
640
+ // For other errors, wait and retry (up to maxAttempts)
641
+ if (attempt === maxAttempts) {
642
+ if (progressBar)
643
+ progressBar.stop();
644
+ throw new Error(`waitForEvaluation failed after ${maxAttempts} attempts: ${error}`);
645
+ }
646
+ // Still retryable, wait for interval
993
647
  yield new Promise(resolve => setTimeout(resolve, intervalMs));
994
648
  }
995
- else {
996
- // Break loop if already completed or failed to avoid unnecessary delay
997
- break;
998
- }
999
- } // End while loop
1000
- // If loop finished without completing/failing
1001
- if (lastStatus !== 'complete' && lastStatus !== 'failed') {
1002
- if (showProgress)
1003
- process.stdout.write('\n'); // Keep direct console output
1004
- logger.error(`Evaluation polling timed out after ${attempts} attempts for "${evalRunName}". Last known status: ${lastStatus}`);
1005
- return []; // Return empty array on timeout
1006
649
  }
1007
- // Should technically be unreachable if break conditions work, but safeguard return
1008
- return [];
650
+ // If loop finishes without completion or error
651
+ if (progressBar)
652
+ progressBar.stop();
653
+ throw new Error(`Evaluation run ${evalRunName} did not complete after ${maxAttempts} attempts.`);
1009
654
  });
1010
655
  }
1011
656
  /**
@@ -1014,12 +659,48 @@ export class JudgmentClient {
1014
659
  * @returns A string representing the progress bar
1015
660
  */
1016
661
  _createProgressBar(percent) {
1017
- const width = 25; // Slightly wider bar
1018
- // Clamp percent between 0 and 100
1019
- const clampedPercent = Math.max(0, Math.min(100, percent));
1020
- const completed = Math.round(width * (clampedPercent / 100)); // Use round for potentially smoother look
1021
- const remaining = width - completed;
1022
- return '[' + '#'.repeat(completed) + '-'.repeat(remaining) + ']'; // Use different chars
662
+ const width = 20; // Width of the progress bar
663
+ const filled = Math.round(width * (percent / 100));
664
+ const empty = width - filled;
665
+ return `[${'#'.repeat(filled)}${'.'.repeat(empty)}] ${percent.toFixed(1)}%`;
666
+ }
667
+ // Keep helper methods private
668
+ getAuthHeaders() {
669
+ return {
670
+ 'Content-Type': 'application/json',
671
+ 'Authorization': `Bearer ${this.judgmentApiKey}`,
672
+ 'X-Organization-Id': this.organizationId,
673
+ };
674
+ }
675
+ // Ensure this handles errors from Eval/Project API calls correctly
676
+ handleApiError(error, context) {
677
+ logger.error(`API Error during ${context}:`);
678
+ if (axios.isAxiosError(error)) {
679
+ const axiosError = error;
680
+ const response = axiosError.response;
681
+ if (response) {
682
+ logger.error(`Status: ${response.status} ${response.statusText}`);
683
+ logger.debug('Response Data:', response.data);
684
+ if (response.status === 422) {
685
+ logger.error('Validation Error Detail:', response.data);
686
+ }
687
+ else if (context === 'pullEval' && response.status === 404) { // Keep eval-specific handling
688
+ logger.error(`Evaluation run not found.`);
689
+ }
690
+ else if (context.startsWith('delete') && response.status === 404) { // Keep generic delete handling
691
+ logger.warn(`${context}: Resource not found, may have already been deleted.`);
692
+ }
693
+ }
694
+ else if (axiosError.request) {
695
+ logger.error('No response received from server.');
696
+ }
697
+ else {
698
+ logger.error(`Error setting up API request for ${context}`);
699
+ }
700
+ }
701
+ else {
702
+ logger.error(`Unexpected error during ${context}`);
703
+ }
1023
704
  }
1024
705
  }
1025
706
  //# sourceMappingURL=judgment-client.js.map