judgeval 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -68
- package/dist/cjs/common/tracer.js +235 -143
- package/dist/cjs/common/tracer.js.map +1 -1
- package/dist/cjs/constants.js +8 -5
- package/dist/cjs/constants.js.map +1 -1
- package/dist/cjs/data/datasets/eval-dataset-client.js +349 -0
- package/dist/cjs/data/datasets/eval-dataset-client.js.map +1 -0
- package/dist/cjs/data/datasets/eval-dataset.js +405 -0
- package/dist/cjs/data/datasets/eval-dataset.js.map +1 -0
- package/dist/cjs/data/example.js +22 -1
- package/dist/cjs/data/example.js.map +1 -1
- package/dist/cjs/e2etests/eval-operations.test.js +282 -0
- package/dist/cjs/e2etests/eval-operations.test.js.map +1 -0
- package/dist/cjs/e2etests/judgee-traces.test.js +278 -0
- package/dist/cjs/e2etests/judgee-traces.test.js.map +1 -0
- package/dist/cjs/index.js +1 -3
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/judgment-client.js +326 -645
- package/dist/cjs/judgment-client.js.map +1 -1
- package/dist/cjs/scorers/api-scorer.js +56 -48
- package/dist/cjs/scorers/api-scorer.js.map +1 -1
- package/dist/cjs/scorers/base-scorer.js +66 -11
- package/dist/cjs/scorers/base-scorer.js.map +1 -1
- package/dist/esm/common/tracer.js +236 -144
- package/dist/esm/common/tracer.js.map +1 -1
- package/dist/esm/constants.js +7 -4
- package/dist/esm/constants.js.map +1 -1
- package/dist/esm/data/datasets/eval-dataset-client.js +342 -0
- package/dist/esm/data/datasets/eval-dataset-client.js.map +1 -0
- package/dist/esm/data/datasets/eval-dataset.js +375 -0
- package/dist/esm/data/datasets/eval-dataset.js.map +1 -0
- package/dist/esm/data/example.js +22 -1
- package/dist/esm/data/example.js.map +1 -1
- package/dist/esm/e2etests/eval-operations.test.js +254 -0
- package/dist/esm/e2etests/eval-operations.test.js.map +1 -0
- package/dist/esm/e2etests/judgee-traces.test.js +253 -0
- package/dist/esm/e2etests/judgee-traces.test.js.map +1 -0
- package/dist/esm/index.js +0 -1
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/judgment-client.js +328 -647
- package/dist/esm/judgment-client.js.map +1 -1
- package/dist/esm/scorers/api-scorer.js +56 -48
- package/dist/esm/scorers/api-scorer.js.map +1 -1
- package/dist/esm/scorers/base-scorer.js +66 -11
- package/dist/esm/scorers/base-scorer.js.map +1 -1
- package/dist/types/common/tracer.d.ts +27 -14
- package/dist/types/constants.d.ts +4 -4
- package/dist/types/data/datasets/eval-dataset-client.d.ts +39 -0
- package/dist/types/data/datasets/eval-dataset.d.ts +45 -0
- package/dist/types/data/example.d.ts +24 -12
- package/dist/types/e2etests/eval-operations.test.d.ts +5 -0
- package/dist/types/e2etests/judgee-traces.test.d.ts +5 -0
- package/dist/types/index.d.ts +0 -1
- package/dist/types/judgment-client.d.ts +3 -47
- package/dist/types/scorers/api-scorer.d.ts +15 -15
- package/dist/types/scorers/base-scorer.d.ts +53 -10
- package/package.json +2 -1
- package/dist/cjs/scorers/exact-match-scorer.js +0 -84
- package/dist/cjs/scorers/exact-match-scorer.js.map +0 -1
- package/dist/esm/scorers/exact-match-scorer.js +0 -80
- package/dist/esm/scorers/exact-match-scorer.js.map +0 -1
- package/dist/types/scorers/exact-match-scorer.d.ts +0 -10
|
@@ -14,9 +14,13 @@ import { ScoringResult } from './data/result.js';
|
|
|
14
14
|
import { JudgevalScorer, ScorerWrapper } from './scorers/base-scorer.js';
|
|
15
15
|
import { EvaluationRun } from './evaluation-run.js';
|
|
16
16
|
import { Rule, Condition } from './rules.js';
|
|
17
|
-
import { runEval, assertTest } from './run-evaluation.js';
|
|
18
|
-
import {
|
|
17
|
+
import { runEval, assertTest, JudgmentAPIError } from './run-evaluation.js';
|
|
18
|
+
import { JUDGMENT_EVAL_FETCH_API_URL, JUDGMENT_EVAL_DELETE_API_URL, JUDGMENT_EVAL_DELETE_PROJECT_API_URL, JUDGMENT_PROJECT_DELETE_API_URL, JUDGMENT_PROJECT_CREATE_API_URL, } from './constants.js';
|
|
19
19
|
import logger from './common/logger-instance.js';
|
|
20
|
+
// Keep progress bar imports if used elsewhere (e.g., waitForEvaluation)
|
|
21
|
+
import cliProgress from 'cli-progress';
|
|
22
|
+
import colors from 'ansi-colors';
|
|
23
|
+
import { EvalDatasetClient } from './data/datasets/eval-dataset-client.js';
|
|
20
24
|
// Load environment variables
|
|
21
25
|
dotenv.config();
|
|
22
26
|
/**
|
|
@@ -170,143 +174,10 @@ export class JudgmentClient {
|
|
|
170
174
|
* Evaluate a dataset
|
|
171
175
|
*/
|
|
172
176
|
evaluateDataset(dataset_1, scorers_1, model_1, aggregator_1, metadata_1) {
|
|
173
|
-
return __awaiter(this, arguments, void 0, function* (dataset, //
|
|
174
|
-
scorers, model, aggregator, metadata, projectName = '', evalRunName = '', logResults = true, useJudgment = true, rules) {
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
const loadedScorers = [];
|
|
178
|
-
for (const scorer of scorers) {
|
|
179
|
-
try {
|
|
180
|
-
if (scorer instanceof ScorerWrapper) {
|
|
181
|
-
loadedScorers.push(scorer.loadImplementation(useJudgment));
|
|
182
|
-
}
|
|
183
|
-
else {
|
|
184
|
-
// Assuming scorers passed are already JudgevalScorer or APIJudgmentScorer
|
|
185
|
-
loadedScorers.push(scorer);
|
|
186
|
-
}
|
|
187
|
-
}
|
|
188
|
-
catch (error) {
|
|
189
|
-
throw new Error(`Failed to load implementation for scorer ${scorer.constructor.name}: ${error instanceof Error ? error.message : String(error)}`);
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
// Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules
|
|
193
|
-
if (rules && loadedScorers.some(scorer => scorer instanceof JudgevalScorer)) {
|
|
194
|
-
throw new Error('Cannot use Judgeval scorers (only API scorers) when using rules. Please either remove rules or use only APIJudgmentScorer types.');
|
|
195
|
-
}
|
|
196
|
-
// Convert ScorerWrapper in rules to their implementations
|
|
197
|
-
let loadedRules;
|
|
198
|
-
if (rules) {
|
|
199
|
-
loadedRules = [];
|
|
200
|
-
for (const rule of rules) {
|
|
201
|
-
try {
|
|
202
|
-
const processedConditions = [];
|
|
203
|
-
for (const condition of rule.conditions) {
|
|
204
|
-
// Convert metric if it's a ScorerWrapper
|
|
205
|
-
if (condition.metric instanceof ScorerWrapper) {
|
|
206
|
-
try {
|
|
207
|
-
const loadedMetric = condition.metric.loadImplementation(useJudgment);
|
|
208
|
-
const newCondition = new Condition(loadedMetric);
|
|
209
|
-
Object.assign(newCondition, Object.assign(Object.assign({}, condition), { metric: loadedMetric }));
|
|
210
|
-
processedConditions.push(newCondition);
|
|
211
|
-
}
|
|
212
|
-
catch (error) {
|
|
213
|
-
throw new Error(`Failed to convert ScorerWrapper to implementation in rule '${rule.name}', condition metric '${condition.metric.constructor.name}': ${error instanceof Error ? error.message : String(error)}`);
|
|
214
|
-
}
|
|
215
|
-
}
|
|
216
|
-
else {
|
|
217
|
-
processedConditions.push(condition);
|
|
218
|
-
}
|
|
219
|
-
}
|
|
220
|
-
// Create new rule with processed conditions
|
|
221
|
-
const newRule = new Rule(rule.name, processedConditions, rule.combine_type, rule.description, rule.notification, rule.ruleId);
|
|
222
|
-
loadedRules.push(newRule);
|
|
223
|
-
}
|
|
224
|
-
catch (error) {
|
|
225
|
-
throw new Error(`Failed to process rule '${rule.name}': ${error instanceof Error ? error.message : String(error)}`);
|
|
226
|
-
}
|
|
227
|
-
}
|
|
228
|
-
}
|
|
229
|
-
const evaluationRun = new EvaluationRun({
|
|
230
|
-
logResults,
|
|
231
|
-
projectName,
|
|
232
|
-
evalName: evalRunName,
|
|
233
|
-
examples: dataset.examples, // Assuming dataset has an 'examples' property
|
|
234
|
-
scorers: loadedScorers,
|
|
235
|
-
model,
|
|
236
|
-
aggregator,
|
|
237
|
-
metadata,
|
|
238
|
-
judgmentApiKey: this.judgmentApiKey,
|
|
239
|
-
rules: loadedRules,
|
|
240
|
-
organizationId: this.organizationId
|
|
241
|
-
});
|
|
242
|
-
// Assuming override=false, ignoreErrors=true, asyncExecution=false as defaults for evaluateDataset
|
|
243
|
-
return runEval(evaluationRun, false, true, false);
|
|
244
|
-
}
|
|
245
|
-
catch (error) {
|
|
246
|
-
if (error instanceof Error) {
|
|
247
|
-
if (error.message.includes('one or more fields are invalid')) {
|
|
248
|
-
throw new Error(`Please check your EvaluationRun object, one or more fields are invalid: \n${error.message}`);
|
|
249
|
-
}
|
|
250
|
-
else {
|
|
251
|
-
throw new Error(`An unexpected error occurred during evaluation: ${error.message}`);
|
|
252
|
-
}
|
|
253
|
-
}
|
|
254
|
-
else {
|
|
255
|
-
throw new Error(`An unexpected error occurred during evaluation: ${String(error)}`);
|
|
256
|
-
}
|
|
257
|
-
}
|
|
258
|
-
});
|
|
259
|
-
}
|
|
260
|
-
/**
|
|
261
|
-
* Create a dataset
|
|
262
|
-
*/
|
|
263
|
-
createDataset() {
|
|
264
|
-
// This would be implemented with EvalDataset
|
|
265
|
-
throw new Error('Not implemented yet');
|
|
266
|
-
}
|
|
267
|
-
/**
|
|
268
|
-
* Push a dataset to the Judgment platform
|
|
269
|
-
*/
|
|
270
|
-
pushDataset(alias_1, dataset_1, projectName_1) {
|
|
271
|
-
return __awaiter(this, arguments, void 0, function* (alias, dataset, projectName, overwrite = false) {
|
|
272
|
-
// This would be implemented with EvalDataset
|
|
273
|
-
throw new Error('Not implemented yet');
|
|
274
|
-
});
|
|
275
|
-
}
|
|
276
|
-
/**
|
|
277
|
-
* Pull a dataset from the Judgment platform
|
|
278
|
-
*/
|
|
279
|
-
pullDataset(alias, projectName) {
|
|
280
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
281
|
-
// This would be implemented with EvalDataset
|
|
282
|
-
throw new Error('Not implemented yet');
|
|
283
|
-
});
|
|
284
|
-
}
|
|
285
|
-
/**
|
|
286
|
-
* Delete a dataset from the Judgment platform
|
|
287
|
-
*/
|
|
288
|
-
deleteDataset(alias, projectName) {
|
|
289
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
290
|
-
// This would be implemented with EvalDataset
|
|
291
|
-
throw new Error('Not implemented yet');
|
|
292
|
-
});
|
|
293
|
-
}
|
|
294
|
-
/**
|
|
295
|
-
* Pull project dataset stats from the Judgment platform
|
|
296
|
-
*/
|
|
297
|
-
pullProjectDatasetStats(projectName) {
|
|
298
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
299
|
-
// This would be implemented with EvalDataset
|
|
300
|
-
throw new Error('Not implemented yet');
|
|
301
|
-
});
|
|
302
|
-
}
|
|
303
|
-
/**
|
|
304
|
-
* Insert examples into a dataset on the Judgment platform
|
|
305
|
-
*/
|
|
306
|
-
insertDataset(alias, examples, projectName) {
|
|
307
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
308
|
-
// This would be implemented with EvalDataset
|
|
309
|
-
throw new Error('Not implemented yet');
|
|
177
|
+
return __awaiter(this, arguments, void 0, function* (dataset, // Keep type loose for stub
|
|
178
|
+
scorers, model, aggregator, metadata, projectName = 'default_project', evalRunName = 'default_eval_run', logResults = true, useJudgment = true, rules) {
|
|
179
|
+
// Keep type loose for stub
|
|
180
|
+
throw new Error('Not implemented in JudgmentClient. Use EvalDatasetClient.');
|
|
310
181
|
});
|
|
311
182
|
}
|
|
312
183
|
/**
|
|
@@ -315,39 +186,29 @@ export class JudgmentClient {
|
|
|
315
186
|
* @param evalRunName Name of the evaluation run
|
|
316
187
|
* @returns Array containing one object with 'id' and 'results' (list of ScoringResult)
|
|
317
188
|
*/
|
|
318
|
-
pullEval(projectName, evalRunName
|
|
319
|
-
) {
|
|
189
|
+
pullEval(projectName, evalRunName) {
|
|
320
190
|
return __awaiter(this, void 0, void 0, function* () {
|
|
321
|
-
var _a, _b, _c, _d;
|
|
322
|
-
// Body matches Python's structure for this endpoint
|
|
323
191
|
const evalRunRequestBody = {
|
|
324
192
|
project_name: projectName,
|
|
325
|
-
eval_name: evalRunName,
|
|
193
|
+
eval_name: evalRunName,
|
|
326
194
|
judgment_api_key: this.judgmentApiKey
|
|
327
195
|
};
|
|
328
196
|
try {
|
|
329
|
-
const response = yield axios.post(JUDGMENT_EVAL_FETCH_API_URL,
|
|
330
|
-
|
|
331
|
-
headers: {
|
|
332
|
-
'Content-Type': 'application/json',
|
|
333
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
334
|
-
'X-Organization-Id': this.organizationId
|
|
335
|
-
}
|
|
197
|
+
const response = yield axios.post(JUDGMENT_EVAL_FETCH_API_URL, evalRunRequestBody, {
|
|
198
|
+
headers: this.getAuthHeaders()
|
|
336
199
|
});
|
|
337
|
-
// Process the response to match the Python SDK's format
|
|
338
|
-
// Python returns [{ 'id': ..., 'results': [ScoringResult, ...]}]
|
|
339
|
-
// The API response is a list of results, each with an 'id' and 'result'
|
|
340
200
|
if (!Array.isArray(response.data) || response.data.length === 0) {
|
|
341
|
-
return [{ id: '', results: [] }];
|
|
201
|
+
return [{ id: '', results: [] }];
|
|
342
202
|
}
|
|
343
|
-
const evalRunResult = {
|
|
344
|
-
evalRunResult.id = ((_a = response.data[0]) === null || _a === void 0 ? void 0 : _a.id) || ''; // Assume ID is same for all results in run
|
|
203
|
+
const evalRunResult = [{}];
|
|
345
204
|
for (const result of response.data) {
|
|
205
|
+
const resultId = result.id || '';
|
|
346
206
|
const resultData = result.result || {};
|
|
207
|
+
// Extract data object from result data
|
|
347
208
|
const dataObject = resultData.data_object || {};
|
|
348
|
-
// Create Example
|
|
209
|
+
// Create Example with required input field
|
|
349
210
|
const example = new Example({
|
|
350
|
-
input: dataObject.input,
|
|
211
|
+
input: dataObject.input || '',
|
|
351
212
|
actualOutput: dataObject.actual_output,
|
|
352
213
|
expectedOutput: dataObject.expected_output,
|
|
353
214
|
context: dataObject.context,
|
|
@@ -357,110 +218,21 @@ export class JudgmentClient {
|
|
|
357
218
|
expectedTools: dataObject.expected_tools,
|
|
358
219
|
exampleId: dataObject.example_id,
|
|
359
220
|
exampleIndex: dataObject.example_index,
|
|
360
|
-
timestamp: dataObject.timestamp
|
|
361
|
-
|
|
362
|
-
// Create ScoringResult
|
|
363
|
-
const scoringResult = new ScoringResult({
|
|
364
|
-
dataObject: example,
|
|
365
|
-
scorersData: resultData.scorers_data || [],
|
|
366
|
-
error: resultData.error
|
|
221
|
+
timestamp: dataObject.timestamp,
|
|
222
|
+
example: dataObject.example // Include example boolean
|
|
367
223
|
});
|
|
368
|
-
evalRunResult.
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
throw new Error(`Failed to pull evaluation results: ${statusCode} - ${errorMessage}`);
|
|
377
|
-
}
|
|
378
|
-
if (error instanceof Error) {
|
|
379
|
-
throw new Error(`Failed to pull evaluation results: ${error.message}`);
|
|
380
|
-
}
|
|
381
|
-
throw new Error(`Failed to pull evaluation results: ${String(error)}`);
|
|
382
|
-
}
|
|
383
|
-
});
|
|
384
|
-
}
|
|
385
|
-
/**
|
|
386
|
-
* Get evaluation run results (alias for pullEval with a more intuitive name)
|
|
387
|
-
* @param projectName Name of the project
|
|
388
|
-
* @param evalRunName Name of the evaluation run
|
|
389
|
-
* @returns Array containing one object with 'id' and 'results' (list of ScoringResult)
|
|
390
|
-
*/
|
|
391
|
-
getEvalRun(projectName, evalRunName) {
|
|
392
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
393
|
-
return this.pullEval(projectName, evalRunName);
|
|
394
|
-
});
|
|
395
|
-
}
|
|
396
|
-
/**
|
|
397
|
-
* List all evaluation runs for a project
|
|
398
|
-
* @param projectName Name of the project
|
|
399
|
-
* @param limit Maximum number of evaluation runs to return (default: 100)
|
|
400
|
-
* @param offset Offset for pagination (default: 0)
|
|
401
|
-
* @returns List of evaluation run metadata
|
|
402
|
-
*/
|
|
403
|
-
listEvalRuns(projectName_1) {
|
|
404
|
-
return __awaiter(this, arguments, void 0, function* (projectName, limit = 100, offset = 0) {
|
|
405
|
-
var _a, _b, _c;
|
|
406
|
-
try {
|
|
407
|
-
// Use ROOT_API for the base URL
|
|
408
|
-
const url = `${ROOT_API}/projects/${projectName}/eval-runs`;
|
|
409
|
-
const response = yield axios.get(url, {
|
|
410
|
-
params: {
|
|
411
|
-
limit,
|
|
412
|
-
offset
|
|
413
|
-
},
|
|
414
|
-
headers: {
|
|
415
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
416
|
-
'X-Organization-Id': this.organizationId
|
|
417
|
-
}
|
|
418
|
-
});
|
|
419
|
-
return response.data || [];
|
|
420
|
-
}
|
|
421
|
-
catch (error) {
|
|
422
|
-
if (axios.isAxiosError(error)) {
|
|
423
|
-
const statusCode = (_a = error.response) === null || _a === void 0 ? void 0 : _a.status;
|
|
424
|
-
const errorMessage = ((_c = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data) === null || _c === void 0 ? void 0 : _c.detail) || error.message;
|
|
425
|
-
throw new Error(`Failed to list evaluation runs: ${statusCode} - ${errorMessage}`);
|
|
426
|
-
}
|
|
427
|
-
if (error instanceof Error) {
|
|
428
|
-
throw new Error(`Failed to list evaluation runs: ${error.message}`);
|
|
429
|
-
}
|
|
430
|
-
throw new Error(`Failed to list evaluation runs: ${String(error)}`);
|
|
431
|
-
}
|
|
432
|
-
});
|
|
433
|
-
}
|
|
434
|
-
/**
|
|
435
|
-
* Get evaluation run statistics
|
|
436
|
-
* @param projectName Name of the project
|
|
437
|
-
* @param evalRunName Name of the evaluation run
|
|
438
|
-
* @returns Statistics for the evaluation run
|
|
439
|
-
*/
|
|
440
|
-
getEvalRunStats(projectName, evalRunName) {
|
|
441
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
442
|
-
var _a, _b, _c;
|
|
443
|
-
try {
|
|
444
|
-
// Use ROOT_API for the base URL
|
|
445
|
-
const url = `${ROOT_API}/projects/${projectName}/eval-runs/${evalRunName}/stats`;
|
|
446
|
-
const response = yield axios.get(url, {
|
|
447
|
-
headers: {
|
|
448
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
449
|
-
'X-Organization-Id': this.organizationId
|
|
450
|
-
}
|
|
451
|
-
});
|
|
452
|
-
return response.data || {};
|
|
224
|
+
evalRunResult[0].id = resultId;
|
|
225
|
+
evalRunResult[0].results = [new ScoringResult({
|
|
226
|
+
dataObject: example,
|
|
227
|
+
scorersData: resultData.scorers_data || [],
|
|
228
|
+
error: resultData.error
|
|
229
|
+
})];
|
|
230
|
+
}
|
|
231
|
+
return evalRunResult;
|
|
453
232
|
}
|
|
454
233
|
catch (error) {
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
const errorMessage = ((_c = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data) === null || _c === void 0 ? void 0 : _c.detail) || error.message;
|
|
458
|
-
throw new Error(`Failed to get evaluation run statistics: ${statusCode} - ${errorMessage}`);
|
|
459
|
-
}
|
|
460
|
-
if (error instanceof Error) {
|
|
461
|
-
throw new Error(`Failed to get evaluation run statistics: ${error.message}`);
|
|
462
|
-
}
|
|
463
|
-
throw new Error(`Failed to get evaluation run statistics: ${String(error)}`);
|
|
234
|
+
this.handleApiError(error, 'pullEval');
|
|
235
|
+
throw error;
|
|
464
236
|
}
|
|
465
237
|
});
|
|
466
238
|
}
|
|
@@ -473,92 +245,67 @@ export class JudgmentClient {
|
|
|
473
245
|
*/
|
|
474
246
|
exportEvalResults(projectName_1, evalRunName_1) {
|
|
475
247
|
return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, format = 'json') {
|
|
248
|
+
logger.info(`Exporting eval results for project '${projectName}', run '${evalRunName}' as ${format}`);
|
|
476
249
|
try {
|
|
477
|
-
const
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
return
|
|
250
|
+
const resultsData = yield this.pullEval(projectName, evalRunName);
|
|
251
|
+
if (!resultsData || resultsData.length === 0 || !resultsData[0].results) {
|
|
252
|
+
logger.warn('No results found to export.');
|
|
253
|
+
return '';
|
|
481
254
|
}
|
|
255
|
+
const results = resultsData[0].results;
|
|
482
256
|
if (format === 'json') {
|
|
483
|
-
//
|
|
484
|
-
return JSON.stringify(
|
|
257
|
+
// Pretty print JSON
|
|
258
|
+
return JSON.stringify(results.map(r => r.toJSON()), null, 2);
|
|
485
259
|
}
|
|
486
260
|
else if (format === 'csv') {
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
const
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
261
|
+
if (results.length === 0)
|
|
262
|
+
return ''; // No data to export
|
|
263
|
+
// Dynamically determine headers from the first result object
|
|
264
|
+
// Flatten the structure for CSV
|
|
265
|
+
const flatResults = results.map(result => {
|
|
266
|
+
var _a, _b, _c;
|
|
267
|
+
const flat = {};
|
|
268
|
+
const exampleData = (_b = (_a = result.dataObject) === null || _a === void 0 ? void 0 : _a.toJSON()) !== null && _b !== void 0 ? _b : {}; // Use toJSON which gives snake_case
|
|
269
|
+
const scorersData = (_c = result.scorersData) !== null && _c !== void 0 ? _c : [];
|
|
270
|
+
// Add example data fields (snake_case)
|
|
271
|
+
for (const key in exampleData) {
|
|
272
|
+
// Prefix example fields to avoid collision, e.g., example_input
|
|
273
|
+
flat[`example_${key}`] = exampleData[key];
|
|
499
274
|
}
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
console.error(`Failed to dynamically import 'csv-writer': ${errorMsg}. Ensure it's installed (\`npm install csv-writer\`).`);
|
|
506
|
-
throw new Error("The 'csv-writer' package is required for CSV export but failed to load dynamically.");
|
|
507
|
-
}
|
|
508
|
-
try {
|
|
509
|
-
// Flatten the structure slightly for better CSV output
|
|
510
|
-
const processedResults = results.map((result) => {
|
|
511
|
-
// Flatten dataObject properties and scorersData
|
|
512
|
-
const flatResult = {};
|
|
513
|
-
flatResult.eval_run_id = evalRunData.id; // Add eval run ID
|
|
514
|
-
// Flatten dataObject
|
|
515
|
-
if (result.dataObject) {
|
|
516
|
-
for (const [key, value] of Object.entries(result.dataObject)) {
|
|
517
|
-
// Prefix with 'data_' to avoid potential clashes
|
|
518
|
-
flatResult[`data_${key}`] = (typeof value === 'object' && value !== null) ? JSON.stringify(value) : value;
|
|
519
|
-
}
|
|
520
|
-
}
|
|
521
|
-
// Flatten scorersData - creates columns like scorer_0_name, scorer_0_score, etc.
|
|
522
|
-
if (Array.isArray(result.scorersData)) {
|
|
523
|
-
result.scorersData.forEach((scorerData, index) => {
|
|
524
|
-
flatResult[`scorer_${index}_name`] = scorerData.name;
|
|
525
|
-
flatResult[`scorer_${index}_score`] = (typeof scorerData.score === 'object' && scorerData.score !== null) ? JSON.stringify(scorerData.score) : scorerData.score;
|
|
526
|
-
flatResult[`scorer_${index}_error`] = scorerData.error;
|
|
527
|
-
// Add other scorer fields if necessary, e.g., metadata
|
|
528
|
-
if (scorerData.additional_metadata) {
|
|
529
|
-
flatResult[`scorer_${index}_metadata`] = JSON.stringify(scorerData.additional_metadata);
|
|
530
|
-
}
|
|
531
|
-
});
|
|
532
|
-
}
|
|
533
|
-
flatResult.error = result.error; // Top-level error for the example processing
|
|
534
|
-
return flatResult;
|
|
535
|
-
});
|
|
536
|
-
// Define headers dynamically based on the keys of the first processed result
|
|
537
|
-
if (processedResults.length === 0) {
|
|
538
|
-
return 'No data to export after processing.'; // Handle case with no valid results after processing
|
|
539
|
-
}
|
|
540
|
-
const headers = Object.keys(processedResults[0]).map(key => ({ id: key, title: key }));
|
|
541
|
-
const csvStringifier = createObjectCsvStringifier({
|
|
542
|
-
header: headers
|
|
275
|
+
// Add scorers data
|
|
276
|
+
scorersData.forEach(scorer => {
|
|
277
|
+
flat[`scorer_${scorer.name}_score`] = scorer.score;
|
|
278
|
+
flat[`scorer_${scorer.name}_additional_metadata`] = JSON.stringify(scorer.additional_metadata);
|
|
279
|
+
flat[`scorer_${scorer.name}_error`] = scorer.error;
|
|
543
280
|
});
|
|
544
|
-
//
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
281
|
+
// Add top-level error if present
|
|
282
|
+
flat['top_level_error'] = result.error;
|
|
283
|
+
return flat;
|
|
284
|
+
});
|
|
285
|
+
// Get all unique keys from the flattened results for headers
|
|
286
|
+
const headers = Array.from(new Set(flatResults.flatMap(Object.keys)));
|
|
287
|
+
// Use papaparse for robust CSV generation
|
|
288
|
+
const Papa = require('papaparse'); // Use require here if not imported at top
|
|
289
|
+
const csv = Papa.unparse({
|
|
290
|
+
fields: headers,
|
|
291
|
+
data: flatResults
|
|
292
|
+
}, {
|
|
293
|
+
header: true,
|
|
294
|
+
quotes: true, // Ensure fields with commas/newlines are quoted
|
|
295
|
+
quoteChar: '"',
|
|
296
|
+
escapeChar: '"',
|
|
297
|
+
delimiter: ','
|
|
298
|
+
});
|
|
299
|
+
return csv;
|
|
552
300
|
}
|
|
553
301
|
else {
|
|
554
302
|
throw new Error(`Unsupported export format: ${format}`);
|
|
555
303
|
}
|
|
556
304
|
}
|
|
557
305
|
catch (error) {
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
throw new Error(`Failed to export evaluation results: ${String(error)}`);
|
|
306
|
+
logger.error(`Error exporting eval results: ${error}`);
|
|
307
|
+
this.handleApiError(error, 'exportEvalResults');
|
|
308
|
+
throw error;
|
|
562
309
|
}
|
|
563
310
|
});
|
|
564
311
|
}
|
|
@@ -567,47 +314,23 @@ export class JudgmentClient {
|
|
|
567
314
|
*/
|
|
568
315
|
deleteEval(projectName, evalRunNames) {
|
|
569
316
|
return __awaiter(this, void 0, void 0, function* () {
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
throw new Error('No evaluation run names provided');
|
|
573
|
-
}
|
|
574
|
-
// Body matches Python's structure for this endpoint
|
|
575
|
-
const evalRunRequestBody = {
|
|
317
|
+
logger.info(`Deleting eval runs: ${evalRunNames.join(', ')} from project: ${projectName}`);
|
|
318
|
+
const requestBody = {
|
|
576
319
|
project_name: projectName,
|
|
577
320
|
eval_names: evalRunNames,
|
|
578
|
-
judgment_api_key: this.judgmentApiKey
|
|
321
|
+
judgment_api_key: this.judgmentApiKey,
|
|
579
322
|
};
|
|
580
323
|
try {
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
data: evalRunRequestBody,
|
|
584
|
-
headers: {
|
|
585
|
-
'Content-Type': 'application/json',
|
|
586
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
587
|
-
'X-Organization-Id': this.organizationId
|
|
588
|
-
}
|
|
324
|
+
yield axios.post(JUDGMENT_EVAL_DELETE_API_URL, requestBody, {
|
|
325
|
+
headers: this.getAuthHeaders()
|
|
589
326
|
});
|
|
590
|
-
|
|
327
|
+
logger.info('Successfully deleted eval runs.');
|
|
328
|
+
return true;
|
|
591
329
|
}
|
|
592
330
|
catch (error) {
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
if (status === 404) {
|
|
597
|
-
throw new Error(`Eval results not found: ${JSON.stringify(data)}`);
|
|
598
|
-
}
|
|
599
|
-
else if (status === 500) {
|
|
600
|
-
throw new Error(`Error deleting eval results: ${JSON.stringify(data)}`);
|
|
601
|
-
}
|
|
602
|
-
else {
|
|
603
|
-
throw new Error(`Error deleting eval results (${status}): ${JSON.stringify(data)}`);
|
|
604
|
-
}
|
|
605
|
-
}
|
|
606
|
-
// Rethrow original or wrapped error
|
|
607
|
-
if (error instanceof Error) {
|
|
608
|
-
throw new Error(`Error deleting eval results: ${error.message}`);
|
|
609
|
-
}
|
|
610
|
-
throw new Error(`Error deleting eval results: ${String(error)}`);
|
|
331
|
+
logger.error(`Error deleting eval runs: ${error}`);
|
|
332
|
+
this.handleApiError(error, 'deleteEval');
|
|
333
|
+
return false;
|
|
611
334
|
}
|
|
612
335
|
});
|
|
613
336
|
}
|
|
@@ -616,43 +339,22 @@ export class JudgmentClient {
|
|
|
616
339
|
*/
|
|
617
340
|
deleteProjectEvals(projectName) {
|
|
618
341
|
return __awaiter(this, void 0, void 0, function* () {
|
|
619
|
-
|
|
342
|
+
logger.info(`Deleting ALL eval runs for project: ${projectName}`);
|
|
343
|
+
const requestBody = {
|
|
344
|
+
project_name: projectName,
|
|
345
|
+
judgment_api_key: this.judgmentApiKey,
|
|
346
|
+
};
|
|
620
347
|
try {
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
// Remove judgment_api_key from body to match Python (uses header auth)
|
|
624
|
-
data: {
|
|
625
|
-
project_name: projectName,
|
|
626
|
-
},
|
|
627
|
-
headers: {
|
|
628
|
-
'Content-Type': 'application/json',
|
|
629
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
630
|
-
'X-Organization-Id': this.organizationId
|
|
631
|
-
}
|
|
348
|
+
yield axios.post(JUDGMENT_EVAL_DELETE_PROJECT_API_URL, requestBody, {
|
|
349
|
+
headers: this.getAuthHeaders()
|
|
632
350
|
});
|
|
633
|
-
|
|
634
|
-
return
|
|
351
|
+
logger.info(`Successfully deleted all eval runs for project ${projectName}.`);
|
|
352
|
+
return true;
|
|
635
353
|
}
|
|
636
354
|
catch (error) {
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
if (status === 404) {
|
|
641
|
-
// Assuming 404 might mean project not found or no evals to delete
|
|
642
|
-
console.warn(`Project '${projectName}' not found or no evals to delete.`);
|
|
643
|
-
return false; // Or true depending on desired idempotency behavior
|
|
644
|
-
}
|
|
645
|
-
else if (status === 500) {
|
|
646
|
-
throw new Error(`Error deleting project evals: ${JSON.stringify(data)}`);
|
|
647
|
-
}
|
|
648
|
-
else {
|
|
649
|
-
throw new Error(`Error deleting project evals (${status}): ${JSON.stringify(data)}`);
|
|
650
|
-
}
|
|
651
|
-
}
|
|
652
|
-
if (error instanceof Error) {
|
|
653
|
-
throw new Error(`Error deleting project evals: ${error.message}`);
|
|
654
|
-
}
|
|
655
|
-
throw new Error(`Error deleting project evals: ${String(error)}`);
|
|
355
|
+
logger.error(`Error deleting project evals: ${error}`);
|
|
356
|
+
this.handleApiError(error, 'deleteProjectEvals');
|
|
357
|
+
return false;
|
|
656
358
|
}
|
|
657
359
|
});
|
|
658
360
|
}
|
|
@@ -661,37 +363,34 @@ export class JudgmentClient {
|
|
|
661
363
|
*/
|
|
662
364
|
createProject(projectName) {
|
|
663
365
|
return __awaiter(this, void 0, void 0, function* () {
|
|
366
|
+
logger.info(`Creating project: ${projectName}`);
|
|
367
|
+
const requestBody = {
|
|
368
|
+
project_name: projectName,
|
|
369
|
+
judgment_api_key: this.judgmentApiKey,
|
|
370
|
+
};
|
|
664
371
|
try {
|
|
665
|
-
const response = yield axios.post(JUDGMENT_PROJECT_CREATE_API_URL,
|
|
666
|
-
|
|
667
|
-
{
|
|
668
|
-
project_name: projectName,
|
|
669
|
-
}, {
|
|
670
|
-
headers: {
|
|
671
|
-
'Content-Type': 'application/json',
|
|
672
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
673
|
-
'X-Organization-Id': this.organizationId
|
|
674
|
-
}
|
|
372
|
+
const response = yield axios.post(JUDGMENT_PROJECT_CREATE_API_URL, requestBody, {
|
|
373
|
+
headers: this.getAuthHeaders()
|
|
675
374
|
});
|
|
676
|
-
//
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
if (axios.isAxiosError(error) && error.response) {
|
|
681
|
-
// Check for specific conflict error (e.g., 409) if API provides it
|
|
682
|
-
if (error.response.status === 409) {
|
|
683
|
-
console.warn(`Project '${projectName}' already exists.`);
|
|
684
|
-
return false; // Or true if idempotent creation is desired
|
|
685
|
-
}
|
|
686
|
-
throw new Error(`Error creating project (${error.response.status}): ${JSON.stringify(error.response.data)}`);
|
|
375
|
+
// Check for specific success message or status if API provides one
|
|
376
|
+
if (response.data && response.data.message === 'Project added successfully') {
|
|
377
|
+
logger.info(`Successfully created project: ${projectName}`);
|
|
378
|
+
return true;
|
|
687
379
|
}
|
|
688
|
-
else if (
|
|
689
|
-
|
|
380
|
+
else if (response.data && response.data.message === 'Project already exists') {
|
|
381
|
+
logger.warn(`Project '${projectName}' already exists.`);
|
|
382
|
+
return true; // Or false, depending on desired behavior for existing projects
|
|
690
383
|
}
|
|
691
384
|
else {
|
|
692
|
-
|
|
385
|
+
logger.error(`Failed to create project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
|
|
386
|
+
return false;
|
|
693
387
|
}
|
|
694
388
|
}
|
|
389
|
+
catch (error) {
|
|
390
|
+
logger.error(`Error creating project: ${error}`);
|
|
391
|
+
this.handleApiError(error, 'createProject');
|
|
392
|
+
return false;
|
|
393
|
+
}
|
|
695
394
|
});
|
|
696
395
|
}
|
|
697
396
|
/**
|
|
@@ -699,37 +398,29 @@ export class JudgmentClient {
|
|
|
699
398
|
*/
|
|
700
399
|
deleteProject(projectName) {
|
|
701
400
|
return __awaiter(this, void 0, void 0, function* () {
|
|
401
|
+
logger.info(`Deleting project: ${projectName}`);
|
|
402
|
+
const requestBody = {
|
|
403
|
+
project_name: projectName,
|
|
404
|
+
judgment_api_key: this.judgmentApiKey,
|
|
405
|
+
};
|
|
702
406
|
try {
|
|
703
|
-
const response = yield axios.
|
|
704
|
-
|
|
705
|
-
// Remove judgment_api_key from body to match Python (uses header auth)
|
|
706
|
-
data: {
|
|
707
|
-
project_name: projectName,
|
|
708
|
-
},
|
|
709
|
-
headers: {
|
|
710
|
-
'Content-Type': 'application/json',
|
|
711
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
712
|
-
'X-Organization-Id': this.organizationId
|
|
713
|
-
}
|
|
407
|
+
const response = yield axios.post(JUDGMENT_PROJECT_DELETE_API_URL, requestBody, {
|
|
408
|
+
headers: this.getAuthHeaders()
|
|
714
409
|
});
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
catch (error) {
|
|
719
|
-
if (axios.isAxiosError(error) && error.response) {
|
|
720
|
-
if (error.response.status === 404) {
|
|
721
|
-
console.warn(`Project '${projectName}' not found for deletion.`);
|
|
722
|
-
return false; // Or true depending on desired idempotency
|
|
723
|
-
}
|
|
724
|
-
throw new Error(`Error deleting project (${error.response.status}): ${JSON.stringify(error.response.data)}`);
|
|
725
|
-
}
|
|
726
|
-
else if (error instanceof Error) {
|
|
727
|
-
throw new Error(`Error deleting project: ${error.message}`);
|
|
410
|
+
if (response.data && response.data.message === 'Project deleted successfully') {
|
|
411
|
+
logger.info(`Successfully deleted project: ${projectName}`);
|
|
412
|
+
return true;
|
|
728
413
|
}
|
|
729
414
|
else {
|
|
730
|
-
|
|
415
|
+
logger.error(`Failed to delete project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
|
|
416
|
+
return false;
|
|
731
417
|
}
|
|
732
418
|
}
|
|
419
|
+
catch (error) {
|
|
420
|
+
logger.error(`Error deleting project: ${error}`);
|
|
421
|
+
this.handleApiError(error, 'deleteProject');
|
|
422
|
+
return false;
|
|
423
|
+
}
|
|
733
424
|
});
|
|
734
425
|
}
|
|
735
426
|
/**
|
|
@@ -737,35 +428,36 @@ export class JudgmentClient {
|
|
|
737
428
|
*/
|
|
738
429
|
validateApiKey() {
|
|
739
430
|
return __awaiter(this, void 0, void 0, function* () {
|
|
740
|
-
var _a, _b;
|
|
431
|
+
var _a, _b, _c, _d;
|
|
432
|
+
logger.debug('Validating API Key...');
|
|
741
433
|
try {
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
// Removed 'X-Organization-Id' header to match Python for this specific endpoint
|
|
749
|
-
}
|
|
750
|
-
});
|
|
751
|
-
if (response.status === 200) {
|
|
752
|
-
return [true, JSON.stringify(response.data)];
|
|
753
|
-
}
|
|
754
|
-
else {
|
|
755
|
-
// Status might be non-200 but still valid JSON error response
|
|
756
|
-
return [false, ((_a = response.data) === null || _a === void 0 ? void 0 : _a.detail) || `Error validating API key (Status: ${response.status})`];
|
|
757
|
-
}
|
|
434
|
+
// Instantiate EvalDatasetClient to perform the validation call
|
|
435
|
+
const datasetClient = new EvalDatasetClient(this.judgmentApiKey, this.organizationId);
|
|
436
|
+
// Use the dataset client to make the call
|
|
437
|
+
yield datasetClient.pullProjectDatasetStats('__api_key_validation__');
|
|
438
|
+
logger.debug('API Key appears valid.');
|
|
439
|
+
return [true, 'API Key is valid.'];
|
|
758
440
|
}
|
|
759
441
|
catch (error) {
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
442
|
+
let message = 'API Key validation failed.';
|
|
443
|
+
if (axios.isAxiosError(error)) {
|
|
444
|
+
if (((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 401 || ((_b = error.response) === null || _b === void 0 ? void 0 : _b.status) === 403) {
|
|
445
|
+
message = 'API Key is invalid or expired.';
|
|
446
|
+
}
|
|
447
|
+
else if (((_c = error.response) === null || _c === void 0 ? void 0 : _c.status) === 404) {
|
|
448
|
+
// If validation endpoint returns 404, key might be valid but endpoint wrong/project not found
|
|
449
|
+
// This depends on the specific validation endpoint behavior
|
|
450
|
+
message = 'API Key might be valid, but validation endpoint returned 404.';
|
|
451
|
+
}
|
|
452
|
+
else {
|
|
453
|
+
message = `API Key validation failed with status ${(_d = error.response) === null || _d === void 0 ? void 0 : _d.status}: ${error.message}`;
|
|
454
|
+
}
|
|
765
455
|
}
|
|
766
456
|
else {
|
|
767
|
-
|
|
457
|
+
message = `API Key validation failed: ${String(error)}`;
|
|
768
458
|
}
|
|
459
|
+
logger.error(message);
|
|
460
|
+
return [false, message];
|
|
769
461
|
}
|
|
770
462
|
});
|
|
771
463
|
}
|
|
@@ -790,17 +482,12 @@ export class JudgmentClient {
|
|
|
790
482
|
*/
|
|
791
483
|
pullEvalResults(projectName, evalRunName) {
|
|
792
484
|
return __awaiter(this, void 0, void 0, function* () {
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
const evalRunArray = yield this.pullEval(projectName, evalRunName);
|
|
796
|
-
// pullEval returns [{ id: ..., results: [...] }], extract results
|
|
797
|
-
return ((_a = evalRunArray[0]) === null || _a === void 0 ? void 0 : _a.results) || [];
|
|
798
|
-
}
|
|
799
|
-
catch (error) {
|
|
800
|
-
// Log error but return empty array to allow waitForEvaluation to potentially retry
|
|
801
|
-
logger.error(`Failed to pull evaluation results for '${evalRunName}': ${error instanceof Error ? error.message : String(error)}`);
|
|
485
|
+
const rawResults = yield this.pullEval(projectName, evalRunName);
|
|
486
|
+
if (!rawResults || rawResults.length === 0 || !rawResults[0].results) {
|
|
802
487
|
return [];
|
|
803
488
|
}
|
|
489
|
+
// Assuming pullEval correctly returns results in the expected format
|
|
490
|
+
return rawResults[0].results;
|
|
804
491
|
});
|
|
805
492
|
}
|
|
806
493
|
/**
|
|
@@ -812,88 +499,72 @@ export class JudgmentClient {
|
|
|
812
499
|
*/
|
|
813
500
|
checkEvalStatus(projectName, evalRunName) {
|
|
814
501
|
return __awaiter(this, void 0, void 0, function* () {
|
|
815
|
-
var _a
|
|
816
|
-
// Using 'eval_name' in body for consistency with pullEval/fetch endpoint.
|
|
502
|
+
var _a;
|
|
817
503
|
const requestBody = {
|
|
818
504
|
project_name: projectName,
|
|
819
|
-
eval_name: evalRunName,
|
|
505
|
+
eval_name: evalRunName,
|
|
820
506
|
judgment_api_key: this.judgmentApiKey,
|
|
821
507
|
};
|
|
822
508
|
try {
|
|
823
|
-
const response = yield axios.post(JUDGMENT_EVAL_FETCH_API_URL,
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
828
|
-
'X-Organization-Id': this.organizationId
|
|
829
|
-
},
|
|
830
|
-
timeout: 15000 // Slightly increased timeout for status checks
|
|
509
|
+
const response = yield axios.post(JUDGMENT_EVAL_FETCH_API_URL, requestBody, {
|
|
510
|
+
headers: this.getAuthHeaders(),
|
|
511
|
+
// Add a shorter timeout for status checks?
|
|
512
|
+
// timeout: 5000
|
|
831
513
|
});
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
if (
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
514
|
+
const data = response.data;
|
|
515
|
+
// Check if the response looks like a status object
|
|
516
|
+
if (data && typeof data.status === 'string') {
|
|
517
|
+
return {
|
|
518
|
+
status: data.status || 'unknown',
|
|
519
|
+
progress: typeof data.progress === 'number' ? data.progress : 0,
|
|
520
|
+
message: data.message || '',
|
|
521
|
+
error: data.error
|
|
522
|
+
};
|
|
523
|
+
}
|
|
524
|
+
// Check if the response looks like completed results (array format from pullEval)
|
|
525
|
+
else if (Array.isArray(data) && data.length > 0 && data[0].results) {
|
|
526
|
+
return {
|
|
527
|
+
status: 'completed',
|
|
528
|
+
progress: 100,
|
|
529
|
+
message: 'Evaluation completed.'
|
|
530
|
+
};
|
|
531
|
+
}
|
|
532
|
+
// Check if response looks like completed results (single object format)
|
|
533
|
+
else if (data && typeof data.id === 'string' && Array.isArray(data.results)) { // Adjust based on actual API response for single result fetch
|
|
534
|
+
return {
|
|
535
|
+
status: 'completed',
|
|
536
|
+
progress: 100,
|
|
537
|
+
message: 'Evaluation completed.'
|
|
538
|
+
};
|
|
539
|
+
}
|
|
540
|
+
// Handle other potential responses or assume pending/unknown
|
|
853
541
|
else {
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
const parsedProgress = parseFloat(statusData.progress);
|
|
861
|
-
if (!isNaN(parsedProgress)) {
|
|
862
|
-
progress = Math.max(0, Math.min(1, parsedProgress)); // Ensure progress is between 0 and 1
|
|
863
|
-
}
|
|
542
|
+
logger.warn(`Unexpected response format when checking status for ${evalRunName}:`, data);
|
|
543
|
+
return {
|
|
544
|
+
status: 'unknown',
|
|
545
|
+
progress: 0,
|
|
546
|
+
message: 'Could not determine status from API response.'
|
|
547
|
+
};
|
|
864
548
|
}
|
|
865
|
-
const normalizedStatus = {
|
|
866
|
-
status: statusData.status || 'unknown',
|
|
867
|
-
progress: progress,
|
|
868
|
-
message: statusData.message || '',
|
|
869
|
-
error: statusData.error // Include error field if present
|
|
870
|
-
};
|
|
871
|
-
// Only log status if it's not being called from waitForEvaluation
|
|
872
|
-
// Check stack trace for caller function name
|
|
873
|
-
const stack = new Error().stack;
|
|
874
|
-
const isCalledByWaitForEvaluation = stack === null || stack === void 0 ? void 0 : stack.includes('waitForEvaluation');
|
|
875
|
-
if (!isCalledByWaitForEvaluation) {
|
|
876
|
-
// Use logger for status updates when called directly
|
|
877
|
-
logger.info(`Evaluation Status: ${normalizedStatus.status}`);
|
|
878
|
-
logger.info(`Progress: ${Math.round(normalizedStatus.progress * 100)}%`);
|
|
879
|
-
if (normalizedStatus.message) {
|
|
880
|
-
logger.info(`Message: ${normalizedStatus.message}`);
|
|
881
|
-
}
|
|
882
|
-
if (normalizedStatus.error) {
|
|
883
|
-
logger.error(`Error in status: ${normalizedStatus.error}`);
|
|
884
|
-
}
|
|
885
|
-
}
|
|
886
|
-
return normalizedStatus;
|
|
887
549
|
}
|
|
888
550
|
catch (error) {
|
|
889
|
-
// Don't throw
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
551
|
+
// Don't throw here, return status indicating error
|
|
552
|
+
let errorMessage = 'Failed to fetch evaluation status.';
|
|
553
|
+
let status = 'error';
|
|
554
|
+
if (axios.isAxiosError(error) && ((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 404) {
|
|
555
|
+
status = 'not_found';
|
|
556
|
+
errorMessage = 'Evaluation run not found.';
|
|
557
|
+
logger.warn(`Evaluation run ${evalRunName} not found.`);
|
|
558
|
+
}
|
|
559
|
+
else {
|
|
560
|
+
this.handleApiError(error, 'checkEvalStatus');
|
|
561
|
+
errorMessage = `Error fetching status: ${String(error)}`;
|
|
562
|
+
}
|
|
893
563
|
return {
|
|
894
|
-
status:
|
|
564
|
+
status: status,
|
|
895
565
|
progress: 0,
|
|
896
|
-
message:
|
|
566
|
+
message: errorMessage,
|
|
567
|
+
error: String(error) // Include error string
|
|
897
568
|
};
|
|
898
569
|
}
|
|
899
570
|
});
|
|
@@ -907,105 +578,79 @@ export class JudgmentClient {
|
|
|
907
578
|
*/
|
|
908
579
|
waitForEvaluation(projectName_1, evalRunName_1) {
|
|
909
580
|
return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, options = {}) {
|
|
910
|
-
const { intervalMs =
|
|
911
|
-
|
|
912
|
-
showProgress = true } = options;
|
|
913
|
-
let attempts = 0;
|
|
914
|
-
let lastProgressPercent = -1;
|
|
915
|
-
let lastStatus = '';
|
|
581
|
+
const { intervalMs = 5000, maxAttempts = 120, showProgress = true } = options; // Default: check every 5s for 10 mins
|
|
582
|
+
let progressBar;
|
|
916
583
|
if (showProgress) {
|
|
917
|
-
|
|
918
|
-
|
|
584
|
+
progressBar = new cliProgress.SingleBar({
|
|
585
|
+
format: `Waiting for ${colors.magenta(evalRunName)}... | ${colors.cyan('{bar}')} | {percentage}% || {status}`,
|
|
586
|
+
barCompleteChar: '\u2588',
|
|
587
|
+
barIncompleteChar: '\u2591',
|
|
588
|
+
hideCursor: true,
|
|
589
|
+
clearOnComplete: false,
|
|
590
|
+
stopOnComplete: true,
|
|
591
|
+
}, cliProgress.Presets.shades_classic);
|
|
592
|
+
progressBar.start(100, 0, { status: 'Initiating...' });
|
|
919
593
|
}
|
|
920
|
-
|
|
921
|
-
attempts++;
|
|
594
|
+
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
|
922
595
|
try {
|
|
923
|
-
const
|
|
924
|
-
const
|
|
925
|
-
|
|
926
|
-
if (
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
}
|
|
933
|
-
// Check evaluation status
|
|
934
|
-
if (status.status === 'complete') {
|
|
935
|
-
if (showProgress) {
|
|
936
|
-
process.stdout.write('\n'); // Keep direct console output for progress bar newline
|
|
937
|
-
// Use logger for status update
|
|
938
|
-
logger.info('Evaluation complete! Fetching results...');
|
|
939
|
-
}
|
|
940
|
-
try {
|
|
941
|
-
// Use the dedicated results fetching method
|
|
942
|
-
const results = yield this.pullEvalResults(projectName, evalRunName);
|
|
943
|
-
if (results.length > 0) {
|
|
944
|
-
// Use logger for status update
|
|
945
|
-
logger.info(`Successfully fetched ${results.length} results.`);
|
|
946
|
-
return results;
|
|
947
|
-
}
|
|
948
|
-
else {
|
|
949
|
-
// If complete status but no results, might be an issue. Log and return empty.
|
|
950
|
-
logger.warn(`Evaluation reported complete, but no results were fetched for '${evalRunName}'.`);
|
|
951
|
-
return [];
|
|
952
|
-
}
|
|
953
|
-
}
|
|
954
|
-
catch (fetchError) {
|
|
955
|
-
if (showProgress)
|
|
956
|
-
process.stdout.write('\n'); // Keep direct console output
|
|
957
|
-
logger.error(`Error fetching results after completion for '${evalRunName}': ${fetchError instanceof Error ? fetchError.message : String(fetchError)}`);
|
|
958
|
-
return []; // Return empty array on error
|
|
959
|
-
}
|
|
960
|
-
}
|
|
961
|
-
else if (status.status === 'failed') {
|
|
962
|
-
if (showProgress)
|
|
963
|
-
process.stdout.write('\n'); // Keep direct console output
|
|
964
|
-
logger.error(`Evaluation failed for '${evalRunName}': ${status.error || status.message || 'Unknown error'}`);
|
|
965
|
-
return []; // Return empty array on failure
|
|
966
|
-
}
|
|
967
|
-
else if (status.status === 'unknown') {
|
|
968
|
-
// Log unknown status but continue polling
|
|
969
|
-
// Avoid flooding logs if status remains unknown
|
|
970
|
-
if (lastStatus !== 'unknown') {
|
|
971
|
-
if (showProgress)
|
|
972
|
-
process.stdout.write('\n'); // Keep direct console output
|
|
973
|
-
logger.warn(`Evaluation status unknown for '${evalRunName}' (attempt ${attempts}). Retrying...`);
|
|
974
|
-
lastProgressPercent = -1; // Reset progress display
|
|
596
|
+
const statusResult = yield this.checkEvalStatus(projectName, evalRunName);
|
|
597
|
+
const progress = Math.max(0, Math.min(100, statusResult.progress || 0)); // Clamp progress
|
|
598
|
+
const statusText = statusResult.message || statusResult.status;
|
|
599
|
+
if (progressBar) {
|
|
600
|
+
progressBar.update(progress, { status: statusText });
|
|
601
|
+
}
|
|
602
|
+
if (statusResult.status === 'completed') {
|
|
603
|
+
if (progressBar) {
|
|
604
|
+
progressBar.update(100, { status: colors.green('Completed! Fetching results...') });
|
|
975
605
|
}
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
606
|
+
// Fetch final results using pullEval
|
|
607
|
+
const finalResults = yield this.pullEvalResults(projectName, evalRunName);
|
|
608
|
+
logger.info(`Evaluation run ${evalRunName} completed successfully.`);
|
|
609
|
+
return finalResults;
|
|
610
|
+
}
|
|
611
|
+
else if (statusResult.status === 'error' || statusResult.status === 'failed') {
|
|
612
|
+
// Concatenate error details into a single message string
|
|
613
|
+
const errorMsg = `Evaluation run ${evalRunName} failed or encountered an error: ${statusResult.error ? String(statusResult.error) : statusResult.message}`;
|
|
614
|
+
logger.error(errorMsg);
|
|
615
|
+
if (progressBar)
|
|
616
|
+
progressBar.stop();
|
|
617
|
+
// Pass only the combined message to the constructor
|
|
618
|
+
throw new JudgmentAPIError(errorMsg);
|
|
619
|
+
}
|
|
620
|
+
else if (statusResult.status === 'not_found') {
|
|
621
|
+
const errorMsg = `Evaluation run ${evalRunName} not found.`;
|
|
622
|
+
logger.error(errorMsg);
|
|
623
|
+
if (progressBar)
|
|
624
|
+
progressBar.stop();
|
|
625
|
+
// Pass only the message to the constructor
|
|
626
|
+
throw new JudgmentAPIError(errorMsg);
|
|
627
|
+
}
|
|
628
|
+
// Wait for the next interval
|
|
629
|
+
yield new Promise(resolve => setTimeout(resolve, intervalMs));
|
|
982
630
|
}
|
|
983
631
|
catch (error) {
|
|
984
|
-
//
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
632
|
+
// Handle errors during the wait loop (e.g., network issues during checkEvalStatus)
|
|
633
|
+
logger.error(`Error during waitForEvaluation loop (attempt ${attempt}): ${error}`);
|
|
634
|
+
// Option: Rethrow immediately vs. retry vs. specific handling
|
|
635
|
+
if (error instanceof JudgmentAPIError) { // If it was already a processed API error, rethrow
|
|
636
|
+
if (progressBar)
|
|
637
|
+
progressBar.stop();
|
|
638
|
+
throw error;
|
|
639
|
+
}
|
|
640
|
+
// For other errors, wait and retry (up to maxAttempts)
|
|
641
|
+
if (attempt === maxAttempts) {
|
|
642
|
+
if (progressBar)
|
|
643
|
+
progressBar.stop();
|
|
644
|
+
throw new Error(`waitForEvaluation failed after ${maxAttempts} attempts: ${error}`);
|
|
645
|
+
}
|
|
646
|
+
// Still retryable, wait for interval
|
|
993
647
|
yield new Promise(resolve => setTimeout(resolve, intervalMs));
|
|
994
648
|
}
|
|
995
|
-
else {
|
|
996
|
-
// Break loop if already completed or failed to avoid unnecessary delay
|
|
997
|
-
break;
|
|
998
|
-
}
|
|
999
|
-
} // End while loop
|
|
1000
|
-
// If loop finished without completing/failing
|
|
1001
|
-
if (lastStatus !== 'complete' && lastStatus !== 'failed') {
|
|
1002
|
-
if (showProgress)
|
|
1003
|
-
process.stdout.write('\n'); // Keep direct console output
|
|
1004
|
-
logger.error(`Evaluation polling timed out after ${attempts} attempts for "${evalRunName}". Last known status: ${lastStatus}`);
|
|
1005
|
-
return []; // Return empty array on timeout
|
|
1006
649
|
}
|
|
1007
|
-
//
|
|
1008
|
-
|
|
650
|
+
// If loop finishes without completion or error
|
|
651
|
+
if (progressBar)
|
|
652
|
+
progressBar.stop();
|
|
653
|
+
throw new Error(`Evaluation run ${evalRunName} did not complete after ${maxAttempts} attempts.`);
|
|
1009
654
|
});
|
|
1010
655
|
}
|
|
1011
656
|
/**
|
|
@@ -1014,12 +659,48 @@ export class JudgmentClient {
|
|
|
1014
659
|
* @returns A string representing the progress bar
|
|
1015
660
|
*/
|
|
1016
661
|
_createProgressBar(percent) {
|
|
1017
|
-
const width =
|
|
1018
|
-
|
|
1019
|
-
const
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
662
|
+
const width = 20; // Width of the progress bar
|
|
663
|
+
const filled = Math.round(width * (percent / 100));
|
|
664
|
+
const empty = width - filled;
|
|
665
|
+
return `[${'#'.repeat(filled)}${'.'.repeat(empty)}] ${percent.toFixed(1)}%`;
|
|
666
|
+
}
|
|
667
|
+
// Keep helper methods private
|
|
668
|
+
getAuthHeaders() {
|
|
669
|
+
return {
|
|
670
|
+
'Content-Type': 'application/json',
|
|
671
|
+
'Authorization': `Bearer ${this.judgmentApiKey}`,
|
|
672
|
+
'X-Organization-Id': this.organizationId,
|
|
673
|
+
};
|
|
674
|
+
}
|
|
675
|
+
// Ensure this handles errors from Eval/Project API calls correctly
|
|
676
|
+
handleApiError(error, context) {
|
|
677
|
+
logger.error(`API Error during ${context}:`);
|
|
678
|
+
if (axios.isAxiosError(error)) {
|
|
679
|
+
const axiosError = error;
|
|
680
|
+
const response = axiosError.response;
|
|
681
|
+
if (response) {
|
|
682
|
+
logger.error(`Status: ${response.status} ${response.statusText}`);
|
|
683
|
+
logger.debug('Response Data:', response.data);
|
|
684
|
+
if (response.status === 422) {
|
|
685
|
+
logger.error('Validation Error Detail:', response.data);
|
|
686
|
+
}
|
|
687
|
+
else if (context === 'pullEval' && response.status === 404) { // Keep eval-specific handling
|
|
688
|
+
logger.error(`Evaluation run not found.`);
|
|
689
|
+
}
|
|
690
|
+
else if (context.startsWith('delete') && response.status === 404) { // Keep generic delete handling
|
|
691
|
+
logger.warn(`${context}: Resource not found, may have already been deleted.`);
|
|
692
|
+
}
|
|
693
|
+
}
|
|
694
|
+
else if (axiosError.request) {
|
|
695
|
+
logger.error('No response received from server.');
|
|
696
|
+
}
|
|
697
|
+
else {
|
|
698
|
+
logger.error(`Error setting up API request for ${context}`);
|
|
699
|
+
}
|
|
700
|
+
}
|
|
701
|
+
else {
|
|
702
|
+
logger.error(`Unexpected error during ${context}`);
|
|
703
|
+
}
|
|
1023
704
|
}
|
|
1024
705
|
}
|
|
1025
706
|
//# sourceMappingURL=judgment-client.js.map
|