npm - judgeval - Versions diffs - 0.2.1 → 0.2.2 - Mend

judgeval 0.2.1 → 0.2.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (45)

package/dist/cjs/common/logger-instance.js +19 -17
package/dist/cjs/common/logger-instance.js.map +1 -1
package/dist/cjs/common/tracer.js +58 -50
package/dist/cjs/common/tracer.js.map +1 -1
package/dist/cjs/constants.js +6 -4
package/dist/cjs/constants.js.map +1 -1
package/dist/cjs/data/datasets/eval-dataset-client.js +349 -0
package/dist/cjs/data/datasets/eval-dataset-client.js.map +1 -0
package/dist/cjs/data/datasets/eval-dataset.js +405 -0
package/dist/cjs/data/datasets/eval-dataset.js.map +1 -0
package/dist/cjs/data/example.js +22 -1
package/dist/cjs/data/example.js.map +1 -1
package/dist/cjs/e2etests/eval-operations.test.js +282 -0
package/dist/cjs/e2etests/eval-operations.test.js.map +1 -0
package/dist/cjs/e2etests/judgee-traces.test.js +278 -0
package/dist/cjs/e2etests/judgee-traces.test.js.map +1 -0
package/dist/cjs/judgment-client.js +309 -534
package/dist/cjs/judgment-client.js.map +1 -1
package/dist/esm/common/logger-instance.js +19 -17
package/dist/esm/common/logger-instance.js.map +1 -1
package/dist/esm/common/tracer.js +58 -50
package/dist/esm/common/tracer.js.map +1 -1
package/dist/esm/constants.js +5 -3
package/dist/esm/constants.js.map +1 -1
package/dist/esm/data/datasets/eval-dataset-client.js +342 -0
package/dist/esm/data/datasets/eval-dataset-client.js.map +1 -0
package/dist/esm/data/datasets/eval-dataset.js +375 -0
package/dist/esm/data/datasets/eval-dataset.js.map +1 -0
package/dist/esm/data/example.js +22 -1
package/dist/esm/data/example.js.map +1 -1
package/dist/esm/e2etests/eval-operations.test.js +254 -0
package/dist/esm/e2etests/eval-operations.test.js.map +1 -0
package/dist/esm/e2etests/judgee-traces.test.js +253 -0
package/dist/esm/e2etests/judgee-traces.test.js.map +1 -0
package/dist/esm/judgment-client.js +311 -536
package/dist/esm/judgment-client.js.map +1 -1
package/dist/types/common/tracer.d.ts +0 -1
package/dist/types/constants.d.ts +2 -3
package/dist/types/data/datasets/eval-dataset-client.d.ts +39 -0
package/dist/types/data/datasets/eval-dataset.d.ts +45 -0
package/dist/types/data/example.d.ts +24 -12
package/dist/types/e2etests/eval-operations.test.d.ts +5 -0
package/dist/types/e2etests/judgee-traces.test.d.ts +5 -0
package/dist/types/judgment-client.d.ts +3 -25
package/package.json +3 -9

package/dist/cjs/judgment-client.js CHANGED

@@ -46,6 +46,10 @@ const rules_js_1 = require("./rules.js");
 const run_evaluation_js_1 = require("./run-evaluation.js");
 const constants_js_1 = require("./constants.js");
 const logger_instance_js_1 = __importDefault(require("./common/logger-instance.js"));
+// Keep progress bar imports if used elsewhere (e.g., waitForEvaluation)
+const cli_progress_1 = __importDefault(require("cli-progress"));
+const ansi_colors_1 = __importDefault(require("ansi-colors"));
+const eval_dataset_client_js_1 = require("./data/datasets/eval-dataset-client.js");
 // Load environment variables
 dotenv.config();
 /**
@@ -199,143 +203,10 @@ class JudgmentClient {
      * Evaluate a dataset
      */
     evaluateDataset(dataset_1, scorers_1, model_1, aggregator_1, metadata_1) {
-        return __awaiter(this, arguments, void 0, function* (dataset, // EvalDataset would be implemented separately
-        scorers, model, aggregator, metadata, projectName = '', evalRunName = '', logResults = true, useJudgment = true, rules) {
-            try {
-                // Load appropriate implementations for all scorers
-                const loadedScorers = [];
-                for (const scorer of scorers) {
-                    try {
-                        if (scorer instanceof base_scorer_js_1.ScorerWrapper) {
-                            loadedScorers.push(scorer.loadImplementation(useJudgment));
-                        }
-                        else {
-                            // Assuming scorers passed are already JudgevalScorer or APIJudgmentScorer
-                            loadedScorers.push(scorer);
-                        }
-                    }
-                    catch (error) {
-                        throw new Error(`Failed to load implementation for scorer ${scorer.constructor.name}: ${error instanceof Error ? error.message : String(error)}`);
-                    }
-                }
-                // Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules
-                if (rules && loadedScorers.some(scorer => scorer instanceof base_scorer_js_1.JudgevalScorer)) {
-                    throw new Error('Cannot use Judgeval scorers (only API scorers) when using rules. Please either remove rules or use only APIJudgmentScorer types.');
-                }
-                // Convert ScorerWrapper in rules to their implementations
-                let loadedRules;
-                if (rules) {
-                    loadedRules = [];
-                    for (const rule of rules) {
-                        try {
-                            const processedConditions = [];
-                            for (const condition of rule.conditions) {
-                                // Convert metric if it's a ScorerWrapper
-                                if (condition.metric instanceof base_scorer_js_1.ScorerWrapper) {
-                                    try {
-                                        const loadedMetric = condition.metric.loadImplementation(useJudgment);
-                                        const newCondition = new rules_js_1.Condition(loadedMetric);
-                                        Object.assign(newCondition, Object.assign(Object.assign({}, condition), { metric: loadedMetric }));
-                                        processedConditions.push(newCondition);
-                                    }
-                                    catch (error) {
-                                        throw new Error(`Failed to convert ScorerWrapper to implementation in rule '${rule.name}', condition metric '${condition.metric.constructor.name}': ${error instanceof Error ? error.message : String(error)}`);
-                                    }
-                                }
-                                else {
-                                    processedConditions.push(condition);
-                                }
-                            }
-                            // Create new rule with processed conditions
-                            const newRule = new rules_js_1.Rule(rule.name, processedConditions, rule.combine_type, rule.description, rule.notification, rule.ruleId);
-                            loadedRules.push(newRule);
-                        }
-                        catch (error) {
-                            throw new Error(`Failed to process rule '${rule.name}': ${error instanceof Error ? error.message : String(error)}`);
-                        }
-                    }
-                }
-                const evaluationRun = new evaluation_run_js_1.EvaluationRun({
-                    logResults,
-                    projectName,
-                    evalName: evalRunName,
-                    examples: dataset.examples, // Assuming dataset has an 'examples' property
-                    scorers: loadedScorers,
-                    model,
-                    aggregator,
-                    metadata,
-                    judgmentApiKey: this.judgmentApiKey,
-                    rules: loadedRules,
-                    organizationId: this.organizationId
-                });
-                // Assuming override=false, ignoreErrors=true, asyncExecution=false as defaults for evaluateDataset
-                return (0, run_evaluation_js_1.runEval)(evaluationRun, false, true, false);
-            }
-            catch (error) {
-                if (error instanceof Error) {
-                    if (error.message.includes('one or more fields are invalid')) {
-                        throw new Error(`Please check your EvaluationRun object, one or more fields are invalid: \n${error.message}`);
-                    }
-                    else {
-                        throw new Error(`An unexpected error occurred during evaluation: ${error.message}`);
-                    }
-                }
-                else {
-                    throw new Error(`An unexpected error occurred during evaluation: ${String(error)}`);
-                }
-            }
-        });
-    }
-    /**
-     * Create a dataset
-     */
-    createDataset() {
-        // This would be implemented with EvalDataset
-        throw new Error('Not implemented yet');
-    }
-    /**
-     * Push a dataset to the Judgment platform
-     */
-    pushDataset(alias_1, dataset_1, projectName_1) {
-        return __awaiter(this, arguments, void 0, function* (alias, dataset, projectName, overwrite = false) {
-            // This would be implemented with EvalDataset
-            throw new Error('Not implemented yet');
-        });
-    }
-    /**
-     * Pull a dataset from the Judgment platform
-     */
-    pullDataset(alias, projectName) {
-        return __awaiter(this, void 0, void 0, function* () {
-            // This would be implemented with EvalDataset
-            throw new Error('Not implemented yet');
-        });
-    }
-    /**
-     * Delete a dataset from the Judgment platform
-     */
-    deleteDataset(alias, projectName) {
-        return __awaiter(this, void 0, void 0, function* () {
-            // This would be implemented with EvalDataset
-            throw new Error('Not implemented yet');
-        });
-    }
-    /**
-     * Pull project dataset stats from the Judgment platform
-     */
-    pullProjectDatasetStats(projectName) {
-        return __awaiter(this, void 0, void 0, function* () {
-            // This would be implemented with EvalDataset
-            throw new Error('Not implemented yet');
-        });
-    }
-    /**
-     * Insert examples into a dataset on the Judgment platform
-     */
-    insertDataset(alias, examples, projectName) {
-        return __awaiter(this, void 0, void 0, function* () {
-            // This would be implemented with EvalDataset
-            throw new Error('Not implemented yet');
+        return __awaiter(this, arguments, void 0, function* (dataset, // Keep type loose for stub
+        scorers, model, aggregator, metadata, projectName = 'default_project', evalRunName = 'default_eval_run', logResults = true, useJudgment = true, rules) {
+            // Keep type loose for stub
+            throw new Error('Not implemented in JudgmentClient. Use EvalDatasetClient.');
         });
     }
     /**
@@ -353,11 +224,7 @@ class JudgmentClient {
             };
             try {
                 const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL, evalRunRequestBody, {
-                    headers: {
-                        'Content-Type': 'application/json',
-                        'Authorization': `Bearer ${this.judgmentApiKey}`,
-                        'X-Organization-Id': this.organizationId
-                    }
+                    headers: this.getAuthHeaders()
                 });
                 if (!Array.isArray(response.data) || response.data.length === 0) {
                     return [{ id: '', results: [] }];
@@ -380,7 +247,8 @@ class JudgmentClient {
                         expectedTools: dataObject.expected_tools,
                         exampleId: dataObject.example_id,
                         exampleIndex: dataObject.example_index,
-                        timestamp: dataObject.timestamp
+                        timestamp: dataObject.timestamp,
+                        example: dataObject.example // Include example boolean
                     });
                     evalRunResult[0].id = resultId;
                     evalRunResult[0].results = [new result_js_1.ScoringResult({
@@ -392,10 +260,8 @@ class JudgmentClient {
                 return evalRunResult;
             }
             catch (error) {
-                if (axios_1.default.isAxiosError(error) && error.response) {
-                    throw new Error(`Error fetching eval results: ${JSON.stringify(error.response.data)}`);
-                }
-                throw new Error(`Error fetching eval results: ${String(error)}`);
+                this.handleApiError(error, 'pullEval');
+                throw error;
             }
         });
     }
@@ -408,92 +274,67 @@ class JudgmentClient {
      */
     exportEvalResults(projectName_1, evalRunName_1) {
         return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, format = 'json') {
+            logger_instance_js_1.default.info(`Exporting eval results for project '${projectName}', run '${evalRunName}' as ${format}`);
             try {
-                const evalRunArray = yield this.pullEval(projectName, evalRunName);
-                const evalRunData = evalRunArray[0]; // Get the first element containing id and results
-                if (!evalRunData || !evalRunData.results) {
-                    return format === 'json' ? JSON.stringify([], null, 2) : 'No results found';
+                const resultsData = yield this.pullEval(projectName, evalRunName);
+                if (!resultsData || resultsData.length === 0 || !resultsData[0].results) {
+                    logger_instance_js_1.default.warn('No results found to export.');
+                    return '';
                 }
+                const results = resultsData[0].results;
                 if (format === 'json') {
-                    // Return the whole structure including ID and results array
-                    return JSON.stringify(evalRunData, null, 2);
+                    // Pretty print JSON
+                    return JSON.stringify(results.map(r => r.toJSON()), null, 2);
                 }
                 else if (format === 'csv') {
-                    const results = evalRunData.results;
-                    if (!Array.isArray(results) || results.length === 0) {
-                        return 'No results found';
-                    }
-                    // Use csv-writer instead of json2csv
-                    let createObjectCsvStringifier;
-                    try {
-                        // Use dynamic import() for ES Modules
-                        const csvWriterModule = yield Promise.resolve().then(() => __importStar(require('csv-writer')));
-                        createObjectCsvStringifier = csvWriterModule.createObjectCsvStringifier;
-                        if (!createObjectCsvStringifier) { // Check if the function exists
-                            throw new Error("Could not load createObjectCsvStringifier from csv-writer");
-                        }
-                    }
-                    catch (e) {
-                        // Provide a more helpful error message
-                        const errorMsg = e instanceof Error ? e.message : String(e);
-                        // Update error message to reflect import() failure
-                        console.error(`Failed to dynamically import 'csv-writer': ${errorMsg}. Ensure it's installed (\`npm install csv-writer\`).`);
-                        throw new Error("The 'csv-writer' package is required for CSV export but failed to load dynamically.");
-                    }
-                    try {
-                        // Flatten the structure slightly for better CSV output
-                        const processedResults = results.map((result) => {
-                            // Flatten dataObject properties and scorersData
-                            const flatResult = {};
-                            flatResult.eval_run_id = evalRunData.id; // Add eval run ID
-                            // Flatten dataObject
-                            if (result.dataObject) {
-                                for (const [key, value] of Object.entries(result.dataObject)) {
-                                    // Prefix with 'data_' to avoid potential clashes
-                                    flatResult[`data_${key}`] = (typeof value === 'object' && value !== null) ? JSON.stringify(value) : value;
-                                }
-                            }
-                            // Flatten scorersData - creates columns like scorer_0_name, scorer_0_score, etc.
-                            if (Array.isArray(result.scorersData)) {
-                                result.scorersData.forEach((scorerData, index) => {
-                                    flatResult[`scorer_${index}_name`] = scorerData.name;
-                                    flatResult[`scorer_${index}_score`] = (typeof scorerData.score === 'object' && scorerData.score !== null) ? JSON.stringify(scorerData.score) : scorerData.score;
-                                    flatResult[`scorer_${index}_error`] = scorerData.error;
-                                    // Add other scorer fields if necessary, e.g., metadata
-                                    if (scorerData.additional_metadata) {
-                                        flatResult[`scorer_${index}_metadata`] = JSON.stringify(scorerData.additional_metadata);
-                                    }
-                                });
-                            }
-                            flatResult.error = result.error; // Top-level error for the example processing
-                            return flatResult;
-                        });
-                        // Define headers dynamically based on the keys of the first processed result
-                        if (processedResults.length === 0) {
-                            return 'No data to export after processing.'; // Handle case with no valid results after processing
+                    if (results.length === 0)
+                        return ''; // No data to export
+                    // Dynamically determine headers from the first result object
+                    // Flatten the structure for CSV
+                    const flatResults = results.map(result => {
+                        var _a, _b, _c;
+                        const flat = {};
+                        const exampleData = (_b = (_a = result.dataObject) === null || _a === void 0 ? void 0 : _a.toJSON()) !== null && _b !== void 0 ? _b : {}; // Use toJSON which gives snake_case
+                        const scorersData = (_c = result.scorersData) !== null && _c !== void 0 ? _c : [];
+                        // Add example data fields (snake_case)
+                        for (const key in exampleData) {
+                            // Prefix example fields to avoid collision, e.g., example_input
+                            flat[`example_${key}`] = exampleData[key];
                         }
-                        const headers = Object.keys(processedResults[0]).map(key => ({ id: key, title: key }));
-                        const csvStringifier = createObjectCsvStringifier({
-                            header: headers
+                        // Add scorers data
+                        scorersData.forEach(scorer => {
+                            flat[`scorer_${scorer.name}_score`] = scorer.score;
+                            flat[`scorer_${scorer.name}_additional_metadata`] = JSON.stringify(scorer.additional_metadata);
+                            flat[`scorer_${scorer.name}_error`] = scorer.error;
                         });
-                        // Generate CSV string (header + records)
-                        return csvStringifier.getHeaderString() + csvStringifier.stringifyRecords(processedResults);
-                    }
-                    catch (error) {
-                        console.error('Error converting to CSV:', error);
-                        const errorMessage = error instanceof Error ? error.message : String(error);
-                        return `Error generating CSV: ${errorMessage}`;
-                    }
+                        // Add top-level error if present
+                        flat['top_level_error'] = result.error;
+                        return flat;
+                    });
+                    // Get all unique keys from the flattened results for headers
+                    const headers = Array.from(new Set(flatResults.flatMap(Object.keys)));
+                    // Use papaparse for robust CSV generation
+                    const Papa = require('papaparse'); // Use require here if not imported at top
+                    const csv = Papa.unparse({
+                        fields: headers,
+                        data: flatResults
+                    }, {
+                        header: true,
+                        quotes: true, // Ensure fields with commas/newlines are quoted
+                        quoteChar: '"',
+                        escapeChar: '"',
+                        delimiter: ','
+                    });
+                    return csv;
                 }
                 else {
                     throw new Error(`Unsupported export format: ${format}`);
                 }
             }
             catch (error) {
-                if (error instanceof Error) {
-                    throw new Error(`Failed to export evaluation results: ${error.message}`);
-                }
-                throw new Error(`Failed to export evaluation results: ${String(error)}`);
+                logger_instance_js_1.default.error(`Error exporting eval results: ${error}`);
+                this.handleApiError(error, 'exportEvalResults');
+                throw error;
             }
         });
     }
@@ -502,47 +343,23 @@ class JudgmentClient {
      */
     deleteEval(projectName, evalRunNames) {
         return __awaiter(this, void 0, void 0, function* () {
-            var _a, _b;
-            if (!evalRunNames || evalRunNames.length === 0) {
-                throw new Error('No evaluation run names provided');
-            }
-            // Body matches Python's structure for this endpoint
-            const evalRunRequestBody = {
+            logger_instance_js_1.default.info(`Deleting eval runs: ${evalRunNames.join(', ')} from project: ${projectName}`);
+            const requestBody = {
                 project_name: projectName,
                 eval_names: evalRunNames,
-                judgment_api_key: this.judgmentApiKey // Required by this specific API endpoint
+                judgment_api_key: this.judgmentApiKey,
             };
             try {
-                const response = yield axios_1.default.delete(constants_js_1.JUDGMENT_EVAL_DELETE_API_URL, // Use constant
-                {
-                    data: evalRunRequestBody,
-                    headers: {
-                        'Content-Type': 'application/json',
-                        'Authorization': `Bearer ${this.judgmentApiKey}`,
-                        'X-Organization-Id': this.organizationId
-                    }
+                yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_DELETE_API_URL, requestBody, {
+                    headers: this.getAuthHeaders()
                 });
-                return Boolean(response.data);
+                logger_instance_js_1.default.info('Successfully deleted eval runs.');
+                return true;
             }
             catch (error) {
-                if (axios_1.default.isAxiosError(error)) {
-                    const status = (_a = error.response) === null || _a === void 0 ? void 0 : _a.status;
-                    const data = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data;
-                    if (status === 404) {
-                        throw new Error(`Eval results not found: ${JSON.stringify(data)}`);
-                    }
-                    else if (status === 500) {
-                        throw new Error(`Error deleting eval results: ${JSON.stringify(data)}`);
-                    }
-                    else {
-                        throw new Error(`Error deleting eval results (${status}): ${JSON.stringify(data)}`);
-                    }
-                }
-                // Rethrow original or wrapped error
-                if (error instanceof Error) {
-                    throw new Error(`Error deleting eval results: ${error.message}`);
-                }
-                throw new Error(`Error deleting eval results: ${String(error)}`);
+                logger_instance_js_1.default.error(`Error deleting eval runs: ${error}`);
+                this.handleApiError(error, 'deleteEval');
+                return false;
             }
         });
     }
@@ -551,43 +368,22 @@ class JudgmentClient {
      */
     deleteProjectEvals(projectName) {
         return __awaiter(this, void 0, void 0, function* () {
-            var _a, _b;
+            logger_instance_js_1.default.info(`Deleting ALL eval runs for project: ${projectName}`);
+            const requestBody = {
+                project_name: projectName,
+                judgment_api_key: this.judgmentApiKey,
+            };
             try {
-                const response = yield axios_1.default.delete(constants_js_1.JUDGMENT_EVAL_DELETE_PROJECT_API_URL, // Use constant
-                {
-                    // Remove judgment_api_key from body to match Python (uses header auth)
-                    data: {
-                        project_name: projectName,
-                    },
-                    headers: {
-                        'Content-Type': 'application/json',
-                        'Authorization': `Bearer ${this.judgmentApiKey}`,
-                        'X-Organization-Id': this.organizationId
-                    }
+                yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_DELETE_PROJECT_API_URL, requestBody, {
+                    headers: this.getAuthHeaders()
                 });
-                // Python returns response.json(), check if TS response needs similar handling
-                return Boolean(response.data); // Assuming response.data indicates success
+                logger_instance_js_1.default.info(`Successfully deleted all eval runs for project ${projectName}.`);
+                return true;
             }
             catch (error) {
-                if (axios_1.default.isAxiosError(error)) {
-                    const status = (_a = error.response) === null || _a === void 0 ? void 0 : _a.status;
-                    const data = (_b = error.response) === null || _b === void 0 ? void 0 : _b.data;
-                    if (status === 404) {
-                        // Assuming 404 might mean project not found or no evals to delete
-                        console.warn(`Project '${projectName}' not found or no evals to delete.`);
-                        return false; // Or true depending on desired idempotency behavior
-                    }
-                    else if (status === 500) {
-                        throw new Error(`Error deleting project evals: ${JSON.stringify(data)}`);
-                    }
-                    else {
-                        throw new Error(`Error deleting project evals (${status}): ${JSON.stringify(data)}`);
-                    }
-                }
-                if (error instanceof Error) {
-                    throw new Error(`Error deleting project evals: ${error.message}`);
-                }
-                throw new Error(`Error deleting project evals: ${String(error)}`);
+                logger_instance_js_1.default.error(`Error deleting project evals: ${error}`);
+                this.handleApiError(error, 'deleteProjectEvals');
+                return false;
             }
         });
     }
@@ -596,37 +392,34 @@ class JudgmentClient {
      */
     createProject(projectName) {
         return __awaiter(this, void 0, void 0, function* () {
+            logger_instance_js_1.default.info(`Creating project: ${projectName}`);
+            const requestBody = {
+                project_name: projectName,
+                judgment_api_key: this.judgmentApiKey,
+            };
             try {
-                const response = yield axios_1.default.post(constants_js_1.JUDGMENT_PROJECT_CREATE_API_URL, // Use constant
-                // Remove judgment_api_key from body to match Python (uses header auth)
-                {
-                    project_name: projectName,
-                }, {
-                    headers: {
-                        'Content-Type': 'application/json',
-                        'Authorization': `Bearer ${this.judgmentApiKey}`,
-                        'X-Organization-Id': this.organizationId
-                    }
+                const response = yield axios_1.default.post(constants_js_1.JUDGMENT_PROJECT_CREATE_API_URL, requestBody, {
+                    headers: this.getAuthHeaders()
                 });
-                // Python returns response.json(), check if TS response needs similar handling
-                return Boolean(response.data); // Assuming response.data indicates success
-            }
-            catch (error) {
-                if (axios_1.default.isAxiosError(error) && error.response) {
-                    // Check for specific conflict error (e.g., 409) if API provides it
-                    if (error.response.status === 409) {
-                        console.warn(`Project '${projectName}' already exists.`);
-                        return false; // Or true if idempotent creation is desired
-                    }
-                    throw new Error(`Error creating project (${error.response.status}): ${JSON.stringify(error.response.data)}`);
+                // Check for specific success message or status if API provides one
+                if (response.data && response.data.message === 'Project added successfully') {
+                    logger_instance_js_1.default.info(`Successfully created project: ${projectName}`);
+                    return true;
                 }
-                else if (error instanceof Error) {
-                    throw new Error(`Error creating project: ${error.message}`);
+                else if (response.data && response.data.message === 'Project already exists') {
+                    logger_instance_js_1.default.warn(`Project '${projectName}' already exists.`);
+                    return true; // Or false, depending on desired behavior for existing projects
                 }
                 else {
-                    throw new Error(`Error creating project: ${String(error)}`);
+                    logger_instance_js_1.default.error(`Failed to create project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
+                    return false;
                 }
             }
+            catch (error) {
+                logger_instance_js_1.default.error(`Error creating project: ${error}`);
+                this.handleApiError(error, 'createProject');
+                return false;
+            }
         });
     }
     /**
@@ -634,37 +427,29 @@ class JudgmentClient {
      */
     deleteProject(projectName) {
         return __awaiter(this, void 0, void 0, function* () {
+            logger_instance_js_1.default.info(`Deleting project: ${projectName}`);
+            const requestBody = {
+                project_name: projectName,
+                judgment_api_key: this.judgmentApiKey,
+            };
             try {
-                const response = yield axios_1.default.delete(constants_js_1.JUDGMENT_PROJECT_DELETE_API_URL, // Use constant
-                {
-                    // Remove judgment_api_key from body to match Python (uses header auth)
-                    data: {
-                        project_name: projectName,
-                    },
-                    headers: {
-                        'Content-Type': 'application/json',
-                        'Authorization': `Bearer ${this.judgmentApiKey}`,
-                        'X-Organization-Id': this.organizationId
-                    }
+                const response = yield axios_1.default.post(constants_js_1.JUDGMENT_PROJECT_DELETE_API_URL, requestBody, {
+                    headers: this.getAuthHeaders()
                 });
-                // Python returns response.json(), check if TS response needs similar handling
-                return Boolean(response.data); // Assuming response.data indicates success
-            }
-            catch (error) {
-                if (axios_1.default.isAxiosError(error) && error.response) {
-                    if (error.response.status === 404) {
-                        console.warn(`Project '${projectName}' not found for deletion.`);
-                        return false; // Or true depending on desired idempotency
-                    }
-                    throw new Error(`Error deleting project (${error.response.status}): ${JSON.stringify(error.response.data)}`);
-                }
-                else if (error instanceof Error) {
-                    throw new Error(`Error deleting project: ${error.message}`);
+                if (response.data && response.data.message === 'Project deleted successfully') {
+                    logger_instance_js_1.default.info(`Successfully deleted project: ${projectName}`);
+                    return true;
                 }
                 else {
-                    throw new Error(`Error deleting project: ${String(error)}`);
+                    logger_instance_js_1.default.error(`Failed to delete project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
+                    return false;
                 }
             }
+            catch (error) {
+                logger_instance_js_1.default.error(`Error deleting project: ${error}`);
+                this.handleApiError(error, 'deleteProject');
+                return false;
+            }
         });
     }
     /**
@@ -672,35 +457,36 @@ class JudgmentClient {
      */
     validateApiKey() {
         return __awaiter(this, void 0, void 0, function* () {
-            var _a, _b;
+            var _a, _b, _c, _d;
+            logger_instance_js_1.default.debug('Validating API Key...');
             try {
-                const response = yield axios_1.default.post(`${constants_js_1.ROOT_API}/validate_api_key/`, // Use ROOT_API
-                {}, // Empty body
-                {
-                    headers: {
-                        'Content-Type': 'application/json',
-                        'Authorization': `Bearer ${this.judgmentApiKey}`,
-                        // Removed 'X-Organization-Id' header to match Python for this specific endpoint
-                    }
-                });
-                if (response.status === 200) {
-                    return [true, JSON.stringify(response.data)];
-                }
-                else {
-                    // Status might be non-200 but still valid JSON error response
-                    return [false, ((_a = response.data) === null || _a === void 0 ? void 0 : _a.detail) || `Error validating API key (Status: ${response.status})`];
-                }
+                // Instantiate EvalDatasetClient to perform the validation call
+                const datasetClient = new eval_dataset_client_js_1.EvalDatasetClient(this.judgmentApiKey, this.organizationId);
+                // Use the dataset client to make the call
+                yield datasetClient.pullProjectDatasetStats('__api_key_validation__');
+                logger_instance_js_1.default.debug('API Key appears valid.');
+                return [true, 'API Key is valid.'];
             }
             catch (error) {
-                if (axios_1.default.isAxiosError(error) && error.response) {
-                    return [false, ((_b = error.response.data) === null || _b === void 0 ? void 0 : _b.detail) || `Error validating API key (Status: ${error.response.status})`];
-                }
-                else if (error instanceof Error) {
-                    return [false, `Error validating API key: ${error.message}`];
+                let message = 'API Key validation failed.';
+                if (axios_1.default.isAxiosError(error)) {
+                    if (((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 401 || ((_b = error.response) === null || _b === void 0 ? void 0 : _b.status) === 403) {
+                        message = 'API Key is invalid or expired.';
+                    }
+                    else if (((_c = error.response) === null || _c === void 0 ? void 0 : _c.status) === 404) {
+                        // If validation endpoint returns 404, key might be valid but endpoint wrong/project not found
+                        // This depends on the specific validation endpoint behavior
+                        message = 'API Key might be valid, but validation endpoint returned 404.';
+                    }
+                    else {
+                        message = `API Key validation failed with status ${(_d = error.response) === null || _d === void 0 ? void 0 : _d.status}: ${error.message}`;
+                    }
                 }
                 else {
-                    return [false, `Unknown error validating API key: ${String(error)}`];
+                    message = `API Key validation failed: ${String(error)}`;
                 }
+                logger_instance_js_1.default.error(message);
+                return [false, message];
             }
         });
     }
@@ -725,17 +511,12 @@ class JudgmentClient {
      */
     pullEvalResults(projectName, evalRunName) {
         return __awaiter(this, void 0, void 0, function* () {
-            var _a;
-            try {
-                const evalRunArray = yield this.pullEval(projectName, evalRunName);
-                // pullEval returns [{ id: ..., results: [...] }], extract results
-                return ((_a = evalRunArray[0]) === null || _a === void 0 ? void 0 : _a.results) || [];
-            }
-            catch (error) {
-                // Log error but return empty array to allow waitForEvaluation to potentially retry
-                logger_instance_js_1.default.error(`Failed to pull evaluation results for '${evalRunName}': ${error instanceof Error ? error.message : String(error)}`);
+            const rawResults = yield this.pullEval(projectName, evalRunName);
+            if (!rawResults || rawResults.length === 0 || !rawResults[0].results) {
                 return [];
             }
+            // Assuming pullEval correctly returns results in the expected format
+            return rawResults[0].results;
         });
     }
     /**
@@ -747,88 +528,72 @@ class JudgmentClient {
      */
     checkEvalStatus(projectName, evalRunName) {
         return __awaiter(this, void 0, void 0, function* () {
-            var _a, _b;
-            // Using 'eval_name' in body for consistency with pullEval/fetch endpoint.
+            var _a;
             const requestBody = {
                 project_name: projectName,
-                eval_name: evalRunName, // Use 'eval_name'
+                eval_name: evalRunName,
                 judgment_api_key: this.judgmentApiKey,
             };
             try {
-                const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL, // Use fetch URL
-                requestBody, {
-                    headers: {
-                        'Content-Type': 'application/json',
-                        'Authorization': `Bearer ${this.judgmentApiKey}`,
-                        'X-Organization-Id': this.organizationId
-                    },
-                    timeout: 15000 // Slightly increased timeout for status checks
+                const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL, requestBody, {
+                    headers: this.getAuthHeaders(),
+                    // Add a shorter timeout for status checks?
+                    // timeout: 5000
                 });
-                // Interpret response: API might return status object or full results array
-                let statusData = { status: 'unknown', progress: 0, message: '' };
-                if (Array.isArray(response.data)) {
-                    // If it's an array, assume results are complete unless explicitly stated otherwise
-                    if (response.data.length > 0 && ((_b = (_a = response.data[0]) === null || _a === void 0 ? void 0 : _a.result) === null || _b === void 0 ? void 0 : _b.status)) {
-                        // Check if the first result object contains status info
-                        statusData = response.data[0].result; // Assuming status is within the 'result' field
-                    }
-                    else if (response.data.length > 0) {
-                        // Assume complete if we get results array without specific status fields
-                        statusData = { status: 'complete', progress: 1.0, message: 'Results received' };
-                    }
-                    else {
-                        // Empty array might mean still processing or no results yet
-                        statusData = { status: 'processing', progress: 0, message: 'Waiting for results...' };
-                    }
-                }
-                else if (typeof response.data === 'object' && response.data !== null && response.data.status) {
-                    // Might be a direct status object from the API
-                    statusData = response.data;
-                }
+                const data = response.data;
+                // Check if the response looks like a status object
+                if (data && typeof data.status === 'string') {
+                    return {
+                        status: data.status || 'unknown',
+                        progress: typeof data.progress === 'number' ? data.progress : 0,
+                        message: data.message || '',
+                        error: data.error
+                    };
+                }
+                // Check if the response looks like completed results (array format from pullEval)
+                else if (Array.isArray(data) && data.length > 0 && data[0].results) {
+                    return {
+                        status: 'completed',
+                        progress: 100,
+                        message: 'Evaluation completed.'
+                    };
+                }
+                // Check if response looks like completed results (single object format)
+                else if (data && typeof data.id === 'string' && Array.isArray(data.results)) { // Adjust based on actual API response for single result fetch
+                    return {
+                        status: 'completed',
+                        progress: 100,
+                        message: 'Evaluation completed.'
+                    };
+                }
+                // Handle other potential responses or assume pending/unknown
                 else {
-                    // Unexpected response format
-                    statusData = { status: 'unknown', progress: 0, message: `Unexpected response format: ${JSON.stringify(response.data)}` };
-                }
-                // Normalize the progress value
-                let progress = 0;
-                if (statusData.progress !== undefined && statusData.progress !== null) {
-                    const parsedProgress = parseFloat(statusData.progress);
-                    if (!isNaN(parsedProgress)) {
-                        progress = Math.max(0, Math.min(1, parsedProgress)); // Ensure progress is between 0 and 1
-                    }
+                    logger_instance_js_1.default.warn(`Unexpected response format when checking status for ${evalRunName}:`, data);
+                    return {
+                        status: 'unknown',
+                        progress: 0,
+                        message: 'Could not determine status from API response.'
+                    };
                 }
-                const normalizedStatus = {
-                    status: statusData.status || 'unknown',
-                    progress: progress,
-                    message: statusData.message || '',
-                    error: statusData.error // Include error field if present
-                };
-                // Only log status if it's not being called from waitForEvaluation
-                // Check stack trace for caller function name
-                const stack = new Error().stack;
-                const isCalledByWaitForEvaluation = stack === null || stack === void 0 ? void 0 : stack.includes('waitForEvaluation');
-                if (!isCalledByWaitForEvaluation) {
-                    // Use logger for status updates when called directly
-                    logger_instance_js_1.default.info(`Evaluation Status: ${normalizedStatus.status}`);
-                    logger_instance_js_1.default.info(`Progress: ${Math.round(normalizedStatus.progress * 100)}%`);
-                    if (normalizedStatus.message) {
-                        logger_instance_js_1.default.info(`Message: ${normalizedStatus.message}`);
-                    }
-                    if (normalizedStatus.error) {
-                        logger_instance_js_1.default.error(`Error in status: ${normalizedStatus.error}`);
-                    }
-                }
-                return normalizedStatus;
             }
             catch (error) {
-                // Don't throw errors from status check, just return default 'unknown' status
-                // This allows waitForEvaluation to continue polling even on transient network issues
-                const errorMessage = error instanceof Error ? error.message : String(error);
-                logger_instance_js_1.default.error(`Error checking evaluation status for '${evalRunName}': ${errorMessage}`);
+                // Don't throw here, return status indicating error
+                let errorMessage = 'Failed to fetch evaluation status.';
+                let status = 'error';
+                if (axios_1.default.isAxiosError(error) && ((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 404) {
+                    status = 'not_found';
+                    errorMessage = 'Evaluation run not found.';
+                    logger_instance_js_1.default.warn(`Evaluation run ${evalRunName} not found.`);
+                }
+                else {
+                    this.handleApiError(error, 'checkEvalStatus');
+                    errorMessage = `Error fetching status: ${String(error)}`;
+                }
                 return {
-                    status: 'unknown',
+                    status: status,
                     progress: 0,
-                    message: `Error checking status: ${errorMessage}`
+                    message: errorMessage,
+                    error: String(error) // Include error string
                 };
             }
         });
@@ -842,105 +607,79 @@ class JudgmentClient {
      */
     waitForEvaluation(projectName_1, evalRunName_1) {
         return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, options = {}) {
-            const { intervalMs = 3000, // Slightly longer interval
-            maxAttempts = 200, // ~10 minutes total wait time (200 * 3s)
-            showProgress = true } = options;
-            let attempts = 0;
-            let lastProgressPercent = -1;
-            let lastStatus = '';
+            const { intervalMs = 5000, maxAttempts = 120, showProgress = true } = options; // Default: check every 5s for 10 mins
+            let progressBar;
             if (showProgress) {
-                // Use logger for initial message
-                logger_instance_js_1.default.info(`Waiting for evaluation "${evalRunName}" in project "${projectName}" to complete...`);
+                progressBar = new cli_progress_1.default.SingleBar({
+                    format: `Waiting for ${ansi_colors_1.default.magenta(evalRunName)}... | ${ansi_colors_1.default.cyan('{bar}')} | {percentage}% || {status}`,
+                    barCompleteChar: '\u2588',
+                    barIncompleteChar: '\u2591',
+                    hideCursor: true,
+                    clearOnComplete: false,
+                    stopOnComplete: true,
+                }, cli_progress_1.default.Presets.shades_classic);
+                progressBar.start(100, 0, { status: 'Initiating...' });
             }
-            while (attempts < maxAttempts) {
-                attempts++;
+            for (let attempt = 1; attempt <= maxAttempts; attempt++) {
                 try {
-                    const status = yield this.checkEvalStatus(projectName, evalRunName); // Call internal status check
-                    const currentProgressPercent = Math.round(status.progress * 100);
-                    // Show progress/status updates only when they change significantly
-                    if (showProgress && (currentProgressPercent !== lastProgressPercent || status.status !== lastStatus)) {
-                        const progressBar = this._createProgressBar(currentProgressPercent >= 0 ? currentProgressPercent : 0);
-                        // Use process.stdout.write to potentially overwrite the line (works best in standard terminals)
-                        process.stdout.write('\rAttempt ' + attempts + '/' + maxAttempts + ' | Status: ' + status.status + ' | Progress: ' + progressBar + ' ' + currentProgressPercent + '% ');
-                        lastProgressPercent = currentProgressPercent;
-                        lastStatus = status.status;
-                    }
-                    // Check evaluation status
-                    if (status.status === 'complete') {
-                        if (showProgress) {
-                            process.stdout.write('\n'); // Keep direct console output for progress bar newline
-                            // Use logger for status update
-                            logger_instance_js_1.default.info('Evaluation complete! Fetching results...');
-                        }
-                        try {
-                            // Use the dedicated results fetching method
-                            const results = yield this.pullEvalResults(projectName, evalRunName);
-                            if (results.length > 0) {
-                                // Use logger for status update
-                                logger_instance_js_1.default.info(`Successfully fetched ${results.length} results.`);
-                                return results;
-                            }
-                            else {
-                                // If complete status but no results, might be an issue. Log and return empty.
-                                logger_instance_js_1.default.warn(`Evaluation reported complete, but no results were fetched for '${evalRunName}'.`);
-                                return [];
-                            }
+                    const statusResult = yield this.checkEvalStatus(projectName, evalRunName);
+                    const progress = Math.max(0, Math.min(100, statusResult.progress || 0)); // Clamp progress
+                    const statusText = statusResult.message || statusResult.status;
+                    if (progressBar) {
+                        progressBar.update(progress, { status: statusText });
+                    }
+                    if (statusResult.status === 'completed') {
+                        if (progressBar) {
+                            progressBar.update(100, { status: ansi_colors_1.default.green('Completed! Fetching results...') });
                         }
-                        catch (fetchError) {
-                            if (showProgress)
-                                process.stdout.write('\n'); // Keep direct console output
-                            logger_instance_js_1.default.error(`Error fetching results after completion for '${evalRunName}': ${fetchError instanceof Error ? fetchError.message : String(fetchError)}`);
-                            return []; // Return empty array on error
-                        }
-                    }
-                    else if (status.status === 'failed') {
-                        if (showProgress)
-                            process.stdout.write('\n'); // Keep direct console output
-                        logger_instance_js_1.default.error(`Evaluation failed for '${evalRunName}': ${status.error || status.message || 'Unknown error'}`);
-                        return []; // Return empty array on failure
-                    }
-                    else if (status.status === 'unknown') {
-                        // Log unknown status but continue polling
-                        // Avoid flooding logs if status remains unknown
-                        if (lastStatus !== 'unknown') {
-                            if (showProgress)
-                                process.stdout.write('\n'); // Keep direct console output
-                            logger_instance_js_1.default.warn(`Evaluation status unknown for '${evalRunName}' (attempt ${attempts}). Retrying...`);
-                            lastProgressPercent = -1; // Reset progress display
-                        }
-                        lastStatus = 'unknown';
-                    }
-                    else {
-                        // Still processing (e.g., 'processing', 'running', 'pending')
-                        lastStatus = status.status;
-                    }
+                        // Fetch final results using pullEval
+                        const finalResults = yield this.pullEvalResults(projectName, evalRunName);
+                        logger_instance_js_1.default.info(`Evaluation run ${evalRunName} completed successfully.`);
+                        return finalResults;
+                    }
+                    else if (statusResult.status === 'error' || statusResult.status === 'failed') {
+                        // Concatenate error details into a single message string
+                        const errorMsg = `Evaluation run ${evalRunName} failed or encountered an error: ${statusResult.error ? String(statusResult.error) : statusResult.message}`;
+                        logger_instance_js_1.default.error(errorMsg);
+                        if (progressBar)
+                            progressBar.stop();
+                        // Pass only the combined message to the constructor
+                        throw new run_evaluation_js_1.JudgmentAPIError(errorMsg);
+                    }
+                    else if (statusResult.status === 'not_found') {
+                        const errorMsg = `Evaluation run ${evalRunName} not found.`;
+                        logger_instance_js_1.default.error(errorMsg);
+                        if (progressBar)
+                            progressBar.stop();
+                        // Pass only the message to the constructor
+                        throw new run_evaluation_js_1.JudgmentAPIError(errorMsg);
+                    }
+                    // Wait for the next interval
+                    yield new Promise(resolve => setTimeout(resolve, intervalMs));
                 }
                 catch (error) {
-                    // Log the error but continue polling (checkEvalStatus should handle internal errors gracefully)
-                    if (showProgress)
-                        process.stdout.write('\n'); // Keep direct console output
-                    logger_instance_js_1.default.error(`Error during status check loop (attempt ${attempts}/${maxAttempts}): ${error instanceof Error ? error.message : String(error)}`);
-                    lastProgressPercent = -1; // Reset progress display
-                    lastStatus = 'error_in_loop'; // Indicate issue in the loop itself
-                }
-                // Wait before next poll only if not completed/failed
-                if (lastStatus !== 'complete' && lastStatus !== 'failed') {
+                    // Handle errors during the wait loop (e.g., network issues during checkEvalStatus)
+                    logger_instance_js_1.default.error(`Error during waitForEvaluation loop (attempt ${attempt}): ${error}`);
+                    // Option: Rethrow immediately vs. retry vs. specific handling
+                    if (error instanceof run_evaluation_js_1.JudgmentAPIError) { // If it was already a processed API error, rethrow
+                        if (progressBar)
+                            progressBar.stop();
+                        throw error;
+                    }
+                    // For other errors, wait and retry (up to maxAttempts)
+                    if (attempt === maxAttempts) {
+                        if (progressBar)
+                            progressBar.stop();
+                        throw new Error(`waitForEvaluation failed after ${maxAttempts} attempts: ${error}`);
+                    }
+                    // Still retryable, wait for interval
                     yield new Promise(resolve => setTimeout(resolve, intervalMs));
                 }
-                else {
-                    // Break loop if already completed or failed to avoid unnecessary delay
-                    break;
-                }
-            } // End while loop
-            // If loop finished without completing/failing
-            if (lastStatus !== 'complete' && lastStatus !== 'failed') {
-                if (showProgress)
-                    process.stdout.write('\n'); // Keep direct console output
-                logger_instance_js_1.default.error(`Evaluation polling timed out after ${attempts} attempts for "${evalRunName}". Last known status: ${lastStatus}`);
-                return []; // Return empty array on timeout
             }
-            // Should technically be unreachable if break conditions work, but safeguard return
-            return [];
+            // If loop finishes without completion or error
+            if (progressBar)
+                progressBar.stop();
+            throw new Error(`Evaluation run ${evalRunName} did not complete after ${maxAttempts} attempts.`);
         });
     }
     /**
@@ -949,12 +688,48 @@ class JudgmentClient {
      * @returns A string representing the progress bar
      */
     _createProgressBar(percent) {
-        const width = 25; // Slightly wider bar
-        // Clamp percent between 0 and 100
-        const clampedPercent = Math.max(0, Math.min(100, percent));
-        const completed = Math.round(width * (clampedPercent / 100)); // Use round for potentially smoother look
-        const remaining = width - completed;
-        return '[' + '#'.repeat(completed) + '-'.repeat(remaining) + ']'; // Use different chars
+        const width = 20; // Width of the progress bar
+        const filled = Math.round(width * (percent / 100));
+        const empty = width - filled;
+        return `[${'#'.repeat(filled)}${'.'.repeat(empty)}] ${percent.toFixed(1)}%`;
+    }
+    // Keep helper methods private
+    getAuthHeaders() {
+        return {
+            'Content-Type': 'application/json',
+            'Authorization': `Bearer ${this.judgmentApiKey}`,
+            'X-Organization-Id': this.organizationId,
+        };
+    }
+    // Ensure this handles errors from Eval/Project API calls correctly
+    handleApiError(error, context) {
+        logger_instance_js_1.default.error(`API Error during ${context}:`);
+        if (axios_1.default.isAxiosError(error)) {
+            const axiosError = error;
+            const response = axiosError.response;
+            if (response) {
+                logger_instance_js_1.default.error(`Status: ${response.status} ${response.statusText}`);
+                logger_instance_js_1.default.debug('Response Data:', response.data);
+                if (response.status === 422) {
+                    logger_instance_js_1.default.error('Validation Error Detail:', response.data);
+                }
+                else if (context === 'pullEval' && response.status === 404) { // Keep eval-specific handling
+                    logger_instance_js_1.default.error(`Evaluation run not found.`);
+                }
+                else if (context.startsWith('delete') && response.status === 404) { // Keep generic delete handling
+                    logger_instance_js_1.default.warn(`${context}: Resource not found, may have already been deleted.`);
+                }
+            }
+            else if (axiosError.request) {
+                logger_instance_js_1.default.error('No response received from server.');
+            }
+            else {
+                logger_instance_js_1.default.error(`Error setting up API request for ${context}`);
+            }
+        }
+        else {
+            logger_instance_js_1.default.error(`Unexpected error during ${context}`);
+        }
     }
 }
 exports.JudgmentClient = JudgmentClient;