judgeval 0.0.51__py3-none-any.whl → 0.0.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. judgeval/common/logger.py +46 -199
  2. judgeval/common/s3_storage.py +2 -6
  3. judgeval/common/tracer.py +182 -262
  4. judgeval/common/utils.py +16 -36
  5. judgeval/constants.py +14 -20
  6. judgeval/data/__init__.py +0 -2
  7. judgeval/data/datasets/dataset.py +6 -10
  8. judgeval/data/datasets/eval_dataset_client.py +25 -27
  9. judgeval/data/example.py +5 -138
  10. judgeval/data/judgment_types.py +214 -0
  11. judgeval/data/result.py +7 -25
  12. judgeval/data/scorer_data.py +28 -40
  13. judgeval/data/scripts/fix_default_factory.py +23 -0
  14. judgeval/data/scripts/openapi_transform.py +123 -0
  15. judgeval/data/tool.py +3 -54
  16. judgeval/data/trace.py +31 -50
  17. judgeval/data/trace_run.py +3 -3
  18. judgeval/evaluation_run.py +16 -23
  19. judgeval/integrations/langgraph.py +11 -12
  20. judgeval/judges/litellm_judge.py +3 -6
  21. judgeval/judges/mixture_of_judges.py +8 -25
  22. judgeval/judges/together_judge.py +3 -6
  23. judgeval/judgment_client.py +22 -24
  24. judgeval/rules.py +7 -19
  25. judgeval/run_evaluation.py +79 -242
  26. judgeval/scorers/__init__.py +4 -20
  27. judgeval/scorers/agent_scorer.py +21 -0
  28. judgeval/scorers/api_scorer.py +28 -38
  29. judgeval/scorers/base_scorer.py +98 -0
  30. judgeval/scorers/example_scorer.py +19 -0
  31. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -20
  32. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +10 -17
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +9 -24
  34. judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +16 -68
  35. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +4 -12
  36. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +4 -4
  37. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +10 -17
  38. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +4 -4
  39. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +4 -4
  40. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +4 -4
  41. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +18 -14
  42. judgeval/scorers/score.py +45 -330
  43. judgeval/scorers/utils.py +6 -88
  44. judgeval/utils/file_utils.py +4 -6
  45. judgeval/version_check.py +3 -2
  46. {judgeval-0.0.51.dist-info → judgeval-0.0.53.dist-info}/METADATA +3 -2
  47. judgeval-0.0.53.dist-info/RECORD +65 -0
  48. judgeval/data/custom_example.py +0 -19
  49. judgeval/scorers/judgeval_scorer.py +0 -177
  50. judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +0 -45
  51. judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -29
  52. judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -29
  53. judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -32
  54. judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +0 -28
  55. judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -38
  56. judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -27
  57. judgeval/scorers/prompt_scorer.py +0 -296
  58. judgeval-0.0.51.dist-info/RECORD +0 -69
  59. {judgeval-0.0.51.dist-info → judgeval-0.0.53.dist-info}/WHEEL +0 -0
  60. {judgeval-0.0.51.dist-info → judgeval-0.0.53.dist-info}/licenses/LICENSE.md +0 -0
judgeval/common/utils.py CHANGED
@@ -31,7 +31,7 @@ from judgeval.constants import (
  TOGETHER_SUPPORTED_MODELS,
  LITELLM_SUPPORTED_MODELS,
  )
- from judgeval.common.logger import debug, error
+ from judgeval.common.logger import judgeval_logger
 
 
  class CustomModelParameters(pydantic.BaseModel):
@@ -40,18 +40,21 @@ class CustomModelParameters(pydantic.BaseModel):
  litellm_base_url: str
 
  @pydantic.field_validator("model_name")
+ @classmethod
  def validate_model_name(cls, v):
  if not v:
  raise ValueError("Model name cannot be empty")
  return v
 
  @pydantic.field_validator("secret_key")
+ @classmethod
  def validate_secret_key(cls, v):
  if not v:
  raise ValueError("Secret key cannot be empty")
  return v
 
  @pydantic.field_validator("litellm_base_url")
+ @classmethod
  def validate_litellm_base_url(cls, v):
  if not v:
  raise ValueError("Litellm base URL cannot be empty")
@@ -64,6 +67,7 @@ class ChatCompletionRequest(pydantic.BaseModel):
  response_format: Optional[Union[pydantic.BaseModel, Dict[str, Any]]] = None
 
  @pydantic.field_validator("messages")
+ @classmethod
  def validate_messages(cls, messages):
  if not messages:
  raise ValueError("Messages cannot be empty")
@@ -83,6 +87,7 @@ class ChatCompletionRequest(pydantic.BaseModel):
  return messages
 
  @pydantic.field_validator("model")
+ @classmethod
  def validate_model(cls, model):
  if not model:
  raise ValueError("Model cannot be empty")
@@ -91,6 +96,7 @@ class ChatCompletionRequest(pydantic.BaseModel):
  return model
 
  @pydantic.field_validator("response_format", mode="before")
+ @classmethod
  def validate_response_format(cls, response_format):
  if response_format is not None:
  if not isinstance(response_format, (dict, pydantic.BaseModel)):
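
Aside: the `+ @classmethod` lines above stack `@classmethod` under `@pydantic.field_validator(...)`, which is the documented Pydantic v2 convention for field validators. A minimal standalone sketch of the pattern (not judgeval's actual model):

```python
# Standalone sketch of the field_validator + @classmethod stacking used above.
import pydantic


class CustomModelParametersSketch(pydantic.BaseModel):
    secret_key: str

    @pydantic.field_validator("secret_key")
    @classmethod
    def validate_secret_key(cls, v: str) -> str:
        # Same check as the validators in this diff: reject empty values.
        if not v:
            raise ValueError("Secret key cannot be empty")
        return v


print(CustomModelParametersSketch(secret_key="abc").secret_key)  # abc
```

Pydantic applies the validator as a class method either way; the explicit decorator mainly keeps type checkers and linters happy.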
@@ -145,11 +151,7 @@ def fetch_together_api_response(
  model=model, messages=messages, response_format=response_format
  )
 
- debug(f"Calling Together API with model: {request.model}")
- debug(f"Messages: {request.messages}")
-
  if request.response_format is not None:
- debug(f"Using response format: {request.response_format}")
  response = together_client.chat.completions.create(
  model=request.model,
  messages=request.messages,
@@ -161,7 +163,6 @@
  messages=request.messages,
  )
 
- debug(f"Received response: {response.choices[0].message.content[:100]}...")
  return response.choices[0].message.content
 
 
@@ -175,11 +176,7 @@ async def afetch_together_api_response(
  model=model, messages=messages, response_format=response_format
  )
 
- debug(f"Calling Together API with model: {request.model}")
- debug(f"Messages: {request.messages}")
-
  if request.response_format is not None:
- debug(f"Using response format: {request.response_format}")
  response = await async_together_client.chat.completions.create(
  model=request.model,
  messages=request.messages,
@@ -251,7 +248,7 @@ def query_together_api_multiple_calls(
  try:
  out[idx] = future.result()
  except Exception as e:
- error(f"Error in parallel call {idx}: {str(e)}")
+ judgeval_logger.error(f"Error in parallel call {idx}: {str(e)}")
  out[idx] = None
  return out
 
@@ -294,17 +291,15 @@ async def aquery_together_api_multiple_calls(
  # Validate message format
  validate_batched_chat_messages(messages)
 
- debug(f"Starting parallel Together API calls for {len(messages)} messages")
  out: List[Union[str, None]] = [None] * len(messages)
 
  async def fetch_and_store(idx, model, message, response_format):
  try:
- debug(f"Processing call {idx} with model {model}")
  out[idx] = await afetch_together_api_response(
  model, message, response_format
  )
  except Exception as e:
- error(f"Error in parallel call {idx}: {str(e)}")
+ judgeval_logger.error(f"Error in parallel call {idx}: {str(e)}")
  out[idx] = None
 
  tasks = [
@@ -315,7 +310,6 @@
  ]
 
  await asyncio.gather(*tasks)
- debug(f"Completed {len(messages)} parallel calls")
  return out
 
 
@@ -329,11 +323,7 @@ def fetch_litellm_api_response(
  model=model, messages=messages, response_format=response_format
  )
 
- debug(f"Calling LiteLLM API with model: {request.model}")
- debug(f"Messages: {request.messages}")
-
  if request.response_format is not None:
- debug(f"Using response format: {request.response_format}")
  response = litellm.completion(
  model=request.model,
  messages=request.messages,
@@ -483,7 +473,7 @@ def query_litellm_api_multiple_calls(
  try:
  out[idx] = future.result()
  except Exception as e:
- error(f"Error in parallel call {idx}: {str(e)}")
+ judgeval_logger.error(f"Error in parallel call {idx}: {str(e)}")
  out[idx] = None
  return out
 
@@ -513,7 +503,7 @@ async def aquery_litellm_api_multiple_calls(
  model, message, response_format
  )
  except Exception as e:
- error(f"Error in parallel call {idx}: {str(e)}")
+ judgeval_logger.error(f"Error in parallel call {idx}: {str(e)}")
  out[idx] = None
 
  tasks = [
@@ -681,7 +671,6 @@ async def aget_chat_completion(
  Raises:
  - ValueError: If requested model is not supported by Litellm or TogetherAI.
  """
- debug(f"Starting chat completion for model {model_type}, batched={batched}")
 
  if batched:
  validate_batched_chat_messages(messages)
@@ -693,7 +682,6 @@
  and is_batched_messages(messages)
  and model_type in TOGETHER_SUPPORTED_MODELS
  ):
- debug("Using batched Together API call")
  return await aquery_together_api_multiple_calls(
  models=[model_type] * len(messages),
  messages=messages,
@@ -704,7 +692,6 @@
  and is_batched_messages(messages)
  and model_type in LITELLM_SUPPORTED_MODELS
  ):
- debug("Using batched LiteLLM API call")
  return await aquery_litellm_api_multiple_calls(
  models=[model_type] * len(messages),
  messages=messages,
@@ -715,7 +702,6 @@
  and is_simple_messages(messages)
  and model_type in TOGETHER_SUPPORTED_MODELS
  ):
- debug("Using single Together API call")
  return await afetch_together_api_response(
  model=model_type, messages=messages, response_format=response_format
  )
@@ -724,12 +710,11 @@
  and is_simple_messages(messages)
  and model_type in LITELLM_SUPPORTED_MODELS
  ):
- debug("Using single LiteLLM API call")
  return await afetch_litellm_api_response(
  model=model_type, messages=messages, response_format=response_format
  )
 
- error(f"Model {model_type} not supported by either API")
+ judgeval_logger.error(f"Model {model_type} not supported by either API")
  raise ValueError(
  f"Model {model_type} is not supported by Litellm or TogetherAI for chat completions. Please check the model name and try again."
  )
@@ -753,7 +738,6 @@ def get_completion_multiple_models(
  Raises:
  ValueError: If a model is not supported by Litellm or Together
  """
- debug(f"Starting multiple model completion for {len(models)} models")
 
  if models is None or models == []:
  raise ValueError("Models list cannot be empty")
@@ -761,7 +745,9 @@
  validate_batched_chat_messages(messages)
 
  if len(models) != len(messages):
- error(f"Model/message count mismatch: {len(models)} vs {len(messages)}")
+ judgeval_logger.error(
+ f"Model/message count mismatch: {len(models)} vs {len(messages)}"
+ )
  raise ValueError(
  f"Number of models and messages must be the same: {len(models)} != {len(messages)}"
  )
@@ -774,13 +760,11 @@
  zip(models, messages, response_formats)
  ):
  if model in TOGETHER_SUPPORTED_MODELS:
- debug(f"Model {model} routed to Together API")
  together_calls[idx] = (model, message, r_format)
  elif model in LITELLM_SUPPORTED_MODELS:
- debug(f"Model {model} routed to LiteLLM API")
  litellm_calls[idx] = (model, message, r_format)
  else:
- error(f"Model {model} not supported by either API")
+ judgeval_logger.error(f"Model {model} not supported by either API")
  raise ValueError(
  f"Model {model} is not supported by Litellm or TogetherAI for chat completions. Please check the model name and try again."
  )
@@ -792,7 +776,6 @@
  # Get the responses from the TogetherAI models
  # List of responses from the TogetherAI models in order of the together_calls dict
  if together_calls:
- debug(f"Executing {len(together_calls)} Together API calls")
  together_responses = query_together_api_multiple_calls(
  models=[model for model, _, _ in together_calls.values()],
  messages=[message for _, message, _ in together_calls.values()],
@@ -801,7 +784,6 @@
 
  # Get the responses from the Litellm models
  if litellm_calls:
- debug(f"Executing {len(litellm_calls)} LiteLLM API calls")
  litellm_responses = query_litellm_api_multiple_calls(
  models=[model for model, _, _ in litellm_calls.values()],
  messages=[message for _, message, _ in litellm_calls.values()],
@@ -809,13 +791,11 @@
  )
 
  # Merge the responses in the order of the original models
- debug("Merging responses")
  out: List[Union[str, None]] = [None] * len(models)
  for idx, (model, message, r_format) in together_calls.items():
  out[idx] = together_responses.pop(0)
  for idx, (model, message, r_format) in litellm_calls.items():
  out[idx] = litellm_responses.pop(0)
- debug("Multiple model completion finished")
  return out
 
 
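Aside: this file (and the files below) drops the module-level `debug`/`error`/`warning`/`info` helpers in favor of a single `judgeval_logger` imported from `judgeval.common.logger`. The rewritten `logger.py` is not shown in this diff, so the following is only a sketch of the kind of shared logger these call sites assume, not the actual implementation:

```python
# Hedged sketch: a shared module-level logger consistent with the new
# judgeval_logger call sites in this diff. Names and handler setup are assumed.
import logging
import sys

judgeval_logger = logging.getLogger("judgeval")
if not judgeval_logger.handlers:
    _handler = logging.StreamHandler(sys.stderr)
    _handler.setFormatter(
        logging.Formatter("%(asctime)s %(name)s %(levelname)s %(message)s")
    )
    judgeval_logger.addHandler(_handler)
    judgeval_logger.setLevel(logging.INFO)

# Call-site change illustrated by this diff:
#   before: error(f"Error in parallel call {idx}: {str(e)}")
#   after:  judgeval_logger.error(f"Error in parallel call {idx}: {str(e)}")
judgeval_logger.error("Error in parallel call %d: %s", 0, "example error")
```

At the call sites the change is mechanical: `error(msg)` becomes `judgeval_logger.error(msg)`, and most of the verbose `debug(...)` breadcrumbs are simply removed.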
judgeval/constants.py CHANGED
@@ -7,7 +7,7 @@ import litellm
  import os
 
 
- class APIScorer(str, Enum):
+ class APIScorerType(str, Enum):
  """
  Collection of proprietary scorers implemented by Judgment.
 
@@ -15,23 +15,17 @@ class APIScorer(str, Enum):
  Examples via the Judgment API.
  """
 
- FAITHFULNESS = "faithfulness"
- ANSWER_RELEVANCY = "answer_relevancy"
- ANSWER_CORRECTNESS = "answer_correctness"
- HALLUCINATION = "hallucination"
- SUMMARIZATION = "summarization"
- CONTEXTUAL_RECALL = "contextual_recall"
- CONTEXTUAL_RELEVANCY = "contextual_relevancy"
- CONTEXTUAL_PRECISION = "contextual_precision"
- INSTRUCTION_ADHERENCE = "instruction_adherence"
- EXECUTION_ORDER = "execution_order"
- JSON_CORRECTNESS = "json_correctness"
- COMPARISON = "comparison"
- GROUNDEDNESS = "groundedness"
- DERAILMENT = "derailment"
- TOOL_ORDER = "tool_order"
- CLASSIFIER = "classifier"
- TOOL_DEPENDENCY = "tool_dependency"
+ PROMPT_SCORER = "Prompt Scorer"
+ FAITHFULNESS = "Faithfulness"
+ ANSWER_RELEVANCY = "Answer Relevancy"
+ ANSWER_CORRECTNESS = "Answer Correctness"
+ INSTRUCTION_ADHERENCE = "Instruction Adherence"
+ EXECUTION_ORDER = "Execution Order"
+ DERAILMENT = "Derailment"
+ TOOL_ORDER = "Tool Order"
+ CLASSIFIER = "Classifier"
+ TOOL_DEPENDENCY = "Tool Dependency"
+ CUSTOM = "Custom"
 
  @classmethod
  def _missing_(cls, value):
@@ -41,8 +35,8 @@ class APIScorer(str, Enum):
  return member
 
 
- UNBOUNDED_SCORERS = set(
- [APIScorer.COMPARISON]
+ UNBOUNDED_SCORERS: set[APIScorerType] = (
+ set()
  ) # scorers whose scores are not bounded between 0-1
 
  ROOT_API = os.getenv("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")
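Aside: `APIScorer` is renamed to `APIScorerType`, its values switch from snake_case identifiers to display names (for example `"faithfulness"` becomes `"Faithfulness"`), several scorers are dropped, and `UNBOUNDED_SCORERS` becomes an empty, typed set now that `COMPARISON` is gone. The `_missing_` hook is retained but its body is not shown above, so the case-insensitive lookup in this standalone sketch is an assumption:

```python
# Standalone sketch of the renamed enum pattern; the real APIScorerType has
# more members, and its _missing_ body is not shown in this diff, so the
# case-insensitive matching below is an assumption.
from enum import Enum


class APIScorerType(str, Enum):
    FAITHFULNESS = "Faithfulness"
    ANSWER_RELEVANCY = "Answer Relevancy"
    CUSTOM = "Custom"

    @classmethod
    def _missing_(cls, value):
        # Tolerate lookups that don't match the stored casing exactly.
        for member in cls:
            if member.value.lower() == str(value).lower():
                return member
        return None


print(APIScorerType("faithfulness"))  # APIScorerType.FAITHFULNESS
```

Callers that relied on the old lowercase `.value` strings will see the new display-name values after this change.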
judgeval/data/__init__.py CHANGED
@@ -1,5 +1,4 @@
  from judgeval.data.example import Example, ExampleParams
- from judgeval.data.custom_example import CustomExample
  from judgeval.data.scorer_data import ScorerData, create_scorer_data
  from judgeval.data.result import ScoringResult, generate_scoring_result
  from judgeval.data.trace import Trace, TraceSpan, TraceUsage
@@ -8,7 +7,6 @@ from judgeval.data.trace import Trace, TraceSpan, TraceUsage
  __all__ = [
  "Example",
  "ExampleParams",
- "CustomExample",
  "ScorerData",
  "create_scorer_data",
  "ScoringResult",
judgeval/data/datasets/dataset.py CHANGED
@@ -8,7 +8,7 @@ from dataclasses import dataclass, field
  from typing import List, Union, Literal, Optional
 
  from judgeval.data import Example, Trace
- from judgeval.common.logger import debug, error, warning, info
+ from judgeval.common.logger import judgeval_logger
  from judgeval.utils.file_utils import get_examples_from_yaml
 
 
@@ -29,7 +29,7 @@ class EvalDataset:
  traces: Optional[List[Trace]] = None,
  ):
  if not judgment_api_key:
- warning("No judgment_api_key provided")
+ judgeval_logger.error("No judgment_api_key provided")
  self.examples = examples or []
  self.traces = traces or []
  self._alias = None
@@ -38,11 +38,10 @@ class EvalDataset:
  self.organization_id = organization_id
 
  def add_from_json(self, file_path: str) -> None:
- debug(f"Loading dataset from JSON file: {file_path}")
  """
  Adds examples from a JSON file.
 
- The format of the JSON file is expected to be a dictionary with one key: "examples".
+ The format of the JSON file is expected to be a dictionary with one key: "examples".
  The value of the key is a list of dictionaries, where each dictionary represents an example.
 
  The JSON file is expected to have the following format:
@@ -82,13 +81,12 @@ class EvalDataset:
  payload = json.load(file)
  examples = payload.get("examples", [])
  except FileNotFoundError:
- error(f"JSON file not found: {file_path}")
+ judgeval_logger.error(f"JSON file not found: {file_path}")
  raise FileNotFoundError(f"The file {file_path} was not found.")
  except json.JSONDecodeError:
- error(f"Invalid JSON file: {file_path}")
+ judgeval_logger.error(f"Invalid JSON file: {file_path}")
  raise ValueError(f"The file {file_path} is not a valid JSON file.")
 
- info(f"Added {len(examples)} examples from JSON")
  new_examples = [Example(**e) for e in examples]
  for e in new_examples:
  self.add_example(e)
@@ -189,11 +187,10 @@ class EvalDataset:
  self.add_example(e)
 
  def add_from_yaml(self, file_path: str) -> None:
- debug(f"Loading dataset from YAML file: {file_path}")
  """
  Adds examples from a YAML file.
 
- The format of the YAML file is expected to be a dictionary with one key: "examples".
+ The format of the YAML file is expected to be a dictionary with one key: "examples".
  The value of the key is a list of dictionaries, where each dictionary represents an example.
 
  The YAML file is expected to have the following format:
@@ -220,7 +217,6 @@
  """
  examples = get_examples_from_yaml(file_path)
 
- info(f"Added {len(examples)} examples from YAML")
  for e in examples:
  self.add_example(e)
 
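Aside: per the docstring retained above, `add_from_json` expects the file to be a JSON object with a single `"examples"` key holding a list of example dictionaries. A hedged sketch of that shape; the fields inside each example are illustrative, since the full `Example` schema is not shown in this diff:

```python
# Sketch of a file add_from_json could consume. The "examples" key comes from
# the docstring above; input/actual_output are assumed example fields.
import json

payload = {
    "examples": [
        {"input": "What is the capital of France?", "actual_output": "Paris"},
    ]
}
with open("dataset.json", "w") as f:
    json.dump(payload, f)

# Usage (requires a Judgment API key; constructor args per the diff above):
# from judgeval.data.datasets.dataset import EvalDataset
# dataset = EvalDataset(judgment_api_key="...")
# dataset.add_from_json("dataset.json")
```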
judgeval/data/datasets/eval_dataset_client.py CHANGED
@@ -2,8 +2,7 @@ from typing import Optional, List
  from requests import Response, exceptions
  from judgeval.utils.requests import requests
  from rich.progress import Progress, SpinnerColumn, TextColumn
-
- from judgeval.common.logger import debug, error, warning, info
+ from judgeval.common.logger import judgeval_logger
  from judgeval.constants import (
  JUDGMENT_DATASETS_PUSH_API_URL,
  JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
@@ -31,9 +30,8 @@ class EvalDatasetClient:
  project_name: str,
  overwrite: Optional[bool] = False,
  ) -> bool:
- debug(f"Pushing dataset with alias '{alias}' (overwrite={overwrite})")
  if overwrite:
- warning(f"Overwrite enabled for alias '{alias}'")
+ judgeval_logger.warning(f"Overwrite enabled for alias '{alias}'")
  """
  Pushes the dataset to Judgment platform
 
@@ -76,16 +74,19 @@ class EvalDatasetClient:
  verify=True,
  )
  if response.status_code != 200:
- error(f"Server error during push: {response.json()}")
+ judgeval_logger.error(
+ f"Server error during push: {response.json()}"
+ )
  raise Exception(f"Server error during push: {response.json()}")
  response.raise_for_status()
  except exceptions.HTTPError as err:
  if response.status_code == 422:
- error(f"Validation error during push: {err.response.json()}")
+ judgeval_logger.error(
+ f"Validation error during push: {err.response.json()}"
+ )
  else:
- error(f"HTTP error during push: {err}")
+ judgeval_logger.error(f"HTTP error during push: {err}")
 
- info(f"Successfully pushed dataset with alias '{alias}'")
  payload = response.json()
  dataset._alias = payload.get("_alias")
  dataset._id = payload.get("_id")
@@ -98,7 +99,6 @@
  def append_examples(
  self, alias: str, examples: List[Example], project_name: str
  ) -> bool:
- debug(f"Appending dataset with alias '{alias}'")
  """
  Appends the dataset to Judgment platform
 
@@ -139,14 +139,18 @@
  verify=True,
  )
  if response.status_code != 200:
- error(f"Server error during append: {response.json()}")
+ judgeval_logger.error(
+ f"Server error during append: {response.json()}"
+ )
  raise Exception(f"Server error during append: {response.json()}")
  response.raise_for_status()
  except exceptions.HTTPError as err:
  if response.status_code == 422:
- error(f"Validation error during append: {err.response.json()}")
+ judgeval_logger.error(
+ f"Validation error during append: {err.response.json()}"
+ )
  else:
- error(f"HTTP error during append: {err}")
+ judgeval_logger.error(f"HTTP error during append: {err}")
 
  progress.update(
  task_id,
@@ -155,7 +159,6 @@
  return True
 
  def pull(self, alias: str, project_name: str) -> EvalDataset:
- debug(f"Pulling dataset with alias '{alias}'")
  """
  Pulls the dataset from Judgment platform
 
@@ -163,7 +166,7 @@
  {
  "alias": alias,
  "project_name": project_name
- }
+ }
  ==>
  {
  "examples": [...],
@@ -198,10 +201,9 @@
  )
  response.raise_for_status()
  except exceptions.RequestException as e:
- error(f"Error pulling dataset: {str(e)}")
+ judgeval_logger.error(f"Error pulling dataset: {str(e)}")
  raise
 
- info(f"Successfully pulled dataset with alias '{alias}'")
  payload = response.json()
  dataset.examples = [Example(**e) for e in payload.get("examples", [])]
  dataset.traces = [Trace(**t) for t in payload.get("traces", [])]
@@ -239,20 +241,19 @@
  )
  response.raise_for_status()
  except exceptions.RequestException as e:
- error(f"Error deleting dataset: {str(e)}")
+ judgeval_logger.error(f"Error deleting dataset: {str(e)}")
  raise
 
  return True
 
  def pull_project_dataset_stats(self, project_name: str) -> dict:
- debug(f"Pulling project datasets stats for project_name: {project_name}'")
  """
- Pulls the project datasets stats from Judgment platform
+ Pulls the project datasets stats from Judgment platform
 
  Mock request:
  {
  "project_name": project_name
- }
+ }
  ==>
  {
  "test_dataset_1": {"examples_count": len(dataset1.examples)},
@@ -286,10 +287,9 @@
  )
  response.raise_for_status()
  except exceptions.RequestException as e:
- error(f"Error pulling dataset: {str(e)}")
+ judgeval_logger.error(f"Error pulling dataset: {str(e)}")
  raise
 
- info(f"Successfully pulled datasets for userid: {self.judgment_api_key}'")
  payload = response.json()
 
  progress.update(
@@ -301,7 +301,6 @@
 
  def export_jsonl(self, alias: str, project_name: str) -> Response:
  """Export dataset in JSONL format from Judgment platform"""
- debug(f"Exporting dataset with alias '{alias}' as JSONL")
  with Progress(
  SpinnerColumn(style="rgb(106,0,255)"),
  TextColumn("[progress.description]{task.description}"),
@@ -326,15 +325,14 @@
  response.raise_for_status()
  except exceptions.HTTPError as err:
  if err.response.status_code == 404:
- error(f"Dataset not found: {alias}")
+ judgeval_logger.error(f"Dataset not found: {alias}")
  else:
- error(f"HTTP error during export: {err}")
+ judgeval_logger.error(f"HTTP error during export: {err}")
  raise
  except Exception as e:
- error(f"Error during export: {str(e)}")
+ judgeval_logger.error(f"Error during export: {str(e)}")
  raise
 
- info(f"Successfully exported dataset with alias '{alias}'")
  progress.update(
  task_id,
  description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
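
Aside: throughout `EvalDatasetClient`, the pattern after this change is to log failures through `judgeval_logger` and then raise, instead of emitting separate `debug`/`info` breadcrumbs. A standalone sketch of that shape using plain `requests`; the URL, payload, and header names are illustrative, not the real Judgment API contract:

```python
# Hedged sketch of the check-status / log / raise pattern shown in this diff.
import logging

import requests

judgeval_logger = logging.getLogger("judgeval")


def push_dataset(url: str, payload: dict, api_key: str) -> dict:
    response = requests.post(
        url,
        json=payload,
        headers={"Authorization": f"Bearer {api_key}"},
        timeout=30,
    )
    if response.status_code != 200:
        # Mirror the client's behavior: report via the shared logger, then raise.
        judgeval_logger.error(f"Server error during push: {response.json()}")
        raise Exception(f"Server error during push: {response.json()}")
    return response.json()
```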