judgeval 0.0.44__py3-none-any.whl → 0.0.46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. judgeval/__init__.py +5 -4
  2. judgeval/clients.py +6 -6
  3. judgeval/common/__init__.py +7 -2
  4. judgeval/common/exceptions.py +2 -3
  5. judgeval/common/logger.py +74 -49
  6. judgeval/common/s3_storage.py +30 -23
  7. judgeval/common/tracer.py +1273 -939
  8. judgeval/common/utils.py +416 -244
  9. judgeval/constants.py +73 -61
  10. judgeval/data/__init__.py +1 -1
  11. judgeval/data/custom_example.py +3 -2
  12. judgeval/data/datasets/dataset.py +80 -54
  13. judgeval/data/datasets/eval_dataset_client.py +131 -181
  14. judgeval/data/example.py +67 -43
  15. judgeval/data/result.py +11 -9
  16. judgeval/data/scorer_data.py +4 -2
  17. judgeval/data/tool.py +25 -16
  18. judgeval/data/trace.py +57 -29
  19. judgeval/data/trace_run.py +5 -11
  20. judgeval/evaluation_run.py +22 -82
  21. judgeval/integrations/langgraph.py +546 -184
  22. judgeval/judges/base_judge.py +1 -2
  23. judgeval/judges/litellm_judge.py +33 -11
  24. judgeval/judges/mixture_of_judges.py +128 -78
  25. judgeval/judges/together_judge.py +22 -9
  26. judgeval/judges/utils.py +14 -5
  27. judgeval/judgment_client.py +259 -271
  28. judgeval/rules.py +169 -142
  29. judgeval/run_evaluation.py +462 -305
  30. judgeval/scorers/api_scorer.py +20 -11
  31. judgeval/scorers/exceptions.py +1 -0
  32. judgeval/scorers/judgeval_scorer.py +77 -58
  33. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +46 -15
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +3 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +3 -2
  36. judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +12 -11
  37. judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +7 -5
  38. judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +3 -2
  39. judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +3 -2
  40. judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +5 -2
  41. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +2 -1
  42. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +17 -8
  43. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +3 -2
  44. judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +3 -2
  45. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +3 -2
  46. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +3 -2
  47. judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +8 -9
  48. judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +4 -4
  49. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +5 -5
  50. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +5 -2
  51. judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +9 -10
  52. judgeval/scorers/prompt_scorer.py +48 -37
  53. judgeval/scorers/score.py +86 -53
  54. judgeval/scorers/utils.py +11 -7
  55. judgeval/tracer/__init__.py +1 -1
  56. judgeval/utils/alerts.py +23 -12
  57. judgeval/utils/{data_utils.py → file_utils.py} +5 -9
  58. judgeval/utils/requests.py +29 -0
  59. judgeval/version_check.py +5 -2
  60. {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/METADATA +79 -135
  61. judgeval-0.0.46.dist-info/RECORD +69 -0
  62. judgeval-0.0.44.dist-info/RECORD +0 -68
  63. {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/WHEEL +0 -0
  64. {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/licenses/LICENSE.md +0 -0
judgeval/constants.py CHANGED
@@ -6,13 +6,15 @@ from enum import Enum
 import litellm
 import os
 
-class APIScorer(str, Enum):
+
+class APIScorer(str, Enum):
     """
     Collection of proprietary scorers implemented by Judgment.
 
     These are ready-made evaluation scorers that can be used to evaluate
     Examples via the Judgment API.
     """
+
     FAITHFULNESS = "faithfulness"
     ANSWER_RELEVANCY = "answer_relevancy"
     ANSWER_CORRECTNESS = "answer_correctness"
@@ -30,6 +32,7 @@ class APIScorer(str, Enum):
     TOOL_ORDER = "tool_order"
     CLASSIFIER = "classifier"
     TOOL_DEPENDENCY = "tool_dependency"
+
     @classmethod
     def _missing_(cls, value):
         # Handle case-insensitive lookup
@@ -37,7 +40,10 @@ class APIScorer(str, Enum):
         if member.value == value.lower():
             return member
 
-UNBOUNDED_SCORERS = set([APIScorer.COMPARISON])  # scorers whose scores are not bounded between 0-1
+
+UNBOUNDED_SCORERS = set(
+    [APIScorer.COMPARISON]
+)  # scorers whose scores are not bounded between 0-1
 
 ROOT_API = os.getenv("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")
 # API URLs
@@ -52,87 +58,93 @@ JUDGMENT_DATASETS_PROJECT_STATS_API_URL = f"{ROOT_API}/datasets/fetch_stats_by_p
 JUDGMENT_DATASETS_INSERT_API_URL = f"{ROOT_API}/datasets/insert_examples/"
 JUDGMENT_EVAL_LOG_API_URL = f"{ROOT_API}/log_eval_results/"
 JUDGMENT_EVAL_FETCH_API_URL = f"{ROOT_API}/fetch_experiment_run/"
-JUDGMENT_EVAL_DELETE_API_URL = f"{ROOT_API}/delete_eval_results_by_project_and_run_names/"
+JUDGMENT_EVAL_DELETE_API_URL = (
+    f"{ROOT_API}/delete_eval_results_by_project_and_run_names/"
+)
 JUDGMENT_EVAL_DELETE_PROJECT_API_URL = f"{ROOT_API}/delete_eval_results_by_project/"
 JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/delete/"
 JUDGMENT_PROJECT_CREATE_API_URL = f"{ROOT_API}/projects/add/"
 JUDGMENT_TRACES_FETCH_API_URL = f"{ROOT_API}/traces/fetch/"
 JUDGMENT_TRACES_SAVE_API_URL = f"{ROOT_API}/traces/save/"
 JUDGMENT_TRACES_UPSERT_API_URL = f"{ROOT_API}/traces/upsert/"
-JUDGMENT_TRACES_USAGE_CHECK_API_URL = f"{ROOT_API}/traces/usage/check/"
-JUDGMENT_TRACES_USAGE_UPDATE_API_URL = f"{ROOT_API}/traces/usage/update/"
 JUDGMENT_TRACES_DELETE_API_URL = f"{ROOT_API}/traces/delete/"
 JUDGMENT_TRACES_ADD_ANNOTATION_API_URL = f"{ROOT_API}/traces/add_annotation/"
 JUDGMENT_TRACES_SPANS_BATCH_API_URL = f"{ROOT_API}/traces/spans/batch/"
-JUDGMENT_TRACES_EVALUATION_RUNS_BATCH_API_URL = f"{ROOT_API}/traces/evaluation_runs/batch/"
+JUDGMENT_TRACES_EVALUATION_RUNS_BATCH_API_URL = (
+    f"{ROOT_API}/traces/evaluation_runs/batch/"
+)
 JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL = f"{ROOT_API}/add_to_run_eval_queue/"
 JUDGMENT_GET_EVAL_STATUS_API_URL = f"{ROOT_API}/get_evaluation_status/"
 # RabbitMQ
-RABBITMQ_HOST = os.getenv("RABBITMQ_HOST", "rabbitmq-networklb-faa155df16ec9085.elb.us-west-1.amazonaws.com")
+RABBITMQ_HOST = os.getenv(
+    "RABBITMQ_HOST", "rabbitmq-networklb-faa155df16ec9085.elb.us-west-1.amazonaws.com"
+)
 RABBITMQ_PORT = os.getenv("RABBITMQ_PORT", 5672)
 RABBITMQ_QUEUE = os.getenv("RABBITMQ_QUEUE", "task_queue")
 # Models
 LITELLM_SUPPORTED_MODELS = set(litellm.model_list)
 
 TOGETHER_SUPPORTED_MODELS = [
-    "meta-llama/Meta-Llama-3-70B-Instruct-Turbo",
-    "Qwen/Qwen2-VL-72B-Instruct",
-    "meta-llama/Llama-Vision-Free",
-    "Gryphe/MythoMax-L2-13b",
-    "Qwen/Qwen2.5-72B-Instruct-Turbo",
-    "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
-    "deepseek-ai/DeepSeek-R1",
-    "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
-    "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
-    "google/gemma-2-27b-it",
-    "mistralai/Mistral-Small-24B-Instruct-2501",
-    "mistralai/Mixtral-8x22B-Instruct-v0.1",
-    "meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
-    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-classifier",
-    "deepseek-ai/DeepSeek-V3",
-    "Qwen/Qwen2-72B-Instruct",
-    "meta-llama/Meta-Llama-3-8B-Instruct-Lite",
-    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
-    "upstage/SOLAR-10.7B-Instruct-v1.0",
-    "togethercomputer/MoA-1",
-    "Qwen/QwQ-32B-Preview",
-    "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
-    "mistralai/Mistral-7B-Instruct-v0.2",
-    "databricks/dbrx-instruct",
-    "meta-llama/Llama-3-8b-chat-hf",
-    "google/gemma-2b-it",
-    "meta-llama/Meta-Llama-3-70B-Instruct-Lite",
-    "google/gemma-2-9b-it",
-    "meta-llama/Llama-3.3-70B-Instruct-Turbo",
-    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-p",
-    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-    "Gryphe/MythoMax-L2-13b-Lite",
-    "meta-llama/Llama-2-7b-chat-hf",
-    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
-    "meta-llama/Llama-2-13b-chat-hf",
-    "scb10x/scb10x-llama3-typhoon-v1-5-8b-instruct",
-    "scb10x/scb10x-llama3-typhoon-v1-5x-4f316",
-    "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
-    "Qwen/Qwen2.5-Coder-32B-Instruct",
-    "microsoft/WizardLM-2-8x22B",
-    "mistralai/Mistral-7B-Instruct-v0.3",
-    "scb10x/scb10x-llama3-1-typhoon2-60256",
-    "Qwen/Qwen2.5-7B-Instruct-Turbo",
-    "scb10x/scb10x-llama3-1-typhoon-18370",
-    "meta-llama/Llama-3.2-3B-Instruct-Turbo",
-    "meta-llama/Llama-3-70b-chat-hf",
-    "mistralai/Mixtral-8x7B-Instruct-v0.1",
-    "togethercomputer/MoA-1-Turbo",
-    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free",
-    "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
-    "mistralai/Mistral-7B-Instruct-v0.1"
+    "meta-llama/Meta-Llama-3-70B-Instruct-Turbo",
+    "Qwen/Qwen2-VL-72B-Instruct",
+    "meta-llama/Llama-Vision-Free",
+    "Gryphe/MythoMax-L2-13b",
+    "Qwen/Qwen2.5-72B-Instruct-Turbo",
+    "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
+    "deepseek-ai/DeepSeek-R1",
+    "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
+    "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
+    "google/gemma-2-27b-it",
+    "mistralai/Mistral-Small-24B-Instruct-2501",
+    "mistralai/Mixtral-8x22B-Instruct-v0.1",
+    "meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
+    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-classifier",
+    "deepseek-ai/DeepSeek-V3",
+    "Qwen/Qwen2-72B-Instruct",
+    "meta-llama/Meta-Llama-3-8B-Instruct-Lite",
+    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
+    "upstage/SOLAR-10.7B-Instruct-v1.0",
+    "togethercomputer/MoA-1",
+    "Qwen/QwQ-32B-Preview",
+    "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
+    "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+    "mistralai/Mistral-7B-Instruct-v0.2",
+    "databricks/dbrx-instruct",
+    "meta-llama/Llama-3-8b-chat-hf",
+    "google/gemma-2b-it",
+    "meta-llama/Meta-Llama-3-70B-Instruct-Lite",
+    "google/gemma-2-9b-it",
+    "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-p",
+    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+    "Gryphe/MythoMax-L2-13b-Lite",
+    "meta-llama/Llama-2-7b-chat-hf",
+    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+    "meta-llama/Llama-2-13b-chat-hf",
+    "scb10x/scb10x-llama3-typhoon-v1-5-8b-instruct",
+    "scb10x/scb10x-llama3-typhoon-v1-5x-4f316",
+    "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
+    "Qwen/Qwen2.5-Coder-32B-Instruct",
+    "microsoft/WizardLM-2-8x22B",
+    "mistralai/Mistral-7B-Instruct-v0.3",
+    "scb10x/scb10x-llama3-1-typhoon2-60256",
+    "Qwen/Qwen2.5-7B-Instruct-Turbo",
+    "scb10x/scb10x-llama3-1-typhoon-18370",
+    "meta-llama/Llama-3.2-3B-Instruct-Turbo",
+    "meta-llama/Llama-3-70b-chat-hf",
+    "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "togethercomputer/MoA-1-Turbo",
+    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free",
+    "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
+    "mistralai/Mistral-7B-Instruct-v0.1",
 ]
 
 JUDGMENT_SUPPORTED_MODELS = {"osiris-large", "osiris-mini", "osiris"}
 
-ACCEPTABLE_MODELS = set(litellm.model_list) | set(TOGETHER_SUPPORTED_MODELS) | JUDGMENT_SUPPORTED_MODELS
+ACCEPTABLE_MODELS = (
+    set(litellm.model_list) | set(TOGETHER_SUPPORTED_MODELS) | JUDGMENT_SUPPORTED_MODELS
+)
 
 ## System settings
 MAX_WORKER_THREADS = 10
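
Side note on the `_missing_` hook retained above: because `APIScorer` subclasses `str, Enum`, the hook makes value lookup case-insensitive. A minimal standalone sketch of that behavior (trimmed to two members; not the packaged module):

    from enum import Enum

    class APIScorer(str, Enum):
        FAITHFULNESS = "faithfulness"
        ANSWER_RELEVANCY = "answer_relevancy"

        @classmethod
        def _missing_(cls, value):
            # Enum calls _missing_ when the raw value lookup fails;
            # retrying against value.lower() makes lookup case-insensitive.
            for member in cls:
                if member.value == value.lower():
                    return member

    assert APIScorer("FAITHFULNESS") is APIScorer.FAITHFULNESS
    assert APIScorer("Answer_Relevancy") is APIScorer.ANSWER_RELEVANCY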
judgeval/data/__init__.py CHANGED
@@ -15,5 +15,5 @@ __all__ = [
     "generate_scoring_result",
     "Trace",
     "TraceSpan",
-    "TraceUsage"
+    "TraceUsage",
 ]
judgeval/data/custom_example.py CHANGED
@@ -1,7 +1,8 @@
 from pydantic import BaseModel, Field
-from typing import Optional, Union, List, Dict, Any
+from typing import Optional, List, Dict, Any
 from uuid import uuid4
 
+
 class CustomExample(BaseModel):
     input: Optional[Dict[str, Any]] = None
     actual_output: Optional[Dict[str, Any]] = None
@@ -15,4 +16,4 @@ class CustomExample(BaseModel):
     example_id: str = Field(default_factory=lambda: str(uuid4()))
     example_index: Optional[int] = None
     timestamp: Optional[str] = None
-    trace_id: Optional[str] = None
\ No newline at end of file
+    trace_id: Optional[str] = None
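
For context, `CustomExample` is the dict-valued counterpart of `Example`; a construction sketch (field values invented, import path inferred from the file layout):

    from judgeval.data.custom_example import CustomExample

    ex = CustomExample(
        input={"question": "What is 2 + 2?"},
        actual_output={"answer": "4"},
    )
    print(ex.example_id)  # auto-filled by the uuid4 default_factory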
judgeval/data/datasets/dataset.py CHANGED
@@ -9,6 +9,8 @@ from typing import List, Union, Literal, Optional
 
 from judgeval.data import Example, Trace
 from judgeval.common.logger import debug, error, warning, info
+from judgeval.utils.file_utils import get_examples_from_yaml
+
 
 @dataclass
 class EvalDataset:
@@ -18,12 +20,14 @@ class EvalDataset:
     _id: Union[str, None] = field(default=None)
     judgment_api_key: str = field(default="")
     organization_id: str = field(default="")
-    def __init__(self,
-                 judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"),
-                 organization_id: str = os.getenv("JUDGMENT_ORG_ID"),
-                 examples: Optional[List[Example]] = None,
-                 traces: Optional[List[Trace]] = None
-                 ):
+
+    def __init__(
+        self,
+        judgment_api_key: str = os.getenv("JUDGMENT_API_KEY", ""),
+        organization_id: str = os.getenv("JUDGMENT_ORG_ID", ""),
+        examples: Optional[List[Example]] = None,
+        traces: Optional[List[Trace]] = None,
+    ):
         if not judgment_api_key:
             warning("No judgment_api_key provided")
         self.examples = examples or []
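
One behavioral nuance of the signature change above: the `os.getenv(..., "")` defaults are captured once, when the module is imported, so the environment variables must be set beforehand. A sketch (key values are placeholders):

    import os

    # Set these before importing judgeval; default arguments are
    # evaluated when the __init__ signature is first executed.
    os.environ["JUDGMENT_API_KEY"] = "sk-placeholder"
    os.environ["JUDGMENT_ORG_ID"] = "org-placeholder"

    from judgeval.data.datasets.dataset import EvalDataset

    ds = EvalDataset()  # falls back to "" (and logs a warning) if the env vars are unset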
@@ -88,14 +92,14 @@ class EvalDataset:
         new_examples = [Example(**e) for e in examples]
         for e in new_examples:
             self.add_example(e)
-    
+
     def add_from_csv(
-        self, 
+        self,
         file_path: str,
         header_mapping: dict,
         primary_delimiter: str = ",",
-        secondary_delimiter: str = ";"
-    ) -> None:
+        secondary_delimiter: str = ";",
+    ) -> None:
         """
         Add Examples from a CSV file.
 
@@ -111,9 +115,9 @@ class EvalDataset:
             raise ModuleNotFoundError(
                 "Please install pandas to use this method. 'pip install pandas'"
            )
-        
+
         # Pandas naturally reads numbers in data files as ints, not strings (can lead to unexpected behavior)
-        df = pd.read_csv(file_path, dtype={'trace_id': str}, sep=primary_delimiter)
+        df = pd.read_csv(file_path, dtype={"trace_id": str}, sep=primary_delimiter)
         """
         The user should pass in a dict mapping from Judgment Example headers to their custom defined headers.
         Available headers for Example objects are as follows:
@@ -131,42 +135,55 @@ class EvalDataset:
         This can be adjusted using the `secondary_delimiter` parameter.
         """
         examples = []
-        
+
         def process_csv_row(value, header):
             """
             Maps a singular value in the CSV file to the appropriate type based on the header.
             If value exists and can be split into type List[*], we will split upon the user's provided secondary delimiter.
             """
             # check that the CSV value is not null for entry
-            null_replacement = dict() if header == 'additional_metadata' else None
-            if pd.isna(value) or value == '':
+            null_replacement = dict() if header == "additional_metadata" else None
+            if pd.isna(value) or value == "":
                 return null_replacement
             try:
-                value = ast.literal_eval(value) if header == 'additional_metadata' else str(value)
+                value = (
+                    ast.literal_eval(value)
+                    if header == "additional_metadata"
+                    else str(value)
+                )
             except (ValueError, SyntaxError):
                 value = str(value)
-            if header in ["context", "retrieval_context", "tools_called", "expected_tools"]:
+            if header in [
+                "context",
+                "retrieval_context",
+                "tools_called",
+                "expected_tools",
+            ]:
                 # attempt to split the value by the secondary delimiter
                 value = value.split(secondary_delimiter)
-            
+
             return value
-        
+
         for _, row in df.iterrows():
             data = {
-                header: process_csv_row(
-                    row[header_mapping[header]], header
-                )
+                header: process_csv_row(row[header_mapping[header]], header)
                 for header in header_mapping
             }
             if "example" in header_mapping and row[header_mapping["example"]]:
                 if "name" in header_mapping:
-                    data["name"] = row[header_mapping["name"]] if pd.notna(row[header_mapping["name"]]) else None
+                    data["name"] = (
+                        row[header_mapping["name"]]
+                        if pd.notna(row[header_mapping["name"]])
+                        else None
+                    )
                 # every Example has `input` and `actual_output` fields
                 if data["input"] is not None and data["actual_output"] is not None:
                     e = Example(**data)
                     examples.append(e)
                 else:
-                    raise ValueError("Every example must have an 'input' and 'actual_output' field.")
+                    raise ValueError(
+                        "Every example must have an 'input' and 'actual_output' field."
+                    )
 
         for e in examples:
             self.add_example(e)
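
To make the `header_mapping` contract in the docstring above concrete: keys are `Example` field names, values are the CSV's own column headers, and list-valued fields (`context`, `retrieval_context`, `tools_called`, `expected_tools`) are split on `secondary_delimiter`. A usage sketch with invented file and column names:

    from judgeval.data.datasets.dataset import EvalDataset

    ds = EvalDataset()
    ds.add_from_csv(
        "qa_runs.csv",  # hypothetical file
        header_mapping={
            "input": "question",            # Example field -> CSV column
            "actual_output": "model_answer",
            "retrieval_context": "chunks",  # "a;b;c" -> ["a", "b", "c"]
            "example": "is_example",        # rows flagged truthy become Examples
        },
        primary_delimiter=",",
        secondary_delimiter=";",
    )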
@@ -201,32 +218,25 @@ class EvalDataset:
             timestamp: "20241230_160117"
             trace_id: "123"
         """
-        try:
-            with open(file_path, "r") as file:
-                payload = yaml.safe_load(file)
-                if payload is None:
-                    raise ValueError("The YAML file is empty.")
-                examples = payload.get("examples", [])
-        except FileNotFoundError:
-            error(f"YAML file not found: {file_path}")
-            raise FileNotFoundError(f"The file {file_path} was not found.")
-        except yaml.YAMLError:
-            error(f"Invalid YAML file: {file_path}")
-            raise ValueError(f"The file {file_path} is not a valid YAML file.")
+        examples = get_examples_from_yaml(file_path)
 
         info(f"Added {len(examples)} examples from YAML")
-        new_examples = [Example(**e) for e in examples]
-        for e in new_examples:
+        for e in examples:
             self.add_example(e)
 
     def add_example(self, e: Example) -> None:
         self.examples.append(e)
         # TODO if we need to add rank, then we need to do it here
-    
+
     def add_trace(self, t: Trace) -> None:
         self.traces.append(t)
 
-    def save_as(self, file_type: Literal["json", "csv", "yaml"], dir_path: str, save_name: str = None) -> None:
+    def save_as(
+        self,
+        file_type: Literal["json", "csv", "yaml"],
+        dir_path: str,
+        save_name: str | None = None,
+    ) -> None:
         """
         Saves the dataset as a file. Save only the examples.
 
@@ -237,7 +247,11 @@ class EvalDataset:
         """
         if not os.path.exists(dir_path):
             os.makedirs(dir_path)
-        file_name = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") if save_name is None else save_name
+        file_name = (
+            datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+            if save_name is None
+            else save_name
+        )
         complete_path = os.path.join(dir_path, f"{file_name}.{file_type}")
         if file_type == "json":
             with open(complete_path, "w") as file:
@@ -251,12 +265,23 @@ class EvalDataset:
         elif file_type == "csv":
             with open(complete_path, "w", newline="") as file:
                 writer = csv.writer(file)
-                writer.writerow([
-                    "input", "actual_output", "expected_output", "context", \
-                    "retrieval_context", "additional_metadata", "tools_called", \
-                    "expected_tools", "name", "comments", "source_file", "example", \
-                    "trace_id"
-                ])
+                writer.writerow(
+                    [
+                        "input",
+                        "actual_output",
+                        "expected_output",
+                        "context",
+                        "retrieval_context",
+                        "additional_metadata",
+                        "tools_called",
+                        "expected_tools",
+                        "name",
+                        "comments",
+                        "source_file",
+                        "example",
+                        "trace_id",
+                    ]
+                )
                 for e in self.examples:
                     writer.writerow(
                         [
@@ -274,8 +299,7 @@ class EvalDataset:
                             True,  # Adding an Example
                         ]
                     )
-            
-
+
         elif file_type == "yaml":
             with open(complete_path, "w") as file:
                 yaml_data = {
@@ -300,14 +324,16 @@ class EvalDataset:
                 yaml.dump(yaml_data, file, default_flow_style=False)
         else:
             ACCEPTABLE_FILE_TYPES = ["json", "csv", "yaml"]
-            raise TypeError(f"Invalid file type: {file_type}. Please choose from {ACCEPTABLE_FILE_TYPES}")
-    
+            raise TypeError(
+                f"Invalid file type: {file_type}. Please choose from {ACCEPTABLE_FILE_TYPES}"
+            )
+
     def __iter__(self):
         return iter(self.examples)
-    
+
     def __len__(self):
         return len(self.examples)
-    
+
     def __str__(self):
         return (
             f"{self.__class__.__name__}("
@@ -316,4 +342,4 @@ class EvalDataset:
             f"_alias={self._alias}, "
             f"_id={self._id}"
             f")"
-        )
\ No newline at end of file
+        )
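
Taken together, `save_as` and the YAML loader (the method whose body changed above; its name is not shown in the hunk) give a save/load round trip. A sketch, assuming the loader is `add_from_yaml` and continuing the `ds` from the CSV example:

    ds.save_as("yaml", dir_path="./exports", save_name="smoke_test")
    # -> ./exports/smoke_test.yaml (timestamped name if save_name is omitted)

    restored = EvalDataset()
    restored.add_from_yaml("./exports/smoke_test.yaml")  # assumed method name
    assert len(restored) == len(ds)  # __len__ counts examples only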