judgeval 0.0.35__py3-none-any.whl → 0.0.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/common/tracer.py +352 -118
- judgeval/constants.py +3 -2
- judgeval/data/datasets/dataset.py +3 -0
- judgeval/data/datasets/eval_dataset_client.py +63 -3
- judgeval/integrations/langgraph.py +1961 -299
- judgeval/judgment_client.py +8 -2
- judgeval/run_evaluation.py +67 -18
- judgeval/scorers/score.py +1 -0
- {judgeval-0.0.35.dist-info → judgeval-0.0.36.dist-info}/METADATA +1 -2
- {judgeval-0.0.35.dist-info → judgeval-0.0.36.dist-info}/RECORD +12 -12
- {judgeval-0.0.35.dist-info → judgeval-0.0.36.dist-info}/WHEEL +0 -0
- {judgeval-0.0.35.dist-info → judgeval-0.0.36.dist-info}/licenses/LICENSE.md +0 -0
judgeval/constants.py
CHANGED
@@ -42,14 +42,15 @@ ROOT_API = os.getenv("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")
 JUDGMENT_EVAL_API_URL = f"{ROOT_API}/evaluate/"
 JUDGMENT_SEQUENCE_EVAL_API_URL = f"{ROOT_API}/evaluate_sequence/"
 JUDGMENT_DATASETS_PUSH_API_URL = f"{ROOT_API}/datasets/push/"
-
+JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL = f"{ROOT_API}/datasets/insert_examples/"
+JUDGMENT_DATASETS_APPEND_SEQUENCES_API_URL = f"{ROOT_API}/datasets/insert_sequences/"
 JUDGMENT_DATASETS_PULL_API_URL = f"{ROOT_API}/datasets/pull_for_judgeval/"
 JUDGMENT_DATASETS_DELETE_API_URL = f"{ROOT_API}/datasets/delete/"
 JUDGMENT_DATASETS_EXPORT_JSONL_API_URL = f"{ROOT_API}/datasets/export_jsonl/"
 JUDGMENT_DATASETS_PROJECT_STATS_API_URL = f"{ROOT_API}/datasets/fetch_stats_by_project/"
 JUDGMENT_DATASETS_INSERT_API_URL = f"{ROOT_API}/datasets/insert_examples/"
 JUDGMENT_EVAL_LOG_API_URL = f"{ROOT_API}/log_eval_results/"
-JUDGMENT_EVAL_FETCH_API_URL = f"{ROOT_API}/
+JUDGMENT_EVAL_FETCH_API_URL = f"{ROOT_API}/fetch_experiment_run/"
 JUDGMENT_EVAL_DELETE_API_URL = f"{ROOT_API}/delete_eval_results_by_project_and_run_names/"
 JUDGMENT_EVAL_DELETE_PROJECT_API_URL = f"{ROOT_API}/delete_eval_results_by_project/"
 JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/delete/"
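Both new dataset endpoints, like every other constant in this file, are built from ROOT_API, which the hunk header shows being read from the JUDGMENT_API_URL environment variable. A minimal sketch of redirecting the SDK to another backend (the staging URL is a made-up placeholder):

import os

# JUDGMENT_API_URL must be set before judgeval.constants is imported,
# because the f-string URLs are evaluated once at import time.
os.environ["JUDGMENT_API_URL"] = "https://staging.example.com"

from judgeval import constants

print(constants.JUDGMENT_DATASETS_APPEND_SEQUENCES_API_URL)
# https://staging.example.com/datasets/insert_sequences/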
judgeval/data/datasets/dataset.py
CHANGED
@@ -224,6 +224,9 @@ class EvalDataset:
         self.examples = self.examples + [e]
         # TODO if we need to add rank, then we need to do it here
 
+    def add_sequence(self, s: Sequence) -> None:
+        self.sequences = self.sequences + [s]
+
     def save_as(self, file_type: Literal["json", "csv", "yaml"], dir_path: str, save_name: str = None) -> None:
         """
         Saves the dataset as a file. Save only the examples.
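add_sequence mirrors the existing example-append pattern, building a new list rather than mutating self.sequences in place. A usage sketch; the import path and constructor fields of Sequence are not shown in this diff, so both are assumptions:

from judgeval.data.datasets.dataset import EvalDataset
from judgeval.data import Sequence  # import path assumed

dataset = EvalDataset()             # default construction assumed
dataset.add_sequence(Sequence(...)) # fields elided; not shown in this diff
# Once a dataset holds sequences, pushing it sets "is_sequence": true
# (see the eval_dataset_client.py hunks below).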
judgeval/data/datasets/eval_dataset_client.py
CHANGED
@@ -6,7 +6,8 @@ from rich.progress import Progress, SpinnerColumn, TextColumn
 from judgeval.common.logger import debug, error, warning, info
 from judgeval.constants import (
     JUDGMENT_DATASETS_PUSH_API_URL,
-
+    JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
+    JUDGMENT_DATASETS_APPEND_SEQUENCES_API_URL,
     JUDGMENT_DATASETS_PULL_API_URL,
     JUDGMENT_DATASETS_PROJECT_STATS_API_URL,
     JUDGMENT_DATASETS_DELETE_API_URL,
@@ -58,6 +59,8 @@ class EvalDatasetClient:
                 "dataset_alias": alias,
                 "project_name": project_name,
                 "examples": [e.to_dict() for e in dataset.examples],
+                "sequences": [s.model_dump() for s in dataset.sequences],
+                "is_sequence": len(dataset.sequences) > 0,
                 "overwrite": overwrite,
             }
             try:
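With this change, push serializes any sequences alongside the examples and adds an is_sequence flag so the server can distinguish the two dataset kinds. Roughly the request body it produces (alias and project name are placeholders):

content = {
    "dataset_alias": "my-dataset",   # placeholder
    "project_name": "my-project",    # placeholder
    "examples": [e.to_dict() for e in dataset.examples],
    "sequences": [s.model_dump() for s in dataset.sequences],
    "is_sequence": len(dataset.sequences) > 0,  # True if any sequences were added
    "overwrite": False,
}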
@@ -92,7 +95,7 @@ class EvalDatasetClient:
             return True
 
 
-    def
+    def append_examples(self, alias: str, examples: List[Example], project_name: str) -> bool:
         debug(f"Appending dataset with alias '{alias}'")
         """
         Appends the dataset to Judgment platform
@@ -124,7 +127,7 @@ class EvalDatasetClient:
             }
             try:
                 response = requests.post(
-
+                    JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
                     json=content,
                     headers={
                         "Content-Type": "application/json",
@@ -149,6 +152,63 @@ class EvalDatasetClient:
             )
             return True
 
+    def append_sequences(self, alias: str, sequences: List[Sequence], project_name: str) -> bool:
+        debug(f"Appending dataset with alias '{alias}'")
+        """
+        Appends the dataset to Judgment platform
+
+        Mock request:
+        dataset = {
+            "alias": alias,
+            "examples": [...],
+            "project_name": project_name
+        } ==>
+        {
+            "_alias": alias,
+            "_id": "..." # ID of the dataset
+        }
+        """
+        with Progress(
+            SpinnerColumn(style="rgb(106,0,255)"),
+            TextColumn("[progress.description]{task.description}"),
+            transient=False,
+        ) as progress:
+            task_id = progress.add_task(
+                f"Appending [rgb(106,0,255)]'{alias}' to Judgment...",
+                total=100,
+            )
+            content = {
+                "dataset_alias": alias,
+                "project_name": project_name,
+                "sequences": [s.model_dump() for s in sequences],
+            }
+            try:
+                response = requests.post(
+                    JUDGMENT_DATASETS_APPEND_SEQUENCES_API_URL,
+                    json=content,
+                    headers={
+                        "Content-Type": "application/json",
+                        "Authorization": f"Bearer {self.judgment_api_key}",
+                        "X-Organization-Id": self.organization_id
+                    },
+                    verify=True
+                )
+                if response.status_code != 200:
+                    error(f"Server error during append: {response.json()}")
+                    raise Exception(f"Server error during append: {response.json()}")
+                response.raise_for_status()
+            except requests.exceptions.HTTPError as err:
+                if response.status_code == 422:
+                    error(f"Validation error during append: {err.response.json()}")
+                else:
+                    error(f"HTTP error during append: {err}")
+
+            progress.update(
+                task_id,
+                description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
+            )
+            return True
+
     def pull(self, alias: str, project_name: str) -> EvalDataset:
         debug(f"Pulling dataset with alias '{alias}'")
         """