PyPI - judgeval - Versions diffs - 0.0.12__py3-none-any.whl → 0.0.14__py3-none-any.whl - Mend

judgeval 0.0.12__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

judgeval/common/tracer.py +19 -7
judgeval/constants.py +2 -0
judgeval/data/datasets/dataset.py +5 -3
judgeval/data/datasets/eval_dataset_client.py +114 -10
judgeval/data/example.py +20 -5
judgeval/evaluation_run.py +1 -0
judgeval/judgment_client.py +40 -11
judgeval/run_evaluation.py +23 -8
{judgeval-0.0.12.dist-info → judgeval-0.0.14.dist-info}/METADATA +1 -1
{judgeval-0.0.12.dist-info → judgeval-0.0.14.dist-info}/RECORD +12 -12
{judgeval-0.0.12.dist-info → judgeval-0.0.14.dist-info}/WHEEL +0 -0
{judgeval-0.0.12.dist-info → judgeval-0.0.14.dist-info}/licenses/LICENSE.md +0 -0

judgeval/common/tracer.py CHANGED Viewed

@@ -188,8 +188,9 @@ class TraceManagerClient:
     - Saving a trace
     - Deleting a trace
     """
-    def __init__(self, judgment_api_key: str):
+    def __init__(self, judgment_api_key: str, organization_id: str):
         self.judgment_api_key = judgment_api_key
+        self.organization_id = organization_id
     def fetch_trace(self, trace_id: str):
         """
@@ -199,10 +200,11 @@ class TraceManagerClient:
             JUDGMENT_TRACES_FETCH_API_URL,
             json={
                 "trace_id": trace_id,
-                "judgment_api_key": self.judgment_api_key,
             },
             headers={
                 "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             }
         )
@@ -225,6 +227,8 @@ class TraceManagerClient:
             json=trace_data,
             headers={
                 "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             }
         )
@@ -243,11 +247,12 @@ class TraceManagerClient:
         response = requests.delete(
             JUDGMENT_TRACES_DELETE_API_URL,
             json={
-                "judgment_api_key": self.judgment_api_key,
                 "trace_ids": [trace_id],
             },
             headers={
                 "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             }
         )
@@ -263,11 +268,12 @@ class TraceManagerClient:
         response = requests.delete(
             JUDGMENT_TRACES_DELETE_API_URL,
             json={
-                "judgment_api_key": self.judgment_api_key,
                 "trace_ids": trace_ids,
             },
             headers={
                 "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
             }
         )
@@ -290,7 +296,7 @@ class TraceClient:
         self.span_type = None
         self._current_span: Optional[TraceEntry] = None
         self.overwrite = overwrite
-        self.trace_manager_client = TraceManagerClient(tracer.api_key)  # Manages DB operations for trace data
+        self.trace_manager_client = TraceManagerClient(tracer.api_key, tracer.organization_id)  # Manages DB operations for trace data
     @contextmanager
     def span(self, name: str, span_type: SpanType = "span"):
@@ -367,6 +373,7 @@ class TraceClient:
             raise ValueError(f"Failed to load scorers: {str(e)}")
         eval_run = EvaluationRun(
+            organization_id=self.tracer.organization_id,
             log_results=log_results,
             project_name=self.project_name,
             eval_name=f"{self.name.capitalize()}-"
@@ -542,7 +549,6 @@ class TraceClient:
         # Create trace document
         trace_data = {
             "trace_id": self.trace_id,
-            "api_key": self.tracer.api_key,
             "name": self.name,
             "project_name": self.project_name,
             "created_at": datetime.fromtimestamp(self.start_time).isoformat(),
@@ -564,6 +570,8 @@ class TraceClient:
             channel = connection.channel()
             channel.queue_declare(queue=RABBITMQ_QUEUE, durable=True)
+            trace_data["judgment_api_key"] = self.tracer.api_key
+            trace_data["organization_id"] = self.tracer.organization_id
             channel.basic_publish(
                 exchange='',
@@ -589,14 +597,18 @@ class Tracer:
             cls._instance = super(Tracer, cls).__new__(cls)
         return cls._instance
-    def __init__(self, api_key: str = os.getenv("JUDGMENT_API_KEY"), project_name: str = "default_project"):
+    def __init__(self, api_key: str = os.getenv("JUDGMENT_API_KEY"), project_name: str = "default_project", organization_id: str = os.getenv("ORGANIZATION_ID")):
         if not hasattr(self, 'initialized'):
             if not api_key:
                 raise ValueError("Tracer must be configured with a Judgment API key")
+            if not organization_id:
+                raise ValueError("Tracer must be configured with an Organization ID")
             self.api_key: str = api_key
             self.project_name: str = project_name
             self.client: JudgmentClient = JudgmentClient(judgment_api_key=api_key)
+            self.organization_id: str = organization_id
             self.depth: int = 0
             self._current_trace: Optional[str] = None
             self.initialized: bool = True

judgeval/constants.py CHANGED Viewed

@@ -36,7 +36,9 @@ ROOT_API = os.getenv("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")
 JUDGMENT_EVAL_API_URL = f"{ROOT_API}/evaluate/"
 JUDGMENT_DATASETS_PUSH_API_URL = f"{ROOT_API}/datasets/push/"
 JUDGMENT_DATASETS_PULL_API_URL = f"{ROOT_API}/datasets/pull/"
+JUDGMENT_DATASETS_EXPORT_JSONL_API_URL = f"{ROOT_API}/datasets/export_jsonl/"
 JUDGMENT_DATASETS_PULL_ALL_API_URL = f"{ROOT_API}/datasets/get_all_stats/"
+JUDGMENT_DATASETS_EDIT_API_URL = f"{ROOT_API}/datasets/edit/"
 JUDGMENT_EVAL_LOG_API_URL = f"{ROOT_API}/log_eval_results/"
 JUDGMENT_EVAL_FETCH_API_URL = f"{ROOT_API}/fetch_eval_results/"
 JUDGMENT_EVAL_DELETE_API_URL = f"{ROOT_API}/delete_eval_results_by_project_and_run_name/"

judgeval/data/datasets/dataset.py CHANGED Viewed

@@ -17,9 +17,10 @@ class EvalDataset:
     _alias: Union[str, None] = field(default=None)
     _id: Union[str, None] = field(default=None)
     judgment_api_key: str = field(default="")
+    organization_id: str = field(default="")
     def __init__(self,
                  judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"),
+                 organization_id: str = os.getenv("ORGANIZATION_ID"),
                  ground_truths: List[GroundTruthExample] = [],
                  examples: List[Example] = [],
                  ):
@@ -31,7 +32,7 @@ class EvalDataset:
         self._alias = None
         self._id = None
         self.judgment_api_key = judgment_api_key
+        self.organization_id = organization_id
     def add_from_json(self, file_path: str) -> None:
         debug(f"Loading dataset from JSON file: {file_path}")
@@ -162,7 +163,8 @@ class EvalDataset:
                 "additional_metadata": ast.literal_eval(row["additional_metadata"]) if pd.notna(row["additional_metadata"]) else dict(),
                 "tools_called": row["tools_called"].split(";") if pd.notna(row["tools_called"]) else [],
                 "expected_tools": row["expected_tools"].split(";") if pd.notna(row["expected_tools"]) else [],
-                "trace_id": row["trace_id"] if pd.notna(row["trace_id"]) else None
+                "trace_id": row["trace_id"] if pd.notna(row["trace_id"]) else None,
+                "example_id": str(row["example_id"]) if pd.notna(row["example_id"]) else None
             }
             if row["example"]:
                 data["name"] = row["name"] if pd.notna(row["name"]) else None

judgeval/data/datasets/eval_dataset_client.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from typing import Optional
+from typing import Optional, List
 import requests
 from rich.progress import Progress, SpinnerColumn, TextColumn
@@ -7,7 +7,9 @@ from judgeval.common.logger import debug, error, warning, info
 from judgeval.constants import (
     JUDGMENT_DATASETS_PUSH_API_URL,
     JUDGMENT_DATASETS_PULL_API_URL,
-    JUDGMENT_DATASETS_PULL_ALL_API_URL
+    JUDGMENT_DATASETS_PULL_ALL_API_URL,
+    JUDGMENT_DATASETS_EDIT_API_URL,
+    JUDGMENT_DATASETS_EXPORT_JSONL_API_URL
 )
 from judgeval.data import Example
 from judgeval.data.datasets import EvalDataset
@@ -17,13 +19,14 @@ from judgeval.data.datasets.ground_truth import GroundTruthExample
 class EvalDatasetClient:
-    def __init__(self, judgment_api_key: str):
+    def __init__(self, judgment_api_key: str, organization_id: str):
         self.judgment_api_key = judgment_api_key
+        self.organization_id = organization_id
     def create_dataset(self) -> EvalDataset:
         return EvalDataset(judgment_api_key=self.judgment_api_key)
-    def push(self, dataset: EvalDataset, alias: str,overwrite: Optional[bool] = False) -> bool:
+    def push(self, dataset: EvalDataset, alias: str, overwrite: Optional[bool] = False) -> bool:
         debug(f"Pushing dataset with alias '{alias}' (overwrite={overwrite})")
         if overwrite:
             warning(f"Overwrite enabled for alias '{alias}'")
@@ -56,12 +59,16 @@ class EvalDatasetClient:
                     "ground_truths": [g.to_dict() for g in dataset.ground_truths],
                     "examples": [e.to_dict() for e in dataset.examples],
                     "overwrite": overwrite,
-                    "judgment_api_key": dataset.judgment_api_key
                 }
             try:
                 response = requests.post(
                     JUDGMENT_DATASETS_PUSH_API_URL,
-                    json=content
+                    json=content,
+                    headers={
+                        "Content-Type": "application/json",
+                        "Authorization": f"Bearer {self.judgment_api_key}",
+                        "X-Organization-Id": self.organization_id
+                    }
                 )
                 if response.status_code == 500:
                     error(f"Server error during push: {content.get('message')}")
@@ -115,13 +122,17 @@ class EvalDatasetClient:
                 )
                 request_body = {
                     "alias": alias,
-                    "judgment_api_key": self.judgment_api_key
                 }
                 try:
                     response = requests.post(
                         JUDGMENT_DATASETS_PULL_API_URL,
-                        json=request_body
+                        json=request_body,
+                        headers={
+                            "Content-Type": "application/json",
+                            "Authorization": f"Bearer {self.judgment_api_key}",
+                            "X-Organization-Id": self.organization_id
+                        }
                     )
                     response.raise_for_status()
                 except requests.exceptions.RequestException as e:
@@ -169,13 +180,17 @@ class EvalDatasetClient:
                     total=100,
                 )
                 request_body = {
-                    "judgment_api_key": self.judgment_api_key
                 }
                 try:
                     response = requests.post(
                         JUDGMENT_DATASETS_PULL_ALL_API_URL,
-                        json=request_body
+                        json=request_body,
+                        headers={
+                            "Content-Type": "application/json",
+                            "Authorization": f"Bearer {self.judgment_api_key}",
+                            "X-Organization-Id": self.organization_id
+                        }
                     )
                     response.raise_for_status()
                 except requests.exceptions.RequestException as e:
@@ -191,3 +206,92 @@ class EvalDatasetClient:
                 )
                 return payload
+    def edit_dataset(self, alias: str, examples: List[Example], ground_truths: List[GroundTruthExample]) -> bool:
+        """
+        Edits the dataset on Judgment platform by adding new examples and ground truths
+        Mock request:
+        {
+            "alias": alias,
+            "examples": [...],
+            "ground_truths": [...],
+            "judgment_api_key": self.judgment_api_key
+        }
+        """
+        with Progress(
+                SpinnerColumn(style="rgb(106,0,255)"),
+                TextColumn("[progress.description]{task.description}"),
+                transient=False,
+            ) as progress:
+            task_id = progress.add_task(
+                f"Editing dataset [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] on Judgment...",
+                total=100,
+            )
+            content = {
+                "alias": alias,
+                "examples": [e.to_dict() for e in examples],
+                "ground_truths": [g.to_dict() for g in ground_truths],
+                "judgment_api_key": self.judgment_api_key
+            }
+            try:
+                response = requests.post(
+                    JUDGMENT_DATASETS_EDIT_API_URL,
+                    json=content,
+                    headers={
+                        "Content-Type": "application/json",
+                        "Authorization": f"Bearer {self.judgment_api_key}",
+                        "X-Organization-Id": self.organization_id
+                    }
+                )
+                response.raise_for_status()
+            except requests.exceptions.RequestException as e:
+                error(f"Error editing dataset: {str(e)}")
+                return False
+            info(f"Successfully edited dataset '{alias}'")
+            return True
+    def export_jsonl(self, alias: str) -> requests.Response:
+        """Export dataset in JSONL format from Judgment platform"""
+        debug(f"Exporting dataset with alias '{alias}' as JSONL")
+        with Progress(
+            SpinnerColumn(style="rgb(106,0,255)"),
+            TextColumn("[progress.description]{task.description}"),
+            transient=False,
+        ) as progress:
+            task_id = progress.add_task(
+                f"Exporting [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] as JSONL...",
+                total=100,
+            )
+            try:
+                response = requests.post(
+                    JUDGMENT_DATASETS_EXPORT_JSONL_API_URL,
+                    json={"alias": alias},
+                    headers={
+                        "Content-Type": "application/json",
+                        "Authorization": f"Bearer {self.judgment_api_key}",
+                        "X-Organization-Id": self.organization_id
+                    },
+                    stream=True
+                )
+                response.raise_for_status()
+            except requests.exceptions.HTTPError as err:
+                if err.response.status_code == 404:
+                    error(f"Dataset not found: {alias}")
+                else:
+                    error(f"HTTP error during export: {err}")
+                raise
+            except Exception as e:
+                error(f"Error during export: {str(e)}")
+                raise
+            info(f"Successfully exported dataset with alias '{alias}'")
+            progress.update(
+                task_id,
+                description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
+            )
+            return response

judgeval/data/example.py CHANGED Viewed

@@ -4,9 +4,11 @@ Classes for representing examples in a dataset.
 from typing import TypeVar, Optional, Any, Dict, List
-from pydantic import BaseModel
+from uuid import uuid4
+from pydantic import BaseModel, Field, field_validator
 from enum import Enum
 from datetime import datetime
+import time
 Input = TypeVar('Input')
@@ -33,15 +35,26 @@ class Example(BaseModel):
     tools_called: Optional[List[str]] = None
     expected_tools: Optional[List[str]] = None
     name: Optional[str] = None
-    example_id: Optional[str] = None
+    example_id: str = Field(default_factory=lambda: str(uuid4()))
+    example_index: Optional[int] = None
     timestamp: Optional[str] = None
     trace_id: Optional[str] = None
+    @field_validator('input', 'actual_output', mode='before')
+    def convert_to_str(cls, value):
+        try:
+            return str(value)
+        except Exception:
+            return repr(value)
     def __init__(self, **data):
-        super().__init__(**data)
+        if 'example_id' not in data:
+            data['example_id'] = str(uuid4())
         # Set timestamp if not provided
-        if self.timestamp is None:
-            self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        if 'timestamp' not in data:
+            data['timestamp'] = datetime.now().strftime("%Y%m%d_%H%M%S")
+        super().__init__(**data)
     def to_dict(self):
         return {
@@ -55,6 +68,7 @@ class Example(BaseModel):
             "expected_tools": self.expected_tools,
             "name": self.name,
             "example_id": self.example_id,
+            "example_index": self.example_index,
             "timestamp": self.timestamp,
             "trace_id": self.trace_id
         }
@@ -71,6 +85,7 @@ class Example(BaseModel):
             f"expected_tools={self.expected_tools}, "
             f"name={self.name}, "
             f"example_id={self.example_id}, "
+            f"example_index={self.example_index}, "
             f"timestamp={self.timestamp}, "
             f"trace_id={self.trace_id})"
         )

judgeval/evaluation_run.py CHANGED Viewed

@@ -24,6 +24,7 @@ class EvaluationRun(BaseModel):
     # The user will specify whether they want log_results when they call run_eval
     log_results: bool = False  # NOTE: log_results has to be set first because it is used to validate project_name and eval_name
+    organization_id: Optional[str] = None
     project_name: Optional[str] = None
     eval_name: Optional[str] = None
     examples: List[Example]

judgeval/judgment_client.py CHANGED Viewed

@@ -6,7 +6,7 @@ from typing import Optional, List, Dict, Any, Union
 import requests
 from judgeval.constants import ROOT_API
-from judgeval.data.datasets import EvalDataset, EvalDatasetClient
+from judgeval.data.datasets import EvalDataset, EvalDatasetClient, GroundTruthExample
 from judgeval.data import (
     ScoringResult,
     Example
@@ -34,9 +34,10 @@ class EvalRunRequestBody(BaseModel):
 class JudgmentClient:
-    def __init__(self, judgment_api_key: str = os.getenv("JUDGMENT_API_KEY")):
+    def __init__(self, judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"), organization_id: str = os.getenv("ORGANIZATION_ID")):
         self.judgment_api_key = judgment_api_key
-        self.eval_dataset_client = EvalDatasetClient(judgment_api_key)
+        self.organization_id = organization_id
+        self.eval_dataset_client = EvalDatasetClient(judgment_api_key, organization_id)
         # Verify API key is valid
         result, response = self._validate_api_key()
@@ -78,7 +79,8 @@ class JudgmentClient:
                 model=model,
                 aggregator=aggregator,
                 metadata=metadata,
-                judgment_api_key=self.judgment_api_key
+                judgment_api_key=self.judgment_api_key,
+                organization_id=self.organization_id
             )
             return run_eval(eval, override)
         except ValueError as e:
@@ -115,7 +117,8 @@ class JudgmentClient:
                 model=model,
                 aggregator=aggregator,
                 metadata=metadata,
-                judgment_api_key=self.judgment_api_key
+                judgment_api_key=self.judgment_api_key,
+                organization_id=self.organization_id
             )
             return run_eval(evaluation_run)
         except ValueError as e:
@@ -164,6 +167,11 @@ class JudgmentClient:
         """
         return self.eval_dataset_client.pull_all_user_dataset_stats()
+    def edit_dataset(self, alias: str, examples: List[Example], ground_truths: List[GroundTruthExample]) -> bool:
+        """
+        Edits the dataset on Judgment platform by adding new examples and ground truths
+        """
+        return self.eval_dataset_client.edit_dataset(alias, examples, ground_truths)
     # Maybe add option where you can pass in the EvaluationRun object and it will pull the eval results from the backend
     def pull_eval(self, project_name: str, eval_run_name: str) -> List[Dict[str, Union[str, List[ScoringResult]]]]:
@@ -182,6 +190,11 @@ class JudgmentClient:
                                                    eval_name=eval_run_name,
                                                    judgment_api_key=self.judgment_api_key)
         eval_run = requests.post(JUDGMENT_EVAL_FETCH_API_URL,
+                                 headers={
+                                    "Content-Type": "application/json",
+                                    "Authorization": f"Bearer {self.judgment_api_key}",
+                                    "X-Organization-Id": self.organization_id
+                                 },
                                  json=eval_run_request_body.model_dump())
         if eval_run.status_code != requests.codes.ok:
             raise ValueError(f"Error fetching eval results: {eval_run.json()}")
@@ -213,6 +226,8 @@ class JudgmentClient:
                         json=eval_run_request_body.model_dump(),
                         headers={
                             "Content-Type": "application/json",
+                            "Authorization": f"Bearer {self.judgment_api_key}",
+                            "X-Organization-Id": self.organization_id
                         })
         if response.status_code != requests.codes.ok:
             raise ValueError(f"Error deleting eval results: {response.json()}")
@@ -231,10 +246,12 @@ class JudgmentClient:
         response = requests.delete(JUDGMENT_EVAL_DELETE_PROJECT_API_URL,
                         json={
                             "project_name": project_name,
-                            "judgment_api_key": self.judgment_api_key
+                            "judgment_api_key": self.judgment_api_key,
                         },
                         headers={
                             "Content-Type": "application/json",
+                            "Authorization": f"Bearer {self.judgment_api_key}",
+                            "X-Organization-Id": self.organization_id
                         })
         if response.status_code != requests.codes.ok:
             raise ValueError(f"Error deleting eval results: {response.json()}")
@@ -246,7 +263,11 @@ class JudgmentClient:
         """
         response = requests.post(
             f"{ROOT_API}/validate_api_key/",
-            json={"api_key": self.judgment_api_key}
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}",
+            },
+            json={}  # Empty body now
         )
         if response.status_code == 200:
             return True, response.json()
@@ -268,12 +289,16 @@ class JudgmentClient:
         """
         request_body = {
             "slug": slug,
-            "judgment_api_key": self.judgment_api_key
         }
         response = requests.post(
             f"{ROOT_API}/fetch_scorer/",
-            json=request_body
+            json=request_body,
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
+            }
         )
         if response.status_code == 500:
@@ -306,13 +331,17 @@ class JudgmentClient:
             "name": scorer.name,
             "conversation": scorer.conversation,
             "options": scorer.options,
-            "judgment_api_key": self.judgment_api_key,
             "slug": slug
         }
         response = requests.post(
             f"{ROOT_API}/save_scorer/",
-            json=request_body
+            json=request_body,
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}",
+                "X-Organization-Id": self.organization_id
+            }
         )
         if response.status_code == 500:

judgeval/run_evaluation.py CHANGED Viewed

@@ -47,7 +47,13 @@ def execute_api_eval(evaluation_run: EvaluationRun) -> List[Dict]:
     try:
         # submit API request to execute evals
         payload = evaluation_run.model_dump(warnings=False)
-        response = requests.post(JUDGMENT_EVAL_API_URL, json=payload)
+        response = requests.post(
+            JUDGMENT_EVAL_API_URL, headers={
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {evaluation_run.judgment_api_key}",
+            "X-Organization-Id": evaluation_run.organization_id
+        },
+        json=payload)
         response_data = response.json()
     except Exception as e:
         error(f"Error: {e}")
@@ -135,7 +141,7 @@ def check_missing_scorer_data(results: List[ScoringResult]) -> List[ScoringResul
     return results
-def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_key: str) -> None:
+def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_key: str, organization_id: str) -> None:
     """
     Checks if an evaluation run name already exists for a given project.
@@ -151,6 +157,11 @@ def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_k
     try:
         response = requests.post(
             f"{ROOT_API}/eval-run-name-exists/",
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {judgment_api_key}",
+                "X-Organization-Id": organization_id
+            },
             json={
                 "eval_name": eval_name,
                 "project_name": project_name,
@@ -188,9 +199,13 @@ def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run:
     try:
         res = requests.post(
             JUDGMENT_EVAL_LOG_API_URL,
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {evaluation_run.judgment_api_key}",
+                "X-Organization-Id": evaluation_run.organization_id
+            },
             json={
                 "results": [result.to_dict() for result in merged_results],
-                "judgment_api_key": evaluation_run.judgment_api_key,
                 "project_name": evaluation_run.project_name,
                 "eval_name": evaluation_run.eval_name,
             }
@@ -241,18 +256,17 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
         check_eval_run_name_exists(
             evaluation_run.eval_name,
             evaluation_run.project_name,
-            evaluation_run.judgment_api_key
+            evaluation_run.judgment_api_key,
+            evaluation_run.organization_id
         )
     # Set example IDs if not already set
     debug("Initializing examples with IDs and timestamps")
     for idx, example in enumerate(evaluation_run.examples):
-        if example.example_id is None:
-            example.example_id = idx
-            debug(f"Set example ID {idx} for input: {example.input[:50]}...")
+        example.example_index = idx  # Set numeric index
         example.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         with example_logging_context(example.timestamp, example.example_id):
-            debug(f"Initialized example {example.example_id}")
+            debug(f"Initialized example {example.example_id} (index: {example.example_index})")
             debug(f"Input: {example.input}")
             debug(f"Actual output: {example.actual_output}")
             if example.expected_output:
@@ -301,6 +315,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
                 aggregator=evaluation_run.aggregator,
                 metadata=evaluation_run.metadata,
                 judgment_api_key=evaluation_run.judgment_api_key,
+                organization_id=evaluation_run.organization_id,
                 log_results=evaluation_run.log_results
             )
             debug("Sending request to Judgment API")

{judgeval-0.0.12.dist-info → judgeval-0.0.14.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.0.12
+Version: 0.0.14
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues

{judgeval-0.0.12.dist-info → judgeval-0.0.14.dist-info}/RECORD RENAMED Viewed

@@ -1,22 +1,22 @@
 judgeval/__init__.py,sha256=xiiG4CkeaOtey4fusCd9CBz0BVqzTIbV-K2EFIU0rUM,283
 judgeval/clients.py,sha256=Ns5ljrgPPXUMo7fSPJxO12H64lcPyKeQPIVG_RMi2cM,1162
-judgeval/constants.py,sha256=oL3kWHg9CzQJiTInDTgJgxRhF3fgylhvEVP360UqG8A,2654
-judgeval/evaluation_run.py,sha256=ev-IbL34SwRv8lwB4KHfYag1jYo6b049R8mmwNBqmnM,5923
-judgeval/judgment_client.py,sha256=thmSXi2essIlmd_j5SjlBw9_8qJJp6N3djoWdLaMrj0,13770
-judgeval/run_evaluation.py,sha256=YOQ6s9RuUrXPTgoYexf7r6Hl1QKIMSTdvHl9kw-ZMzw,20103
+judgeval/constants.py,sha256=43hGesvBbX1uzc4KXvjLCVdd6cyZRMSnEJp11oA7h74,2794
+judgeval/evaluation_run.py,sha256=59lG8AUFTKqbY_JVEEA0I093-Pmiy0ERYDK5BuXuEGg,5965
+judgeval/judgment_client.py,sha256=ryGT3A9-Him6oco3WvuHbjB-FVvAR3wCiiGz03eO_Q4,15409
+judgeval/run_evaluation.py,sha256=Cc7BS07WyqsNpQ38HdMdRI782N3DANjM8UcIq9AwaGA,20769
 judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
 judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
 judgeval/common/logger.py,sha256=QXN3UMymmKu2iMEMEgATLBnMDjGr_pE2iOSEFoICgg8,6092
-judgeval/common/tracer.py,sha256=1WmHF5dGT-fesskT8BH39BZ65eQ9WURN49yGg9A6YKM,32397
+judgeval/common/tracer.py,sha256=qam2suh-0_Cu_B7AWg3AMfEo2TisRZVY1SnAfqhiFQo,33211
 judgeval/common/utils.py,sha256=3WRyyX0tvnnj_VAVlEdtZrfzyWj6zfX04xdpCtE1m5Y,33736
 judgeval/data/__init__.py,sha256=YferxwmUqoBi18hrdgro0BD0h4pt20LAqISeUzGMcVU,474
 judgeval/data/api_example.py,sha256=vwWFbI6eJr5VgURCRbuSiMtEXLUbTCih_BcaqEBy-pg,4108
-judgeval/data/example.py,sha256=lymGZ3jG818-r2vyFunt6OLFrhESOyJnbhao_ljTjlA,2471
+judgeval/data/example.py,sha256=Rd-eDEM-giYfkfsGh_PBS2wwl15QlQPzbMV-J64Yj5E,2991
 judgeval/data/result.py,sha256=8FIO-bFKPegZuByKRjA2_sumjb8oGWQ5ZeQ1RVz5z2w,4393
 judgeval/data/scorer_data.py,sha256=pYljblCPZrlMIv5Eg7R-clnmsqzUBAwokKjZpwa0DXE,3280
 judgeval/data/datasets/__init__.py,sha256=eO6ayeM_bTGwIt0eDSlTBIIBvXvIWRWWSfYZrZROPiQ,265
-judgeval/data/datasets/dataset.py,sha256=AGdU21vZ4iVjqbjQ7JY-u29FzJrdDFTgdvhzvYVJNyo,11833
-judgeval/data/datasets/eval_dataset_client.py,sha256=TaCDzymGFNFjGRrieEdQB8dT8xqNPpsEi2XLGFyrJno,7113
+judgeval/data/datasets/dataset.py,sha256=KdAY0KRUB2jxcGmc1XXXheFFcPsGFOIGY-kTwBNQS_Y,12080
+judgeval/data/datasets/eval_dataset_client.py,sha256=DzxWQIiHlbpg6FpmWY6brcSP_h_rGcztk2A_6tQNFys,11411
 judgeval/data/datasets/ground_truth.py,sha256=OTBs3VZe-Wp0vEXEsq14GPZHYtpWT16bhGQTycIvkKc,2057
 judgeval/data/datasets/utils.py,sha256=lQxyl7mevct7JcDSyIrU_8QOzT-EYPWEvoUiAeOdeek,2502
 judgeval/judges/__init__.py,sha256=tyQ5KY88Kp1Ctfw2IJxnVEpy8DnFCtmy04JdPOpp-As,339
@@ -78,7 +78,7 @@ judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarizat
 judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py,sha256=JUB3TMqS1OHr6PqpIGqkyiBNbyfUaw7lZuUATjU3_ek,168
 judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py,sha256=CYGRJY5EuyICYzHrmFdLykwXakX8AC7G3Bhj7p6szfY,5493
 judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
-judgeval-0.0.12.dist-info/METADATA,sha256=QabQInkXXIceknwYzcLrqn9YbGk7nURNgseoD2TfM24,1283
-judgeval-0.0.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-judgeval-0.0.12.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
-judgeval-0.0.12.dist-info/RECORD,,
+judgeval-0.0.14.dist-info/METADATA,sha256=ZmCAECDNWwzpuES1slYKWcY_U-SMOsjaOdtSoj6wu0I,1283
+judgeval-0.0.14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.0.14.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.0.14.dist-info/RECORD,,

{judgeval-0.0.12.dist-info → judgeval-0.0.14.dist-info}/WHEEL RENAMED Viewed

File without changes

{judgeval-0.0.12.dist-info → judgeval-0.0.14.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

judgeval 0.0.12__py3-none-any.whl → 0.0.14__py3-none-any.whl

judgeval 0.0.12__py3-none-any.whl → 0.0.14__py3-none-any.whl