judgeval 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/common/tracer.py +87 -28
- judgeval/constants.py +2 -0
- judgeval/data/datasets/dataset.py +2 -1
- judgeval/data/datasets/eval_dataset_client.py +106 -9
- judgeval/data/example.py +13 -5
- judgeval/judgment_client.py +29 -6
- judgeval/run_evaluation.py +16 -5
- {judgeval-0.0.11.dist-info → judgeval-0.0.13.dist-info}/METADATA +1 -1
- {judgeval-0.0.11.dist-info → judgeval-0.0.13.dist-info}/RECORD +11 -11
- {judgeval-0.0.11.dist-info → judgeval-0.0.13.dist-info}/WHEEL +0 -0
- {judgeval-0.0.11.dist-info → judgeval-0.0.13.dist-info}/licenses/LICENSE.md +0 -0
judgeval/common/tracer.py
CHANGED
@@ -199,10 +199,11 @@ class TraceManagerClient:
             JUDGMENT_TRACES_FETCH_API_URL,
             json={
                 "trace_id": trace_id,
-                "judgment_api_key": self.judgment_api_key,
+                # "judgment_api_key": self.judgment_api_key,
             },
             headers={
                 "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}"
             }
         )

@@ -225,6 +226,7 @@ class TraceManagerClient:
             json=trace_data,
             headers={
                 "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}"
             }
         )

@@ -248,6 +250,7 @@ class TraceManagerClient:
             },
             headers={
                 "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}"
             }
         )

@@ -263,11 +266,12 @@ class TraceManagerClient:
         response = requests.delete(
             JUDGMENT_TRACES_DELETE_API_URL,
             json={
-                "judgment_api_key": self.judgment_api_key,
+                # "judgment_api_key": self.judgment_api_key,
                 "trace_ids": trace_ids,
             },
             headers={
                 "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}"
             }
         )

@@ -557,7 +561,8 @@ class TraceClient:
             "overwrite": overwrite
         }

-
+        # Execute asynchronous evaluation in the background
+        if not empty_save:  # Only send to RabbitMQ if the trace is not empty
             connection = pika.BlockingConnection(
                 pika.ConnectionParameters(host=RABBITMQ_HOST, port=RABBITMQ_PORT))
             channel = connection.channel()
@@ -575,6 +580,25 @@ class TraceClient:

         self.trace_manager_client.save_trace(trace_data, empty_save)

+
+        # Save trace data by making POST request to API
+        response = requests.post(
+            JUDGMENT_TRACES_SAVE_API_URL,
+            json=trace_data,
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.tracer.api_key}"  # Bearer token format
+            }
+        )
+
+        if response.status_code == HTTPStatus.BAD_REQUEST:
+            raise ValueError(f"Failed to save trace data: Check your Trace name for conflicts, set overwrite=True to overwrite existing traces: {response.text}")
+        elif response.status_code != HTTPStatus.OK:
+            raise ValueError(f"Failed to save trace data: {response.text}")
+
+        if not empty_save and "ui_results_url" in response.json():
+            rprint(f"\n🔍 You can view your trace data here: [rgb(106,0,255)]{response.json()['ui_results_url']}[/]\n")
+
         return self.trace_id, trace_data

     def delete(self):
@@ -588,23 +612,31 @@ class Tracer:
             cls._instance = super(Tracer, cls).__new__(cls)
         return cls._instance

-    def __init__(self, api_key: str = os.getenv("JUDGMENT_API_KEY")):
+    def __init__(self, api_key: str = os.getenv("JUDGMENT_API_KEY"), project_name: str = "default_project"):
         if not hasattr(self, 'initialized'):
-
             if not api_key:
                 raise ValueError("Tracer must be configured with a Judgment API key")

             self.api_key: str = api_key
+            self.project_name: str = project_name
             self.client: JudgmentClient = JudgmentClient(judgment_api_key=api_key)
             self.depth: int = 0
             self._current_trace: Optional[str] = None
             self.initialized: bool = True
+        elif hasattr(self, 'project_name') and self.project_name != project_name:
+            warnings.warn(
+                f"Attempting to initialize Tracer with project_name='{project_name}' but it was already initialized with "
+                f"project_name='{self.project_name}'. Due to the singleton pattern, the original project_name will be used. "
+                "To use a different project name, ensure the first Tracer initialization uses the desired project name.",
+                RuntimeWarning
+            )

     @contextmanager
-    def trace(self, name: str, project_name: str =
+    def trace(self, name: str, project_name: str = None, overwrite: bool = False) -> Generator[TraceClient, None, None]:
         """Start a new trace context using a context manager"""
         trace_id = str(uuid.uuid4())
-
+        project = project_name if project_name is not None else self.project_name
+        trace = TraceClient(self, trace_id, name, project_name=project, overwrite=overwrite)
         prev_trace = self._current_trace
         self._current_trace = trace

@@ -623,28 +655,40 @@ class Tracer:
         """
         return self._current_trace

-    def observe(self, func=None, *, name=None, span_type: SpanType = "span"):
+    def observe(self, func=None, *, name=None, span_type: SpanType = "span", project_name: str = None, overwrite: bool = False):
         """
         Decorator to trace function execution with detailed entry/exit information.

         Args:
-            func: The function to
-            name: Optional custom name for the function
-            span_type:
+            func: The function to decorate
+            name: Optional custom name for the span (defaults to function name)
+            span_type: Type of span (default "span")
+            project_name: Optional project name override
+            overwrite: Whether to overwrite existing traces
         """
         if func is None:
-            return lambda f: self.observe(f, name=name, span_type=span_type)
+            return lambda f: self.observe(f, name=name, span_type=span_type, project_name=project_name, overwrite=overwrite)
+
+        # Use provided name or fall back to function name
+        span_name = name or func.__name__

         if asyncio.iscoroutinefunction(func):
             @functools.wraps(func)
             async def async_wrapper(*args, **kwargs):
+                # If there's already a trace, use it. Otherwise create a new one
                 if self._current_trace:
-
-
-
-
-
-
+                    trace = self._current_trace
+                else:
+                    trace_id = str(uuid.uuid4())
+                    trace_name = str(uuid.uuid4())
+                    project = project_name if project_name is not None else self.project_name
+                    trace = TraceClient(self, trace_id, trace_name, project_name=project, overwrite=overwrite)
+                    self._current_trace = trace
+                    # Only save empty trace for the root call
+                    trace.save(empty_save=True, overwrite=overwrite)
+
+                try:
+                    with trace.span(span_name, span_type=span_type) as span:
                         # Record inputs
                         span.record_input({
                             'args': list(args),
@@ -658,19 +702,30 @@ class Tracer:
                         span.record_output(result)

                         return result
-
-
+                finally:
+                    # Only save and cleanup if this is the root observe call
+                    if self.depth == 0:
+                        trace.save(empty_save=False, overwrite=overwrite)
+                        self._current_trace = None
+
            return async_wrapper
         else:
             @functools.wraps(func)
             def wrapper(*args, **kwargs):
+                # If there's already a trace, use it. Otherwise create a new one
                 if self._current_trace:
-
-
-
-
-
-
+                    trace = self._current_trace
+                else:
+                    trace_id = str(uuid.uuid4())
+                    trace_name = str(uuid.uuid4())
+                    project = project_name if project_name is not None else self.project_name
+                    trace = TraceClient(self, trace_id, trace_name, project_name=project, overwrite=overwrite)
+                    self._current_trace = trace
+                    # Only save empty trace for the root call
+                    trace.save(empty_save=True, overwrite=overwrite)
+
+                try:
+                    with trace.span(span_name, span_type=span_type) as span:
                         # Record inputs
                         span.record_input({
                             'args': list(args),
@@ -684,8 +739,12 @@ class Tracer:
                         span.record_output(result)

                         return result
-
-
+                finally:
+                    # Only save and cleanup if this is the root observe call
+                    if self.depth == 0:
+                        trace.save(empty_save=False, overwrite=overwrite)
+                        self._current_trace = None
+
            return wrapper

 def wrap(client: Any) -> Any:
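Net effect of the tracer changes: the API key moves from the JSON body into an Authorization: Bearer header, the Tracer singleton becomes project-aware, and @observe can now create and save a root trace on its own. A minimal usage sketch under those assumptions — the project name and the answer() function are illustrative, not part of the package:

import os
from judgeval.common.tracer import Tracer

# Singleton: the first initialization wins; a later call with a different
# project_name only emits a RuntimeWarning (see the __init__ diff above).
tracer = Tracer(api_key=os.getenv("JUDGMENT_API_KEY"), project_name="demo_project")

# With no active trace, observe() now creates one itself: empty_save=True up
# front, then a full save in the finally block of the root call.
@tracer.observe(span_type="span")
def answer(question: str) -> str:  # hypothetical traced function
    return f"echo: {question}"

answer("hello")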
judgeval/constants.py
CHANGED
@@ -36,7 +36,9 @@ ROOT_API = os.getenv("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")
 JUDGMENT_EVAL_API_URL = f"{ROOT_API}/evaluate/"
 JUDGMENT_DATASETS_PUSH_API_URL = f"{ROOT_API}/datasets/push/"
 JUDGMENT_DATASETS_PULL_API_URL = f"{ROOT_API}/datasets/pull/"
+JUDGMENT_DATASETS_EXPORT_JSONL_API_URL = f"{ROOT_API}/datasets/export_jsonl/"
 JUDGMENT_DATASETS_PULL_ALL_API_URL = f"{ROOT_API}/datasets/get_all_stats/"
+JUDGMENT_DATASETS_EDIT_API_URL = f"{ROOT_API}/datasets/edit/"
 JUDGMENT_EVAL_LOG_API_URL = f"{ROOT_API}/log_eval_results/"
 JUDGMENT_EVAL_FETCH_API_URL = f"{ROOT_API}/fetch_eval_results/"
 JUDGMENT_EVAL_DELETE_API_URL = f"{ROOT_API}/delete_eval_results_by_project_and_run_name/"
judgeval/data/datasets/dataset.py
CHANGED
@@ -162,7 +162,8 @@ class EvalDataset:
                 "additional_metadata": ast.literal_eval(row["additional_metadata"]) if pd.notna(row["additional_metadata"]) else dict(),
                 "tools_called": row["tools_called"].split(";") if pd.notna(row["tools_called"]) else [],
                 "expected_tools": row["expected_tools"].split(";") if pd.notna(row["expected_tools"]) else [],
-                "trace_id": row["trace_id"] if pd.notna(row["trace_id"]) else None
+                "trace_id": row["trace_id"] if pd.notna(row["trace_id"]) else None,
+                "example_id": str(row["example_id"]) if pd.notna(row["example_id"]) else None
             }
             if row["example"]:
                 data["name"] = row["name"] if pd.notna(row["name"]) else None
judgeval/data/datasets/eval_dataset_client.py
CHANGED
@@ -1,5 +1,5 @@

-from typing import Optional
+from typing import Optional, List
 import requests
 from rich.progress import Progress, SpinnerColumn, TextColumn

@@ -7,7 +7,9 @@ from judgeval.common.logger import debug, error, warning, info
 from judgeval.constants import (
     JUDGMENT_DATASETS_PUSH_API_URL,
     JUDGMENT_DATASETS_PULL_API_URL,
-    JUDGMENT_DATASETS_PULL_ALL_API_URL
+    JUDGMENT_DATASETS_PULL_ALL_API_URL,
+    JUDGMENT_DATASETS_EDIT_API_URL,
+    JUDGMENT_DATASETS_EXPORT_JSONL_API_URL
 )
 from judgeval.data import Example
 from judgeval.data.datasets import EvalDataset
@@ -23,7 +25,7 @@ class EvalDatasetClient:
     def create_dataset(self) -> EvalDataset:
         return EvalDataset(judgment_api_key=self.judgment_api_key)

-    def push(self, dataset: EvalDataset, alias: str,overwrite: Optional[bool] = False) -> bool:
+    def push(self, dataset: EvalDataset, alias: str, overwrite: Optional[bool] = False) -> bool:
         debug(f"Pushing dataset with alias '{alias}' (overwrite={overwrite})")
         if overwrite:
             warning(f"Overwrite enabled for alias '{alias}'")
@@ -56,12 +58,16 @@ class EvalDatasetClient:
             "ground_truths": [g.to_dict() for g in dataset.ground_truths],
             "examples": [e.to_dict() for e in dataset.examples],
             "overwrite": overwrite,
-            "judgment_api_key": dataset.judgment_api_key
+            # "judgment_api_key": dataset.judgment_api_key
         }
         try:
             response = requests.post(
                 JUDGMENT_DATASETS_PUSH_API_URL,
-                json=content
+                json=content,
+                headers={
+                    "Content-Type": "application/json",
+                    "Authorization": f"Bearer {self.judgment_api_key}"
+                }
             )
             if response.status_code == 500:
                 error(f"Server error during push: {content.get('message')}")
@@ -115,13 +121,17 @@ class EvalDatasetClient:
         )
         request_body = {
             "alias": alias,
-            "judgment_api_key": self.judgment_api_key
+            # "judgment_api_key": self.judgment_api_key
         }

         try:
             response = requests.post(
                 JUDGMENT_DATASETS_PULL_API_URL,
-                json=request_body
+                json=request_body,
+                headers={
+                    "Content-Type": "application/json",
+                    "Authorization": f"Bearer {self.judgment_api_key}"
+                }
             )
             response.raise_for_status()
         except requests.exceptions.RequestException as e:
@@ -169,13 +179,17 @@ class EvalDatasetClient:
             total=100,
         )
         request_body = {
-            "judgment_api_key": self.judgment_api_key
+            # "judgment_api_key": self.judgment_api_key
         }

         try:
             response = requests.post(
                 JUDGMENT_DATASETS_PULL_ALL_API_URL,
-                json=request_body
+                json=request_body,
+                headers={
+                    "Content-Type": "application/json",
+                    "Authorization": f"Bearer {self.judgment_api_key}"
+                }
             )
             response.raise_for_status()
         except requests.exceptions.RequestException as e:
@@ -191,3 +205,86 @@ class EvalDatasetClient:
         )

         return payload
+
+    def edit_dataset(self, alias: str, examples: List[Example], ground_truths: List[GroundTruthExample]) -> bool:
+        """
+        Edits the dataset on Judgment platform by adding new examples and ground truths
+
+        Mock request:
+        {
+            "alias": alias,
+            "examples": [...],
+            "ground_truths": [...],
+            "judgment_api_key": self.judgment_api_key
+        }
+        """
+        with Progress(
+            SpinnerColumn(style="rgb(106,0,255)"),
+            TextColumn("[progress.description]{task.description}"),
+            transient=False,
+        ) as progress:
+            task_id = progress.add_task(
+                f"Editing dataset [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] on Judgment...",
+                total=100,
+            )
+
+            content = {
+                "alias": alias,
+                "examples": [e.to_dict() for e in examples],
+                "ground_truths": [g.to_dict() for g in ground_truths],
+                "judgment_api_key": self.judgment_api_key
+            }
+
+            try:
+                response = requests.post(
+                    JUDGMENT_DATASETS_EDIT_API_URL,
+                    json=content
+                )
+                response.raise_for_status()
+            except requests.exceptions.RequestException as e:
+                error(f"Error editing dataset: {str(e)}")
+                return False
+
+            info(f"Successfully edited dataset '{alias}'")
+            return True
+
+    def export_jsonl(self, alias: str) -> requests.Response:
+        """Export dataset in JSONL format from Judgment platform"""
+        debug(f"Exporting dataset with alias '{alias}' as JSONL")
+        with Progress(
+            SpinnerColumn(style="rgb(106,0,255)"),
+            TextColumn("[progress.description]{task.description}"),
+            transient=False,
+        ) as progress:
+            task_id = progress.add_task(
+                f"Exporting [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] as JSONL...",
+                total=100,
+            )
+            try:
+                response = requests.post(
+                    JUDGMENT_DATASETS_EXPORT_JSONL_API_URL,
+                    json={"alias": alias},
+                    headers={
+                        "Content-Type": "application/json",
+                        "Authorization": f"Bearer {self.judgment_api_key}"
+                    },
+                    stream=True
+                )
+                response.raise_for_status()
+            except requests.exceptions.HTTPError as err:
+                if err.response.status_code == 404:
+                    error(f"Dataset not found: {alias}")
+                else:
+                    error(f"HTTP error during export: {err}")
+                raise
+            except Exception as e:
+                error(f"Error during export: {str(e)}")
+                raise
+
+            info(f"Successfully exported dataset with alias '{alias}'")
+            progress.update(
+                task_id,
+                description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
+            )
+
+            return response
judgeval/data/example.py
CHANGED
@@ -4,9 +4,11 @@ Classes for representing examples in a dataset.


 from typing import TypeVar, Optional, Any, Dict, List
-from
+from uuid import uuid4
+from pydantic import BaseModel, Field
 from enum import Enum
 from datetime import datetime
+import time


 Input = TypeVar('Input')
@@ -33,15 +35,19 @@ class Example(BaseModel):
     tools_called: Optional[List[str]] = None
     expected_tools: Optional[List[str]] = None
     name: Optional[str] = None
-    example_id:
+    example_id: str = Field(default_factory=lambda: str(uuid4()))
+    example_index: Optional[int] = None
     timestamp: Optional[str] = None
     trace_id: Optional[str] = None

     def __init__(self, **data):
-
+        if 'example_id' not in data:
+            data['example_id'] = str(uuid4())
         # Set timestamp if not provided
-        if
-
+        if 'timestamp' not in data:
+            data['timestamp'] = datetime.now().strftime("%Y%m%d_%H%M%S")
+        super().__init__(**data)
+

     def to_dict(self):
         return {
@@ -55,6 +61,7 @@ class Example(BaseModel):
             "expected_tools": self.expected_tools,
             "name": self.name,
             "example_id": self.example_id,
+            "example_index": self.example_index,
             "timestamp": self.timestamp,
             "trace_id": self.trace_id
         }
@@ -71,6 +78,7 @@ class Example(BaseModel):
             f"expected_tools={self.expected_tools}, "
             f"name={self.name}, "
             f"example_id={self.example_id}, "
+            f"example_index={self.example_index}, "
             f"timestamp={self.timestamp}, "
             f"trace_id={self.trace_id})"
         )
judgeval/judgment_client.py
CHANGED
@@ -6,7 +6,7 @@ from typing import Optional, List, Dict, Any, Union
 import requests

 from judgeval.constants import ROOT_API
-from judgeval.data.datasets import EvalDataset, EvalDatasetClient
+from judgeval.data.datasets import EvalDataset, EvalDatasetClient, GroundTruthExample
 from judgeval.data import (
     ScoringResult,
     Example
@@ -164,6 +164,11 @@ class JudgmentClient:
         """
         return self.eval_dataset_client.pull_all_user_dataset_stats()

+    def edit_dataset(self, alias: str, examples: List[Example], ground_truths: List[GroundTruthExample]) -> bool:
+        """
+        Edits the dataset on Judgment platform by adding new examples and ground truths
+        """
+        return self.eval_dataset_client.edit_dataset(alias, examples, ground_truths)

     # Maybe add option where you can pass in the EvaluationRun object and it will pull the eval results from the backend
     def pull_eval(self, project_name: str, eval_run_name: str) -> List[Dict[str, Union[str, List[ScoringResult]]]]:
@@ -182,6 +187,10 @@ class JudgmentClient:
             eval_name=eval_run_name,
             judgment_api_key=self.judgment_api_key)
         eval_run = requests.post(JUDGMENT_EVAL_FETCH_API_URL,
+                                 headers={
+                                     "Content-Type": "application/json",
+                                     "Authorization": f"Bearer {self.judgment_api_key}"
+                                 },
                                  json=eval_run_request_body.model_dump())
         if eval_run.status_code != requests.codes.ok:
             raise ValueError(f"Error fetching eval results: {eval_run.json()}")
@@ -213,6 +222,7 @@ class JudgmentClient:
             json=eval_run_request_body.model_dump(),
             headers={
                 "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}"
             })
         if response.status_code != requests.codes.ok:
             raise ValueError(f"Error deleting eval results: {response.json()}")
@@ -235,6 +245,7 @@ class JudgmentClient:
             },
             headers={
                 "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}"
             })
         if response.status_code != requests.codes.ok:
             raise ValueError(f"Error deleting eval results: {response.json()}")
@@ -246,7 +257,11 @@ class JudgmentClient:
         """
         response = requests.post(
             f"{ROOT_API}/validate_api_key/",
-
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}",
+            },
+            json={}  # Empty body now
         )
         if response.status_code == 200:
             return True, response.json()
@@ -268,12 +283,16 @@ class JudgmentClient:
         """
         request_body = {
             "slug": slug,
-            "judgment_api_key": self.judgment_api_key
+            # "judgment_api_key": self.judgment_api_key
         }

         response = requests.post(
             f"{ROOT_API}/fetch_scorer/",
-            json=request_body
+            json=request_body,
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}"
+            }
         )

         if response.status_code == 500:
@@ -306,13 +325,17 @@ class JudgmentClient:
             "name": scorer.name,
             "conversation": scorer.conversation,
             "options": scorer.options,
-            "judgment_api_key": self.judgment_api_key,
+            # "judgment_api_key": self.judgment_api_key,
             "slug": slug
         }

         response = requests.post(
             f"{ROOT_API}/save_scorer/",
-            json=request_body
+            json=request_body,
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.judgment_api_key}"
+            }
         )

         if response.status_code == 500:
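Every request in this file converges on the same pattern: the API key is sent as a Bearer token in the headers instead of a judgment_api_key field in the JSON body (the old field is left commented out for now). A generic sketch of that pattern; post_with_bearer is a hypothetical helper, not part of the package:

import requests

def post_with_bearer(url: str, api_key: str, payload: dict) -> requests.Response:
    # The 0.0.13 convention: authenticate via header, keep the body payload-only.
    return requests.post(
        url,
        json=payload,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        },
    )

# e.g. post_with_bearer(f"{ROOT_API}/fetch_scorer/", api_key, {"slug": slug})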
judgeval/run_evaluation.py
CHANGED
@@ -47,7 +47,12 @@ def execute_api_eval(evaluation_run: EvaluationRun) -> List[Dict]:
     try:
         # submit API request to execute evals
         payload = evaluation_run.model_dump(warnings=False)
-        response = requests.post(
+        response = requests.post(
+            JUDGMENT_EVAL_API_URL, headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {evaluation_run.judgment_api_key}"
+            },
+            json=payload)
         response_data = response.json()
     except Exception as e:
         error(f"Error: {e}")
@@ -151,6 +156,10 @@ def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_k
     try:
         response = requests.post(
             f"{ROOT_API}/eval-run-name-exists/",
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {judgment_api_key}"
+            },
             json={
                 "eval_name": eval_name,
                 "project_name": project_name,
@@ -188,6 +197,10 @@ def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run:
     try:
         res = requests.post(
             JUDGMENT_EVAL_LOG_API_URL,
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {evaluation_run.judgment_api_key}"
+            },
             json={
                 "results": [result.to_dict() for result in merged_results],
                 "judgment_api_key": evaluation_run.judgment_api_key,
@@ -247,12 +260,10 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
     # Set example IDs if not already set
     debug("Initializing examples with IDs and timestamps")
     for idx, example in enumerate(evaluation_run.examples):
-
-        example.example_id = idx
-        debug(f"Set example ID {idx} for input: {example.input[:50]}...")
+        example.example_index = idx  # Set numeric index
         example.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         with example_logging_context(example.timestamp, example.example_id):
-            debug(f"Initialized example {example.example_id}")
+            debug(f"Initialized example {example.example_id} (index: {example.example_index})")
             debug(f"Input: {example.input}")
             debug(f"Actual output: {example.actual_output}")
         if example.expected_output:
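This last hunk is the counterpart to the Example changes: run_eval no longer overwrites the UUID example_id with the loop index; ordering now lives in example_index. A sketch of the resulting invariant, with illustrative examples:

from datetime import datetime
from judgeval.data import Example

examples = [Example(input="q1", actual_output="a1"),
            Example(input="q2", actual_output="a2")]

for idx, ex in enumerate(examples):
    ex.example_index = idx  # ordering, assigned per run
    ex.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    assert ex.example_id != str(idx)  # identity stays a UUID string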
{judgeval-0.0.11.dist-info → judgeval-0.0.13.dist-info}/RECORD
CHANGED
@@ -1,22 +1,22 @@
 judgeval/__init__.py,sha256=xiiG4CkeaOtey4fusCd9CBz0BVqzTIbV-K2EFIU0rUM,283
 judgeval/clients.py,sha256=Ns5ljrgPPXUMo7fSPJxO12H64lcPyKeQPIVG_RMi2cM,1162
-judgeval/constants.py,sha256=
+judgeval/constants.py,sha256=43hGesvBbX1uzc4KXvjLCVdd6cyZRMSnEJp11oA7h74,2794
 judgeval/evaluation_run.py,sha256=ev-IbL34SwRv8lwB4KHfYag1jYo6b049R8mmwNBqmnM,5923
-judgeval/judgment_client.py,sha256=
-judgeval/run_evaluation.py,sha256=
+judgeval/judgment_client.py,sha256=7vaarj6zXQmQ44m0cVCe72S4e92eZ4tK8sqNTnx4FLQ,14957
+judgeval/run_evaluation.py,sha256=vl6TcwJVH2jN60Gja1E1tPI3Jvv6YNeNMTDVTcWkqZY,20520
 judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
 judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
 judgeval/common/logger.py,sha256=QXN3UMymmKu2iMEMEgATLBnMDjGr_pE2iOSEFoICgg8,6092
-judgeval/common/tracer.py,sha256=
+judgeval/common/tracer.py,sha256=szU7mhyMIoG9EvPIb6dtxv7ix83WVuv7TtVX31FWMoQ,33582
 judgeval/common/utils.py,sha256=3WRyyX0tvnnj_VAVlEdtZrfzyWj6zfX04xdpCtE1m5Y,33736
 judgeval/data/__init__.py,sha256=YferxwmUqoBi18hrdgro0BD0h4pt20LAqISeUzGMcVU,474
 judgeval/data/api_example.py,sha256=vwWFbI6eJr5VgURCRbuSiMtEXLUbTCih_BcaqEBy-pg,4108
-judgeval/data/example.py,sha256=
+judgeval/data/example.py,sha256=r_ZA_Fq0k-1xSutSLURwj0-Ug0C_yQl4GQlqtDxbYT0,2771
 judgeval/data/result.py,sha256=8FIO-bFKPegZuByKRjA2_sumjb8oGWQ5ZeQ1RVz5z2w,4393
 judgeval/data/scorer_data.py,sha256=pYljblCPZrlMIv5Eg7R-clnmsqzUBAwokKjZpwa0DXE,3280
 judgeval/data/datasets/__init__.py,sha256=eO6ayeM_bTGwIt0eDSlTBIIBvXvIWRWWSfYZrZROPiQ,265
-judgeval/data/datasets/dataset.py,sha256=
-judgeval/data/datasets/eval_dataset_client.py,sha256=
+judgeval/data/datasets/dataset.py,sha256=6-BhkGiwMmvROxnFbefgzsFZy7wAaLi9kiTQ6p0h_xk,11928
+judgeval/data/datasets/eval_dataset_client.py,sha256=6wybPyt0BjrMQcOl3cTkcY3c9Pbm_K1fnpMiuzh56E4,11006
 judgeval/data/datasets/ground_truth.py,sha256=OTBs3VZe-Wp0vEXEsq14GPZHYtpWT16bhGQTycIvkKc,2057
 judgeval/data/datasets/utils.py,sha256=lQxyl7mevct7JcDSyIrU_8QOzT-EYPWEvoUiAeOdeek,2502
 judgeval/judges/__init__.py,sha256=tyQ5KY88Kp1Ctfw2IJxnVEpy8DnFCtmy04JdPOpp-As,339
@@ -78,7 +78,7 @@ judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarizat
 judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py,sha256=JUB3TMqS1OHr6PqpIGqkyiBNbyfUaw7lZuUATjU3_ek,168
 judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py,sha256=CYGRJY5EuyICYzHrmFdLykwXakX8AC7G3Bhj7p6szfY,5493
 judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
-judgeval-0.0.
-judgeval-0.0.
-judgeval-0.0.
-judgeval-0.0.
+judgeval-0.0.13.dist-info/METADATA,sha256=6BQFdiV0_9Oe119PBqfNnmgX1ZWXjN-_6x0q9lVvnDg,1283
+judgeval-0.0.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.0.13.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.0.13.dist-info/RECORD,,
{judgeval-0.0.11.dist-info → judgeval-0.0.13.dist-info}/WHEEL
File without changes
{judgeval-0.0.11.dist-info → judgeval-0.0.13.dist-info}/licenses/LICENSE.md
File without changes