judgeval 0.0.54__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. judgeval/common/api/__init__.py +3 -0
  2. judgeval/common/api/api.py +352 -0
  3. judgeval/common/api/constants.py +165 -0
  4. judgeval/common/storage/__init__.py +6 -0
  5. judgeval/common/tracer/__init__.py +31 -0
  6. judgeval/common/tracer/constants.py +22 -0
  7. judgeval/common/tracer/core.py +1916 -0
  8. judgeval/common/tracer/otel_exporter.py +108 -0
  9. judgeval/common/tracer/otel_span_processor.py +234 -0
  10. judgeval/common/tracer/span_processor.py +37 -0
  11. judgeval/common/tracer/span_transformer.py +211 -0
  12. judgeval/common/tracer/trace_manager.py +92 -0
  13. judgeval/common/utils.py +2 -2
  14. judgeval/constants.py +3 -30
  15. judgeval/data/datasets/eval_dataset_client.py +29 -156
  16. judgeval/data/judgment_types.py +4 -12
  17. judgeval/data/result.py +1 -1
  18. judgeval/data/scorer_data.py +2 -2
  19. judgeval/data/scripts/openapi_transform.py +1 -1
  20. judgeval/data/trace.py +66 -1
  21. judgeval/data/trace_run.py +0 -3
  22. judgeval/evaluation_run.py +0 -2
  23. judgeval/integrations/langgraph.py +43 -164
  24. judgeval/judgment_client.py +17 -211
  25. judgeval/run_evaluation.py +209 -611
  26. judgeval/scorers/__init__.py +2 -6
  27. judgeval/scorers/base_scorer.py +4 -23
  28. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +3 -3
  29. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +215 -0
  30. judgeval/scorers/score.py +2 -1
  31. judgeval/scorers/utils.py +1 -13
  32. judgeval/utils/requests.py +21 -0
  33. judgeval-0.1.0.dist-info/METADATA +202 -0
  34. {judgeval-0.0.54.dist-info → judgeval-0.1.0.dist-info}/RECORD +37 -29
  35. judgeval/common/tracer.py +0 -3215
  36. judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +0 -73
  37. judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -3
  38. judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -3
  39. judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -53
  40. judgeval-0.0.54.dist-info/METADATA +0 -1384
  41. /judgeval/common/{s3_storage.py → storage/s3_storage.py} +0 -0
  42. {judgeval-0.0.54.dist-info → judgeval-0.1.0.dist-info}/WHEEL +0 -0
  43. {judgeval-0.0.54.dist-info → judgeval-0.1.0.dist-info}/licenses/LICENSE.md +0 -0
judgeval/common/utils.py CHANGED
@@ -13,6 +13,7 @@ import asyncio
  import concurrent.futures
  import os
  from types import TracebackType
+ from judgeval.common.api.constants import ROOT_API
  from judgeval.utils.requests import requests
  import pprint
  from typing import Any, Dict, List, Mapping, Optional, TypeAlias, Union, TypeGuard
@@ -27,7 +28,6 @@ from judgeval.clients import async_together_client, together_client
  from judgeval.constants import (
      ACCEPTABLE_MODELS,
      MAX_WORKER_THREADS,
-     ROOT_API,
      TOGETHER_SUPPORTED_MODELS,
      LITELLM_SUPPORTED_MODELS,
  )
@@ -128,7 +128,7 @@ def validate_api_key(judgment_api_key: str):
          "Content-Type": "application/json",
          "Authorization": f"Bearer {judgment_api_key}",
      },
-     json={},  # Empty body now
+     json={},
      verify=True,
  )
  if response.status_code == 200:
judgeval/constants.py CHANGED
@@ -39,36 +39,6 @@ UNBOUNDED_SCORERS: set[APIScorerType] = (
      set()
  )  # scorers whose scores are not bounded between 0-1

- ROOT_API = os.getenv("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")
- # API URLs
- JUDGMENT_EVAL_API_URL = f"{ROOT_API}/evaluate/"
- JUDGMENT_TRACE_EVAL_API_URL = f"{ROOT_API}/evaluate_trace/"
- JUDGMENT_DATASETS_PUSH_API_URL = f"{ROOT_API}/datasets/push/"
- JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL = f"{ROOT_API}/datasets/insert_examples/"
- JUDGMENT_DATASETS_PULL_API_URL = f"{ROOT_API}/datasets/pull_for_judgeval/"
- JUDGMENT_DATASETS_DELETE_API_URL = f"{ROOT_API}/datasets/delete/"
- JUDGMENT_DATASETS_EXPORT_JSONL_API_URL = f"{ROOT_API}/datasets/export_jsonl/"
- JUDGMENT_DATASETS_PROJECT_STATS_API_URL = f"{ROOT_API}/datasets/fetch_stats_by_project/"
- JUDGMENT_DATASETS_INSERT_API_URL = f"{ROOT_API}/datasets/insert_examples/"
- JUDGMENT_EVAL_LOG_API_URL = f"{ROOT_API}/log_eval_results/"
- JUDGMENT_EVAL_FETCH_API_URL = f"{ROOT_API}/fetch_experiment_run/"
- JUDGMENT_EVAL_DELETE_API_URL = (
-     f"{ROOT_API}/delete_eval_results_by_project_and_run_names/"
- )
- JUDGMENT_EVAL_DELETE_PROJECT_API_URL = f"{ROOT_API}/delete_eval_results_by_project/"
- JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/delete/"
- JUDGMENT_PROJECT_CREATE_API_URL = f"{ROOT_API}/projects/add/"
- JUDGMENT_TRACES_FETCH_API_URL = f"{ROOT_API}/traces/fetch/"
- JUDGMENT_TRACES_SAVE_API_URL = f"{ROOT_API}/traces/save/"
- JUDGMENT_TRACES_UPSERT_API_URL = f"{ROOT_API}/traces/upsert/"
- JUDGMENT_TRACES_DELETE_API_URL = f"{ROOT_API}/traces/delete/"
- JUDGMENT_TRACES_ADD_ANNOTATION_API_URL = f"{ROOT_API}/traces/add_annotation/"
- JUDGMENT_TRACES_SPANS_BATCH_API_URL = f"{ROOT_API}/traces/spans/batch/"
- JUDGMENT_TRACES_EVALUATION_RUNS_BATCH_API_URL = (
-     f"{ROOT_API}/traces/evaluation_runs/batch/"
- )
- JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL = f"{ROOT_API}/add_to_run_eval_queue/"
- JUDGMENT_GET_EVAL_STATUS_API_URL = f"{ROOT_API}/get_evaluation_status/"
  # RabbitMQ
  RABBITMQ_HOST = os.getenv(
      "RABBITMQ_HOST", "rabbitmq-networklb-faa155df16ec9085.elb.us-west-1.amazonaws.com"
@@ -145,3 +115,6 @@ MAX_WORKER_THREADS = 10

  # Maximum number of concurrent operations for evaluation runs
  MAX_CONCURRENT_EVALUATIONS = 50  # Adjust based on system capabilities
+
+ # Span lifecycle management
+ SPAN_LIFECYCLE_END_UPDATE_ID = 20  # Default ending number for completed spans
judgeval/data/datasets/eval_dataset_client.py CHANGED
@@ -1,27 +1,17 @@
  from typing import Optional, List
- from requests import Response, exceptions
- from judgeval.utils.requests import requests
  from rich.progress import Progress, SpinnerColumn, TextColumn
  from judgeval.common.logger import judgeval_logger
- from judgeval.constants import (
-     JUDGMENT_DATASETS_PUSH_API_URL,
-     JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
-     JUDGMENT_DATASETS_PULL_API_URL,
-     JUDGMENT_DATASETS_PROJECT_STATS_API_URL,
-     JUDGMENT_DATASETS_DELETE_API_URL,
-     JUDGMENT_DATASETS_EXPORT_JSONL_API_URL,
- )
+ from judgeval.common.api import JudgmentApiClient
  from judgeval.data import Example, Trace
  from judgeval.data.datasets import EvalDataset


  class EvalDatasetClient:
      def __init__(self, judgment_api_key: str, organization_id: str):
-         self.judgment_api_key = judgment_api_key
-         self.organization_id = organization_id
+         self.api_client = JudgmentApiClient(judgment_api_key, organization_id)

      def create_dataset(self) -> EvalDataset:
-         return EvalDataset(judgment_api_key=self.judgment_api_key)
+         return EvalDataset(judgment_api_key=self.api_client.api_key)

      def push(
          self,
@@ -55,39 +45,17 @@ class EvalDatasetClient:
                  f"Pushing [rgb(106,0,255)]'{alias}' to Judgment...",
                  total=100,
              )
-             content = {
-                 "dataset_alias": alias,
-                 "project_name": project_name,
-                 "examples": [e.to_dict() for e in dataset.examples],
-                 "traces": [t.model_dump() for t in dataset.traces],
-                 "overwrite": overwrite,
-             }
              try:
-                 response = requests.post(
-                     JUDGMENT_DATASETS_PUSH_API_URL,
-                     json=content,
-                     headers={
-                         "Content-Type": "application/json",
-                         "Authorization": f"Bearer {self.judgment_api_key}",
-                         "X-Organization-Id": self.organization_id,
-                     },
-                     verify=True,
+                 payload = self.api_client.push_dataset(
+                     dataset_alias=alias,
+                     project_name=project_name,
+                     examples=[e.to_dict() for e in dataset.examples],
+                     traces=[t.model_dump() for t in dataset.traces],
+                     overwrite=overwrite or False,
                  )
-                 if response.status_code != 200:
-                     judgeval_logger.error(
-                         f"Server error during push: {response.json()}"
-                     )
-                     raise Exception(f"Server error during push: {response.json()}")
-                 response.raise_for_status()
-             except exceptions.HTTPError as err:
-                 if response.status_code == 422:
-                     judgeval_logger.error(
-                         f"Validation error during push: {err.response.json()}"
-                     )
-                 else:
-                     judgeval_logger.error(f"HTTP error during push: {err}")
-
-             payload = response.json()
+             except Exception as e:
+                 judgeval_logger.error(f"Error during push: {e}")
+                 raise
              dataset._alias = payload.get("_alias")
              dataset._id = payload.get("_id")
              progress.update(
@@ -122,35 +90,15 @@ class EvalDatasetClient:
                  f"Appending [rgb(106,0,255)]'{alias}' to Judgment...",
                  total=100,
              )
-             content = {
-                 "dataset_alias": alias,
-                 "project_name": project_name,
-                 "examples": [e.to_dict() for e in examples],
-             }
              try:
-                 response = requests.post(
-                     JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
-                     json=content,
-                     headers={
-                         "Content-Type": "application/json",
-                         "Authorization": f"Bearer {self.judgment_api_key}",
-                         "X-Organization-Id": self.organization_id,
-                     },
-                     verify=True,
+                 self.api_client.append_examples(
+                     dataset_alias=alias,
+                     project_name=project_name,
+                     examples=[e.to_dict() for e in examples],
                  )
-                 if response.status_code != 200:
-                     judgeval_logger.error(
-                         f"Server error during append: {response.json()}"
-                     )
-                     raise Exception(f"Server error during append: {response.json()}")
-                 response.raise_for_status()
-             except exceptions.HTTPError as err:
-                 if response.status_code == 422:
-                     judgeval_logger.error(
-                         f"Validation error during append: {err.response.json()}"
-                     )
-                 else:
-                     judgeval_logger.error(f"HTTP error during append: {err}")
+             except Exception as e:
+                 judgeval_logger.error(f"Error during append: {e}")
+                 raise

              progress.update(
                  task_id,
@@ -186,25 +134,14 @@ class EvalDatasetClient:
                  f"Pulling [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] from Judgment...",
                  total=100,
              )
-             request_body = {"dataset_alias": alias, "project_name": project_name}
-
              try:
-                 response = requests.post(
-                     JUDGMENT_DATASETS_PULL_API_URL,
-                     json=request_body,
-                     headers={
-                         "Content-Type": "application/json",
-                         "Authorization": f"Bearer {self.judgment_api_key}",
-                         "X-Organization-Id": self.organization_id,
-                     },
-                     verify=True,
+                 payload = self.api_client.pull_dataset(
+                     dataset_alias=alias,
+                     project_name=project_name,
                  )
-                 response.raise_for_status()
-             except exceptions.RequestException as e:
+             except Exception as e:
                  judgeval_logger.error(f"Error pulling dataset: {str(e)}")
                  raise
-
-             payload = response.json()
              dataset.examples = [Example(**e) for e in payload.get("examples", [])]
              dataset.traces = [Trace(**t) for t in payload.get("traces", [])]
              dataset._alias = payload.get("alias")
@@ -226,21 +163,12 @@ class EvalDatasetClient:
                  f"Deleting [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] from Judgment...",
                  total=100,
              )
-             request_body = {"dataset_alias": alias, "project_name": project_name}
-
              try:
-                 response = requests.post(
-                     JUDGMENT_DATASETS_DELETE_API_URL,
-                     json=request_body,
-                     headers={
-                         "Content-Type": "application/json",
-                         "Authorization": f"Bearer {self.judgment_api_key}",
-                         "X-Organization-Id": self.organization_id,
-                     },
-                     verify=True,
+                 self.api_client.delete_dataset(
+                     dataset_alias=alias,
+                     project_name=project_name,
                  )
-                 response.raise_for_status()
-             except exceptions.RequestException as e:
+             except Exception as e:
                  judgeval_logger.error(f"Error deleting dataset: {str(e)}")
                  raise

@@ -272,70 +200,15 @@ class EvalDatasetClient:
                  "Pulling [rgb(106,0,255)]' datasets'[/rgb(106,0,255)] from Judgment...",
                  total=100,
              )
-             request_body = {"project_name": project_name}
-
              try:
-                 response = requests.post(
-                     JUDGMENT_DATASETS_PROJECT_STATS_API_URL,
-                     json=request_body,
-                     headers={
-                         "Content-Type": "application/json",
-                         "Authorization": f"Bearer {self.judgment_api_key}",
-                         "X-Organization-Id": self.organization_id,
-                     },
-                     verify=True,
-                 )
-                 response.raise_for_status()
-             except exceptions.RequestException as e:
+                 payload = self.api_client.get_project_dataset_stats(project_name)
+             except Exception as e:
                  judgeval_logger.error(f"Error pulling dataset: {str(e)}")
                  raise

-             payload = response.json()
-
              progress.update(
                  task_id,
                  description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
              )

              return payload
-
-     def export_jsonl(self, alias: str, project_name: str) -> Response:
-         """Export dataset in JSONL format from Judgment platform"""
-         with Progress(
-             SpinnerColumn(style="rgb(106,0,255)"),
-             TextColumn("[progress.description]{task.description}"),
-             transient=False,
-         ) as progress:
-             task_id = progress.add_task(
-                 f"Exporting [rgb(106,0,255)]'{alias}'[/rgb(106,0,255)] as JSONL...",
-                 total=100,
-             )
-             try:
-                 response = requests.post(
-                     JUDGMENT_DATASETS_EXPORT_JSONL_API_URL,
-                     json={"dataset_alias": alias, "project_name": project_name},
-                     headers={
-                         "Content-Type": "application/json",
-                         "Authorization": f"Bearer {self.judgment_api_key}",
-                         "X-Organization-Id": self.organization_id,
-                     },
-                     stream=True,
-                     verify=True,
-                 )
-                 response.raise_for_status()
-             except exceptions.HTTPError as err:
-                 if err.response.status_code == 404:
-                     judgeval_logger.error(f"Dataset not found: {alias}")
-                 else:
-                     judgeval_logger.error(f"HTTP error during export: {err}")
-                 raise
-             except Exception as e:
-                 judgeval_logger.error(f"Error during export: {str(e)}")
-                 raise
-
-             progress.update(
-                 task_id,
-                 description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
-             )
-
-             return response
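Every method in this file follows the same refactor: the hand-rolled requests.post call (URL constant, auth headers, manual status-code checks) collapses into a single JudgmentApiClient method wrapped in a try/except, with the client assumed to raise on failure. A minimal usage sketch of the new pattern, using only the constructor and pull_dataset call visible above; the credential and dataset names are placeholders:

    from judgeval.common.api import JudgmentApiClient
    from judgeval.common.logger import judgeval_logger

    api_client = JudgmentApiClient("my-api-key", "my-org-id")  # placeholder credentials
    try:
        # One call replaces the old URL constant + headers + status handling
        payload = api_client.pull_dataset(dataset_alias="qa-examples", project_name="demo")
    except Exception as e:
        judgeval_logger.error(f"Error pulling dataset: {e}")
        raise
    examples = payload.get("examples", [])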
judgeval/data/judgment_types.py CHANGED
@@ -1,6 +1,6 @@
  # generated by datamodel-codegen:
  #   filename: openapi_new.json
- #   timestamp: 2025-07-12T17:11:33+00:00
+ #   timestamp: 2025-07-17T03:14:16+00:00

  from __future__ import annotations

@@ -94,9 +94,6 @@ class TraceSpanJudgmentType(BaseModel):
      output: Annotated[Any, Field(title="Output")] = None
      usage: Optional[TraceUsageJudgmentType] = None
      duration: Annotated[Optional[float], Field(title="Duration")] = None
-     annotation: Annotated[Optional[List[Dict[str, Any]]], Field(title="Annotation")] = (
-         None
-     )
      expected_tools: Annotated[
          Optional[List[ToolJudgmentType]], Field(title="Expected Tools")
      ] = None
@@ -176,6 +173,7 @@ class ScoringResultJudgmentType(BaseModel):
      ] = None
      trace_id: Annotated[Optional[str], Field(title="Trace Id")] = None
      run_duration: Annotated[Optional[float], Field(title="Run Duration")] = None
+     evaluation_cost: Annotated[Optional[float], Field(title="Evaluation Cost")] = None


  class TraceRunJudgmentType(BaseModel):
@@ -184,11 +182,8 @@ class TraceRunJudgmentType(BaseModel):
      traces: Annotated[List[TraceJudgmentType], Field(title="Traces")]
      scorers: Annotated[List[ScorerConfigJudgmentType], Field(title="Scorers")]
      model: Annotated[str, Field(title="Model")]
-     judgment_api_key: Annotated[Optional[str], Field(title="Judgment Api Key")] = None
      append: Annotated[Optional[bool], Field(title="Append")] = False
-     override_existing_eval_run_name: Annotated[
-         Optional[bool], Field(title="Override Existing Eval Run Name")
-     ] = False
+     override: Annotated[Optional[bool], Field(title="Override")] = False
      trace_span_id: Annotated[Optional[str], Field(title="Trace Span Id")] = None
      tools: Annotated[Optional[List[Dict[str, Any]]], Field(title="Tools")] = None

@@ -199,11 +194,8 @@ class JudgmentEvalJudgmentType(BaseModel):
      examples: Annotated[List[ExampleJudgmentType], Field(title="Examples")]
      scorers: Annotated[List[ScorerConfigJudgmentType], Field(title="Scorers")]
      model: Annotated[str, Field(title="Model")]
-     judgment_api_key: Annotated[Optional[str], Field(title="Judgment Api Key")] = None
      append: Annotated[Optional[bool], Field(title="Append")] = False
-     override_existing_eval_run_name: Annotated[
-         Optional[bool], Field(title="Override Existing Eval Run Name")
-     ] = False
+     override: Annotated[Optional[bool], Field(title="Override")] = False
      trace_span_id: Annotated[Optional[str], Field(title="Trace Span Id")] = None

judgeval/data/result.py CHANGED
@@ -30,7 +30,7 @@ class ScoringResult(ScoringResultJudgmentType):
      def __str__(self) -> str:
          return f"ScoringResult(\
          success={self.success}, \
-         scorer_data={self.scorers_data}, \
+         scorers_data={self.scorers_data}, \
          data_object={self.data_object}, \
          run_duration={self.run_duration})"

judgeval/data/scorer_data.py CHANGED
@@ -54,7 +54,7 @@ def create_scorer_data(scorer: BaseScorer) -> List[ScorerData]:
          reason=scorer.reason,
          success=scorer.success,
          strict_mode=scorer.strict_mode,
-         evaluation_model=scorer.evaluation_model,
+         evaluation_model=scorer.model,
          error=scorer.error,
          additional_metadata=scorer.additional_metadata,
      )
@@ -68,7 +68,7 @@ def create_scorer_data(scorer: BaseScorer) -> List[ScorerData]:
          reason=scorer.internal_scorer.reason,
          success=scorer.internal_scorer.success,
          strict_mode=scorer.internal_scorer.strict_mode,
-         evaluation_model=scorer.internal_scorer.evaluation_model,
+         evaluation_model=scorer.internal_scorer.model,
          error=scorer.internal_scorer.error,
          additional_metadata=scorer.internal_scorer.additional_metadata,
      )
judgeval/data/scripts/openapi_transform.py CHANGED
@@ -1,7 +1,7 @@
  import json
  import sys
  from typing import Any, Dict, Generator, List
- import requests
+ from judgeval.utils.requests import requests

  spec_file = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:8000/openapi.json"

judgeval/data/trace.py CHANGED
@@ -8,6 +8,7 @@ from judgeval.data.judgment_types import (
      TraceSpanJudgmentType,
      TraceJudgmentType,
  )
+ from judgeval.constants import SPAN_LIFECYCLE_END_UPDATE_ID
  from pydantic import BaseModel


@@ -55,6 +56,22 @@ class TraceSpan(TraceSpanJudgmentType):
              self.update_id += 1
              return self.update_id

+     def set_update_id_to_ending_number(
+         self, ending_number: int = SPAN_LIFECYCLE_END_UPDATE_ID
+     ) -> int:
+         """
+         Thread-safe method to set the update_id to a predetermined ending number.
+
+         Args:
+             ending_number (int): The number to set update_id to. Defaults to SPAN_LIFECYCLE_END_UPDATE_ID.
+
+         Returns:
+             int: The new update_id value after setting
+         """
+         with self._update_id_lock:
+             self.update_id = ending_number
+             return self.update_id
+
      def print_span(self):
          """Print the span with proper formatting and parent relationship information."""
          indent = " " * self.depth
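A quick usage sketch of the new helper: given an already-constructed TraceSpan (construction details are outside this diff), pinning the span's update_id to the lifecycle-end sentinel looks like the following; the variable names are illustrative only:

    from judgeval.constants import SPAN_LIFECYCLE_END_UPDATE_ID

    # `span` is assumed to be an existing TraceSpan produced by the tracer.
    final_update_id = span.set_update_id_to_ending_number()  # defaults to the sentinel
    assert final_update_id == SPAN_LIFECYCLE_END_UPDATE_ID   # 20 in 0.1.0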
@@ -73,8 +90,56 @@ class TraceSpan(TraceSpanJudgmentType):

      def safe_stringify(self, output, function_name):
          """
-         Safely converts an object to a string or repr, handling serialization issues gracefully.
+         Safely converts an object to a JSON-serializable structure, handling common object types intelligently.
          """
+         # Handle Pydantic models
+         if hasattr(output, "model_dump"):
+             try:
+                 return output.model_dump()
+             except Exception:
+                 pass
+
+         # Handle LangChain messages and similar objects with content/type
+         if hasattr(output, "content") and hasattr(output, "type"):
+             try:
+                 result = {"type": output.type, "content": output.content}
+                 # Add additional fields if they exist
+                 if hasattr(output, "additional_kwargs"):
+                     result["additional_kwargs"] = output.additional_kwargs
+                 if hasattr(output, "response_metadata"):
+                     result["response_metadata"] = output.response_metadata
+                 if hasattr(output, "name"):
+                     result["name"] = output.name
+                 return result
+             except Exception:
+                 pass
+
+         if hasattr(output, "dict"):
+             try:
+                 return output.dict()
+             except Exception:
+                 pass
+
+         if hasattr(output, "to_dict"):
+             try:
+                 return output.to_dict()
+             except Exception:
+                 pass
+
+         if hasattr(output, "__dataclass_fields__"):
+             try:
+                 import dataclasses
+
+                 return dataclasses.asdict(output)
+             except Exception:
+                 pass
+
+         if hasattr(output, "__dict__"):
+             try:
+                 return output.__dict__
+             except Exception:
+                 pass
+
          try:
              return str(output)
          except (TypeError, OverflowError, ValueError):
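The rewritten safe_stringify now prefers structured output (model_dump for Pydantic models, content/type for LangChain-style messages, then dict/to_dict, dataclasses, __dict__) and only then falls back to str(). A small illustration of that fallback order, assuming `span` is an existing TraceSpan; the example classes are hypothetical:

    import dataclasses
    from pydantic import BaseModel

    class Answer(BaseModel):          # hypothetical tool output
        text: str

    @dataclasses.dataclass
    class Step:                       # hypothetical plan step
        name: str

    span.safe_stringify(Answer(text="hi"), "my_tool")   # -> {"text": "hi"} via model_dump()
    span.safe_stringify(Step(name="plan"), "my_tool")   # -> {"name": "plan"} via dataclasses.asdict()
    span.safe_stringify(3 + 4j, "my_tool")              # -> "(3+4j)" via the str() fallback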
judgeval/data/trace_run.py CHANGED
@@ -16,7 +16,6 @@ class TraceRun(BaseModel):
          scorers (List[Union[JudgmentScorer, BaseScorer]]): A list of scorers to use for evaluation
          model (str): The model used as a judge when using LLM as a Judge
          metadata (Optional[Dict[str, Any]]): Additional metadata to include for this evaluation run, e.g. comments, dataset name, purpose, etc.
-         judgment_api_key (Optional[str]): The API key for running evaluations on the Judgment API
          rules (Optional[List[Rule]]): Rules to evaluate against scoring results
          append (Optional[bool]): Whether to append to existing evaluation results
          tools (Optional[List[Dict[str, Any]]]): List of tools to use for evaluation
@@ -30,8 +29,6 @@ class TraceRun(BaseModel):
      model: Optional[str] = "gpt-4.1"
      trace_span_id: Optional[str] = None
      append: Optional[bool] = False
-     # API Key will be "" until user calls client.run_eval(), then API Key will be set
-     judgment_api_key: Optional[str] = ""
      override: Optional[bool] = False
      rules: Optional[List[Rule]] = None
      tools: Optional[List[Dict[str, Any]]] = None
judgeval/evaluation_run.py CHANGED
@@ -17,7 +17,6 @@ class EvaluationRun(BaseModel):
          scorers (List[Union[JudgmentScorer, BaseScorer]]): A list of scorers to use for evaluation
          model (str): The model used as a judge when using LLM as a Judge
          metadata (Optional[Dict[str, Any]]): Additional metadata to include for this evaluation run, e.g. comments, dataset name, purpose, etc.
-         judgment_api_key (Optional[str]): The API key for running evaluations on the Judgment API
      """

      organization_id: Optional[str] = None
@@ -28,7 +27,6 @@ class EvaluationRun(BaseModel):
      model: Optional[str] = "gpt-4.1"
      trace_span_id: Optional[str] = None
      # API Key will be "" until user calls client.run_eval(), then API Key will be set
-     judgment_api_key: Optional[str] = ""
      override: Optional[bool] = False
      append: Optional[bool] = False