judgeval 0.0.52__py3-none-any.whl → 0.0.54__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/common/logger.py +46 -199
- judgeval/common/s3_storage.py +2 -6
- judgeval/common/tracer.py +182 -262
- judgeval/common/utils.py +16 -36
- judgeval/constants.py +14 -20
- judgeval/data/__init__.py +0 -2
- judgeval/data/datasets/dataset.py +6 -10
- judgeval/data/datasets/eval_dataset_client.py +25 -27
- judgeval/data/example.py +5 -138
- judgeval/data/judgment_types.py +214 -0
- judgeval/data/result.py +7 -25
- judgeval/data/scorer_data.py +28 -40
- judgeval/data/scripts/fix_default_factory.py +23 -0
- judgeval/data/scripts/openapi_transform.py +123 -0
- judgeval/data/tool.py +3 -54
- judgeval/data/trace.py +31 -50
- judgeval/data/trace_run.py +3 -3
- judgeval/evaluation_run.py +16 -23
- judgeval/integrations/langgraph.py +11 -12
- judgeval/judges/litellm_judge.py +3 -6
- judgeval/judges/mixture_of_judges.py +8 -25
- judgeval/judges/together_judge.py +3 -6
- judgeval/judgment_client.py +22 -24
- judgeval/rules.py +7 -19
- judgeval/run_evaluation.py +79 -242
- judgeval/scorers/__init__.py +4 -20
- judgeval/scorers/agent_scorer.py +21 -0
- judgeval/scorers/api_scorer.py +28 -38
- judgeval/scorers/base_scorer.py +98 -0
- judgeval/scorers/example_scorer.py +19 -0
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -20
- judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +10 -17
- judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +9 -24
- judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +16 -68
- judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +4 -12
- judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +4 -4
- judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +10 -17
- judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +4 -4
- judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +4 -4
- judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +4 -4
- judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +18 -14
- judgeval/scorers/score.py +45 -330
- judgeval/scorers/utils.py +6 -88
- judgeval/utils/file_utils.py +4 -6
- judgeval/version_check.py +3 -2
- {judgeval-0.0.52.dist-info → judgeval-0.0.54.dist-info}/METADATA +6 -5
- judgeval-0.0.54.dist-info/RECORD +65 -0
- judgeval/data/custom_example.py +0 -19
- judgeval/scorers/judgeval_scorer.py +0 -177
- judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +0 -45
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -29
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -29
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -32
- judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +0 -28
- judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -38
- judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -27
- judgeval/scorers/prompt_scorer.py +0 -296
- judgeval-0.0.52.dist-info/RECORD +0 -69
- {judgeval-0.0.52.dist-info → judgeval-0.0.54.dist-info}/WHEEL +0 -0
- {judgeval-0.0.52.dist-info → judgeval-0.0.54.dist-info}/licenses/LICENSE.md +0 -0
judgeval/data/scripts/openapi_transform.py
ADDED
@@ -0,0 +1,123 @@
+import json
+import sys
+from typing import Any, Dict, Generator, List
+import requests
+
+spec_file = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:8000/openapi.json"
+
+if spec_file.startswith("http"):
+    r = requests.get(spec_file)
+    r.raise_for_status()
+    SPEC = r.json()
+else:
+    with open(spec_file, "r") as f:
+        SPEC = json.load(f)
+
+JUDGEVAL_PATHS: List[str] = [
+    "/log_eval_results/",
+]
+
+
+def resolve_ref(ref: str) -> str:
+    assert ref.startswith("#/components/schemas/"), (
+        "Reference must start with #/components/schemas/"
+    )
+    return ref.replace("#/components/schemas/", "")
+
+
+def walk(obj: Any) -> Generator[Any, None, None]:
+    yield obj
+    if isinstance(obj, list):
+        for item in obj:
+            yield from walk(item)
+    elif isinstance(obj, dict):
+        for value in obj.values():
+            yield from walk(value)
+
+
+def get_referenced_schemas(obj: Any) -> Generator[str, None, None]:
+    for value in walk(obj):
+        if isinstance(value, dict) and "$ref" in value:
+            ref = value["$ref"]
+            resolved = resolve_ref(ref)
+            assert isinstance(ref, str), "Reference must be a string"
+            # Strip the _JudgmentType suffix if it exists to get the original schema name
+            if resolved.endswith("_JudgmentType"):
+                resolved = resolved[: -len("_JudgmentType")]
+            yield resolved
+
+
+def transform_schema_refs(obj: Any) -> Any:
+    """Transform all $ref values in a schema to use the _JudgmentType suffix"""
+    if isinstance(obj, dict):
+        result = {}
+        for key, value in obj.items():
+            if (
+                key == "$ref"
+                and isinstance(value, str)
+                and value.startswith("#/components/schemas/")
+            ):
+                # Update the reference to use the suffixed name
+                original_name = resolve_ref(value)
+                suffixed_name = f"{original_name}_JudgmentType"
+                result[key] = f"#/components/schemas/{suffixed_name}"
+            else:
+                result[key] = transform_schema_refs(value)
+        return result
+    elif isinstance(obj, list):
+        return [transform_schema_refs(item) for item in obj]
+    else:
+        return obj
+
+
+filtered_paths = {
+    path: spec_data
+    for path, spec_data in SPEC["paths"].items()
+    if path in JUDGEVAL_PATHS
+}
+
+
+def filter_schemas() -> Dict[str, Any]:
+    result: Dict[str, Any] = {}
+    processed_original_names: set[str] = set()
+    schemas_to_scan: Any = filtered_paths
+
+    while True:
+        to_commit: Dict[str, Any] = {}
+        for original_schema_name in get_referenced_schemas(schemas_to_scan):
+            if original_schema_name in processed_original_names:
+                continue
+
+            assert original_schema_name in SPEC["components"]["schemas"], (
+                f"Schema {original_schema_name} not found in components.schemas"
+            )
+            # Transform the schema to update any internal references
+            original_schema = SPEC["components"]["schemas"][original_schema_name]
+            transformed_schema = transform_schema_refs(original_schema)
+            suffixed_name = f"{original_schema_name}_JudgmentType"
+            to_commit[suffixed_name] = transformed_schema
+            processed_original_names.add(original_schema_name)
+
+        if not to_commit:
+            break
+
+        result.update(to_commit)
+        schemas_to_scan = to_commit
+
+    return result
+
+
+# Transform the filtered paths to update schema references
+transformed_paths = transform_schema_refs(filtered_paths)
+
+spec = {
+    "openapi": SPEC["openapi"],
+    "info": SPEC["info"],
+    "paths": transformed_paths,
+    "components": {
+        **SPEC["components"],
+        "schemas": filter_schemas(),
+    },
+}
+
+print(json.dumps(spec, indent=4))
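The script above is new in 0.0.54: it reads an OpenAPI spec from a URL or local path passed as the first argument (defaulting to http://localhost:8000/openapi.json), keeps only the paths listed in JUDGEVAL_PATHS, rewrites every referenced schema to a `_JudgmentType`-suffixed copy, and prints the filtered spec to stdout. A minimal standalone sketch of the `$ref` rewriting it performs, using a made-up schema name purely for illustration:

# Toy illustration of the $ref rewriting done by transform_schema_refs above.
# "ScoringResult" is an illustrative schema name, not necessarily one from the real spec.
fragment = {
    "requestBody": {
        "content": {
            "application/json": {
                "schema": {"$ref": "#/components/schemas/ScoringResult"}
            }
        }
    }
}

def suffix_refs(obj):
    if isinstance(obj, dict):
        return {
            key: (value + "_JudgmentType"
                  if key == "$ref"
                  and isinstance(value, str)
                  and value.startswith("#/components/schemas/")
                  else suffix_refs(value))
            for key, value in obj.items()
        }
    if isinstance(obj, list):
        return [suffix_refs(item) for item in obj]
    return obj

print(suffix_refs(fragment)["requestBody"]["content"]["application/json"]["schema"]["$ref"])
# -> #/components/schemas/ScoringResult_JudgmentType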
judgeval/data/tool.py
CHANGED
@@ -1,56 +1,5 @@
-from
-from typing import Dict, Any, Optional, List
-import warnings
+from judgeval.data.judgment_types import ToolJudgmentType
 
 
-class Tool(
-
-    parameters: Optional[Dict[str, Any]] = None
-    agent_name: Optional[str] = None
-    result_dependencies: Optional[List[Dict[str, Any]]] = None
-    action_dependencies: Optional[List[Dict[str, Any]]] = None
-    require_all: Optional[bool] = None
-
-    @field_validator("tool_name")
-    def validate_tool_name(cls, v):
-        if not v:
-            warnings.warn("Tool name is empty or None", UserWarning)
-        return v
-
-    @field_validator("parameters")
-    def validate_parameters(cls, v):
-        if v is not None and not isinstance(v, dict):
-            warnings.warn(
-                f"Parameters should be a dictionary, got {type(v)}", UserWarning
-            )
-        return v
-
-    @field_validator("agent_name")
-    def validate_agent_name(cls, v):
-        if v is not None and not isinstance(v, str):
-            warnings.warn(f"Agent name should be a string, got {type(v)}", UserWarning)
-        return v
-
-    @field_validator("result_dependencies")
-    def validate_result_dependencies(cls, v):
-        if v is not None and not isinstance(v, list):
-            warnings.warn(
-                f"Result dependencies should be a list, got {type(v)}", UserWarning
-            )
-        return v
-
-    @field_validator("action_dependencies")
-    def validate_action_dependencies(cls, v):
-        if v is not None and not isinstance(v, list):
-            warnings.warn(
-                f"Action dependencies should be a list, got {type(v)}", UserWarning
-            )
-        return v
-
-    @field_validator("require_all")
-    def validate_require_all(cls, v):
-        if v is not None and not isinstance(v, bool):
-            warnings.warn(
-                f"Require all should be a boolean, got {type(v)}", UserWarning
-            )
-        return v
+class Tool(ToolJudgmentType):
+    pass
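The hand-written pydantic model and its warning-only field validators are replaced by a thin subclass of the generated ToolJudgmentType from judgeval/data/judgment_types.py. A hedged usage sketch, assuming the generated type keeps the old field names such as tool_name and parameters (not verified here):

# Sketch only: assumes ToolJudgmentType still exposes tool_name / parameters fields.
from judgeval.data.tool import Tool

tool = Tool(tool_name="search_docs", parameters={"query": "retry policy"})
print(tool.model_dump())
# Field validation is now whatever the generated base class enforces;
# the old warning-only validators are gone.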
judgeval/data/trace.py
CHANGED
@@ -1,44 +1,21 @@
-from
-from typing import Optional, Dict, Any, List
-from judgeval.evaluation_run import EvaluationRun
-from judgeval.data.tool import Tool
+from typing import Any
 import json
 import sys
+import threading
 from datetime import datetime, timezone
+from judgeval.data.judgment_types import (
+    TraceUsageJudgmentType,
+    TraceSpanJudgmentType,
+    TraceJudgmentType,
+)
+from pydantic import BaseModel
 
 
-class TraceUsage(
-
-    completion_tokens: Optional[int] = None
-    total_tokens: Optional[int] = None
-    prompt_tokens_cost_usd: Optional[float] = None
-    completion_tokens_cost_usd: Optional[float] = None
-    total_cost_usd: Optional[float] = None
-    model_name: Optional[str] = None
-
-
-class TraceSpan(BaseModel):
-    span_id: str
-    trace_id: str
-    function: str
-    depth: int
-    created_at: Optional[Any] = None
-    parent_span_id: Optional[str] = None
-    span_type: Optional[str] = "span"
-    inputs: Optional[Dict[str, Any]] = None
-    error: Optional[Dict[str, Any]] = None
-    output: Optional[Any] = None
-    usage: Optional[TraceUsage] = None
-    duration: Optional[float] = None
-    annotation: Optional[List[Dict[str, Any]]] = None
-    evaluation_runs: Optional[List[EvaluationRun]] = []
-    expected_tools: Optional[List[Tool]] = None
-    additional_metadata: Optional[Dict[str, Any]] = None
-    has_evaluation: Optional[bool] = False
-    agent_name: Optional[str] = None
-    state_before: Optional[Dict[str, Any]] = None
-    state_after: Optional[Dict[str, Any]] = None
+class TraceUsage(TraceUsageJudgmentType):
+    pass
 
+
+class TraceSpan(TraceSpanJudgmentType):
     def model_dump(self, **kwargs):
         return {
             "span_id": self.span_id,
@@ -50,9 +27,6 @@ class TraceSpan(BaseModel):
             "inputs": self._serialize_value(self.inputs),
             "output": self._serialize_value(self.output),
             "error": self._serialize_value(self.error),
-            "evaluation_runs": [run.model_dump() for run in self.evaluation_runs]
-            if self.evaluation_runs
-            else [],
             "parent_span_id": self.parent_span_id,
             "function": self.function,
             "duration": self.duration,
@@ -63,8 +37,24 @@ class TraceSpan(BaseModel):
             "state_before": self.state_before,
             "state_after": self.state_after,
             "additional_metadata": self._serialize_value(self.additional_metadata),
+            "update_id": self.update_id,
         }
 
+    def __init__(self, **data):
+        super().__init__(**data)
+        # Initialize thread lock for thread-safe update_id increment
+        self._update_id_lock = threading.Lock()
+
+    def increment_update_id(self) -> int:
+        """
+        Thread-safe method to increment the update_id counter.
+        Returns:
+            int: The new update_id value after incrementing
+        """
+        with self._update_id_lock:
+            self.update_id += 1
+            return self.update_id
+
     def print_span(self):
         """Print the span with proper formatting and parent relationship information."""
         indent = " " * self.depth
@@ -94,6 +84,7 @@ class TraceSpan(BaseModel):
                 return repr(output)
             except (TypeError, OverflowError, ValueError):
                 pass
+
            return None
 
     def _serialize_value(self, value: Any) -> Any:
@@ -140,15 +131,5 @@ class TraceSpan(BaseModel):
            return {"error": "Unable to serialize"}
 
 
-class Trace(
-
-    name: str
-    created_at: str
-    duration: float
-    trace_spans: List[TraceSpan]
-    overwrite: bool = False
-    offline_mode: bool = False
-    rules: Dict[str, Any] = Field(default_factory=dict)
-    has_notification: Optional[bool] = False
-    customer_id: Optional[str] = None
-    tags: List[str] = Field(default_factory=list)
+class Trace(TraceJudgmentType):
+    pass
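TraceSpan now carries an update_id counter plus a private threading.Lock, and mutations are expected to go through increment_update_id() (the langgraph handler below calls it after each span modification). A minimal standalone sketch of the same lock-guarded counter pattern, independent of the judgeval classes:

import threading

class SpanLike:
    """Minimal stand-in for the lock-guarded update_id counter on TraceSpan."""

    def __init__(self):
        self.update_id = 0
        self._update_id_lock = threading.Lock()

    def increment_update_id(self) -> int:
        # The lock ensures concurrent callers never lose an increment.
        with self._update_id_lock:
            self.update_id += 1
            return self.update_id

span = SpanLike()
threads = [threading.Thread(target=span.increment_update_id) for _ in range(8)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(span.update_id)  # always 8, regardless of thread interleaving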
judgeval/data/trace_run.py
CHANGED
@@ -1,7 +1,7 @@
 from pydantic import BaseModel
 from typing import List, Optional, Dict, Any, Union
 from judgeval.data import Trace
-from judgeval.scorers import
+from judgeval.scorers import APIScorerConfig, BaseScorer
 from judgeval.rules import Rule
 
 
@@ -13,7 +13,7 @@ class TraceRun(BaseModel):
         project_name (str): The name of the project the evaluation results belong to
         eval_name (str): A name for this evaluation run
         traces (List[Trace]): The traces to evaluate
-        scorers (List[Union[JudgmentScorer,
+        scorers (List[Union[JudgmentScorer, BaseScorer]]): A list of scorers to use for evaluation
         model (str): The model used as a judge when using LLM as a Judge
         metadata (Optional[Dict[str, Any]]): Additional metadata to include for this evaluation run, e.g. comments, dataset name, purpose, etc.
         judgment_api_key (Optional[str]): The API key for running evaluations on the Judgment API
@@ -26,7 +26,7 @@
     project_name: Optional[str] = None
     eval_name: Optional[str] = None
     traces: Optional[List[Trace]] = None
-    scorers: List[Union[
+    scorers: List[Union[APIScorerConfig, BaseScorer]]
     model: Optional[str] = "gpt-4.1"
     trace_span_id: Optional[str] = None
     append: Optional[bool] = False
judgeval/evaluation_run.py
CHANGED
@@ -1,8 +1,8 @@
 from typing import List, Optional, Union
 from pydantic import BaseModel, field_validator, Field
 
-from judgeval.data import Example
-from judgeval.scorers import
+from judgeval.data import Example
+from judgeval.scorers import BaseScorer, APIScorerConfig
 from judgeval.constants import ACCEPTABLE_MODELS
 
 
@@ -13,8 +13,8 @@ class EvaluationRun(BaseModel):
     Args:
         project_name (str): The name of the project the evaluation results belong to
         eval_name (str): A name for this evaluation run
-        examples (
-        scorers (List[Union[JudgmentScorer,
+        examples (List[Example]): The examples to evaluate
+        scorers (List[Union[JudgmentScorer, BaseScorer]]): A list of scorers to use for evaluation
         model (str): The model used as a judge when using LLM as a Judge
         metadata (Optional[Dict[str, Any]]): Additional metadata to include for this evaluation run, e.g. comments, dataset name, purpose, etc.
         judgment_api_key (Optional[str]): The API key for running evaluations on the Judgment API
@@ -23,8 +23,8 @@ class EvaluationRun(BaseModel):
     organization_id: Optional[str] = None
     project_name: Optional[str] = Field(default=None, validate_default=True)
     eval_name: Optional[str] = Field(default=None, validate_default=True)
-    examples:
-    scorers: List[Union[
+    examples: List[Example]
+    scorers: List[Union[APIScorerConfig, BaseScorer]]
     model: Optional[str] = "gpt-4.1"
     trace_span_id: Optional[str] = None
     # API Key will be "" until user calls client.run_eval(), then API Key will be set
@@ -36,13 +36,8 @@ class EvaluationRun(BaseModel):
         data = super().model_dump(**kwargs)
 
         data["scorers"] = [
-            scorer.
-
-            else scorer.model_dump()
-            if hasattr(scorer, "model_dump")
-            else {"score_type": scorer.score_type, "threshold": scorer.threshold}
-            for scorer in self.scorers
-        ]
+            scorer.model_dump() for scorer in self.scorers
+        ]  # Pydantic has problems with properly calling model_dump() on the scorers, so we need to do it manually
 
         return data
 
@@ -50,21 +45,19 @@ class EvaluationRun(BaseModel):
     def validate_examples(cls, v):
         if not v:
             raise ValueError("Examples cannot be empty.")
-
-        first_type = type(v[0])
-        if first_type not in (Example, CustomExample):
-            raise ValueError(f"Invalid type for Example/CustomExample: {first_type}")
-        if not all(isinstance(ex, first_type) for ex in v):
-            raise ValueError(
-                "All examples must be of the same type, either all Example or all CustomExample."
-            )
-
         return v
 
-    @field_validator("scorers")
+    @field_validator("scorers", mode="before")
     def validate_scorers(cls, v):
         if not v:
             raise ValueError("Scorers cannot be empty.")
+        if not all(
+            isinstance(scorer, BaseScorer) or isinstance(scorer, APIScorerConfig)
+            for scorer in v
+        ):
+            raise ValueError(
+                "All scorers must be of type BaseScorer or APIScorerConfig."
+            )
         return v
 
     @field_validator("model")
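The examples field is now a plain List[Example] (the CustomExample branch is gone), and the scorers validator runs with mode="before" so values that are not BaseScorer or APIScorerConfig instances are rejected before pydantic attempts coercion. A generic pydantic sketch of that "validate before coercion" pattern, using stand-in scorer classes rather than the judgeval ones:

from typing import List, Union
from pydantic import BaseModel, field_validator

class StandInAPIScorer(BaseModel):
    score_type: str = "faithfulness"
    threshold: float = 0.5

class StandInCustomScorer(BaseModel):
    name: str = "my_scorer"

class Run(BaseModel):
    scorers: List[Union[StandInAPIScorer, StandInCustomScorer]]

    @field_validator("scorers", mode="before")
    def validate_scorers(cls, v):
        # Runs before coercion, so raw dicts/strings are rejected up front.
        if not v:
            raise ValueError("Scorers cannot be empty.")
        if not all(isinstance(s, (StandInAPIScorer, StandInCustomScorer)) for s in v):
            raise ValueError("All scorers must be scorer instances.")
        return v

Run(scorers=[StandInAPIScorer()])    # accepted
# Run(scorers=[{"threshold": 0.9}])  # rejected by the before-validator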
judgeval/integrations/langgraph.py
CHANGED
@@ -2,7 +2,7 @@ from typing import Any, Dict, List, Optional, Sequence
 from uuid import UUID
 import time
 import uuid
-from datetime import datetime
+from datetime import datetime, timezone
 
 from judgeval.common.tracer import (
     TraceClient,
@@ -120,8 +120,6 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
                 trace_id,
                 event_name,
                 project_name=project,
-                overwrite=False,
-                rules=self.tracer.rules,
                 enable_monitoring=self.tracer.enable_monitoring,
                 enable_evaluations=self.tracer.enable_evaluations,
             )
@@ -140,7 +138,6 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
            # NEW: Initial save for live tracking (follows the new practice)
            try:
                trace_id_saved, server_response = self._trace_client.save(
-                    overwrite=self._trace_client.overwrite,
                    final_save=False,  # Initial save for live tracking
                )
            except Exception as e:
@@ -210,6 +207,7 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
            # Set both fields on the span
            new_span.inputs = clean_inputs
            new_span.additional_metadata = metadata
+            new_span.increment_update_id()  # Thread-safe increment for span modification
        else:
            new_span.inputs = {}
            new_span.additional_metadata = {}
@@ -249,10 +247,12 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
        trace_span = trace_client.span_id_to_span.get(span_id)
        if trace_span:
            trace_span.duration = duration
+            trace_span.increment_update_id()  # Thread-safe increment for span modification
 
            # Handle outputs and error
            if error:
                trace_span.output = error
+                trace_span.increment_update_id()  # Thread-safe increment for span modification
            elif outputs:
                # Separate metadata from outputs
                metadata = {}
@@ -272,6 +272,7 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
 
                # Set both fields on the span
                trace_span.output = clean_outputs
+                trace_span.increment_update_id()  # Thread-safe increment for span modification
                if metadata:
                    # Merge with existing metadata
                    existing_metadata = trace_span.additional_metadata or {}
@@ -279,6 +280,7 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
                        **existing_metadata,
                        **metadata,
                    }
+                    trace_span.increment_update_id()  # Thread-safe increment for span modification
 
        # Queue span with completed state through background service
        if trace_client.background_span_service:
@@ -308,20 +310,18 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
                complete_trace_data = {
                    "trace_id": self._trace_client.trace_id,
                    "name": self._trace_client.name,
-                    "created_at": datetime.
-                        self._trace_client.start_time
+                    "created_at": datetime.fromtimestamp(
+                        self._trace_client.start_time, timezone.utc
                    ).isoformat(),
                    "duration": self._trace_client.get_duration(),
                    "trace_spans": [
                        span.model_dump() for span in self._trace_client.trace_spans
                    ],
-                    "overwrite": self._trace_client.overwrite,
                    "offline_mode": self.tracer.offline_mode,
                    "parent_trace_id": self._trace_client.parent_trace_id,
                    "parent_name": self._trace_client.parent_name,
                }
                trace_id, trace_data = self._trace_client.save(
-                    overwrite=self._trace_client.overwrite,
                    final_save=True,  # Final save with usage counter updates
                )
                token = self.trace_id_to_token.pop(trace_id, None)
@@ -518,20 +518,18 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
            complete_trace_data = {
                "trace_id": trace_client.trace_id,
                "name": trace_client.name,
-                "created_at": datetime.
-                    trace_client.start_time
+                "created_at": datetime.fromtimestamp(
+                    trace_client.start_time, timezone.utc
                ).isoformat(),
                "duration": trace_client.get_duration(),
                "trace_spans": [
                    span.model_dump() for span in trace_client.trace_spans
                ],
-                "overwrite": trace_client.overwrite,
                "offline_mode": self.tracer.offline_mode,
                "parent_trace_id": trace_client.parent_trace_id,
                "parent_name": trace_client.parent_name,
            }
            trace_id_saved, trace_data = trace_client.save(
-                overwrite=trace_client.overwrite,
                final_save=True,
            )
 
@@ -815,6 +813,7 @@ class JudgevalCallbackHandler(BaseCallbackHandler):
        if span_id and span_id in trace_client.span_id_to_span:
            trace_span = trace_client.span_id_to_span[span_id]
            trace_span.usage = usage
+            trace_span.increment_update_id()  # Thread-safe increment for span modification
 
        self._end_span_tracking(trace_client, run_id, outputs=outputs)
        # --- End Token Usage ---
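Two behavioural changes stand out in the handler: the overwrite/rules plumbing is removed from trace creation and saving, and created_at is now built with an explicitly UTC-aware datetime.fromtimestamp(..., timezone.utc) instead of a naive conversion. A small sketch of the timestamp change (the start_time value here is made up):

from datetime import datetime, timezone

start_time = 1720000000.0  # illustrative time.time()-style float, not a real trace value
created_at = datetime.fromtimestamp(start_time, timezone.utc).isoformat()
print(created_at)  # 2024-07-03T09:46:40+00:00 -- carries an explicit UTC offset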
judgeval/judges/litellm_judge.py
CHANGED
@@ -6,7 +6,7 @@ from judgeval.common.utils import (
     afetch_litellm_api_response,
     fetch_litellm_api_response,
 )
-from judgeval.common.logger import
+from judgeval.common.logger import judgeval_logger
 
 BASE_CONVERSATION = [
     {"role": "system", "content": "You are a helpful assistant."},
@@ -15,7 +15,6 @@ BASE_CONVERSATION = [
 
 class LiteLLMJudge(JudgevalJudge):
     def __init__(self, model: str = "gpt-4.1-mini", **kwargs):
-        debug(f"Initializing LiteLLMJudge with model={model}")
         self.model = model
         self.kwargs = kwargs
         super().__init__(model_name=model)
@@ -25,7 +24,6 @@ class LiteLLMJudge(JudgevalJudge):
         input: Union[str, List[Mapping[str, str]]],
         schema: pydantic.BaseModel = None,
     ) -> str:
-        debug(f"Generating response for input type: {type(input)}")
         if isinstance(input, str):
             convo = BASE_CONVERSATION + [{"role": "user", "content": input}]
             return fetch_litellm_api_response(
@@ -36,7 +34,7 @@ class LiteLLMJudge(JudgevalJudge):
                 model=self.model, messages=input, response_format=schema
             )
         else:
-            error(f"Invalid input type received: {type(input)}")
+            judgeval_logger.error(f"Invalid input type received: {type(input)}")
             raise TypeError(
                 f"Input must be a string or a list of dictionaries. Input type of: {type(input)}"
             )
@@ -46,7 +44,6 @@ class LiteLLMJudge(JudgevalJudge):
         input: Union[str, List[Mapping[str, str]]],
         schema: pydantic.BaseModel = None,
     ) -> str:
-        debug(f"Async generating response for input type: {type(input)}")
         if isinstance(input, str):
             convo = BASE_CONVERSATION + [{"role": "user", "content": input}]
             response = await afetch_litellm_api_response(
@@ -59,7 +56,7 @@ class LiteLLMJudge(JudgevalJudge):
             )
             return response
         else:
-            error(f"Invalid input type received: {type(input)}")
+            judgeval_logger.error(f"Invalid input type received: {type(input)}")
             raise TypeError(
                 f"Input must be a string or a list of dictionaries. Input type of: {type(input)}"
             )
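The free debug()/error() helpers from judgeval.common.logger are dropped in favour of a single judgeval_logger object: per-call debug lines disappear and only the error paths keep logging. A generic sketch of that module-level logger pattern follows; this is an assumption about the shape of the API, not necessarily how judgeval.common.logger constructs judgeval_logger:

import logging

# Hypothetical construction; the real judgeval.common.logger may configure this differently.
judgeval_logger = logging.getLogger("judgeval")
judgeval_logger.addHandler(logging.StreamHandler())
judgeval_logger.setLevel(logging.INFO)

judgeval_logger.error("Invalid input type received: %s", type([]))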