PyPI - judgeval - Versions diffs - 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl - Mend

judgeval 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96)

judgeval/__init__.py +139 -12
judgeval/api/__init__.py +501 -0
judgeval/api/api_types.py +344 -0
judgeval/cli.py +2 -4
judgeval/constants.py +10 -26
judgeval/data/evaluation_run.py +49 -26
judgeval/data/example.py +2 -2
judgeval/data/judgment_types.py +266 -82
judgeval/data/result.py +4 -5
judgeval/data/scorer_data.py +4 -2
judgeval/data/tool.py +2 -2
judgeval/data/trace.py +7 -50
judgeval/data/trace_run.py +7 -4
judgeval/{dataset.py → dataset/__init__.py} +43 -28
judgeval/env.py +67 -0
judgeval/{run_evaluation.py → evaluation/__init__.py} +29 -95
judgeval/exceptions.py +27 -0
judgeval/integrations/langgraph/__init__.py +788 -0
judgeval/judges/__init__.py +2 -2
judgeval/judges/litellm_judge.py +75 -15
judgeval/judges/together_judge.py +86 -18
judgeval/judges/utils.py +7 -21
judgeval/{common/logger.py → logger.py} +8 -6
judgeval/scorers/__init__.py +0 -4
judgeval/scorers/agent_scorer.py +3 -7
judgeval/scorers/api_scorer.py +8 -13
judgeval/scorers/base_scorer.py +52 -32
judgeval/scorers/example_scorer.py +1 -3
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -14
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +45 -20
judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +2 -2
judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +3 -3
judgeval/scorers/score.py +21 -31
judgeval/scorers/trace_api_scorer.py +5 -0
judgeval/scorers/utils.py +1 -103
judgeval/tracer/__init__.py +1075 -2
judgeval/tracer/constants.py +1 -0
judgeval/tracer/exporters/__init__.py +37 -0
judgeval/tracer/exporters/s3.py +119 -0
judgeval/tracer/exporters/store.py +43 -0
judgeval/tracer/exporters/utils.py +32 -0
judgeval/tracer/keys.py +67 -0
judgeval/tracer/llm/__init__.py +1233 -0
judgeval/{common/tracer → tracer/llm}/providers.py +5 -10
judgeval/{local_eval_queue.py → tracer/local_eval_queue.py} +15 -10
judgeval/tracer/managers.py +188 -0
judgeval/tracer/processors/__init__.py +181 -0
judgeval/tracer/utils.py +20 -0
judgeval/trainer/__init__.py +5 -0
judgeval/{common/trainer → trainer}/config.py +12 -9
judgeval/{common/trainer → trainer}/console.py +2 -9
judgeval/{common/trainer → trainer}/trainable_model.py +12 -7
judgeval/{common/trainer → trainer}/trainer.py +119 -17
judgeval/utils/async_utils.py +2 -3
judgeval/utils/decorators.py +24 -0
judgeval/utils/file_utils.py +37 -4
judgeval/utils/guards.py +32 -0
judgeval/utils/meta.py +14 -0
judgeval/{common/api/json_encoder.py → utils/serialize.py} +7 -1
judgeval/utils/testing.py +88 -0
judgeval/utils/url.py +10 -0
judgeval/{version_check.py → utils/version_check.py} +3 -3
judgeval/version.py +5 -0
judgeval/warnings.py +4 -0
{judgeval-0.8.0.dist-info → judgeval-0.9.0.dist-info}/METADATA +12 -14
judgeval-0.9.0.dist-info/RECORD +80 -0
judgeval/clients.py +0 -35
judgeval/common/__init__.py +0 -13
judgeval/common/api/__init__.py +0 -3
judgeval/common/api/api.py +0 -375
judgeval/common/api/constants.py +0 -186
judgeval/common/exceptions.py +0 -27
judgeval/common/storage/__init__.py +0 -6
judgeval/common/storage/s3_storage.py +0 -97
judgeval/common/tracer/__init__.py +0 -31
judgeval/common/tracer/constants.py +0 -22
judgeval/common/tracer/core.py +0 -2427
judgeval/common/tracer/otel_exporter.py +0 -108
judgeval/common/tracer/otel_span_processor.py +0 -188
judgeval/common/tracer/span_processor.py +0 -37
judgeval/common/tracer/span_transformer.py +0 -207
judgeval/common/tracer/trace_manager.py +0 -101
judgeval/common/trainer/__init__.py +0 -5
judgeval/common/utils.py +0 -948
judgeval/integrations/langgraph.py +0 -844
judgeval/judges/mixture_of_judges.py +0 -287
judgeval/judgment_client.py +0 -267
judgeval/rules.py +0 -521
judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
judgeval/utils/alerts.py +0 -93
judgeval/utils/requests.py +0 -50
judgeval-0.8.0.dist-info/RECORD +0 -82
{judgeval-0.8.0.dist-info → judgeval-0.9.0.dist-info}/WHEEL +0 -0
{judgeval-0.8.0.dist-info → judgeval-0.9.0.dist-info}/entry_points.txt +0 -0
{judgeval-0.8.0.dist-info → judgeval-0.9.0.dist-info}/licenses/LICENSE.md +0 -0

judgeval/{common/tracer → tracer/llm}/providers.py RENAMED Viewed

@@ -1,12 +1,7 @@
 from __future__ import annotations
-import logging
 from typing import Any, TypeAlias
-logger = logging.getLogger(__name__)
-# TODO: Have functions that assert and return the relevant exports when the client is installed.
-# The method should raise if the user tries to access client information that doesnt exist.
 HAS_OPENAI = False
 openai_OpenAI = None
 openai_AsyncOpenAI = None
@@ -35,7 +30,7 @@ together_Together = None
 together_AsyncTogether = None
 try:
-    from together import Together, AsyncTogether
+    from together import Together, AsyncTogether  # type: ignore[import-untyped]
     together_Together = Together
     together_AsyncTogether = AsyncTogether
@@ -49,7 +44,7 @@ anthropic_Anthropic = None
 anthropic_AsyncAnthropic = None
 try:
-    from anthropic import Anthropic, AsyncAnthropic
+    from anthropic import Anthropic, AsyncAnthropic  # type: ignore[import-untyped]
     anthropic_Anthropic = Anthropic
     anthropic_AsyncAnthropic = AsyncAnthropic
@@ -63,8 +58,8 @@ google_genai_Client = None
 google_genai_cleint_AsyncClient = None
 try:
-    from google.genai import Client
-    from google.genai.client import AsyncClient
+    from google.genai import Client  # type: ignore[import-untyped]
+    from google.genai.client import AsyncClient  # type: ignore[import-untyped]
     google_genai_Client = Client
     google_genai_AsyncClient = AsyncClient
@@ -78,7 +73,7 @@ groq_Groq = None
 groq_AsyncGroq = None
 try:
-    from groq import Groq, AsyncGroq
+    from groq import Groq, AsyncGroq  # type: ignore[import-untyped]
     groq_Groq = Groq
     groq_AsyncGroq = AsyncGroq

judgeval/{local_eval_queue.py → tracer/local_eval_queue.py} RENAMED Viewed

@@ -10,12 +10,14 @@ import threading
 from typing import Callable, List, Optional
 import time
-from judgeval.common.logger import judgeval_logger
-from judgeval.constants import MAX_CONCURRENT_EVALUATIONS
+from judgeval.logger import judgeval_logger
+from judgeval.env import JUDGMENT_MAX_CONCURRENT_EVALUATIONS
 from judgeval.data import ScoringResult
 from judgeval.data.evaluation_run import EvaluationRun
 from judgeval.utils.async_utils import safe_run_async
 from judgeval.scorers.score import a_execute_scoring
+from judgeval.api import JudgmentSyncClient
+from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_ORG_ID
 class LocalEvaluationQueue:
@@ -26,7 +28,9 @@ class LocalEvaluationQueue:
     """
     def __init__(
-        self, max_concurrent: int = MAX_CONCURRENT_EVALUATIONS, num_workers: int = 4
+        self,
+        max_concurrent: int = JUDGMENT_MAX_CONCURRENT_EVALUATIONS,
+        num_workers: int = 4,
     ):
         if num_workers <= 0:
             raise ValueError("num_workers must be a positive integer.")
@@ -35,6 +39,10 @@ class LocalEvaluationQueue:
         self._num_workers = num_workers  # Number of worker threads
         self._worker_threads: List[threading.Thread] = []
         self._shutdown_event = threading.Event()
+        self._api_client = JudgmentSyncClient(
+            api_key=JUDGMENT_API_KEY,
+            organization_id=JUDGMENT_ORG_ID,
+        )
     def enqueue(self, evaluation_run: EvaluationRun) -> None:
         """Add evaluation run to the queue."""
@@ -81,13 +89,8 @@ class LocalEvaluationQueue:
     def start_workers(
         self,
-        callback: Optional[Callable[[EvaluationRun, List[ScoringResult]], None]] = None,
     ) -> List[threading.Thread]:
         """Start multiple background threads to process runs in parallel.
-        Args:
-            callback: Optional function called after each run with (run, results).
         Returns:
             List of started worker threads.
         """
@@ -105,8 +108,10 @@ class LocalEvaluationQueue:
                     try:
                         results = self._process_run(run)
-                        if callback:
-                            callback(run, results)
+                        results_dict = [result.model_dump() for result in results]
+                        self._api_client.log_eval_results(
+                            payload={"results": results_dict, "run": run.model_dump()}
+                        )
                     except Exception as exc:
                         judgeval_logger.error(
                             f"Worker {worker_id} error processing {run.eval_name}: {exc}"

judgeval/tracer/managers.py ADDED Viewed

@@ -0,0 +1,188 @@
+from __future__ import annotations
+from contextlib import asynccontextmanager, contextmanager
+from typing import TYPE_CHECKING, Dict, Optional, List, Any
+from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys
+import uuid
+from judgeval.exceptions import JudgmentRuntimeError
+from judgeval.tracer.utils import set_span_attribute
+if TYPE_CHECKING:
+    from judgeval.tracer import Tracer
+@contextmanager
+def sync_span_context(
+    tracer: Tracer,
+    name: str,
+    span_attributes: Optional[Dict[str, str]] = None,
+    disable_partial_emit: bool = False,
+):
+    if span_attributes is None:
+        span_attributes = {}
+    current_cost_context = tracer.get_current_cost_context()
+    cost_context = {"cumulative_cost": 0.0}
+    cost_token = current_cost_context.set(cost_context)
+    try:
+        with tracer.get_tracer().start_as_current_span(
+            name=name,
+            attributes=span_attributes,
+        ) as span:
+            set_span_attribute(span, AttributeKeys.JUDGMENT_CUMULATIVE_LLM_COST, 0.0)
+            if disable_partial_emit:
+                tracer.judgment_processor.set_internal_attribute(
+                    span_context=span.get_span_context(),
+                    key=InternalAttributeKeys.DISABLE_PARTIAL_EMIT,
+                    value=True,
+                )
+            yield span
+    finally:
+        current_cost_context.reset(cost_token)
+        child_cost = float(cost_context.get("cumulative_cost", 0.0))
+        tracer.add_cost_to_current_context(child_cost)
+@asynccontextmanager
+async def async_span_context(
+    tracer: Tracer,
+    name: str,
+    span_attributes: Optional[Dict[str, str]] = None,
+    disable_partial_emit: bool = False,
+):
+    if span_attributes is None:
+        span_attributes = {}
+    current_cost_context = tracer.get_current_cost_context()
+    cost_context = {"cumulative_cost": 0.0}
+    cost_token = current_cost_context.set(cost_context)
+    try:
+        with tracer.get_tracer().start_as_current_span(
+            name=name,
+            attributes=span_attributes,
+        ) as span:
+            set_span_attribute(span, AttributeKeys.JUDGMENT_CUMULATIVE_LLM_COST, 0.0)
+            if disable_partial_emit:
+                tracer.judgment_processor.set_internal_attribute(
+                    span_context=span.get_span_context(),
+                    key=InternalAttributeKeys.DISABLE_PARTIAL_EMIT,
+                    value=True,
+                )
+            yield span
+    finally:
+        current_cost_context.reset(cost_token)
+        child_cost = float(cost_context.get("cumulative_cost", 0.0))
+        tracer.add_cost_to_current_context(child_cost)
+def create_agent_context(
+    tracer: Tracer,
+    args: tuple,
+    class_name: Optional[str] = None,
+    identifier: Optional[str] = None,
+    track_state: bool = False,
+    track_attributes: Optional[List[str]] = None,
+    field_mappings: Optional[Dict[str, str]] = None,
+):
+    """Create agent context and return token for cleanup"""
+    agent_id = str(uuid.uuid4())
+    agent_context: Dict[str, Any] = {"agent_id": agent_id}
+    if class_name:
+        agent_context["class_name"] = class_name
+    else:
+        agent_context["class_name"] = None
+    agent_context["track_state"] = track_state
+    agent_context["track_attributes"] = track_attributes or []
+    agent_context["field_mappings"] = field_mappings or {}
+    instance = args[0] if args else None
+    agent_context["instance"] = instance
+    if identifier:
+        if not class_name or not instance or not isinstance(instance, object):
+            raise JudgmentRuntimeError(
+                "'identifier' is set but no class name or instance is available. 'identifier' can only be specified when using the agent() decorator on a class method."
+            )
+        if (
+            instance
+            and hasattr(instance, identifier)
+            and not callable(getattr(instance, identifier))
+        ):
+            instance_name = str(getattr(instance, identifier))
+            agent_context["instance_name"] = instance_name
+        else:
+            raise JudgmentRuntimeError(
+                f"Attribute {identifier} does not exist for {class_name}. Check your agent() decorator."
+            )
+    else:
+        agent_context["instance_name"] = None
+    current_agent_context = tracer.get_current_agent_context().get()
+    if current_agent_context and "agent_id" in current_agent_context:
+        agent_context["parent_agent_id"] = current_agent_context["agent_id"]
+    else:
+        agent_context["parent_agent_id"] = None
+    agent_context["is_agent_entry_point"] = True
+    token = tracer.get_current_agent_context().set(agent_context)  # type: ignore
+    return token
+@contextmanager
+def sync_agent_context(
+    tracer: Tracer,
+    args: tuple,
+    class_name: Optional[str] = None,
+    identifier: Optional[str] = None,
+    track_state: bool = False,
+    track_attributes: Optional[List[str]] = None,
+    field_mappings: Optional[Dict[str, str]] = None,
+):
+    """Context manager for synchronous agent context"""
+    token = create_agent_context(
+        tracer=tracer,
+        args=args,
+        class_name=class_name,
+        identifier=identifier,
+        track_state=track_state,
+        track_attributes=track_attributes,
+        field_mappings=field_mappings,
+    )
+    try:
+        yield
+    finally:
+        tracer.get_current_agent_context().reset(token)
+@asynccontextmanager
+async def async_agent_context(
+    tracer: Tracer,
+    args: tuple,
+    class_name: Optional[str] = None,
+    identifier: Optional[str] = None,
+    track_state: bool = False,
+    track_attributes: Optional[List[str]] = None,
+    field_mappings: Optional[Dict[str, str]] = None,
+):
+    """Context manager for asynchronous agent context"""
+    token = create_agent_context(
+        tracer=tracer,
+        args=args,
+        class_name=class_name,
+        identifier=identifier,
+        track_state=track_state,
+        track_attributes=track_attributes,
+        field_mappings=field_mappings,
+    )
+    try:
+        yield
+    finally:
+        tracer.get_current_agent_context().reset(token)

judgeval/tracer/processors/__init__.py ADDED Viewed

@@ -0,0 +1,181 @@
+from __future__ import annotations
+from typing import Optional, TYPE_CHECKING, Any
+from collections import defaultdict
+from opentelemetry.context import Context
+from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor, SpanContext
+from opentelemetry.sdk.trace.export import (
+    BatchSpanProcessor,
+)
+from judgeval.tracer.exporters import JudgmentSpanExporter
+from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys
+if TYPE_CHECKING:
+    from judgeval.tracer import Tracer
+class NoOpSpanProcessor(SpanProcessor):
+    def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None:
+        pass
+    def on_end(self, span: ReadableSpan) -> None:
+        pass
+    def shutdown(self) -> None:
+        pass
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        return True
+class JudgmentSpanProcessor(BatchSpanProcessor):
+    def __init__(
+        self,
+        tracer: Tracer,
+        endpoint: str,
+        api_key: str,
+        organization_id: str,
+        /,
+        *,
+        max_queue_size: int = 2**18,
+        export_timeout_millis: int = 30000,
+    ):
+        self.tracer = tracer
+        super().__init__(
+            JudgmentSpanExporter(
+                endpoint=endpoint,
+                api_key=api_key,
+                organization_id=organization_id,
+            ),
+            max_queue_size=max_queue_size,
+            export_timeout_millis=export_timeout_millis,
+        )
+        self._internal_attributes: defaultdict[tuple[int, int], dict[str, Any]] = (
+            defaultdict(dict)
+        )
+    def _get_span_key(self, span_context: SpanContext) -> tuple[int, int]:
+        return (span_context.trace_id, span_context.span_id)
+    def set_internal_attribute(
+        self, span_context: SpanContext, key: str, value: Any
+    ) -> None:
+        span_key = self._get_span_key(span_context)
+        self._internal_attributes[span_key][key] = value
+    def get_internal_attribute(
+        self, span_context: SpanContext, key: str, default: Any = None
+    ) -> Any:
+        span_key = self._get_span_key(span_context)
+        return self._internal_attributes[span_key].get(key, default)
+    def increment_update_id(self, span_context: SpanContext) -> int:
+        current_id = self.get_internal_attribute(
+            span_context=span_context, key=AttributeKeys.JUDGMENT_UPDATE_ID, default=0
+        )
+        new_id = current_id + 1
+        self.set_internal_attribute(
+            span_context=span_context,
+            key=AttributeKeys.JUDGMENT_UPDATE_ID,
+            value=new_id,
+        )
+        return current_id
+    def _cleanup_span_state(self, span_key: tuple[int, int]) -> None:
+        self._internal_attributes.pop(span_key, None)
+    def emit_partial(self) -> None:
+        current_span = self.tracer.get_current_span()
+        if not current_span or not current_span.is_recording():
+            return
+        if not isinstance(current_span, ReadableSpan):
+            return
+        span_context = current_span.get_span_context()
+        if self.get_internal_attribute(
+            span_context=span_context,
+            key=InternalAttributeKeys.DISABLE_PARTIAL_EMIT,
+            default=False,
+        ):
+            return
+        current_update_id = self.increment_update_id(span_context=span_context)
+        attributes = dict(current_span.attributes or {})
+        attributes[AttributeKeys.JUDGMENT_UPDATE_ID] = current_update_id
+        partial_span = ReadableSpan(
+            name=current_span.name,
+            context=span_context,
+            parent=current_span.parent,
+            resource=current_span.resource,
+            attributes=attributes,
+            events=current_span.events,
+            links=current_span.links,
+            status=current_span.status,
+            kind=current_span.kind,
+            start_time=current_span.start_time,
+            end_time=None,
+            instrumentation_scope=current_span.instrumentation_scope,
+        )
+        super().on_end(partial_span)
+    def on_end(self, span: ReadableSpan) -> None:
+        if not span.context:
+            super().on_end(span)
+            return
+        span_key = self._get_span_key(span.context)
+        if self.get_internal_attribute(
+            span.context, InternalAttributeKeys.CANCELLED, False
+        ):
+            self._cleanup_span_state(span_key)
+            return
+        if span.end_time is not None:
+            attributes = dict(span.attributes or {})
+            attributes[AttributeKeys.JUDGMENT_UPDATE_ID] = 20
+            final_span = ReadableSpan(
+                name=span.name,
+                context=span.context,
+                parent=span.parent,
+                resource=span.resource,
+                attributes=attributes,
+                events=span.events,
+                links=span.links,
+                status=span.status,
+                kind=span.kind,
+                start_time=span.start_time,
+                end_time=span.end_time,
+                instrumentation_scope=span.instrumentation_scope,
+            )
+            self._cleanup_span_state(span_key)
+            super().on_end(final_span)
+        else:
+            super().on_end(span)
+class NoOpJudgmentSpanProcessor(JudgmentSpanProcessor):
+    def __init__(self):
+        super().__init__(None, "", "", "")  # type: ignore[arg-type]
+    def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None:
+        pass
+    def on_end(self, span: ReadableSpan) -> None:
+        pass
+    def shutdown(self) -> None:
+        pass
+    def force_flush(self, timeout_millis: int | None = 30000) -> bool:
+        return True
+    def emit_partial(self) -> None:
+        pass
+__all__ = ("NoOpSpanProcessor", "JudgmentSpanProcessor", "NoOpJudgmentSpanProcessor")

judgeval/tracer/utils.py ADDED Viewed

@@ -0,0 +1,20 @@
+from typing import Any
+from opentelemetry.trace import Span
+from pydantic import BaseModel
+from typing import Callable, Optional
+from judgeval.scorers.trace_api_scorer import TraceAPIScorerConfig
+from judgeval.env import JUDGMENT_DEFAULT_GPT_MODEL
+def set_span_attribute(span: Span, name: str, value: Any):
+    if value is None or value == "":
+        return
+    span.set_attribute(name, value)
+class TraceScorerConfig(BaseModel):
+    scorer: TraceAPIScorerConfig
+    model: str = JUDGMENT_DEFAULT_GPT_MODEL
+    sampling_rate: float = 1.0
+    run_condition: Optional[Callable[..., bool]] = None

judgeval/trainer/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from judgeval.trainer.trainer import JudgmentTrainer
+from judgeval.trainer.config import TrainerConfig, ModelConfig
+from judgeval.trainer.trainable_model import TrainableModel
+__all__ = ["JudgmentTrainer", "TrainerConfig", "ModelConfig", "TrainableModel"]

judgeval/{common/trainer → trainer}/config.py RENAMED Viewed

@@ -1,7 +1,12 @@
+from __future__ import annotations
 from dataclasses import dataclass
-from typing import Optional, Dict, Any
+from typing import Optional, Dict, Any, TYPE_CHECKING
 import json
+if TYPE_CHECKING:
+    from fireworks.llm.llm_reinforcement_step import ReinforcementAcceleratorTypeLiteral
 @dataclass
 class TrainerConfig:
@@ -13,15 +18,13 @@ class TrainerConfig:
     base_model_name: str = "qwen2p5-7b-instruct"
     rft_provider: str = "fireworks"
     num_steps: int = 5
-    num_generations_per_prompt: int = (
-        4  # Number of rollouts/generations per input prompt
-    )
-    num_prompts_per_step: int = 4  # Number of input prompts to sample per training step
+    num_generations_per_prompt: int = 4
+    num_prompts_per_step: int = 4
     concurrency: int = 100
     epochs: int = 1
     learning_rate: float = 1e-5
     accelerator_count: int = 1
-    accelerator_type: str = "NVIDIA_A100_80GB"
+    accelerator_type: ReinforcementAcceleratorTypeLiteral = "NVIDIA_A100_80GB"
     temperature: float = 1.5
     max_tokens: int = 50
     enable_addons: bool = True
@@ -87,7 +90,7 @@ class ModelConfig:
         }
     @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "ModelConfig":
+    def from_dict(cls, data: Dict[str, Any]) -> ModelConfig:
         """Create ModelConfig from dictionary."""
         return cls(
             base_model_name=data.get("base_model_name", "qwen2p5-7b-instruct"),
@@ -107,7 +110,7 @@ class ModelConfig:
         return json.dumps(self.to_dict(), indent=2)
     @classmethod
-    def from_json(cls, json_str: str) -> "ModelConfig":
+    def from_json(cls, json_str: str) -> ModelConfig:
         """Create ModelConfig from JSON string."""
         data = json.loads(json_str)
         return cls.from_dict(data)
@@ -118,7 +121,7 @@ class ModelConfig:
             f.write(self.to_json())
     @classmethod
-    def load_from_file(cls, filepath: str) -> "ModelConfig":
+    def load_from_file(cls, filepath: str) -> ModelConfig:
         """Load ModelConfig from a JSON file."""
         with open(filepath, "r") as f:
             json_str = f.read()

judgeval/{common/trainer → trainer}/console.py RENAMED Viewed

@@ -2,9 +2,10 @@ from contextlib import contextmanager
 from typing import Optional
 import sys
 import os
+from judgeval.utils.decorators import use_once
-# Detect if we're running in a Jupyter environment
+@use_once
 def _is_jupyter_environment():
     """Check if we're running in a Jupyter notebook or similar environment."""
     try:
@@ -22,28 +23,23 @@ def _is_jupyter_environment():
         return False
-# Check environment once at import time
 IS_JUPYTER = _is_jupyter_environment()
 if not IS_JUPYTER:
-    # Safe to use Rich in non-Jupyter environments
     try:
         from rich.console import Console
         from rich.spinner import Spinner
         from rich.live import Live
         from rich.text import Text
-        # Shared console instance for the trainer module to avoid conflicts
         shared_console = Console()
         RICH_AVAILABLE = True
     except ImportError:
         RICH_AVAILABLE = False
 else:
-    # In Jupyter, avoid Rich to prevent recursion issues
     RICH_AVAILABLE = False
-# Fallback implementations for when Rich is not available or safe
 class SimpleSpinner:
     def __init__(self, name, text):
         self.text = text
@@ -69,7 +65,6 @@ def safe_print(message, style=None):
     if RICH_AVAILABLE and not IS_JUPYTER:
         shared_console.print(message, style=style)
     else:
-        # Use simple print with emoji indicators for different styles
         if style == "green":
             print(f"✅ {message}")
         elif style == "yellow":
@@ -97,7 +92,6 @@ def _spinner_progress(
         with Live(spinner, console=shared_console, refresh_per_second=10):
             yield
     else:
-        # Fallback for Jupyter or when Rich is not available
         print(f"🔄 {full_message}")
         try:
             yield
@@ -120,7 +114,6 @@ def _model_spinner_progress(message: str):
             yield update_progress
     else:
-        # Fallback for Jupyter or when Rich is not available
         print(f"🔵 [Model] {message}")
         def update_progress(progress_message: str):

judgeval 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

judgeval 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl