PyPI - judgeval - Versions diffs - 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl - Mend

judgeval 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

judgeval/cli.py +65 -0
judgeval/common/api/api.py +44 -38
judgeval/common/api/constants.py +18 -5
judgeval/common/api/json_encoder.py +8 -9
judgeval/common/tracer/core.py +448 -256
judgeval/common/tracer/otel_span_processor.py +1 -1
judgeval/common/tracer/span_processor.py +1 -1
judgeval/common/tracer/span_transformer.py +2 -1
judgeval/common/tracer/trace_manager.py +6 -1
judgeval/common/trainer/__init__.py +5 -0
judgeval/common/trainer/config.py +125 -0
judgeval/common/trainer/console.py +151 -0
judgeval/common/trainer/trainable_model.py +238 -0
judgeval/common/trainer/trainer.py +301 -0
judgeval/data/evaluation_run.py +104 -0
judgeval/data/judgment_types.py +37 -8
judgeval/data/trace.py +1 -0
judgeval/data/trace_run.py +0 -2
judgeval/integrations/langgraph.py +2 -1
judgeval/judgment_client.py +90 -135
judgeval/local_eval_queue.py +3 -5
judgeval/run_evaluation.py +43 -299
judgeval/scorers/base_scorer.py +9 -10
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +17 -3
{judgeval-0.5.0.dist-info → judgeval-0.7.0.dist-info}/METADATA +10 -47
{judgeval-0.5.0.dist-info → judgeval-0.7.0.dist-info}/RECORD +29 -22
judgeval-0.7.0.dist-info/entry_points.txt +2 -0
judgeval/evaluation_run.py +0 -80
{judgeval-0.5.0.dist-info → judgeval-0.7.0.dist-info}/WHEEL +0 -0
{judgeval-0.5.0.dist-info → judgeval-0.7.0.dist-info}/licenses/LICENSE.md +0 -0

judgeval/common/tracer/otel_span_processor.py CHANGED Viewed

@@ -21,7 +21,7 @@ from judgeval.common.tracer.otel_exporter import JudgmentAPISpanExporter
 from judgeval.common.tracer.span_processor import SpanProcessorBase
 from judgeval.common.tracer.span_transformer import SpanTransformer
 from judgeval.data import TraceSpan
-from judgeval.evaluation_run import EvaluationRun
+from judgeval.data.evaluation_run import EvaluationRun
 class SimpleReadableSpan(ReadableSpan):

judgeval/common/tracer/span_processor.py CHANGED Viewed

@@ -7,7 +7,7 @@ When monitoring is enabled, we use JudgmentSpanProcessor which overrides the met
 """
 from judgeval.data import TraceSpan
-from judgeval.evaluation_run import EvaluationRun
+from judgeval.data.evaluation_run import EvaluationRun
 class SpanProcessorBase:

judgeval/common/tracer/span_transformer.py CHANGED Viewed

@@ -11,7 +11,7 @@ from pydantic import BaseModel
 from judgeval.common.api.json_encoder import json_encoder
 from judgeval.data import TraceSpan
-from judgeval.evaluation_run import EvaluationRun
+from judgeval.data.evaluation_run import EvaluationRun
 class SpanTransformer:
@@ -150,6 +150,7 @@ class SpanTransformer:
                 "additional_metadata": judgment_data.get("additional_metadata"),
                 "has_evaluation": judgment_data.get("has_evaluation", False),
                 "agent_name": judgment_data.get("agent_name"),
+                "class_name": judgment_data.get("class_name"),
                 "state_before": judgment_data.get("state_before"),
                 "state_after": judgment_data.get("state_after"),
                 "update_id": judgment_data.get("update_id", 1),

judgeval/common/tracer/trace_manager.py CHANGED Viewed

@@ -71,7 +71,12 @@ class TraceManagerClient:
         server_response = self.api_client.upsert_trace(trace_data)
-        if not offline_mode and show_link and "ui_results_url" in server_response:
+        if (
+            not offline_mode
+            and show_link
+            and "ui_results_url" in server_response
+            and self.tracer.show_trace_urls
+        ):
             pretty_str = f"\n🔍 You can view your trace data here: [rgb(106,0,255)][link={server_response['ui_results_url']}]View Trace[/link]\n"
             rprint(pretty_str)

judgeval/common/trainer/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from .trainer import JudgmentTrainer
+from .config import TrainerConfig, ModelConfig
+from .trainable_model import TrainableModel
+__all__ = ["JudgmentTrainer", "TrainerConfig", "ModelConfig", "TrainableModel"]

judgeval/common/trainer/config.py ADDED Viewed

@@ -0,0 +1,125 @@
+from dataclasses import dataclass
+from typing import Optional, Dict, Any
+import json
+@dataclass
+class TrainerConfig:
+    """Configuration values for a JudgmentTrainer run.
+    The three identifiers have no defaults and must be supplied; every
+    other field falls back to the default declared below.
+    """
+    # Required identifiers.
+    deployment_id: str
+    user_id: str
+    model_id: str
+    # Base model and provider selection.
+    base_model_name: str = "qwen2p5-7b-instruct"
+    rft_provider: str = "fireworks"
+    # Training-loop sizing.
+    num_steps: int = 5
+    # Number of rollouts/generations per input prompt
+    num_generations_per_prompt: int = 4
+    # Number of input prompts to sample per training step
+    num_prompts_per_step: int = 4
+    concurrency: int = 100
+    # Values handed to the reinforcement step.
+    epochs: int = 1
+    learning_rate: float = 1e-5
+    accelerator_count: int = 1
+    accelerator_type: str = "NVIDIA_A100_80GB"
+    # Generation settings.
+    temperature: float = 1.5
+    max_tokens: int = 50
+    enable_addons: bool = True
+@dataclass
+class ModelConfig:
+    """
+    Configuration class for storing and loading trained model state.
+    This class enables persistence of trained models so they can be loaded
+    and used later without retraining.
+    Example usage:
+        trainer = JudgmentTrainer(config)
+        model_config = trainer.train(agent_function, scorers, prompts)
+        # Save the trained model configuration
+        model_config.save_to_file("my_trained_model.json")
+        # Later, load and use the trained model
+        loaded_config = ModelConfig.load_from_file("my_trained_model.json")
+        trained_model = TrainableModel.from_model_config(loaded_config)
+        # Use the trained model for inference
+        response = trained_model.chat.completions.create(
+            model="current",  # Uses the loaded trained model
+            messages=[{"role": "user", "content": "Hello!"}]
+        )
+    """
+    # Base model configuration
+    base_model_name: str
+    deployment_id: str
+    user_id: str
+    model_id: str
+    enable_addons: bool
+    # Training state
+    current_step: int
+    total_steps: int
+    # Current model information
+    current_model_name: Optional[str] = None
+    is_trained: bool = False
+    # Training parameters used (for reference)
+    training_params: Optional[Dict[str, Any]] = None
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the fields as a plain dict (training_params is not copied)."""
+        return {
+            "base_model_name": self.base_model_name,
+            "deployment_id": self.deployment_id,
+            "user_id": self.user_id,
+            "model_id": self.model_id,
+            "enable_addons": self.enable_addons,
+            "current_step": self.current_step,
+            "total_steps": self.total_steps,
+            "current_model_name": self.current_model_name,
+            "is_trained": self.is_trained,
+            "training_params": self.training_params,
+        }
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "ModelConfig":
+        """Create ModelConfig from a dictionary.
+        Missing keys fall back to the defaults spelled out below, so a
+        partial dictionary still produces a usable instance.
+        """
+        return cls(
+            base_model_name=data.get("base_model_name", "qwen2p5-7b-instruct"),
+            deployment_id=data.get("deployment_id", "my-base-deployment"),
+            user_id=data.get("user_id", ""),
+            model_id=data.get("model_id", ""),
+            enable_addons=data.get("enable_addons", True),
+            current_step=data.get("current_step", 0),
+            total_steps=data.get("total_steps", 0),
+            current_model_name=data.get("current_model_name"),
+            is_trained=data.get("is_trained", False),
+            training_params=data.get("training_params"),
+        )
+    def to_json(self) -> str:
+        """Convert ModelConfig to a JSON string indented by two spaces."""
+        return json.dumps(self.to_dict(), indent=2)
+    @classmethod
+    def from_json(cls, json_str: str) -> "ModelConfig":
+        """Create ModelConfig from a JSON string."""
+        return cls.from_dict(json.loads(json_str))
+    def save_to_file(self, filepath: str):
+        """Save ModelConfig to a JSON file.
+        Args:
+            filepath: Destination path; an existing file is overwritten.
+        """
+        # Pin the encoding so the file does not depend on the platform's
+        # locale default; JSON interchange is UTF-8.
+        with open(filepath, "w", encoding="utf-8") as f:
+            f.write(self.to_json())
+    @classmethod
+    def load_from_file(cls, filepath: str) -> "ModelConfig":
+        """Load ModelConfig from a JSON file.
+        Args:
+            filepath: Path of a file previously written by save_to_file(),
+                or any UTF-8 JSON document with the same keys.
+        """
+        # Read as UTF-8 for the same reason save_to_file() writes it: a
+        # hand-edited file with non-ASCII text must decode the same everywhere.
+        with open(filepath, "r", encoding="utf-8") as f:
+            json_str = f.read()
+        return cls.from_json(json_str)

judgeval/common/trainer/console.py ADDED Viewed

@@ -0,0 +1,151 @@
+from contextlib import contextmanager
+from typing import Optional
+import sys
+import os
+# Detect if we're running in a Jupyter environment
+def _is_jupyter_environment():
+    """Return True when the process looks like a notebook-style session.
+    The check passes if an IPython kernel, IPython itself or Google Colab
+    has been imported, or if the JPY_PARENT_PID environment variable is
+    set. Any unexpected error is treated as "not Jupyter".
+    """
+    notebook_modules = ("ipykernel", "IPython", "google.colab")
+    try:
+        if any(module_name in sys.modules for module_name in notebook_modules):
+            return True
+        # Environment variable checked alongside the module probes.
+        return "JPY_PARENT_PID" in os.environ
+    except Exception:
+        return False
+# Evaluate the environment a single time, when the module is imported
+IS_JUPYTER = _is_jupyter_environment()
+# Rich stays disabled unless it imports cleanly outside Jupyter
+RICH_AVAILABLE = False
+if not IS_JUPYTER:
+    try:
+        from rich.console import Console
+        from rich.spinner import Spinner
+        from rich.live import Live
+        from rich.text import Text
+        # One console shared by the whole trainer module to avoid conflicts
+        shared_console = Console()
+        RICH_AVAILABLE = True
+    except ImportError:
+        # Rich is not installed; the plain-print fallbacks are used instead
+        pass
+# Fallback implementations for when Rich is not available or safe
+class SimpleSpinner:
+    """Bare-bones spinner substitute that only remembers its text."""
+    def __init__(self, name, text):
+        # `name` is accepted but never stored; only the text is kept.
+        self.text = text
+class SimpleLive:
+    """Context manager that prints a spinner's text instead of animating it."""
+    def __init__(self, spinner, console=None, refresh_per_second=None):
+        # console and refresh_per_second are accepted but ignored.
+        self.spinner = spinner
+    @staticmethod
+    def _announce(spinner):
+        """Print one status line for the given spinner."""
+        print(f"🔄 {spinner.text}")
+    def __enter__(self):
+        self._announce(self.spinner)
+        return self
+    def __exit__(self, *args):
+        # Nothing to clean up; exceptions from the body are not suppressed.
+        return None
+    def update(self, spinner):
+        self._announce(spinner)
+def safe_print(message, style=None):
+    """Print a message through Rich when usable, otherwise through print().
+    In the plain fallback the style name selects an emoji prefix; any
+    other style (including None) prints the message unchanged.
+    """
+    if RICH_AVAILABLE and not IS_JUPYTER:
+        shared_console.print(message, style=style)
+        return
+    # Emoji markers standing in for the colour styles.
+    style_markers = (
+        ("green", "✅"),
+        ("yellow", "⚠️"),
+        ("blue", "🔵"),
+        ("cyan", "🔷"),
+    )
+    for style_name, marker in style_markers:
+        if style == style_name:
+            print(f"{marker} {message}")
+            return
+    print(message)
+@contextmanager
+def _spinner_progress(
+    message: str, step: Optional[int] = None, total_steps: Optional[int] = None
+):
+    """Context manager for spinner-based progress display.
+    Args:
+        message: Text describing the operation in progress.
+        step: Current step number; shown only together with total_steps.
+        total_steps: Total number of steps; shown only together with step.
+    With Rich usable, a live "dots" spinner is shown for the duration of
+    the block. Otherwise a start line is printed, followed by a completion
+    line on success or a failure line if the block raised.
+    """
+    if step is not None and total_steps is not None:
+        full_message = f"[Step {step}/{total_steps}] {message}"
+    else:
+        full_message = f"[Training] {message}"
+    if RICH_AVAILABLE and not IS_JUPYTER:
+        spinner = Spinner("dots", text=Text(full_message, style="cyan"))
+        with Live(spinner, console=shared_console, refresh_per_second=10):
+            yield
+        return
+    # Fallback for Jupyter or when Rich is not available
+    print(f"🔄 {full_message}")
+    try:
+        yield
+    except Exception:
+        # Do not report completion for a block that failed; let the
+        # original exception propagate unchanged.
+        print(f"❌ {full_message} - Failed")
+        raise
+    print(f"✅ {full_message} - Complete")
+@contextmanager
+def _model_spinner_progress(message: str):
+    """Show progress for a model operation and yield an updater callback.
+    The yielded callable takes one progress string and displays it as a
+    sub-line beneath the main "[Model]" message.
+    """
+    if not RICH_AVAILABLE or IS_JUPYTER:
+        # Fallback for Jupyter or when Rich is not available
+        print(f"🔵 [Model] {message}")
+        def update_progress(progress_message: str):
+            print(f"  └─ {progress_message}")
+        yield update_progress
+        return
+    spinner = Spinner("dots", text=Text(f"[Model] {message}", style="blue"))
+    with Live(spinner, console=shared_console, refresh_per_second=10) as live:
+        def update_progress(progress_message: str):
+            """Swap the spinner text for the message plus the new sub-line."""
+            spinner.text = Text(
+                f"[Model] {message}\n  └─ {progress_message}", style="blue"
+            )
+            live.update(spinner)
+        yield update_progress
+def _print_progress(
+    message: str, step: Optional[int] = None, total_steps: Optional[int] = None
+):
+    """Print a green progress line tagged with the step or with [Training]."""
+    has_step_info = step is not None and total_steps is not None
+    label = f"[Step {step}/{total_steps}]" if has_step_info else "[Training]"
+    safe_print(f"{label} {message}", style="green")
+def _print_progress_update(
+    message: str, step: Optional[int] = None, total_steps: Optional[int] = None
+):
+    """Print an indented yellow status line during a long operation.
+    step and total_steps are accepted but do not affect the output.
+    """
+    indented_line = f"  └─ {message}"
+    safe_print(indented_line, style="yellow")
+def _print_model_progress(message: str):
+    """Print a blue status line prefixed with the [Model] tag."""
+    tagged_line = f"[Model] {message}"
+    safe_print(tagged_line, style="blue")

judgeval/common/trainer/trainable_model.py ADDED Viewed

@@ -0,0 +1,238 @@
+from fireworks import LLM
+from .config import TrainerConfig, ModelConfig
+from typing import Optional, Dict, Any, Callable
+from .console import _model_spinner_progress, _print_model_progress
+from judgeval.common.exceptions import JudgmentAPIError
+class TrainableModel:
+    """
+    A wrapper class for managing model snapshots during training.
+    This class automatically handles model snapshot creation and management
+    during the RFT (Reinforcement Fine-Tuning) process,
+    abstracting away manual snapshot management from users.
+    """
+    def __init__(self, config: TrainerConfig):
+        """
+        Initialize the TrainableModel and deploy its base model.
+        Args:
+            config: TrainerConfig instance with model configuration
+        Raises:
+            JudgmentAPIError: If any part of initialization fails.
+        """
+        try:
+            self.config = config
+            self.current_step = 0
+            self._current_model = None
+            self._tracer_wrapper_func = None
+            self._base_model = self._create_base_model()
+            self._current_model = self._base_model
+        except Exception as e:
+            raise JudgmentAPIError(
+                f"Failed to initialize TrainableModel: {str(e)}"
+            ) from e
+    @classmethod
+    def from_model_config(cls, model_config: ModelConfig) -> "TrainableModel":
+        """
+        Create a TrainableModel from a saved ModelConfig.
+        Args:
+            model_config: ModelConfig instance with saved model state
+        Returns:
+            TrainableModel instance configured to use the saved model
+        """
+        config_kwargs: Dict[str, Any] = {
+            "base_model_name": model_config.base_model_name,
+            "deployment_id": model_config.deployment_id,
+            "user_id": model_config.user_id,
+            "model_id": model_config.model_id,
+            "enable_addons": model_config.enable_addons,
+        }
+        # Carry the saved step budget over so get_model_config() reports the
+        # same total_steps after a reload. A saved value of 0 keeps the
+        # TrainerConfig default, as before.
+        if model_config.total_steps:
+            config_kwargs["num_steps"] = model_config.total_steps
+        instance = cls(TrainerConfig(**config_kwargs))
+        instance.current_step = model_config.current_step
+        if model_config.is_trained and model_config.current_model_name:
+            instance._load_trained_model(model_config.current_model_name)
+        return instance
+    def _snapshot_model_name(self, step: int) -> str:
+        """Return the fully qualified model name of the snapshot for a step."""
+        return f"accounts/{self.config.user_id}/models/{self.config.model_id}-v{step}"
+    def _deploy_lora_model(
+        self,
+        model_name: str,
+        spinner_message: str,
+        creating_message: str,
+        ready_message: str,
+    ):
+        """Deploy model_name as an on-demand LoRA on the base deployment.
+        The new model becomes the current model, and the registered tracer
+        wrapper (if any) is applied to it after deployment.
+        """
+        with _model_spinner_progress(spinner_message) as update_progress:
+            update_progress(creating_message)
+            self._current_model = LLM(
+                model=model_name,
+                deployment_type="on-demand-lora",
+                base_id=self.config.deployment_id,
+            )
+            update_progress("Applying deployment configuration...")
+            self._current_model.apply()
+        _print_model_progress(ready_message)
+        if self._tracer_wrapper_func:
+            self._tracer_wrapper_func(self._current_model)
+    def _create_base_model(self):
+        """Create the base model, apply its deployment and return it.
+        Raises:
+            JudgmentAPIError: If creation or deployment fails.
+        """
+        try:
+            with _model_spinner_progress(
+                "Creating and deploying base model..."
+            ) as update_progress:
+                update_progress("Creating base model instance...")
+                base_model = LLM(
+                    model=self.config.base_model_name,
+                    deployment_type="on-demand",
+                    id=self.config.deployment_id,
+                    enable_addons=self.config.enable_addons,
+                )
+                update_progress("Applying deployment configuration...")
+                base_model.apply()
+            _print_model_progress("Base model deployment ready")
+            return base_model
+        except Exception as e:
+            raise JudgmentAPIError(
+                f"Failed to create and deploy base model '{self.config.base_model_name}': {str(e)}"
+            ) from e
+    def _load_trained_model(self, model_name: str):
+        """Load a trained model by name and make it the current model.
+        Raises:
+            JudgmentAPIError: If the model cannot be loaded or deployed.
+        """
+        try:
+            self._deploy_lora_model(
+                model_name,
+                f"Loading and deploying trained model: {model_name}",
+                "Creating trained model instance...",
+                "Trained model deployment ready",
+            )
+        except Exception as e:
+            raise JudgmentAPIError(
+                f"Failed to load and deploy trained model '{model_name}': {str(e)}"
+            ) from e
+    def get_current_model(self):
+        """Return the model object currently used for inference."""
+        return self._current_model
+    @property
+    def chat(self):
+        """OpenAI-compatible chat interface."""
+        return self._current_model.chat
+    @property
+    def completions(self):
+        """OpenAI-compatible completions interface."""
+        return self._current_model.completions
+    def advance_to_next_step(self, step: int):
+        """
+        Advance to the next training step and update the current model snapshot.
+        Step 0 switches back to the base model; any other step deploys the
+        snapshot named after that step.
+        Args:
+            step: The current training step number
+        Raises:
+            JudgmentAPIError: If the snapshot cannot be deployed.
+        """
+        try:
+            self.current_step = step
+            if step == 0:
+                self._current_model = self._base_model
+            else:
+                model_name = self._snapshot_model_name(step)
+                self._deploy_lora_model(
+                    model_name,
+                    f"Creating and deploying model snapshot: {model_name}",
+                    "Creating model snapshot instance...",
+                    "Model snapshot deployment ready",
+                )
+        except Exception as e:
+            raise JudgmentAPIError(
+                f"Failed to advance to training step {step}: {str(e)}"
+            ) from e
+    def perform_reinforcement_step(self, dataset, step: int):
+        """
+        Perform a reinforcement learning step using the current model.
+        Args:
+            dataset: Training dataset for the reinforcement step
+            step: Current step number; the output model is named for step + 1
+        Returns:
+            Training job object
+        Raises:
+            JudgmentAPIError: If the reinforcement step cannot be started.
+        """
+        try:
+            # The output name is the short form "<model_id>-v<n>", without
+            # the "accounts/<user_id>/models/" prefix used when deploying.
+            model_name = f"{self.config.model_id}-v{step + 1}"
+            return self._current_model.reinforcement_step(
+                dataset=dataset,
+                output_model=model_name,
+                epochs=self.config.epochs,
+                learning_rate=self.config.learning_rate,
+                accelerator_count=self.config.accelerator_count,
+                accelerator_type=self.config.accelerator_type,
+            )
+        except Exception as e:
+            raise JudgmentAPIError(
+                f"Failed to start reinforcement learning step {step + 1}: {str(e)}"
+            ) from e
+    def get_model_config(
+        self, training_params: Optional[Dict[str, Any]] = None
+    ) -> ModelConfig:
+        """
+        Get the current model configuration for persistence.
+        Args:
+            training_params: Optional training parameters to include in config
+        Returns:
+            ModelConfig instance with current model state
+        """
+        # A model counts as trained once at least one step has been applied.
+        is_trained = self.current_step > 0
+        current_model_name = (
+            self._snapshot_model_name(self.current_step) if is_trained else None
+        )
+        return ModelConfig(
+            base_model_name=self.config.base_model_name,
+            deployment_id=self.config.deployment_id,
+            user_id=self.config.user_id,
+            model_id=self.config.model_id,
+            enable_addons=self.config.enable_addons,
+            current_step=self.current_step,
+            total_steps=self.config.num_steps,
+            current_model_name=current_model_name,
+            is_trained=is_trained,
+            training_params=training_params,
+        )
+    def save_model_config(
+        self, filepath: str, training_params: Optional[Dict[str, Any]] = None
+    ):
+        """
+        Save the current model configuration to a file.
+        Args:
+            filepath: Path to save the configuration file
+            training_params: Optional training parameters to include in config
+        """
+        model_config = self.get_model_config(training_params)
+        model_config.save_to_file(filepath)
+    def _register_tracer_wrapper(self, wrapper_func: Callable):
+        """
+        Register a tracer wrapper function to be reapplied when models change.
+        This is called internally by the tracer's wrap() function to ensure
+        that new model instances created during training are automatically wrapped.
+        Args:
+            wrapper_func: Function that wraps a model instance with tracing
+        """
+        self._tracer_wrapper_func = wrapper_func

judgeval 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

judgeval 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl