PyPI - judgeval - Versions diffs - 0.16.9__py3-none-any.whl → 0.18.0__py3-none-any.whl - Mend

judgeval 0.16.9__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of judgeval might be problematic. Click here for more details.

Files changed (29) hide show

judgeval/__init__.py +29 -0
judgeval/api/__init__.py +108 -0
judgeval/api/api_types.py +56 -1
judgeval/cli.py +7 -0
judgeval/data/judgment_types.py +56 -1
judgeval/prompts/prompt.py +320 -0
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +0 -12
judgeval/tracer/__init__.py +71 -33
judgeval/tracer/exporters/store.py +32 -16
judgeval/tracer/keys.py +1 -0
judgeval/tracer/llm/llm_anthropic/messages.py +4 -4
judgeval/tracer/llm/llm_anthropic/messages_stream.py +2 -2
judgeval/tracer/llm/llm_google/generate_content.py +1 -1
judgeval/tracer/llm/llm_openai/beta_chat_completions.py +2 -2
judgeval/tracer/llm/llm_openai/chat_completions.py +4 -4
judgeval/tracer/llm/llm_openai/responses.py +4 -4
judgeval/tracer/llm/llm_together/chat_completions.py +4 -4
judgeval/trainer/__init__.py +10 -1
judgeval/trainer/base_trainer.py +122 -0
judgeval/trainer/config.py +1 -1
judgeval/trainer/fireworks_trainer.py +396 -0
judgeval/trainer/trainer.py +52 -387
judgeval/utils/project.py +15 -0
judgeval/version.py +1 -1
{judgeval-0.16.9.dist-info → judgeval-0.18.0.dist-info}/METADATA +2 -3
{judgeval-0.16.9.dist-info → judgeval-0.18.0.dist-info}/RECORD +29 -25
{judgeval-0.16.9.dist-info → judgeval-0.18.0.dist-info}/WHEEL +0 -0
{judgeval-0.16.9.dist-info → judgeval-0.18.0.dist-info}/entry_points.txt +0 -0
{judgeval-0.16.9.dist-info → judgeval-0.18.0.dist-info}/licenses/LICENSE.md +0 -0

judgeval/trainer/trainer.py CHANGED Viewed

@@ -1,405 +1,70 @@
-import asyncio
-import json
-import time
-from typing import Optional, Callable, Any, List, Union, Dict
-from fireworks import Dataset  # type: ignore[import-not-found]
-from .config import TrainerConfig, ModelConfig
+from typing import Optional
+from .config import TrainerConfig
+from .base_trainer import BaseTrainer
+from .fireworks_trainer import FireworksTrainer
 from .trainable_model import TrainableModel
 from judgeval.tracer import Tracer
-from judgeval.tracer.exporters.store import SpanStore
-from judgeval.tracer.exporters import InMemorySpanExporter
-from judgeval.tracer.keys import AttributeKeys
-from judgeval import JudgmentClient
-from judgeval.scorers import ExampleScorer, ExampleAPIScorerConfig
-from judgeval.data import Example
-from .console import _spinner_progress, _print_progress, _print_progress_update
 from judgeval.exceptions import JudgmentRuntimeError
-class JudgmentTrainer:
+def JudgmentTrainer(
+    config: TrainerConfig,
+    trainable_model: TrainableModel,
+    tracer: Tracer,
+    project_name: Optional[str] = None,
+) -> BaseTrainer:
     """
-    A reinforcement learning trainer for Judgment models using Fine-Tuning.
+    Factory function for creating reinforcement learning trainers.
-    This class handles the iterative training process where models are improved
-    through reinforcement learning fine-tuning based on generated rollouts and rewards.
-    """
-    def __init__(
-        self,
-        config: TrainerConfig,
-        trainable_model: TrainableModel,
-        tracer: Tracer,
-        project_name: Optional[str] = None,
-    ):
-        """
-        Initialize the JudgmentTrainer.
-        Args:
-            config: TrainerConfig instance with training parameters. If None, uses default config.
-            tracer: Optional tracer for observability
-            trainable_model: Optional trainable model instance
-            project_name: Project name for organizing training runs and evaluations
-        """
-        try:
-            self.config = config
-            self.tracer = tracer
-            self.project_name = project_name or "judgment_training"
-            self.trainable_model = trainable_model
-            self.judgment_client = JudgmentClient()
-            self.span_store = SpanStore()
-            self.span_exporter = InMemorySpanExporter(self.span_store)
-        except Exception as e:
-            raise JudgmentRuntimeError(
-                f"Failed to initialize JudgmentTrainer: {str(e)}"
-            ) from e
-    def _extract_message_history_from_spans(self) -> List[Dict[str, str]]:
-        """
-        Extract message history from spans in the span store for training purposes.
-        This method processes trace spans to reconstruct the conversation flow,
-        extracting messages in chronological order from LLM, user, and tool spans.
-        Returns:
-            List of message dictionaries with 'role' and 'content' keys
-        """
-        spans = self.span_store.get_all()
-        if not spans:
-            return []
-        messages = []
-        first_found = False
-        for span in sorted(spans, key=lambda s: getattr(s, "start_time", 0)):
-            span_attributes = span.attributes or {}
-            span_type = span_attributes.get(AttributeKeys.JUDGMENT_SPAN_KIND, "span")
-            if (
-                not span_attributes.get(AttributeKeys.JUDGMENT_OUTPUT)
-                and span_type != "llm"
-            ):
-                continue
-            if span_type == "llm":
-                if not first_found and span_attributes.get(
-                    AttributeKeys.JUDGMENT_INPUT
-                ):
-                    input_data: Any = span_attributes.get(
-                        AttributeKeys.JUDGMENT_INPUT, {}
-                    )
-                    if isinstance(input_data, dict) and "messages" in input_data:
-                        input_messages = input_data["messages"]
-                        if input_messages:
-                            first_found = True
-                            for msg in input_messages:
-                                if (
-                                    isinstance(msg, dict)
-                                    and "role" in msg
-                                    and "content" in msg
-                                ):
-                                    messages.append(
-                                        {"role": msg["role"], "content": msg["content"]}
-                                    )
-                # Add assistant response from span output
-                output = span_attributes.get(AttributeKeys.JUDGMENT_OUTPUT)
-                if output is not None:
-                    content = str(output)
-                    try:
-                        parsed = json.loads(content)
-                        if isinstance(parsed, dict) and "messages" in parsed:
-                            # Extract the actual assistant message content
-                            for msg in parsed["messages"]:
-                                if (
-                                    isinstance(msg, dict)
-                                    and msg.get("role") == "assistant"
-                                ):
-                                    content = msg.get("content", content)
-                                    break
-                    except (json.JSONDecodeError, KeyError):
-                        pass
-                    messages.append({"role": "assistant", "content": content})
-            elif span_type == "user":
-                output = span_attributes.get(AttributeKeys.JUDGMENT_OUTPUT)
-                if output is not None:
-                    content = str(output)
-                    try:
-                        parsed = json.loads(content)
-                        if isinstance(parsed, dict) and "messages" in parsed:
-                            for msg in parsed["messages"]:
-                                if isinstance(msg, dict) and msg.get("role") == "user":
-                                    content = msg.get("content", content)
-                                    break
-                    except (json.JSONDecodeError, KeyError):
-                        pass
-                    messages.append({"role": "user", "content": content})
+    This factory creates and returns provider-specific trainer implementations
+    (FireworksTrainer, VerifiersTrainer, etc.) based on the configured RFT provider.
-            elif span_type == "tool":
-                output = span_attributes.get(AttributeKeys.JUDGMENT_OUTPUT)
-                if output is not None:
-                    content = str(output)
-                    try:
-                        parsed = json.loads(content)
-                        if isinstance(parsed, dict) and "messages" in parsed:
-                            for msg in parsed["messages"]:
-                                if isinstance(msg, dict) and msg.get("role") == "user":
-                                    content = msg.get("content", content)
-                                    break
-                    except (json.JSONDecodeError, KeyError):
-                        pass
-                    messages.append({"role": "user", "content": content})
+    The factory pattern allows for easy extension to support multiple training
+    providers without changing the client-facing API.
-        return messages
-    async def generate_rollouts_and_rewards(
-        self,
-        agent_function: Callable[[Any], Any],
-        scorers: List[Union[ExampleAPIScorerConfig, ExampleScorer]],
-        prompts: List[Any],
-        num_prompts_per_step: Optional[int] = None,
-        num_generations_per_prompt: Optional[int] = None,
-        concurrency: Optional[int] = None,
-    ):
-        """
-        Generate rollouts and compute rewards using the current model snapshot.
-        Each sample contains multiple generations for reinforcement learning optimization.
-        Args:
-            agent_function: Function/agent to call for generating responses
-            scorers: List of scorer objects to evaluate responses
-            prompts: List of prompts to use for training
-            num_prompts_per_step: Number of prompts to use per step (defaults to config value, limited by prompts list length)
-            num_generations_per_prompt: Generations per prompt (defaults to config value)
-            concurrency: Concurrency limit (defaults to config value)
-        Returns:
-            List of dataset rows containing samples with messages and evaluations
-        """
-        num_prompts_per_step = min(
-            num_prompts_per_step or self.config.num_prompts_per_step, len(prompts)
-        )
-        num_generations_per_prompt = (
-            num_generations_per_prompt or self.config.num_generations_per_prompt
+    Example:
+        config = TrainerConfig(
+            deployment_id="my-deployment",
+            user_id="my-user",
+            model_id="my-model",
+            rft_provider="fireworks"  # or "verifiers" in the future
         )
-        concurrency = concurrency or self.config.concurrency
-        semaphore = asyncio.Semaphore(concurrency)
-        @self.tracer.observe(span_type="function")
-        async def generate_single_response(prompt_id, generation_id):
-            async with semaphore:
-                prompt_input = prompts[prompt_id]
-                response_data = await agent_function(**prompt_input)
-                messages = response_data.get("messages", [])
-                try:
-                    traced_messages = self._extract_message_history_from_spans()
-                    if traced_messages:
-                        messages = traced_messages
-                except Exception as e:
-                    print(f"Warning: Failed to get message history from trace: {e}")
-                    pass
-                finally:
-                    self.span_store.spans = []
-                example = Example(
-                    input=prompt_input,
-                    messages=messages,
-                    actual_output=response_data,
-                )
-                scoring_results = self.judgment_client.run_evaluation(
-                    examples=[example],
-                    scorers=scorers,
-                    project_name=self.project_name,
-                    eval_run_name=f"training_step_{self.trainable_model.current_step}_prompt_{prompt_id}_gen_{generation_id}",
-                )
-                if scoring_results and scoring_results[0].scorers_data:
-                    scores = [
-                        scorer_data.score
-                        for scorer_data in scoring_results[0].scorers_data
-                        if scorer_data.score is not None
-                    ]
-                    reward = sum(scores) / len(scores) if scores else 0.0
-                else:
-                    reward = 0.0
-            return {
-                "prompt_id": prompt_id,
-                "generation_id": generation_id,
-                "messages": messages,
-                "evals": {"score": reward},
-            }
-        coros = []
-        for prompt_id in range(num_prompts_per_step):
-            for generation_id in range(num_generations_per_prompt):
-                coro = generate_single_response(prompt_id, generation_id)
-                coros.append(coro)
-        with _spinner_progress(f"Generating {len(coros)} rollouts..."):
-            num_completed = 0
-            results = []
+        # User creates and configures the trainable model
+        trainable_model = TrainableModel(config)
+        tracer = Tracer()
-            for coro in asyncio.as_completed(coros):
-                result = await coro
-                results.append(result)
-                num_completed += 1
+        # JudgmentTrainer automatically creates the appropriate provider-specific trainer
+        trainer = JudgmentTrainer(config, trainable_model, tracer)
-        _print_progress(f"Generated {len(results)} rollouts successfully")
-        dataset_rows = []
-        for prompt_id in range(num_prompts_per_step):
-            prompt_generations = [r for r in results if r["prompt_id"] == prompt_id]
-            sample_generations = [
-                {"messages": gen["messages"], "evals": gen["evals"]}
-                for gen in prompt_generations
-            ]
-            dataset_rows.append({"samples": sample_generations})
-        return dataset_rows
-    async def run_reinforcement_learning(
-        self,
-        agent_function: Callable[[Any], Any],
-        scorers: List[Union[ExampleAPIScorerConfig, ExampleScorer]],
-        prompts: List[Any],
-    ) -> ModelConfig:
-        """
-        Run the iterative reinforcement learning fine-tuning loop.
-        This method performs multiple steps of reinforcement learning, where each step:
-        1. Advances to the appropriate model snapshot
-        2. Generates rollouts and computes rewards using scorers
-        3. Trains a new model using reinforcement learning
-        4. Waits for training completion
+        # The returned trainer implements the BaseTrainer interface
+        model_config = await trainer.train(agent_function, scorers, prompts)
         Args:
-            agent_function: Function/agent to call for generating responses
-            scorers: List of scorer objects to evaluate responses
-            prompts: List of prompts to use for training
+        config: TrainerConfig instance with training parameters including rft_provider
+        trainable_model: Provider-specific trainable model instance (e.g., TrainableModel for Fireworks)
+        tracer: Tracer for observability
+        project_name: Project name for organizing training runs and evaluations
         Returns:
-            ModelConfig: Configuration of the trained model for inference and future training
-        """
-        _print_progress("Starting reinforcement learning training")
-        training_params = {
-            "num_steps": self.config.num_steps,
-            "num_prompts_per_step": self.config.num_prompts_per_step,
-            "num_generations_per_prompt": self.config.num_generations_per_prompt,
-            "epochs": self.config.epochs,
-            "learning_rate": self.config.learning_rate,
-            "accelerator_count": self.config.accelerator_count,
-            "accelerator_type": self.config.accelerator_type,
-            "temperature": self.config.temperature,
-            "max_tokens": self.config.max_tokens,
-        }
-        start_step = self.trainable_model.current_step
-        for step in range(start_step, self.config.num_steps):
-            step_num = step + 1
-            _print_progress(
-                f"Starting training step {step_num}", step_num, self.config.num_steps
-            )
-            self.trainable_model.advance_to_next_step(step)
-            dataset_rows = await self.generate_rollouts_and_rewards(
-                agent_function, scorers, prompts
-            )
-            with _spinner_progress(
-                "Preparing training dataset", step_num, self.config.num_steps
-            ):
-                dataset = Dataset.from_list(dataset_rows)
-                dataset.sync()
-            _print_progress(
-                "Starting reinforcement training", step_num, self.config.num_steps
-            )
-            job = self.trainable_model.perform_reinforcement_step(dataset, step)
-            last_state = None
-            with _spinner_progress(
-                "Training job in progress", step_num, self.config.num_steps
-            ):
-                while not job.is_completed:
-                    job.raise_if_bad_state()
-                    current_state = job.state
-                    if current_state != last_state:
-                        if current_state in ["uploading", "validating"]:
-                            _print_progress_update(
-                                f"Training job: {current_state} data"
-                            )
-                        elif current_state == "training":
-                            _print_progress_update(
-                                "Training job: model training in progress"
-                            )
-                        else:
-                            _print_progress_update(f"Training job: {current_state}")
-                        last_state = current_state
-                    time.sleep(10)
-                    job = job.get()
-                    if job is None:
-                        raise JudgmentRuntimeError(
-                            "Training job was deleted while waiting for completion"
-                        )
+        Provider-specific trainer instance (FireworksTrainer, etc.) that implements
+        the BaseTrainer interface
-            _print_progress(
-                f"Training completed! New model: {job.output_model}",
-                step_num,
-                self.config.num_steps,
-            )
-            dataset.delete()
-        _print_progress("All training steps completed!")
-        with _spinner_progress("Deploying final trained model"):
-            self.trainable_model.advance_to_next_step(self.config.num_steps)
-        return self.trainable_model.get_model_config(training_params)
-    async def train(
-        self,
-        agent_function: Callable[[Any], Any],
-        scorers: List[Union[ExampleAPIScorerConfig, ExampleScorer]],
-        prompts: List[Any],
-        rft_provider: Optional[str] = None,
-    ) -> ModelConfig:
-        """
-        Start the reinforcement learning fine-tuning process.
-        This is the main entry point for running the reinforcement learning training.
-        Args:
-            agent_function: Function/agent to call for generating responses.
-            scorers: List of scorer objects to evaluate responses
-            prompts: List of prompts to use for training
-            rft_provider: RFT provider to use for training. Currently only "fireworks" is supported.
-                         Support for other providers is planned for future releases.
-        Returns:
-            ModelConfig: Configuration of the trained model for future loading
-        """
-        try:
-            if rft_provider is not None:
-                self.config.rft_provider = rft_provider
-            return await self.run_reinforcement_learning(
-                agent_function, scorers, prompts
-            )
-        except JudgmentRuntimeError:
-            # Re-raise JudgmentAPIError as-is
-            raise
-        except Exception as e:
-            raise JudgmentRuntimeError(f"Training process failed: {str(e)}") from e
+    Raises:
+        JudgmentRuntimeError: If the specified provider is not supported
+    """
+    provider = config.rft_provider.lower()
+    if provider == "fireworks":
+        return FireworksTrainer(config, trainable_model, tracer, project_name)
+    elif provider == "verifiers":
+        # Placeholder for future implementation
+        raise JudgmentRuntimeError(
+            "Verifiers provider is not yet implemented. "
+            "Currently supported providers: 'fireworks'"
+        )
+    else:
+        raise JudgmentRuntimeError(
+            f"Unsupported RFT provider: '{config.rft_provider}'. "
+            f"Currently supported providers: 'fireworks'"
+        )

judgeval/utils/project.py ADDED Viewed

@@ -0,0 +1,15 @@
+from judgeval.utils.decorators.dont_throw import dont_throw
+import functools
+from judgeval.api import JudgmentSyncClient
+@dont_throw
+@functools.lru_cache(maxsize=64)
+def _resolve_project_id(project_name: str, api_key: str, organization_id: str) -> str:
+    """Resolve project_id from project_name using the API."""
+    client = JudgmentSyncClient(
+        api_key=api_key,
+        organization_id=organization_id,
+    )
+    response = client.projects_resolve({"project_name": project_name})
+    return response["project_id"]

judgeval/version.py CHANGED Viewed

@@ -1,4 +1,4 @@
-__version__ = "0.16.9"
+__version__ = "0.18.0"
 def get_version() -> str:

{judgeval-0.16.9.dist-info → judgeval-0.18.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.16.9
+Version: 0.18.0
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -63,8 +63,7 @@ Judgeval's agent monitoring infra provides a simple harness for integrating GRPO
 await trainer.train(
     agent_function=your_agent_function,  # entry point to your agent
     scorers=[RewardScorer()],  # Custom scorer you define based on task criteria, acts as reward
-    prompts=training_prompts,  # Tasks
-    rft_provider="fireworks"
+    prompts=training_prompts  # Tasks
 )
 ```

{judgeval-0.16.9.dist-info → judgeval-0.18.0.dist-info}/RECORD RENAMED Viewed

@@ -1,17 +1,17 @@
-judgeval/__init__.py,sha256=MyRK0G0khifeEoYMUkkOH722h_TOSdApoNmXtwnEi0w,5179
-judgeval/cli.py,sha256=C7QfElL1sys6hjRpI8rNkFgGqmnsLQGk3lU-9OGbXT0,1612
+judgeval/__init__.py,sha256=74WyDtb9SisWwYZ8juQSUJpa6c0KTI6zzkiTX7Wvalc,6601
+judgeval/cli.py,sha256=bkwsDqX0sdfChLxm9aTLAIw0sPYv-fUbjmaFeBgPgk8,1803
 judgeval/constants.py,sha256=JZZJ1MqzZZDVk-5PRPRbmLnM8mXI-RDL5vxa1JFuscs,3408
 judgeval/env.py,sha256=37Mn4g0OkpFxXCZGlO_CLqKJnyX-jx_R24tC28XJzig,2112
 judgeval/exceptions.py,sha256=tTbfe4yoOtPXmn22UQz9-6a-5PT9uOko85xaRRwr0Sw,621
 judgeval/logger.py,sha256=VP5blbsJ53mvJbNHfBf5p2KrARUrkrErpPkB-__Hh3U,1562
-judgeval/version.py,sha256=na4SICn1_ldveglTM2Suf3pZLRnw2qbMJMUmIhGkh0Q,74
+judgeval/version.py,sha256=CybtPmbwRv_x6bsmmn5cZhdYjBHKkklFsk3eOsP-fMs,74
 judgeval/warnings.py,sha256=LbGte14ppiFjrkp-JJYueZ40NWFvMkWRvPXr6r-fUWw,73
-judgeval/api/__init__.py,sha256=ho8L4wC9y-STYEpk5zHwc2mZJhC4ezW8jiGgOIERBVY,12058
-judgeval/api/api_types.py,sha256=xOHcgK8NTHMuBr1HBHlCvoSYldVOtG8DQsXeo23-YQk,8874
+judgeval/api/__init__.py,sha256=dGZm9KtgLMnmbiyDEJ_D7suuVqmsibR_Cd0YZRJ7qHI,15210
+judgeval/api/api_types.py,sha256=PvwRVxP0_vCXg_ii7jo4SzbB_kbZcL8tiVnX7qotJA8,9878
 judgeval/data/__init__.py,sha256=1tU0EN0ThIfQ1fad5I3dKxAfTcZ5U8cvTLcQ6qLVLU0,407
 judgeval/data/evaluation_run.py,sha256=O41p99wNAuCAf6lsLNKzkZ6W-kL9LlzCYxVls7IcKkA,4727
 judgeval/data/example.py,sha256=eGJpF-lyUH734Cg90B7WtU9f8iKoS3VFGeV6R-GVCCc,1039
-judgeval/data/judgment_types.py,sha256=zrO41lBma66C_Iiz_VCyXLoI2CJjV7fIXAGDlf2fQ-g,16577
+judgeval/data/judgment_types.py,sha256=_LUqYW-fXQcEfa1RQzqTNETnqdNQQ3eH21qBcfJnObU,18542
 judgeval/data/result.py,sha256=XufFGSAkBDfevPUmzSgsR9HEqytISkM0U5HkhJmsjpY,2102
 judgeval/data/scorer_data.py,sha256=HeP15ZgftFTJCF8JmDJCLWXRnZJIaGDJCzl7Hg6gWwE,2006
 judgeval/data/trace.py,sha256=zSiR3o6xt8Z46XA3M9fJBtViF0BsPO6yKp9jxdscOSc,3881
@@ -26,6 +26,7 @@ judgeval/judges/base_judge.py,sha256=_dz0qWsKRxzXxpRY9l6mrxTRYPSF2FE4ZXkrzhZ4gbY
 judgeval/judges/litellm_judge.py,sha256=5vEF0IUo7HVWnOF2ww-DMke8Xkarnz32B_qbgKjc0-I,4182
 judgeval/judges/together_judge.py,sha256=GzwlXZJzle8hT-vWKmq39JyIeanJqJfHDOkrksUbzk0,4398
 judgeval/judges/utils.py,sha256=ITbYwvjU3o9-FIAReFvxh24yJrx9LV3l9BnSBgKUpxg,2068
+judgeval/prompts/prompt.py,sha256=N6G7ncVsmeXgTXzYNDrMw2NESzBJjSKvp4h-BACpX_8,10220
 judgeval/scorers/__init__.py,sha256=pomKzEy4YNFyygYp8vbS3co8iB5CMstRkQwdUgi1u4g,744
 judgeval/scorers/agent_scorer.py,sha256=-qcNSkY6i7ur2LXkM7H1jTKuuFbDuXbjTq42o3vjeQ8,595
 judgeval/scorers/api_scorer.py,sha256=jPBQUBs_T3Xq33QoIbIXDzUaXinz56qeDfo96dfdX0g,2036
@@ -40,15 +41,15 @@ judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=WUeFy
 judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=ciiFBQQC4UDsk9qou9OiKbAR31s82eRUY1ZTt1gdM-0,407
 judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=ucYOI6ztAjfoYmcgTDzN8u5RrehlVqrkeLEfss9b1fk,441
 judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=V3RdrWhnR_vLBrtWw7QbgN9K_A-Och7-v9I2fN4z8gY,506
-judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=SBIjL9oe1YWjVijrC6tkNxFf1M9h31Gq6sAXD2VvnoQ,11360
-judgeval/tracer/__init__.py,sha256=7Axz7nHXCeSyiGqwAHo_Gd4oe4cJaAeRvI8BCbgXDEc,34890
+judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=YdGr2bO5miAtF7fDn2t488RFRi0oYmycqkvm69qCrWs,10754
+judgeval/tracer/__init__.py,sha256=U5RRAYuL_vg1SlWuaaufnobdoLS-J8ovM3FedP_vny4,36398
 judgeval/tracer/constants.py,sha256=ae8tivAW97awJQxdRB9OMqX50wOLX3zqChT_AGkPBu0,85
-judgeval/tracer/keys.py,sha256=8HsLASFPfqNtXwdXpihIbmFONpUdUGpYvHQK9jqrHvQ,2220
+judgeval/tracer/keys.py,sha256=G2Qgb5ZlFsZvXPMylh-OLhHSnWYQ23g0GdtY9n9XuoE,2280
 judgeval/tracer/managers.py,sha256=NEkovnK8Qaod1U5msT0_hyHUqo9C2uFro2IzNlC8jCo,5071
 judgeval/tracer/utils.py,sha256=xWha5iwC733wCf2HKbNqzxOPS1ovO1OymWIUFLz-UpQ,537
 judgeval/tracer/exporters/__init__.py,sha256=3WDXC28iY5gYMM5s7ejmy7P-DVDQ_iIuzwovZxUKJXg,1295
 judgeval/tracer/exporters/s3.py,sha256=N9gmw17cnR0VkfAQQkLsNj5BksgNRETThR5qYhWRjP4,4360
-judgeval/tracer/exporters/store.py,sha256=KQV3cyqteesByQjR-9VdPXT9OlUZ-6F08ogqj837_c0,1012
+judgeval/tracer/exporters/store.py,sha256=pA_KINcm0amO0WEDYmMFU05SSsMOgJ5ogIRaevSX1sk,1885
 judgeval/tracer/exporters/utils.py,sha256=JRcoSQuEHxMDJbXfyrUIfA2SHBVkZM82h4bTbYGxkNw,1154
 judgeval/tracer/llm/__init__.py,sha256=ENxApieKSktYrIviofXWP9GU0WnhBm0Q9mlGe_m_gMY,139
 judgeval/tracer/llm/config.py,sha256=J8-bTL82bgDqdTJPN-Px3Epvoa9FG7L-X329kitwBTc,2525
@@ -56,33 +57,36 @@ judgeval/tracer/llm/constants.py,sha256=IWa3CMes8wIt_UG7jrGEOztg2sHz54fdOMWIOOr-
 judgeval/tracer/llm/providers.py,sha256=VAimkmChOOjhC1cUv-0iG8pa5PhOw1HIOyt3zrIrbcM,628
 judgeval/tracer/llm/llm_anthropic/__init__.py,sha256=HG0gIlTgaRt-Y0u1ERPQ19pUgb4YHkTh7tZQPeyR4oM,80
 judgeval/tracer/llm/llm_anthropic/config.py,sha256=ICfKODPQvZsRxpK4xWQ-YE79pmWJTmY2wryddxpNdpM,153
-judgeval/tracer/llm/llm_anthropic/messages.py,sha256=dwiXr3g-Cv0zow5aA19EDxTUyW1_6LFkKRdoaMYVnOo,15168
-judgeval/tracer/llm/llm_anthropic/messages_stream.py,sha256=20VwyDMLeaECw_Gf_FTXj-TgaqGvRMCKY-ZuzLcQK0I,12148
+judgeval/tracer/llm/llm_anthropic/messages.py,sha256=U11364nrTt6M58K218uj8AxGPrNwzJ4idhEmZQtFuik,15152
+judgeval/tracer/llm/llm_anthropic/messages_stream.py,sha256=ZhHigQujU-zHhklgwSVoQYtSsL_7yC5Rwpq9vozekMc,12140
 judgeval/tracer/llm/llm_anthropic/wrapper.py,sha256=JILcyC4NvjXZSqlFoZp-VB-JsCYZkQPMFEYaB4AysrA,1849
 judgeval/tracer/llm/llm_google/__init__.py,sha256=otBZETsAfVZjtZaN5N36Ln0kw-I9jVB4tFGrV6novHo,74
 judgeval/tracer/llm/llm_google/config.py,sha256=S3yCAE9oHbXjLVYiz5mGD16yIgXMBBUu5UN4lBjoCNQ,162
-judgeval/tracer/llm/llm_google/generate_content.py,sha256=Ilo-TJYg4_G0u_8XgFCC63Lwr6B1JQMC8kdBRQTGy_M,3977
+judgeval/tracer/llm/llm_google/generate_content.py,sha256=meLWeoZ7J2JtSkpt2Lt8qapYi_mxv0204cXWaFZ0FKs,3973
 judgeval/tracer/llm/llm_google/wrapper.py,sha256=jqaMXGoM9dlPBbCFadMI5EqFrNHzBt0h9VkNn7KPVLk,901
 judgeval/tracer/llm/llm_openai/__init__.py,sha256=CyzwhY0-zmqWKlEno7JPBcvO7G_hI8dp6-_5_KEzFqg,74
-judgeval/tracer/llm/llm_openai/beta_chat_completions.py,sha256=Uj5L2twn4mp5Br8T3X85h7CS4cUpSYCvP2BnYdm-M-g,6498
-judgeval/tracer/llm/llm_openai/chat_completions.py,sha256=4ZvZGzmma-2WS4-aJcEWYyIyMiyjI7BCyPGaynRtqDI,15641
+judgeval/tracer/llm/llm_openai/beta_chat_completions.py,sha256=KwetlVexleDSSyRBEezC7Fk5do1Vub3FwLbRhCPgktc,6490
+judgeval/tracer/llm/llm_openai/chat_completions.py,sha256=NWPE_BQTGfTRfsqhzXtNlQAv1Cr2GymolrTGzIbr9Ok,15625
 judgeval/tracer/llm/llm_openai/config.py,sha256=NE0ixKhd4WVeAVjY8jNTncuKYH6R4MQDLPmcCsd3zWY,144
-judgeval/tracer/llm/llm_openai/responses.py,sha256=fLTzvrpyJ6Be6DvlpN3PzC0JmSnPH_3eUOfIGH2QcC8,15891
+judgeval/tracer/llm/llm_openai/responses.py,sha256=lhs4yS-rJU255vo5gsJkGFRloYurlfnXIkstjMwR3vA,15875
 judgeval/tracer/llm/llm_openai/wrapper.py,sha256=Z5Ndib228yd1pXEQ4xIu7_CJHxpW_t0ofZAC6FLc5eU,2055
 judgeval/tracer/llm/llm_together/__init__.py,sha256=MEnsF77IgFD4h73hNCMpo-9a1PHHdm-OxPlOalXOMac,78
-judgeval/tracer/llm/llm_together/chat_completions.py,sha256=YxVL1zqG7Tjoss0BH3hm74UEHq-BaYuPHcxGaSJVdhM,14172
+judgeval/tracer/llm/llm_together/chat_completions.py,sha256=KC8sk40l1VDuFStuVGIV1GLLx3vrtPDk5Y2vJsnRe70,14156
 judgeval/tracer/llm/llm_together/config.py,sha256=jCJY0KQcHJZZJk2vq038GKIDUMusqgvRjQ0B6OV5uEc,150
 judgeval/tracer/llm/llm_together/wrapper.py,sha256=HFqy_MabQeSq8oj2diZhEuk1SDt_hDfk5MFdPn9MFhg,1733
 judgeval/tracer/processors/__init__.py,sha256=BdOOPOD1RfMI5YHW76DNPKR07EAev-JxoolZ3KaXNNU,7100
-judgeval/trainer/__init__.py,sha256=h_DDVV7HFF7HUPAJFpt2d9wjqgnmEVcHxqZyB1k7pPQ,257
-judgeval/trainer/config.py,sha256=sAAVBgeoFDJWYjGIgOvoQoiO0gtqNAOI6MHncwdN_mk,4292
+judgeval/trainer/__init__.py,sha256=nJo913vFdss3E_PR-M1OUjznS0SYgNZ-MP-Y_6Mj5PA,437
+judgeval/trainer/base_trainer.py,sha256=Lxm6OxJpifonLKofNIRG3TU7n_jZWQZ0I_f_jwtb_WU,4018
+judgeval/trainer/config.py,sha256=7ZSwr6p7vq0MRadh9axm6XB-RAotdWqULZ5yDl0xGbQ,4340
 judgeval/trainer/console.py,sha256=SvokkFEU-K1vLV4Rd1m6YJJ7HyYwTr4Azdzwx_JPZUY,4351
+judgeval/trainer/fireworks_trainer.py,sha256=_B-fWovdhIpxh1RbXU0W5BlFGc9ZzuYtFw7CBtKTRO8,16074
 judgeval/trainer/trainable_model.py,sha256=T-Sioi_sXtfYlcu3lE0cd60PHs8DrYaZ-Kxb4h1nU04,8993
-judgeval/trainer/trainer.py,sha256=FBhHq2YPooKADDCC_IEKex81L6a5quCmAMyl9mn3QLk,16675
+judgeval/trainer/trainer.py,sha256=twLEHNaomelTg6ZYG6veI9OpB3wzhPCtPVQMTnDZWx4,2626
 judgeval/utils/async_utils.py,sha256=AF1xdu8Ao5GyhFvfaLOaKJHn1RISyXZ4U70UZe9zfBA,1083
 judgeval/utils/file_utils.py,sha256=vq-n5WZEZjVbZ5S9QTkW8nSH6Pvw-Jx0ttsQ1t0wnPQ,3140
 judgeval/utils/guards.py,sha256=QBb6m6KElxdvt2bskLZCKh_zGHbBcqV-VfGzT63o3hY,807
 judgeval/utils/meta.py,sha256=RAqZuvOlymqMwFoS0joBW_r65lcN9bY8BpNYHoytKps,773
+judgeval/utils/project.py,sha256=kGpYmp6QGTD6h-GjQ-ovT7kBmGnyb99MWDJmRGFQHOg,527
 judgeval/utils/serialize.py,sha256=QXR-8Nj5rqOrI9zLx0oRLdk6DW6Bc7j8eyF4zQ7PLxA,6256
 judgeval/utils/testing.py,sha256=m5Nexv65tmfSj1XvAPK5Ear7aJ7w5xjDtZN0tLZ_RBk,2939
 judgeval/utils/url.py,sha256=Shf0v3XcbaWpL0m1eGJEEO_z4TsQCnDB2Rl25OTUmiI,195
@@ -100,8 +104,8 @@ judgeval/utils/wrappers/mutable_wrap_async.py,sha256=stHISOUCGFUJXY8seXmxUo4ZpMF
 judgeval/utils/wrappers/mutable_wrap_sync.py,sha256=t5jygAQ1vqhy8s1GfiLeYygYgaLTgfoYASN47U5JiPs,2888
 judgeval/utils/wrappers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 judgeval/utils/wrappers/utils.py,sha256=j18vaa6JWDw2s3nQy1z5PfV_9Xxio-bVARaHG_0XyL0,1228
-judgeval-0.16.9.dist-info/METADATA,sha256=OiLnf6tEWwnFyLkEjqBbqORUSfcTgjJSyK9nFr6dxHo,11513
-judgeval-0.16.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-judgeval-0.16.9.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
-judgeval-0.16.9.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
-judgeval-0.16.9.dist-info/RECORD,,
+judgeval-0.18.0.dist-info/METADATA,sha256=rkPsc8z-trMM27wunxLLI_3CGJNb1UXjuByMomklKIU,11483
+judgeval-0.18.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.18.0.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
+judgeval-0.18.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.18.0.dist-info/RECORD,,

{judgeval-0.16.9.dist-info → judgeval-0.18.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{judgeval-0.16.9.dist-info → judgeval-0.18.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{judgeval-0.16.9.dist-info → judgeval-0.18.0.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

judgeval 0.16.9__py3-none-any.whl → 0.18.0__py3-none-any.whl

Potentially problematic release.

judgeval 0.16.9__py3-none-any.whl → 0.18.0__py3-none-any.whl