judgeval 0.0.13__py3-none-any.whl → 0.0.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/common/tracer.py +126 -59
- judgeval/common/utils.py +12 -13
- judgeval/constants.py +61 -10
- judgeval/data/datasets/dataset.py +3 -2
- judgeval/data/datasets/eval_dataset_client.py +25 -14
- judgeval/data/example.py +8 -1
- judgeval/evaluation_run.py +9 -0
- judgeval/judges/together_judge.py +1 -1
- judgeval/judges/utils.py +1 -1
- judgeval/judgment_client.py +163 -28
- judgeval/rules.py +384 -0
- judgeval/run_evaluation.py +32 -14
- judgeval/scorers/api_scorer.py +11 -12
- judgeval/scorers/base_scorer.py +1 -1
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -1
- judgeval/utils/alerts.py +43 -0
- {judgeval-0.0.13.dist-info → judgeval-0.0.15.dist-info}/METADATA +1 -1
- {judgeval-0.0.13.dist-info → judgeval-0.0.15.dist-info}/RECORD +20 -18
- {judgeval-0.0.13.dist-info → judgeval-0.0.15.dist-info}/WHEEL +0 -0
- {judgeval-0.0.13.dist-info → judgeval-0.0.15.dist-info}/licenses/LICENSE.md +0 -0
judgeval/rules.py
ADDED
@@ -0,0 +1,384 @@
"""
Rules system for Judgeval that enables alerts based on metric thresholds.
"""

from typing import Dict, List, Optional, Union, Any, Set, Tuple
from pydantic import BaseModel, Field, field_validator, ConfigDict
from enum import Enum
from datetime import datetime
import asyncio
from concurrent.futures import ThreadPoolExecutor
import time
import uuid  # Add import for uuid module

from judgeval.scorers import APIJudgmentScorer, JudgevalScorer
from judgeval.scorers.judgeval_scorers import ScorerWrapper  # Import from the correct module

class AlertStatus(str, Enum):
    """Status of an alert evaluation."""
    TRIGGERED = "triggered"
    NOT_TRIGGERED = "not_triggered"

class Operator(str, Enum):
    """Comparison operators for conditions."""
    GT = ">"
    GTE = ">="
    LT = "<"
    LTE = "<="
    EQ = "=="
    NEQ = "!="

class Condition(BaseModel):
    """
    A single metric condition.

    Example:
        {
            "metric": FaithfulnessScorer(threshold=0.7) # Must be a scorer object: APIJudgmentScorer, JudgevalScorer, or ScorerWrapper
            "operator": ">=",
            "threshold": 0.7
        }
    """
    model_config = ConfigDict(arbitrary_types_allowed=True)

    metric: Union[APIJudgmentScorer, JudgevalScorer, ScorerWrapper]
    operator: Operator
    threshold: float

    @property
    def metric_name(self) -> str:
        """Get the name of the metric for lookups in scores dictionary."""
        if isinstance(self.metric, ScorerWrapper):
            # Handle ScorerWrapper case specifically
            return self.metric.scorer.score_type if hasattr(self.metric.scorer, 'score_type') else str(self.metric.scorer)
        elif hasattr(self.metric, 'score_type'):
            # Handle APIJudgmentScorer and JudgevalScorer which have score_type
            return self.metric.score_type
        elif hasattr(self.metric, '__name__'):
            # Handle cases where metric has a __name__ attribute
            return self.metric.__name__
        # Fallback to string representation
        return str(self.metric)

    def evaluate(self, value: float) -> bool:
        """Evaluate this condition against a value."""
        if self.operator == Operator.GT:
            return value > self.threshold
        elif self.operator == Operator.GTE:
            return value >= self.threshold
        elif self.operator == Operator.LT:
            return value < self.threshold
        elif self.operator == Operator.LTE:
            return value <= self.threshold
        elif self.operator == Operator.EQ:
            return value == self.threshold
        elif self.operator == Operator.NEQ:
            return value != self.threshold
        else:
            raise ValueError(f"Unknown operator: {self.operator}")

class Rule(BaseModel):
    """
    Configuration for a single rule.

    Example:
        {
            "rule_id": "123e4567-e89b-12d3-a456-426614174000",
            "name": "Quality Check",
            "description": "Check if quality metrics meet thresholds",
            "conditions": [
                {"metric": FaithfulnessScorer(threshold=0.7), "operator": ">=", "threshold": 0.7},
                {"metric": AnswerRelevancyScorer(threshold=0.8), "operator": ">=", "threshold": 0.8}
            ],
            "combine_type": "all"  # "all" or "any"
        }
    """
    rule_id: str = Field(default_factory=lambda: str(uuid.uuid4()))  # Random UUID string as default value
    name: str
    description: Optional[str] = None
    conditions: List[Condition]
    combine_type: str = Field(..., pattern="^(all|any)$")  # all = AND, any = OR

    def model_dump(self, **kwargs):
        """
        Custom serialization that properly handles condition serialization.
        """
        data = super().model_dump(**kwargs)

        # Special handling for conditions with complex metric objects
        if "conditions" in data:
            for i, condition in enumerate(data["conditions"]):
                if "metric" in condition:
                    # Get the actual metric object
                    metric_obj = self.conditions[i].metric

                    # Create standardized metric representation needed by server API
                    metric_data = {
                        "score_type": "",
                        "threshold": 0.0
                    }

                    # First try to use object's own serialization methods
                    if hasattr(metric_obj, "to_dict"):
                        orig_data = metric_obj.to_dict()
                        # Copy any existing fields
                        for key, value in orig_data.items():
                            metric_data[key] = value
                    elif hasattr(metric_obj, "model_dump"):
                        orig_data = metric_obj.model_dump()
                        # Copy any existing fields
                        for key, value in orig_data.items():
                            metric_data[key] = value

                    # If we already have data from original serialization methods but missing required fields
                    if 'name' in metric_data and 'score_type' not in metric_data:
                        metric_data['score_type'] = metric_data['name']

                    # Ensure required fields have values by checking various sources
                    if not metric_data['score_type']:
                        # Try to get score_type from different possible attributes
                        if hasattr(metric_obj, 'score_type'):
                            metric_data['score_type'] = metric_obj.score_type
                        elif hasattr(metric_obj, 'name'):
                            metric_data['score_type'] = metric_obj.name
                        else:
                            # Last resort: use string representation
                            metric_data['score_type'] = str(metric_obj)

                    # Make sure threshold is set
                    if not metric_data.get('threshold') and metric_data.get('threshold') != 0.0:
                        if hasattr(metric_obj, 'threshold'):
                            metric_data['threshold'] = metric_obj.threshold
                        else:
                            # Use condition threshold if metric doesn't have one
                            metric_data['threshold'] = self.conditions[i].threshold

                    # Update the condition with our properly serialized metric
                    condition["metric"] = metric_data

        return data

    @field_validator('conditions')
    def validate_conditions_not_empty(cls, v):
        if not v:
            raise ValueError("Conditions list cannot be empty")
        return v

    @field_validator('combine_type')
    def validate_combine_type(cls, v):
        if v not in ["all", "any"]:
            raise ValueError(f"combine_type must be 'all' or 'any', got: {v}")
        return v


class AlertResult(BaseModel):
    """
    Result of evaluating a rule.

    Example:
        {
            "status": "triggered",
            "rule_name": "Quality Check",
            "conditions_result": [
                {"metric": "faithfulness", "value": 0.6, "threshold": 0.7, "passed": False},
                {"metric": "relevancy", "value": 0.9, "threshold": 0.8, "passed": True}
            ],
            "rule_id": "123e4567-e89b-12d3-a456-426614174000",
            "metadata": {
                "example_id": "example_123",
                "timestamp": "20240321_123456"
            }
        }
    """
    status: AlertStatus
    rule_id: Optional[str] = None  # The unique identifier of the rule
    rule_name: str
    conditions_result: List[Dict[str, Any]]
    metadata: Dict[str, Any] = {}

    @property
    def example_id(self) -> Optional[str]:
        """Get example_id from metadata for backward compatibility"""
        return self.metadata.get("example_id")

    @property
    def timestamp(self) -> Optional[str]:
        """Get timestamp from metadata for backward compatibility"""
        return self.metadata.get("timestamp")

class RulesEngine:
    """
    Engine for evaluating rules and managing alerts.

    Example usage:
        rules = {
            "quality_check": Rule(
                name="Quality Check",
                conditions=[
                    Condition(metric=FaithfulnessScorer(threshold=0.7), operator=">=", threshold=0.7),
                    Condition(metric=AnswerRelevancyScorer(threshold=0.8), operator=">=", threshold=0.8)
                ],
                combine_type="all"
            )
        }

        engine = RulesEngine(rules)
        scores = {"faithfulness": 0.8, "relevancy": 0.9}
        alerts = engine.evaluate_rules(scores, example_metadata={
            "example_id": "example_123",
            "timestamp": "20240321_123456"
        })
    """

    def __init__(self, rules: Dict[str, Rule]):
        """
        Initialize the RulesEngine with rules.

        Args:
            rules: Dictionary mapping rule IDs to rule configurations
        """
        self.rules = rules

    def evaluate_rules(self, scores: Dict[str, float], example_metadata: Optional[Dict[str, Any]] = None) -> Dict[str, AlertResult]:
        """
        Evaluate all rules against a set of scores.
        Returns mapping of rule IDs to their alert results.

        Args:
            scores: Dictionary of metric names to their score values
            example_metadata: Optional dictionary containing example metadata (example_id, timestamp)
        """
        results = {}

        for rule_id, rule in self.rules.items():
            # Evaluate each condition
            condition_results = []
            passed_conditions = []

            for condition in rule.conditions:
                # Get the metric name for lookup
                metric_name = condition.metric_name
                value = scores.get(metric_name)
                if value is None:
                    # Skip this condition instead of evaluating it as false
                    condition_results.append({
                        "metric": metric_name,
                        "value": None,
                        "threshold": condition.threshold,
                        "operator": condition.operator,
                        "passed": None,  # Using None to indicate the condition was skipped
                        "skipped": True  # Add a flag to indicate this condition was skipped
                    })
                    continue  # Skip adding to passed_conditions
                else:
                    passed = condition.evaluate(value)
                    condition_results.append({
                        "metric": metric_name,
                        "value": value,
                        "threshold": condition.threshold,
                        "operator": condition.operator,
                        "passed": passed,
                        "skipped": False  # Indicate this condition was evaluated
                    })
                    passed_conditions.append(passed)

            # Determine if alert should trigger - only consider conditions that weren't skipped
            if not passed_conditions:
                # If all conditions were skipped, the rule doesn't trigger
                triggered = False
            else:
                triggered = all(passed_conditions) if rule.combine_type == "all" else any(passed_conditions)

            # Create alert result with example metadata
            alert_result = AlertResult(
                status=AlertStatus.TRIGGERED if triggered else AlertStatus.NOT_TRIGGERED,
                rule_id=rule.rule_id,  # Include the rule's unique identifier
                rule_name=rule.name,
                conditions_result=condition_results
            )

            # Add example metadata if provided
            if example_metadata:
                if "example_id" in example_metadata:
                    alert_result.metadata["example_id"] = example_metadata["example_id"]
                if "timestamp" in example_metadata:
                    alert_result.metadata["timestamp"] = example_metadata["timestamp"]

            results[rule_id] = alert_result

        return results

    async def evaluate_rules_parallel(self,
                                      example_scores: Dict[str, Dict[str, float]],
                                      example_metadata: Dict[str, Dict[str, Any]],
                                      max_concurrent: int = 100) -> Dict[str, Dict[str, AlertResult]]:
        """
        Evaluate all rules against multiple examples in parallel.

        Args:
            example_scores: Dictionary mapping example_ids to their score dictionaries
            example_metadata: Dictionary mapping example_ids to their metadata
            max_concurrent: Maximum number of concurrent evaluations

        Returns:
            Dictionary mapping example_ids to dictionaries of rule_ids and their alert results
        """
        # Create semaphore to limit concurrent executions
        semaphore = asyncio.Semaphore(max_concurrent)
        results = {}
        tasks = []

        # Create a task for each example
        for example_id, scores in example_scores.items():
            metadata = example_metadata.get(example_id, {})
            task = self._evaluate_with_semaphore(
                semaphore=semaphore,
                example_id=example_id,
                scores=scores,
                metadata=metadata
            )
            tasks.append(task)

        # Run all tasks and collect results
        example_results = await asyncio.gather(*tasks)

        # Organize results by example_id
        for example_id, result in example_results:
            results[example_id] = result

        return results

    async def _evaluate_with_semaphore(self,
                                       semaphore: asyncio.Semaphore,
                                       example_id: str,
                                       scores: Dict[str, float],
                                       metadata: Dict[str, Any]) -> Tuple[str, Dict[str, AlertResult]]:
        """
        Helper method to evaluate rules for an example with semaphore control.

        Args:
            semaphore: Semaphore to control concurrency
            example_id: ID of the example being evaluated
            scores: Dictionary of scores for this example
            metadata: Metadata for this example

        Returns:
            Tuple of (example_id, rule_results)
        """
        async with semaphore:
            # Run the evaluation in a thread pool to avoid blocking the event loop
            # for CPU-bound operations
            with ThreadPoolExecutor() as executor:
                start_time = time.perf_counter()
                rule_results = await asyncio.get_event_loop().run_in_executor(
                    executor,
                    self.evaluate_rules,
                    scores,
                    metadata
                )
                end_time = time.perf_counter()

                # Could log performance metrics here if needed
                # debug(f"Rule evaluation for example {example_id} took {end_time - start_time:.4f} seconds")

                return (example_id, rule_results)
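For orientation, the sketch below shows how the new rules API might be used end to end. It is adapted from the RulesEngine docstring above; the scorer classes and the score keys ("faithfulness", "relevancy") are illustrative assumptions, since in practice the lookup keys come from each scorer's score_type.

# Minimal usage sketch of judgeval.rules, adapted from the RulesEngine docstring.
# Scorer imports and score keys are assumptions for illustration only.
from judgeval.rules import Rule, Condition, RulesEngine
from judgeval.scorers import FaithfulnessScorer, AnswerRelevancyScorer

rules = {
    "quality_check": Rule(
        name="Quality Check",
        conditions=[
            Condition(metric=FaithfulnessScorer(threshold=0.7), operator=">=", threshold=0.7),
            Condition(metric=AnswerRelevancyScorer(threshold=0.8), operator=">=", threshold=0.8),
        ],
        combine_type="all",  # trigger only if every evaluated condition passes
    )
}

engine = RulesEngine(rules)
alerts = engine.evaluate_rules(
    {"faithfulness": 0.8, "relevancy": 0.9},
    example_metadata={"example_id": "example_123", "timestamp": "20240321_123456"},
)
print(alerts["quality_check"].status)  # AlertStatus.TRIGGERED or AlertStatus.NOT_TRIGGERED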
judgeval/run_evaluation.py
CHANGED
@@ -20,6 +20,7 @@ from judgeval.constants import (
     ROOT_API,
     JUDGMENT_EVAL_API_URL,
     JUDGMENT_EVAL_LOG_API_URL,
+    MAX_CONCURRENT_EVALUATIONS
 )
 from judgeval.common.exceptions import JudgmentAPIError
 from judgeval.evaluation_run import EvaluationRun
@@ -30,6 +31,7 @@ from judgeval.common.logger import (
     error,
     example_logging_context
 )
+from judgeval.rules import RulesEngine, Rule, AlertResult, AlertStatus
 
 
 def execute_api_eval(evaluation_run: EvaluationRun) -> List[Dict]:
@@ -50,9 +52,11 @@ def execute_api_eval(evaluation_run: EvaluationRun) -> List[Dict]:
         response = requests.post(
             JUDGMENT_EVAL_API_URL, headers={
                 "Content-Type": "application/json",
-                "Authorization": f"Bearer {evaluation_run.judgment_api_key}"
+                "Authorization": f"Bearer {evaluation_run.judgment_api_key}",
+                "X-Organization-Id": evaluation_run.organization_id
             },
-            json=payload
+            json=payload,
+            verify=False)
         response_data = response.json()
     except Exception as e:
         error(f"Error: {e}")
@@ -140,7 +144,7 @@ def check_missing_scorer_data(results: List[ScoringResult]) -> List[ScoringResul
     return results
 
 
-def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_key: str) -> None:
+def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_key: str, organization_id: str) -> None:
     """
     Checks if an evaluation run name already exists for a given project.
 
@@ -158,13 +162,15 @@ def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_k
         f"{ROOT_API}/eval-run-name-exists/",
         headers={
             "Content-Type": "application/json",
-            "Authorization": f"Bearer {judgment_api_key}"
+            "Authorization": f"Bearer {judgment_api_key}",
+            "X-Organization-Id": organization_id
         },
         json={
             "eval_name": eval_name,
             "project_name": project_name,
             "judgment_api_key": judgment_api_key,
-        }
+        },
+        verify=False
     )
 
     if response.status_code == 409:
@@ -199,14 +205,15 @@ def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run:
         JUDGMENT_EVAL_LOG_API_URL,
         headers={
             "Content-Type": "application/json",
-            "Authorization": f"Bearer {evaluation_run.judgment_api_key}"
+            "Authorization": f"Bearer {evaluation_run.judgment_api_key}",
+            "X-Organization-Id": evaluation_run.organization_id
         },
         json={
             "results": [result.to_dict() for result in merged_results],
-            "judgment_api_key": evaluation_run.judgment_api_key,
             "project_name": evaluation_run.project_name,
             "eval_name": evaluation_run.eval_name,
-        }
+        },
+        verify=False
     )
 
     if not res.ok:
@@ -226,6 +233,7 @@ def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run:
         raise ValueError(f"Failed to save evaluation results to DB: {str(e)}")
 
 
+
 def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[ScoringResult]:
     """
     Executes an evaluation of `Example`s using one or more `Scorer`s
@@ -243,7 +251,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
         metadata (Optional[Dict[str, Any]]): Additional metadata to include for this evaluation run, e.g. comments, dataset name, purpose, etc.
         judgment_api_key (Optional[str]): The API key for running evaluations on the Judgment API
         log_results (bool): Whether to log the results to the Judgment API
-
+        rules (Optional[List[Rule]]): Rules to evaluate against scoring results
 
     Returns:
         List[ScoringResult]: The results of the evaluation. Each result is a dictionary containing the fields of a `ScoringResult` object.
@@ -254,7 +262,8 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
         check_eval_run_name_exists(
             evaluation_run.eval_name,
             evaluation_run.project_name,
-            evaluation_run.judgment_api_key
+            evaluation_run.judgment_api_key,
+            evaluation_run.organization_id
         )
 
     # Set example IDs if not already set
@@ -312,7 +321,9 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
             aggregator=evaluation_run.aggregator,
             metadata=evaluation_run.metadata,
             judgment_api_key=evaluation_run.judgment_api_key,
-
+            organization_id=evaluation_run.organization_id,
+            log_results=evaluation_run.log_results,
+            rules=evaluation_run.rules
         )
         debug("Sending request to Judgment API")
         response_data: List[Dict] = execute_api_eval(api_evaluation_run)  # Dicts are `ScoringResult` objs
@@ -342,7 +353,6 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
             ]
 
             api_results.append(ScoringResult(**filtered_result))
-
     # Run local evals
     if local_scorers:  # List[JudgevalScorer]
         info("Starting local evaluation")
@@ -360,12 +370,11 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
                 show_indicator=True,
                 _use_bar_indicator=True,
                 throttle_value=0,
-                max_concurrent=
+                max_concurrent=MAX_CONCURRENT_EVALUATIONS,
             )
         )
         local_results = results
        info(f"Local evaluation complete with {len(local_results)} results")
-
     # Aggregate the ScorerData from the API and local evaluations
     debug("Merging API and local results")
     merged_results: List[ScoringResult] = merge_results(api_results, local_results)
@@ -373,6 +382,15 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
 
     info(f"Successfully merged {len(merged_results)} results")
 
+    # Evaluate rules against local scoring results if rules exist (this cant be done just yet)
+    # if evaluation_run.rules and merged_results:
+    #     run_rules(
+    #         local_results=merged_results,
+    #         rules=evaluation_run.rules,
+    #         judgment_api_key=evaluation_run.judgment_api_key,
+    #         organization_id=evaluation_run.organization_id
+    #     )
+
     if evaluation_run.log_results:
         log_evaluation_results(merged_results, evaluation_run)
 
judgeval/scorers/api_scorer.py
CHANGED
@@ -34,22 +34,22 @@ class APIJudgmentScorer(BaseModel):
     @field_validator('score_type')
     def convert_to_enum_value(cls, v):
         """
-        Validates that the `score_type` is a valid `
-        Converts string values to `
+        Validates that the `score_type` is a valid `APIScorer` enum value.
+        Converts string values to `APIScorer` enum values.
         """
         debug(f"Attempting to convert score_type value: {v}")
         if isinstance(v, APIScorer):
-            info(f"Using existing
-            return v
+            info(f"Using existing APIScorer: {v}")
+            return v
         elif isinstance(v, str):
-            debug(f"Converting string value to
-            return APIScorer[v.upper()]
+            debug(f"Converting string value to APIScorer enum: {v}")
+            return APIScorer[v.upper()]
         error(f"Invalid score_type value: {v}")
         raise ValueError(f"Invalid value for score_type: {v}")
-
+
     def __str__(self):
-        return f"JudgmentScorer(score_type={self.score_type}, threshold={self.threshold})"
-
+        return f"JudgmentScorer(score_type={self.score_type.value}, threshold={self.threshold})"
+
     def to_dict(self) -> dict:
         """
         Converts the scorer configuration to a dictionary format.
@@ -58,7 +58,6 @@ class APIJudgmentScorer(BaseModel):
             dict: A dictionary containing the scorer's configuration
         """
         return {
-            "score_type": self.score_type,
+            "score_type": str(self.score_type.value), # Convert enum to string for serialization
             "threshold": self.threshold
-        }
-
+        }
judgeval/scorers/base_scorer.py
CHANGED
@@ -48,5 +48,5 @@ class APIJudgmentScorer(BaseModel):
         raise ValueError(f"Invalid value for score_type: {v}")
 
     def __str__(self):
-        return f"JudgmentScorer(score_type={self.score_type}, threshold={self.threshold})"
+        return f"JudgmentScorer(score_type={self.score_type.value}, threshold={self.threshold})"
 
judgeval/utils/alerts.py
ADDED
@@ -0,0 +1,43 @@
"""
Handling alerts in Judgeval.
"""
from enum import Enum
from typing import Dict, Any, List, Optional
from pydantic import BaseModel

class AlertStatus(str, Enum):
    """Status of an alert evaluation."""
    TRIGGERED = "triggered"
    NOT_TRIGGERED = "not_triggered"

class AlertResult(BaseModel):
    """
    Result of a rule evaluation.

    Attributes:
        rule_name: Name of the rule that was evaluated
        rule_id: Unique identifier of the rule
        status: Status of the alert (triggered or not)
        conditions_result: List of condition evaluation results
        metadata: Dictionary containing example_id, timestamp, and other metadata
    """
    rule_name: str
    rule_id: Optional[str] = None  # The unique identifier of the rule
    status: AlertStatus
    conditions_result: List[Dict[str, Any]] = []
    metadata: Dict[str, Any] = {}

    @property
    def example_id(self) -> Optional[str]:
        """Get example_id from metadata for backward compatibility"""
        return self.metadata.get("example_id")

    @property
    def timestamp(self) -> Optional[str]:
        """Get timestamp from metadata for backward compatibility"""
        return self.metadata.get("timestamp")

    @property
    def conditions_results(self) -> List[Dict[str, Any]]:
        """Backwards compatibility property for the conditions_result field"""
        return self.conditions_result
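A brief sketch of how this AlertResult model and its backward-compatibility properties behave; the field values below are illustrative only.

# Illustrative values; grounded in the AlertResult model added above.
from judgeval.utils.alerts import AlertResult, AlertStatus

result = AlertResult(
    rule_name="Quality Check",
    status=AlertStatus.TRIGGERED,
    conditions_result=[{"metric": "faithfulness", "value": 0.6, "threshold": 0.7, "passed": False}],
    metadata={"example_id": "example_123", "timestamp": "20240321_123456"},
)
print(result.example_id)                                       # "example_123", read from metadata
print(result.conditions_results == result.conditions_result)   # True, legacy alias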