PyPI - judgeval - Versions diffs - 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl - Mend

judgeval 0.1.0-py3-none-any.whl → 0.23.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (234) hide show

judgeval/__init__.py +173 -10
judgeval/api/__init__.py +523 -0
judgeval/api/api_types.py +413 -0
judgeval/cli.py +112 -0
judgeval/constants.py +7 -30
judgeval/data/__init__.py +1 -3
judgeval/data/evaluation_run.py +125 -0
judgeval/data/example.py +14 -40
judgeval/data/judgment_types.py +396 -146
judgeval/data/result.py +11 -18
judgeval/data/scorer_data.py +3 -26
judgeval/data/scripts/openapi_transform.py +5 -5
judgeval/data/trace.py +115 -194
judgeval/dataset/__init__.py +335 -0
judgeval/env.py +55 -0
judgeval/evaluation/__init__.py +346 -0
judgeval/exceptions.py +28 -0
judgeval/integrations/langgraph/__init__.py +13 -0
judgeval/integrations/openlit/__init__.py +51 -0
judgeval/judges/__init__.py +2 -2
judgeval/judges/litellm_judge.py +77 -16
judgeval/judges/together_judge.py +88 -17
judgeval/judges/utils.py +7 -20
judgeval/judgment_attribute_keys.py +55 -0
judgeval/{common/logger.py → logger.py} +24 -8
judgeval/prompt/__init__.py +330 -0
judgeval/scorers/__init__.py +11 -11
judgeval/scorers/agent_scorer.py +15 -19
judgeval/scorers/api_scorer.py +21 -23
judgeval/scorers/base_scorer.py +54 -36
judgeval/scorers/example_scorer.py +1 -3
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
judgeval/scorers/score.py +64 -47
judgeval/scorers/utils.py +2 -107
judgeval/tracer/__init__.py +1111 -2
judgeval/tracer/constants.py +1 -0
judgeval/tracer/exporters/__init__.py +40 -0
judgeval/tracer/exporters/s3.py +119 -0
judgeval/tracer/exporters/store.py +59 -0
judgeval/tracer/exporters/utils.py +32 -0
judgeval/tracer/keys.py +63 -0
judgeval/tracer/llm/__init__.py +7 -0
judgeval/tracer/llm/config.py +78 -0
judgeval/tracer/llm/constants.py +9 -0
judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
judgeval/tracer/llm/llm_anthropic/config.py +6 -0
judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
judgeval/tracer/llm/llm_google/__init__.py +3 -0
judgeval/tracer/llm/llm_google/config.py +6 -0
judgeval/tracer/llm/llm_google/generate_content.py +127 -0
judgeval/tracer/llm/llm_google/wrapper.py +30 -0
judgeval/tracer/llm/llm_openai/__init__.py +3 -0
judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
judgeval/tracer/llm/llm_openai/config.py +6 -0
judgeval/tracer/llm/llm_openai/responses.py +506 -0
judgeval/tracer/llm/llm_openai/utils.py +42 -0
judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
judgeval/tracer/llm/llm_together/__init__.py +3 -0
judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
judgeval/tracer/llm/llm_together/config.py +6 -0
judgeval/tracer/llm/llm_together/wrapper.py +52 -0
judgeval/tracer/llm/providers.py +19 -0
judgeval/tracer/managers.py +167 -0
judgeval/tracer/processors/__init__.py +220 -0
judgeval/tracer/utils.py +19 -0
judgeval/trainer/__init__.py +14 -0
judgeval/trainer/base_trainer.py +122 -0
judgeval/trainer/config.py +123 -0
judgeval/trainer/console.py +144 -0
judgeval/trainer/fireworks_trainer.py +392 -0
judgeval/trainer/trainable_model.py +252 -0
judgeval/trainer/trainer.py +70 -0
judgeval/utils/async_utils.py +39 -0
judgeval/utils/decorators/__init__.py +0 -0
judgeval/utils/decorators/dont_throw.py +37 -0
judgeval/utils/decorators/use_once.py +13 -0
judgeval/utils/file_utils.py +74 -28
judgeval/utils/guards.py +36 -0
judgeval/utils/meta.py +27 -0
judgeval/utils/project.py +15 -0
judgeval/utils/serialize.py +253 -0
judgeval/utils/testing.py +70 -0
judgeval/utils/url.py +10 -0
judgeval/{version_check.py → utils/version_check.py} +5 -3
judgeval/utils/wrappers/README.md +3 -0
judgeval/utils/wrappers/__init__.py +15 -0
judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
judgeval/utils/wrappers/py.typed +0 -0
judgeval/utils/wrappers/utils.py +35 -0
judgeval/v1/__init__.py +88 -0
judgeval/v1/data/__init__.py +7 -0
judgeval/v1/data/example.py +44 -0
judgeval/v1/data/scorer_data.py +42 -0
judgeval/v1/data/scoring_result.py +44 -0
judgeval/v1/datasets/__init__.py +6 -0
judgeval/v1/datasets/dataset.py +214 -0
judgeval/v1/datasets/dataset_factory.py +94 -0
judgeval/v1/evaluation/__init__.py +6 -0
judgeval/v1/evaluation/evaluation.py +182 -0
judgeval/v1/evaluation/evaluation_factory.py +17 -0
judgeval/v1/instrumentation/__init__.py +6 -0
judgeval/v1/instrumentation/llm/__init__.py +7 -0
judgeval/v1/instrumentation/llm/config.py +78 -0
judgeval/v1/instrumentation/llm/constants.py +11 -0
judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
judgeval/v1/instrumentation/llm/providers.py +19 -0
judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
judgeval/v1/integrations/langgraph/__init__.py +13 -0
judgeval/v1/integrations/openlit/__init__.py +47 -0
judgeval/v1/internal/api/__init__.py +525 -0
judgeval/v1/internal/api/api_types.py +413 -0
judgeval/v1/prompts/__init__.py +6 -0
judgeval/v1/prompts/prompt.py +29 -0
judgeval/v1/prompts/prompt_factory.py +189 -0
judgeval/v1/py.typed +0 -0
judgeval/v1/scorers/__init__.py +6 -0
judgeval/v1/scorers/api_scorer.py +82 -0
judgeval/v1/scorers/base_scorer.py +17 -0
judgeval/v1/scorers/built_in/__init__.py +17 -0
judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
judgeval/v1/scorers/built_in/faithfulness.py +28 -0
judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
judgeval/v1/scorers/scorers_factory.py +49 -0
judgeval/v1/tracer/__init__.py +7 -0
judgeval/v1/tracer/base_tracer.py +520 -0
judgeval/v1/tracer/exporters/__init__.py +14 -0
judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
judgeval/v1/tracer/exporters/span_store.py +50 -0
judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
judgeval/v1/tracer/processors/__init__.py +6 -0
judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
judgeval/v1/tracer/tracer.py +67 -0
judgeval/v1/tracer/tracer_factory.py +38 -0
judgeval/v1/trainers/__init__.py +5 -0
judgeval/v1/trainers/base_trainer.py +62 -0
judgeval/v1/trainers/config.py +123 -0
judgeval/v1/trainers/console.py +144 -0
judgeval/v1/trainers/fireworks_trainer.py +392 -0
judgeval/v1/trainers/trainable_model.py +252 -0
judgeval/v1/trainers/trainers_factory.py +37 -0
judgeval/v1/utils.py +18 -0
judgeval/version.py +5 -0
judgeval/warnings.py +4 -0
judgeval-0.23.0.dist-info/METADATA +266 -0
judgeval-0.23.0.dist-info/RECORD +201 -0
judgeval-0.23.0.dist-info/entry_points.txt +2 -0
judgeval/clients.py +0 -34
judgeval/common/__init__.py +0 -13
judgeval/common/api/__init__.py +0 -3
judgeval/common/api/api.py +0 -352
judgeval/common/api/constants.py +0 -165
judgeval/common/exceptions.py +0 -27
judgeval/common/storage/__init__.py +0 -6
judgeval/common/storage/s3_storage.py +0 -98
judgeval/common/tracer/__init__.py +0 -31
judgeval/common/tracer/constants.py +0 -22
judgeval/common/tracer/core.py +0 -1916
judgeval/common/tracer/otel_exporter.py +0 -108
judgeval/common/tracer/otel_span_processor.py +0 -234
judgeval/common/tracer/span_processor.py +0 -37
judgeval/common/tracer/span_transformer.py +0 -211
judgeval/common/tracer/trace_manager.py +0 -92
judgeval/common/utils.py +0 -940
judgeval/data/datasets/__init__.py +0 -4
judgeval/data/datasets/dataset.py +0 -341
judgeval/data/datasets/eval_dataset_client.py +0 -214
judgeval/data/tool.py +0 -5
judgeval/data/trace_run.py +0 -37
judgeval/evaluation_run.py +0 -75
judgeval/integrations/langgraph.py +0 -843
judgeval/judges/mixture_of_judges.py +0 -286
judgeval/judgment_client.py +0 -369
judgeval/rules.py +0 -521
judgeval/run_evaluation.py +0 -684
judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
judgeval/utils/alerts.py +0 -93
judgeval/utils/requests.py +0 -50
judgeval-0.1.0.dist-info/METADATA +0 -202
judgeval-0.1.0.dist-info/RECORD +0 -73
{judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
{judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0

judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py DELETED Viewed

@@ -1,52 +0,0 @@
-"""
-`judgeval` tool correctness scorer
-TODO add link to docs page for this scorer
-"""
-# Internal imports
-from judgeval.scorers.api_scorer import APIScorerConfig
-from judgeval.constants import APIScorerType
-from typing import Optional, Dict
-from judgeval.data import ExampleParams
-class ExecutionOrderScorer(APIScorerConfig):
-    kwargs: Optional[Dict] = None
-    def __init__(
-        self,
-        threshold: float,
-        should_exact_match: bool = False,
-        should_consider_ordering: bool = False,
-    ):
-        super().__init__(
-            threshold=threshold,
-            score_type=APIScorerType.EXECUTION_ORDER,
-            required_params=[
-                ExampleParams.ACTUAL_OUTPUT,
-                ExampleParams.EXPECTED_OUTPUT,
-            ],
-        )
-        self.kwargs = {
-            "should_exact_match": should_exact_match,
-            "should_consider_ordering": should_consider_ordering,
-        }
-    @property
-    def __name__(self):
-        return "Execution Order"
-    def to_dict(self) -> dict:
-        """
-        Converts the scorer configuration to a dictionary format.
-        Returns:
-            dict: A dictionary containing the scorer's configuration
-        """
-        return {
-            "score_type": self.score_type,
-            "threshold": self.threshold,
-            "kwargs": self.kwargs,
-        }

judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py DELETED Viewed

@@ -1,28 +0,0 @@
-"""
-`judgeval` hallucination scorer
-TODO add link to docs page for this scorer
-"""
-# Internal imports
-from judgeval.scorers.api_scorer import APIScorerConfig
-from judgeval.constants import APIScorerType
-from judgeval.data import ExampleParams
-class HallucinationScorer(APIScorerConfig):
-    def __init__(self, threshold: float):
-        super().__init__(
-            threshold=threshold,
-            score_type=APIScorerType.HALLUCINATION,
-            required_params=[
-                ExampleParams.INPUT,
-                ExampleParams.ACTUAL_OUTPUT,
-                ExampleParams.CONTEXT,
-            ],
-        )
-    @property
-    def __name__(self):
-        return "Hallucination"

judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py DELETED Viewed

@@ -1,20 +0,0 @@
-"""
-`judgeval` tool dependency scorer
-"""
-# Internal imports
-from judgeval.scorers.api_scorer import APIScorerConfig
-from judgeval.constants import APIScorerType
-from typing import Optional, Dict
-class ToolDependencyScorer(APIScorerConfig):
-    kwargs: Optional[Dict] = None
-    def __init__(self, threshold: float = 1.0, enable_param_checking: bool = True):
-        super().__init__(threshold=threshold, score_type=APIScorerType.TOOL_DEPENDENCY)
-        self.kwargs = {"enable_param_checking": enable_param_checking}
-    @property
-    def __name__(self):
-        return "Tool Dependency"

judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py DELETED Viewed

@@ -1,27 +0,0 @@
-"""
-`judgeval` tool order scorer
-"""
-# Internal imports
-from judgeval.scorers.api_scorer import APIScorerConfig
-from judgeval.constants import APIScorerType
-from typing import Dict, Any
-class ToolOrderScorer(APIScorerConfig):
-    score_type: APIScorerType = APIScorerType.TOOL_ORDER
-    threshold: float = 1.0
-    exact_match: bool = False
-    def model_dump(self, *args, **kwargs) -> Dict[str, Any]:
-        base = super().model_dump(*args, **kwargs)
-        base_fields = set(APIScorerConfig.model_fields.keys())
-        all_fields = set(self.__class__.model_fields.keys())
-        extra_fields = all_fields - base_fields - {"kwargs"}
-        base["kwargs"] = {
-            k: getattr(self, k) for k in extra_fields if getattr(self, k) is not None
-        }
-        return base

judgeval/utils/alerts.py DELETED Viewed

@@ -1,93 +0,0 @@
-"""
-Handling alerts in Judgeval.
-"""
-from enum import Enum
-from typing import Dict, Any, List, Optional
-from pydantic import BaseModel
-class AlertStatus(str, Enum):
-    """Status of an alert evaluation."""
-    TRIGGERED = "triggered"
-    NOT_TRIGGERED = "not_triggered"
-class AlertResult(BaseModel):
-    """
-    Result of a rule evaluation.
-    Attributes:
-        rule_name: Name of the rule that was evaluated
-        rule_id: Unique identifier of the rule
-        status: Status of the alert (triggered or not)
-        conditions_result: List of condition evaluation results
-        metadata: Dictionary containing example_id, timestamp, and other metadata
-        notification: Optional notification configuration for triggered alerts
-        combine_type: The combination type used ("all" or "any")
-        project_id: Optional project identifier
-        trace_span_id: Optional trace span identifier
-    """
-    rule_name: str
-    rule_id: Optional[str] = None  # The unique identifier of the rule
-    status: AlertStatus
-    conditions_result: List[Dict[str, Any]] = []
-    metadata: Dict[str, Any] = {}
-    notification: Optional[Any] = (
-        None  # NotificationConfig when triggered, None otherwise
-    )
-    combine_type: Optional[str] = None  # "all" or "any"
-    project_id: Optional[str] = None  # Project identifier
-    trace_span_id: Optional[str] = None  # Trace span identifier
-    @property
-    def example_id(self) -> Optional[str]:
-        """Get example_id from metadata for backward compatibility"""
-        return self.metadata.get("example_id")
-    @property
-    def timestamp(self) -> Optional[str]:
-        """Get timestamp from metadata for backward compatibility"""
-        return self.metadata.get("timestamp")
-    @property
-    def conditions_results(self) -> List[Dict[str, Any]]:
-        """Backwards compatibility property for the conditions_result field"""
-        return self.conditions_result
-    def model_dump(self, **kwargs):
-        """
-        Convert the AlertResult to a dictionary for JSON serialization.
-        Args:
-            **kwargs: Additional arguments to pass to Pydantic's model_dump
-        Returns:
-            dict: Dictionary representation of the AlertResult
-        """
-        data = (
-            super().model_dump(**kwargs)
-            if hasattr(super(), "model_dump")
-            else super().dict(**kwargs)
-        )
-        # Handle the NotificationConfig object if it exists
-        if hasattr(self, "notification") and self.notification is not None:
-            if hasattr(self.notification, "model_dump"):
-                data["notification"] = self.notification.model_dump()
-            elif hasattr(self.notification, "dict"):
-                data["notification"] = self.notification.dict()
-            else:
-                # Manually convert the notification to a dictionary
-                notif = self.notification
-                data["notification"] = {
-                    "enabled": notif.enabled,
-                    "communication_methods": notif.communication_methods,
-                    "email_addresses": notif.email_addresses,
-                    "slack_channels": getattr(notif, "slack_channels", []),
-                    "send_at": notif.send_at,
-                }
-        return data

judgeval/utils/requests.py DELETED Viewed

@@ -1,50 +0,0 @@
-import requests as requests_original
-from requests.adapters import HTTPAdapter
-from urllib3.util.retry import Retry
-from http import HTTPStatus
-class RetrySession(requests_original.Session):
-    def __init__(
-        self,
-        retries=3,
-        backoff_factor=0.5,
-        status_forcelist=[HTTPStatus.BAD_GATEWAY, HTTPStatus.SERVICE_UNAVAILABLE],
-        default_timeout=(10, 60),  # (connect_timeout, read_timeout)
-    ):
-        super().__init__()
-        # Store default timeout
-        self.default_timeout = default_timeout
-        retry_strategy = Retry(
-            total=retries,
-            read=retries,
-            connect=retries,
-            backoff_factor=backoff_factor,
-            status_forcelist=status_forcelist,
-        )
-        adapter = HTTPAdapter(max_retries=retry_strategy)
-        self.mount("http://", adapter)
-        self.mount("https://", adapter)
-    def request(self, method, url, timeout=None, **kwargs):
-        """
-        Override request method to add default timeout if not specified.
-        Args:
-            method: HTTP method
-            url: Request URL
-            timeout: Timeout value. If None, uses default_timeout.
-                    Can be a float (total timeout) or tuple (connect, read).
-            **kwargs: Other request arguments
-        """
-        # Use default timeout if none specified
-        if timeout is None:
-            timeout = self.default_timeout
-        return super().request(method, url, timeout=timeout, **kwargs)
-requests = RetrySession()

judgeval-0.1.0.dist-info/METADATA DELETED Viewed

@@ -1,202 +0,0 @@
-Metadata-Version: 2.4
-Name: judgeval
-Version: 0.1.0
-Summary: Judgeval Package
-Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
-Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
-Author-email: Andrew Li <andrew@judgmentlabs.ai>, Alex Shan <alex@judgmentlabs.ai>, Joseph Camyre <joseph@judgmentlabs.ai>
-License-Expression: Apache-2.0
-License-File: LICENSE.md
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3
-Requires-Python: >=3.11
-Requires-Dist: anthropic
-Requires-Dist: boto3
-Requires-Dist: datamodel-code-generator>=0.31.1
-Requires-Dist: google-genai
-Requires-Dist: langchain-anthropic
-Requires-Dist: langchain-core
-Requires-Dist: langchain-huggingface
-Requires-Dist: langchain-openai
-Requires-Dist: litellm>=1.61.15
-Requires-Dist: matplotlib>=3.10.3
-Requires-Dist: nest-asyncio
-Requires-Dist: openai
-Requires-Dist: pandas
-Requires-Dist: python-dotenv==1.0.1
-Requires-Dist: python-slugify>=8.0.4
-Requires-Dist: requests
-Requires-Dist: together
-Description-Content-Type: text/markdown
-<div align="center">
-<img src="assets/new_lightmode.svg#gh-light-mode-only" alt="Judgment Logo" width="400" />
-<img src="assets/new_darkmode.svg#gh-dark-mode-only" alt="Judgment Logo" width="400" />
-<br>
-<div style="font-size: 1.5em;">
-    Enable self-learning agents with traces, evals, and environment data.
-</div>
-## [Docs](https://docs.judgmentlabs.ai/)  •  [Judgment Cloud](https://app.judgmentlabs.ai/register)  • [Self-Host](https://docs.judgmentlabs.ai/documentation/self-hosting/get-started)
- [Demo](https://www.youtube.com/watch?v=1S4LixpVbcc) • [Bug Reports](https://github.com/JudgmentLabs/judgeval/issues) • [Changelog](https://docs.judgmentlabs.ai/changelog/2025-04-21)
-We're hiring! Join us in our mission to enable self-learning agents by providing the data and signals needed for monitoring and post-training.
-[![X](https://img.shields.io/badge/-X/Twitter-000?logo=x&logoColor=white)](https://x.com/JudgmentLabs)
-[![LinkedIn](https://custom-icon-badges.demolab.com/badge/LinkedIn%20-0A66C2?logo=linkedin-white&logoColor=fff)](https://www.linkedin.com/company/judgmentlabs)
-[![Discord](https://img.shields.io/badge/-Discord-5865F2?logo=discord&logoColor=white)](https://discord.gg/tGVFf8UBUY)
-<img src="assets/product_shot.png" alt="Judgment Platform" width="800" />
-</div>
-Judgeval offers **open-source tooling** for tracing and evaluating autonomous, stateful agents. It **provides runtime data from agent-environment interactions** for continuous learning and self-improvement.
-## 🎬 See Judgeval in Action
-**[Multi-Agent System](https://github.com/JudgmentLabs/judgment-cookbook/tree/main/cookbooks/agents/multi-agent) with complete observability:** (1) A multi-agent system spawns agents to research topics on the internet. (2) With just **3 lines of code**, Judgeval traces every input/output + environment response across all agent tool calls for debugging. (3) After completion, (4) export all interaction data to enable further environment-specific learning and optimization.
-<table style="width: 100%; max-width: 800px; table-layout: fixed;">
-<tr>
-<td align="center" style="padding: 8px; width: 50%;">
-  <img src="assets/agent.gif" alt="Agent Demo" style="width: 100%; max-width: 350px; height: auto;" />
-  <br><strong>🤖 Agents Running</strong>
-</td>
-<td align="center" style="padding: 8px; width: 50%;">
-  <img src="assets/trace.gif" alt="Trace Demo" style="width: 100%; max-width: 350px; height: auto;" />
-  <br><strong>📊 Real-time Tracing</strong>
-</td>
-</tr>
-<tr>
-<td align="center" style="padding: 8px; width: 50%;">
-  <img src="assets/document.gif" alt="Agent Completed Demo" style="width: 100%; max-width: 350px; height: auto;" />
-  <br><strong>✅ Agents Completed Running</strong>
-</td>
-<td align="center" style="padding: 8px; width: 50%;">
-  <img src="assets/data.gif" alt="Data Export Demo" style="width: 100%; max-width: 350px; height: auto;" />
-  <br><strong>📤 Exporting Agent Environment Data</strong>
-</td>
-</tr>
-</table>
-## 📋 Table of Contents
-- [🛠️ Installation](#️-installation)
-- [🏁 Quickstarts](#-quickstarts)
-- [✨ Features](#-features)
-- [🏢 Self-Hosting](#-self-hosting)
-- [📚 Cookbooks](#-cookbooks)
-- [💻 Development with Cursor](#-development-with-cursor)
-## 🛠️ Installation
-Get started with Judgeval by installing our SDK using pip:
-```bash
-pip install judgeval
-```
-Ensure you have your `JUDGMENT_API_KEY` and `JUDGMENT_ORG_ID` environment variables set to connect to the [Judgment Platform](https://app.judgmentlabs.ai/).
-```bash
-export JUDGMENT_API_KEY=...
-export JUDGMENT_ORG_ID=...
-```
-**If you don't have keys, [create an account](https://app.judgmentlabs.ai/register) on the platform!**
-## 🏁 Quickstarts
-### 🛰️ Tracing
-Create a file named `agent.py` with the following code:
-```python
-from judgeval.tracer import Tracer, wrap
-from openai import OpenAI
-client = wrap(OpenAI())  # tracks all LLM calls
-judgment = Tracer(project_name="my_project")
-@judgment.observe(span_type="tool")
-def format_question(question: str) -> str:
-    # dummy tool
-    return f"Question : {question}"
-@judgment.observe(span_type="function")
-def run_agent(prompt: str) -> str:
-    task = format_question(prompt)
-    response = client.chat.completions.create(
-        model="gpt-4.1",
-        messages=[{"role": "user", "content": task}]
-    )
-    return response.choices[0].message.content
-run_agent("What is the capital of the United States?")
-```
-You'll see your trace exported to the Judgment Platform:
-<p align="center"><img src="assets/trace_demo.png" alt="Judgment Platform Trace Example" width="800" /></p>
-[Click here](https://docs.judgmentlabs.ai/documentation/tracing/introduction) for a more detailed explanation.
-<!-- Created by https://github.com/ekalinin/github-markdown-toc -->
-## ✨ Features
-|  |  |
-|:---|:---:|
-| <h3>🔍 Tracing</h3>Automatic agent tracing integrated with common frameworks (LangGraph, OpenAI, Anthropic). **Tracks inputs/outputs, agent tool calls, latency, cost, and custom metadata** at every step.<br><br>**Useful for:**<br>• 🐛 Debugging agent runs <br>• 📋 Collecting agent environment data <br>• 🔬 Pinpointing performance bottlenecks| <p align="center"><img src="assets/trace_screenshot.png" alt="Tracing visualization" width="1200"/></p> |
-| <h3>🧪 Evals</h3>Build custom evaluators on top of your agents. Judgeval supports LLM-as-a-judge, manual labeling, and code-based evaluators that connect with our metric-tracking infrastructure. <br><br>**Useful for:**<br>• ⚠️ Unit-testing <br>• 🔬 A/B testing <br>• 🛡️ Online guardrails | <p align="center"><img src="assets/experiments_page.png" alt="Evaluation metrics" width="800"/></p> |
-| <h3>📡 Monitoring</h3>Get Slack alerts for agent failures in production. Add custom hooks to address production regressions.<br><br> **Useful for:** <br>• 📉 Identifying degradation early <br>• 📈 Visualizing performance trends across agent versions and time | <p align="center"><img src="assets/error_analysis_dashboard.png" alt="Monitoring Dashboard" width="1200"/></p> |
-| <h3>📊 Datasets</h3>Export traces and test cases to datasets for scaled analysis and optimization. Move datasets to/from Parquet, S3, etc. <br><br>Run evals on datasets as unit tests or to A/B test different agent configurations, enabling continuous learning from production interactions. <br><br> **Useful for:**<br>• 🗃️ Agent environment interaction data for optimization<br>• 🔄 Scaled analysis for A/B tests | <p align="center"><img src="assets/datasets_preview_screenshot.png" alt="Dataset management" width="1200"/></p> |
-## 🏢 Self-Hosting
-Run Judgment on your own infrastructure: we provide comprehensive self-hosting capabilities that give you full control over the backend and data plane that Judgeval interfaces with.
-### Key Features
-* Deploy Judgment on your own AWS account
-* Store data in your own Supabase instance
-* Access Judgment through your own custom domain
-### Getting Started
-1. Check out our [self-hosting documentation](https://docs.judgmentlabs.ai/documentation/self-hosting/get-started) for detailed setup instructions, along with how your self-hosted instance can be accessed
-2. Use the [Judgment CLI](https://docs.judgmentlabs.ai/documentation/developer-tools/judgment-cli/installation) to deploy your self-hosted environment
-3. After your self-hosted instance is setup, make sure the `JUDGMENT_API_URL` environmental variable is set to your self-hosted backend endpoint
-## 📚 Cookbooks
-Have your own? We're happy to feature it if you create a PR or message us on [Discord](https://discord.gg/tGVFf8UBUY).
-You can access our repo of cookbooks [here](https://github.com/JudgmentLabs/judgment-cookbook).
-## 💻 Development with Cursor
-Building agents and LLM workflows in Cursor works best when your coding assistant has the proper context about Judgment integration. The Cursor rules file contains the key information needed for your assistant to implement Judgment features effectively.
-Refer to the official [documentation](https://docs.judgmentlabs.ai/documentation/developer-tools/cursor/cursor-rules) for access to the rules file and more information on integrating this rules file with your codebase.
-## ⭐ Star Us on GitHub
-If you find Judgeval useful, please consider giving us a star on GitHub! Your support helps us grow our community and continue improving the repository.
-## ❤️ Contributors
-There are many ways to contribute to Judgeval:
-- Submit [bug reports](https://github.com/JudgmentLabs/judgeval/issues) and [feature requests](https://github.com/JudgmentLabs/judgeval/issues)
-- Review the documentation and submit [Pull Requests](https://github.com/JudgmentLabs/judgeval/pulls) to improve it
-- Speaking or writing about Judgment and letting us know!
-<!-- Contributors collage -->
-[![Contributors](https://contributors-img.web.app/image?repo=JudgmentLabs/judgeval)](https://github.com/JudgmentLabs/judgeval/graphs/contributors)
----
-Judgeval is created and maintained by [Judgment Labs](https://judgmentlabs.ai/).

judgeval-0.1.0.dist-info/RECORD DELETED Viewed

@@ -1,73 +0,0 @@
-judgeval/__init__.py,sha256=HM1M8hmqRum6G554QKkXhB4DF4f5eh_xtYo0Kf-t3kw,332
-judgeval/clients.py,sha256=JnB8n90GyXiYaGmSEYaA67mdJSnr3SIrzArao7NGebw,980
-judgeval/constants.py,sha256=rfl4gW9_4irxgamjTC-jvDj2ATSUrjEu0UAHZ4pLLtY,4081
-judgeval/evaluation_run.py,sha256=PZeoKS7JCsO2gzdo8jeq8786yn01Ccrq0xuCNUu9CPo,2797
-judgeval/judgment_client.py,sha256=tUgKS2sV8QZUxjdh3mP2PSBnC7Bci1e8ur8muvrgzBM,14011
-judgeval/rules.py,sha256=CoQjqmP8daEXewMkplmA-7urubDtweOr5O6z8klVwLI,20031
-judgeval/run_evaluation.py,sha256=U-aZyhSryjqzJl5jInc91uY8jIyiY596S6JJO3fH6AI,26105
-judgeval/version_check.py,sha256=FoLEtpCjDw2HuDQdpw5yT29UtwumSc6ZZN6AV_c9Mnw,1057
-judgeval/common/__init__.py,sha256=KH-QJyWtQ60R6yFIBDYS3WGRiNpEu1guynpxivZvpBQ,309
-judgeval/common/exceptions.py,sha256=OkgDznu2wpBQZMXiZarLJYNk1HIcC8qYW7VypDC3Ook,556
-judgeval/common/logger.py,sha256=514eFLYWS_UL8VY-zAR2ePUlpQe4rbYlleLASFllLE4,1511
-judgeval/common/utils.py,sha256=GhCEv8i_7JK4DJeUlMmibqEUy9ZVHxJAlFO_CriAzg4,34323
-judgeval/common/api/__init__.py,sha256=-E7lpZz1fG8puR_aYUMfPmQ-Vyhd0bgzoaU5EhIuFjQ,114
-judgeval/common/api/api.py,sha256=BGtAGGRDqxs8DrA0ye8BPZ6KBsgJ2C0Dca4vvA55d6g,13049
-judgeval/common/api/constants.py,sha256=azA0eyz4q33SWS795NHhaKDKNmVHBWAAGe5_sk37nDU,4297
-judgeval/common/storage/__init__.py,sha256=a-PI7OL-ydyzugGUKmJKRBASnK-Q-gs82L9K9rSyJP8,90
-judgeval/common/storage/s3_storage.py,sha256=UvAKGSa0S1BnNprzDKHMAfyT-8zlMAOM5kCrXcVN0HE,3743
-judgeval/common/tracer/__init__.py,sha256=tJCJsmVmrL89Phv88gNCJ-j0ITPez6lh8vhMAAlLNSc,795
-judgeval/common/tracer/constants.py,sha256=yu5y8gMe5yb1AaBkPtAH-BNwIaAR3NwYCRoSf45wp5U,621
-judgeval/common/tracer/core.py,sha256=Ij-KDD3dVXHK_6NPk-VbTH_Mo8GZq5h4Zl5ii5QMjnE,72403
-judgeval/common/tracer/otel_exporter.py,sha256=kZLlOQ6afQE4dmb9H1wgU4P3H5PG1D_zKyvnpWcT5Ak,3899
-judgeval/common/tracer/otel_span_processor.py,sha256=3cMETvrNlwrTkS_XDdTNRhjVw_6TdgnojpQhDK9sbOs,7484
-judgeval/common/tracer/span_processor.py,sha256=eFjTgSWSkM6BWE94CrvgafDg_WkxLsFL_MafwBG-p9M,1145
-judgeval/common/tracer/span_transformer.py,sha256=YIHEmr35o6_uX931JbD1PFIcLIWTVumWrJ198Ys391k,7544
-judgeval/common/tracer/trace_manager.py,sha256=7KLWBrz5GE_138DHL_eRjhx4-LNfXKz1q_XIDfg6nw8,2992
-judgeval/data/__init__.py,sha256=1QagDcSQtfnJ632t9Dnq8d7XjAqhmY4mInOWt8qH9tM,455
-judgeval/data/example.py,sha256=6xtPTwWUsZ0HdErU-g954nCv64fsbnS1I5xuEvs14EA,2027
-judgeval/data/judgment_types.py,sha256=s1oea01AEBQBdqQntXhTbMiuDGAxvs2iGoxrR2uLnUw,9538
-judgeval/data/result.py,sha256=hHKiMMEl9Qv3EvK5UH8Y5YDu8VyvrHzNqlKatrq4UUY,2450
-judgeval/data/scorer_data.py,sha256=5QBHtvOIWOq0Rn9_uPJzAMRYMlWxMB-rXnG_6kV4Z4Y,2955
-judgeval/data/tool.py,sha256=iWQSdy5uNbIeACu3gQy1DC2oGYxRVYNfkkczWdQMAiA,99
-judgeval/data/trace.py,sha256=_cyCsyg2gwG7lyyv186xo4OvGH2QlJDuyIg-qh-CZNA,6994
-judgeval/data/trace_run.py,sha256=c6pRSv09Vj016hxM49I3kMftCwWg8hhkfT_1kBXluSI,1600
-judgeval/data/datasets/__init__.py,sha256=IdNKhQv9yYZ_op0rdBacrFaFVmiiYQ3JTzXzxOTsEVQ,176
-judgeval/data/datasets/dataset.py,sha256=dDmTYSBRj4YEUhgYOebAcDm4N14nj3tcCqHj9y2Z1z0,12725
-judgeval/data/datasets/eval_dataset_client.py,sha256=8tiuwRC3oebc19KY-5b99Cxj0qq6ADW1NMDd1R1RhLc,7258
-judgeval/data/scripts/fix_default_factory.py,sha256=lvp2JwYZqz-XpD9LZNa3mANZVP-jJSZoNzolI6JWERM,591
-judgeval/data/scripts/openapi_transform.py,sha256=Rye-fErFtENAq3KKBKRUVR_oJdjYZtNzKRBKFkYS0XQ,3857
-judgeval/integrations/langgraph.py,sha256=kJXLsgBY7DgsUTZyVQ47deDgHm887brFHfyIbuyerGw,29986
-judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
-judgeval/judges/base_judge.py,sha256=_dz0qWsKRxzXxpRY9l6mrxTRYPSF2FE4ZXkrzhZ4gbY,986
-judgeval/judges/litellm_judge.py,sha256=LX4_KXb1Jp8IXif3vvOiKfRYH7ZkbQLs9AtWPGmj544,2483
-judgeval/judges/mixture_of_judges.py,sha256=wcHwLi9zU0uwKMqRVhcPdjiYKgWflX4dpUbU2kS9yg0,14825
-judgeval/judges/together_judge.py,sha256=r5k8ZcC6lnsFttGHhrocFtmglx2Cb3G-4ORKAeK-Nmw,2253
-judgeval/judges/utils.py,sha256=0CF9qtIUQUL3-W-qTGpmTjZbkUUBAM6TslDsrCHnTBU,2725
-judgeval/scorers/__init__.py,sha256=4H_cinTQ4EogZv59YEV-3U9EOTLppNwgAPTi1-jI9Fw,746
-judgeval/scorers/agent_scorer.py,sha256=TjwD_YglSywr3EowEojiCyg5qDgCRa5LRGc5nFdmIBc,703
-judgeval/scorers/api_scorer.py,sha256=xlhqkeMUBFxl8daSXOTWOYwZjBAz7o6b4sVD5f8cIHw,2523
-judgeval/scorers/base_scorer.py,sha256=eDfQk8N8TQfM1ayJDWr0NTdSQxcbk9-VZHd0Igb9EbI,2878
-judgeval/scorers/example_scorer.py,sha256=2n45y3LMV1Q-ARyXLHqvVWETlnY1DqS7OLzPu9IBGz8,716
-judgeval/scorers/exceptions.py,sha256=ACDHK5-TWiF3NTk-wycaedpbrdobm-CvvC1JA_iP-Mk,179
-judgeval/scorers/score.py,sha256=t9prkpDapcOAyuOXtDHMmwrqVGW0C_Hvx1UIEGyafmI,6610
-judgeval/scorers/utils.py,sha256=WM7mTCQSa2Z_rJ-0Iv9dhuBmtkTfV0pFN7XEhxHdzsM,3959
-judgeval/scorers/judgeval_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=GX4KkwPR2p-c0Y5mZingJa8EUfjAbMGhrmRBDBunOGw,1484
-judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=zJsU0VrUmRhY9qav48c6jTyDqUwI3JzhV9ajtlJCe0M,544
-judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=UDfzTO9Fx0FA5o0wfD8kprrGA4eW-43Rn9Gc0BQtKgY,393
-judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py,sha256=mbBvirNcivu9dP6deM7FogDXrdwI9o8yqsO8IeKPSb4,309
-judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py,sha256=NABO_iBdkOo3fdPVcoWfUkeN-FTX3t3-bErMjdqBXdk,1361
-judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=ps51bTgQsD9xGYsk1v9bx0WxQMqywSllCE9_xlJkLd8,531
-judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=SnFLvU4FGsMeUVUp0SGHSy_6wgfwr_vHPGnZx5YJl_Q,691
-judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=aQzu-TiGqG74JDQ927evv5yGmnZw2AOolyHvlIhiUbI,683
-judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=TS3uZ6YQfMs2yGCwzlz-yxZ3Rid79MGxEQESZkSX_Vo,7038
-judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py,sha256=Mcp1CjMNyOax9UkvoRdSyUYdO2Os1-Nko43y89m2Luo,594
-judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py,sha256=Z2FLGBC7m_CLx-CMgXVuTvYvN0vY5yOcWA0ImBkeBfY,787
-judgeval/tracer/__init__.py,sha256=wkuXtOGDCrwgPPXlh_sSJmvGuWaAMHyNzk1TzB5f9aI,148
-judgeval/utils/alerts.py,sha256=3w_AjQrgfmOZvfqCridW8WAnHVxHHXokX9jNzVFyGjA,3297
-judgeval/utils/file_utils.py,sha256=wIEn8kjM0WrP216RGU_yhZhFOMWIS5ckigyHbzFSOMk,1774
-judgeval/utils/requests.py,sha256=K3gUKrwL6TvwYKVYO5OeLWdUHn9NiUPmnIXhZEiEaHU,1534
-judgeval-0.1.0.dist-info/METADATA,sha256=B1v_50ikBR0fiojJY97deNf_VvEZn8fQq9qrxBi38ig,10188
-judgeval-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-judgeval-0.1.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
-judgeval-0.1.0.dist-info/RECORD,,

{judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

judgeval 0.1.0py3-none-any.whl → 0.23.0py3-none-any.whl