PyPI - judgeval - Versions diffs - 0.16.6__tar.gz → 0.16.8__tar.gz - Mend

judgeval 0.16.6__tar.gz → 0.16.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of judgeval might be problematic. Click here for more details.

Files changed (178) hide show

{judgeval-0.16.6 → judgeval-0.16.8}/.github/workflows/ci.yaml RENAMED Viewed

@@ -28,6 +28,8 @@ jobs:
       PYTHONPATH: "."
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
+      GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
       JUDGMENT_DEV: true
     steps:
@@ -49,7 +51,7 @@ jobs:
           cd src
           export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
           export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
-          uv run pytest tests
+          uv run pytest tests -n auto
   run-e2e-tests:
     needs: [validate-branch]

{judgeval-0.16.6 → judgeval-0.16.8}/.pre-commit-config.yaml RENAMED Viewed

@@ -1,11 +1,11 @@
 repos:
   - repo: https://github.com/astral-sh/uv-pre-commit
-    rev: 0.8.23
+    rev: 0.9.2
     hooks:
       - id: uv-lock
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.13.3
+    rev: v0.14.0
     hooks:
       - id: ruff
         name: ruff (linter)

{judgeval-0.16.6 → judgeval-0.16.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.16.6
+Version: 0.16.8
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues

{judgeval-0.16.6 → judgeval-0.16.8}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "judgeval"
-version = "0.16.6"
+version = "0.16.8"
 authors = [
     { name = "Andrew Li", email = "andrew@judgmentlabs.ai" },
     { name = "Alex Shan", email = "alex@judgmentlabs.ai" },
@@ -19,7 +19,7 @@ license-files = ["LICENSE.md"]
 dependencies = [
     "dotenv",
     "httpx>=0.28.1",
-    "litellm>=1.75.0",
+    "litellm>=1.75.0",
     "opentelemetry-exporter-otlp>=1.36.0",
     "opentelemetry-sdk>=1.36.0",
     "orjson>=3.9.0",

{judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/api/api_types.py RENAMED Viewed

@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  .openapi.json
-#   timestamp: 2025-10-09T00:16:42+00:00
+#   timestamp: 2025-10-15T19:25:00+00:00
 from __future__ import annotations
 from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -94,7 +94,6 @@ class ResolveProjectNameRequest(TypedDict):
 class ResolveProjectNameResponse(TypedDict):
     project_id: str
-    project_created: bool
 class TraceIdRequest(TypedDict):

{judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/data/judgment_types.py RENAMED Viewed

@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename:  .openapi.json
-#   timestamp: 2025-10-09T00:16:41+00:00
+#   timestamp: 2025-10-15T19:24:59+00:00
 from __future__ import annotations
 from typing import Annotated, Any, Dict, List, Optional, Union
@@ -101,7 +101,6 @@ class ResolveProjectNameRequest(BaseModel):
 class ResolveProjectNameResponse(BaseModel):
     project_id: Annotated[str, Field(title="Project Id")]
-    project_created: Annotated[bool, Field(title="Project Created")]
 class TraceIdRequest(BaseModel):

{judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/tracer/__init__.py RENAMED Viewed

@@ -66,7 +66,6 @@ from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys
 from judgeval.api import JudgmentSyncClient
 from judgeval.tracer.llm import wrap_provider
 from judgeval.utils.url import url_for
-from judgeval.tracer.local_eval_queue import LocalEvaluationQueue
 from judgeval.tracer.processors import (
     JudgmentSpanProcessor,
     NoOpJudgmentSpanProcessor,
@@ -99,7 +98,6 @@ class Tracer(metaclass=SingletonMeta):
         "enable_evaluation",
         "resource_attributes",
         "api_client",
-        "local_eval_queue",
         "judgment_processor",
         "tracer",
         "agent_context",
@@ -113,7 +111,6 @@ class Tracer(metaclass=SingletonMeta):
     enable_evaluation: bool
     resource_attributes: Optional[Dict[str, Any]]
     api_client: JudgmentSyncClient
-    local_eval_queue: LocalEvaluationQueue
     judgment_processor: JudgmentSpanProcessor
     tracer: ABCTracer
     agent_context: ContextVar[Optional[AgentContext]]
@@ -148,7 +145,6 @@ class Tracer(metaclass=SingletonMeta):
                 api_key=self.api_key,
                 organization_id=self.organization_id,
             )
-            self.local_eval_queue = LocalEvaluationQueue()
             if initialize:
                 self.initialize()
@@ -159,14 +155,10 @@ class Tracer(metaclass=SingletonMeta):
         self.judgment_processor = NoOpJudgmentSpanProcessor()
         if self.enable_monitoring:
-            project_id, project_created = Tracer._resolve_project_id(
+            project_id = Tracer._resolve_project_id(
                 self.project_name, self.api_key, self.organization_id
-            ) or (None, False)
+            )
             if project_id:
-                if project_created:
-                    judgeval_logger.info(
-                        f"Project {self.project_name} was autocreated successfully."
-                    )
                 self.judgment_processor = self.get_processor(
                     tracer=self,
                     project_name=self.project_name,
@@ -190,9 +182,6 @@ class Tracer(metaclass=SingletonMeta):
             get_version(),
         )
-        if self.enable_evaluation and self.enable_monitoring:
-            self.local_eval_queue.start_workers()
         self._initialized = True
         atexit.register(self._atexit_flush)
         return self
@@ -240,14 +229,14 @@ class Tracer(metaclass=SingletonMeta):
     @staticmethod
     def _resolve_project_id(
         project_name: str, api_key: str, organization_id: str
-    ) -> Tuple[str, bool]:
+    ) -> str:
         """Resolve project_id from project_name using the API."""
         client = JudgmentSyncClient(
             api_key=api_key,
             organization_id=organization_id,
         )
         response = client.projects_resolve({"project_name": project_name})
-        return response["project_id"], response["project_created"]
+        return response["project_id"]
     def get_current_span(self):
         return get_current_span()
@@ -299,6 +288,7 @@ class Tracer(metaclass=SingletonMeta):
         )
         current_agent_context["is_agent_entry_point"] = False
+    @dont_throw
     def record_instance_state(self, record_point: Literal["before", "after"], span):
         current_agent_context = self.agent_context.get()
@@ -955,45 +945,10 @@ class Tracer(metaclass=SingletonMeta):
                 eval_run.model_dump(warnings=False)  # type: ignore
             )
         else:
-            # Enqueue the evaluation run to the local evaluation queue
-            self.local_eval_queue.enqueue(eval_run)
-    def wait_for_completion(self, timeout: Optional[float] = 30.0) -> bool:
-        """Wait for all evaluations and span processing to complete.
-        This method blocks until all queued evaluations are processed and
-        all pending spans are flushed to the server.
-        Args:
-            timeout: Maximum time to wait in seconds. Defaults to 30 seconds.
-                    None means wait indefinitely.
-        Returns:
-            True if all processing completed within the timeout, False otherwise.
-        """
-        try:
-            judgeval_logger.debug(
-                "Waiting for all evaluations and spans to complete..."
+            judgeval_logger.warning(
+                "The scorer provided is not hosted, skipping evaluation."
             )
-            # Wait for all queued evaluation work to complete
-            eval_completed = self.local_eval_queue.wait_for_completion()
-            if not eval_completed:
-                judgeval_logger.warning(
-                    f"Local evaluation queue did not complete within {timeout} seconds"
-                )
-                return False
-            self.force_flush()
-            judgeval_logger.debug("All evaluations and spans completed successfully")
-            return True
-        except Exception as e:
-            judgeval_logger.warning(f"Error while waiting for completion: {e}")
-            return False
 def wrap(client: ApiClient) -> ApiClient:
     try:

judgeval-0.16.8/src/judgeval/tracer/llm/config.py ADDED Viewed

@@ -0,0 +1,78 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING
+from judgeval.logger import judgeval_logger
+from judgeval.tracer.llm.constants import ProviderType
+from judgeval.tracer.llm.providers import (
+    HAS_OPENAI,
+    HAS_TOGETHER,
+    HAS_ANTHROPIC,
+    HAS_GOOGLE_GENAI,
+    ApiClient,
+)
+if TYPE_CHECKING:
+    from judgeval.tracer import Tracer
+def _detect_provider(client: ApiClient) -> ProviderType:
+    if HAS_OPENAI:
+        from openai import OpenAI, AsyncOpenAI
+        if isinstance(client, (OpenAI, AsyncOpenAI)):
+            return ProviderType.OPENAI
+    if HAS_ANTHROPIC:
+        from anthropic import Anthropic, AsyncAnthropic
+        if isinstance(client, (Anthropic, AsyncAnthropic)):
+            return ProviderType.ANTHROPIC
+    if HAS_TOGETHER:
+        from together import Together, AsyncTogether  # type: ignore[import-untyped]
+        if isinstance(client, (Together, AsyncTogether)):
+            return ProviderType.TOGETHER
+    if HAS_GOOGLE_GENAI:
+        from google.genai import Client as GoogleClient
+        if isinstance(client, GoogleClient):
+            return ProviderType.GOOGLE
+    judgeval_logger.warning(
+        f"Unknown client type {type(client)}, Trying to wrap as OpenAI-compatible. "
+        "If this is a mistake or you think we should support this client, please file an issue at https://github.com/JudgmentLabs/judgeval/issues!"
+    )
+    return ProviderType.DEFAULT
+def wrap_provider(tracer: Tracer, client: ApiClient) -> ApiClient:
+    """
+    Wraps an API client to add tracing capabilities.
+    Supports OpenAI, Together, Anthropic, and Google GenAI clients.
+    """
+    provider_type = _detect_provider(client)
+    if provider_type == ProviderType.OPENAI:
+        from .llm_openai.wrapper import wrap_openai_client
+        return wrap_openai_client(tracer, client)
+    elif provider_type == ProviderType.ANTHROPIC:
+        from .llm_anthropic.wrapper import wrap_anthropic_client
+        return wrap_anthropic_client(tracer, client)
+    elif provider_type == ProviderType.TOGETHER:
+        from .llm_together.wrapper import wrap_together_client
+        return wrap_together_client(tracer, client)
+    elif provider_type == ProviderType.GOOGLE:
+        from .llm_google.wrapper import wrap_google_client
+        return wrap_google_client(tracer, client)
+    else:
+        # Default to OpenAI-compatible wrapping for unknown clients
+        from .llm_openai.wrapper import wrap_openai_client
+        return wrap_openai_client(tracer, client)

{judgeval-0.16.6 → judgeval-0.16.8}/src/judgeval/tracer/llm/constants.py RENAMED Viewed

@@ -6,5 +6,4 @@ class ProviderType(Enum):
     ANTHROPIC = "anthropic"
     TOGETHER = "together"
     GOOGLE = "google"
-    GROQ = "groq"
     DEFAULT = "default"

judgeval-0.16.8/src/judgeval/tracer/llm/llm_anthropic/config.py ADDED Viewed

@@ -0,0 +1,6 @@
+from __future__ import annotations
+import importlib.util
+HAS_ANTHROPIC = importlib.util.find_spec("anthropic") is not None
+__all__ = ["HAS_ANTHROPIC"]

judgeval 0.16.6__tar.gz → 0.16.8__tar.gz

Potentially problematic release.

judgeval 0.16.6__tar.gz → 0.16.8__tar.gz