judgeval 0.9.3__tar.gz → 0.9.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/ci.yaml +25 -20
  2. {judgeval-0.9.3 → judgeval-0.9.4}/PKG-INFO +2 -2
  3. {judgeval-0.9.3 → judgeval-0.9.4}/pyproject.toml +3 -2
  4. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/__init__.py +27 -43
  5. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/processors/__init__.py +84 -6
  6. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/serialize.py +7 -1
  7. {judgeval-0.9.3 → judgeval-0.9.4}/uv.lock +2591 -2039
  8. {judgeval-0.9.3 → judgeval-0.9.4}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  9. {judgeval-0.9.3 → judgeval-0.9.4}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  10. {judgeval-0.9.3 → judgeval-0.9.4}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  11. {judgeval-0.9.3 → judgeval-0.9.4}/.github/pull_request_template.md +0 -0
  12. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/blocked-pr.yaml +0 -0
  13. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/claude-code-review.yml +0 -0
  14. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/claude.yml +0 -0
  15. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/lint.yaml +0 -0
  16. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/merge-branch-check.yaml +0 -0
  17. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/mypy.yaml +0 -0
  18. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
  19. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/release.yaml +0 -0
  20. {judgeval-0.9.3 → judgeval-0.9.4}/.github/workflows/validate-branch.yaml +0 -0
  21. {judgeval-0.9.3 → judgeval-0.9.4}/.gitignore +0 -0
  22. {judgeval-0.9.3 → judgeval-0.9.4}/.pre-commit-config.yaml +0 -0
  23. {judgeval-0.9.3 → judgeval-0.9.4}/LICENSE.md +0 -0
  24. {judgeval-0.9.3 → judgeval-0.9.4}/README.md +0 -0
  25. {judgeval-0.9.3 → judgeval-0.9.4}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
  26. {judgeval-0.9.3 → judgeval-0.9.4}/assets/agent.gif +0 -0
  27. {judgeval-0.9.3 → judgeval-0.9.4}/assets/agent_trace_example.png +0 -0
  28. {judgeval-0.9.3 → judgeval-0.9.4}/assets/data.gif +0 -0
  29. {judgeval-0.9.3 → judgeval-0.9.4}/assets/dataset_clustering_screenshot.png +0 -0
  30. {judgeval-0.9.3 → judgeval-0.9.4}/assets/dataset_clustering_screenshot_dm.png +0 -0
  31. {judgeval-0.9.3 → judgeval-0.9.4}/assets/datasets_preview_screenshot.png +0 -0
  32. {judgeval-0.9.3 → judgeval-0.9.4}/assets/document.gif +0 -0
  33. {judgeval-0.9.3 → judgeval-0.9.4}/assets/error_analysis_dashboard.png +0 -0
  34. {judgeval-0.9.3 → judgeval-0.9.4}/assets/errors.png +0 -0
  35. {judgeval-0.9.3 → judgeval-0.9.4}/assets/experiments_dashboard_screenshot.png +0 -0
  36. {judgeval-0.9.3 → judgeval-0.9.4}/assets/experiments_page.png +0 -0
  37. {judgeval-0.9.3 → judgeval-0.9.4}/assets/experiments_pagev2.png +0 -0
  38. {judgeval-0.9.3 → judgeval-0.9.4}/assets/logo-dark.svg +0 -0
  39. {judgeval-0.9.3 → judgeval-0.9.4}/assets/logo-light.svg +0 -0
  40. {judgeval-0.9.3 → judgeval-0.9.4}/assets/monitoring_screenshot.png +0 -0
  41. {judgeval-0.9.3 → judgeval-0.9.4}/assets/new_darkmode.svg +0 -0
  42. {judgeval-0.9.3 → judgeval-0.9.4}/assets/new_lightmode.svg +0 -0
  43. {judgeval-0.9.3 → judgeval-0.9.4}/assets/online_eval.png +0 -0
  44. {judgeval-0.9.3 → judgeval-0.9.4}/assets/product_shot.png +0 -0
  45. {judgeval-0.9.3 → judgeval-0.9.4}/assets/test.png +0 -0
  46. {judgeval-0.9.3 → judgeval-0.9.4}/assets/tests.png +0 -0
  47. {judgeval-0.9.3 → judgeval-0.9.4}/assets/trace.gif +0 -0
  48. {judgeval-0.9.3 → judgeval-0.9.4}/assets/trace_demo.png +0 -0
  49. {judgeval-0.9.3 → judgeval-0.9.4}/assets/trace_screenshot.png +0 -0
  50. {judgeval-0.9.3 → judgeval-0.9.4}/assets/trace_screenshot_old.png +0 -0
  51. {judgeval-0.9.3 → judgeval-0.9.4}/pytest.ini +0 -0
  52. {judgeval-0.9.3 → judgeval-0.9.4}/scripts/api_generator.py +0 -0
  53. {judgeval-0.9.3 → judgeval-0.9.4}/scripts/openapi_transform.py +0 -0
  54. {judgeval-0.9.3 → judgeval-0.9.4}/scripts/update_types.sh +0 -0
  55. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/__init__.py +0 -0
  56. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/api/__init__.py +0 -0
  57. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/api/api_types.py +0 -0
  58. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/cli.py +0 -0
  59. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/constants.py +0 -0
  60. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/__init__.py +0 -0
  61. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/evaluation_run.py +0 -0
  62. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/example.py +0 -0
  63. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/judgment_types.py +0 -0
  64. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/result.py +0 -0
  65. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/scorer_data.py +0 -0
  66. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
  67. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/scripts/openapi_transform.py +0 -0
  68. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/tool.py +0 -0
  69. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/trace.py +0 -0
  70. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/data/trace_run.py +0 -0
  71. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/dataset/__init__.py +0 -0
  72. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/env.py +0 -0
  73. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/evaluation/__init__.py +0 -0
  74. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/exceptions.py +0 -0
  75. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/integrations/langgraph/__init__.py +0 -0
  76. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/judges/__init__.py +0 -0
  77. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/judges/base_judge.py +0 -0
  78. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/judges/litellm_judge.py +0 -0
  79. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/judges/together_judge.py +0 -0
  80. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/judges/utils.py +0 -0
  81. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/logger.py +0 -0
  82. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/__init__.py +0 -0
  83. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/agent_scorer.py +0 -0
  84. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/api_scorer.py +0 -0
  85. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/base_scorer.py +0 -0
  86. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/example_scorer.py +0 -0
  87. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/exceptions.py +0 -0
  88. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
  89. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
  90. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  91. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  92. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -0
  93. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  94. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
  95. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +0 -0
  96. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -0
  97. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -0
  98. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/score.py +0 -0
  99. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/trace_api_scorer.py +0 -0
  100. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/scorers/utils.py +0 -0
  101. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/constants.py +0 -0
  102. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/exporters/__init__.py +0 -0
  103. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/exporters/s3.py +0 -0
  104. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/exporters/store.py +0 -0
  105. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/exporters/utils.py +0 -0
  106. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/keys.py +0 -0
  107. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/llm/__init__.py +0 -0
  108. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/llm/providers.py +0 -0
  109. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/local_eval_queue.py +0 -0
  110. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/managers.py +0 -0
  111. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/tracer/utils.py +0 -0
  112. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/trainer/__init__.py +0 -0
  113. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/trainer/config.py +0 -0
  114. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/trainer/console.py +0 -0
  115. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/trainer/trainable_model.py +0 -0
  116. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/trainer/trainer.py +0 -0
  117. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/async_utils.py +0 -0
  118. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/decorators.py +0 -0
  119. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/file_utils.py +0 -0
  120. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/guards.py +0 -0
  121. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/meta.py +0 -0
  122. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/testing.py +0 -0
  123. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/url.py +0 -0
  124. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/utils/version_check.py +0 -0
  125. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/version.py +0 -0
  126. {judgeval-0.9.3 → judgeval-0.9.4}/src/judgeval/warnings.py +0 -0
  127. {judgeval-0.9.3 → judgeval-0.9.4}/update_version.py +0 -0
.github/workflows/ci.yaml
@@ -18,7 +18,10 @@ jobs:
       matrix:
         os: [ubuntu-latest, macos-latest]
         python-version:
+          - "3.10"
           - "3.11"
+          - "3.12"
+          - "3.13"
     name: Unit Tests
     runs-on: ${{ matrix.os }}
     env:
@@ -49,18 +52,19 @@ jobs:
   run-e2e-tests-staging:
     needs: [validate-branch]
     if: "github.base_ref == 'staging' && !contains(github.actor, '[bot]') && (needs.validate-branch.result == 'success' || needs.validate-branch.result == 'skipped')"
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version:
+          - "3.10"
+          - "3.11"
+          - "3.12"
+          - "3.13"
     name: Staging E2E Tests
     runs-on: ubuntu-latest
     env:
       TEST_TIMEOUT_SECONDS: ${{ secrets.TEST_TIMEOUT_SECONDS }}
     steps:
-      - name: Wait for turn
-        uses: softprops/turnstyle@v2
-        with:
-          poll-interval-seconds: 10
-          same-branch-only: false
-          job-to-wait-for: "Staging E2E Tests"
-
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@v4
         with:
@@ -74,7 +78,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: "3.11"
+          python-version: ${{ matrix.python-version }}

       - name: Install judgeval dependencies
         run: |
@@ -95,30 +99,31 @@ jobs:
         run: |
           SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id gh-actions-stg-judgeval/api-keys/judgeval --query SecretString --output text)
           export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
-          timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests
+          timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest -n auto --dist=loadfile --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests

       - name: Upload coverage HTML report (staging)
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: coverage-html-staging
+          name: coverage-html-staging-${{ matrix.python-version }}
           path: src/htmlcov

   run-e2e-tests-main:
     needs: [validate-branch]
     if: "github.base_ref == 'main' && !contains(github.actor, '[bot]') && needs.validate-branch.result == 'success'"
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version:
+          - "3.10"
+          - "3.11"
+          - "3.12"
+          - "3.13"
     name: Production E2E Tests
     runs-on: ubuntu-latest
     env:
       TEST_TIMEOUT_SECONDS: ${{ secrets.TEST_TIMEOUT_SECONDS }}
     steps:
-      - name: Wait for turn
-        uses: softprops/turnstyle@v2
-        with:
-          poll-interval-seconds: 10
-          same-branch-only: false
-          job-to-wait-for: "Production E2E Tests"
-
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@v4
         with:
@@ -132,7 +137,7 @@ jobs:
       - name: Set up Python
        uses: actions/setup-python@v4
         with:
-          python-version: "3.11"
+          python-version: ${{ matrix.python-version }}

       - name: Install judgeval dependencies
         run: |
@@ -153,11 +158,11 @@ jobs:
         run: |
           SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id gh-actions-judgeval/api-keys/judgeval --query SecretString --output text)
           export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
-          timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests
+          timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest -n auto --dist=loadfile --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests

       - name: Upload coverage HTML report (production)
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: coverage-html-production
+          name: coverage-html-production-${{ matrix.python-version }}
           path: src/htmlcov
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.9.3
+Version: 0.9.4
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -9,7 +9,7 @@ License-Expression: Apache-2.0
 License-File: LICENSE.md
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Requires-Python: >=3.11
+Requires-Python: >=3.10
 Requires-Dist: boto3>=1.40.11
 Requires-Dist: click<8.2.0
 Requires-Dist: dotenv
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "judgeval"
-version = "0.9.3"
+version = "0.9.4"
 authors = [
     { name = "Andrew Li", email = "andrew@judgmentlabs.ai" },
     { name = "Alex Shan", email = "alex@judgmentlabs.ai" },
@@ -8,7 +8,7 @@ authors = [
 ]
 description = "Judgeval Package"
 readme = "README.md"
-requires-python = ">=3.11"
+requires-python = ">=3.10"
 classifiers = [
     "Programming Language :: Python :: 3",
     "Operating System :: OS Independent",
@@ -75,6 +75,7 @@ dev = [
     "pytest-cov>=6.2.1",
     "types-tqdm>=4.67.0.20250809",
     "pytest-asyncio>=1.1.0",
+    "pytest-xdist>=3.8.0",
 ]


src/judgeval/tracer/__init__.py
@@ -57,7 +57,7 @@ from judgeval.utils.serialize import safe_serialize
 from judgeval.version import get_version
 from judgeval.warnings import JudgmentWarning

-from judgeval.tracer.keys import AttributeKeys, ResourceKeys, InternalAttributeKeys
+from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys
 from judgeval.api import JudgmentSyncClient
 from judgeval.tracer.llm import wrap_provider
 from judgeval.utils.url import url_for
@@ -65,6 +65,7 @@ from judgeval.tracer.local_eval_queue import LocalEvaluationQueue
 from judgeval.tracer.processors import (
     JudgmentSpanProcessor,
     NoOpJudgmentSpanProcessor,
+    NoOpSpanProcessor,
 )
 from judgeval.tracer.utils import set_span_attribute, TraceScorerConfig

@@ -85,19 +86,6 @@ class AgentContext(TypedDict):
     parent_agent_id: str | None


-def resolve_project_id(
-    api_key: str, organization_id: str, project_name: str
-) -> str | None:
-    try:
-        client = JudgmentSyncClient(
-            api_key=api_key,
-            organization_id=organization_id,
-        )
-        return client.projects_resolve({"project_name": project_name})["project_id"]
-    except Exception:
-        return None
-
-
 class Tracer:
     _active_tracers: List[Tracer] = []

@@ -188,38 +176,20 @@ class Tracer:
         self.cost_context = ContextVar("current_cost_context", default=None)

         if self.enable_monitoring:
-            project_id = resolve_project_id(
-                self.api_key, self.organization_id, self.project_name
-            )
-
-            resource_attributes = resource_attributes or {}
-            resource_attributes.update(
-                {
-                    ResourceKeys.SERVICE_NAME: self.project_name,
-                    ResourceKeys.TELEMETRY_SDK_NAME: "judgeval",
-                    ResourceKeys.TELEMETRY_SDK_VERSION: get_version(),
-                }
-            )
-
-            if project_id is not None:
-                resource_attributes[ResourceKeys.JUDGMENT_PROJECT_ID] = project_id
-            else:
-                judgeval_logger.error(
-                    f"Failed to resolve project {self.project_name}, please create it first at https://app.judgmentlabs.ai/projects. Skipping Judgment export."
-                )
-
-            resource = Resource.create(resource_attributes)
-
             self.judgment_processor = JudgmentSpanProcessor(
                 self,
-                self.api_url,
+                self.project_name,
                 self.api_key,
                 self.organization_id,
                 max_queue_size=2**18,
                 export_timeout_millis=30000,
+                resource_attributes=resource_attributes,
             )
-            self.processors.append(self.judgment_processor)
+
+            resource = Resource.create(self.judgment_processor.resource_attributes)
             self.provider = TracerProvider(resource=resource)
+
+            self.processors.append(self.judgment_processor)
             for processor in self.processors:
                 self.provider.add_span_processor(processor)

@@ -253,6 +223,14 @@ class Tracer:
     def get_current_cost_context(self):
         return self.cost_context

+    def get_processor(self):
+        """Get the judgment span processor instance.
+
+        Returns:
+            The JudgmentSpanProcessor or NoOpJudgmentSpanProcessor instance used by this tracer.
+        """
+        return self.judgment_processor
+
     def set_customer_id(self, customer_id: str) -> None:
         span = self.get_current_span()
         if span and span.is_recording():
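The new Tracer.get_processor accessor exposes the span processor that the tracer registered. A minimal usage sketch, assuming Tracer keyword arguments inferred from attributes referenced in this diff (project_name, api_key, organization_id) rather than a documented constructor signature:

    from judgeval.tracer import Tracer

    tracer = Tracer(
        project_name="my-project",   # hypothetical placeholder values
        api_key="sk-...",
        organization_id="org_123",
    )

    # Returns the JudgmentSpanProcessor, or the no-op variant when monitoring is off.
    processor = tracer.get_processor()
    # On the real processor this is the BatchSpanProcessor method inherited by
    # JudgmentSpanProcessor; flushing before shutdown mirrors the atexit path below.
    processor.force_flush(timeout_millis=5000)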
@@ -913,11 +891,7 @@ class Tracer:
         proper cleanup before program termination.
         """
         try:
-            success = self.force_flush(timeout_millis=30000)
-            if not success:
-                judgeval_logger.warning(
-                    "Some spans may not have been exported before program exit"
-                )
+            self.force_flush(timeout_millis=30000)
         except Exception as e:
             judgeval_logger.warning(f"Error during atexit flush: {e}")

@@ -1074,3 +1048,13 @@ def format_inputs(
         return inputs
     except Exception:
         return {}
+
+
+# Export processor classes for direct access
+__all__ = [
+    "Tracer",
+    "wrap",
+    "JudgmentSpanProcessor",
+    "NoOpJudgmentSpanProcessor",
+    "NoOpSpanProcessor",
+]
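With the module-level __all__ added at the end of src/judgeval/tracer/__init__.py, the processor classes are re-exported from the tracer package. A short sketch of the imports this enables:

    from judgeval.tracer import (
        Tracer,
        wrap,
        JudgmentSpanProcessor,
        NoOpJudgmentSpanProcessor,
        NoOpSpanProcessor,
    )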
src/judgeval/tracer/processors/__init__.py
@@ -6,8 +6,13 @@ from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor, SpanContext
 from opentelemetry.sdk.trace.export import (
     BatchSpanProcessor,
 )
+from opentelemetry.sdk.resources import Resource
 from judgeval.tracer.exporters import JudgmentSpanExporter
-from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys
+from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys, ResourceKeys
+from judgeval.api import JudgmentSyncClient
+from judgeval.logger import judgeval_logger
+from judgeval.utils.url import url_for
+from judgeval.version import get_version

 if TYPE_CHECKING:
     from judgeval.tracer import Tracer
@@ -31,15 +36,27 @@ class JudgmentSpanProcessor(BatchSpanProcessor):
     def __init__(
         self,
         tracer: Tracer,
-        endpoint: str,
+        project_name: str,
         api_key: str,
         organization_id: str,
         /,
         *,
         max_queue_size: int = 2**18,
         export_timeout_millis: int = 30000,
+        resource_attributes: Optional[dict[str, Any]] = None,
     ):
         self.tracer = tracer
+        self.project_name = project_name
+        self.api_key = api_key
+        self.organization_id = organization_id
+
+        # Resolve project_id
+        self.project_id = self._resolve_project_id()
+
+        # Set up resource attributes with project_id
+        self._setup_resource_attributes(resource_attributes or {})
+
+        endpoint = url_for("/otel/v1/traces")
         super().__init__(
             JudgmentSpanExporter(
                 endpoint=endpoint,
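The processor now takes a project name instead of a pre-built endpoint and resolves the project id and OTLP endpoint itself via url_for("/otel/v1/traces"). A minimal construction sketch with placeholder credentials and a hypothetical extra resource attribute:

    from judgeval.tracer.processors import JudgmentSpanProcessor

    processor = JudgmentSpanProcessor(
        tracer,                      # an existing judgeval Tracer instance
        "my-project",                # project_name (0.9.3 passed an endpoint here)
        "sk-...",                    # api_key (placeholder)
        "org_123",                   # organization_id (placeholder)
        max_queue_size=2**18,
        export_timeout_millis=30000,
        resource_attributes={"deployment.environment": "staging"},  # optional, merged with defaults
    )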
@@ -53,6 +70,38 @@ class JudgmentSpanProcessor(BatchSpanProcessor):
             defaultdict(dict)
         )

+    def _resolve_project_id(self) -> str | None:
+        """Resolve project_id from project_name using the API."""
+        try:
+            client = JudgmentSyncClient(
+                api_key=self.api_key,
+                organization_id=self.organization_id,
+            )
+            return client.projects_resolve({"project_name": self.project_name})[
+                "project_id"
+            ]
+        except Exception:
+            return None
+
+    def _setup_resource_attributes(self, resource_attributes: dict[str, Any]) -> None:
+        """Set up resource attributes including project_id."""
+        resource_attributes.update(
+            {
+                ResourceKeys.SERVICE_NAME: self.project_name,
+                ResourceKeys.TELEMETRY_SDK_NAME: "judgeval",
+                ResourceKeys.TELEMETRY_SDK_VERSION: get_version(),
+            }
+        )
+
+        if self.project_id is not None:
+            resource_attributes[ResourceKeys.JUDGMENT_PROJECT_ID] = self.project_id
+        else:
+            judgeval_logger.error(
+                f"Failed to resolve project {self.project_name}, please create it first at https://app.judgmentlabs.ai/projects. Skipping Judgment export."
+            )
+
+        self.resource_attributes = resource_attributes
+
     def _get_span_key(self, span_context: SpanContext) -> tuple[int, int]:
         return (span_context.trace_id, span_context.span_id)

@@ -103,11 +152,18 @@ class JudgmentSpanProcessor(BatchSpanProcessor):

         attributes = dict(current_span.attributes or {})
         attributes[AttributeKeys.JUDGMENT_UPDATE_ID] = current_update_id
+
+        existing_resource_attrs = (
+            dict(current_span.resource.attributes) if current_span.resource else {}
+        )
+        merged_resource_attrs = {**existing_resource_attrs, **self.resource_attributes}
+        merged_resource = Resource.create(merged_resource_attrs)
+
         partial_span = ReadableSpan(
             name=current_span.name,
             context=span_context,
             parent=current_span.parent,
-            resource=current_span.resource,
+            resource=merged_resource,
             attributes=attributes,
             events=current_span.events,
             links=current_span.links,
@@ -137,11 +193,20 @@ class JudgmentSpanProcessor(BatchSpanProcessor):
         attributes = dict(span.attributes or {})
         attributes[AttributeKeys.JUDGMENT_UPDATE_ID] = 20

+        existing_resource_attrs = (
+            dict(span.resource.attributes) if span.resource else {}
+        )
+        merged_resource_attrs = {
+            **existing_resource_attrs,
+            **self.resource_attributes,
+        }
+        merged_resource = Resource.create(merged_resource_attrs)
+
         final_span = ReadableSpan(
             name=span.name,
             context=span.context,
             parent=span.parent,
-            resource=span.resource,
+            resource=merged_resource,
             attributes=attributes,
             events=span.events,
             links=span.links,
@@ -160,7 +225,7 @@ class JudgmentSpanProcessor(BatchSpanProcessor):

 class NoOpJudgmentSpanProcessor(JudgmentSpanProcessor):
     def __init__(self):
-        super().__init__(None, "", "", "")  # type: ignore[arg-type]
+        pass

     def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None:
         pass
@@ -177,5 +242,18 @@ class NoOpJudgmentSpanProcessor(JudgmentSpanProcessor):
     def emit_partial(self) -> None:
         pass

+    def set_internal_attribute(
+        self, span_context: SpanContext, key: str, value: Any
+    ) -> None:
+        pass
+
+    def get_internal_attribute(
+        self, span_context: SpanContext, key: str, default: Any = None
+    ) -> Any:
+        return default
+
+    def increment_update_id(self, span_context: SpanContext) -> int:
+        return 0
+

-__all__ = ("NoOpSpanProcessor", "JudgmentSpanProcessor", "NoOpJudgmentSpanProcessor")
+__all__ = ["NoOpSpanProcessor", "JudgmentSpanProcessor", "NoOpJudgmentSpanProcessor"]
src/judgeval/utils/serialize.py
@@ -19,6 +19,8 @@ from pydantic import BaseModel
 from pydantic.types import SecretBytes, SecretStr
 import orjson

+from judgeval.logger import judgeval_logger
+

 """
 This module contains the encoders used by jsonable_encoder to convert Python objects to JSON serializable data types.
@@ -244,4 +246,8 @@ encoders_by_class_tuples = generate_encoders_by_class_tuples(ENCODERS_BY_TYPE)

 # Seralize arbitrary object to a json string
 def safe_serialize(obj: Any) -> str:
-    return orjson.dumps(json_encoder(obj)).decode()
+    try:
+        return orjson.dumps(json_encoder(obj)).decode()
+    except Exception as e:
+        judgeval_logger.warning(f"Error serializing object: {e}")
+        return orjson.dumps(repr(obj)).decode()
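safe_serialize is now non-raising: if json_encoder/orjson cannot handle an object, the error is logged through judgeval_logger and the JSON-encoded repr() of the object is returned instead. A small sketch of the intended behaviour:

    from judgeval.utils.serialize import safe_serialize

    print(safe_serialize({"status": "ok", "attempts": 3}))  # normal path, returns a JSON string
    # For objects the encoder cannot serialize, 0.9.4 returns
    # orjson.dumps(repr(obj)).decode() instead of propagating the exception.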