judgeval 0.9.3.tar.gz → 0.10.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {judgeval-0.9.3 → judgeval-0.10.0}/.github/workflows/ci.yaml +33 -22
- {judgeval-0.9.3 → judgeval-0.10.0}/.pre-commit-config.yaml +2 -2
- {judgeval-0.9.3 → judgeval-0.10.0}/PKG-INFO +2 -2
- {judgeval-0.9.3 → judgeval-0.10.0}/pyproject.toml +3 -2
- {judgeval-0.9.3 → judgeval-0.10.0}/scripts/api_generator.py +4 -4
- {judgeval-0.9.3 → judgeval-0.10.0}/scripts/openapi_transform.py +2 -3
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/__init__.py +2 -2
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/api/__init__.py +28 -96
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/api/api_types.py +49 -140
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/constants.py +1 -5
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/data/__init__.py +1 -3
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/data/example.py +4 -2
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/data/judgment_types.py +57 -165
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/data/result.py +1 -2
- judgeval-0.10.0/src/judgeval/data/trace.py +14 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/dataset/__init__.py +15 -42
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/evaluation/__init__.py +23 -34
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/scorers/__init__.py +9 -7
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/scorers/api_scorer.py +8 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/scorers/base_scorer.py +0 -1
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -10
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -2
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -2
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -2
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +43 -4
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/tracer/__init__.py +40 -93
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/tracer/local_eval_queue.py +2 -2
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/tracer/processors/__init__.py +84 -6
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/tracer/utils.py +1 -1
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/trainer/trainer.py +4 -4
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/utils/serialize.py +7 -1
- {judgeval-0.9.3 → judgeval-0.10.0}/uv.lock +2591 -2039
- judgeval-0.9.3/src/judgeval/data/trace.py +0 -40
- judgeval-0.9.3/src/judgeval/data/trace_run.py +0 -39
- judgeval-0.9.3/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
- judgeval-0.9.3/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
- judgeval-0.9.3/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
- judgeval-0.9.3/src/judgeval/scorers/trace_api_scorer.py +0 -5
- {judgeval-0.9.3 → judgeval-0.10.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/.github/pull_request_template.md +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/.github/workflows/blocked-pr.yaml +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/.github/workflows/claude-code-review.yml +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/.github/workflows/claude.yml +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/.github/workflows/lint.yaml +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/.github/workflows/merge-branch-check.yaml +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/.github/workflows/mypy.yaml +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/.github/workflows/release.yaml +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/.github/workflows/validate-branch.yaml +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/.gitignore +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/LICENSE.md +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/README.md +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/agent.gif +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/agent_trace_example.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/data.gif +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/dataset_clustering_screenshot.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/dataset_clustering_screenshot_dm.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/datasets_preview_screenshot.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/document.gif +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/error_analysis_dashboard.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/errors.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/experiments_dashboard_screenshot.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/experiments_page.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/experiments_pagev2.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/logo-dark.svg +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/logo-light.svg +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/monitoring_screenshot.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/new_darkmode.svg +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/new_lightmode.svg +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/online_eval.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/product_shot.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/test.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/tests.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/trace.gif +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/trace_demo.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/trace_screenshot.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/assets/trace_screenshot_old.png +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/pytest.ini +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/scripts/update_types.sh +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/cli.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/data/evaluation_run.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/data/scorer_data.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/data/scripts/openapi_transform.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/data/tool.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/env.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/exceptions.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/integrations/langgraph/__init__.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/judges/__init__.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/judges/base_judge.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/judges/litellm_judge.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/judges/together_judge.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/judges/utils.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/logger.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/scorers/agent_scorer.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/scorers/example_scorer.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/scorers/exceptions.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/scorers/score.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/scorers/utils.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/tracer/constants.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/tracer/exporters/__init__.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/tracer/exporters/s3.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/tracer/exporters/store.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/tracer/exporters/utils.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/tracer/keys.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/tracer/llm/__init__.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/tracer/llm/providers.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/tracer/managers.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/trainer/__init__.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/trainer/config.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/trainer/console.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/trainer/trainable_model.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/utils/async_utils.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/utils/decorators.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/utils/file_utils.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/utils/guards.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/utils/meta.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/utils/testing.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/utils/url.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/utils/version_check.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/version.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/warnings.py +0 -0
- {judgeval-0.9.3 → judgeval-0.10.0}/update_version.py +0 -0
{judgeval-0.9.3 → judgeval-0.10.0}/.github/workflows/ci.yaml

@@ -18,7 +18,10 @@ jobs:
       matrix:
         os: [ubuntu-latest, macos-latest]
         python-version:
+          - "3.10"
           - "3.11"
+          - "3.12"
+          - "3.13"
     name: Unit Tests
     runs-on: ${{ matrix.os }}
     env:
@@ -49,18 +52,19 @@ jobs:
   run-e2e-tests-staging:
     needs: [validate-branch]
     if: "github.base_ref == 'staging' && !contains(github.actor, '[bot]') && (needs.validate-branch.result == 'success' || needs.validate-branch.result == 'skipped')"
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version:
+          - "3.10"
+          - "3.11"
+          - "3.12"
+          - "3.13"
     name: Staging E2E Tests
     runs-on: ubuntu-latest
     env:
       TEST_TIMEOUT_SECONDS: ${{ secrets.TEST_TIMEOUT_SECONDS }}
     steps:
-      - name: Wait for turn
-        uses: softprops/turnstyle@v2
-        with:
-          poll-interval-seconds: 10
-          same-branch-only: false
-          job-to-wait-for: "Staging E2E Tests"
-
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@v4
         with:
@@ -74,7 +78,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version:
+          python-version: ${{ matrix.python-version }}
 
       - name: Install judgeval dependencies
         run: |
@@ -93,32 +97,36 @@ jobs:
       - name: Run E2E tests
         working-directory: src
         run: |
-          SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id
+          SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id stg/api-keys/e2e-tests --query SecretString --output text)
           export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
-
+          export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
+          export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
+          export JUDGMENT_API_URL=https://staging.api.judgmentlabs.ai
+          timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest -n auto --dist=loadfile --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests
 
       - name: Upload coverage HTML report (staging)
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: coverage-html-staging
+          name: coverage-html-staging-${{ matrix.python-version }}
           path: src/htmlcov
 
   run-e2e-tests-main:
     needs: [validate-branch]
     if: "github.base_ref == 'main' && !contains(github.actor, '[bot]') && needs.validate-branch.result == 'success'"
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version:
+          - "3.10"
+          - "3.11"
+          - "3.12"
+          - "3.13"
     name: Production E2E Tests
     runs-on: ubuntu-latest
     env:
       TEST_TIMEOUT_SECONDS: ${{ secrets.TEST_TIMEOUT_SECONDS }}
     steps:
-      - name: Wait for turn
-        uses: softprops/turnstyle@v2
-        with:
-          poll-interval-seconds: 10
-          same-branch-only: false
-          job-to-wait-for: "Production E2E Tests"
-
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@v4
         with:
@@ -132,7 +140,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version:
+          python-version: ${{ matrix.python-version }}
 
       - name: Install judgeval dependencies
         run: |
@@ -151,13 +159,16 @@ jobs:
       - name: Run E2E tests
         working-directory: src
         run: |
-          SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id
+          SECRET_VARS=$(aws secretsmanager get-secret-value --secret-id prod/api-keys/e2e-tests --query SecretString --output text)
           export $(echo "$SECRET_VARS" | jq -r 'to_entries | .[] | "\(.key)=\(.value)"')
-
+          export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
+          export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
+          export JUDGMENT_API_URL=https://api.judgmentlabs.ai
+          timeout ${TEST_TIMEOUT_SECONDS}s uv run pytest -n auto --dist=loadfile --durations=0 --cov=. --cov-config=.coveragerc --cov-report=html ./e2etests
 
       - name: Upload coverage HTML report (production)
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: coverage-html-production
+          name: coverage-html-production-${{ matrix.python-version }}
           path: src/htmlcov
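Both E2E jobs now fan the suite out with pytest-xdist (added to the dev dependencies in pyproject.toml below), using `--dist=loadfile` so tests from the same file stay on one worker. As a minimal sketch, this is the local equivalent of the new CI invocation via pytest's Python entry point; the `./e2etests` path is taken from the workflow, everything else is standard pytest/pytest-xdist usage:

```python
# Minimal local equivalent of the new CI test invocation. Requires the
# pytest-xdist plugin added to the dev dependencies in this release.
import pytest

exit_code = pytest.main([
    "-n", "auto",        # one worker per available CPU core
    "--dist=loadfile",   # keep tests from the same file on the same worker
    "--durations=0",     # report all test durations, as in CI
    "./e2etests",
])
raise SystemExit(exit_code)
```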
{judgeval-0.9.3 → judgeval-0.10.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.9.3
+Version: 0.10.0
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -9,7 +9,7 @@ License-Expression: Apache-2.0
 License-File: LICENSE.md
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Requires-Python: >=3.11
+Requires-Python: >=3.10
 Requires-Dist: boto3>=1.40.11
 Requires-Dist: click<8.2.0
 Requires-Dist: dotenv
{judgeval-0.9.3 → judgeval-0.10.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "judgeval"
-version = "0.9.3"
+version = "0.10.0"
 authors = [
     { name = "Andrew Li", email = "andrew@judgmentlabs.ai" },
     { name = "Alex Shan", email = "alex@judgmentlabs.ai" },
@@ -8,7 +8,7 @@ authors = [
 ]
 description = "Judgeval Package"
 readme = "README.md"
-requires-python = ">=3.11"
+requires-python = ">=3.10"
 classifiers = [
     "Programming Language :: Python :: 3",
     "Operating System :: OS Independent",
@@ -75,6 +75,7 @@ dev = [
     "pytest-cov>=6.2.1",
     "types-tqdm>=4.67.0.20250809",
     "pytest-asyncio>=1.1.0",
+    "pytest-xdist>=3.8.0",
 ]
 
 
{judgeval-0.9.3 → judgeval-0.10.0}/scripts/api_generator.py

@@ -36,13 +36,13 @@ JUDGEVAL_PATHS: List[str] = [
     "/fetch_scorer/",
     "/scorer_exists/",
     "/upload_custom_scorer/",
-    "/datasets/
-    "/datasets/
+    "/datasets/create_for_judgeval/",
+    "/datasets/insert_examples_for_judgeval/",
     "/datasets/pull_for_judgeval/",
-    "/datasets/fetch_stats_by_project/",
     "/projects/resolve/",
     "/e2e_fetch_trace/",
     "/e2e_fetch_span_score/",
+    "/e2e_fetch_trace_scorer_span_score/",
 ]
 
 
@@ -253,7 +253,7 @@ def generate_client_class(
 
 def generate_api_file() -> str:
     lines = [
-        "from typing import
+        "from typing import Dict, Any, Mapping, Literal, Optional",
         "import httpx",
         "from httpx import Response",
         "from judgeval.exceptions import JudgmentAPIError",
{judgeval-0.9.3 → judgeval-0.10.0}/scripts/openapi_transform.py

@@ -35,10 +35,9 @@ JUDGEVAL_PATHS: List[str] = [
     "/fetch_scorer/",
     "/scorer_exists/",
     "/upload_custom_scorer/",
-    "/datasets/
-    "/datasets/
+    "/datasets/create_for_judgeval/",
+    "/datasets/insert_examples_for_judgeval/",
     "/datasets/pull_for_judgeval/",
-    "/datasets/fetch_stats_by_project/",
     "/projects/resolve/",
     "/e2e_fetch_trace/",
     "/e2e_fetch_span_score/",
{judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/__init__.py

@@ -6,7 +6,7 @@ from judgeval.data.evaluation_run import ExampleEvaluationRun
 
 
 from typing import List, Optional, Union
-from judgeval.scorers import BaseScorer,
+from judgeval.scorers import BaseScorer, ExampleAPIScorerConfig
 from judgeval.data.example import Example
 from judgeval.logger import judgeval_logger
 from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_DEFAULT_GPT_MODEL, JUDGMENT_ORG_ID
@@ -38,7 +38,7 @@ class JudgmentClient(metaclass=SingletonMeta):
     def run_evaluation(
         self,
         examples: List[Example],
-        scorers: List[Union[
+        scorers: List[Union[ExampleAPIScorerConfig, BaseScorer]],
         project_name: str = "default_project",
         eval_run_name: str = "default_eval_run",
         model: str = JUDGMENT_DEFAULT_GPT_MODEL,
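The signature change above means `run_evaluation` now accepts server-hosted scorer configs (`ExampleAPIScorerConfig`) alongside local `BaseScorer` instances. A minimal usage sketch against the new signature follows; the `FaithfulnessScorer` import, its `threshold` kwarg, and the `Example` field names are assumptions based on the package layout, not shown in this diff:

```python
# Hypothetical usage sketch for the judgeval 0.10.0 run_evaluation signature
# shown above. Example field names and the scorer constructor are assumed.
from judgeval import JudgmentClient
from judgeval.data.example import Example
from judgeval.scorers import FaithfulnessScorer  # assumed ExampleAPIScorerConfig subclass

client = JudgmentClient()  # singleton; assumed to read JUDGMENT_API_KEY / JUDGMENT_ORG_ID from env

example = Example(
    input="What is the capital of France?",           # assumed field name
    actual_output="Paris is the capital of France.",  # assumed field name
)

# scorers is now typed List[Union[ExampleAPIScorerConfig, BaseScorer]]
results = client.run_evaluation(
    examples=[example],
    scorers=[FaithfulnessScorer(threshold=0.5)],  # threshold kwarg assumed
    project_name="default_project",
    eval_run_name="default_eval_run",
)
```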
{judgeval-0.9.3 → judgeval-0.10.0}/src/judgeval/api/__init__.py

@@ -71,13 +71,6 @@ class JudgmentSyncClient:
             payload,
         )
 
-    def evaluate_trace(self, payload: TraceRun) -> Any:
-        return self._request(
-            "POST",
-            url_for("/evaluate_trace/"),
-            payload,
-        )
-
     def evaluate_examples(
         self, payload: ExampleEvaluationRun, stream: Optional[str] = None
     ) -> Any:
@@ -128,59 +121,26 @@ class JudgmentSyncClient:
             query_params,
         )
 
-    def
+    def datasets_insert_examples_for_judgeval(
+        self, payload: DatasetInsertExamples
+    ) -> Any:
         return self._request(
             "POST",
-            url_for("/datasets/
+            url_for("/datasets/insert_examples_for_judgeval/"),
             payload,
         )
 
-    def datasets_pull_for_judgeval(self, payload: DatasetFetch) ->
+    def datasets_pull_for_judgeval(self, payload: DatasetFetch) -> DatasetReturn:
         return self._request(
             "POST",
             url_for("/datasets/pull_for_judgeval/"),
             payload,
         )
 
-    def
+    def datasets_create_for_judgeval(self, payload: DatasetCreate) -> Any:
         return self._request(
             "POST",
-            url_for("/datasets/
-            payload,
-        )
-
-    def traces_upsert(self, payload: TraceSave) -> Any:
-        return self._request(
-            "POST",
-            url_for("/traces/upsert/"),
-            payload,
-        )
-
-    def traces_fetch(self, payload: TraceFetch) -> Any:
-        return self._request(
-            "POST",
-            url_for("/traces/fetch/"),
-            payload,
-        )
-
-    def traces_add_to_dataset(self, payload: TraceAddToDataset) -> Any:
-        return self._request(
-            "POST",
-            url_for("/traces/add_to_dataset/"),
-            payload,
-        )
-
-    def traces_spans_batch(self, payload: SpansBatchRequest) -> Any:
-        return self._request(
-            "POST",
-            url_for("/traces/spans/batch/"),
-            payload,
-        )
-
-    def traces_evaluation_runs_batch(self, payload: EvaluationRunsBatchRequest) -> Any:
-        return self._request(
-            "POST",
-            url_for("/traces/evaluation_runs/batch/"),
+            url_for("/datasets/create_for_judgeval/"),
             payload,
         )
 
@@ -255,6 +215,13 @@ class JudgmentSyncClient:
             payload,
         )
 
+    def e2e_fetch_trace_scorer_span_score(self, payload: SpanScoreRequest) -> Any:
+        return self._request(
+            "POST",
+            url_for("/e2e_fetch_trace_scorer_span_score/"),
+            payload,
+        )
+
 
 class JudgmentAsyncClient:
     __slots__ = ("api_key", "organization_id", "client")
@@ -304,13 +271,6 @@ class JudgmentAsyncClient:
             payload,
         )
 
-    async def evaluate_trace(self, payload: TraceRun) -> Any:
-        return await self._request(
-            "POST",
-            url_for("/evaluate_trace/"),
-            payload,
-        )
-
     async def evaluate_examples(
         self, payload: ExampleEvaluationRun, stream: Optional[str] = None
     ) -> Any:
@@ -363,61 +323,26 @@ class JudgmentAsyncClient:
             query_params,
         )
 
-    async def
+    async def datasets_insert_examples_for_judgeval(
+        self, payload: DatasetInsertExamples
+    ) -> Any:
         return await self._request(
             "POST",
-            url_for("/datasets/
+            url_for("/datasets/insert_examples_for_judgeval/"),
             payload,
         )
 
-    async def datasets_pull_for_judgeval(self, payload: DatasetFetch) ->
+    async def datasets_pull_for_judgeval(self, payload: DatasetFetch) -> DatasetReturn:
         return await self._request(
             "POST",
             url_for("/datasets/pull_for_judgeval/"),
             payload,
         )
 
-    async def
-        return await self._request(
-            "POST",
-            url_for("/datasets/push/"),
-            payload,
-        )
-
-    async def traces_upsert(self, payload: TraceSave) -> Any:
-        return await self._request(
-            "POST",
-            url_for("/traces/upsert/"),
-            payload,
-        )
-
-    async def traces_fetch(self, payload: TraceFetch) -> Any:
-        return await self._request(
-            "POST",
-            url_for("/traces/fetch/"),
-            payload,
-        )
-
-    async def traces_add_to_dataset(self, payload: TraceAddToDataset) -> Any:
-        return await self._request(
-            "POST",
-            url_for("/traces/add_to_dataset/"),
-            payload,
-        )
-
-    async def traces_spans_batch(self, payload: SpansBatchRequest) -> Any:
-        return await self._request(
-            "POST",
-            url_for("/traces/spans/batch/"),
-            payload,
-        )
-
-    async def traces_evaluation_runs_batch(
-        self, payload: EvaluationRunsBatchRequest
-    ) -> Any:
+    async def datasets_create_for_judgeval(self, payload: DatasetCreate) -> Any:
         return await self._request(
             "POST",
-            url_for("/
+            url_for("/datasets/create_for_judgeval/"),
             payload,
         )
 
@@ -494,6 +419,13 @@ class JudgmentAsyncClient:
             payload,
         )
 
+    async def e2e_fetch_trace_scorer_span_score(self, payload: SpanScoreRequest) -> Any:
+        return await self._request(
+            "POST",
+            url_for("/e2e_fetch_trace_scorer_span_score/"),
+            payload,
+        )
+
 
 __all__ = [
     "JudgmentSyncClient",