rakam-eval-sdk 0.2.4__tar.gz → 0.2.4rc2__tar.gz
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- {rakam_eval_sdk-0.2.4 → rakam_eval_sdk-0.2.4rc2}/PKG-INFO +1 -1
- {rakam_eval_sdk-0.2.4 → rakam_eval_sdk-0.2.4rc2}/pyproject.toml +1 -1
- {rakam_eval_sdk-0.2.4 → rakam_eval_sdk-0.2.4rc2}/src/rakam_eval_sdk/cli.py +32 -27
- {rakam_eval_sdk-0.2.4 → rakam_eval_sdk-0.2.4rc2}/src/rakam_eval_sdk/client.py +73 -27
- {rakam_eval_sdk-0.2.4 → rakam_eval_sdk-0.2.4rc2}/src/rakam_eval_sdk/schema.py +4 -3
- {rakam_eval_sdk-0.2.4 → rakam_eval_sdk-0.2.4rc2}/README.md +0 -0
- {rakam_eval_sdk-0.2.4 → rakam_eval_sdk-0.2.4rc2}/src/rakam_eval_sdk/__init__.py +0 -0
- {rakam_eval_sdk-0.2.4 → rakam_eval_sdk-0.2.4rc2}/src/rakam_eval_sdk/decorators.py +0 -0
- {rakam_eval_sdk-0.2.4 → rakam_eval_sdk-0.2.4rc2}/src/rakam_eval_sdk/utils/decorator_utils.py +0 -0
{rakam_eval_sdk-0.2.4 → rakam_eval_sdk-0.2.4rc2}/src/rakam_eval_sdk/cli.py

```diff
@@ -6,7 +6,7 @@ import uuid
 from datetime import datetime
 from pathlib import Path
 from pprint import pprint
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import typer
 from dotenv import load_dotenv
@@ -37,14 +37,14 @@ metrics_app = typer.Typer(help="Metrics utilities")
 app.add_typer(metrics_app, name="metrics")
 
 
-def extract_metric_names(config: Any) -> list[tuple[str, str | None]]:
+def extract_metric_names(config: Any) -> list[tuple[str, Optional[str]]]:
     """
     Returns [(type, name)] from EvalConfig / SchemaEvalConfig
     """
     if not hasattr(config, "metrics"):
         return []
 
-    results: list[tuple[str, str | None]] = []
+    results: list[tuple[str, Optional[str]]] = []
 
     for metric in config.metrics or []:
         metric_type = getattr(metric, "type", None)
@@ -77,7 +77,7 @@ def metrics(
     files = directory.rglob("*.py") if recursive else directory.glob("*.py")
     TARGET_DECORATOR = eval_run.__name__
 
-    all_metrics: set[tuple[str, str | None]] = set()
+    all_metrics: set[tuple[str, Optional[str]]] = set()
     found_any = False
 
     for file in sorted(files):
@@ -159,7 +159,7 @@ def list_evals(
 def list_runs(
     limit: int = typer.Option(20, help="Max number of runs"),
     offset: int = typer.Option(0, help="Pagination offset"),
-):
+) -> None:
     """
     List evaluation runs (newest first).
     """
@@ -170,7 +170,7 @@ def list_runs(
         offset=offset,
         raise_exception=True,
     )
-
+    assert response is not None
     items = response.get("items", [])
     total = response.get("total", 0)
 
@@ -199,7 +199,8 @@ def list_runs(
     shown = offset + len(items)
     if shown < total:
         typer.echo()
-        typer.echo(f"Showing {shown} of {total} runs. Use --limit to see more.")
+        typer.echo(
+            f"Showing {shown} of {total} runs. Use --limit to see more.")
 
 
 @app.command()
@@ -219,15 +220,15 @@ def show(
         "--raw",
         help="Print raw JSON instead of formatted output",
     ),
-):
+) -> None:
     """
     Show a single evaluation testcase by ID or tag.
     """
     if not id and not tag:
-        raise typer.BadParameter("You must provide either --id or --
+        raise typer.BadParameter("You must provide either --id or --tag")
 
     if id and tag:
-        raise typer.BadParameter("Provide only one of --id or --
+        raise typer.BadParameter("Provide only one of --id or --tag")
 
     client = DeepEvalClient()
 
@@ -235,6 +236,7 @@ def show(
         result = client.get_evaluation_testcase_by_id(id)
         identifier = f"id={id}"
     else:
+        assert tag is not None
         result = client.get_evaluation_testcase_by_tag(tag)
         identifier = f"tag={tag}"
 
@@ -406,7 +408,7 @@ def run(
 def _print_and_save(
     resp: dict,
     pretty: bool,
-    out: Path | None,
+    out: Optional[Path],
     overwrite: bool,
 ) -> None:
     if pretty:
@@ -419,7 +421,8 @@ def _print_and_save(
         return
 
     if out.exists() and not overwrite:
-        typer.echo(f"❌ File already exists: {out} (use --overwrite to replace)")
+        typer.echo(
+            f"❌ File already exists: {out} (use --overwrite to replace)")
         raise typer.Exit(code=1)
 
     out.parent.mkdir(parents=True, exist_ok=True)
@@ -430,13 +433,13 @@ def _print_and_save(
     typer.echo(f"💾 Result saved to {out}")
 
 
-def pct_change(a: float | None, b: float | None) -> str | None:
+def pct_change(a: Optional[float], b: Optional[float]) -> Optional[str]:
     if a is None or b is None or a == 0:
         return None
     return f"{((b - a) / a) * 100:+.2f}%"
 
 
-def metric_direction(delta: float | None) -> str:
+def metric_direction(delta: Optional[float]) -> str:
     if delta is None:
         return "unchanged"
     if delta > 0:
@@ -446,7 +449,7 @@ def metric_direction(delta: float | None) -> str:
     return "unchanged"
 
 
-def print_metric_diff(diff: MetricDiff):
+def print_metric_diff(diff: MetricDiff) -> None:
     secho(f"\nMetric: {diff.metric}", bold=True)
 
     if diff.status == "added":
@@ -507,7 +510,7 @@ def summarize(metrics: Any) -> Dict[str, int]:
     return summary
 
 
-def pretty_print_comparison(resp: Any, summary_only: bool = False):
+def pretty_print_comparison(resp: Any, summary_only: bool = False) -> None:
     if not summary_only:
         for metric in resp.metrics:
             print_metric_diff(metric)
@@ -516,7 +519,7 @@ def pretty_print_comparison(resp: Any, summary_only: bool = False):
         print_summary(resp.metrics)
 
 
-def print_summary(metrics: Any):
+def print_summary(metrics: Any) -> None:
     summary = summarize(metrics)
 
     secho("\nSummary:", bold=True)
@@ -529,7 +532,7 @@ def print_summary(metrics: Any):
 
 @app.command()
 def compare(
-    tag:
+    tag: List[str] = typer.Option(
         [],
         "--tag",
         help="Label identifying a reference testcase",
@@ -554,7 +557,7 @@ def compare(
         "--raise",
         help="Raise HTTP exceptions instead of swallowing them",
     ),
-    out: Path | None = typer.Option(
+    out: Optional[Path] = typer.Option(
         None,
         "-o",
         "--out",
@@ -575,7 +578,7 @@ def compare(
     - Summary of improvements / regressions
     """
 
-    targets:
+    targets: List[Tuple[str, Union[str, int]]] = []
 
     for r in run:
         targets.append(("run", r))
@@ -600,7 +603,7 @@ def compare(
 
     client = DeepEvalClient()
 
-    kwargs = {"raise_exception": raise_exception}
+    kwargs: Dict[str, Any] = {"raise_exception": raise_exception}
 
     if type_a == "run":
         kwargs["testcase_a_id"] = value_a
@@ -665,7 +668,7 @@ def compare_label_latest(
         "--raise",
         help="Raise HTTP exceptions instead of swallowing them",
     ),
-    out: Path | None = typer.Option(
+    out: Optional[Path] = typer.Option(
        None,
         "-o",
         "--out",
@@ -717,7 +720,7 @@ def compare_last(
         "--raise",
         help="Raise HTTP exceptions instead of swallowing them",
     ),
-    out: Path | None = typer.Option(
+    out: Optional[Path] = typer.Option(
         None,
         "-o",
         "--out",
@@ -769,7 +772,7 @@ def tag_command(
         "--delete",
         help="Delete a tag",
     ),
-):
+) -> None:
     """
     Assign a tag to a run or delete a tag.
     """
@@ -786,8 +789,9 @@ def tag_command(
 
     client = DeepEvalClient()
 
-    # --- delete mode ---
     if delete:
+        assert run_id is not None
+
         result = client.update_evaluation_testcase_tag(
             testcase_id=run_id,
             tag=delete,
@@ -796,13 +800,14 @@ def tag_command(
         typer.echo("🗑️ Tag deleted successfully")
         typer.echo(f"Tag: {delete}")
         return
-
-
+    assert run_id is not None
+    assert tag is not None
     result = client.update_evaluation_testcase_tag(
         testcase_id=run_id,
         tag=tag,
         raise_exception=True,
    )
+    assert result is not None
 
     typer.echo("✅ Tag assigned successfully")
     typer.echo(f"Run ID: {run_id}")
```
{rakam_eval_sdk-0.2.4 → rakam_eval_sdk-0.2.4rc2}/src/rakam_eval_sdk/client.py

```diff
@@ -1,6 +1,6 @@
 import os
 import random
-from typing import Any, Dict, List, Literal, Optional, Union, cast, overload
+from typing import Any, Dict, List, Literal, Optional, Tuple, Union, cast, overload
 
 import requests
 
@@ -49,8 +49,8 @@ class DeepEvalClient:
         method: HTTPMethod,
         endpoint: str,
         *,
-        json: Dict
-        params: Dict
+        json: Optional[Dict] = None,
+        params: Optional[Dict] = None,
         raise_exception: bool = False,
     ) -> Optional[Dict]:
         url = f"{self.base_url}{endpoint}"
@@ -89,17 +89,63 @@ class DeepEvalClient:
                 "raw": resp.text,
             }
 
-    def _get(
-
+    def _get(
+        self,
+        endpoint: str,
+        params: Dict,
+        raise_exception: bool = False,
+        *args: Tuple,
+        **kw: Dict,
+    ) -> Optional[Dict]:
+        return self._request(
+            "GET", endpoint, params=params, raise_exception=raise_exception, *args, **kw
+        )
 
-    def _post(
-
+    def _post(
+        self,
+        endpoint: str,
+        payload: Dict,
+        raise_exception: bool = False,
+        *args: Tuple,
+        **kw: Dict,
+    ) -> Optional[Dict]:
+        return self._request(
+            "POST", endpoint, json=payload, raise_exception=raise_exception, *args, **kw
+        )
 
-    def _patch(
-
+    def _patch(
+        self,
+        endpoint: str,
+        payload: Dict,
+        raise_exception: bool = False,
+        *args: Tuple,
+        **kw: Dict,
+    ) -> Optional[Dict]:
+        return self._request(
+            "PATCH",
+            endpoint,
+            json=payload,
+            raise_exception=raise_exception,
+            *args,
+            **kw,
+        )
 
-    def _delete(
-
+    def _delete(
+        self,
+        endpoint: str,
+        payload: Dict,
+        raise_exception: bool = False,
+        *args: Tuple,
+        **kw: Dict,
+    ) -> Optional[Dict]:
+        return self._request(
+            "DELETE",
+            endpoint,
+            json=payload,
+            raise_exception=raise_exception,
+            *args,
+            **kw,
+        )
 
     def update_evaluation_testcase_tag(
         self,
@@ -197,10 +243,10 @@ class DeepEvalClient:
     def compare_testcases(
         self,
         *,
-        testcase_a_id: int
-        testcase_a_tag: str
-        testcase_b_id: int
-        testcase_b_tag: str
+        testcase_a_id: Optional[int] = None,
+        testcase_a_tag: Optional[str] = None,
+        testcase_b_id: Optional[int] = None,
+        testcase_b_tag: Optional[str] = None,
         raise_exception: bool = False,
     ) -> Optional[Dict]:
         """
@@ -208,14 +254,14 @@ class DeepEvalClient:
         Exactly one identifier (id or tag) must be provided per testcase.
         """
 
-        def validate(id_, tag, name: str):
+        def validate(id_: Optional[int], tag: Optional[str], name: str) -> None:
             if bool(id_) == bool(tag):
                 raise ValueError(f"Provide exactly one of {name}_id or {name}_tag")
 
         validate(testcase_a_id, testcase_a_tag, "testcase_a")
         validate(testcase_b_id, testcase_b_tag, "testcase_b")
 
-        params:
+        params: Dict[str, Union[int, str]] = {}
 
         if testcase_a_id is not None:
             params["testcase_a_id"] = testcase_a_id
@@ -284,18 +330,18 @@ class DeepEvalClient:
         data: List[TextInputItem],
         metrics: List[MetricConfig],
         component: str = "unknown",
-        label: str
+        label: Optional[str] = None,
         raise_exception: bool = False,
     ) -> Optional[Dict]: ...
 
     def text_eval(
         self,
-        config: EvalConfig
+        config: Optional[EvalConfig] = None,
         *,
-        data: List[TextInputItem]
-        metrics: List[MetricConfig]
+        data: Optional[List[TextInputItem]] = None,
+        metrics: Optional[List[MetricConfig]] = None,
         component: str = "unknown",
-        label: str
+        label: Optional[str] = None,
         raise_exception: bool = False,
     ) -> Optional[Dict]:
         if config is None:
@@ -337,7 +383,7 @@ class DeepEvalClient:
         data: List[SchemaInputItem],
         metrics: List[SchemaMetricConfig],
         component: str = "unknown",
-        label: str
+        label: Optional[str] = None,
         raise_exception: bool = False,
     ) -> Optional[Dict]: ...
 
@@ -351,12 +397,12 @@ class DeepEvalClient:
 
     def schema_eval(
         self,
-        config: SchemaEvalConfig
+        config: Optional[SchemaEvalConfig] = None,
         *,
-        data: List[SchemaInputItem]
-        metrics: List[SchemaMetricConfig]
+        data: Optional[List[SchemaInputItem]] = None,
+        metrics: Optional[List[SchemaMetricConfig]] = None,
         component: str = "unknown",
-        label: str
+        label: Optional[str] = None,
         raise_exception: bool = False,
     ) -> Optional[Dict]:
         if config is None:
```
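Note: the widened `compare_testcases` signature above accepts either an id or a tag for each side, and the inner `validate` helper rejects calls that pass both or neither. A hedged usage sketch; the identifier values and any client configuration are illustrative assumptions, not taken from this diff:

```python
from rakam_eval_sdk.client import DeepEvalClient

client = DeepEvalClient()  # base URL / credentials come from the client's own configuration

# Exactly one identifier (id or tag) must be supplied per testcase;
# passing both or neither raises ValueError before any request is made.
result = client.compare_testcases(
    testcase_a_tag="baseline",   # hypothetical tag
    testcase_b_id=42,            # hypothetical testcase id
    raise_exception=True,
)

if result is not None:
    print(result)
```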
{rakam_eval_sdk-0.2.4 → rakam_eval_sdk-0.2.4rc2}/src/rakam_eval_sdk/schema.py

```diff
@@ -1,6 +1,6 @@
 # Common base class for all metric configs
 import sys
-from typing import
+from typing import Any, Dict, List, Literal, Optional, Union
 
 # Base class (you can keep this abstract)
 from pydantic import BaseModel, Field
@@ -94,7 +94,8 @@ MetricConfig = Annotated[
 ]
 
 SchemaMetricConfig = Annotated[
-    Union[JsonCorrectnessConfig, FieldsPresenceConfig], Field(discriminator="type")
+    Union[JsonCorrectnessConfig, FieldsPresenceConfig], Field(
+        discriminator="type")
 ]
 
 
@@ -107,7 +108,7 @@ class InputItem(BaseModel):
 
 class TextInputItem(InputItem):
     expected_output: Optional[str] = None
-    retrieval_context: Optional[
+    retrieval_context: Optional[List[str]] = None
 
 
 class SchemaInputItem(InputItem):
```
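Note: `SchemaMetricConfig` above is a Pydantic discriminated union, so the `type` field selects which concrete config model is validated. A self-contained sketch of the same pattern, assuming Pydantic v2 and hypothetical stand-in fields (the real `JsonCorrectnessConfig` / `FieldsPresenceConfig` definitions are not shown in this diff):

```python
from typing import Annotated, List, Literal, Union

from pydantic import BaseModel, Field, TypeAdapter


class JsonCorrectnessConfig(BaseModel):
    # Hypothetical fields; only the Literal "type" discriminator mirrors the SDK.
    type: Literal["json_correctness"] = "json_correctness"
    threshold: float = 0.5


class FieldsPresenceConfig(BaseModel):
    type: Literal["fields_presence"] = "fields_presence"
    required_fields: List[str] = []


SchemaMetricConfig = Annotated[
    Union[JsonCorrectnessConfig, FieldsPresenceConfig],
    Field(discriminator="type"),
]

# The "type" value picks the concrete model during validation.
metric = TypeAdapter(SchemaMetricConfig).validate_python(
    {"type": "fields_presence", "required_fields": ["title", "body"]}
)
print(type(metric).__name__)  # FieldsPresenceConfig
```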
The remaining files are unchanged between the two versions: README.md, src/rakam_eval_sdk/__init__.py, src/rakam_eval_sdk/decorators.py, and src/rakam_eval_sdk/utils/decorator_utils.py (listed as renamed, with no content changes).