rakam-eval-sdk 0.2.1__tar.gz → 0.2.2__tar.gz

This diff shows the changes between two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.

In 0.2.2, `unique_id` is replaced by `tag` across the CLI, client, and schema models, and tags become updatable through a new PATCH endpoint exposed as a `tag` subcommand and an `update_evaluation_testcase_tag` client method. Several CLI commands are renamed (`list eval` → `list evals`, `list show` → a top-level `show`, `compare_testcases` → `compare`), and the client's separate GET/POST helpers are consolidated into a single method-generic `_request`.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: rakam-eval-sdk
-Version: 0.2.1
+Version: 0.2.2
 Summary: Evaluation Framework SDK
 Author: Mohamed Bachar Touil
 License: MIT
@@ -4,7 +4,7 @@ build-backend = "uv_build"
 
 [project]
 name = "rakam-eval-sdk"
-version = "0.2.1"
+version = "0.2.2"
 description = "Evaluation Framework SDK"
 readme = "README.md"
 requires-python = ">=3.8"
@@ -86,8 +86,8 @@ def list_metrics(
         typer.echo(f"- {name}")
 
 
-@list_app.command("eval")
-def list(
+@list_app.command("evals")
+def list_evals(
     directory: Path = typer.Argument(
         Path("./eval"),
         exists=True,
@@ -140,12 +140,12 @@ def list_runs(
         typer.echo("No evaluation runs found.")
         return
 
-    typer.echo(f"[id] " f"{'unique_id':<20}" f"{'label':<20}" f"created_at")
+    typer.echo(f"[id] " f"{'tag':<20}" f"{'label':<20}" f"created_at")
     # pretty CLI output
     for run in runs:
         run_id = run.get("id")
        label = run.get("label") or "-"
-        uid = run.get("unique_id") or "-"
+        uid = run.get("tag") or "-"
         created_at = run.get("created_at")
 
         if created_at:
@@ -156,20 +156,21 @@ def list_runs(
             except ValueError:
                 pass
 
-        typer.echo(f"[{run_id}] " f"{uid:<20} " f"{label:<20} " f"{created_at}")
+        typer.echo(
+            f"[{run_id}] " f"{uid:<20} " f"{label:<20} " f"{created_at}")
 
 
-@list_app.command("show")
-def show_testcase(
+@app.command()
+def show(
     id: Optional[int] = typer.Option(
         None,
         "--id",
         help="Numeric evaluation testcase ID",
     ),
-    uid: Optional[str] = typer.Option(
+    tag: Optional[str] = typer.Option(
         None,
-        "--uid",
-        help="Evaluation testcase unique_id",
+        "--tag",
+        help="Evaluation testcase tag",
     ),
     raw: bool = typer.Option(
         False,
@@ -178,12 +179,12 @@ def show_testcase(
     ),
 ):
     """
-    Show a single evaluation testcase by ID or unique_id.
+    Show a single evaluation testcase by ID or tag.
     """
-    if not id and not uid:
-        raise typer.BadParameter("You must provide either --id or --uid")
+    if not id and not tag:
+        raise typer.BadParameter("You must provide either --id or --tag")
 
-    if id and uid:
-        raise typer.BadParameter("Provide only one of --id or --uid")
+    if id and tag:
+        raise typer.BadParameter("Provide only one of --id or --tag")
 
     client = DeepEvalClient()
@@ -192,8 +193,8 @@ def show_testcase(
         result = client.get_evaluation_testcase_by_id(id)
         identifier = f"id={id}"
     else:
-        result = client.get_evaluation_testcase_by_unique_id(uid)
-        identifier = f"unique_id={uid}"
+        result = client.get_evaluation_testcase_by_tag(tag)
+        identifier = f"tag={tag}"
 
     if not result:
         console.print(
@@ -376,7 +377,8 @@ def _print_and_save(
         return
 
     if out.exists() and not overwrite:
-        typer.echo(f"❌ File already exists: {out} (use --overwrite to replace)")
+        typer.echo(
+            f"❌ File already exists: {out} (use --overwrite to replace)")
         raise typer.Exit(code=1)
 
     out.parent.mkdir(parents=True, exist_ok=True)
@@ -388,7 +390,7 @@ def _print_and_save(
 
 
 @app.command()
-def compare_testcases(
+def compare(
     testcase_a_id: int = typer.Argument(
         ...,
         help="ID of the first testcase",
@@ -549,6 +551,45 @@ def compare_last(
     _print_and_save(resp, pretty, out, overwrite)
 
 
+@list_app.command("tag")
+def update_run_tag(
+    run_id: int = typer.Argument(..., help="Evaluation run ID"),
+    tag: Optional[str] = typer.Option(
+        None,
+        "--tag",
+        "-t",
+        help="Tag to add or update",
+    ),
+    remove: bool = typer.Option(
+        False,
+        "--remove",
+        help="Remove tag from the run",
+    ),
+):
+    """
+    Add, update, or remove a tag from an evaluation run.
+    """
+    if not tag and not remove:
+        typer.echo("❌ You must provide --tag or --remove")
+        raise typer.Exit(code=1)
+
+    if tag and remove:
+        typer.echo("❌ Use either --tag or --remove, not both")
+        raise typer.Exit(code=1)
+
+    client = DeepEvalClient()
+
+    result = client.update_evaluation_testcase_tag(
+        testcase_id=run_id,
+        tag=None if remove else tag,
+        raise_exception=True,
+    )
+
+    action = "removed" if remove else "updated"
+    typer.echo(f"✅ Tag {action} successfully")
+    typer.echo(f"Run ID: {run_id}")
+    typer.echo(f"Tag: {result.get('tag') or '-'}")
+
 def main() -> None:
     app()
 
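For reference, a minimal sketch of the renamed and newly added commands via Typer's test runner. It assumes the CLI app object is importable as `app` from `rakam_eval_sdk.cli` and that `list_app` is mounted under the name `list`; neither is shown in this diff.

from typer.testing import CliRunner

from rakam_eval_sdk.cli import app  # hypothetical import path

runner = CliRunner()

# Renamed in 0.2.2: `list eval` is now `list evals`
runner.invoke(app, ["list", "evals", "./eval"])

# `show` moved from the `list` sub-app to the top level and selects by --tag
runner.invoke(app, ["show", "--tag", "smoke"])

# New in 0.2.2: add, update, or remove a tag on an evaluation run
runner.invoke(app, ["list", "tag", "12", "--tag", "smoke"])
runner.invoke(app, ["list", "tag", "12", "--remove"])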
@@ -1,3 +1,4 @@
+from typing import Optional, Dict
 import os
 import random
 from typing import Any, Dict, List, Optional, Union, cast, overload
@@ -12,6 +13,9 @@ from .schema import (
     SchemaMetricConfig,
     TextInputItem,
 )
+from typing import Optional, Literal, cast
+
+HTTPMethod = Literal["GET", "POST", "PATCH", "PUT", "DELETE"]
 
 
 class DeepEvalClient:
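The `HTTPMethod` alias constrains the verb argument at type-check time only; a minimal self-contained illustration (the `send` function is hypothetical, not part of the SDK):

from typing import Literal

HTTPMethod = Literal["GET", "POST", "PATCH", "PUT", "DELETE"]

def send(method: HTTPMethod) -> None:
    # Literal types are erased at runtime; only static checkers enforce them
    print(method)

send("PATCH")  # accepted by mypy/pyright
send("FETCH")  # flagged by mypy/pyright, but still runs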
@@ -45,53 +49,29 @@ class DeepEvalClient:
 
     def _request(
         self,
+        method: HTTPMethod,
         endpoint: str,
-        payload: dict,
+        *,
+        json: Optional[dict] = None,
+        params: Optional[dict] = None,
         raise_exception: bool = False,
     ) -> Optional[dict]:
-        """Internal helper to send POST requests with standard headers and error handling."""
         url = f"{self.base_url}{endpoint}"
-        headers = {
-            "accept": "application/json",
-            "Content-Type": "application/json",
-            "X-API-Token": self.api_token,
-        }
-
-        try:
-            resp = requests.post(
-                url, headers=headers, json=payload, timeout=self.timeout
-            )
-            if raise_exception:
-                resp.raise_for_status()
-        except requests.RequestException as e:
-            if raise_exception:
-                raise
-            return {"error": str(e)}
-
-        try:
-            return cast(dict, resp.json())
-        except ValueError:
-            if raise_exception:
-                raise
-            return {"error": "Invalid JSON response", "raw": resp.text}
 
-    def _get(
-        self,
-        endpoint: str,
-        params: dict,
-        raise_exception: bool = False,
-    ) -> Optional[dict]:
-        """Internal helper to send GET requests with standard headers and error handling."""
-        url = f"{self.base_url}{endpoint}"
         headers = {
             "accept": "application/json",
             "X-API-Token": self.api_token,
         }
 
+        # Content-Type is only needed when a JSON body is sent
+        if json is not None:
+            headers["Content-Type"] = "application/json"
+
         try:
-            resp = requests.get(
-                url,
+            resp = requests.request(
+                method=method,
+                url=url,
                 headers=headers,
+                json=json,
                 params=params,
                 timeout=self.timeout,
             )
@@ -107,7 +87,38 @@ class DeepEvalClient:
         except ValueError:
             if raise_exception:
                 raise
-            return {"error": "Invalid JSON response", "raw": resp.text}
+            return {
+                "error": "Invalid JSON response",
+                "raw": resp.text,
+            }
+
+    # Thin wrappers over _request; raise_exception is an explicit positional
+    # parameter so call sites like self._post(endpoint, payload, raise_exception) work.
+    def _get(self, endpoint: str, params: dict, raise_exception: bool = False):
+        return self._request("GET", endpoint, params=params, raise_exception=raise_exception)
+
+    def _post(self, endpoint: str, payload: dict, raise_exception: bool = False):
+        return self._request("POST", endpoint, json=payload, raise_exception=raise_exception)
+
+    def _patch(self, endpoint: str, payload: dict, raise_exception: bool = False):
+        return self._request("PATCH", endpoint, json=payload, raise_exception=raise_exception)
+
+    def update_evaluation_testcase_tag(
+        self,
+        *,
+        testcase_id: int,
+        tag: Optional[str],
+        raise_exception: bool = False,
+    ) -> Optional[Dict]:
+        """
+        Add, update, or remove a tag from an evaluation testcase.
+
+        - tag="smoke" → add / update tag
+        - tag=None → remove tag
+        """
+        return self._patch(
+            f"/evaluation-testcases/{testcase_id}/tag",
+            payload={"tag": tag},
+            raise_exception=raise_exception,
+        )
 
     def list_evaluation_testcases(
         self,
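Together, the `_patch` wrapper and `update_evaluation_testcase_tag` make tagging a one-call operation. A minimal usage sketch, assuming the client is importable as below and configured through its defaults (the import path is a guess, not shown in this diff):

from rakam_eval_sdk.client import DeepEvalClient  # hypothetical import path

client = DeepEvalClient()

# Add or update: PATCH /evaluation-testcases/12/tag with {"tag": "smoke"}
client.update_evaluation_testcase_tag(testcase_id=12, tag="smoke")

# Remove: tag=None sends {"tag": null}
client.update_evaluation_testcase_tag(testcase_id=12, tag=None)

# Look up by tag (replaces get_evaluation_testcase_by_unique_id)
testcase = client.get_evaluation_testcase_by_tag("smoke")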
@@ -144,17 +155,17 @@ class DeepEvalClient:
             raise_exception=raise_exception,
         )
 
-    def get_evaluation_testcase_by_unique_id(
+    def get_evaluation_testcase_by_tag(
         self,
-        unique_id: str,
+        tag: str,
         *,
         raise_exception: bool = False,
     ) -> Optional[Dict]:
         """
-        Fetch a single evaluation testcase by unique_id.
+        Fetch a single evaluation testcase by tag.
         """
         return self._get(
-            f"/eval-framework/deepeval/uid/{unique_id}",
+            f"/eval-framework/deepeval/tag/{tag}",
             params={},
             raise_exception=raise_exception,
         )
@@ -163,18 +174,18 @@
         self,
         *,
         id: Optional[int] = None,
-        unique_id: Optional[str] = None,
+        tag: Optional[str] = None,
         raise_exception: bool = False,
     ) -> Optional[Dict]:
         if id is not None:
             return self.get_evaluation_testcase_by_id(
                 id, raise_exception=raise_exception
             )
-        if unique_id is not None:
-            return self.get_evaluation_testcase_by_unique_id(
-                unique_id, raise_exception=raise_exception
+        if tag is not None:
+            return self.get_evaluation_testcase_by_tag(
+                tag, raise_exception=raise_exception
             )
-        raise ValueError("Either id or unique_id must be provided")
+        raise ValueError("Either id or tag must be provided")
 
     def compare_testcases(
         self,
@@ -268,7 +279,7 @@ class DeepEvalClient:
             label=label,
         )
 
-        return self._request(
+        return self._post(
             "/deepeval/text-eval", config.model_dump(), raise_exception
         )
 
@@ -284,7 +295,7 @@
         payload = EvalConfig.model_construct(
             data=data, metrics=metrics, component=component, version=label
         ).model_dump()
-        return self._request("/deepeval/text-eval/background", payload, raise_exception)
+        return self._post("/deepeval/text-eval/background", payload, raise_exception)
 
     @overload
     def schema_eval(
@@ -328,7 +339,7 @@
             label=label,
         )
 
-        return self._request(
+        return self._post(
             "/deepeval/schema-eval",
             config.model_dump(),
             raise_exception,
@@ -346,7 +357,7 @@
         payload = SchemaEvalConfig.model_construct(
             data=data, metrics=metrics, component=component, version=label
         ).model_dump()
-        return self._request(
+        return self._post(
             "/deepeval/schema-eval/background", payload, raise_exception
         )
 
@@ -117,7 +117,6 @@ class SchemaInputItem(
 
 class EvalConfig(BaseModel):
     __eval_config__ = "text_eval"
-    unique_id: Union[str, None] = None
     component: str = "unknown"
     label: Union[str, None] = None
     data: List[TextInputItem]
@@ -127,7 +126,6 @@ class EvalConfig(BaseModel):
 class SchemaEvalConfig(BaseModel):
     __eval_config__ = "schema_eval"
     component: str = "unknown"
-    unique_id: Union[str, None] = None
     label: Union[str, None] = None
     data: List[SchemaInputItem]
     metrics: List[SchemaMetricConfig] = Field(default_factory=list)
File without changes