rakam-eval-sdk 0.2.0rc2__tar.gz → 0.2.2__tar.gz

This diff shows the changes between publicly released versions of this package as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: rakam-eval-sdk
-Version: 0.2.0rc2
+Version: 0.2.2
 Summary: Evaluation Framework SDK
 Author: Mohamed Bachar Touil
 License: MIT
@@ -94,6 +94,7 @@ client = DeepEvalClient(
 )
 
 ```
+
 3. Schema Evaluation
 
 ```python
@@ -137,6 +138,7 @@ client = DeepEvalClient(
 )
 
 ```
+
 ## Configuration
 
 The client can be configured in multiple ways:
@@ -150,7 +152,7 @@ DeepEvalClient(base_url="http://api", api_token="123")
 ### Environment variables
 
 ```bash
-export EVALFRAMWORK_URL=http://api
+export EVALFRAMEWORK_URL=http://api
 export EVALFRAMWORK_API_KEY=123
 ```
 
@@ -80,6 +80,7 @@ client = DeepEvalClient(
 )
 
 ```
+
 3. Schema Evaluation
 
 ```python
@@ -123,6 +124,7 @@ client = DeepEvalClient(
 )
 
 ```
+
 ## Configuration
 
 The client can be configured in multiple ways:
@@ -136,7 +138,7 @@ DeepEvalClient(base_url="http://api", api_token="123")
 ### Environment variables
 
 ```bash
-export EVALFRAMWORK_URL=http://api
+export EVALFRAMEWORK_URL=http://api
 export EVALFRAMWORK_API_KEY=123
 ```
 
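Only the URL variable changes spelling here; the API-key line in the README keeps its existing name. A minimal configuration sketch, assuming `DeepEvalClient` is importable from a top-level `rakam_eval_sdk` module (the import path is not shown in this diff) and using placeholder values:

```python
import os

# Hypothetical import path; the diff does not show where DeepEvalClient is exported from.
from rakam_eval_sdk import DeepEvalClient

# Explicit constructor arguments, as in the README example above.
client = DeepEvalClient(base_url="http://api", api_token="123")

# Or rely on the renamed environment variable; when no URL is supplied at all,
# the constructor falls back to http://localhost:8080 (see the client diff below).
os.environ["EVALFRAMEWORK_URL"] = "http://api"
client = DeepEvalClient(api_token="123")
```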
@@ -4,7 +4,7 @@ build-backend = "uv_build"
 
 [project]
 name = "rakam-eval-sdk"
-version = "0.2.0rc2"
+version = "0.2.2"
 description = "Evaluation Framework SDK"
 readme = "README.md"
 requires-python = ">=3.8"
@@ -6,7 +6,7 @@ import uuid
 from datetime import datetime
 from pathlib import Path
 from pprint import pprint
-from typing import Any, Optional
+from typing import Any, Dict, Optional, Sequence
 
 import typer
 from dotenv import load_dotenv
@@ -31,9 +31,63 @@ if PROJECT_ROOT not in sys.path:
     sys.path.insert(0, PROJECT_ROOT)
 list_app = typer.Typer(help="List resources")
 app.add_typer(list_app, name="list")
+metrics_app = typer.Typer(help="Metrics utilities")
+app.add_typer(metrics_app, name="metrics")
 
-@list_app.command("eval")
-def list(
+
+@metrics_app.command("list")
+def list_metrics(
+    limit: int = typer.Option(
+        20,
+        "--limit",
+        help="Number of testcases to inspect for metrics",
+    ),
+):
+    """
+    List unique metric names found in evaluation testcases.
+    """
+    client = DeepEvalClient()
+
+    testcases = client.list_evaluation_testcases(
+        limit=limit,
+        offset=0,
+        raise_exception=True,
+    )
+
+    if not testcases:
+        typer.echo("No evaluation testcases found.")
+        return
+
+    metric_names: set[str] = set()
+
+    def collect_metrics(entries: Sequence[Dict] | None):
+        if not entries:
+            return
+        for entry in entries:
+            for metric in entry.get("metrics", []) or []:
+                name = metric.get("name")
+                if name:
+                    metric_names.add(name)
+
+    for tc in testcases:
+
+        collect_metrics(tc.get("result"))
+
+    if not metric_names:
+        typer.echo("No metrics found.")
+        return
+
+    typer.echo(
+        f"📊 Found {len(metric_names)} unique metrics "
+        f"(from latest {limit} testcases)\n"
+    )
+
+    for name in sorted(metric_names):
+        typer.echo(f"- {name}")
+
+
+@list_app.command("evals")
+def list_evals(
     directory: Path = typer.Argument(
         Path("./eval"),
         exists=True,
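The nested `collect_metrics` helper above reads each testcase's `result` field as a list of entries, each carrying a `metrics` list of dicts with a `name` key. A hypothetical payload illustrating only that assumed shape (all values are invented):

```python
# Hypothetical testcase dict, shaped the way collect_metrics() reads it.
testcase = {
    "result": [
        {"metrics": [{"name": "faithfulness"}, {"name": "relevancy"}]},  # names invented
        {"metrics": []},  # entries without metrics contribute nothing
    ],
}
# collect_metrics(testcase.get("result")) would record "faithfulness" and "relevancy".
```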
@@ -66,16 +120,10 @@ def list(
         typer.echo(f"No @{TARGET_DECORATOR} functions found.")
 
 
-
-
-
 @list_app.command("runs")
 def list_runs(
     limit: int = typer.Option(20, help="Max number of runs"),
     offset: int = typer.Option(0, help="Pagination offset"),
-    status: Optional[str] = typer.Option(
-        None, help="Filter by status (running, completed, failed)"
-    ),
 ):
     """
     List evaluation runs (newest first).
@@ -92,24 +140,12 @@ def list_runs(
         typer.echo("No evaluation runs found.")
         return
 
-    # optional status filtering (client-side for now)
-    if status:
-        runs = [
-            r for r in runs
-            if r.get("result", {}).get("status") == status
-        ]
-
-    typer.echo(
-        f"[id] "
-        f"{'unique_id':<20}"
-        f"{'label':<20}"
-        f"created_at"
-    )
+    typer.echo(f"[id] " f"{'tag':<20}" f"{'label':<20}" f"created_at")
     # pretty CLI output
     for run in runs:
         run_id = run.get("id")
         label = run.get("label") or "-"
-        uid = run.get("unique_id") or "-"
+        uid = run.get("tag") or "-"
         created_at = run.get("created_at")
 
         if created_at:
@@ -121,24 +157,20 @@
                 pass
 
         typer.echo(
-            f"[{run_id}] "
-            f"{uid:<20} "
-            f"{label:<20} "
-            f"{created_at}"
-        )
+            f"[{run_id}] " f"{uid:<20} " f"{label:<20} " f"{created_at}")
 
 
-@list_app.command("show")
-def show_testcase(
+@app.command()
+def show(
     id: Optional[int] = typer.Option(
         None,
         "--id",
        help="Numeric evaluation testcase ID",
     ),
-    uid: Optional[str] = typer.Option(
+    tag: Optional[str] = typer.Option(
        None,
-        "--uid",
-        help="Evaluation testcase unique_id",
+        "--tag",
+        help="Evaluation testcase tag",
     ),
     raw: bool = typer.Option(
         False,
@@ -147,12 +179,12 @@ def show_testcase(
     ),
 ):
     """
-    Show a single evaluation testcase by ID or unique_id.
+    Show a single evaluation testcase by ID or tag.
     """
-    if not id and not uid:
+    if not id and not tag:
         raise typer.BadParameter("You must provide either --id or --uid")
 
-    if id and uid:
+    if id and tag:
         raise typer.BadParameter("Provide only one of --id or --uid")
 
     client = DeepEvalClient()
@@ -161,8 +193,8 @@ def show_testcase(
         result = client.get_evaluation_testcase_by_id(id)
         identifier = f"id={id}"
     else:
-        result = client.get_evaluation_testcase_by_unique_id(uid)
-        identifier = f"unique_id={uid}"
+        result = client.get_evaluation_testcase_by_tag(tag)
+        identifier = f"tag={tag}"
 
     if not result:
         console.print(
@@ -358,7 +390,7 @@ def _print_and_save(
 
 
 @app.command()
-def compare_testcases(
+def compare(
     testcase_a_id: int = typer.Argument(
         ...,
         help="ID of the first testcase",
@@ -519,6 +551,45 @@ def compare_last(
     _print_and_save(resp, pretty, out, overwrite)
 
 
+@list_app.command("tag")
+def update_run_tag(
+    run_id: int = typer.Argument(..., help="Evaluation run ID"),
+    tag: Optional[str] = typer.Option(
+        None,
+        "--tag",
+        "-t",
+        help="Tag to add or update",
+    ),
+    remove: bool = typer.Option(
+        False,
+        "--remove",
+        help="Remove tag from the run",
+    ),
+):
+    """
+    Add, update, or remove a tag from an evaluation run.
+    """
+    if not tag and not remove:
+        typer.echo("❌ You must provide --tag or --remove")
+        raise typer.Exit(code=1)
+
+    if tag and remove:
+        typer.echo("❌ Use either --tag or --remove, not both")
+        raise typer.Exit(code=1)
+
+    client = DeepEvalClient()
+
+    result = client.update_evaluation_testcase_tag(
+        testcase_id=run_id,
+        tag=None if remove else tag,
+        raise_exception=True,
+    )
+
+    action = "removed" if remove else "updated"
+    typer.echo(f"✅ Tag {action} successfully")
+    typer.echo(f"Run ID: {run_id}")
+    typer.echo(f"Tag: {result.get('tag') or '-'}")
+
 def main() -> None:
     app()
 
@@ -1,3 +1,4 @@
+from typing import Optional, Dict
 import os
 import random
 from typing import Any, Dict, List, Optional, Union, cast, overload
@@ -12,6 +13,9 @@ from .schema import (
     SchemaMetricConfig,
     TextInputItem,
 )
+from typing import Optional, Literal, cast
+
+HTTPMethod = Literal["GET", "POST", "PATCH", "PUT", "DELETE"]
 
 
 class DeepEvalClient:
@@ -27,71 +31,47 @@ class DeepEvalClient:
         settings_module: Optional[Any] = None,  # optional external settings
         timeout: int = 30,
     ):
-        settings_url = getattr(settings_module, "EVALFRAMWORK_URL", None)
+        settings_url = getattr(settings_module, "EVALFRAMEWORK_URL", None)
         settings_token = getattr(settings_module, "EVALFRAMWORK_API_KEY", None)
 
         raw_url = (
             base_url
             or settings_url
-            or os.getenv("EVALFRAMWORK_URL")
+            or os.getenv("EVALFRAMEWORK_URL")
             or "http://localhost:8080"
         )
         self.base_url = raw_url.rstrip("/")
         self.api_token = (
             api_token or settings_token or os.getenv(
-                "EVALFRAMWORK_API_KEY", "")
+                "EVALFRAMEWORK_API_KEY", "")
         )
         self.timeout = timeout
 
     def _request(
         self,
+        method: HTTPMethod,
         endpoint: str,
-        payload: dict,
+        *,
+        json: dict | None = None,
+        params: dict | None = None,
         raise_exception: bool = False,
     ) -> Optional[dict]:
-        """Internal helper to send POST requests with standard headers and error handling."""
         url = f"{self.base_url}{endpoint}"
-        headers = {
-            "accept": "application/json",
-            "Content-Type": "application/json",
-            "X-API-Token": self.api_token,
-        }
-
-        try:
-            resp = requests.post(
-                url, headers=headers, json=payload, timeout=self.timeout
-            )
-            if raise_exception:
-                resp.raise_for_status()
-        except requests.RequestException as e:
-            if raise_exception:
-                raise
-            return {"error": str(e)}
-
-        try:
-            return cast(dict, resp.json())
-        except ValueError:
-            if raise_exception:
-                raise
-            return {"error": "Invalid JSON response", "raw": resp.text}
 
-    def _get(
-        self,
-        endpoint: str,
-        params: dict,
-        raise_exception: bool = False,
-    ) -> Optional[dict]:
-        """Internal helper to send GET requests with standard headers and error handling."""
-        url = f"{self.base_url}{endpoint}"
         headers = {
             "accept": "application/json",
             "X-API-Token": self.api_token,
         }
 
+        if json is not None:
+            headers["Content-Type"] = "application/json"
+
         try:
-            resp = requests.get(
-                url,
+            resp = requests.request(
+                method=method,
+                url=url,
                 headers=headers,
+                json=json,
                 params=params,
                 timeout=self.timeout,
             )
@@ -107,7 +87,38 @@ class DeepEvalClient:
         except ValueError:
             if raise_exception:
                 raise
-            return {"error": "Invalid JSON response", "raw": resp.text}
+            return {
+                "error": "Invalid JSON response",
+                "raw": resp.text,
+            }
+
+    def _get(self, endpoint: str, params: dict, **kw):
+        return self._request("GET", endpoint, params=params, **kw)
+
+    def _post(self, endpoint: str, payload: dict, **kw):
+        return self._request("POST", endpoint, json=payload, **kw)
+
+    def _patch(self, endpoint: str, payload: dict, **kw):
+        return self._request("PATCH", endpoint, json=payload, **kw)
+
+    def update_evaluation_testcase_tag(
+        self,
+        *,
+        testcase_id: int,
+        tag: Optional[str],
+        raise_exception: bool = False,
+    ) -> Optional[Dict]:
+        """
+        Add, update, or remove a tag from an evaluation testcase.
+
+        - tag="smoke" → add / update tag
+        - tag=None → remove tag
+        """
+        return self._patch(
+            f"/evaluation-testcases/{testcase_id}/tag",
+            payload={"tag": tag},
+            raise_exception=raise_exception,
+        )
 
     def list_evaluation_testcases(
         self,
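A usage sketch for the new tagging API, restricted to what this diff shows (the `update_evaluation_testcase_tag` method and its PATCH route); the import path and the id/tag values are placeholders:

```python
# Hypothetical import path; the diff does not show the package's public module layout.
from rakam_eval_sdk import DeepEvalClient

client = DeepEvalClient(base_url="http://api", api_token="123")

# Add or update a tag on testcase 42 (placeholder id).
client.update_evaluation_testcase_tag(testcase_id=42, tag="smoke")

# Passing tag=None issues the same PATCH with a null tag, i.e. removes it.
client.update_evaluation_testcase_tag(testcase_id=42, tag=None, raise_exception=True)
```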
@@ -144,17 +155,17 @@ class DeepEvalClient:
             raise_exception=raise_exception,
         )
 
-    def get_evaluation_testcase_by_unique_id(
+    def get_evaluation_testcase_by_tag(
         self,
-        unique_id: str,
+        tag: str,
         *,
         raise_exception: bool = False,
     ) -> Optional[Dict]:
         """
-        Fetch a single evaluation testcase by unique_id.
+        Fetch a single evaluation testcase by tag.
         """
         return self._get(
-            f"/eval-framework/deepeval/uid/{unique_id}",
+            f"/eval-framework/deepeval/tag/{tag}",
             params={},
             raise_exception=raise_exception,
         )
@@ -163,18 +174,18 @@ class DeepEvalClient:
         self,
         *,
         id: Optional[int] = None,
-        unique_id: Optional[str] = None,
+        tag: Optional[str] = None,
         raise_exception: bool = False,
     ) -> Optional[Dict]:
         if id is not None:
             return self.get_evaluation_testcase_by_id(
                 id, raise_exception=raise_exception
             )
-        if unique_id is not None:
-            return self.get_evaluation_testcase_by_unique_id(
-                unique_id, raise_exception=raise_exception
+        if tag is not None:
+            return self.get_evaluation_testcase_by_tag(
+                tag, raise_exception=raise_exception
             )
-        raise ValueError("Either id or unique_id must be provided")
+        raise ValueError("Either id or tag must be provided")
 
     def compare_testcases(
         self,
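For the renamed lookup methods, a short sketch using only the method names and routes visible in this diff; the import path and values are placeholders:

```python
from rakam_eval_sdk import DeepEvalClient  # hypothetical import path

client = DeepEvalClient(base_url="http://api", api_token="123")

# Fetch by numeric id.
tc_by_id = client.get_evaluation_testcase_by_id(42, raise_exception=True)

# Fetch by the new tag identifier (GET /eval-framework/deepeval/tag/{tag}).
tc_by_tag = client.get_evaluation_testcase_by_tag("smoke")
```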
@@ -268,7 +279,7 @@ class DeepEvalClient:
             label=label,
         )
 
-        return self._request(
+        return self._post(
             "/deepeval/text-eval", config.model_dump(), raise_exception
         )
 
@@ -284,7 +295,7 @@ class DeepEvalClient:
         payload = EvalConfig.model_construct(
             data=data, metrics=metrics, component=component, version=label
         ).model_dump()
-        return self._request("/deepeval/text-eval/background", payload, raise_exception)
+        return self._post("/deepeval/text-eval/background", payload, raise_exception)
 
     @overload
     def schema_eval(
@@ -328,7 +339,7 @@ class DeepEvalClient:
             label=label,
         )
 
-        return self._request(
+        return self._post(
             "/deepeval/schema-eval",
             config.model_dump(),
             raise_exception,
@@ -346,7 +357,7 @@ class DeepEvalClient:
         payload = SchemaEvalConfig.model_construct(
             data=data, metrics=metrics, component=component, version=label
         ).model_dump()
-        return self._request(
+        return self._post(
             "/deepeval/schema-eval/background", payload, raise_exception
         )
 
@@ -94,8 +94,7 @@ MetricConfig = Annotated[
 ]
 
 SchemaMetricConfig = Annotated[
-    Union[JsonCorrectnessConfig, FieldsPresenceConfig], Field(
-        discriminator="type")
+    Union[JsonCorrectnessConfig, FieldsPresenceConfig], Field(discriminator="type")
 ]
 
 
@@ -118,7 +117,6 @@ class SchemaInputItem(InputItem):
 
 class EvalConfig(BaseModel):
     __eval_config__ = "text_eval"
-    unique_id: Union[str, None] = None
     component: str = "unknown"
     label: Union[str, None] = None
     data: List[TextInputItem]
@@ -128,7 +126,6 @@ class EvalConfig(BaseModel):
 class SchemaEvalConfig(BaseModel):
     __eval_config__ = "schema_eval"
     component: str = "unknown"
-    unique_id: Union[str, None] = None
     label: Union[str, None] = None
     data: List[SchemaInputItem]
     metrics: List[SchemaMetricConfig] = Field(default_factory=list)