rakam-eval-sdk 0.2.0rc1__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: rakam-eval-sdk
-Version: 0.2.0rc1
+Version: 0.2.1
 Summary: Evaluation Framework SDK
 Author: Mohamed Bachar Touil
 License: MIT
@@ -94,6 +94,7 @@ client = DeepEvalClient(
 )
 
 ```
+
 3. Schema Evaluation
 
 ```python
@@ -137,6 +138,7 @@ client = DeepEvalClient(
 )
 
 ```
+
 ## Configuration
 
 The client can be configured in multiple ways:
@@ -150,7 +152,7 @@ DeepEvalClient(base_url="http://api", api_token="123")
 ### Environment variables
 
 ```bash
-export EVALFRAMWORK_URL=http://api
+export EVALFRAMEWORK_URL=http://api
 export EVALFRAMWORK_API_KEY=123
 ```
 
@@ -80,6 +80,7 @@ client = DeepEvalClient(
 )
 
 ```
+
 3. Schema Evaluation
 
 ```python
@@ -123,6 +124,7 @@ client = DeepEvalClient(
 )
 
 ```
+
 ## Configuration
 
 The client can be configured in multiple ways:
@@ -136,7 +138,7 @@ DeepEvalClient(base_url="http://api", api_token="123")
 ### Environment variables
 
 ```bash
-export EVALFRAMWORK_URL=http://api
+export EVALFRAMEWORK_URL=http://api
 export EVALFRAMWORK_API_KEY=123
 ```
 
@@ -4,7 +4,7 @@ build-backend = "uv_build"
 
 [project]
 name = "rakam-eval-sdk"
-version = "0.2.0rc1"
+version = "0.2.1"
 description = "Evaluation Framework SDK"
 readme = "README.md"
 requires-python = ">=3.8"
@@ -6,7 +6,7 @@ import uuid
 from datetime import datetime
 from pathlib import Path
 from pprint import pprint
-from typing import Any, Optional
+from typing import Any, Dict, Optional, Sequence
 
 import typer
 from dotenv import load_dotenv
@@ -29,9 +29,64 @@ console = Console()
 PROJECT_ROOT = os.path.abspath(".")
 if PROJECT_ROOT not in sys.path:
     sys.path.insert(0, PROJECT_ROOT)
+list_app = typer.Typer(help="List resources")
+app.add_typer(list_app, name="list")
+metrics_app = typer.Typer(help="Metrics utilities")
+app.add_typer(metrics_app, name="metrics")
 
 
-@app.command()
+@metrics_app.command("list")
+def list_metrics(
+    limit: int = typer.Option(
+        20,
+        "--limit",
+        help="Number of testcases to inspect for metrics",
+    ),
+):
+    """
+    List unique metric names found in evaluation testcases.
+    """
+    client = DeepEvalClient()
+
+    testcases = client.list_evaluation_testcases(
+        limit=limit,
+        offset=0,
+        raise_exception=True,
+    )
+
+    if not testcases:
+        typer.echo("No evaluation testcases found.")
+        return
+
+    metric_names: set[str] = set()
+
+    def collect_metrics(entries: Sequence[Dict] | None):
+        if not entries:
+            return
+        for entry in entries:
+            for metric in entry.get("metrics", []) or []:
+                name = metric.get("name")
+                if name:
+                    metric_names.add(name)
+
+    for tc in testcases:
+
+        collect_metrics(tc.get("result"))
+
+    if not metric_names:
+        typer.echo("No metrics found.")
+        return
+
+    typer.echo(
+        f"📊 Found {len(metric_names)} unique metrics "
+        f"(from latest {limit} testcases)\n"
+    )
+
+    for name in sorted(metric_names):
+        typer.echo(f"- {name}")
+
+
+@list_app.command("eval")
 def list(
     directory: Path = typer.Argument(
         Path("./eval"),
@@ -65,17 +120,10 @@ def list(
         typer.echo(f"No @{TARGET_DECORATOR} functions found.")
 
 
-list_app = typer.Typer(help="List resources")
-app.add_typer(list_app, name="list")
-
-
 @list_app.command("runs")
 def list_runs(
     limit: int = typer.Option(20, help="Max number of runs"),
     offset: int = typer.Option(0, help="Pagination offset"),
-    status: Optional[str] = typer.Option(
-        None, help="Filter by status (running, completed, failed)"
-    ),
 ):
     """
     List evaluation runs (newest first).
@@ -92,19 +140,7 @@ def list_runs(
         typer.echo("No evaluation runs found.")
         return
 
-    # optional status filtering (client-side for now)
-    if status:
-        runs = [
-            r for r in runs
-            if r.get("result", {}).get("status") == status
-        ]
-
-    typer.echo(
-        f"[id] "
-        f"{'unique_id':<20}"
-        f"{'label':<20}"
-        f"created_at"
-    )
+    typer.echo(f"[id] " f"{'unique_id':<20}" f"{'label':<20}" f"created_at")
     # pretty CLI output
     for run in runs:
         run_id = run.get("id")
@@ -120,12 +156,7 @@ def list_runs(
         except ValueError:
             pass
 
-        typer.echo(
-            f"[{run_id}] "
-            f"{uid:<20} "
-            f"{label:<20} "
-            f"{created_at}"
-        )
+        typer.echo(f"[{run_id}] " f"{uid:<20} " f"{label:<20} " f"{created_at}")
 
 
 @list_app.command("show")
@@ -345,8 +376,7 @@ def _print_and_save(
         return
 
     if out.exists() and not overwrite:
-        typer.echo(
-            f"❌ File already exists: {out} (use --overwrite to replace)")
+        typer.echo(f"❌ File already exists: {out} (use --overwrite to replace)")
         raise typer.Exit(code=1)
 
     out.parent.mkdir(parents=True, exist_ok=True)
@@ -27,19 +27,19 @@ class DeepEvalClient:
         settings_module: Optional[Any] = None,  # optional external settings
         timeout: int = 30,
     ):
-        settings_url = getattr(settings_module, "EVALFRAMWORK_URL", None)
+        settings_url = getattr(settings_module, "EVALFRAMEWORK_URL", None)
         settings_token = getattr(settings_module, "EVALFRAMWORK_API_KEY", None)
 
         raw_url = (
             base_url
             or settings_url
-            or os.getenv("EVALFRAMWORK_URL")
+            or os.getenv("EVALFRAMEWORK_URL")
             or "http://localhost:8080"
         )
         self.base_url = raw_url.rstrip("/")
         self.api_token = (
            api_token or settings_token or os.getenv(
-                "EVALFRAMWORK_API_KEY", "")
+                "EVALFRAMEWORK_API_KEY", "")
        )
        self.timeout = timeout
 
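
This hunk renames only the URL variable from `EVALFRAMWORK_URL` to `EVALFRAMEWORK_URL`; the API-key variable keeps the old `EVALFRAMWORK_API_KEY` spelling in this release. The constructor resolves configuration in order: explicit argument, then settings module, then environment variable, then the localhost default. A sketch of that precedence as it stands in 0.2.1, assuming the client is importable from the package root (import path not confirmed):

```python
# Sketch of the resolution order the constructor implements in 0.2.1.
import os

from rakam_eval_sdk import DeepEvalClient  # hypothetical import path

# 1. Explicit arguments win over everything else:
client = DeepEvalClient(base_url="http://api", api_token="123")

# 2. Otherwise the environment is consulted; note the asymmetric spelling:
os.environ["EVALFRAMEWORK_URL"] = "http://api"   # renamed in 0.2.1
os.environ["EVALFRAMWORK_API_KEY"] = "123"       # still the old spelling
client = DeepEvalClient()

# 3. With nothing set, the URL falls back to http://localhost:8080.
```
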
@@ -94,8 +94,7 @@ MetricConfig = Annotated[
 ]
 
 SchemaMetricConfig = Annotated[
-    Union[JsonCorrectnessConfig, FieldsPresenceConfig], Field(
-        discriminator="type")
+    Union[JsonCorrectnessConfig, FieldsPresenceConfig], Field(discriminator="type")
 ]
 
 
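
The `SchemaMetricConfig` annotation above is Pydantic's discriminated-union pattern: the value of the `type` field selects which config model validates a payload. A self-contained sketch of the pattern, assuming Pydantic v2 — the field names inside the models here are illustrative, not the SDK's actual schema:

```python
# Minimal sketch of a discriminated union (Pydantic v2); model bodies are illustrative.
from typing import Annotated, List, Literal, Union

from pydantic import BaseModel, Field, TypeAdapter


class JsonCorrectnessConfig(BaseModel):
    type: Literal["json_correctness"]
    expected_schema: dict  # illustrative field


class FieldsPresenceConfig(BaseModel):
    type: Literal["fields_presence"]
    required_fields: List[str]  # illustrative field


SchemaMetricConfig = Annotated[
    Union[JsonCorrectnessConfig, FieldsPresenceConfig], Field(discriminator="type")
]

# The "type" value routes validation to the matching model.
config = TypeAdapter(SchemaMetricConfig).validate_python(
    {"type": "fields_presence", "required_fields": ["id", "label"]}
)
assert isinstance(config, FieldsPresenceConfig)
```
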