rakam-eval-sdk 0.2.4rc5__py3-none-any.whl → 0.2.4rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rakam_eval_sdk/cli.py +4 -131
- rakam_eval_sdk/decorators.py +1 -2
- rakam_eval_sdk/schema.py +1 -2
- rakam_eval_sdk/tests/__init__.py +0 -0
- rakam_eval_sdk/tests/test_decorator.py +82 -0
- rakam_eval_sdk/tests/test_print.py +166 -0
- rakam_eval_sdk/utils/print.py +134 -0
- {rakam_eval_sdk-0.2.4rc5.dist-info → rakam_eval_sdk-0.2.4rc7.dist-info}/METADATA +1 -1
- rakam_eval_sdk-0.2.4rc7.dist-info/RECORD +14 -0
- rakam_eval_sdk-0.2.4rc5.dist-info/RECORD +0 -10
- {rakam_eval_sdk-0.2.4rc5.dist-info → rakam_eval_sdk-0.2.4rc7.dist-info}/WHEEL +0 -0
- {rakam_eval_sdk-0.2.4rc5.dist-info → rakam_eval_sdk-0.2.4rc7.dist-info}/entry_points.txt +0 -0
rakam_eval_sdk/cli.py
CHANGED

@@ -5,23 +5,22 @@ import sys
 import uuid
 from datetime import datetime
 from pathlib import Path
-from pprint import pprint
-from typing import Any, Dict, List, Optional, Tuple, Union, Set
+from typing import Any, Dict, List, Optional, Set, Tuple, Union

 import typer
 from dotenv import load_dotenv
 from rich.console import Console
 from rich.panel import Panel
 from rich.pretty import Pretty
-from typer import secho

 from rakam_eval_sdk.client import DeepEvalClient
 from rakam_eval_sdk.decorators import eval_run
-from rakam_eval_sdk.schema import MetricDiff, TestCaseComparison
+from rakam_eval_sdk.schema import TestCaseComparison
 from rakam_eval_sdk.utils.decorator_utils import (
     find_decorated_functions,
     load_module_from_path,
 )
+from rakam_eval_sdk.utils.print import _print_and_save, pretty_print_comparison

 load_dotenv()
 app = typer.Typer(help="CLI tools for evaluation utilities")

@@ -199,8 +198,7 @@ def list_runs(
     shown = offset + len(items)
     if shown < total:
         typer.echo()
-        typer.echo(
-            f"Showing {shown} of {total} runs. Use --limit to see more.")
+        typer.echo(f"Showing {shown} of {total} runs. Use --limit to see more.")


 @app.command()

@@ -405,131 +403,6 @@ def run(
     typer.echo("\nNo @eval_run functions executed.")


-def _print_and_save(
-    resp: dict,
-    pretty: bool,
-    out: Optional[Path],
-    overwrite: bool,
-) -> None:
-    if pretty:
-        typer.echo(typer.style("📊 Result:", bold=True))
-        pprint(resp)
-    else:
-        typer.echo(resp)
-
-    if out is None:
-        return
-
-    if out.exists() and not overwrite:
-        typer.echo(
-            f"❌ File already exists: {out} (use --overwrite to replace)")
-        raise typer.Exit(code=1)
-
-    out.parent.mkdir(parents=True, exist_ok=True)
-
-    with out.open("w", encoding="utf-8") as f:
-        json.dump(resp, f, indent=2, ensure_ascii=False)
-
-    typer.echo(f"💾 Result saved to {out}")
-
-
-def pct_change(a: Optional[float], b: Optional[float]) -> Optional[str]:
-    if a is None or b is None or a == 0:
-        return None
-    return f"{((b - a) / a) * 100:+.2f}%"
-
-
-def metric_direction(delta: Optional[float]) -> str:
-    if delta is None:
-        return "unchanged"
-    if delta > 0:
-        return "improved"
-    if delta < 0:
-        return "regressed"
-    return "unchanged"
-
-
-def print_metric_diff(diff: MetricDiff) -> None:
-    secho(f"\nMetric: {diff.metric}", bold=True)
-
-    if diff.status == "added":
-        secho(f"+ score: {diff.score_b}", fg="green")
-        secho(f"+ threshold: {diff.threshold_b}", fg="green")
-        secho(f"+ success: {diff.success_b}", fg="green")
-        return
-
-    if diff.status == "removed":
-        secho(f"- score: {diff.score_a}", fg="red")
-        secho(f"- threshold: {diff.threshold_a}", fg="red")
-        secho(f"- success: {diff.success_a}", fg="red")
-        return
-
-    # unchanged / changed
-    if diff.score_a != diff.score_b:
-        direction = metric_direction(diff.delta)
-        color = "green" if direction == "improved" else "red"
-        pct = pct_change(diff.score_a, diff.score_b)
-
-        secho(f"- score: {diff.score_a}", fg="red")
-        secho(
-            f"+ score: {diff.score_b}" + (f" ({pct})" if pct else ""),
-            fg=color,
-        )
-    else:
-        secho(f" score: {diff.score_a}", dim=True)
-
-    if diff.threshold_a != diff.threshold_b:
-        secho(f"- threshold: {diff.threshold_a}", fg="red")
-        secho(f"+ threshold: {diff.threshold_b}", fg="green")
-    else:
-        secho(f" threshold: {diff.threshold_a}", dim=True)
-
-    if diff.success_a != diff.success_b:
-        secho(f"- success: {diff.success_a}", fg="red")
-        secho(f"+ success: {diff.success_b}", fg="green")
-    else:
-        secho(f" success: {diff.success_a}", dim=True)
-
-
-def summarize(metrics: Any) -> Dict[str, int]:
-    summary = {
-        "improved": 0,
-        "regressed": 0,
-        "unchanged": 0,
-        "added": 0,
-        "removed": 0,
-    }
-
-    for m in metrics:
-        if m.status in ("added", "removed"):
-            summary[m.status] += 1
-        else:
-            direction = metric_direction(m.delta)
-            summary[direction] += 1
-
-    return summary
-
-
-def pretty_print_comparison(resp: Any, summary_only: bool = False) -> None:
-    if not summary_only:
-        for metric in resp.metrics:
-            print_metric_diff(metric)
-        return
-
-    print_summary(resp.metrics)
-
-
-def print_summary(metrics: Any) -> None:
-    summary = summarize(metrics)
-
-    secho("\nSummary:", bold=True)
-    secho(f" ↑ Improved: {summary['improved']}", fg="green")
-    secho(f" ↓ Regressed: {summary['regressed']}", fg="red")
-    secho(f" ± Unchanged: {summary['unchanged']}", dim=True)
-    secho(f" + Added: {summary['added']}", fg="green")
-    secho(f" - Removed: {summary['removed']}", fg="red")
-
-
 @app.command()
 def compare(
     tag: List[str] = typer.Option(
rakam_eval_sdk/decorators.py
CHANGED

@@ -6,9 +6,8 @@ from typing import Callable, Dict, Optional, TypeVar, Union, overload

 import psutil

-if sys.version_info < (3,
+if sys.version_info < (3, 10):
     from typing_extensions import ParamSpec
-
 else:
     from typing import ParamSpec
 P = ParamSpec("P")
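
The net change here tidies the version-gated import: typing.ParamSpec entered the standard library in Python 3.10, so older interpreters fall back to the typing_extensions backport. For reference, the decorator this module exports is used like this (a sketch mirroring tests/test_decorator.py below; illustration only):

    from rakam_eval_sdk.decorators import eval_run

    @eval_run            # bare decorator form
    def add(a: int, b: int) -> int:
        return a + b

    @eval_run()          # parenthesized form is also accepted
    def mul(a: int, b: int) -> int:
        return a * b

    add(2, 3)  # returns 5 and prints an "[eval_run]" line with time=, cpu=, and mem_delta= fields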
rakam_eval_sdk/schema.py
CHANGED

rakam_eval_sdk/tests/__init__.py
ADDED

File without changes
rakam_eval_sdk/tests/test_decorator.py
ADDED

@@ -0,0 +1,82 @@
+import os
+
+import pytest
+
+from rakam_eval_sdk.client import DeepEvalClient
+from rakam_eval_sdk.decorators import eval_run
+
+
+@pytest.fixture
+def client() -> DeepEvalClient:
+    return DeepEvalClient(base_url="http://testserver", api_token="testtoken")
+
+
+class FakeCPUTimes:
+    def __init__(self, user: float = 1.0, system: float = 1.0) -> None:
+        self.user: float = user
+        self.system: float = system
+
+
+class FakeMemInfo:
+    def __init__(self, rss: int) -> None:
+        self.rss: int = rss
+
+
+class FakeProcess:
+    def __init__(self) -> None:
+        self._cpu_calls: int = 0
+        self._mem_calls: int = 0
+
+    def cpu_times(self) -> FakeCPUTimes:
+        self._cpu_calls += 1
+        # simulate CPU usage increase
+        return FakeCPUTimes(
+            user=1.0 + self._cpu_calls,
+            system=1.0,
+        )
+
+    def memory_info(self) -> FakeMemInfo:
+        self._mem_calls += 1
+        return FakeMemInfo(rss=100_000_000 + (self._mem_calls * 10_000))
+
+
+@pytest.fixture(autouse=True)
+def patch_psutil(
+    monkeypatch: pytest.MonkeyPatch,
+) -> FakeProcess:
+    fake_process: FakeProcess = FakeProcess()
+
+    monkeypatch.setattr(
+        "rakam_eval_sdk.decorators.psutil.Process",
+        lambda pid: fake_process,
+    )
+    monkeypatch.setattr(os, "getpid", lambda: 123)
+    return fake_process
+
+
+def test_eval_run_basic(capsys: pytest.CaptureFixture[str]) -> None:
+    @eval_run
+    def add(a: int, b: int) -> int:
+        return a + b
+
+    result: int = add(2, 3)
+
+    assert result == 5
+
+    out: str = capsys.readouterr().out
+    assert "[eval_run]" in out
+    assert "add" in out
+    assert "time=" in out
+    assert "cpu=" in out
+    assert "mem_delta=" in out
+
+
+def test_eval_run_with_parentheses(capsys: pytest.CaptureFixture[str]) -> None:
+    @eval_run()
+    def mul(a: int, b: int) -> int:
+        return a * b
+
+    result: int = mul(3, 4)
+
+    assert result == 12
+    assert "[eval_run]" in capsys.readouterr().out
rakam_eval_sdk/tests/test_print.py
ADDED

@@ -0,0 +1,166 @@
+import json
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any, Dict, List, Optional
+
+import pytest
+import typer
+
+from rakam_eval_sdk.utils.print import (
+    _print_and_save,
+    metric_direction,
+    pct_change,
+    pretty_print_comparison,
+    summarize,
+)
+
+
+@pytest.mark.parametrize(
+    "a,b,expected",
+    [
+        (10, 20, "+100.00%"),
+        (20, 10, "-50.00%"),
+        (10, 10, "+0.00%"),
+        (None, 10, None),
+        (10, None, None),
+        (0, 10, None),
+    ],
+)
+def test_pct_change(a: Optional[int], b: Optional[int], expected: Optional[str]):
+    assert pct_change(a, b) == expected
+
+
+# -------------------------
+# metric_direction
+# -------------------------
+
+@pytest.mark.parametrize(
+    "delta,expected",
+    [
+        (1.0, "improved"),
+        (-1.0, "regressed"),
+        (0.0, "unchanged"),
+        (None, "unchanged"),
+    ],
+)
+def test_metric_direction(delta: Optional[float], expected: str):
+    assert metric_direction(delta) == expected
+
+
+def test_print_and_save_no_output_path(capsys: pytest.CaptureFixture[str]) -> None:
+    resp: Dict[str, int] = {"a": 1}
+
+    _print_and_save(resp, pretty=False, out=None, overwrite=False)
+
+    captured = capsys.readouterr()
+    assert str(resp) in captured.out
+
+
+def test_print_and_save_creates_file(
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    out: Path = tmp_path / "result.json"
+    resp: Dict[str, str] = {"hello": "world"}
+
+    _print_and_save(resp, pretty=False, out=out, overwrite=False)
+
+    assert out.exists()
+    assert json.loads(out.read_text()) == resp
+
+    captured = capsys.readouterr()
+    assert "Result saved" in captured.out
+
+
+def test_print_and_save_refuses_overwrite(tmp_path: Path) -> None:
+    out: Path = tmp_path / "result.json"
+    out.write_text("{}")
+
+    with pytest.raises(typer.Exit):
+        _print_and_save({"x": 1}, pretty=False, out=out, overwrite=False)
+
+
+def test_print_and_save_overwrites(tmp_path: Path) -> None:
+    out: Path = tmp_path / "result.json"
+    out.write_text("{}")
+
+    _print_and_save({"x": 2}, pretty=False, out=out, overwrite=True)
+
+    assert json.loads(out.read_text()) == {"x": 2}
+
+
+# -------------------------
+# summarize
+# -------------------------
+
+def make_metric(
+    *,
+    status: str = "changed",
+    delta: Optional[float] = None,
+) -> SimpleNamespace:
+    return SimpleNamespace(status=status, delta=delta)
+
+
+def test_summarize_counts_all_states() -> None:
+    metrics: List[SimpleNamespace] = [
+        make_metric(status="added"),
+        make_metric(status="removed"),
+        make_metric(delta=1.0),
+        make_metric(delta=-1.0),
+        make_metric(delta=0.0),
+        make_metric(delta=None),
+    ]
+
+    summary: Dict[str, int] = summarize(metrics)
+
+    assert summary == {
+        "improved": 1,
+        "regressed": 1,
+        "unchanged": 2,
+        "added": 1,
+        "removed": 1,
+    }
+
+
+# -------------------------
+# pretty_print_comparison
+# -------------------------
+
+def test_pretty_print_comparison_summary_only(monkeypatch: pytest.MonkeyPatch) -> None:
+    metrics: List[SimpleNamespace] = [
+        make_metric(status="added"),
+        make_metric(delta=1.0),
+    ]
+    resp: SimpleNamespace = SimpleNamespace(metrics=metrics)
+
+    called: Dict[str, Any] = {}
+
+    def fake_print_summary(arg: Any) -> None:
+        called["metrics"] = arg
+
+    import rakam_eval_sdk.utils.print as mod
+    monkeypatch.setattr(mod, "print_summary", fake_print_summary)
+
+    pretty_print_comparison(resp, summary_only=True)
+
+    assert called["metrics"] == metrics
+
+
+def test_pretty_print_comparison_full(monkeypatch: pytest.MonkeyPatch) -> None:
+    metrics: List[SimpleNamespace] = [
+        make_metric(status="added"),
+        make_metric(delta=1.0),
+    ]
+    resp: SimpleNamespace = SimpleNamespace(metrics=metrics)
+
+    calls: List[Any] = []
+
+    def fake_print_metric_diff(metric: Any) -> None:
+        calls.append(metric)
+
+    import rakam_eval_sdk.utils.print as mod
+    monkeypatch.setattr(mod, "print_metric_diff", fake_print_metric_diff)
+
+    pretty_print_comparison(resp, summary_only=False)
+
+    assert calls == metrics
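
Since the tests ship inside the wheel (note the new rakam_eval_sdk/tests/ entries in RECORD below), they can be run directly from an installed environment, assuming pytest is available: pytest --pyargs rakam_eval_sdk.tests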
rakam_eval_sdk/utils/print.py
ADDED

@@ -0,0 +1,134 @@
+import json
+from pathlib import Path
+from pprint import pprint
+from typing import Any, Dict, Optional
+
+import typer
+from typer import secho
+
+from rakam_eval_sdk.schema import MetricDiff
+
+
+def _print_and_save(
+    resp: dict,
+    pretty: bool,
+    out: Optional[Path],
+    overwrite: bool,
+) -> None:
+    if pretty:
+        typer.echo(typer.style("📊 Result:", bold=True))
+        pprint(resp)
+    else:
+        typer.echo(resp)
+
+    if out is None:
+        return
+
+    if out.exists() and not overwrite:
+        typer.echo(
+            f"❌ File already exists: {out} (use --overwrite to replace)")
+        raise typer.Exit(code=1)
+
+    out.parent.mkdir(parents=True, exist_ok=True)
+
+    with out.open("w", encoding="utf-8") as f:
+        json.dump(resp, f, indent=2, ensure_ascii=False)
+
+    typer.echo(f"💾 Result saved to {out}")
+
+
+def pct_change(a: Optional[float], b: Optional[float]) -> Optional[str]:
+    if a is None or b is None or a == 0:
+        return None
+    return f"{((b - a) / a) * 100:+.2f}%"
+
+
+def metric_direction(delta: Optional[float]) -> str:
+    if delta is None:
+        return "unchanged"
+    if delta > 0:
+        return "improved"
+    if delta < 0:
+        return "regressed"
+    return "unchanged"
+
+
+def print_metric_diff(diff: MetricDiff) -> None:
+    secho(f"\nMetric: {diff.metric}", bold=True)
+
+    if diff.status == "added":
+        secho(f"+ score: {diff.score_b}", fg="green")
+        secho(f"+ threshold: {diff.threshold_b}", fg="green")
+        secho(f"+ success: {diff.success_b}", fg="green")
+        return
+
+    if diff.status == "removed":
+        secho(f"- score: {diff.score_a}", fg="red")
+        secho(f"- threshold: {diff.threshold_a}", fg="red")
+        secho(f"- success: {diff.success_a}", fg="red")
+        return
+
+    # unchanged / changed
+    if diff.score_a != diff.score_b:
+        direction = metric_direction(diff.delta)
+        color = "green" if direction == "improved" else "red"
+        pct = pct_change(diff.score_a, diff.score_b)
+
+        secho(f"- score: {diff.score_a}", fg="red")
+        secho(
+            f"+ score: {diff.score_b}" + (f" ({pct})" if pct else ""),
+            fg=color,
+        )
+    else:
+        secho(f" score: {diff.score_a}", dim=True)
+
+    if diff.threshold_a != diff.threshold_b:
+        secho(f"- threshold: {diff.threshold_a}", fg="red")
+        secho(f"+ threshold: {diff.threshold_b}", fg="green")
+    else:
+        secho(f" threshold: {diff.threshold_a}", dim=True)
+
+    if diff.success_a != diff.success_b:
+        secho(f"- success: {diff.success_a}", fg="red")
+        secho(f"+ success: {diff.success_b}", fg="green")
+    else:
+        secho(f" success: {diff.success_a}", dim=True)
+
+
+def summarize(metrics: Any) -> Dict[str, int]:
+    summary = {
+        "improved": 0,
+        "regressed": 0,
+        "unchanged": 0,
+        "added": 0,
+        "removed": 0,
+    }
+
+    for m in metrics:
+        if m.status in ("added", "removed"):
+            summary[m.status] += 1
+        else:
+            direction = metric_direction(m.delta)
+            summary[direction] += 1
+
+    return summary
+
+
+def print_summary(metrics: Any) -> None:
+    summary = summarize(metrics)
+
+    secho("\nSummary:", bold=True)
+    secho(f" ↑ Improved: {summary['improved']}", fg="green")
+    secho(f" ↓ Regressed: {summary['regressed']}", fg="red")
+    secho(f" ± Unchanged: {summary['unchanged']}", dim=True)
+    secho(f" + Added: {summary['added']}", fg="green")
+    secho(f" - Removed: {summary['removed']}", fg="red")
+
+
+def pretty_print_comparison(resp: Any, summary_only: bool = False) -> None:
+    if not summary_only:
+        for metric in resp.metrics:
+            print_metric_diff(metric)
+        return
+
+    print_summary(resp.metrics)
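
Taken together with the tests above, typical use of this module looks like the following (a sketch modeled on tests/test_print.py; illustration only, not part of the diff):

    from types import SimpleNamespace
    from rakam_eval_sdk.utils.print import pretty_print_comparison, summarize

    metrics = [
        SimpleNamespace(status="added", delta=None),    # newly introduced metric
        SimpleNamespace(status="changed", delta=1.0),   # existing metric whose score rose
    ]
    resp = SimpleNamespace(metrics=metrics)

    assert summarize(metrics) == {
        "improved": 1, "regressed": 0, "unchanged": 0, "added": 1, "removed": 0,
    }
    pretty_print_comparison(resp, summary_only=True)  # prints only the colored Summary block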
rakam_eval_sdk-0.2.4rc7.dist-info/RECORD
ADDED

@@ -0,0 +1,14 @@
+rakam_eval_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+rakam_eval_sdk/cli.py,sha256=ZFsEXVBlQNThu8dK_JVGpirmY5ppuzE8lubw-XqE1_k,18385
+rakam_eval_sdk/client.py,sha256=8KwpNt7WHgMfcXAQtrAlnUihai9TiADRieiuWeIxg6E,15575
+rakam_eval_sdk/decorators.py,sha256=IOe4r4P8JoWM9thPtb3bJ1EXvGxkjydHbKS0W1fiYSk,1908
+rakam_eval_sdk/schema.py,sha256=ouxiV1ydQXiVn5DjIAYDT4yqsXA0h95bq6c-oSkqxL8,3912
+rakam_eval_sdk/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+rakam_eval_sdk/tests/test_decorator.py,sha256=ppqdhHamyC-rs_RsEjCegsnkV4kLhX-qpG5UicA6D6w,1956
+rakam_eval_sdk/tests/test_print.py,sha256=uraR7qrZxnB3BCCUB6l9k1L62-VU7f0hJeArL3ellrk,4148
+rakam_eval_sdk/utils/decorator_utils.py,sha256=g0TjXtG9o4hwhUAFP8GJsXAkjhZhzeseTAg-YBFjj2g,1763
+rakam_eval_sdk/utils/print.py,sha256=PaeJrgPtLvVBr7uCXA-Q9UNJqrMyky_UMCd7JGFCQ2w,3743
+rakam_eval_sdk-0.2.4rc7.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
+rakam_eval_sdk-0.2.4rc7.dist-info/entry_points.txt,sha256=tNhwmM_UGELb3h0zOfgCrtTheUkP-k8jGv0rTOfRSps,56
+rakam_eval_sdk-0.2.4rc7.dist-info/METADATA,sha256=JkNU5IxCvwkTYOQA8f2yZcMpYYEl0QHOOybylw6--L0,6022
+rakam_eval_sdk-0.2.4rc7.dist-info/RECORD,,
rakam_eval_sdk-0.2.4rc5.dist-info/RECORD
REMOVED

@@ -1,10 +0,0 @@
-rakam_eval_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rakam_eval_sdk/cli.py,sha256=-H5ggvyHllO3Epnvy3wcV_J0ktAdAU0ypk4ugXMO52Q,21936
-rakam_eval_sdk/client.py,sha256=8KwpNt7WHgMfcXAQtrAlnUihai9TiADRieiuWeIxg6E,15575
-rakam_eval_sdk/decorators.py,sha256=GoaAqnKg-VeYf5eBI-mEgJj6Asqmdqw4QG_qqHu5x2s,1908
-rakam_eval_sdk/schema.py,sha256=X01IBtexmiJSJbdjAoqcMxCG_X1t1XfbIcFKGJ6-6q0,3921
-rakam_eval_sdk/utils/decorator_utils.py,sha256=g0TjXtG9o4hwhUAFP8GJsXAkjhZhzeseTAg-YBFjj2g,1763
-rakam_eval_sdk-0.2.4rc5.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
-rakam_eval_sdk-0.2.4rc5.dist-info/entry_points.txt,sha256=tNhwmM_UGELb3h0zOfgCrtTheUkP-k8jGv0rTOfRSps,56
-rakam_eval_sdk-0.2.4rc5.dist-info/METADATA,sha256=kxqmnzvDuSCTJXxtDdKt4hj95Zja9aulpCfAkTvpZRg,6022
-rakam_eval_sdk-0.2.4rc5.dist-info/RECORD,,

{rakam_eval_sdk-0.2.4rc5.dist-info → rakam_eval_sdk-0.2.4rc7.dist-info}/WHEEL
File without changes

{rakam_eval_sdk-0.2.4rc5.dist-info → rakam_eval_sdk-0.2.4rc7.dist-info}/entry_points.txt
File without changes