rakam-eval-sdk 0.2.0__py3-none-any.whl → 0.2.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rakam_eval_sdk/cli.py +30 -60
- rakam_eval_sdk/client.py +2 -1
- rakam_eval_sdk/schema.py +2 -1
- {rakam_eval_sdk-0.2.0.dist-info → rakam_eval_sdk-0.2.0rc1.dist-info}/METADATA +1 -1
- rakam_eval_sdk-0.2.0rc1.dist-info/RECORD +10 -0
- rakam_eval_sdk-0.2.0.dist-info/RECORD +0 -10
- {rakam_eval_sdk-0.2.0.dist-info → rakam_eval_sdk-0.2.0rc1.dist-info}/WHEEL +0 -0
- {rakam_eval_sdk-0.2.0.dist-info → rakam_eval_sdk-0.2.0rc1.dist-info}/entry_points.txt +0 -0
rakam_eval_sdk/cli.py
CHANGED
|
@@ -6,7 +6,7 @@ import uuid
|
|
|
6
6
|
from datetime import datetime
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
from pprint import pprint
|
|
9
|
-
from typing import Any,
|
|
9
|
+
from typing import Any, Optional
|
|
10
10
|
|
|
11
11
|
import typer
|
|
12
12
|
from dotenv import load_dotenv
|
|
@@ -29,64 +29,9 @@ console = Console()
|
|
|
29
29
|
PROJECT_ROOT = os.path.abspath(".")
|
|
30
30
|
if PROJECT_ROOT not in sys.path:
|
|
31
31
|
sys.path.insert(0, PROJECT_ROOT)
|
|
32
|
-
list_app = typer.Typer(help="List resources")
|
|
33
|
-
app.add_typer(list_app, name="list")
|
|
34
|
-
metrics_app = typer.Typer(help="Metrics utilities")
|
|
35
|
-
app.add_typer(metrics_app, name="metrics")
|
|
36
|
-
|
|
37
32
|
|
|
38
|
-
@metrics_app.command("list")
|
|
39
|
-
def list_metrics(
|
|
40
|
-
limit: int = typer.Option(
|
|
41
|
-
20,
|
|
42
|
-
"--limit",
|
|
43
|
-
help="Number of testcases to inspect for metrics",
|
|
44
|
-
),
|
|
45
|
-
):
|
|
46
|
-
"""
|
|
47
|
-
List unique metric names found in evaluation testcases.
|
|
48
|
-
"""
|
|
49
|
-
client = DeepEvalClient()
|
|
50
33
|
|
|
51
|
-
|
|
52
|
-
limit=limit,
|
|
53
|
-
offset=0,
|
|
54
|
-
raise_exception=True,
|
|
55
|
-
)
|
|
56
|
-
|
|
57
|
-
if not testcases:
|
|
58
|
-
typer.echo("No evaluation testcases found.")
|
|
59
|
-
return
|
|
60
|
-
|
|
61
|
-
metric_names: set[str] = set()
|
|
62
|
-
|
|
63
|
-
def collect_metrics(entries: Sequence[Dict] | None):
|
|
64
|
-
if not entries:
|
|
65
|
-
return
|
|
66
|
-
for entry in entries:
|
|
67
|
-
for metric in entry.get("metrics", []) or []:
|
|
68
|
-
name = metric.get("name")
|
|
69
|
-
if name:
|
|
70
|
-
metric_names.add(name)
|
|
71
|
-
|
|
72
|
-
for tc in testcases:
|
|
73
|
-
|
|
74
|
-
collect_metrics(tc.get("result"))
|
|
75
|
-
|
|
76
|
-
if not metric_names:
|
|
77
|
-
typer.echo("No metrics found.")
|
|
78
|
-
return
|
|
79
|
-
|
|
80
|
-
typer.echo(
|
|
81
|
-
f"📊 Found {len(metric_names)} unique metrics "
|
|
82
|
-
f"(from latest {limit} testcases)\n"
|
|
83
|
-
)
|
|
84
|
-
|
|
85
|
-
for name in sorted(metric_names):
|
|
86
|
-
typer.echo(f"- {name}")
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
@list_app.command("eval")
|
|
34
|
+
@app.command()
|
|
90
35
|
def list(
|
|
91
36
|
directory: Path = typer.Argument(
|
|
92
37
|
Path("./eval"),
|
|
@@ -120,10 +65,17 @@ def list(
|
|
|
120
65
|
typer.echo(f"No @{TARGET_DECORATOR} functions found.")
|
|
121
66
|
|
|
122
67
|
|
|
68
|
+
list_app = typer.Typer(help="List resources")
|
|
69
|
+
app.add_typer(list_app, name="list")
|
|
70
|
+
|
|
71
|
+
|
|
123
72
|
@list_app.command("runs")
|
|
124
73
|
def list_runs(
|
|
125
74
|
limit: int = typer.Option(20, help="Max number of runs"),
|
|
126
75
|
offset: int = typer.Option(0, help="Pagination offset"),
|
|
76
|
+
status: Optional[str] = typer.Option(
|
|
77
|
+
None, help="Filter by status (running, completed, failed)"
|
|
78
|
+
),
|
|
127
79
|
):
|
|
128
80
|
"""
|
|
129
81
|
List evaluation runs (newest first).
|
|
@@ -140,7 +92,19 @@ def list_runs(
|
|
|
140
92
|
typer.echo("No evaluation runs found.")
|
|
141
93
|
return
|
|
142
94
|
|
|
143
|
-
|
|
95
|
+
# optional status filtering (client-side for now)
|
|
96
|
+
if status:
|
|
97
|
+
runs = [
|
|
98
|
+
r for r in runs
|
|
99
|
+
if r.get("result", {}).get("status") == status
|
|
100
|
+
]
|
|
101
|
+
|
|
102
|
+
typer.echo(
|
|
103
|
+
f"[id] "
|
|
104
|
+
f"{'unique_id':<20}"
|
|
105
|
+
f"{'label':<20}"
|
|
106
|
+
f"created_at"
|
|
107
|
+
)
|
|
144
108
|
# pretty CLI output
|
|
145
109
|
for run in runs:
|
|
146
110
|
run_id = run.get("id")
|
|
@@ -156,7 +120,12 @@ def list_runs(
|
|
|
156
120
|
except ValueError:
|
|
157
121
|
pass
|
|
158
122
|
|
|
159
|
-
typer.echo(
|
|
123
|
+
typer.echo(
|
|
124
|
+
f"[{run_id}] "
|
|
125
|
+
f"{uid:<20} "
|
|
126
|
+
f"{label:<20} "
|
|
127
|
+
f"{created_at}"
|
|
128
|
+
)
|
|
160
129
|
|
|
161
130
|
|
|
162
131
|
@list_app.command("show")
|
|
@@ -376,7 +345,8 @@ def _print_and_save(
|
|
|
376
345
|
return
|
|
377
346
|
|
|
378
347
|
if out.exists() and not overwrite:
|
|
379
|
-
typer.echo(
|
|
348
|
+
typer.echo(
|
|
349
|
+
f"❌ File already exists: {out} (use --overwrite to replace)")
|
|
380
350
|
raise typer.Exit(code=1)
|
|
381
351
|
|
|
382
352
|
out.parent.mkdir(parents=True, exist_ok=True)
|
rakam_eval_sdk/client.py
CHANGED
|
@@ -38,7 +38,8 @@ class DeepEvalClient:
|
|
|
38
38
|
)
|
|
39
39
|
self.base_url = raw_url.rstrip("/")
|
|
40
40
|
self.api_token = (
|
|
41
|
-
api_token or settings_token or os.getenv(
|
|
41
|
+
api_token or settings_token or os.getenv(
|
|
42
|
+
"EVALFRAMWORK_API_KEY", "")
|
|
42
43
|
)
|
|
43
44
|
self.timeout = timeout
|
|
44
45
|
|
rakam_eval_sdk/schema.py
CHANGED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
rakam_eval_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
rakam_eval_sdk/cli.py,sha256=eEfBIPcE8vrXsLc1pvah7FeQrH3KdmUcm4ndlTURlF4,13590
|
|
3
|
+
rakam_eval_sdk/client.py,sha256=4qUG8cLGqY8026s28uCHM3zFuGDzekLokZZDu7VRJ_8,13077
|
|
4
|
+
rakam_eval_sdk/decorators.py,sha256=_9VFQmoYWd6cqnNryZJWEwYHQRxY7vIOam4z45zBk3c,1794
|
|
5
|
+
rakam_eval_sdk/schema.py,sha256=ozNC56ygzR1G6UABjnqnJVAPVcF4rJMH1pUNH0a1K4M,3617
|
|
6
|
+
rakam_eval_sdk/utils/decorator_utils.py,sha256=g0TjXtG9o4hwhUAFP8GJsXAkjhZhzeseTAg-YBFjj2g,1763
|
|
7
|
+
rakam_eval_sdk-0.2.0rc1.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
|
|
8
|
+
rakam_eval_sdk-0.2.0rc1.dist-info/entry_points.txt,sha256=tNhwmM_UGELb3h0zOfgCrtTheUkP-k8jGv0rTOfRSps,56
|
|
9
|
+
rakam_eval_sdk-0.2.0rc1.dist-info/METADATA,sha256=ZPMVvCST3fb48UJSJfa1fj5qyjrLi-pQ3N_J1_4pEnA,6019
|
|
10
|
+
rakam_eval_sdk-0.2.0rc1.dist-info/RECORD,,
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
rakam_eval_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
rakam_eval_sdk/cli.py,sha256=0Ym7C83xirGEf0YCh4-agwfjYPJ3yieeS02nXJ_iE-Y,14486
|
|
3
|
-
rakam_eval_sdk/client.py,sha256=JQ-vCJmMLqXql7nNGYBy5dlkZsCq05gOOQhucwwexC8,13060
|
|
4
|
-
rakam_eval_sdk/decorators.py,sha256=_9VFQmoYWd6cqnNryZJWEwYHQRxY7vIOam4z45zBk3c,1794
|
|
5
|
-
rakam_eval_sdk/schema.py,sha256=P4LlnaInXWTq-ve6qPTTxPyzmj3j_1gcqV9i7CYRYec,3608
|
|
6
|
-
rakam_eval_sdk/utils/decorator_utils.py,sha256=g0TjXtG9o4hwhUAFP8GJsXAkjhZhzeseTAg-YBFjj2g,1763
|
|
7
|
-
rakam_eval_sdk-0.2.0.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
|
|
8
|
-
rakam_eval_sdk-0.2.0.dist-info/entry_points.txt,sha256=tNhwmM_UGELb3h0zOfgCrtTheUkP-k8jGv0rTOfRSps,56
|
|
9
|
-
rakam_eval_sdk-0.2.0.dist-info/METADATA,sha256=1l6TbYR49zIKpDyNjgqJFLQ_b1mAvBHKizAWF085-9M,6016
|
|
10
|
-
rakam_eval_sdk-0.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|