rakam-eval-sdk 0.2.0rc2__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rakam_eval_sdk/cli.py +58 -28
- rakam_eval_sdk/client.py +3 -3
- rakam_eval_sdk/schema.py +1 -2
- {rakam_eval_sdk-0.2.0rc2.dist-info → rakam_eval_sdk-0.2.1.dist-info}/METADATA +4 -2
- rakam_eval_sdk-0.2.1.dist-info/RECORD +10 -0
- rakam_eval_sdk-0.2.0rc2.dist-info/RECORD +0 -10
- {rakam_eval_sdk-0.2.0rc2.dist-info → rakam_eval_sdk-0.2.1.dist-info}/WHEEL +0 -0
- {rakam_eval_sdk-0.2.0rc2.dist-info → rakam_eval_sdk-0.2.1.dist-info}/entry_points.txt +0 -0
rakam_eval_sdk/cli.py
CHANGED
|
@@ -6,7 +6,7 @@ import uuid
|
|
|
6
6
|
from datetime import datetime
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
from pprint import pprint
|
|
9
|
-
from typing import Any, Optional
|
|
9
|
+
from typing import Any, Dict, Optional, Sequence
|
|
10
10
|
|
|
11
11
|
import typer
|
|
12
12
|
from dotenv import load_dotenv
|
|
@@ -31,6 +31,60 @@ if PROJECT_ROOT not in sys.path:
|
|
|
31
31
|
sys.path.insert(0, PROJECT_ROOT)
|
|
32
32
|
list_app = typer.Typer(help="List resources")
|
|
33
33
|
app.add_typer(list_app, name="list")
|
|
34
|
+
metrics_app = typer.Typer(help="Metrics utilities")
|
|
35
|
+
app.add_typer(metrics_app, name="metrics")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@metrics_app.command("list")
|
|
39
|
+
def list_metrics(
|
|
40
|
+
limit: int = typer.Option(
|
|
41
|
+
20,
|
|
42
|
+
"--limit",
|
|
43
|
+
help="Number of testcases to inspect for metrics",
|
|
44
|
+
),
|
|
45
|
+
):
|
|
46
|
+
"""
|
|
47
|
+
List unique metric names found in evaluation testcases.
|
|
48
|
+
"""
|
|
49
|
+
client = DeepEvalClient()
|
|
50
|
+
|
|
51
|
+
testcases = client.list_evaluation_testcases(
|
|
52
|
+
limit=limit,
|
|
53
|
+
offset=0,
|
|
54
|
+
raise_exception=True,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
if not testcases:
|
|
58
|
+
typer.echo("No evaluation testcases found.")
|
|
59
|
+
return
|
|
60
|
+
|
|
61
|
+
metric_names: set[str] = set()
|
|
62
|
+
|
|
63
|
+
def collect_metrics(entries: Sequence[Dict] | None):
|
|
64
|
+
if not entries:
|
|
65
|
+
return
|
|
66
|
+
for entry in entries:
|
|
67
|
+
for metric in entry.get("metrics", []) or []:
|
|
68
|
+
name = metric.get("name")
|
|
69
|
+
if name:
|
|
70
|
+
metric_names.add(name)
|
|
71
|
+
|
|
72
|
+
for tc in testcases:
|
|
73
|
+
|
|
74
|
+
collect_metrics(tc.get("result"))
|
|
75
|
+
|
|
76
|
+
if not metric_names:
|
|
77
|
+
typer.echo("No metrics found.")
|
|
78
|
+
return
|
|
79
|
+
|
|
80
|
+
typer.echo(
|
|
81
|
+
f"📊 Found {len(metric_names)} unique metrics "
|
|
82
|
+
f"(from latest {limit} testcases)\n"
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
for name in sorted(metric_names):
|
|
86
|
+
typer.echo(f"- {name}")
|
|
87
|
+
|
|
34
88
|
|
|
35
89
|
@list_app.command("eval")
|
|
36
90
|
def list(
|
|
@@ -66,16 +120,10 @@ def list(
|
|
|
66
120
|
typer.echo(f"No @{TARGET_DECORATOR} functions found.")
|
|
67
121
|
|
|
68
122
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
123
|
@list_app.command("runs")
|
|
73
124
|
def list_runs(
|
|
74
125
|
limit: int = typer.Option(20, help="Max number of runs"),
|
|
75
126
|
offset: int = typer.Option(0, help="Pagination offset"),
|
|
76
|
-
status: Optional[str] = typer.Option(
|
|
77
|
-
None, help="Filter by status (running, completed, failed)"
|
|
78
|
-
),
|
|
79
127
|
):
|
|
80
128
|
"""
|
|
81
129
|
List evaluation runs (newest first).
|
|
@@ -92,19 +140,7 @@ def list_runs(
|
|
|
92
140
|
typer.echo("No evaluation runs found.")
|
|
93
141
|
return
|
|
94
142
|
|
|
95
|
-
|
|
96
|
-
if status:
|
|
97
|
-
runs = [
|
|
98
|
-
r for r in runs
|
|
99
|
-
if r.get("result", {}).get("status") == status
|
|
100
|
-
]
|
|
101
|
-
|
|
102
|
-
typer.echo(
|
|
103
|
-
f"[id] "
|
|
104
|
-
f"{'unique_id':<20}"
|
|
105
|
-
f"{'label':<20}"
|
|
106
|
-
f"created_at"
|
|
107
|
-
)
|
|
143
|
+
typer.echo(f"[id] " f"{'unique_id':<20}" f"{'label':<20}" f"created_at")
|
|
108
144
|
# pretty CLI output
|
|
109
145
|
for run in runs:
|
|
110
146
|
run_id = run.get("id")
|
|
@@ -120,12 +156,7 @@ def list_runs(
|
|
|
120
156
|
except ValueError:
|
|
121
157
|
pass
|
|
122
158
|
|
|
123
|
-
typer.echo(
|
|
124
|
-
f"[{run_id}] "
|
|
125
|
-
f"{uid:<20} "
|
|
126
|
-
f"{label:<20} "
|
|
127
|
-
f"{created_at}"
|
|
128
|
-
)
|
|
159
|
+
typer.echo(f"[{run_id}] " f"{uid:<20} " f"{label:<20} " f"{created_at}")
|
|
129
160
|
|
|
130
161
|
|
|
131
162
|
@list_app.command("show")
|
|
@@ -345,8 +376,7 @@ def _print_and_save(
|
|
|
345
376
|
return
|
|
346
377
|
|
|
347
378
|
if out.exists() and not overwrite:
|
|
348
|
-
typer.echo(
|
|
349
|
-
f"❌ File already exists: {out} (use --overwrite to replace)")
|
|
379
|
+
typer.echo(f"❌ File already exists: {out} (use --overwrite to replace)")
|
|
350
380
|
raise typer.Exit(code=1)
|
|
351
381
|
|
|
352
382
|
out.parent.mkdir(parents=True, exist_ok=True)
|
rakam_eval_sdk/client.py
CHANGED
|
@@ -27,19 +27,19 @@ class DeepEvalClient:
|
|
|
27
27
|
settings_module: Optional[Any] = None, # optional external settings
|
|
28
28
|
timeout: int = 30,
|
|
29
29
|
):
|
|
30
|
-
settings_url = getattr(settings_module, "EVALFRAMWORK_URL", None)
|
|
30
|
+
settings_url = getattr(settings_module, "EVALFRAMEWORK_URL", None)
|
|
31
31
|
settings_token = getattr(settings_module, "EVALFRAMWORK_API_KEY", None)
|
|
32
32
|
|
|
33
33
|
raw_url = (
|
|
34
34
|
base_url
|
|
35
35
|
or settings_url
|
|
36
|
-
or os.getenv("EVALFRAMWORK_URL")
|
|
36
|
+
or os.getenv("EVALFRAMEWORK_URL")
|
|
37
37
|
or "http://localhost:8080"
|
|
38
38
|
)
|
|
39
39
|
self.base_url = raw_url.rstrip("/")
|
|
40
40
|
self.api_token = (
|
|
41
41
|
api_token or settings_token or os.getenv(
|
|
42
|
-
"EVALFRAMWORK_API_KEY", "")
|
|
42
|
+
"EVALFRAMEWORK_API_KEY", "")
|
|
43
43
|
)
|
|
44
44
|
self.timeout = timeout
|
|
45
45
|
|
rakam_eval_sdk/schema.py
CHANGED
|
{rakam_eval_sdk-0.2.0rc2.dist-info → rakam_eval_sdk-0.2.1.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: rakam-eval-sdk
|
|
3
|
-
Version: 0.2.0rc2
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: Evaluation Framework SDK
|
|
5
5
|
Author: Mohamed Bachar Touil
|
|
6
6
|
License: MIT
|
|
@@ -94,6 +94,7 @@ client = DeepEvalClient(
|
|
|
94
94
|
)
|
|
95
95
|
|
|
96
96
|
```
|
|
97
|
+
|
|
97
98
|
3. Schema Evaluation
|
|
98
99
|
|
|
99
100
|
```python
|
|
@@ -137,6 +138,7 @@ client = DeepEvalClient(
|
|
|
137
138
|
)
|
|
138
139
|
|
|
139
140
|
```
|
|
141
|
+
|
|
140
142
|
## Configuration
|
|
141
143
|
|
|
142
144
|
The client can be configured in multiple ways:
|
|
@@ -150,7 +152,7 @@ DeepEvalClient(base_url="http://api", api_token="123")
|
|
|
150
152
|
### Environment variables
|
|
151
153
|
|
|
152
154
|
```bash
|
|
153
|
-
export EVALFRAMWORK_URL=http://api
|
|
155
|
+
export EVALFRAMEWORK_URL=http://api
|
|
154
156
|
export EVALFRAMWORK_API_KEY=123
|
|
155
157
|
```
|
|
156
158
|
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
rakam_eval_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
rakam_eval_sdk/cli.py,sha256=0Ym7C83xirGEf0YCh4-agwfjYPJ3yieeS02nXJ_iE-Y,14486
|
|
3
|
+
rakam_eval_sdk/client.py,sha256=XExVLDzk9Ed3zW5uezAxs5KLROiTODrdmXEcw0g7IE4,13080
|
|
4
|
+
rakam_eval_sdk/decorators.py,sha256=_9VFQmoYWd6cqnNryZJWEwYHQRxY7vIOam4z45zBk3c,1794
|
|
5
|
+
rakam_eval_sdk/schema.py,sha256=P4LlnaInXWTq-ve6qPTTxPyzmj3j_1gcqV9i7CYRYec,3608
|
|
6
|
+
rakam_eval_sdk/utils/decorator_utils.py,sha256=g0TjXtG9o4hwhUAFP8GJsXAkjhZhzeseTAg-YBFjj2g,1763
|
|
7
|
+
rakam_eval_sdk-0.2.1.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
|
|
8
|
+
rakam_eval_sdk-0.2.1.dist-info/entry_points.txt,sha256=tNhwmM_UGELb3h0zOfgCrtTheUkP-k8jGv0rTOfRSps,56
|
|
9
|
+
rakam_eval_sdk-0.2.1.dist-info/METADATA,sha256=u-k-VOQD7BzOaWC0itLYUyQd-55ujT1IdJCma2Y4gf8,6019
|
|
10
|
+
rakam_eval_sdk-0.2.1.dist-info/RECORD,,
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
rakam_eval_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
rakam_eval_sdk/cli.py,sha256=WNpfbUJvoAt4IuH9ZKSrzq4bqHa0QZir5g0M9FkJmw8,13601
|
|
3
|
-
rakam_eval_sdk/client.py,sha256=4qUG8cLGqY8026s28uCHM3zFuGDzekLokZZDu7VRJ_8,13077
|
|
4
|
-
rakam_eval_sdk/decorators.py,sha256=_9VFQmoYWd6cqnNryZJWEwYHQRxY7vIOam4z45zBk3c,1794
|
|
5
|
-
rakam_eval_sdk/schema.py,sha256=ozNC56ygzR1G6UABjnqnJVAPVcF4rJMH1pUNH0a1K4M,3617
|
|
6
|
-
rakam_eval_sdk/utils/decorator_utils.py,sha256=g0TjXtG9o4hwhUAFP8GJsXAkjhZhzeseTAg-YBFjj2g,1763
|
|
7
|
-
rakam_eval_sdk-0.2.0rc2.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
|
|
8
|
-
rakam_eval_sdk-0.2.0rc2.dist-info/entry_points.txt,sha256=tNhwmM_UGELb3h0zOfgCrtTheUkP-k8jGv0rTOfRSps,56
|
|
9
|
-
rakam_eval_sdk-0.2.0rc2.dist-info/METADATA,sha256=IokdzEVaNyxqiXYAywxGAAolJA9JI1nIXQxSd7c-bEU,6019
|
|
10
|
-
rakam_eval_sdk-0.2.0rc2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|