rakam-eval-sdk 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rakam_eval_sdk/cli.py +58 -17
- rakam_eval_sdk/client.py +60 -49
- rakam_eval_sdk/schema.py +0 -2
- {rakam_eval_sdk-0.2.1.dist-info → rakam_eval_sdk-0.2.2.dist-info}/METADATA +1 -1
- rakam_eval_sdk-0.2.2.dist-info/RECORD +10 -0
- rakam_eval_sdk-0.2.1.dist-info/RECORD +0 -10
- {rakam_eval_sdk-0.2.1.dist-info → rakam_eval_sdk-0.2.2.dist-info}/WHEEL +0 -0
- {rakam_eval_sdk-0.2.1.dist-info → rakam_eval_sdk-0.2.2.dist-info}/entry_points.txt +0 -0
rakam_eval_sdk/cli.py
CHANGED
|
@@ -86,8 +86,8 @@ def list_metrics(
|
|
|
86
86
|
typer.echo(f"- {name}")
|
|
87
87
|
|
|
88
88
|
|
|
89
|
-
@list_app.command("
|
|
90
|
-
def
|
|
89
|
+
@list_app.command("evals")
|
|
90
|
+
def list_evals(
|
|
91
91
|
directory: Path = typer.Argument(
|
|
92
92
|
Path("./eval"),
|
|
93
93
|
exists=True,
|
|
@@ -140,12 +140,12 @@ def list_runs(
|
|
|
140
140
|
typer.echo("No evaluation runs found.")
|
|
141
141
|
return
|
|
142
142
|
|
|
143
|
-
typer.echo(f"[id] " f"{'
|
|
143
|
+
typer.echo(f"[id] " f"{'tag':<20}" f"{'label':<20}" f"created_at")
|
|
144
144
|
# pretty CLI output
|
|
145
145
|
for run in runs:
|
|
146
146
|
run_id = run.get("id")
|
|
147
147
|
label = run.get("label") or "-"
|
|
148
|
-
uid = run.get("
|
|
148
|
+
uid = run.get("tag") or "-"
|
|
149
149
|
created_at = run.get("created_at")
|
|
150
150
|
|
|
151
151
|
if created_at:
|
|
@@ -156,20 +156,21 @@ def list_runs(
|
|
|
156
156
|
except ValueError:
|
|
157
157
|
pass
|
|
158
158
|
|
|
159
|
-
typer.echo(
|
|
159
|
+
typer.echo(
|
|
160
|
+
f"[{run_id}] " f"{uid:<20} " f"{label:<20} " f"{created_at}")
|
|
160
161
|
|
|
161
162
|
|
|
162
|
-
@
|
|
163
|
-
def
|
|
163
|
+
@app.command()
|
|
164
|
+
def show(
|
|
164
165
|
id: Optional[int] = typer.Option(
|
|
165
166
|
None,
|
|
166
167
|
"--id",
|
|
167
168
|
help="Numeric evaluation testcase ID",
|
|
168
169
|
),
|
|
169
|
-
|
|
170
|
+
tag: Optional[str] = typer.Option(
|
|
170
171
|
None,
|
|
171
|
-
"--
|
|
172
|
-
help="Evaluation testcase
|
|
172
|
+
"--tag",
|
|
173
|
+
help="Evaluation testcase tag",
|
|
173
174
|
),
|
|
174
175
|
raw: bool = typer.Option(
|
|
175
176
|
False,
|
|
@@ -178,12 +179,12 @@ def show_testcase(
|
|
|
178
179
|
),
|
|
179
180
|
):
|
|
180
181
|
"""
|
|
181
|
-
Show a single evaluation testcase by ID or
|
|
182
|
+
Show a single evaluation testcase by ID or tag.
|
|
182
183
|
"""
|
|
183
|
-
if not id and not
|
|
184
|
+
if not id and not tag:
|
|
184
185
|
raise typer.BadParameter("You must provide either --id or --uid")
|
|
185
186
|
|
|
186
|
-
if id and
|
|
187
|
+
if id and tag:
|
|
187
188
|
raise typer.BadParameter("Provide only one of --id or --uid")
|
|
188
189
|
|
|
189
190
|
client = DeepEvalClient()
|
|
@@ -192,8 +193,8 @@ def show_testcase(
|
|
|
192
193
|
result = client.get_evaluation_testcase_by_id(id)
|
|
193
194
|
identifier = f"id={id}"
|
|
194
195
|
else:
|
|
195
|
-
result = client.
|
|
196
|
-
identifier = f"
|
|
196
|
+
result = client.get_evaluation_testcase_by_tag(tag)
|
|
197
|
+
identifier = f"tag={tag}"
|
|
197
198
|
|
|
198
199
|
if not result:
|
|
199
200
|
console.print(
|
|
@@ -376,7 +377,8 @@ def _print_and_save(
|
|
|
376
377
|
return
|
|
377
378
|
|
|
378
379
|
if out.exists() and not overwrite:
|
|
379
|
-
typer.echo(
|
|
380
|
+
typer.echo(
|
|
381
|
+
f"❌ File already exists: {out} (use --overwrite to replace)")
|
|
380
382
|
raise typer.Exit(code=1)
|
|
381
383
|
|
|
382
384
|
out.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -388,7 +390,7 @@ def _print_and_save(
|
|
|
388
390
|
|
|
389
391
|
|
|
390
392
|
@app.command()
|
|
391
|
-
def
|
|
393
|
+
def compare(
|
|
392
394
|
testcase_a_id: int = typer.Argument(
|
|
393
395
|
...,
|
|
394
396
|
help="ID of the first testcase",
|
|
@@ -549,6 +551,45 @@ def compare_last(
|
|
|
549
551
|
_print_and_save(resp, pretty, out, overwrite)
|
|
550
552
|
|
|
551
553
|
|
|
554
|
+
@list_app.command("tag")
|
|
555
|
+
def update_run_tag(
|
|
556
|
+
run_id: int = typer.Argument(..., help="Evaluation run ID"),
|
|
557
|
+
tag: Optional[str] = typer.Option(
|
|
558
|
+
None,
|
|
559
|
+
"--tag",
|
|
560
|
+
"-t",
|
|
561
|
+
help="Tag to add or update",
|
|
562
|
+
),
|
|
563
|
+
remove: bool = typer.Option(
|
|
564
|
+
False,
|
|
565
|
+
"--remove",
|
|
566
|
+
help="Remove tag from the run",
|
|
567
|
+
),
|
|
568
|
+
):
|
|
569
|
+
"""
|
|
570
|
+
Add, update, or remove a tag from an evaluation run.
|
|
571
|
+
"""
|
|
572
|
+
if not tag and not remove:
|
|
573
|
+
typer.echo("❌ You must provide --tag or --remove")
|
|
574
|
+
raise typer.Exit(code=1)
|
|
575
|
+
|
|
576
|
+
if tag and remove:
|
|
577
|
+
typer.echo("❌ Use either --tag or --remove, not both")
|
|
578
|
+
raise typer.Exit(code=1)
|
|
579
|
+
|
|
580
|
+
client = DeepEvalClient()
|
|
581
|
+
|
|
582
|
+
result = client.update_evaluation_testcase_tag(
|
|
583
|
+
testcase_id=run_id,
|
|
584
|
+
tag=None if remove else tag,
|
|
585
|
+
raise_exception=True,
|
|
586
|
+
)
|
|
587
|
+
|
|
588
|
+
action = "removed" if remove else "updated"
|
|
589
|
+
typer.echo(f"✅ Tag {action} successfully")
|
|
590
|
+
typer.echo(f"Run ID: {run_id}")
|
|
591
|
+
typer.echo(f"Tag: {result.get('tag') or '-'}")
|
|
592
|
+
|
|
552
593
|
def main() -> None:
|
|
553
594
|
app()
|
|
554
595
|
|
rakam_eval_sdk/client.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from typing import Optional, Dict
|
|
1
2
|
import os
|
|
2
3
|
import random
|
|
3
4
|
from typing import Any, Dict, List, Optional, Union, cast, overload
|
|
@@ -12,6 +13,9 @@ from .schema import (
|
|
|
12
13
|
SchemaMetricConfig,
|
|
13
14
|
TextInputItem,
|
|
14
15
|
)
|
|
16
|
+
from typing import Optional, Literal, cast
|
|
17
|
+
|
|
18
|
+
HTTPMethod = Literal["GET", "POST", "PATCH", "PUT", "DELETE"]
|
|
15
19
|
|
|
16
20
|
|
|
17
21
|
class DeepEvalClient:
|
|
@@ -45,53 +49,29 @@ class DeepEvalClient:
|
|
|
45
49
|
|
|
46
50
|
def _request(
|
|
47
51
|
self,
|
|
52
|
+
method: HTTPMethod,
|
|
48
53
|
endpoint: str,
|
|
49
|
-
|
|
54
|
+
*,
|
|
55
|
+
json: dict | None = None,
|
|
56
|
+
params: dict | None = None,
|
|
50
57
|
raise_exception: bool = False,
|
|
51
58
|
) -> Optional[dict]:
|
|
52
|
-
"""Internal helper to send POST requests with standard headers and error handling."""
|
|
53
59
|
url = f"{self.base_url}{endpoint}"
|
|
54
|
-
headers = {
|
|
55
|
-
"accept": "application/json",
|
|
56
|
-
"Content-Type": "application/json",
|
|
57
|
-
"X-API-Token": self.api_token,
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
try:
|
|
61
|
-
resp = requests.post(
|
|
62
|
-
url, headers=headers, json=payload, timeout=self.timeout
|
|
63
|
-
)
|
|
64
|
-
if raise_exception:
|
|
65
|
-
resp.raise_for_status()
|
|
66
|
-
except requests.RequestException as e:
|
|
67
|
-
if raise_exception:
|
|
68
|
-
raise
|
|
69
|
-
return {"error": str(e)}
|
|
70
|
-
|
|
71
|
-
try:
|
|
72
|
-
return cast(dict, resp.json())
|
|
73
|
-
except ValueError:
|
|
74
|
-
if raise_exception:
|
|
75
|
-
raise
|
|
76
|
-
return {"error": "Invalid JSON response", "raw": resp.text}
|
|
77
60
|
|
|
78
|
-
def _get(
|
|
79
|
-
self,
|
|
80
|
-
endpoint: str,
|
|
81
|
-
params: dict,
|
|
82
|
-
raise_exception: bool = False,
|
|
83
|
-
) -> Optional[dict]:
|
|
84
|
-
"""Internal helper to send GET requests with standard headers and error handling."""
|
|
85
|
-
url = f"{self.base_url}{endpoint}"
|
|
86
61
|
headers = {
|
|
87
62
|
"accept": "application/json",
|
|
88
63
|
"X-API-Token": self.api_token,
|
|
89
64
|
}
|
|
90
65
|
|
|
66
|
+
if json is not None:
|
|
67
|
+
headers["Content-Type"] = "application/json"
|
|
68
|
+
|
|
91
69
|
try:
|
|
92
|
-
resp = requests.
|
|
93
|
-
|
|
70
|
+
resp = requests.request(
|
|
71
|
+
method=method,
|
|
72
|
+
url=url,
|
|
94
73
|
headers=headers,
|
|
74
|
+
json=json,
|
|
95
75
|
params=params,
|
|
96
76
|
timeout=self.timeout,
|
|
97
77
|
)
|
|
@@ -107,7 +87,38 @@ class DeepEvalClient:
|
|
|
107
87
|
except ValueError:
|
|
108
88
|
if raise_exception:
|
|
109
89
|
raise
|
|
110
|
-
return {
|
|
90
|
+
return {
|
|
91
|
+
"error": "Invalid JSON response",
|
|
92
|
+
"raw": resp.text,
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
def _get(self, endpoint: str, params: dict, **kw):
|
|
96
|
+
return self._request("GET", endpoint, params=params, **kw)
|
|
97
|
+
|
|
98
|
+
def _post(self, endpoint: str, payload: dict, **kw):
|
|
99
|
+
return self._request("POST", endpoint, json=payload, **kw)
|
|
100
|
+
|
|
101
|
+
def _patch(self, endpoint: str, payload: dict, **kw):
|
|
102
|
+
return self._request("PATCH", endpoint, json=payload, **kw)
|
|
103
|
+
|
|
104
|
+
def update_evaluation_testcase_tag(
|
|
105
|
+
self,
|
|
106
|
+
*,
|
|
107
|
+
testcase_id: int,
|
|
108
|
+
tag: Optional[str],
|
|
109
|
+
raise_exception: bool = False,
|
|
110
|
+
) -> Optional[Dict]:
|
|
111
|
+
"""
|
|
112
|
+
Add, update, or remove a tag from an evaluation testcase.
|
|
113
|
+
|
|
114
|
+
- tag="smoke" → add / update tag
|
|
115
|
+
- tag=None → remove tag
|
|
116
|
+
"""
|
|
117
|
+
return self._patch(
|
|
118
|
+
f"/evaluation-testcases/{testcase_id}/tag",
|
|
119
|
+
payload={"tag": tag},
|
|
120
|
+
raise_exception=raise_exception,
|
|
121
|
+
)
|
|
111
122
|
|
|
112
123
|
def list_evaluation_testcases(
|
|
113
124
|
self,
|
|
@@ -144,17 +155,17 @@ class DeepEvalClient:
|
|
|
144
155
|
raise_exception=raise_exception,
|
|
145
156
|
)
|
|
146
157
|
|
|
147
|
-
def
|
|
158
|
+
def get_evaluation_testcase_by_tag(
|
|
148
159
|
self,
|
|
149
|
-
|
|
160
|
+
tag: str,
|
|
150
161
|
*,
|
|
151
162
|
raise_exception: bool = False,
|
|
152
163
|
) -> Optional[Dict]:
|
|
153
164
|
"""
|
|
154
|
-
Fetch a single evaluation testcase by
|
|
165
|
+
Fetch a single evaluation testcase by tag.
|
|
155
166
|
"""
|
|
156
167
|
return self._get(
|
|
157
|
-
f"/eval-framework/deepeval/
|
|
168
|
+
f"/eval-framework/deepeval/tag/{tag}",
|
|
158
169
|
params={},
|
|
159
170
|
raise_exception=raise_exception,
|
|
160
171
|
)
|
|
@@ -163,18 +174,18 @@ class DeepEvalClient:
|
|
|
163
174
|
self,
|
|
164
175
|
*,
|
|
165
176
|
id: Optional[int] = None,
|
|
166
|
-
|
|
177
|
+
tag: Optional[str] = None,
|
|
167
178
|
raise_exception: bool = False,
|
|
168
179
|
) -> Optional[Dict]:
|
|
169
180
|
if id is not None:
|
|
170
181
|
return self.get_evaluation_testcase_by_id(
|
|
171
182
|
id, raise_exception=raise_exception
|
|
172
183
|
)
|
|
173
|
-
if
|
|
174
|
-
return self.
|
|
175
|
-
|
|
184
|
+
if tag is not None:
|
|
185
|
+
return self.get_evaluation_testcase_by_tag(
|
|
186
|
+
tag, raise_exception=raise_exception
|
|
176
187
|
)
|
|
177
|
-
raise ValueError("Either id or
|
|
188
|
+
raise ValueError("Either id or tag must be provided")
|
|
178
189
|
|
|
179
190
|
def compare_testcases(
|
|
180
191
|
self,
|
|
@@ -268,7 +279,7 @@ class DeepEvalClient:
|
|
|
268
279
|
label=label,
|
|
269
280
|
)
|
|
270
281
|
|
|
271
|
-
return self.
|
|
282
|
+
return self._post(
|
|
272
283
|
"/deepeval/text-eval", config.model_dump(), raise_exception
|
|
273
284
|
)
|
|
274
285
|
|
|
@@ -284,7 +295,7 @@ class DeepEvalClient:
|
|
|
284
295
|
payload = EvalConfig.model_construct(
|
|
285
296
|
data=data, metrics=metrics, component=component, version=label
|
|
286
297
|
).model_dump()
|
|
287
|
-
return self.
|
|
298
|
+
return self._post("/deepeval/text-eval/background", payload, raise_exception)
|
|
288
299
|
|
|
289
300
|
@overload
|
|
290
301
|
def schema_eval(
|
|
@@ -328,7 +339,7 @@ class DeepEvalClient:
|
|
|
328
339
|
label=label,
|
|
329
340
|
)
|
|
330
341
|
|
|
331
|
-
return self.
|
|
342
|
+
return self._post(
|
|
332
343
|
"/deepeval/schema-eval",
|
|
333
344
|
config.model_dump(),
|
|
334
345
|
raise_exception,
|
|
@@ -346,7 +357,7 @@ class DeepEvalClient:
|
|
|
346
357
|
payload = SchemaEvalConfig.model_construct(
|
|
347
358
|
data=data, metrics=metrics, component=component, version=label
|
|
348
359
|
).model_dump()
|
|
349
|
-
return self.
|
|
360
|
+
return self._post(
|
|
350
361
|
"/deepeval/schema-eval/background", payload, raise_exception
|
|
351
362
|
)
|
|
352
363
|
|
rakam_eval_sdk/schema.py
CHANGED
|
@@ -117,7 +117,6 @@ class SchemaInputItem(InputItem):
|
|
|
117
117
|
|
|
118
118
|
class EvalConfig(BaseModel):
|
|
119
119
|
__eval_config__ = "text_eval"
|
|
120
|
-
unique_id: Union[str, None] = None
|
|
121
120
|
component: str = "unknown"
|
|
122
121
|
label: Union[str, None] = None
|
|
123
122
|
data: List[TextInputItem]
|
|
@@ -127,7 +126,6 @@ class EvalConfig(BaseModel):
|
|
|
127
126
|
class SchemaEvalConfig(BaseModel):
|
|
128
127
|
__eval_config__ = "schema_eval"
|
|
129
128
|
component: str = "unknown"
|
|
130
|
-
unique_id: Union[str, None] = None
|
|
131
129
|
label: Union[str, None] = None
|
|
132
130
|
data: List[SchemaInputItem]
|
|
133
131
|
metrics: List[SchemaMetricConfig] = Field(default_factory=list)
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
rakam_eval_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
rakam_eval_sdk/cli.py,sha256=LQBaTJGpXGRMNPkkqw_NzK4d1dbF8p4A4_8SN_XALgI,15500
|
|
3
|
+
rakam_eval_sdk/client.py,sha256=e15d7ycm8G7oJuX_Ie9cB39TK33taMjFvVwc_4kLMww,13255
|
|
4
|
+
rakam_eval_sdk/decorators.py,sha256=_9VFQmoYWd6cqnNryZJWEwYHQRxY7vIOam4z45zBk3c,1794
|
|
5
|
+
rakam_eval_sdk/schema.py,sha256=woXu5GT1wuP_d7G11idopwpGmk0nV1SrxGShgWk-RA8,3530
|
|
6
|
+
rakam_eval_sdk/utils/decorator_utils.py,sha256=g0TjXtG9o4hwhUAFP8GJsXAkjhZhzeseTAg-YBFjj2g,1763
|
|
7
|
+
rakam_eval_sdk-0.2.2.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
|
|
8
|
+
rakam_eval_sdk-0.2.2.dist-info/entry_points.txt,sha256=tNhwmM_UGELb3h0zOfgCrtTheUkP-k8jGv0rTOfRSps,56
|
|
9
|
+
rakam_eval_sdk-0.2.2.dist-info/METADATA,sha256=2KBfQDA4cIq634ZFZ0z--FtOtty_PG_T8Pgh8a9CLJk,6019
|
|
10
|
+
rakam_eval_sdk-0.2.2.dist-info/RECORD,,
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
rakam_eval_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
rakam_eval_sdk/cli.py,sha256=0Ym7C83xirGEf0YCh4-agwfjYPJ3yieeS02nXJ_iE-Y,14486
|
|
3
|
-
rakam_eval_sdk/client.py,sha256=XExVLDzk9Ed3zW5uezAxs5KLROiTODrdmXEcw0g7IE4,13080
|
|
4
|
-
rakam_eval_sdk/decorators.py,sha256=_9VFQmoYWd6cqnNryZJWEwYHQRxY7vIOam4z45zBk3c,1794
|
|
5
|
-
rakam_eval_sdk/schema.py,sha256=P4LlnaInXWTq-ve6qPTTxPyzmj3j_1gcqV9i7CYRYec,3608
|
|
6
|
-
rakam_eval_sdk/utils/decorator_utils.py,sha256=g0TjXtG9o4hwhUAFP8GJsXAkjhZhzeseTAg-YBFjj2g,1763
|
|
7
|
-
rakam_eval_sdk-0.2.1.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
|
|
8
|
-
rakam_eval_sdk-0.2.1.dist-info/entry_points.txt,sha256=tNhwmM_UGELb3h0zOfgCrtTheUkP-k8jGv0rTOfRSps,56
|
|
9
|
-
rakam_eval_sdk-0.2.1.dist-info/METADATA,sha256=u-k-VOQD7BzOaWC0itLYUyQd-55ujT1IdJCma2Y4gf8,6019
|
|
10
|
-
rakam_eval_sdk-0.2.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|