rakam-eval-sdk 0.2.1__tar.gz → 0.2.2__tar.gz

This diff shows the changes between two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.

In 0.2.2, `unique_id` is replaced by `tag` across the CLI, client, and schema models, and tags become updatable through a new PATCH endpoint exposed as a `tag` subcommand and an `update_evaluation_testcase_tag` client method. Several CLI commands are renamed (`list eval` → `list evals`, `list show` → a top-level `show`, `compare_testcases` → `compare`), and the client's separate GET/POST helpers are consolidated into a single method-generic `_request`.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: rakam-eval-sdk
-Version: 0.2.1
+Version: 0.2.2
 Summary: Evaluation Framework SDK
 Author: Mohamed Bachar Touil
 License: MIT
@@ -4,7 +4,7 @@ build-backend = "uv_build"
 
 [project]
 name = "rakam-eval-sdk"
-version = "0.2.1"
+version = "0.2.2"
 description = "Evaluation Framework SDK"
 readme = "README.md"
 requires-python = ">=3.8"
@@ -86,8 +86,8 @@ def list_metrics(
         typer.echo(f"- {name}")
 
 
-@list_app.command("eval")
-def list(
+@list_app.command("evals")
+def list_evals(
     directory: Path = typer.Argument(
         Path("./eval"),
         exists=True,
@@ -140,12 +140,12 @@ def list_runs(
         typer.echo("No evaluation runs found.")
         return
 
-    typer.echo(f"[id] " f"{'unique_id':<20}" f"{'label':<20}" f"created_at")
+    typer.echo(f"[id] " f"{'tag':<20}" f"{'label':<20}" f"created_at")
     # pretty CLI output
     for run in runs:
         run_id = run.get("id")
        label = run.get("label") or "-"
-        uid = run.get("unique_id") or "-"
+        uid = run.get("tag") or "-"
         created_at = run.get("created_at")
 
         if created_at:
@@ -156,20 +156,21 @@ def list_runs(
             except ValueError:
                 pass
 
-        typer.echo(f"[{run_id}] " f"{uid:<20} " f"{label:<20} " f"{created_at}")
+        typer.echo(
+            f"[{run_id}] " f"{uid:<20} " f"{label:<20} " f"{created_at}")
 
 
-@list_app.command("show")
-def show_testcase(
+@app.command()
+def show(
     id: Optional[int] = typer.Option(
         None,
         "--id",
         help="Numeric evaluation testcase ID",
     ),
-    uid: Optional[str] = typer.Option(
+    tag: Optional[str] = typer.Option(
         None,
-        "--uid",
-        help="Evaluation testcase unique_id",
+        "--tag",
+        help="Evaluation testcase tag",
     ),
     raw: bool = typer.Option(
         False,
@@ -178,12 +179,12 @@ def show_testcase(
     ),
 ):
     """
-    Show a single evaluation testcase by ID or unique_id.
+    Show a single evaluation testcase by ID or tag.
     """
-    if not id and not uid:
-        raise typer.BadParameter("You must provide either --id or --uid")
+    if not id and not tag:
+        raise typer.BadParameter("You must provide either --id or --tag")
 
-    if id and uid:
-        raise typer.BadParameter("Provide only one of --id or --uid")
+    if id and tag:
+        raise typer.BadParameter("Provide only one of --id or --tag")
 
     client = DeepEvalClient()
@@ -192,8 +193,8 @@ def show_testcase(
         result = client.get_evaluation_testcase_by_id(id)
         identifier = f"id={id}"
     else:
-        result = client.get_evaluation_testcase_by_unique_id(uid)
-        identifier = f"unique_id={uid}"
+        result = client.get_evaluation_testcase_by_tag(tag)
+        identifier = f"tag={tag}"
 
     if not result:
         console.print(
@@ -376,7 +377,8 @@ def _print_and_save(
         return
 
     if out.exists() and not overwrite:
-        typer.echo(f"❌ File already exists: {out} (use --overwrite to replace)")
+        typer.echo(
+            f"❌ File already exists: {out} (use --overwrite to replace)")
         raise typer.Exit(code=1)
 
     out.parent.mkdir(parents=True, exist_ok=True)
@@ -388,7 +390,7 @@ def _print_and_save(
 
 
 @app.command()
-def compare_testcases(
+def compare(
     testcase_a_id: int = typer.Argument(
         ...,
         help="ID of the first testcase",
@@ -549,6 +551,45 @@ def compare_last(
     _print_and_save(resp, pretty, out, overwrite)
 
 
+@list_app.command("tag")
+def update_run_tag(
+    run_id: int = typer.Argument(..., help="Evaluation run ID"),
+    tag: Optional[str] = typer.Option(
+        None,
+        "--tag",
+        "-t",
+        help="Tag to add or update",
+    ),
+    remove: bool = typer.Option(
+        False,
+        "--remove",
+        help="Remove tag from the run",
+    ),
+):
+    """
+    Add, update, or remove a tag from an evaluation run.
+    """
+    if not tag and not remove:
+        typer.echo("❌ You must provide --tag or --remove")
+        raise typer.Exit(code=1)
+
+    if tag and remove:
+        typer.echo("❌ Use either --tag or --remove, not both")
+        raise typer.Exit(code=1)
+
+    client = DeepEvalClient()
+
+    result = client.update_evaluation_testcase_tag(
+        testcase_id=run_id,
+        tag=None if remove else tag,
+        raise_exception=True,
+    )
+
+    action = "removed" if remove else "updated"
+    typer.echo(f"✅ Tag {action} successfully")
+    typer.echo(f"Run ID: {run_id}")
+    typer.echo(f"Tag: {result.get('tag') or '-'}")
+
 def main() -> None:
     app()
 
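For reference, a minimal sketch of the renamed and newly added commands via Typer's test runner. It assumes the CLI app object is importable as `app` from `rakam_eval_sdk.cli` and that `list_app` is mounted under the name `list`; neither is shown in this diff.

from typer.testing import CliRunner

from rakam_eval_sdk.cli import app  # hypothetical import path

runner = CliRunner()

# Renamed in 0.2.2: `list eval` is now `list evals`
runner.invoke(app, ["list", "evals", "./eval"])

# `show` moved from the `list` sub-app to the top level and selects by --tag
runner.invoke(app, ["show", "--tag", "smoke"])

# New in 0.2.2: add, update, or remove a tag on an evaluation run
runner.invoke(app, ["list", "tag", "12", "--tag", "smoke"])
runner.invoke(app, ["list", "tag", "12", "--remove"])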
@@ -1,3 +1,4 @@
+from typing import Optional, Dict
 import os
 import random
 from typing import Any, Dict, List, Optional, Union, cast, overload
@@ -12,6 +13,9 @@ from .schema import (
     SchemaMetricConfig,
     TextInputItem,
 )
+from typing import Optional, Literal, cast
+
+HTTPMethod = Literal["GET", "POST", "PATCH", "PUT", "DELETE"]
 
 
 class DeepEvalClient:
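The `HTTPMethod` alias constrains the verb argument at type-check time only; a minimal self-contained illustration (the `send` function is hypothetical, not part of the SDK):

from typing import Literal

HTTPMethod = Literal["GET", "POST", "PATCH", "PUT", "DELETE"]

def send(method: HTTPMethod) -> None:
    # Literal types are erased at runtime; only static checkers enforce them
    print(method)

send("PATCH")  # accepted by mypy/pyright
send("FETCH")  # flagged by mypy/pyright, but still runs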
@@ -45,53 +49,29 @@ class DeepEvalClient:
 
     def _request(
         self,
+        method: HTTPMethod,
         endpoint: str,
-        payload: dict,
+        *,
+        json: Optional[dict] = None,
+        params: Optional[dict] = None,
         raise_exception: bool = False,
     ) -> Optional[dict]:
-        """Internal helper to send POST requests with standard headers and error handling."""
         url = f"{self.base_url}{endpoint}"
-        headers = {
-            "accept": "application/json",
-            "Content-Type": "application/json",
-            "X-API-Token": self.api_token,
-        }
-
-        try:
-            resp = requests.post(
-                url, headers=headers, json=payload, timeout=self.timeout
-            )
-            if raise_exception:
-                resp.raise_for_status()
-        except requests.RequestException as e:
-            if raise_exception:
-                raise
-            return {"error": str(e)}
-
-        try:
-            return cast(dict, resp.json())
-        except ValueError:
-            if raise_exception:
-                raise
-            return {"error": "Invalid JSON response", "raw": resp.text}
 
-    def _get(
-        self,
-        endpoint: str,
-        params: dict,
-        raise_exception: bool = False,
-    ) -> Optional[dict]:
-        """Internal helper to send GET requests with standard headers and error handling."""
-        url = f"{self.base_url}{endpoint}"
         headers = {
             "accept": "application/json",
             "X-API-Token": self.api_token,
         }
 
+        # Content-Type is only needed when a JSON body is sent
+        if json is not None:
+            headers["Content-Type"] = "application/json"
+
         try:
-            resp = requests.get(
-                url,
+            resp = requests.request(
+                method=method,
+                url=url,
                 headers=headers,
+                json=json,
                 params=params,
                 timeout=self.timeout,
             )
@@ -107,7 +87,38 @@ class DeepEvalClient:
         except ValueError:
             if raise_exception:
                 raise
-            return {"error": "Invalid JSON response", "raw": resp.text}
+            return {
+                "error": "Invalid JSON response",
+                "raw": resp.text,
+            }
+
+    # Thin wrappers over _request; raise_exception is an explicit positional
+    # parameter so call sites like self._post(endpoint, payload, raise_exception) work.
+    def _get(self, endpoint: str, params: dict, raise_exception: bool = False):
+        return self._request("GET", endpoint, params=params, raise_exception=raise_exception)
+
+    def _post(self, endpoint: str, payload: dict, raise_exception: bool = False):
+        return self._request("POST", endpoint, json=payload, raise_exception=raise_exception)
+
+    def _patch(self, endpoint: str, payload: dict, raise_exception: bool = False):
+        return self._request("PATCH", endpoint, json=payload, raise_exception=raise_exception)
+
+    def update_evaluation_testcase_tag(
+        self,
+        *,
+        testcase_id: int,
+        tag: Optional[str],
+        raise_exception: bool = False,
+    ) -> Optional[Dict]:
+        """
+        Add, update, or remove a tag from an evaluation testcase.
+
+        - tag="smoke" → add / update tag
+        - tag=None → remove tag
+        """
+        return self._patch(
+            f"/evaluation-testcases/{testcase_id}/tag",
+            payload={"tag": tag},
+            raise_exception=raise_exception,
+        )
 
     def list_evaluation_testcases(
         self,
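Together, the `_patch` wrapper and `update_evaluation_testcase_tag` make tagging a one-call operation. A minimal usage sketch, assuming the client is importable as below and configured through its defaults (the import path is a guess, not shown in this diff):

from rakam_eval_sdk.client import DeepEvalClient  # hypothetical import path

client = DeepEvalClient()

# Add or update: PATCH /evaluation-testcases/12/tag with {"tag": "smoke"}
client.update_evaluation_testcase_tag(testcase_id=12, tag="smoke")

# Remove: tag=None sends {"tag": null}
client.update_evaluation_testcase_tag(testcase_id=12, tag=None)

# Look up by tag (replaces get_evaluation_testcase_by_unique_id)
testcase = client.get_evaluation_testcase_by_tag("smoke")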
@@ -144,17 +155,17 @@ class DeepEvalClient:
             raise_exception=raise_exception,
         )
 
-    def get_evaluation_testcase_by_unique_id(
+    def get_evaluation_testcase_by_tag(
         self,
-        unique_id: str,
+        tag: str,
         *,
         raise_exception: bool = False,
     ) -> Optional[Dict]:
         """
-        Fetch a single evaluation testcase by unique_id.
+        Fetch a single evaluation testcase by tag.
         """
         return self._get(
-            f"/eval-framework/deepeval/uid/{unique_id}",
+            f"/eval-framework/deepeval/tag/{tag}",
             params={},
             raise_exception=raise_exception,
         )
@@ -163,18 +174,18 @@
         self,
         *,
         id: Optional[int] = None,
-        unique_id: Optional[str] = None,
+        tag: Optional[str] = None,
         raise_exception: bool = False,
     ) -> Optional[Dict]:
         if id is not None:
             return self.get_evaluation_testcase_by_id(
                 id, raise_exception=raise_exception
             )
-        if unique_id is not None:
-            return self.get_evaluation_testcase_by_unique_id(
-                unique_id, raise_exception=raise_exception
+        if tag is not None:
+            return self.get_evaluation_testcase_by_tag(
+                tag, raise_exception=raise_exception
             )
-        raise ValueError("Either id or unique_id must be provided")
+        raise ValueError("Either id or tag must be provided")
 
     def compare_testcases(
         self,
@@ -268,7 +279,7 @@ class DeepEvalClient:
             label=label,
         )
 
-        return self._request(
+        return self._post(
             "/deepeval/text-eval", config.model_dump(), raise_exception
         )
 
@@ -284,7 +295,7 @@
         payload = EvalConfig.model_construct(
             data=data, metrics=metrics, component=component, version=label
         ).model_dump()
-        return self._request("/deepeval/text-eval/background", payload, raise_exception)
+        return self._post("/deepeval/text-eval/background", payload, raise_exception)
 
     @overload
     def schema_eval(
@@ -328,7 +339,7 @@
             label=label,
         )
 
-        return self._request(
+        return self._post(
             "/deepeval/schema-eval",
             config.model_dump(),
             raise_exception,
@@ -346,7 +357,7 @@
         payload = SchemaEvalConfig.model_construct(
             data=data, metrics=metrics, component=component, version=label
         ).model_dump()
-        return self._request(
+        return self._post(
             "/deepeval/schema-eval/background", payload, raise_exception
         )
 
@@ -117,7 +117,6 @@ class SchemaInputItem(
 
 class EvalConfig(BaseModel):
     __eval_config__ = "text_eval"
-    unique_id: Union[str, None] = None
     component: str = "unknown"
     label: Union[str, None] = None
     data: List[TextInputItem]
@@ -127,7 +126,6 @@ class EvalConfig(BaseModel):
 class SchemaEvalConfig(BaseModel):
     __eval_config__ = "schema_eval"
     component: str = "unknown"
-    unique_id: Union[str, None] = None
     label: Union[str, None] = None
     data: List[SchemaInputItem]
     metrics: List[SchemaMetricConfig] = Field(default_factory=list)
File without changes