rakam-eval-sdk 0.2.4__py3-none-any.whl → 0.2.4rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
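Most of the source changes below are mechanical annotation rewrites: PEP 604 unions such as X | None become typing.Optional[X] or typing.Union[...], presumably to keep the SDK importable on Python 3.9, where the | operator between types raises at function-definition time unless postponed annotation evaluation is enabled. A minimal sketch of the difference (illustration only, not SDK code):

    from pathlib import Path
    from typing import Optional

    def save(out: Optional[Path] = None) -> None:  # fine on Python 3.9
        ...

    # def save(out: Path | None = None) -> None: ...
    # On Python 3.9 the def above fails with:
    #   TypeError: unsupported operand type(s) for |: 'type' and 'NoneType'

The remaining edits add explicit -> None return types, narrow Optional[Dict] results with asserts, and fix two typer.BadParameter messages that referred to a nonexistent --uid option instead of --tag.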
rakam_eval_sdk/cli.py CHANGED
@@ -6,7 +6,7 @@ import uuid
  from datetime import datetime
  from pathlib import Path
  from pprint import pprint
- from typing import Any, Dict, Optional
+ from typing import Any, Dict, List, Optional, Tuple, Union

  import typer
  from dotenv import load_dotenv
@@ -37,14 +37,14 @@ metrics_app = typer.Typer(help="Metrics utilities")
  app.add_typer(metrics_app, name="metrics")


- def extract_metric_names(config: Any) -> list[tuple[str, str | None]]:
+ def extract_metric_names(config: Any) -> list[tuple[str, Optional[str]]]:
      """
      Returns [(type, name)] from EvalConfig / SchemaEvalConfig
      """
      if not hasattr(config, "metrics"):
          return []

-     results: list[tuple[str, str | None]] = []
+     results: list[tuple[str, Optional[str]]] = []

      for metric in config.metrics or []:
          metric_type = getattr(metric, "type", None)
@@ -77,7 +77,7 @@ def metrics(
      files = directory.rglob("*.py") if recursive else directory.glob("*.py")
      TARGET_DECORATOR = eval_run.__name__

-     all_metrics: set[tuple[str, str | None]] = set()
+     all_metrics: set[tuple[str, Optional[str]]] = set()
      found_any = False

      for file in sorted(files):
@@ -159,7 +159,7 @@ def list_evals(
  def list_runs(
      limit: int = typer.Option(20, help="Max number of runs"),
      offset: int = typer.Option(0, help="Pagination offset"),
- ):
+ ) -> None:
      """
      List evaluation runs (newest first).
      """
@@ -170,7 +170,7 @@ def list_runs(
          offset=offset,
          raise_exception=True,
      )
-
+     assert response is not None
      items = response.get("items", [])
      total = response.get("total", 0)

@@ -219,15 +219,15 @@ def show(
          "--raw",
          help="Print raw JSON instead of formatted output",
      ),
- ):
+ ) -> None:
      """
      Show a single evaluation testcase by ID or tag.
      """
      if not id and not tag:
-         raise typer.BadParameter("You must provide either --id or --uid")
+         raise typer.BadParameter("You must provide either --id or --tag")

      if id and tag:
-         raise typer.BadParameter("Provide only one of --id or --uid")
+         raise typer.BadParameter("Provide only one of --id or --tag")

      client = DeepEvalClient()

@@ -235,6 +235,7 @@ def show(
          result = client.get_evaluation_testcase_by_id(id)
          identifier = f"id={id}"
      else:
+         assert tag is not None
          result = client.get_evaluation_testcase_by_tag(tag)
          identifier = f"tag={tag}"

@@ -406,7 +407,7 @@ def run(
  def _print_and_save(
      resp: dict,
      pretty: bool,
-     out: Path | None,
+     out: Optional[Path],
      overwrite: bool,
  ) -> None:
      if pretty:
@@ -430,13 +431,13 @@ def _print_and_save(
          typer.echo(f"💾 Result saved to {out}")


- def pct_change(a: float | None, b: float | None) -> str | None:
+ def pct_change(a: Optional[float], b: Optional[float]) -> Optional[str]:
      if a is None or b is None or a == 0:
          return None
      return f"{((b - a) / a) * 100:+.2f}%"


- def metric_direction(delta: float | None) -> str:
+ def metric_direction(delta: Optional[float]) -> str:
      if delta is None:
          return "unchanged"
      if delta > 0:
@@ -446,7 +447,7 @@ def metric_direction(delta: float | None) -> str:
      return "unchanged"


- def print_metric_diff(diff: MetricDiff):
+ def print_metric_diff(diff: MetricDiff) -> None:
      secho(f"\nMetric: {diff.metric}", bold=True)

      if diff.status == "added":
@@ -507,7 +508,7 @@ def summarize(metrics: Any) -> Dict[str, int]:
      return summary


- def pretty_print_comparison(resp: Any, summary_only: bool = False):
+ def pretty_print_comparison(resp: Any, summary_only: bool = False) -> None:
      if not summary_only:
          for metric in resp.metrics:
              print_metric_diff(metric)
@@ -516,7 +517,7 @@ def pretty_print_comparison(resp: Any, summary_only: bool = False):
          print_summary(resp.metrics)


- def print_summary(metrics: Any):
+ def print_summary(metrics: Any) -> None:
      summary = summarize(metrics)

      secho("\nSummary:", bold=True)
@@ -554,7 +555,7 @@ def compare(
          "--raise",
          help="Raise HTTP exceptions instead of swallowing them",
      ),
-     out: Path | None = typer.Option(
+     out: Optional[Path] = typer.Option(
          None,
          "-o",
          "--out",
@@ -575,7 +576,7 @@ def compare(
      - Summary of improvements / regressions
      """

-     targets: list[tuple[str, str | int]] = []
+     targets: List[Tuple[str, Union[str, int]]] = []

      for r in run:
          targets.append(("run", r))
@@ -600,7 +601,7 @@ def compare(

      client = DeepEvalClient()

-     kwargs = {"raise_exception": raise_exception}
+     kwargs: Dict[str, Any] = {"raise_exception": raise_exception}

      if type_a == "run":
          kwargs["testcase_a_id"] = value_a
@@ -665,7 +666,7 @@ def compare_label_latest(
          "--raise",
          help="Raise HTTP exceptions instead of swallowing them",
      ),
-     out: Path | None = typer.Option(
+     out: Optional[Path] = typer.Option(
          None,
          "-o",
          "--out",
@@ -717,7 +718,7 @@ def compare_last(
          "--raise",
          help="Raise HTTP exceptions instead of swallowing them",
      ),
-     out: Path | None = typer.Option(
+     out: Optional[Path] = typer.Option(
          None,
          "-o",
          "--out",
@@ -769,7 +770,7 @@ def tag_command(
          "--delete",
          help="Delete a tag",
      ),
- ):
+ ) -> None:
      """
      Assign a tag to a run or delete a tag.
      """
@@ -786,8 +787,9 @@ def tag_command(

      client = DeepEvalClient()

-     # --- delete mode ---
      if delete:
+         assert run_id is not None
+
          result = client.update_evaluation_testcase_tag(
              testcase_id=run_id,
              tag=delete,
@@ -796,13 +798,14 @@
          typer.echo("🗑️ Tag deleted successfully")
          typer.echo(f"Tag: {delete}")
          return
-
-     # --- assign/update mode ---
+     assert run_id is not None
+     assert tag is not None
      result = client.update_evaluation_testcase_tag(
          testcase_id=run_id,
          tag=tag,
          raise_exception=True,
      )
+     assert result is not None

      typer.echo("✅ Tag assigned successfully")
      typer.echo(f"Run ID: {run_id}")
rakam_eval_sdk/client.py CHANGED
@@ -1,6 +1,6 @@
  import os
  import random
- from typing import Any, Dict, List, Literal, Optional, Union, cast, overload
+ from typing import Any, Dict, List, Literal, Optional, Tuple, Union, cast, overload

  import requests

@@ -49,8 +49,8 @@ class DeepEvalClient:
          method: HTTPMethod,
          endpoint: str,
          *,
-         json: Dict | None = None,
-         params: Dict | None = None,
+         json: Optional[Dict] = None,
+         params: Optional[Dict] = None,
          raise_exception: bool = False,
      ) -> Optional[Dict]:
          url = f"{self.base_url}{endpoint}"
@@ -89,17 +89,63 @@ class DeepEvalClient:
              "raw": resp.text,
          }

-     def _get(self, endpoint: str, params: Dict, *args, **kw):
-         return self._request("GET", endpoint, params=params, *args, **kw)
+     def _get(
+         self,
+         endpoint: str,
+         params: Dict,
+         raise_exception: bool = False,
+         *args: Tuple,
+         **kw: Dict,
+     ) -> Optional[Dict]:
+         return self._request(
+             "GET", endpoint, params=params, raise_exception=raise_exception, *args, **kw
+         )

-     def _post(self, endpoint: str, payload: Dict, *args, **kw):
-         return self._request("POST", endpoint, json=payload, *args, **kw)
+     def _post(
+         self,
+         endpoint: str,
+         payload: Dict,
+         raise_exception: bool = False,
+         *args: Tuple,
+         **kw: Dict,
+     ) -> Optional[Dict]:
+         return self._request(
+             "POST", endpoint, json=payload, raise_exception=raise_exception, *args, **kw
+         )

-     def _patch(self, endpoint: str, payload: Dict, *args, **kw):
-         return self._request("PATCH", endpoint, json=payload, *args, **kw)
+     def _patch(
+         self,
+         endpoint: str,
+         payload: Dict,
+         raise_exception: bool = False,
+         *args: Tuple,
+         **kw: Dict,
+     ) -> Optional[Dict]:
+         return self._request(
+             "PATCH",
+             endpoint,
+             json=payload,
+             raise_exception=raise_exception,
+             *args,
+             **kw,
+         )

-     def _delete(self, endpoint: str, payload: Dict, *args, **kw):
-         return self._request("DELETE", endpoint, json=payload, *args, **kw)
+     def _delete(
+         self,
+         endpoint: str,
+         payload: Dict,
+         raise_exception: bool = False,
+         *args: Tuple,
+         **kw: Dict,
+     ) -> Optional[Dict]:
+         return self._request(
+             "DELETE",
+             endpoint,
+             json=payload,
+             raise_exception=raise_exception,
+             *args,
+             **kw,
+         )

      def update_evaluation_testcase_tag(
          self,
@@ -197,10 +243,10 @@ class DeepEvalClient:
      def compare_testcases(
          self,
          *,
-         testcase_a_id: int | None = None,
-         testcase_a_tag: str | None = None,
-         testcase_b_id: int | None = None,
-         testcase_b_tag: str | None = None,
+         testcase_a_id: Optional[int] = None,
+         testcase_a_tag: Optional[str] = None,
+         testcase_b_id: Optional[int] = None,
+         testcase_b_tag: Optional[str] = None,
          raise_exception: bool = False,
      ) -> Optional[Dict]:
          """
@@ -208,14 +254,14 @@ class DeepEvalClient:
          Exactly one identifier (id or tag) must be provided per testcase.
          """

-         def validate(id_, tag, name: str):
+         def validate(id_: Optional[int], tag: Optional[str], name: str) -> None:
              if bool(id_) == bool(tag):
                  raise ValueError(f"Provide exactly one of {name}_id or {name}_tag")

          validate(testcase_a_id, testcase_a_tag, "testcase_a")
          validate(testcase_b_id, testcase_b_tag, "testcase_b")

-         params: dict[str, int | str] = {}
+         params: Dict[str, Union[int, str]] = {}

          if testcase_a_id is not None:
              params["testcase_a_id"] = testcase_a_id
@@ -284,18 +330,18 @@ class DeepEvalClient:
          data: List[TextInputItem],
          metrics: List[MetricConfig],
          component: str = "unknown",
-         label: str | None = None,
+         label: Optional[str] = None,
          raise_exception: bool = False,
      ) -> Optional[Dict]: ...

      def text_eval(
          self,
-         config: EvalConfig | None = None,
+         config: Optional[EvalConfig] = None,
          *,
-         data: List[TextInputItem] | None = None,
-         metrics: List[MetricConfig] | None = None,
+         data: Optional[List[TextInputItem]] = None,
+         metrics: Optional[List[MetricConfig]] = None,
          component: str = "unknown",
-         label: str | None = None,
+         label: Optional[str] = None,
          raise_exception: bool = False,
      ) -> Optional[Dict]:
          if config is None:
@@ -337,7 +383,7 @@ class DeepEvalClient:
          data: List[SchemaInputItem],
          metrics: List[SchemaMetricConfig],
          component: str = "unknown",
-         label: str | None = None,
+         label: Optional[str] = None,
          raise_exception: bool = False,
      ) -> Optional[Dict]: ...

@@ -351,12 +397,12 @@

      def schema_eval(
          self,
-         config: SchemaEvalConfig | None = None,
+         config: Optional[SchemaEvalConfig] = None,
          *,
-         data: List[SchemaInputItem] | None = None,
-         metrics: List[SchemaMetricConfig] | None = None,
+         data: Optional[List[SchemaInputItem]] = None,
+         metrics: Optional[List[SchemaMetricConfig]] = None,
          component: str = "unknown",
-         label: str | None = None,
+         label: Optional[str] = None,
          raise_exception: bool = False,
      ) -> Optional[Dict]:
          if config is None:
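The bulk of the client.py change rewrites the _get/_post/_patch/_delete helpers from bare *args/**kw pass-throughs into fully annotated signatures with an explicit raise_exception: bool = False and an Optional[Dict] return type, so every call site becomes type-checkable. A minimal sketch of the pattern, with hypothetical names rather than the SDK's actual implementation:

    from typing import Dict, Optional

    class Client:
        def _request(
            self,
            method: str,
            endpoint: str,
            *,
            json: Optional[Dict] = None,
            raise_exception: bool = False,
        ) -> Optional[Dict]:
            # Stand-in for the real HTTP round trip via requests.
            return {"method": method, "endpoint": endpoint}

        def _post(
            self,
            endpoint: str,
            payload: Dict,
            raise_exception: bool = False,
        ) -> Optional[Dict]:
            # Forwarding the flag explicitly keeps its default visible to
            # type checkers instead of hiding it inside **kw.
            return self._request(
                "POST", endpoint, json=payload, raise_exception=raise_exception
            )

One quirk survives the rewrite: annotating *args: Tuple and **kw: Dict types each forwarded positional as Tuple and each keyword as Dict, which is almost certainly not intended; *args: Any is the conventional spelling.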
rakam_eval_sdk/schema.py CHANGED
@@ -1,6 +1,6 @@
  # Common base class for all metric configs
  import sys
- from typing import Annotated, Any, Dict, List, Literal, Optional, Union
+ from typing import Any, Dict, List, Literal, Optional, Union

  # Base class (you can keep this abstract)
  from pydantic import BaseModel, Field
rakam_eval_sdk-0.2.4rc1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: rakam-eval-sdk
- Version: 0.2.4
+ Version: 0.2.4rc1
  Summary: Evaluation Framework SDK
  Author: Mohamed Bachar Touil
  License: MIT
rakam_eval_sdk-0.2.4rc1.dist-info/RECORD ADDED
@@ -0,0 +1,10 @@
+ rakam_eval_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ rakam_eval_sdk/cli.py,sha256=HjymI0UTKRgJb-ewDuINuTtH605MkcB9nD0qNlbdFyc,21905
+ rakam_eval_sdk/client.py,sha256=8KwpNt7WHgMfcXAQtrAlnUihai9TiADRieiuWeIxg6E,15575
+ rakam_eval_sdk/decorators.py,sha256=_9VFQmoYWd6cqnNryZJWEwYHQRxY7vIOam4z45zBk3c,1794
+ rakam_eval_sdk/schema.py,sha256=DT-uQsE3XB7AAii68QACwIslLqXRQFb1c6efkqLQysI,3960
+ rakam_eval_sdk/utils/decorator_utils.py,sha256=g0TjXtG9o4hwhUAFP8GJsXAkjhZhzeseTAg-YBFjj2g,1763
+ rakam_eval_sdk-0.2.4rc1.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
+ rakam_eval_sdk-0.2.4rc1.dist-info/entry_points.txt,sha256=tNhwmM_UGELb3h0zOfgCrtTheUkP-k8jGv0rTOfRSps,56
+ rakam_eval_sdk-0.2.4rc1.dist-info/METADATA,sha256=Sy2Ghp5wcrmS7AzBkhp8C8LVb0dTS7tSNtgEAJCfbU8,6022
+ rakam_eval_sdk-0.2.4rc1.dist-info/RECORD,,
rakam_eval_sdk-0.2.4.dist-info/RECORD REMOVED
@@ -1,10 +0,0 @@
- rakam_eval_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- rakam_eval_sdk/cli.py,sha256=IvC45LyALCCoxZ7ZNifVCbPs6UveGtUCfjr1q9e1A_Y,21658
- rakam_eval_sdk/client.py,sha256=7spK7P3h5uiA3Q3erOHOwf04N1_RT1pysWyt_E-cz0Q,14666
- rakam_eval_sdk/decorators.py,sha256=_9VFQmoYWd6cqnNryZJWEwYHQRxY7vIOam4z45zBk3c,1794
- rakam_eval_sdk/schema.py,sha256=YfZgHtPh41xFYdQTsbS0OSvNdI8yIv5zoh76v21hemo,3971
- rakam_eval_sdk/utils/decorator_utils.py,sha256=g0TjXtG9o4hwhUAFP8GJsXAkjhZhzeseTAg-YBFjj2g,1763
- rakam_eval_sdk-0.2.4.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
- rakam_eval_sdk-0.2.4.dist-info/entry_points.txt,sha256=tNhwmM_UGELb3h0zOfgCrtTheUkP-k8jGv0rTOfRSps,56
- rakam_eval_sdk-0.2.4.dist-info/METADATA,sha256=XOTmtgO7NiG8NuLn4B_yJujUJONSJCvwGQ9ak3zIhv4,6019
- rakam_eval_sdk-0.2.4.dist-info/RECORD,,
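The RECORD hunks above use the standard wheel RECORD format (PEP 376/427): each line is path,sha256=<urlsafe base64 digest with padding stripped>,size-in-bytes. Entries change only for the files whose contents changed (cli.py, client.py, schema.py, METADATA) and for the renamed dist-info directory. A small sketch of how such an entry can be recomputed, assuming an unpacked wheel on disk:

    import base64
    import hashlib
    from pathlib import Path

    def record_entry(path: Path) -> str:
        """Build a wheel RECORD line: path,sha256=<digest>,<size>."""
        data = path.read_bytes()
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
        return f"{path.as_posix()},sha256={digest.decode()},{len(data)}"

    # Run against the unpacked 0.2.4rc1 wheel contents,
    # record_entry(Path("rakam_eval_sdk/cli.py")) should reproduce
    # the cli.py line in the new RECORD.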