rakam-eval-sdk 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff shows the changes between publicly released versions of this package as published to a supported public registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in that registry.
rakam_eval_sdk/cli.py CHANGED
@@ -35,59 +35,92 @@ metrics_app = typer.Typer(help="Metrics utilities")
  app.add_typer(metrics_app, name="metrics")


+ def extract_metric_names(config: Any) -> list[tuple[str, str | None]]:
+ """
+ Returns [(type, name)] from EvalConfig / SchemaEvalConfig
+ """
+ if not hasattr(config, "metrics"):
+ return []
+
+ results: list[tuple[str, str | None]] = []
+
+ for metric in config.metrics or []:
+ metric_type = getattr(metric, "type", None)
+ metric_name = getattr(metric, "name", None)
+ if metric_type:
+ results.append((metric_type, metric_name))
+
+ return results
+
+
  @metrics_app.command("list")
- def list_metrics(
- limit: int = typer.Option(
- 20,
- "--limit",
- help="Number of testcases to inspect for metrics",
+ def metrics(
+ directory: Path = typer.Argument(
+ Path("./eval"),
+ exists=True,
+ file_okay=False,
+ dir_okay=True,
+ help="Directory to scan (default: ./eval)",
  ),
- ):
+ recursive: bool = typer.Option(
+ False,
+ "-r",
+ "--recursive",
+ help="Recursively search for Python files",
+ ),
+ ) -> None:
  """
- List unique metric names found in evaluation testcases.
+ List all metric types used by loaded eval configs.
  """
- client = DeepEvalClient()
+ files = directory.rglob("*.py") if recursive else directory.glob("*.py")
+ TARGET_DECORATOR = eval_run.__name__

- testcases = client.list_evaluation_testcases(
- limit=limit,
- offset=0,
- raise_exception=True,
- )
+ all_metrics: set[tuple[str, str | None]] = set()
+ found_any = False

- if not testcases:
- typer.echo("No evaluation testcases found.")
- return
+ for file in sorted(files):
+ functions = find_decorated_functions(file, TARGET_DECORATOR)
+ if not functions:
+ continue

- metric_names: set[str] = set()
+ typer.echo(f"\n📄 {file}")

- def collect_metrics(entries: Sequence[Dict] | None):
- if not entries:
- return
- for entry in entries:
- for metric in entry.get("metrics", []) or []:
- name = metric.get("name")
- if name:
- metric_names.add(name)
+ try:
+ module = load_module_from_path(file)
+ except Exception as e:
+ typer.echo(f" ❌ Failed to import module: {e}")
+ continue

- for tc in testcases:
+ for fn_name in functions:
+ try:
+ func = getattr(module, fn_name)
+ result = func()

- collect_metrics(tc.get("result"))
+ metrics = extract_metric_names(result)
+ if not metrics:
+ continue

- if not metric_names:
- typer.echo("No metrics found.")
- return
+ found_any = True
+ for metric_type, metric_name in metrics:
+ all_metrics.add((metric_type, metric_name))

- typer.echo(
- f"📊 Found {len(metric_names)} unique metrics "
- f"(from latest {limit} testcases)\n"
- )
+ if metric_name:
+ typer.echo(f" {metric_type} (alias: {metric_name})")
+ else:
+ typer.echo(f" • {metric_type}")

- for name in sorted(metric_names):
- typer.echo(f"- {name}")
+ except Exception as e:
+ typer.echo(f" Failed to inspect {fn_name}: {e}")
+
+ if not found_any:
+ typer.echo("\nNo metrics found.")
+ raise typer.Exit(code=0)
+
+ typer.echo(f"\n✅ {len(all_metrics)} unique metrics found")


- @list_app.command("eval")
- def list(
+ @list_app.command("evals")
+ def list_evals(
  directory: Path = typer.Argument(
  Path("./eval"),
  exists=True,
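
The new extract_metric_names helper only depends on a config object exposing a .metrics list whose items carry .type and .name attributes. A minimal sketch of its behaviour, using SimpleNamespace stand-ins rather than real EvalConfig objects (the metric type strings and the import path are assumptions for illustration):

    from types import SimpleNamespace

    from rakam_eval_sdk.cli import extract_metric_names  # import path assumed from the file layout

    # Stand-in for an EvalConfig-like object; only .metrics / .type / .name matter here.
    config = SimpleNamespace(
        metrics=[
            SimpleNamespace(type="faithfulness", name="faithfulness-v2"),  # hypothetical metric type
            SimpleNamespace(type="relevancy", name=None),                  # hypothetical metric type
            SimpleNamespace(type=None, name="ignored"),                    # skipped: falsy type
        ]
    )

    assert extract_metric_names(config) == [
        ("faithfulness", "faithfulness-v2"),
        ("relevancy", None),
    ]
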
@@ -140,12 +173,12 @@ def list_runs(
  typer.echo("No evaluation runs found.")
  return

- typer.echo(f"[id] " f"{'unique_id':<20}" f"{'label':<20}" f"created_at")
+ typer.echo(f"[id] " f"{'tag':<20}" f"{'label':<20}" f"created_at")
  # pretty CLI output
  for run in runs:
  run_id = run.get("id")
  label = run.get("label") or "-"
- uid = run.get("unique_id") or "-"
+ uid = run.get("tag") or "-"
  created_at = run.get("created_at")

  if created_at:
@@ -156,20 +189,21 @@ def list_runs(
  except ValueError:
  pass

- typer.echo(f"[{run_id}] " f"{uid:<20} " f"{label:<20} " f"{created_at}")
+ typer.echo(
+ f"[{run_id}] " f"{uid:<20} " f"{label:<20} " f"{created_at}")


- @list_app.command("show")
- def show_testcase(
+ @app.command()
+ def show(
  id: Optional[int] = typer.Option(
  None,
  "--id",
- help="Numeric evaluation testcase ID",
+ help="Run ID",
  ),
- uid: Optional[str] = typer.Option(
+ tag: Optional[str] = typer.Option(
  None,
- "--uid",
- help="Evaluation testcase unique_id",
+ "--tag",
+ help="Run tag",
  ),
  raw: bool = typer.Option(
  False,
@@ -178,12 +212,12 @@ def show_testcase(
  ),
  ):
  """
- Show a single evaluation testcase by ID or unique_id.
+ Show a single evaluation testcase by ID or tag.
  """
- if not id and not uid:
+ if not id and not tag:
  raise typer.BadParameter("You must provide either --id or --uid")

- if id and uid:
+ if id and tag:
  raise typer.BadParameter("Provide only one of --id or --uid")

  client = DeepEvalClient()
@@ -192,8 +226,8 @@ def show_testcase(
  result = client.get_evaluation_testcase_by_id(id)
  identifier = f"id={id}"
  else:
- result = client.get_evaluation_testcase_by_unique_id(uid)
- identifier = f"unique_id={uid}"
+ result = client.get_evaluation_testcase_by_tag(tag)
+ identifier = f"tag={tag}"

  if not result:
  console.print(
@@ -376,7 +410,8 @@ def _print_and_save(
  return

  if out.exists() and not overwrite:
- typer.echo(f"❌ File already exists: {out} (use --overwrite to replace)")
+ typer.echo(
+ f"❌ File already exists: {out} (use --overwrite to replace)")
  raise typer.Exit(code=1)

  out.parent.mkdir(parents=True, exist_ok=True)
@@ -388,14 +423,16 @@ def _print_and_save(


  @app.command()
- def compare_testcases(
- testcase_a_id: int = typer.Argument(
- ...,
- help="ID of the first testcase",
+ def compare(
+ tag: list[str] = typer.Option(
+ [],
+ "--tag",
+ help="Tag identifying a reference testcase",
  ),
- testcase_b_id: int = typer.Argument(
- ...,
- help="ID of the second testcase",
+ run: list[int] = typer.Option(
+ [],
+ "--run",
+ help="Run ID identifying an evaluation testcase",
  ),
  pretty: bool = typer.Option(
  True,
@@ -420,18 +457,45 @@ def compare_testcases(
  ),
  ) -> None:
  """
- Compare two DeepEval evaluation testcases.
+ Compare two evaluation testcases using runs and/or labels.
  """
+
+ targets = []
+
+ for r in run:
+ targets.append(("run", r))
+
+ for l in tag:
+ targets.append(("label", l))
+
+ if len(targets) != 2:
+ typer.echo(
+ "❌ Provide exactly two targets using --run and/or --label"
+ )
+ raise typer.Exit(code=1)
+
  client = DeepEvalClient()

- typer.echo(f"🔍 Comparing testcases {testcase_a_id} {testcase_b_id}")
+ (type_a, value_a), (type_b, value_b) = targets
+
+ typer.echo(
+ f"🔍 Comparing {type_a} '{value_a}' ↔ {type_b} '{value_b}'"
+ )
+
+ kwargs = {"raise_exception": raise_exception}
+
+ if type_a == "run":
+ kwargs["testcase_a_id"] = value_a
+ else:
+ kwargs["testcase_a_tag"] = value_a
+
+ if type_b == "run":
+ kwargs["testcase_b_id"] = value_b
+ else:
+ kwargs["testcase_b_tag"] = value_b

  try:
- resp = client.compare_testcases(
- testcase_a_id=testcase_a_id,
- testcase_b_id=testcase_b_id,
- raise_exception=raise_exception,
- )
+ resp = client.compare_testcases(**kwargs)
  except Exception as e:
  typer.echo(f"❌ Request failed: {e}")
  raise typer.Exit(code=1)
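
Each --run value becomes an ID target and each --tag value a tag target, and the two targets are forwarded to DeepEvalClient.compare_testcases as keyword arguments. A sketch of the equivalent direct SDK call (the ID and tag values are placeholders; the import path is assumed from the file layout):

    from rakam_eval_sdk.client import DeepEvalClient  # import path assumed

    client = DeepEvalClient()
    # roughly what `compare --run 12 --tag baseline` ends up calling
    resp = client.compare_testcases(
        testcase_a_id=12,
        testcase_b_tag="baseline",
        raise_exception=True,
    )
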
@@ -439,10 +503,11 @@ def compare_testcases(
  if not resp:
  typer.echo("⚠️ No response received")
  raise typer.Exit(code=1)
+
  _print_and_save(resp, pretty, out, overwrite)


- @app.command()
+ @app.command(hidden=True)
  def compare_label_latest(
  label_a: str = typer.Argument(
  ...,
@@ -498,7 +563,7 @@ def compare_label_latest(
  _print_and_save(resp, pretty, out, overwrite)


- @app.command()
+ @app.command(hidden=True)
  def compare_last(
  label: str = typer.Argument(
  ...,
@@ -549,6 +614,63 @@ def compare_last(
  _print_and_save(resp, pretty, out, overwrite)


+ @app.command("tag")
+ def tag_command(
+ run_id: Optional[int] = typer.Option(
+ None,
+ "--run",
+ help="Evaluation run ID",
+ ),
+ tag: Optional[str] = typer.Option(
+ None,
+ "--tag",
+ help="Tag to assign to the run",
+ ),
+ delete: Optional[str] = typer.Option(
+ None,
+ "--delete",
+ help="Delete a tag",
+ ),
+ ):
+ """
+ Assign a tag to a run or delete a tag.
+ """
+
+ # --- validation ---
+ if delete:
+ if run_id or tag:
+ typer.echo("❌ --delete cannot be used with --run or --tag")
+ raise typer.Exit(code=1)
+ else:
+ if not run_id or not tag:
+ typer.echo("❌ Use --run and --tag together, or --delete")
+ raise typer.Exit(code=1)
+
+ client = DeepEvalClient()
+
+ # --- delete mode ---
+ if delete:
+ result = client.update_evaluation_testcase_tag(
+ testcase_id=run_id,
+ tag=delete,
+ raise_exception=True,
+ )
+ typer.echo("🗑️ Tag deleted successfully")
+ typer.echo(f"Tag: {delete}")
+ return
+
+ # --- assign/update mode ---
+ result = client.update_evaluation_testcase_tag(
+ testcase_id=run_id,
+ tag=tag,
+ raise_exception=True,
+ )
+
+ typer.echo("✅ Tag assigned successfully")
+ typer.echo(f"Run ID: {run_id}")
+ typer.echo(f"Tag: {result.get('tag')}")
+
+
  def main() -> None:
  app()

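
The assign branch of the new tag command is a thin wrapper over DeepEvalClient.update_evaluation_testcase_tag (added in client.py below). A sketch of the same operation through the SDK, with placeholder values and an assumed import path:

    from rakam_eval_sdk.client import DeepEvalClient  # import path assumed

    client = DeepEvalClient()
    # roughly what `tag --run 42 --tag baseline` does under the hood
    result = client.update_evaluation_testcase_tag(
        testcase_id=42,
        tag="baseline",
        raise_exception=True,
    )
    print(result.get("tag") if result else None)
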
rakam_eval_sdk/client.py CHANGED
@@ -1,6 +1,6 @@
  import os
  import random
- from typing import Any, Dict, List, Optional, Union, cast, overload
+ from typing import Any, Dict, List, Literal, Optional, Union, cast, overload

  import requests

@@ -13,6 +13,8 @@ from .schema import (
  TextInputItem,
  )

+ HTTPMethod = Literal["GET", "POST", "PATCH", "PUT", "DELETE"]
+

  class DeepEvalClient:
  """
@@ -45,21 +47,31 @@ class DeepEvalClient:

  def _request(
  self,
+ method: HTTPMethod,
  endpoint: str,
- payload: dict,
+ *,
+ json: dict | None = None,
+ params: dict | None = None,
  raise_exception: bool = False,
  ) -> Optional[dict]:
- """Internal helper to send POST requests with standard headers and error handling."""
  url = f"{self.base_url}{endpoint}"
+
  headers = {
  "accept": "application/json",
- "Content-Type": "application/json",
  "X-API-Token": self.api_token,
  }

+ if json is not None:
+ headers["Content-Type"] = "application/json"
+
  try:
- resp = requests.post(
- url, headers=headers, json=payload, timeout=self.timeout
+ resp = requests.request(
+ method=method,
+ url=url,
+ headers=headers,
+ json=json,
+ params=params,
+ timeout=self.timeout,
  )
  if raise_exception:
  resp.raise_for_status()
@@ -73,41 +85,47 @@ class DeepEvalClient:
  except ValueError:
  if raise_exception:
  raise
- return {"error": "Invalid JSON response", "raw": resp.text}
+ return {
+ "error": "Invalid JSON response",
+ "raw": resp.text,
+ }

- def _get(
+ def _get(self, endpoint: str, params: dict, *args, **kw):
+ return self._request("GET", endpoint, params=params, *args, **kw)
+
+ def _post(self, endpoint: str, payload: dict, *args, **kw):
+ return self._request("POST", endpoint, json=payload, *args, **kw)
+
+ def _patch(self, endpoint: str, payload: dict, *args, **kw):
+ return self._request("PATCH", endpoint, json=payload, *args, **kw)
+
+ def _delete(self, endpoint: str, payload: dict, *args, **kw):
+ return self._request("DELETE", endpoint, json=payload, *args, **kw)
+
+ def update_evaluation_testcase_tag(
  self,
- endpoint: str,
- params: dict,
+ *,
+ testcase_id: int,
+ tag: Optional[str],
  raise_exception: bool = False,
- ) -> Optional[dict]:
- """Internal helper to send GET requests with standard headers and error handling."""
- url = f"{self.base_url}{endpoint}"
- headers = {
- "accept": "application/json",
- "X-API-Token": self.api_token,
- }
+ ) -> Optional[Dict]:
+ """
+ Add, update, or remove a tag from an evaluation testcase.

- try:
- resp = requests.get(
- url,
- headers=headers,
- params=params,
- timeout=self.timeout,
+ - tag="smoke" → add / update tag
+ - tag=None → remove tag
+ """
+ if testcase_id is None:
+ return self._delete(
+ endpoint=f"/deepeval/tag/{tag}",
+ payload={},
+ raise_exception=raise_exception,
  )
- if raise_exception:
- resp.raise_for_status()
- except requests.RequestException as e:
- if raise_exception:
- raise
- return {"error": str(e)}
-
- try:
- return cast(dict, resp.json())
- except ValueError:
- if raise_exception:
- raise
- return {"error": "Invalid JSON response", "raw": resp.text}
+ return self._patch(
+ endpoint=f"/deepeval/{testcase_id}/tag",
+ payload={"tag": tag},
+ raise_exception=raise_exception,
+ )

  def list_evaluation_testcases(
  self,
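
Going by the docstring and the branches above, the same method also covers removal: tag=None clears the tag on a single testcase via PATCH, while testcase_id=None deletes the tag itself via DELETE. A sketch with placeholder values (import path assumed from the file layout):

    from rakam_eval_sdk.client import DeepEvalClient  # import path assumed

    client = DeepEvalClient()

    # Clear the tag on testcase 42 (PATCH /deepeval/42/tag with {"tag": None})
    client.update_evaluation_testcase_tag(testcase_id=42, tag=None, raise_exception=True)

    # Delete the tag "baseline" itself (DELETE /deepeval/tag/baseline)
    client.update_evaluation_testcase_tag(testcase_id=None, tag="baseline", raise_exception=True)
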
@@ -121,7 +139,7 @@ class DeepEvalClient:
  Sorted by created_at DESC (newest first).
  """
  return self._get(
- "/eval-framework/deepeval/evaluation-testcases/token",
+ endpoint="/eval-framework/deepeval/evaluation-testcases/token",
  params={
  "limit": limit,
  "offset": offset,
@@ -139,22 +157,22 @@ class DeepEvalClient:
  Fetch a single evaluation testcase by numeric ID.
  """
  return self._get(
- f"/eval-framework/deepeval/id/{testcase_id}",
+ endpoint=f"/eval-framework/deepeval/id/{testcase_id}",
  params={},
  raise_exception=raise_exception,
  )

- def get_evaluation_testcase_by_unique_id(
+ def get_evaluation_testcase_by_tag(
  self,
- unique_id: str,
+ tag: str,
  *,
  raise_exception: bool = False,
  ) -> Optional[Dict]:
  """
- Fetch a single evaluation testcase by unique_id.
+ Fetch a single evaluation testcase by tag.
  """
  return self._get(
- f"/eval-framework/deepeval/uid/{unique_id}",
+ endpoint=f"/eval-framework/deepeval/tag/{tag}",
  params={},
  raise_exception=raise_exception,
  )
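
A short usage sketch for the renamed lookup; the tag value is a placeholder and the import path is assumed. With raise_exception left at its default of False, a failed request comes back as an {"error": ...} dict rather than an exception:

    from rakam_eval_sdk.client import DeepEvalClient  # import path assumed

    client = DeepEvalClient()
    tc = client.get_evaluation_testcase_by_tag("baseline")
    if tc and "error" not in tc:
        print(tc.get("id"), tc.get("label"))
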
@@ -163,35 +181,57 @@ class DeepEvalClient:
  self,
  *,
  id: Optional[int] = None,
- unique_id: Optional[str] = None,
+ tag: Optional[str] = None,
  raise_exception: bool = False,
  ) -> Optional[Dict]:
  if id is not None:
  return self.get_evaluation_testcase_by_id(
  id, raise_exception=raise_exception
  )
- if unique_id is not None:
- return self.get_evaluation_testcase_by_unique_id(
- unique_id, raise_exception=raise_exception
+ if tag is not None:
+ return self.get_evaluation_testcase_by_tag(
+ tag, raise_exception=raise_exception
  )
- raise ValueError("Either id or unique_id must be provided")
+ raise ValueError("Either id or tag must be provided")

  def compare_testcases(
  self,
  *,
- testcase_a_id: int,
- testcase_b_id: int,
+ testcase_a_id: int | None = None,
+ testcase_a_tag: str | None = None,
+ testcase_b_id: int | None = None,
+ testcase_b_tag: str | None = None,
  raise_exception: bool = False,
  ) -> Optional[dict]:
  """
- Compare two evaluation testcases.
+ Compare two evaluation testcases using IDs or tags.
+ Exactly one identifier (id or tag) must be provided per testcase.
  """
+
+ def validate(id_, tag, name: str):
+ if bool(id_) == bool(tag):
+ raise ValueError(
+ f"Provide exactly one of {name}_id or {name}_tag"
+ )
+
+ validate(testcase_a_id, testcase_a_tag, "testcase_a")
+ validate(testcase_b_id, testcase_b_tag, "testcase_b")
+
+ params: dict[str, int | str] = {}
+
+ if testcase_a_id is not None:
+ params["testcase_a_id"] = testcase_a_id
+ else:
+ params["testcase_a_tag"] = testcase_a_tag # type: ignore
+
+ if testcase_b_id is not None:
+ params["testcase_b_id"] = testcase_b_id
+ else:
+ params["testcase_b_tag"] = testcase_b_tag # type: ignore
+
  return self._get(
- "/eval-framework/deepeval/evaluation-testcases/compare",
- params={
- "testcase_a_id": testcase_a_id,
- "testcase_b_id": testcase_b_id,
- },
+ endpoint="/eval-framework/deepeval/evaluation-testcases/compare",
+ params=params,
  raise_exception=raise_exception,
  )

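
Any id/tag mix is accepted as long as each side gets exactly one identifier; note that the bool(id_) == bool(tag) check also rejects falsy values such as an ID of 0 or an empty tag string. A sketch comparing two tagged testcases (placeholder values, assumed import path):

    from rakam_eval_sdk.client import DeepEvalClient  # import path assumed

    client = DeepEvalClient()
    resp = client.compare_testcases(
        testcase_a_tag="baseline",
        testcase_b_tag="candidate",
        raise_exception=True,
    )
    # Supplying both identifiers for one side raises ValueError, e.g.:
    # client.compare_testcases(testcase_a_id=1, testcase_a_tag="baseline", testcase_b_id=2)
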
@@ -206,7 +246,7 @@ class DeepEvalClient:
  Compare the latest evaluation testcases for two labels.
  """
  return self._get(
- "/eval-framework/deepeval/evaluation-testcases/compare-latest",
+ endpoint="/eval-framework/deepeval/evaluation-testcases/compare-latest",
  params={
  "label_a": label_a,
  "label_b": label_b,
@@ -224,7 +264,7 @@ class DeepEvalClient:
  Compare the last two evaluation testcases for a given label.
  """
  return self._get(
- "/eval-framework/deepeval/evaluation-testcases/compare-last-two",
+ endpoint="/eval-framework/deepeval/evaluation-testcases/compare-last-two",
  params={
  "label": label,
  },
@@ -268,8 +308,8 @@ class DeepEvalClient:
  label=label,
  )

- return self._request(
- "/deepeval/text-eval", config.model_dump(), raise_exception
+ return self._post(
+ endpoint="/deepeval/text-eval", payload=config.model_dump(), raise_exception=raise_exception
  )

  def text_eval_background(
@@ -284,7 +324,7 @@ class DeepEvalClient:
  payload = EvalConfig.model_construct(
  data=data, metrics=metrics, component=component, version=label
  ).model_dump()
- return self._request("/deepeval/text-eval/background", payload, raise_exception)
+ return self._post(endpoint="/deepeval/text-eval/background", payload=payload, raise_exception=raise_exception)

  @overload
  def schema_eval(
@@ -328,10 +368,10 @@ class DeepEvalClient:
  label=label,
  )

- return self._request(
- "/deepeval/schema-eval",
- config.model_dump(),
- raise_exception,
+ return self._post(
+ endpoint="/deepeval/schema-eval",
+ payload=config.model_dump(),
+ raise_exception=raise_exception,
  )

  def schema_eval_background(
@@ -346,8 +386,8 @@ class DeepEvalClient:
  payload = SchemaEvalConfig.model_construct(
  data=data, metrics=metrics, component=component, version=label
  ).model_dump()
- return self._request(
- "/deepeval/schema-eval/background", payload, raise_exception
+ return self._post(
+ endpoint="/deepeval/schema-eval/background", payload=payload, raise_exception=raise_exception
  )

  def maybe_text_eval(
rakam_eval_sdk/schema.py CHANGED
@@ -117,7 +117,6 @@ class SchemaInputItem(InputItem):

  class EvalConfig(BaseModel):
  __eval_config__ = "text_eval"
- unique_id: Union[str, None] = None
  component: str = "unknown"
  label: Union[str, None] = None
  data: List[TextInputItem]
@@ -127,7 +126,6 @@ class EvalConfig(BaseModel):
  class SchemaEvalConfig(BaseModel):
  __eval_config__ = "schema_eval"
  component: str = "unknown"
- unique_id: Union[str, None] = None
  label: Union[str, None] = None
  data: List[SchemaInputItem]
  metrics: List[SchemaMetricConfig] = Field(default_factory=list)
rakam_eval_sdk-0.2.1.dist-info/METADATA → rakam_eval_sdk-0.2.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: rakam-eval-sdk
- Version: 0.2.1
+ Version: 0.2.3
  Summary: Evaluation Framework SDK
  Author: Mohamed Bachar Touil
  License: MIT
rakam_eval_sdk-0.2.3.dist-info/RECORD ADDED
@@ -0,0 +1,10 @@
+ rakam_eval_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ rakam_eval_sdk/cli.py,sha256=gV4FsAde1WB5rRPLW2mRqTO4X0SZhvepcWH10KE5IIY,17642
+ rakam_eval_sdk/client.py,sha256=sEXogMqtXgKGt_1Xuwg-S2Q2zRC6Y0ZVG_Wy1djbi28,14585
+ rakam_eval_sdk/decorators.py,sha256=_9VFQmoYWd6cqnNryZJWEwYHQRxY7vIOam4z45zBk3c,1794
+ rakam_eval_sdk/schema.py,sha256=woXu5GT1wuP_d7G11idopwpGmk0nV1SrxGShgWk-RA8,3530
+ rakam_eval_sdk/utils/decorator_utils.py,sha256=g0TjXtG9o4hwhUAFP8GJsXAkjhZhzeseTAg-YBFjj2g,1763
+ rakam_eval_sdk-0.2.3.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
+ rakam_eval_sdk-0.2.3.dist-info/entry_points.txt,sha256=tNhwmM_UGELb3h0zOfgCrtTheUkP-k8jGv0rTOfRSps,56
+ rakam_eval_sdk-0.2.3.dist-info/METADATA,sha256=ybu9LsKuQw0gfhJGPW-wTO6LC4bJw7sDlVuUQJQRqZ8,6019
+ rakam_eval_sdk-0.2.3.dist-info/RECORD,,
rakam_eval_sdk-0.2.1.dist-info/RECORD DELETED
@@ -1,10 +0,0 @@
- rakam_eval_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- rakam_eval_sdk/cli.py,sha256=0Ym7C83xirGEf0YCh4-agwfjYPJ3yieeS02nXJ_iE-Y,14486
- rakam_eval_sdk/client.py,sha256=XExVLDzk9Ed3zW5uezAxs5KLROiTODrdmXEcw0g7IE4,13080
- rakam_eval_sdk/decorators.py,sha256=_9VFQmoYWd6cqnNryZJWEwYHQRxY7vIOam4z45zBk3c,1794
- rakam_eval_sdk/schema.py,sha256=P4LlnaInXWTq-ve6qPTTxPyzmj3j_1gcqV9i7CYRYec,3608
- rakam_eval_sdk/utils/decorator_utils.py,sha256=g0TjXtG9o4hwhUAFP8GJsXAkjhZhzeseTAg-YBFjj2g,1763
- rakam_eval_sdk-0.2.1.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
- rakam_eval_sdk-0.2.1.dist-info/entry_points.txt,sha256=tNhwmM_UGELb3h0zOfgCrtTheUkP-k8jGv0rTOfRSps,56
- rakam_eval_sdk-0.2.1.dist-info/METADATA,sha256=u-k-VOQD7BzOaWC0itLYUyQd-55ujT1IdJCma2Y4gf8,6019
- rakam_eval_sdk-0.2.1.dist-info/RECORD,,