rakam-eval-sdk 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rakam_eval_sdk-0.2.2 → rakam_eval_sdk-0.2.3}/PKG-INFO +1 -1
- {rakam_eval_sdk-0.2.2 → rakam_eval_sdk-0.2.3}/pyproject.toml +1 -1
- {rakam_eval_sdk-0.2.2 → rakam_eval_sdk-0.2.3}/src/rakam_eval_sdk/cli.py +154 -73
- {rakam_eval_sdk-0.2.2 → rakam_eval_sdk-0.2.3}/src/rakam_eval_sdk/client.py +58 -29
- {rakam_eval_sdk-0.2.2 → rakam_eval_sdk-0.2.3}/README.md +0 -0
- {rakam_eval_sdk-0.2.2 → rakam_eval_sdk-0.2.3}/src/rakam_eval_sdk/__init__.py +0 -0
- {rakam_eval_sdk-0.2.2 → rakam_eval_sdk-0.2.3}/src/rakam_eval_sdk/decorators.py +0 -0
- {rakam_eval_sdk-0.2.2 → rakam_eval_sdk-0.2.3}/src/rakam_eval_sdk/schema.py +0 -0
- {rakam_eval_sdk-0.2.2 → rakam_eval_sdk-0.2.3}/src/rakam_eval_sdk/utils/decorator_utils.py +0 -0
src/rakam_eval_sdk/cli.py

@@ -35,55 +35,88 @@ metrics_app = typer.Typer(help="Metrics utilities")
 app.add_typer(metrics_app, name="metrics")


+def extract_metric_names(config: Any) -> list[tuple[str, str | None]]:
+    """
+    Returns [(type, name)] from EvalConfig / SchemaEvalConfig
+    """
+    if not hasattr(config, "metrics"):
+        return []
+
+    results: list[tuple[str, str | None]] = []
+
+    for metric in config.metrics or []:
+        metric_type = getattr(metric, "type", None)
+        metric_name = getattr(metric, "name", None)
+        if metric_type:
+            results.append((metric_type, metric_name))
+
+    return results
+
+
 @metrics_app.command("list")
-def
-
-
-
-
+def metrics(
+    directory: Path = typer.Argument(
+        Path("./eval"),
+        exists=True,
+        file_okay=False,
+        dir_okay=True,
+        help="Directory to scan (default: ./eval)",
     ),
-
+    recursive: bool = typer.Option(
+        False,
+        "-r",
+        "--recursive",
+        help="Recursively search for Python files",
+    ),
+) -> None:
     """
-    List
+    List all metric types used by loaded eval configs.
     """
-
+    files = directory.rglob("*.py") if recursive else directory.glob("*.py")
+    TARGET_DECORATOR = eval_run.__name__

-
-
-        offset=0,
-        raise_exception=True,
-    )
+    all_metrics: set[tuple[str, str | None]] = set()
+    found_any = False

-
-
-
+    for file in sorted(files):
+        functions = find_decorated_functions(file, TARGET_DECORATOR)
+        if not functions:
+            continue

-
+        typer.echo(f"\n📄 {file}")

-
-
-
-
-
-            name = metric.get("name")
-            if name:
-                metric_names.add(name)
+        try:
+            module = load_module_from_path(file)
+        except Exception as e:
+            typer.echo(f" ❌ Failed to import module: {e}")
+            continue

-
+        for fn_name in functions:
+            try:
+                func = getattr(module, fn_name)
+                result = func()

-
+                metrics = extract_metric_names(result)
+                if not metrics:
+                    continue

-
-
-
+                found_any = True
+                for metric_type, metric_name in metrics:
+                    all_metrics.add((metric_type, metric_name))

-
-
-
-
+                    if metric_name:
+                        typer.echo(f" • {metric_type} (alias: {metric_name})")
+                    else:
+                        typer.echo(f" • {metric_type}")
+
+            except Exception as e:
+                typer.echo(f" ❌ Failed to inspect {fn_name}: {e}")
+
+    if not found_any:
+        typer.echo("\nNo metrics found.")
+        raise typer.Exit(code=0)

-
-        typer.echo(f"- {name}")
+    typer.echo(f"\n✅ {len(all_metrics)} unique metrics found")


 @list_app.command("evals")
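
A minimal sketch of how the reworked `metrics list` command could be exercised from tests, assuming the Typer `app` object is importable from `rakam_eval_sdk.cli` (matching src/rakam_eval_sdk/cli.py) and that an `./eval` directory with decorated eval functions exists; both are assumptions, not shown in this diff:

    from typer.testing import CliRunner

    from rakam_eval_sdk.cli import app  # assumed import path for the Typer app

    runner = CliRunner()

    # Scan ./eval recursively for eval_run-decorated functions and list the
    # metric types (and aliases) declared by their configs. The directory
    # argument must exist because the Argument is declared with exists=True.
    result = runner.invoke(app, ["metrics", "list", "./eval", "--recursive"])
    print(result.output)
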
@@ -165,12 +198,12 @@ def show(
     id: Optional[int] = typer.Option(
         None,
         "--id",
-        help="
+        help="Run ID",
     ),
     tag: Optional[str] = typer.Option(
         None,
         "--tag",
-        help="
+        help="Run tag",
     ),
     raw: bool = typer.Option(
         False,
@@ -391,13 +424,15 @@ def _print_and_save(

 @app.command()
 def compare(
-
-
-
+    tag: list[str] = typer.Option(
+        [],
+        "--tag",
+        help="Tag identifying a reference testcase",
     ),
-
-
-
+    run: list[int] = typer.Option(
+        [],
+        "--run",
+        help="Run ID identifying an evaluation testcase",
     ),
     pretty: bool = typer.Option(
         True,
@@ -422,18 +457,45 @@ def compare(
     ),
 ) -> None:
     """
-    Compare two
+    Compare two evaluation testcases using runs and/or labels.
     """
+
+    targets = []
+
+    for r in run:
+        targets.append(("run", r))
+
+    for l in tag:
+        targets.append(("label", l))
+
+    if len(targets) != 2:
+        typer.echo(
+            "❌ Provide exactly two targets using --run and/or --label"
+        )
+        raise typer.Exit(code=1)
+
     client = DeepEvalClient()

-
+    (type_a, value_a), (type_b, value_b) = targets
+
+    typer.echo(
+        f"🔍 Comparing {type_a} '{value_a}' ↔ {type_b} '{value_b}'"
+    )
+
+    kwargs = {"raise_exception": raise_exception}
+
+    if type_a == "run":
+        kwargs["testcase_a_id"] = value_a
+    else:
+        kwargs["testcase_a_tag"] = value_a
+
+    if type_b == "run":
+        kwargs["testcase_b_id"] = value_b
+    else:
+        kwargs["testcase_b_tag"] = value_b

     try:
-        resp = client.compare_testcases(
-            testcase_a_id=testcase_a_id,
-            testcase_b_id=testcase_b_id,
-            raise_exception=raise_exception,
-        )
+        resp = client.compare_testcases(**kwargs)
     except Exception as e:
         typer.echo(f"❌ Request failed: {e}")
         raise typer.Exit(code=1)
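
A sketch of invoking the new `compare` command, which now accepts exactly two targets given through repeated `--run`/`--tag` options. The `app` import path and the ID/tag values below are illustrative assumptions:

    from typer.testing import CliRunner

    from rakam_eval_sdk.cli import app  # assumed import path for the Typer app

    runner = CliRunner()

    # Compare run 12 against the latest testcase carrying the tag "baseline".
    # Any combination other than exactly two targets exits with code 1.
    result = runner.invoke(app, ["compare", "--run", "12", "--tag", "baseline"])
    print(result.output)
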
@@ -441,10 +503,11 @@ def compare(
     if not resp:
         typer.echo("⚠️ No response received")
         raise typer.Exit(code=1)
+
     _print_and_save(resp, pretty, out, overwrite)


-@app.command()
+@app.command(hidden=True)
 def compare_label_latest(
     label_a: str = typer.Argument(
         ...,
@@ -500,7 +563,7 @@ def compare_label_latest(
     _print_and_save(resp, pretty, out, overwrite)


-@app.command()
+@app.command(hidden=True)
 def compare_last(
     label: str = typer.Argument(
         ...,
@@ -551,44 +614,62 @@ def compare_last(
     _print_and_save(resp, pretty, out, overwrite)


-@
-def
-    run_id: int = typer.
+@app.command("tag")
+def tag_command(
+    run_id: Optional[int] = typer.Option(
+        None,
+        "--run",
+        help="Evaluation run ID",
+    ),
     tag: Optional[str] = typer.Option(
         None,
         "--tag",
-        "
-        help="Tag to add or update",
+        help="Tag to assign to the run",
     ),
-
-
-        "--
-        help="
+    delete: Optional[str] = typer.Option(
+        None,
+        "--delete",
+        help="Delete a tag",
     ),
 ):
     """
-
+    Assign a tag to a run or delete a tag.
     """
-    if not tag and not remove:
-        typer.echo("❌ You must provide --tag or --remove")
-        raise typer.Exit(code=1)

-
-
-
+    # --- validation ---
+    if delete:
+        if run_id or tag:
+            typer.echo("❌ --delete cannot be used with --run or --tag")
+            raise typer.Exit(code=1)
+    else:
+        if not run_id or not tag:
+            typer.echo("❌ Use --run and --tag together, or --delete")
+            raise typer.Exit(code=1)

     client = DeepEvalClient()

+    # --- delete mode ---
+    if delete:
+        result = client.update_evaluation_testcase_tag(
+            testcase_id=run_id,
+            tag=delete,
+            raise_exception=True,
+        )
+        typer.echo("🗑️ Tag deleted successfully")
+        typer.echo(f"Tag: {delete}")
+        return
+
+    # --- assign/update mode ---
     result = client.update_evaluation_testcase_tag(
         testcase_id=run_id,
-        tag=
+        tag=tag,
         raise_exception=True,
     )

-
-    typer.echo(f"✅ Tag {action} successfully")
+    typer.echo("✅ Tag assigned successfully")
     typer.echo(f"Run ID: {run_id}")
-    typer.echo(f"Tag: {result.get('tag')
+    typer.echo(f"Tag: {result.get('tag')}")
+

 def main() -> None:
     app()
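
A sketch of the new `tag` command in both of its modes; the `app` import path, the run ID 42, and the tag name are placeholders:

    from typer.testing import CliRunner

    from rakam_eval_sdk.cli import app  # assumed import path for the Typer app

    runner = CliRunner()

    # Assign (or update) a tag on a run; --run and --tag must be used together.
    runner.invoke(app, ["tag", "--run", "42", "--tag", "baseline"])

    # Delete a tag by name; --delete cannot be combined with --run or --tag.
    runner.invoke(app, ["tag", "--delete", "baseline"])
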
src/rakam_eval_sdk/client.py

@@ -1,7 +1,6 @@
-from typing import Optional, Dict
 import os
 import random
-from typing import Any, Dict, List, Optional, Union, cast, overload
+from typing import Any, Dict, List, Literal, Optional, Union, cast, overload

 import requests


@@ -13,7 +12,6 @@ from .schema import (
     SchemaMetricConfig,
     TextInputItem,
 )
-from typing import Optional, Literal, cast

 HTTPMethod = Literal["GET", "POST", "PATCH", "PUT", "DELETE"]

@@ -92,14 +90,17 @@ class DeepEvalClient:
             "raw": resp.text,
         }

-    def _get(self, endpoint: str, params: dict, **kw):
-        return self._request("GET", endpoint, params=params, **kw)
+    def _get(self, endpoint: str, params: dict, *args, **kw):
+        return self._request("GET", endpoint, params=params, *args, **kw)

-    def _post(self, endpoint: str, payload: dict, **kw):
-        return self._request("POST", endpoint, json=payload, **kw)
+    def _post(self, endpoint: str, payload: dict, *args, **kw):
+        return self._request("POST", endpoint, json=payload, *args, **kw)

-    def _patch(self, endpoint: str, payload: dict, **kw):
-        return self._request("PATCH", endpoint, json=payload, **kw)
+    def _patch(self, endpoint: str, payload: dict, *args, **kw):
+        return self._request("PATCH", endpoint, json=payload, *args, **kw)
+
+    def _delete(self, endpoint: str, payload: dict, *args, **kw):
+        return self._request("DELETE", endpoint, json=payload, *args, **kw)

     def update_evaluation_testcase_tag(
         self,
@@ -114,8 +115,14 @@ class DeepEvalClient:
          - tag="smoke" → add / update tag
          - tag=None → remove tag
         """
+        if testcase_id is None:
+            return self._delete(
+                endpoint=f"/deepeval/tag/{tag}",
+                payload={},
+                raise_exception=raise_exception,
+            )
         return self._patch(
-            f"/
+            endpoint=f"/deepeval/{testcase_id}/tag",
             payload={"tag": tag},
             raise_exception=raise_exception,
         )
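
In client terms, the two CLI paths above reduce to calls roughly like the following; the testcase ID and tag value are placeholders, and the client is constructed with its defaults just as the CLI does:

    from rakam_eval_sdk.client import DeepEvalClient  # assumed import path (src/rakam_eval_sdk/client.py)

    client = DeepEvalClient()

    # Add or update a tag on a run: PATCH /deepeval/{testcase_id}/tag
    client.update_evaluation_testcase_tag(testcase_id=42, tag="baseline", raise_exception=True)

    # Without a testcase_id, the new branch deletes by tag name: DELETE /deepeval/tag/{tag}
    client.update_evaluation_testcase_tag(testcase_id=None, tag="baseline", raise_exception=True)
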
@@ -132,7 +139,7 @@ class DeepEvalClient:
         Sorted by created_at DESC (newest first).
         """
         return self._get(
-            "/eval-framework/deepeval/evaluation-testcases/token",
+            endpoint="/eval-framework/deepeval/evaluation-testcases/token",
             params={
                 "limit": limit,
                 "offset": offset,

@@ -150,7 +157,7 @@ class DeepEvalClient:
         Fetch a single evaluation testcase by numeric ID.
         """
         return self._get(
-            f"/eval-framework/deepeval/id/{testcase_id}",
+            endpoint=f"/eval-framework/deepeval/id/{testcase_id}",
             params={},
             raise_exception=raise_exception,
         )

@@ -165,7 +172,7 @@ class DeepEvalClient:
         Fetch a single evaluation testcase by tag.
         """
         return self._get(
-            f"/eval-framework/deepeval/tag/{tag}",
+            endpoint=f"/eval-framework/deepeval/tag/{tag}",
             params={},
             raise_exception=raise_exception,
         )
@@ -190,19 +197,41 @@ class DeepEvalClient:
     def compare_testcases(
         self,
         *,
-        testcase_a_id: int,
-
+        testcase_a_id: int | None = None,
+        testcase_a_tag: str | None = None,
+        testcase_b_id: int | None = None,
+        testcase_b_tag: str | None = None,
         raise_exception: bool = False,
     ) -> Optional[dict]:
         """
-        Compare two evaluation testcases.
+        Compare two evaluation testcases using IDs or tags.
+        Exactly one identifier (id or tag) must be provided per testcase.
         """
+
+        def validate(id_, tag, name: str):
+            if bool(id_) == bool(tag):
+                raise ValueError(
+                    f"Provide exactly one of {name}_id or {name}_tag"
+                )
+
+        validate(testcase_a_id, testcase_a_tag, "testcase_a")
+        validate(testcase_b_id, testcase_b_tag, "testcase_b")
+
+        params: dict[str, int | str] = {}
+
+        if testcase_a_id is not None:
+            params["testcase_a_id"] = testcase_a_id
+        else:
+            params["testcase_a_tag"] = testcase_a_tag  # type: ignore
+
+        if testcase_b_id is not None:
+            params["testcase_b_id"] = testcase_b_id
+        else:
+            params["testcase_b_tag"] = testcase_b_tag  # type: ignore
+
         return self._get(
-            "/eval-framework/deepeval/evaluation-testcases/compare",
-            params={
-                "testcase_a_id": testcase_a_id,
-                "testcase_b_id": testcase_b_id,
-            },
+            endpoint="/eval-framework/deepeval/evaluation-testcases/compare",
+            params=params,
             raise_exception=raise_exception,
         )

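
The CLI path shown earlier ultimately reduces to a call like the one below; the ID and tag are illustrative, and reaching the backend depends on how the client is configured, which this diff does not show:

    from rakam_eval_sdk.client import DeepEvalClient

    client = DeepEvalClient()

    # Mix an ID with a tag: exactly one identifier per testcase is required,
    # otherwise the inner validate() helper raises ValueError.
    resp = client.compare_testcases(
        testcase_a_id=12,
        testcase_b_tag="baseline",
        raise_exception=True,
    )
    print(resp)
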
@@ -217,7 +246,7 @@ class DeepEvalClient:
         Compare the latest evaluation testcases for two labels.
         """
         return self._get(
-            "/eval-framework/deepeval/evaluation-testcases/compare-latest",
+            endpoint="/eval-framework/deepeval/evaluation-testcases/compare-latest",
             params={
                 "label_a": label_a,
                 "label_b": label_b,

@@ -235,7 +264,7 @@ class DeepEvalClient:
         Compare the last two evaluation testcases for a given label.
         """
         return self._get(
-            "/eval-framework/deepeval/evaluation-testcases/compare-last-two",
+            endpoint="/eval-framework/deepeval/evaluation-testcases/compare-last-two",
             params={
                 "label": label,
             },

@@ -280,7 +309,7 @@ class DeepEvalClient:
         )

         return self._post(
-            "/deepeval/text-eval", config.model_dump(), raise_exception
+            endpoint="/deepeval/text-eval", payload=config.model_dump(), raise_exception=raise_exception
         )

     def text_eval_background(

@@ -295,7 +324,7 @@ class DeepEvalClient:
         payload = EvalConfig.model_construct(
             data=data, metrics=metrics, component=component, version=label
         ).model_dump()
-        return self._post("/deepeval/text-eval/background", payload, raise_exception)
+        return self._post(endpoint="/deepeval/text-eval/background", payload=payload, raise_exception=raise_exception)

     @overload
     def schema_eval(

@@ -340,9 +369,9 @@ class DeepEvalClient:
         )

         return self._post(
-            "/deepeval/schema-eval",
-            config.model_dump(),
-            raise_exception,
+            endpoint="/deepeval/schema-eval",
+            payload=config.model_dump(),
+            raise_exception=raise_exception,
         )

     def schema_eval_background(

@@ -358,7 +387,7 @@ class DeepEvalClient:
             data=data, metrics=metrics, component=component, version=label
         ).model_dump()
         return self._post(
-            "/deepeval/schema-eval/background", payload, raise_exception
+            endpoint="/deepeval/schema-eval/background", payload=payload, raise_exception=raise_exception
         )

     def maybe_text_eval(

README.md: File without changes
src/rakam_eval_sdk/__init__.py: File without changes
src/rakam_eval_sdk/decorators.py: File without changes
src/rakam_eval_sdk/schema.py: File without changes
src/rakam_eval_sdk/utils/decorator_utils.py: File without changes