rakam-eval-sdk 0.2.0rc2__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/PKG-INFO +4 -2
- {rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/README.md +3 -1
- {rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/pyproject.toml +1 -1
- {rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/src/rakam_eval_sdk/cli.py +110 -39
- {rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/src/rakam_eval_sdk/client.py +63 -52
- {rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/src/rakam_eval_sdk/schema.py +1 -4
- {rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/src/rakam_eval_sdk/__init__.py +0 -0
- {rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/src/rakam_eval_sdk/decorators.py +0 -0
- {rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/src/rakam_eval_sdk/utils/decorator_utils.py +0 -0

{rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: rakam-eval-sdk
-Version: 0.2.0rc2
+Version: 0.2.2
 Summary: Evaluation Framework SDK
 Author: Mohamed Bachar Touil
 License: MIT

@@ -94,6 +94,7 @@ client = DeepEvalClient(
 )
 
 ```
+
 3. Schema Evaluation
 
 ```python

@@ -137,6 +138,7 @@ client = DeepEvalClient(
 )
 
 ```
+
 ## Configuration
 
 The client can be configured in multiple ways:

@@ -150,7 +152,7 @@ DeepEvalClient(base_url="http://api", api_token="123")
 ### Environment variables
 
 ```bash
-export
+export EVALFRAMEWORK_URL=http://api
 export EVALFRAMWORK_API_KEY=123
 ```
 

{rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/README.md

@@ -80,6 +80,7 @@ client = DeepEvalClient(
 )
 
 ```
+
 3. Schema Evaluation
 
 ```python

@@ -123,6 +124,7 @@ client = DeepEvalClient(
 )
 
 ```
+
 ## Configuration
 
 The client can be configured in multiple ways:

@@ -136,7 +138,7 @@ DeepEvalClient(base_url="http://api", api_token="123")
 ### Environment variables
 
 ```bash
-export
+export EVALFRAMEWORK_URL=http://api
 export EVALFRAMWORK_API_KEY=123
 ```
 
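The corrected `EVALFRAMEWORK_URL` name lines up with the constructor fallbacks in the client.py hunks further down. Below is a minimal sketch of the resulting precedence (explicit argument, then settings module, then environment, then `http://localhost:8080`); the import path is an assumption, not shown in this diff:

```python
# Sketch only: the import path and server URL are assumptions, not from this diff.
import os

from rakam_eval_sdk.client import DeepEvalClient

# Lowest precedence: the environment (note the corrected EVALFRAMEWORK_URL spelling).
os.environ["EVALFRAMEWORK_URL"] = "http://api"
client = DeepEvalClient()  # falls back to the env var, then http://localhost:8080

# Highest precedence: explicit constructor arguments.
client = DeepEvalClient(base_url="http://api", api_token="123")
```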

{rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/src/rakam_eval_sdk/cli.py

@@ -6,7 +6,7 @@ import uuid
 from datetime import datetime
 from pathlib import Path
 from pprint import pprint
-from typing import Any, Optional
+from typing import Any, Dict, Optional, Sequence
 
 import typer
 from dotenv import load_dotenv

@@ -31,9 +31,63 @@ if PROJECT_ROOT not in sys.path:
     sys.path.insert(0, PROJECT_ROOT)
 list_app = typer.Typer(help="List resources")
 app.add_typer(list_app, name="list")
+metrics_app = typer.Typer(help="Metrics utilities")
+app.add_typer(metrics_app, name="metrics")
 
-
-
+
+@metrics_app.command("list")
+def list_metrics(
+    limit: int = typer.Option(
+        20,
+        "--limit",
+        help="Number of testcases to inspect for metrics",
+    ),
+):
+    """
+    List unique metric names found in evaluation testcases.
+    """
+    client = DeepEvalClient()
+
+    testcases = client.list_evaluation_testcases(
+        limit=limit,
+        offset=0,
+        raise_exception=True,
+    )
+
+    if not testcases:
+        typer.echo("No evaluation testcases found.")
+        return
+
+    metric_names: set[str] = set()
+
+    def collect_metrics(entries: Sequence[Dict] | None):
+        if not entries:
+            return
+        for entry in entries:
+            for metric in entry.get("metrics", []) or []:
+                name = metric.get("name")
+                if name:
+                    metric_names.add(name)
+
+    for tc in testcases:
+
+        collect_metrics(tc.get("result"))
+
+    if not metric_names:
+        typer.echo("No metrics found.")
+        return
+
+    typer.echo(
+        f"📊 Found {len(metric_names)} unique metrics "
+        f"(from latest {limit} testcases)\n"
+    )
+
+    for name in sorted(metric_names):
+        typer.echo(f"- {name}")
+
+
+@list_app.command("evals")
+def list_evals(
     directory: Path = typer.Argument(
         Path("./eval"),
         exists=True,
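The new `metrics list` command above is a thin wrapper over the client. A rough equivalent using only the methods visible in this diff; the import path is an assumption:

```python
# Rough equivalent of `metrics list`; mirrors the command body added above.
from rakam_eval_sdk.client import DeepEvalClient  # import path is an assumption

client = DeepEvalClient()
testcases = client.list_evaluation_testcases(limit=20, offset=0, raise_exception=True) or []

# Each testcase's "result" holds entries whose "metrics" carry a "name" field.
names = {
    metric["name"]
    for tc in testcases
    for entry in (tc.get("result") or [])
    for metric in (entry.get("metrics") or [])
    if metric.get("name")
}
for name in sorted(names):
    print(f"- {name}")
```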

@@ -66,16 +120,10 @@ def list(
     typer.echo(f"No @{TARGET_DECORATOR} functions found.")
 
 
-
-
-
 @list_app.command("runs")
 def list_runs(
     limit: int = typer.Option(20, help="Max number of runs"),
     offset: int = typer.Option(0, help="Pagination offset"),
-    status: Optional[str] = typer.Option(
-        None, help="Filter by status (running, completed, failed)"
-    ),
 ):
     """
     List evaluation runs (newest first).

@@ -92,24 +140,12 @@ def list_runs(
         typer.echo("No evaluation runs found.")
         return
 
-
-    if status:
-        runs = [
-            r for r in runs
-            if r.get("result", {}).get("status") == status
-        ]
-
-    typer.echo(
-        f"[id] "
-        f"{'unique_id':<20}"
-        f"{'label':<20}"
-        f"created_at"
-    )
+    typer.echo(f"[id] " f"{'tag':<20}" f"{'label':<20}" f"created_at")
     # pretty CLI output
     for run in runs:
         run_id = run.get("id")
         label = run.get("label") or "-"
-        uid = run.get("
+        uid = run.get("tag") or "-"
         created_at = run.get("created_at")
 
         if created_at:

@@ -121,24 +157,20 @@ def list_runs(
                 pass
 
     typer.echo(
-        f"[{run_id}] "
-        f"{uid:<20} "
-        f"{label:<20} "
-        f"{created_at}"
-    )
+        f"[{run_id}] " f"{uid:<20} " f"{label:<20} " f"{created_at}")
 
 
-@
-def
+@app.command()
+def show(
     id: Optional[int] = typer.Option(
         None,
         "--id",
         help="Numeric evaluation testcase ID",
     ),
-
+    tag: Optional[str] = typer.Option(
         None,
-        "--
-        help="Evaluation testcase
+        "--tag",
+        help="Evaluation testcase tag",
     ),
     raw: bool = typer.Option(
         False,

@@ -147,12 +179,12 @@ def show_testcase(
     ),
 ):
     """
-    Show a single evaluation testcase by ID or
+    Show a single evaluation testcase by ID or tag.
     """
-    if not id and not
+    if not id and not tag:
         raise typer.BadParameter("You must provide either --id or --uid")
 
-    if id and
+    if id and tag:
         raise typer.BadParameter("Provide only one of --id or --uid")
 
     client = DeepEvalClient()

@@ -161,8 +193,8 @@ def show_testcase(
         result = client.get_evaluation_testcase_by_id(id)
         identifier = f"id={id}"
     else:
-        result = client.
-        identifier = f"
+        result = client.get_evaluation_testcase_by_tag(tag)
+        identifier = f"tag={tag}"
 
     if not result:
         console.print(

@@ -358,7 +390,7 @@ def _print_and_save(
 
 
 @app.command()
-def
+def compare(
     testcase_a_id: int = typer.Argument(
         ...,
         help="ID of the first testcase",

@@ -519,6 +551,45 @@ def compare_last(
     _print_and_save(resp, pretty, out, overwrite)
 
 
+@list_app.command("tag")
+def update_run_tag(
+    run_id: int = typer.Argument(..., help="Evaluation run ID"),
+    tag: Optional[str] = typer.Option(
+        None,
+        "--tag",
+        "-t",
+        help="Tag to add or update",
+    ),
+    remove: bool = typer.Option(
+        False,
+        "--remove",
+        help="Remove tag from the run",
+    ),
+):
+    """
+    Add, update, or remove a tag from an evaluation run.
+    """
+    if not tag and not remove:
+        typer.echo("❌ You must provide --tag or --remove")
+        raise typer.Exit(code=1)
+
+    if tag and remove:
+        typer.echo("❌ Use either --tag or --remove, not both")
+        raise typer.Exit(code=1)
+
+    client = DeepEvalClient()
+
+    result = client.update_evaluation_testcase_tag(
+        testcase_id=run_id,
+        tag=None if remove else tag,
+        raise_exception=True,
+    )
+
+    action = "removed" if remove else "updated"
+    typer.echo(f"✅ Tag {action} successfully")
+    typer.echo(f"Run ID: {run_id}")
+    typer.echo(f"Tag: {result.get('tag') or '-'}")
+
 def main() -> None:
     app()
 
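Since `update_run_tag` is registered on `list_app` under the name `tag`, it is invoked as a `list tag` subcommand. One way to exercise it in-process is Typer's test runner; the `app` import path is an assumption, and a reachable eval-framework backend is required:

```python
# Drives the new `list tag` subcommand without a console entry point.
from typer.testing import CliRunner

from rakam_eval_sdk.cli import app  # import path is an assumption

runner = CliRunner()
# Add or update a tag on run 42, then remove it.
result = runner.invoke(app, ["list", "tag", "42", "--tag", "smoke"])
print(result.output)
result = runner.invoke(app, ["list", "tag", "42", "--remove"])
print(result.output)
```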

{rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/src/rakam_eval_sdk/client.py

@@ -1,3 +1,4 @@
+from typing import Optional, Dict
 import os
 import random
 from typing import Any, Dict, List, Optional, Union, cast, overload

@@ -12,6 +13,9 @@ from .schema import (
     SchemaMetricConfig,
     TextInputItem,
 )
+from typing import Optional, Literal, cast
+
+HTTPMethod = Literal["GET", "POST", "PATCH", "PUT", "DELETE"]
 
 
 class DeepEvalClient:

@@ -27,71 +31,47 @@ class DeepEvalClient:
         settings_module: Optional[Any] = None,  # optional external settings
         timeout: int = 30,
     ):
-        settings_url = getattr(settings_module, "
+        settings_url = getattr(settings_module, "EVALFRAMEWORK_URL", None)
         settings_token = getattr(settings_module, "EVALFRAMWORK_API_KEY", None)
 
         raw_url = (
             base_url
             or settings_url
-            or os.getenv("
+            or os.getenv("EVALFRAMEWORK_URL")
             or "http://localhost:8080"
         )
         self.base_url = raw_url.rstrip("/")
         self.api_token = (
             api_token or settings_token or os.getenv(
-                "
+                "EVALFRAMEWORK_API_KEY", "")
         )
         self.timeout = timeout
 
     def _request(
         self,
+        method: HTTPMethod,
         endpoint: str,
-
+        *,
+        json: dict | None = None,
+        params: dict | None = None,
         raise_exception: bool = False,
     ) -> Optional[dict]:
-        """Internal helper to send POST requests with standard headers and error handling."""
         url = f"{self.base_url}{endpoint}"
-        headers = {
-            "accept": "application/json",
-            "Content-Type": "application/json",
-            "X-API-Token": self.api_token,
-        }
-
-        try:
-            resp = requests.post(
-                url, headers=headers, json=payload, timeout=self.timeout
-            )
-            if raise_exception:
-                resp.raise_for_status()
-        except requests.RequestException as e:
-            if raise_exception:
-                raise
-            return {"error": str(e)}
-
-        try:
-            return cast(dict, resp.json())
-        except ValueError:
-            if raise_exception:
-                raise
-            return {"error": "Invalid JSON response", "raw": resp.text}
 
-    def _get(
-        self,
-        endpoint: str,
-        params: dict,
-        raise_exception: bool = False,
-    ) -> Optional[dict]:
-        """Internal helper to send GET requests with standard headers and error handling."""
-        url = f"{self.base_url}{endpoint}"
         headers = {
             "accept": "application/json",
             "X-API-Token": self.api_token,
         }
 
+        if json is not None:
+            headers["Content-Type"] = "application/json"
+
         try:
-            resp = requests.
-
+            resp = requests.request(
+                method=method,
+                url=url,
                 headers=headers,
+                json=json,
                 params=params,
                 timeout=self.timeout,
             )

@@ -107,7 +87,38 @@ class DeepEvalClient:
         except ValueError:
             if raise_exception:
                 raise
-            return {
+            return {
+                "error": "Invalid JSON response",
+                "raw": resp.text,
+            }
+
+    def _get(self, endpoint: str, params: dict, **kw):
+        return self._request("GET", endpoint, params=params, **kw)
+
+    def _post(self, endpoint: str, payload: dict, **kw):
+        return self._request("POST", endpoint, json=payload, **kw)
+
+    def _patch(self, endpoint: str, payload: dict, **kw):
+        return self._request("PATCH", endpoint, json=payload, **kw)
+
+    def update_evaluation_testcase_tag(
+        self,
+        *,
+        testcase_id: int,
+        tag: Optional[str],
+        raise_exception: bool = False,
+    ) -> Optional[Dict]:
+        """
+        Add, update, or remove a tag from an evaluation testcase.
+
+        - tag="smoke" → add / update tag
+        - tag=None → remove tag
+        """
+        return self._patch(
+            f"/evaluation-testcases/{testcase_id}/tag",
+            payload={"tag": tag},
+            raise_exception=raise_exception,
+        )
 
     def list_evaluation_testcases(
         self,
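Together with the verb helpers, the new `update_evaluation_testcase_tag` method gives a small tagging workflow; `get_evaluation_testcase_by_tag` appears in the next hunk. A usage sketch, assuming the import path and a running backend:

```python
from rakam_eval_sdk.client import DeepEvalClient  # import path is an assumption

client = DeepEvalClient(base_url="http://localhost:8080", api_token="123")

# tag="smoke" adds or updates the tag; tag=None removes it (per the docstring above).
client.update_evaluation_testcase_tag(testcase_id=42, tag="smoke")
testcase = client.get_evaluation_testcase_by_tag("smoke")
client.update_evaluation_testcase_tag(testcase_id=42, tag=None)
```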

@@ -144,17 +155,17 @@ class DeepEvalClient:
         raise_exception=raise_exception,
     )
 
-    def
+    def get_evaluation_testcase_by_tag(
         self,
-
+        tag: str,
         *,
         raise_exception: bool = False,
     ) -> Optional[Dict]:
         """
-        Fetch a single evaluation testcase by
+        Fetch a single evaluation testcase by tag.
         """
         return self._get(
-            f"/eval-framework/deepeval/
+            f"/eval-framework/deepeval/tag/{tag}",
             params={},
             raise_exception=raise_exception,
         )

@@ -163,18 +174,18 @@ class DeepEvalClient:
         self,
         *,
         id: Optional[int] = None,
-
+        tag: Optional[str] = None,
         raise_exception: bool = False,
     ) -> Optional[Dict]:
         if id is not None:
             return self.get_evaluation_testcase_by_id(
                 id, raise_exception=raise_exception
             )
-        if
-        return self.
-
+        if tag is not None:
+            return self.get_evaluation_testcase_by_tag(
+                tag, raise_exception=raise_exception
             )
-        raise ValueError("Either id or
+        raise ValueError("Either id or tag must be provided")
 
     def compare_testcases(
         self,

@@ -268,7 +279,7 @@ class DeepEvalClient:
             label=label,
         )
 
-        return self.
+        return self._post(
             "/deepeval/text-eval", config.model_dump(), raise_exception
         )
 

@@ -284,7 +295,7 @@ class DeepEvalClient:
         payload = EvalConfig.model_construct(
             data=data, metrics=metrics, component=component, version=label
         ).model_dump()
-        return self.
+        return self._post("/deepeval/text-eval/background", payload, raise_exception)
 
     @overload
     def schema_eval(

@@ -328,7 +339,7 @@ class DeepEvalClient:
             label=label,
         )
 
-        return self.
+        return self._post(
             "/deepeval/schema-eval",
             config.model_dump(),
             raise_exception,

@@ -346,7 +357,7 @@ class DeepEvalClient:
         payload = SchemaEvalConfig.model_construct(
             data=data, metrics=metrics, component=component, version=label
         ).model_dump()
-        return self.
+        return self._post(
             "/deepeval/schema-eval/background", payload, raise_exception
         )
 

{rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/src/rakam_eval_sdk/schema.py

@@ -94,8 +94,7 @@ MetricConfig = Annotated[
 ]
 
 SchemaMetricConfig = Annotated[
-    Union[JsonCorrectnessConfig, FieldsPresenceConfig], Field(
-        discriminator="type")
+    Union[JsonCorrectnessConfig, FieldsPresenceConfig], Field(discriminator="type")
 ]
 
 
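The reflowed `SchemaMetricConfig` line is pydantic's discriminated-union pattern: the `type` field routes a payload to the matching config model. A self-contained sketch; the two config classes' real fields and discriminator values are not shown in this diff, so the stand-ins below are assumptions:

```python
from typing import Annotated, Literal, Union

from pydantic import BaseModel, Field, TypeAdapter

class JsonCorrectnessConfig(BaseModel):
    type: Literal["json_correctness"]  # stand-in; real fields not shown in the diff

class FieldsPresenceConfig(BaseModel):
    type: Literal["fields_presence"]  # stand-in; real fields not shown in the diff

SchemaMetricConfig = Annotated[
    Union[JsonCorrectnessConfig, FieldsPresenceConfig], Field(discriminator="type")
]

# pydantic routes the dict to the right model based on its "type" value.
metric = TypeAdapter(SchemaMetricConfig).validate_python({"type": "fields_presence"})
assert isinstance(metric, FieldsPresenceConfig)
```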

@@ -118,7 +117,6 @@ class SchemaInputItem(InputItem):
 
 class EvalConfig(BaseModel):
     __eval_config__ = "text_eval"
-    unique_id: Union[str, None] = None
     component: str = "unknown"
     label: Union[str, None] = None
     data: List[TextInputItem]

@@ -128,7 +126,6 @@ class EvalConfig(BaseModel):
 
 class SchemaEvalConfig(BaseModel):
     __eval_config__ = "schema_eval"
     component: str = "unknown"
-    unique_id: Union[str, None] = None
     label: Union[str, None] = None
     data: List[SchemaInputItem]
     metrics: List[SchemaMetricConfig] = Field(default_factory=list)

{rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/src/rakam_eval_sdk/__init__.py
File without changes

{rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/src/rakam_eval_sdk/decorators.py
File without changes

{rakam_eval_sdk-0.2.0rc2 → rakam_eval_sdk-0.2.2}/src/rakam_eval_sdk/utils/decorator_utils.py
RENAMED
File without changes