judgeval 0.0.35__py3-none-any.whl → 0.0.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/common/tracer.py +352 -118
- judgeval/constants.py +3 -2
- judgeval/data/datasets/dataset.py +3 -0
- judgeval/data/datasets/eval_dataset_client.py +63 -3
- judgeval/integrations/langgraph.py +1961 -299
- judgeval/judgment_client.py +8 -2
- judgeval/run_evaluation.py +67 -18
- judgeval/scorers/score.py +1 -0
- {judgeval-0.0.35.dist-info → judgeval-0.0.36.dist-info}/METADATA +1 -2
- {judgeval-0.0.35.dist-info → judgeval-0.0.36.dist-info}/RECORD +12 -12
- {judgeval-0.0.35.dist-info → judgeval-0.0.36.dist-info}/WHEEL +0 -0
- {judgeval-0.0.35.dist-info → judgeval-0.0.36.dist-info}/licenses/LICENSE.md +0 -0
judgeval/constants.py
CHANGED
@@ -42,14 +42,15 @@ ROOT_API = os.getenv("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")
 JUDGMENT_EVAL_API_URL = f"{ROOT_API}/evaluate/"
 JUDGMENT_SEQUENCE_EVAL_API_URL = f"{ROOT_API}/evaluate_sequence/"
 JUDGMENT_DATASETS_PUSH_API_URL = f"{ROOT_API}/datasets/push/"
-
+JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL = f"{ROOT_API}/datasets/insert_examples/"
+JUDGMENT_DATASETS_APPEND_SEQUENCES_API_URL = f"{ROOT_API}/datasets/insert_sequences/"
 JUDGMENT_DATASETS_PULL_API_URL = f"{ROOT_API}/datasets/pull_for_judgeval/"
 JUDGMENT_DATASETS_DELETE_API_URL = f"{ROOT_API}/datasets/delete/"
 JUDGMENT_DATASETS_EXPORT_JSONL_API_URL = f"{ROOT_API}/datasets/export_jsonl/"
 JUDGMENT_DATASETS_PROJECT_STATS_API_URL = f"{ROOT_API}/datasets/fetch_stats_by_project/"
 JUDGMENT_DATASETS_INSERT_API_URL = f"{ROOT_API}/datasets/insert_examples/"
 JUDGMENT_EVAL_LOG_API_URL = f"{ROOT_API}/log_eval_results/"
-JUDGMENT_EVAL_FETCH_API_URL = f"{ROOT_API}/
+JUDGMENT_EVAL_FETCH_API_URL = f"{ROOT_API}/fetch_experiment_run/"
 JUDGMENT_EVAL_DELETE_API_URL = f"{ROOT_API}/delete_eval_results_by_project_and_run_names/"
 JUDGMENT_EVAL_DELETE_PROJECT_API_URL = f"{ROOT_API}/delete_eval_results_by_project/"
 JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/delete/"
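Both new dataset endpoints, like every other constant in this file, are built from ROOT_API, which the hunk header shows being read from the JUDGMENT_API_URL environment variable. A minimal sketch of redirecting the SDK to another backend (the staging URL is a made-up placeholder):

import os

# JUDGMENT_API_URL must be set before judgeval.constants is imported,
# because the f-string URLs are evaluated once at import time.
os.environ["JUDGMENT_API_URL"] = "https://staging.example.com"

from judgeval import constants

print(constants.JUDGMENT_DATASETS_APPEND_SEQUENCES_API_URL)
# https://staging.example.com/datasets/insert_sequences/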
judgeval/data/datasets/dataset.py
CHANGED
@@ -224,6 +224,9 @@ class EvalDataset:
         self.examples = self.examples + [e]
         # TODO if we need to add rank, then we need to do it here
 
+    def add_sequence(self, s: Sequence) -> None:
+        self.sequences = self.sequences + [s]
+
     def save_as(self, file_type: Literal["json", "csv", "yaml"], dir_path: str, save_name: str = None) -> None:
         """
         Saves the dataset as a file. Save only the examples.
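add_sequence mirrors the existing example-append pattern, building a new list rather than mutating self.sequences in place. A usage sketch; the import path and constructor fields of Sequence are not shown in this diff, so both are assumptions:

from judgeval.data.datasets.dataset import EvalDataset
from judgeval.data import Sequence  # import path assumed

dataset = EvalDataset()             # default construction assumed
dataset.add_sequence(Sequence(...)) # fields elided; not shown in this diff
# Once a dataset holds sequences, pushing it sets "is_sequence": true
# (see the eval_dataset_client.py hunks below).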
judgeval/data/datasets/eval_dataset_client.py
CHANGED
@@ -6,7 +6,8 @@ from rich.progress import Progress, SpinnerColumn, TextColumn
 from judgeval.common.logger import debug, error, warning, info
 from judgeval.constants import (
     JUDGMENT_DATASETS_PUSH_API_URL,
-
+    JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
+    JUDGMENT_DATASETS_APPEND_SEQUENCES_API_URL,
     JUDGMENT_DATASETS_PULL_API_URL,
     JUDGMENT_DATASETS_PROJECT_STATS_API_URL,
     JUDGMENT_DATASETS_DELETE_API_URL,
@@ -58,6 +59,8 @@ class EvalDatasetClient:
                 "dataset_alias": alias,
                 "project_name": project_name,
                 "examples": [e.to_dict() for e in dataset.examples],
+                "sequences": [s.model_dump() for s in dataset.sequences],
+                "is_sequence": len(dataset.sequences) > 0,
                 "overwrite": overwrite,
             }
             try:
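With this change, push serializes any sequences alongside the examples and adds an is_sequence flag so the server can distinguish the two dataset kinds. Roughly the request body it produces (alias and project name are placeholders):

content = {
    "dataset_alias": "my-dataset",   # placeholder
    "project_name": "my-project",    # placeholder
    "examples": [e.to_dict() for e in dataset.examples],
    "sequences": [s.model_dump() for s in dataset.sequences],
    "is_sequence": len(dataset.sequences) > 0,  # True if any sequences were added
    "overwrite": False,
}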
@@ -92,7 +95,7 @@ class EvalDatasetClient:
             return True
 
 
-    def
+    def append_examples(self, alias: str, examples: List[Example], project_name: str) -> bool:
         debug(f"Appending dataset with alias '{alias}'")
         """
         Appends the dataset to Judgment platform
@@ -124,7 +127,7 @@ class EvalDatasetClient:
             }
             try:
                 response = requests.post(
-
+                    JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
                     json=content,
                     headers={
                         "Content-Type": "application/json",
@@ -149,6 +152,63 @@ class EvalDatasetClient:
             )
             return True
 
+    def append_sequences(self, alias: str, sequences: List[Sequence], project_name: str) -> bool:
+        debug(f"Appending dataset with alias '{alias}'")
+        """
+        Appends the dataset to Judgment platform
+
+        Mock request:
+        dataset = {
+            "alias": alias,
+            "examples": [...],
+            "project_name": project_name
+        } ==>
+        {
+            "_alias": alias,
+            "_id": "..." # ID of the dataset
+        }
+        """
+        with Progress(
+            SpinnerColumn(style="rgb(106,0,255)"),
+            TextColumn("[progress.description]{task.description}"),
+            transient=False,
+        ) as progress:
+            task_id = progress.add_task(
+                f"Appending [rgb(106,0,255)]'{alias}' to Judgment...",
+                total=100,
+            )
+            content = {
+                "dataset_alias": alias,
+                "project_name": project_name,
+                "sequences": [s.model_dump() for s in sequences],
+            }
+            try:
+                response = requests.post(
+                    JUDGMENT_DATASETS_APPEND_SEQUENCES_API_URL,
+                    json=content,
+                    headers={
+                        "Content-Type": "application/json",
+                        "Authorization": f"Bearer {self.judgment_api_key}",
+                        "X-Organization-Id": self.organization_id
+                    },
+                    verify=True
+                )
+                if response.status_code != 200:
+                    error(f"Server error during append: {response.json()}")
+                    raise Exception(f"Server error during append: {response.json()}")
+                response.raise_for_status()
+            except requests.exceptions.HTTPError as err:
+                if response.status_code == 422:
+                    error(f"Validation error during append: {err.response.json()}")
+                else:
+                    error(f"HTTP error during append: {err}")
+
+            progress.update(
+                task_id,
+                description=f"{progress.tasks[task_id].description} [rgb(25,227,160)]Done!)",
+            )
+            return True
+
     def pull(self, alias: str, project_name: str) -> EvalDataset:
         debug(f"Pulling dataset with alias '{alias}'")
         """