judgeval 0.0.54__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- judgeval/common/api/__init__.py +3 -0
- judgeval/common/api/api.py +352 -0
- judgeval/common/api/constants.py +165 -0
- judgeval/common/storage/__init__.py +6 -0
- judgeval/common/tracer/__init__.py +31 -0
- judgeval/common/tracer/constants.py +22 -0
- judgeval/common/tracer/core.py +1916 -0
- judgeval/common/tracer/otel_exporter.py +108 -0
- judgeval/common/tracer/otel_span_processor.py +234 -0
- judgeval/common/tracer/span_processor.py +37 -0
- judgeval/common/tracer/span_transformer.py +211 -0
- judgeval/common/tracer/trace_manager.py +92 -0
- judgeval/common/utils.py +2 -2
- judgeval/constants.py +3 -30
- judgeval/data/datasets/eval_dataset_client.py +29 -156
- judgeval/data/judgment_types.py +4 -12
- judgeval/data/result.py +1 -1
- judgeval/data/scorer_data.py +2 -2
- judgeval/data/scripts/openapi_transform.py +1 -1
- judgeval/data/trace.py +66 -1
- judgeval/data/trace_run.py +0 -3
- judgeval/evaluation_run.py +0 -2
- judgeval/integrations/langgraph.py +43 -164
- judgeval/judgment_client.py +17 -211
- judgeval/run_evaluation.py +209 -611
- judgeval/scorers/__init__.py +2 -6
- judgeval/scorers/base_scorer.py +4 -23
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +3 -3
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +215 -0
- judgeval/scorers/score.py +2 -1
- judgeval/scorers/utils.py +1 -13
- judgeval/utils/requests.py +21 -0
- judgeval-0.1.0.dist-info/METADATA +202 -0
- {judgeval-0.0.54.dist-info → judgeval-0.1.0.dist-info}/RECORD +37 -29
- judgeval/common/tracer.py +0 -3215
- judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +0 -73
- judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -53
- judgeval-0.0.54.dist-info/METADATA +0 -1384
- /judgeval/common/{s3_storage.py → storage/s3_storage.py} +0 -0
- {judgeval-0.0.54.dist-info → judgeval-0.1.0.dist-info}/WHEEL +0 -0
- {judgeval-0.0.54.dist-info → judgeval-0.1.0.dist-info}/licenses/LICENSE.md +0 -0
judgeval/common/api/api.py

@@ -0,0 +1,352 @@
+from typing import Literal, List, Dict, Any
+from requests import exceptions
+from judgeval.common.api.constants import (
+    JUDGMENT_TRACES_FETCH_API_URL,
+    JUDGMENT_TRACES_UPSERT_API_URL,
+    JUDGMENT_TRACES_DELETE_API_URL,
+    JUDGMENT_TRACES_SPANS_BATCH_API_URL,
+    JUDGMENT_TRACES_EVALUATION_RUNS_BATCH_API_URL,
+    JUDGMENT_DATASETS_PUSH_API_URL,
+    JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL,
+    JUDGMENT_DATASETS_PULL_API_URL,
+    JUDGMENT_DATASETS_DELETE_API_URL,
+    JUDGMENT_DATASETS_PROJECT_STATS_API_URL,
+    JUDGMENT_PROJECT_DELETE_API_URL,
+    JUDGMENT_PROJECT_CREATE_API_URL,
+    JUDGMENT_EVAL_API_URL,
+    JUDGMENT_TRACE_EVAL_API_URL,
+    JUDGMENT_EVAL_LOG_API_URL,
+    JUDGMENT_EVAL_FETCH_API_URL,
+    JUDGMENT_EVAL_DELETE_API_URL,
+    JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL,
+    JUDGMENT_GET_EVAL_STATUS_API_URL,
+    JUDGMENT_CHECK_EXPERIMENT_TYPE_API_URL,
+    JUDGMENT_EVAL_RUN_NAME_EXISTS_API_URL,
+    JUDGMENT_SCORER_SAVE_API_URL,
+    JUDGMENT_SCORER_FETCH_API_URL,
+    JUDGMENT_SCORER_EXISTS_API_URL,
+)
+from judgeval.common.api.constants import (
+    TraceFetchPayload,
+    TraceDeletePayload,
+    SpansBatchPayload,
+    EvaluationEntryResponse,
+    EvaluationRunsBatchPayload,
+    DatasetPushPayload,
+    DatasetAppendPayload,
+    DatasetPullPayload,
+    DatasetDeletePayload,
+    DatasetStatsPayload,
+    ProjectCreatePayload,
+    ProjectDeletePayload,
+    EvalRunRequestBody,
+    DeleteEvalRunRequestBody,
+    EvalLogPayload,
+    EvalStatusPayload,
+    CheckExperimentTypePayload,
+    EvalRunNameExistsPayload,
+    ScorerSavePayload,
+    ScorerFetchPayload,
+    ScorerExistsPayload,
+)
+from judgeval.utils.requests import requests
+
+
+class JudgmentAPIException(exceptions.HTTPError):
+    """
+    Exception raised when an error occurs while executing a Judgment API request.
+    Extends requests.exceptions.HTTPError to provide access to the response object.
+    """
+
+    def __init__(self, message: str, response=None, request=None):
+        super().__init__(message, response=response, request=request)
+        self.message = message
+        self.response = response
+        self.request = request
+
+    @property
+    def status_code(self) -> int | None:
+        """Get the HTTP status code from the response."""
+        return self.response.status_code if self.response else None
+
+    @property
+    def response_json(self) -> Dict[str, Any]:
+        """Get the JSON response body."""
+        try:
+            return self.response.json() if self.response else {}
+        except (ValueError, AttributeError):
+            return {}
+
+    @property
+    def error_detail(self) -> str:
+        """Get the error detail from the response JSON."""
+        return self.response_json.get("detail", "An unknown error occurred.")
+
+
+class JudgmentApiClient:
+    def __init__(self, api_key: str, organization_id: str):
+        self.api_key = api_key
+        self.organization_id = organization_id
+
+    def _do_request(
+        self,
+        method: Literal["POST", "PATCH", "GET", "DELETE"],
+        url: str,
+        payload: Any,
+    ) -> Any:
+        if method == "GET":
+            r = requests.request(
+                method,
+                url,
+                params=payload,
+                headers=self._headers(),
+                **self._request_kwargs(),
+            )
+        else:
+            r = requests.request(
+                method,
+                url,
+                data=self._serialize(payload),
+                headers=self._headers(),
+                **self._request_kwargs(),
+            )
+
+        try:
+            r.raise_for_status()
+        except exceptions.HTTPError as e:
+            raise JudgmentAPIException(
+                f"HTTP {r.status_code}: {r.reason}", response=r, request=e.request
+            )
+
+        return r.json()
+
+    def send_spans_batch(self, spans: List[Dict[str, Any]]):
+        payload: SpansBatchPayload = {
+            "spans": spans,
+            "organization_id": self.organization_id,
+        }
+
+        return self._do_request("POST", JUDGMENT_TRACES_SPANS_BATCH_API_URL, payload)
+
+    def send_evaluation_runs_batch(
+        self, evaluation_entries: List[EvaluationEntryResponse]
+    ):
+        payload: EvaluationRunsBatchPayload = {
+            "organization_id": self.organization_id,
+            "evaluation_entries": evaluation_entries,
+        }
+
+        return self._do_request(
+            "POST", JUDGMENT_TRACES_EVALUATION_RUNS_BATCH_API_URL, payload
+        )
+
+    def fetch_trace(self, trace_id: str):
+        payload: TraceFetchPayload = {"trace_id": trace_id}
+        return self._do_request("POST", JUDGMENT_TRACES_FETCH_API_URL, payload)
+
+    def upsert_trace(self, trace_data: Dict[str, Any]):
+        return self._do_request("POST", JUDGMENT_TRACES_UPSERT_API_URL, trace_data)
+
+    def delete_trace(self, trace_id: str):
+        payload: TraceDeletePayload = {"trace_ids": [trace_id]}
+        return self._do_request("DELETE", JUDGMENT_TRACES_DELETE_API_URL, payload)
+
+    def delete_traces(self, trace_ids: List[str]):
+        payload: TraceDeletePayload = {"trace_ids": trace_ids}
+        return self._do_request("DELETE", JUDGMENT_TRACES_DELETE_API_URL, payload)
+
+    def delete_project(self, project_name: str):
+        payload: ProjectDeletePayload = {"project_name": project_name}
+        return self._do_request("DELETE", JUDGMENT_PROJECT_DELETE_API_URL, payload)
+
+    def create_project(self, project_name: str):
+        payload: ProjectCreatePayload = {"project_name": project_name}
+        return self._do_request("POST", JUDGMENT_PROJECT_CREATE_API_URL, payload)
+
+    def run_evaluation(self, evaluation_run: Dict[str, Any]):
+        return self._do_request("POST", JUDGMENT_EVAL_API_URL, evaluation_run)
+
+    def run_trace_evaluation(self, trace_run: Dict[str, Any]):
+        return self._do_request("POST", JUDGMENT_TRACE_EVAL_API_URL, trace_run)
+
+    def log_evaluation_results(
+        self, results: List[Dict[str, Any]], run: Dict[str, Any]
+    ):
+        payload: EvalLogPayload = {"results": results, "run": run}
+        return self._do_request("POST", JUDGMENT_EVAL_LOG_API_URL, payload)
+
+    def fetch_evaluation_results(self, project_name: str, eval_name: str):
+        payload: EvalRunRequestBody = {
+            "project_name": project_name,
+            "eval_name": eval_name,
+        }
+        return self._do_request("POST", JUDGMENT_EVAL_FETCH_API_URL, payload)
+
+    def delete_evaluation_results(self, project_name: str, eval_names: List[str]):
+        payload: DeleteEvalRunRequestBody = {
+            "project_name": project_name,
+            "eval_names": eval_names,
+            "judgment_api_key": self.api_key,
+        }
+        return self._do_request("POST", JUDGMENT_EVAL_DELETE_API_URL, payload)
+
+    def add_to_evaluation_queue(self, payload: Dict[str, Any]):
+        return self._do_request("POST", JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL, payload)
+
+    def get_evaluation_status(self, eval_name: str, project_name: str):
+        payload: EvalStatusPayload = {
+            "eval_name": eval_name,
+            "project_name": project_name,
+            "judgment_api_key": self.api_key,
+        }
+        return self._do_request("GET", JUDGMENT_GET_EVAL_STATUS_API_URL, payload)
+
+    def check_experiment_type(self, eval_name: str, project_name: str, is_trace: bool):
+        payload: CheckExperimentTypePayload = {
+            "eval_name": eval_name,
+            "project_name": project_name,
+            "judgment_api_key": self.api_key,
+            "is_trace": is_trace,
+        }
+        return self._do_request("POST", JUDGMENT_CHECK_EXPERIMENT_TYPE_API_URL, payload)
+
+    def check_eval_run_name_exists(self, eval_name: str, project_name: str):
+        payload: EvalRunNameExistsPayload = {
+            "eval_name": eval_name,
+            "project_name": project_name,
+            "judgment_api_key": self.api_key,
+        }
+        return self._do_request("POST", JUDGMENT_EVAL_RUN_NAME_EXISTS_API_URL, payload)
+
+    def save_scorer(self, name: str, prompt: str, options: dict):
+        payload: ScorerSavePayload = {
+            "name": name,
+            "prompt": prompt,
+            "options": options,
+        }
+        try:
+            return self._do_request("POST", JUDGMENT_SCORER_SAVE_API_URL, payload)
+        except JudgmentAPIException as e:
+            if e.status_code == 500:
+                raise JudgmentAPIException(
+                    f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {e.error_detail}",
+                    response=e.response,
+                    request=e.request,
+                )
+            raise JudgmentAPIException(
+                f"Failed to save classifier scorer: {e.error_detail}",
+                response=e.response,
+                request=e.request,
+            )
+
+    def fetch_scorer(self, name: str):
+        payload: ScorerFetchPayload = {"name": name}
+        try:
+            return self._do_request("POST", JUDGMENT_SCORER_FETCH_API_URL, payload)
+        except JudgmentAPIException as e:
+            if e.status_code == 500:
+                raise JudgmentAPIException(
+                    f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {e.error_detail}",
+                    response=e.response,
+                    request=e.request,
+                )
+            raise JudgmentAPIException(
+                f"Failed to fetch classifier scorer '{name}': {e.error_detail}",
+                response=e.response,
+                request=e.request,
+            )
+
+    def scorer_exists(self, name: str):
+        payload: ScorerExistsPayload = {"name": name}
+        try:
+            return self._do_request("POST", JUDGMENT_SCORER_EXISTS_API_URL, payload)
+        except JudgmentAPIException as e:
+            if e.status_code == 500:
+                raise JudgmentAPIException(
+                    f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {e.error_detail}",
+                    response=e.response,
+                    request=e.request,
+                )
+            raise JudgmentAPIException(
+                f"Failed to check if scorer exists: {e.error_detail}",
+                response=e.response,
+                request=e.request,
+            )
+
+    def push_dataset(
+        self,
+        dataset_alias: str,
+        project_name: str,
+        examples: List[Dict[str, Any]],
+        traces: List[Dict[str, Any]],
+        overwrite: bool,
+    ):
+        payload: DatasetPushPayload = {
+            "dataset_alias": dataset_alias,
+            "project_name": project_name,
+            "examples": examples,
+            "traces": traces,
+            "overwrite": overwrite,
+        }
+        return self._do_request("POST", JUDGMENT_DATASETS_PUSH_API_URL, payload)
+
+    def append_examples(
+        self, dataset_alias: str, project_name: str, examples: List[Dict[str, Any]]
+    ):
+        payload: DatasetAppendPayload = {
+            "dataset_alias": dataset_alias,
+            "project_name": project_name,
+            "examples": examples,
+        }
+        return self._do_request(
+            "POST", JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL, payload
+        )
+
+    def pull_dataset(self, dataset_alias: str, project_name: str):
+        payload: DatasetPullPayload = {
+            "dataset_alias": dataset_alias,
+            "project_name": project_name,
+        }
+        return self._do_request("POST", JUDGMENT_DATASETS_PULL_API_URL, payload)
+
+    def delete_dataset(self, dataset_alias: str, project_name: str):
+        payload: DatasetDeletePayload = {
+            "dataset_alias": dataset_alias,
+            "project_name": project_name,
+        }
+        return self._do_request("POST", JUDGMENT_DATASETS_DELETE_API_URL, payload)
+
+    def get_project_dataset_stats(self, project_name: str):
+        payload: DatasetStatsPayload = {"project_name": project_name}
+        return self._do_request(
+            "POST", JUDGMENT_DATASETS_PROJECT_STATS_API_URL, payload
+        )
+
+    def _headers(self) -> Dict[str, str]:
+        return {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}",
+            "X-Organization-Id": self.organization_id,
+        }
+
+    def _request_kwargs(self):
+        # NOTE: We may want to configure custom kwargs that different requests may need.
+        # For this purpose we can store that as a property of self, and return the appropriate kwargs from this method.
+        return {
+            "verify": True,
+            "timeout": 30,
+        }
+
+    def _serialize(self, data: Any) -> str:
+        def fallback_encoder(obj):
+            try:
+                return repr(obj)
+            except Exception:
+                try:
+                    return str(obj)
+                except Exception as e:
+                    return f"<Unserializable object of type {type(obj).__name__}: {e}>"
+
+        import json
+
+        return json.dumps(data, default=fallback_encoder)
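For orientation, here is a minimal usage sketch of the new `JudgmentApiClient`. The constructor arguments, method names, and exception properties come from the code above; the credential and project values are placeholders, and the surrounding error handling is one possible pattern rather than the package's documented usage (the new `judgeval/common/api/__init__.py` likely re-exports these names, but the import below goes to the module shown here to be safe).

```python
from judgeval.common.api.api import JudgmentApiClient, JudgmentAPIException

# Placeholder credentials; real values come from your Judgment account.
client = JudgmentApiClient(api_key="YOUR_API_KEY", organization_id="YOUR_ORG_ID")

try:
    client.create_project("demo-project")
    stats = client.get_project_dataset_stats("demo-project")
    print(stats)
except JudgmentAPIException as e:
    # status_code and error_detail are the convenience properties defined above.
    print(f"Request failed ({e.status_code}): {e.error_detail}")
```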
judgeval/common/api/constants.py

@@ -0,0 +1,165 @@
+import os
+from typing import Optional, TypedDict, List, Dict, Any
+
+ROOT_API = os.getenv("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")
+
+# Traces API
+JUDGMENT_TRACES_FETCH_API_URL = f"{ROOT_API}/traces/fetch/"
+JUDGMENT_TRACES_SAVE_API_URL = f"{ROOT_API}/traces/save/"
+JUDGMENT_TRACES_UPSERT_API_URL = f"{ROOT_API}/traces/upsert/"
+JUDGMENT_TRACES_DELETE_API_URL = f"{ROOT_API}/traces/delete/"
+JUDGMENT_TRACES_SPANS_BATCH_API_URL = f"{ROOT_API}/traces/spans/batch/"
+JUDGMENT_TRACES_EVALUATION_RUNS_BATCH_API_URL = (
+    f"{ROOT_API}/traces/evaluation_runs/batch/"
+)
+
+
+class TraceFetchPayload(TypedDict):
+    trace_id: str
+
+
+class TraceDeletePayload(TypedDict):
+    trace_ids: List[str]
+
+
+class SpansBatchPayload(TypedDict):
+    spans: List[Dict[str, Any]]
+    organization_id: str
+
+
+class EvaluationEntryResponse(TypedDict):
+    evaluation_run: Dict[str, Any]
+    associated_span: Dict[str, Any]
+    queued_at: Optional[float]
+
+
+class EvaluationRunsBatchPayload(TypedDict):
+    organization_id: str
+    evaluation_entries: List[EvaluationEntryResponse]
+
+
+# Evaluation API
+JUDGMENT_EVAL_API_URL = f"{ROOT_API}/evaluate/"
+JUDGMENT_TRACE_EVAL_API_URL = f"{ROOT_API}/evaluate_trace/"
+JUDGMENT_EVAL_LOG_API_URL = f"{ROOT_API}/log_eval_results/"
+JUDGMENT_EVAL_FETCH_API_URL = f"{ROOT_API}/fetch_experiment_run/"
+JUDGMENT_EVAL_DELETE_API_URL = (
+    f"{ROOT_API}/delete_eval_results_by_project_and_run_names/"
+)
+JUDGMENT_EVAL_DELETE_PROJECT_API_URL = f"{ROOT_API}/delete_eval_results_by_project/"
+JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL = f"{ROOT_API}/add_to_run_eval_queue/"
+JUDGMENT_GET_EVAL_STATUS_API_URL = f"{ROOT_API}/get_evaluation_status/"
+JUDGMENT_CHECK_EXPERIMENT_TYPE_API_URL = f"{ROOT_API}/check_experiment_type/"
+JUDGMENT_EVAL_RUN_NAME_EXISTS_API_URL = f"{ROOT_API}/eval-run-name-exists/"
+
+
+# Evaluation API Payloads
+class EvalRunRequestBody(TypedDict):
+    eval_name: str
+    project_name: str
+    judgment_api_key: str
+
+
+class DeleteEvalRunRequestBody(TypedDict):
+    eval_names: List[str]
+    project_name: str
+    judgment_api_key: str
+
+
+class EvalLogPayload(TypedDict):
+    results: List[Dict[str, Any]]
+    run: Dict[str, Any]
+
+
+class EvalStatusPayload(TypedDict):
+    eval_name: str
+    project_name: str
+    judgment_api_key: str
+
+
+class CheckExperimentTypePayload(TypedDict):
+    eval_name: str
+    project_name: str
+    judgment_api_key: str
+    is_trace: bool
+
+
+class EvalRunNameExistsPayload(TypedDict):
+    eval_name: str
+    project_name: str
+    judgment_api_key: str
+
+
+# Datasets API
+JUDGMENT_DATASETS_PUSH_API_URL = f"{ROOT_API}/datasets/push/"
+JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL = f"{ROOT_API}/datasets/insert_examples/"
+JUDGMENT_DATASETS_PULL_API_URL = f"{ROOT_API}/datasets/pull_for_judgeval/"
+JUDGMENT_DATASETS_DELETE_API_URL = f"{ROOT_API}/datasets/delete/"
+JUDGMENT_DATASETS_EXPORT_JSONL_API_URL = f"{ROOT_API}/datasets/export_jsonl/"
+JUDGMENT_DATASETS_PROJECT_STATS_API_URL = f"{ROOT_API}/datasets/fetch_stats_by_project/"
+JUDGMENT_DATASETS_INSERT_API_URL = f"{ROOT_API}/datasets/insert_examples/"
+
+
+class DatasetPushPayload(TypedDict):
+    dataset_alias: str
+    project_name: str
+    examples: List[Dict[str, Any]]
+    traces: List[Dict[str, Any]]
+    overwrite: bool
+
+
+class DatasetAppendPayload(TypedDict):
+    dataset_alias: str
+    project_name: str
+    examples: List[Dict[str, Any]]
+
+
+class DatasetPullPayload(TypedDict):
+    dataset_alias: str
+    project_name: str
+
+
+class DatasetDeletePayload(TypedDict):
+    dataset_alias: str
+    project_name: str
+
+
+class DatasetExportPayload(TypedDict):
+    dataset_alias: str
+    project_name: str
+
+
+class DatasetStatsPayload(TypedDict):
+    project_name: str
+
+
+# Projects API
+JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/delete/"
+JUDGMENT_PROJECT_CREATE_API_URL = f"{ROOT_API}/projects/add/"
+
+
+class ProjectDeletePayload(TypedDict):
+    project_list: List[str]
+
+
+class ProjectCreatePayload(TypedDict):
+    project_name: str
+
+
+JUDGMENT_SCORER_SAVE_API_URL = f"{ROOT_API}/save_scorer/"
+JUDGMENT_SCORER_FETCH_API_URL = f"{ROOT_API}/fetch_scorer/"
+JUDGMENT_SCORER_EXISTS_API_URL = f"{ROOT_API}/scorer_exists/"
+
+
+class ScorerSavePayload(TypedDict):
+    name: str
+    prompt: str
+    options: dict
+
+
+class ScorerFetchPayload(TypedDict):
+    name: str
+
+
+class ScorerExistsPayload(TypedDict):
+    name: str
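Because every endpoint URL above is formatted from `ROOT_API` at import time, pointing the SDK at a different Judgment deployment is a matter of setting `JUDGMENT_API_URL` before `judgeval.common.api.constants` is first imported. A minimal sketch, with a placeholder self-hosted URL and placeholder dataset/project names:

```python
import os

# Must run before judgeval.common.api.constants is first imported, because the
# JUDGMENT_* URL constants are built from ROOT_API at import time.
os.environ["JUDGMENT_API_URL"] = "https://judgment.example.internal"  # placeholder host

from judgeval.common.api.constants import (
    JUDGMENT_DATASETS_PULL_API_URL,
    DatasetPullPayload,
)

payload: DatasetPullPayload = {
    "dataset_alias": "my-dataset",    # placeholder
    "project_name": "demo-project",   # placeholder
}
print(JUDGMENT_DATASETS_PULL_API_URL)  # https://judgment.example.internal/datasets/pull_for_judgeval/
```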
judgeval/common/tracer/__init__.py

@@ -0,0 +1,31 @@
+from judgeval.common.tracer.core import (
+    TraceClient,
+    _DeepTracer,
+    Tracer,
+    wrap,
+    current_span_var,
+    current_trace_var,
+    SpanType,
+    cost_per_token,
+)
+from judgeval.common.tracer.otel_exporter import JudgmentAPISpanExporter
+from judgeval.common.tracer.otel_span_processor import JudgmentSpanProcessor
+from judgeval.common.tracer.span_processor import SpanProcessorBase
+from judgeval.common.tracer.trace_manager import TraceManagerClient
+from judgeval.data import TraceSpan
+
+__all__ = [
+    "_DeepTracer",
+    "TraceClient",
+    "Tracer",
+    "wrap",
+    "current_span_var",
+    "current_trace_var",
+    "TraceManagerClient",
+    "JudgmentAPISpanExporter",
+    "JudgmentSpanProcessor",
+    "SpanProcessorBase",
+    "SpanType",
+    "cost_per_token",
+    "TraceSpan",
+]
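This package replaces the removed monolithic `judgeval/common/tracer.py` (the `+0 -3215` entry in the file list), and the `__init__` above re-exports the public names so existing import paths keep resolving. A small sketch of the import side, using only names listed in `__all__`:

```python
# Before 0.1.0 these names lived in the single module judgeval/common/tracer.py
# (removed in this release); the package __init__ above re-exports them, so
# existing imports keep working unchanged.
from judgeval.common.tracer import (
    Tracer,
    TraceClient,
    wrap,
    current_span_var,
    current_trace_var,
)
```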
judgeval/common/tracer/constants.py

@@ -0,0 +1,22 @@
+import os
+import site
+import sysconfig
+
+
+# NOTE: This builds once, can be tweaked if we are missing / capturing other unnecessary modules
+# @link https://docs.python.org/3.13/library/sysconfig.html
+_TRACE_FILEPATH_BLOCKLIST = tuple(
+    os.path.realpath(p) + os.sep
+    for p in {
+        sysconfig.get_paths()["stdlib"],
+        sysconfig.get_paths().get("platstdlib", ""),
+        *site.getsitepackages(),
+        site.getusersitepackages(),
+        *(
+            [os.path.join(os.path.dirname(__file__), "../../judgeval/")]
+            if os.environ.get("JUDGMENT_DEV")
+            else []
+        ),
+    }
+    if p
+)
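The tuple holds realpath-normalized directory prefixes (stdlib, platstdlib, site-packages, the user site, and the judgeval source tree when `JUDGMENT_DEV` is set), each ending in `os.sep`. A plausible consumer, sketched here rather than taken from `core.py`, would skip deep tracing for any frame whose file lives under one of those prefixes:

```python
import os

from judgeval.common.tracer.constants import _TRACE_FILEPATH_BLOCKLIST


def should_deep_trace(filepath: str) -> bool:
    """Hypothetical helper: treat only files outside the blocklisted dirs as user code."""
    # str.startswith accepts a tuple, and every blocklist entry is a realpath ending
    # in os.sep, so this is a pure directory-prefix check.
    return not os.path.realpath(filepath).startswith(_TRACE_FILEPATH_BLOCKLIST)


print(should_deep_trace(__file__))     # likely True for a user script
print(should_deep_trace(os.__file__))  # False: os lives in the stdlib
```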