judgeval 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff compares two publicly released versions of the package as published to their public registry. It is provided for informational purposes only.
judgeval/cli.py ADDED
@@ -0,0 +1,65 @@
+ #!/usr/bin/env python3
+
+ import typer
+ from pathlib import Path
+ from dotenv import load_dotenv
+ from judgeval.common.logger import judgeval_logger
+ from judgeval.judgment_client import JudgmentClient
+
+ load_dotenv()
+
+ app = typer.Typer(
+     no_args_is_help=True,
+     rich_markup_mode=None,
+     rich_help_panel=None,
+     pretty_exceptions_enable=False,
+     pretty_exceptions_show_locals=False,
+     pretty_exceptions_short=False,
+ )
+
+
+ @app.command("upload_scorer")
+ def upload_scorer(
+     scorer_file_path: str,
+     requirements_file_path: str,
+     unique_name: str = typer.Option(
+         None, help="Custom name for the scorer (auto-detected if not provided)"
+     ),
+ ):
+     # Validate file paths
+     if not Path(scorer_file_path).exists():
+         judgeval_logger.error(f"Scorer file not found: {scorer_file_path}")
+         raise typer.Exit(1)
+
+     if not Path(requirements_file_path).exists():
+         judgeval_logger.error(f"Requirements file not found: {requirements_file_path}")
+         raise typer.Exit(1)
+
+     try:
+         client = JudgmentClient()
+
+         result = client.save_custom_scorer(
+             scorer_file_path=scorer_file_path,
+             requirements_file_path=requirements_file_path,
+             unique_name=unique_name,
+         )
+
+         if not result:
+             judgeval_logger.error("Failed to upload custom scorer")
+             raise typer.Exit(1)
+
+         raise typer.Exit(0)
+     except Exception:
+         raise
+
+
+ @app.command()
+ def version():
+     """Show version info"""
+     judgeval_logger.info("JudgEval CLI v0.0.0")
+
+
+ if __name__ == "__main__":
+     app()
+
+ # judgeval upload_scorer /Users/alanzhang/repo/JudgmentLabs/judgeval/src/demo/profile_match_scorer.py /Users/alanzhang/repo/JudgmentLabs/judgeval/src/demo/requirements.txt
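Note on the new CLI: the Typer app can be exercised without a console script by driving it with Typer's bundled test runner. A minimal sketch, assuming judgeval 0.6.0 is importable; the file paths are placeholders that intentionally do not exist, so the command exits with code 1:

    # Hypothetical smoke test of the new CLI entry point.
    from typer.testing import CliRunner
    from judgeval.cli import app

    runner = CliRunner()
    result = runner.invoke(app, ["upload_scorer", "scorer.py", "requirements.txt"])
    print(result.exit_code)  # 1: the placeholder scorer file is not found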
@@ -20,13 +20,11 @@ from judgeval.common.api.constants import (
      JUDGMENT_EVAL_DELETE_API_URL,
      JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL,
      JUDGMENT_GET_EVAL_STATUS_API_URL,
-     JUDGMENT_CHECK_EXPERIMENT_TYPE_API_URL,
-     JUDGMENT_EVAL_RUN_NAME_EXISTS_API_URL,
      JUDGMENT_SCORER_SAVE_API_URL,
      JUDGMENT_SCORER_FETCH_API_URL,
      JUDGMENT_SCORER_EXISTS_API_URL,
+     JUDGMENT_CUSTOM_SCORER_UPLOAD_API_URL,
      JUDGMENT_DATASETS_APPEND_TRACES_API_URL,
-     JUDGMENT_CHECK_EXAMPLE_KEYS_API_URL,
  )
  from judgeval.common.api.constants import (
      TraceFetchPayload,
@@ -45,12 +43,11 @@ from judgeval.common.api.constants import (
      DeleteEvalRunRequestBody,
      EvalLogPayload,
      EvalStatusPayload,
-     CheckExperimentTypePayload,
-     EvalRunNameExistsPayload,
      ScorerSavePayload,
      ScorerFetchPayload,
      ScorerExistsPayload,
-     CheckExampleKeysPayload,
+     CustomScorerUploadPayload,
+     CustomScorerTemplateResponse,
  )
  from judgeval.utils.requests import requests
  from judgeval.common.api.json_encoder import json_encoder
@@ -97,14 +94,20 @@ class JudgmentApiClient:
          method: Literal["POST", "PATCH", "GET", "DELETE"],
          url: str,
          payload: Any,
+         timeout: Optional[Union[float, tuple]] = None,
      ) -> Any:
+         # Prepare request kwargs with optional timeout
+         request_kwargs = self._request_kwargs()
+         if timeout is not None:
+             request_kwargs["timeout"] = timeout
+
          if method == "GET":
              r = requests.request(
                  method,
                  url,
                  params=payload,
                  headers=self._headers(),
-                 **self._request_kwargs(),
+                 **request_kwargs,
              )
          else:
              r = requests.request(
@@ -112,7 +115,7 @@ class JudgmentApiClient:
                  url,
                  json=json_encoder(payload),
                  headers=self._headers(),
-                 **self._request_kwargs(),
+                 **request_kwargs,
              )

          try:
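For context on the new `timeout` parameter: it is forwarded verbatim to `requests.request`, which accepts either a single float or a `(connect, read)` tuple bounding the two phases separately. A standalone illustration (the URL is a placeholder):

    # requests interprets a 2-tuple as (connect timeout, read timeout), in seconds.
    import requests

    resp = requests.get(
        "https://example.com/",  # placeholder endpoint
        timeout=(10, 300),       # 10 s to connect, up to 300 s to read the body
    )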
@@ -186,10 +189,10 @@ class JudgmentApiClient:
          payload: EvalLogPayload = {"results": results, "run": run}
          return self._do_request("POST", JUDGMENT_EVAL_LOG_API_URL, payload)

-     def fetch_evaluation_results(self, project_name: str, eval_name: str):
+     def fetch_evaluation_results(self, experiment_run_id: str, project_name: str):
          payload: EvalRunRequestBody = {
              "project_name": project_name,
-             "eval_name": eval_name,
+             "experiment_run_id": experiment_run_id,
          }
          return self._do_request("POST", JUDGMENT_EVAL_FETCH_API_URL, payload)

@@ -204,43 +207,21 @@ class JudgmentApiClient:
      def add_to_evaluation_queue(self, payload: Dict[str, Any]):
          return self._do_request("POST", JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL, payload)

-     def get_evaluation_status(self, eval_name: str, project_name: str):
+     def get_evaluation_status(self, experiment_run_id: str, project_name: str):
          payload: EvalStatusPayload = {
-             "eval_name": eval_name,
+             "experiment_run_id": experiment_run_id,
              "project_name": project_name,
              "judgment_api_key": self.api_key,
          }
          return self._do_request("GET", JUDGMENT_GET_EVAL_STATUS_API_URL, payload)

-     def check_experiment_type(self, eval_name: str, project_name: str, is_trace: bool):
-         payload: CheckExperimentTypePayload = {
-             "eval_name": eval_name,
-             "project_name": project_name,
-             "judgment_api_key": self.api_key,
-             "is_trace": is_trace,
-         }
-         return self._do_request("POST", JUDGMENT_CHECK_EXPERIMENT_TYPE_API_URL, payload)
-
-     def check_eval_run_name_exists(self, eval_name: str, project_name: str):
-         payload: EvalRunNameExistsPayload = {
-             "eval_name": eval_name,
-             "project_name": project_name,
-             "judgment_api_key": self.api_key,
-         }
-         return self._do_request("POST", JUDGMENT_EVAL_RUN_NAME_EXISTS_API_URL, payload)
-
-     def check_example_keys(self, keys: List[str], eval_name: str, project_name: str):
-         payload: CheckExampleKeysPayload = {
-             "keys": keys,
-             "eval_name": eval_name,
-             "project_name": project_name,
-         }
-         return self._do_request("POST", JUDGMENT_CHECK_EXAMPLE_KEYS_API_URL, payload)
-
-     def save_scorer(self, name: str, prompt: str, options: Optional[dict] = None):
+     def save_scorer(
+         self, name: str, prompt: str, threshold: float, options: Optional[dict] = None
+     ):
          payload: ScorerSavePayload = {
              "name": name,
              "prompt": prompt,
+             "threshold": threshold,
              "options": options,
          }
          try:
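The `save_scorer` change makes `threshold` a required field of the payload. A minimal sketch of the new payload shape; the name, prompt, and threshold values are illustrative only:

    from judgeval.common.api.constants import ScorerSavePayload

    payload: ScorerSavePayload = {
        "name": "helpfulness",  # placeholder scorer name
        "prompt": "Rate the response's helpfulness from 0 to 1.",
        "threshold": 0.7,       # newly required in 0.6.0
        "options": None,
    }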
@@ -292,6 +273,31 @@ class JudgmentApiClient:
              request=e.request,
          )

+     def upload_custom_scorer(
+         self,
+         scorer_name: str,
+         scorer_code: str,
+         requirements_text: str,
+     ) -> CustomScorerTemplateResponse:
+         """Upload custom scorer to backend"""
+         payload: CustomScorerUploadPayload = {
+             "scorer_name": scorer_name,
+             "scorer_code": scorer_code,
+             "requirements_text": requirements_text,
+         }
+
+         try:
+             # Use longer timeout for custom scorer upload (5 minutes)
+             response = self._do_request(
+                 "POST",
+                 JUDGMENT_CUSTOM_SCORER_UPLOAD_API_URL,
+                 payload,
+                 timeout=(10, 300),
+             )
+             return response
+         except JudgmentAPIException as e:
+             raise e
+
      def push_dataset(
          self,
          dataset_alias: str,
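Reading the scorer source and requirements into strings is what any caller of `upload_custom_scorer` has to do first. A sketch, assuming `client` is an already-constructed `JudgmentApiClient` and the two placeholder files exist:

    from pathlib import Path

    scorer_code = Path("my_scorer.py").read_text()             # placeholder path
    requirements_text = Path("requirements.txt").read_text()   # placeholder path

    response = client.upload_custom_scorer(
        scorer_name="my_scorer",
        scorer_code=scorer_code,
        requirements_text=requirements_text,
    )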
@@ -49,9 +49,9 @@ JUDGMENT_EVAL_DELETE_API_URL = (
  JUDGMENT_EVAL_DELETE_PROJECT_API_URL = f"{ROOT_API}/delete_eval_results_by_project/"
  JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL = f"{ROOT_API}/add_to_run_eval_queue/"
  JUDGMENT_GET_EVAL_STATUS_API_URL = f"{ROOT_API}/get_evaluation_status/"
- JUDGMENT_CHECK_EXPERIMENT_TYPE_API_URL = f"{ROOT_API}/check_experiment_type/"
- JUDGMENT_EVAL_RUN_NAME_EXISTS_API_URL = f"{ROOT_API}/eval-run-name-exists/"
- JUDGMENT_CHECK_EXAMPLE_KEYS_API_URL = f"{ROOT_API}/check_example_keys/"
+
+ # Custom Scorers API
+ JUDGMENT_CUSTOM_SCORER_UPLOAD_API_URL = f"{ROOT_API}/build_sandbox_template/"


  # Evaluation API Payloads
@@ -73,9 +73,9 @@ class EvalLogPayload(TypedDict):


  class EvalStatusPayload(TypedDict):
-     eval_name: str
-     project_name: str
+     experiment_run_id: str
      judgment_api_key: str
+     project_name: str


  class CheckExperimentTypePayload(TypedDict):
@@ -162,6 +162,7 @@ JUDGMENT_SCORER_EXISTS_API_URL = f"{ROOT_API}/scorer_exists/"
  class ScorerSavePayload(TypedDict):
      name: str
      prompt: str
+     threshold: float
      options: Optional[dict]


@@ -171,3 +172,15 @@ class ScorerFetchPayload(TypedDict):

  class ScorerExistsPayload(TypedDict):
      name: str
+
+
+ class CustomScorerUploadPayload(TypedDict):
+     scorer_name: str
+     scorer_code: str
+     requirements_text: str
+
+
+ class CustomScorerTemplateResponse(TypedDict):
+     scorer_name: str
+     status: str
+     message: str
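These TypedDicts are plain dicts at runtime; the annotations exist only for static type checkers. A small sketch of consuming the new response type, with invented field values:

    from judgeval.common.api.constants import CustomScorerTemplateResponse

    def report(resp: CustomScorerTemplateResponse) -> None:
        # Fields are read by key, exactly as with any dict.
        print(f"{resp['scorer_name']}: {resp['status']} - {resp['message']}")

    report({"scorer_name": "my_scorer", "status": "ok", "message": "template built"})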
@@ -84,7 +84,7 @@ def json_encoder(
      )

      # Sequences
-     if isinstance(obj, (list, set, frozenset, GeneratorType, tuple, deque)):
+     if isinstance(obj, (list, set, frozenset, tuple, deque)):
          return _dump_sequence(
              obj=obj,
          )
@@ -169,16 +169,15 @@ def _dump_other(
      obj: Any,
  ) -> Any:
      """
-     Dump an object to a hashable object, using the same parameters as jsonable_encoder
+     Dump an object to a representation without iterating it.
+
+     Avoids calling dict(obj) which can consume iterators/generators or
+     invoke user-defined iteration protocols.
      """
      try:
-         data = dict(obj)
-     except Exception:
          return repr(obj)
-
-     return json_encoder(
-         data,
-     )
+     except Exception:
+         return str(obj)


  def iso_format(o: Union[datetime.date, datetime.time]) -> str:
@@ -218,7 +217,7 @@ ENCODERS_BY_TYPE: Dict[Type[Any], Callable[[Any], Any]] = {
      Enum: lambda o: o.value,
      frozenset: list,
      deque: list,
-     GeneratorType: list,
+     GeneratorType: repr,
      Path: str,
      Pattern: lambda o: o.pattern,
      SecretBytes: str,
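The encoder changes share one motive: never iterate a caller's object as a side effect of serialization. Encoding a generator with `list` drains it, so the caller's iterator is silently exhausted; `repr` leaves it untouched. A self-contained demonstration:

    # Plain-Python illustration of why GeneratorType now maps to repr, not list.
    gen = (i * i for i in range(3))
    list(gen)                        # old behavior: consumes the generator
    print(next(gen, "exhausted"))    # -> exhausted

    gen2 = (i * i for i in range(3))
    repr(gen2)                       # new behavior: descriptive string, no iteration
    print(next(gen2))                # -> 0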