judgeval 0.0.44__py3-none-any.whl → 0.0.46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. judgeval/__init__.py +5 -4
  2. judgeval/clients.py +6 -6
  3. judgeval/common/__init__.py +7 -2
  4. judgeval/common/exceptions.py +2 -3
  5. judgeval/common/logger.py +74 -49
  6. judgeval/common/s3_storage.py +30 -23
  7. judgeval/common/tracer.py +1273 -939
  8. judgeval/common/utils.py +416 -244
  9. judgeval/constants.py +73 -61
  10. judgeval/data/__init__.py +1 -1
  11. judgeval/data/custom_example.py +3 -2
  12. judgeval/data/datasets/dataset.py +80 -54
  13. judgeval/data/datasets/eval_dataset_client.py +131 -181
  14. judgeval/data/example.py +67 -43
  15. judgeval/data/result.py +11 -9
  16. judgeval/data/scorer_data.py +4 -2
  17. judgeval/data/tool.py +25 -16
  18. judgeval/data/trace.py +57 -29
  19. judgeval/data/trace_run.py +5 -11
  20. judgeval/evaluation_run.py +22 -82
  21. judgeval/integrations/langgraph.py +546 -184
  22. judgeval/judges/base_judge.py +1 -2
  23. judgeval/judges/litellm_judge.py +33 -11
  24. judgeval/judges/mixture_of_judges.py +128 -78
  25. judgeval/judges/together_judge.py +22 -9
  26. judgeval/judges/utils.py +14 -5
  27. judgeval/judgment_client.py +259 -271
  28. judgeval/rules.py +169 -142
  29. judgeval/run_evaluation.py +462 -305
  30. judgeval/scorers/api_scorer.py +20 -11
  31. judgeval/scorers/exceptions.py +1 -0
  32. judgeval/scorers/judgeval_scorer.py +77 -58
  33. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +46 -15
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +3 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +3 -2
  36. judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +12 -11
  37. judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +7 -5
  38. judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +3 -2
  39. judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +3 -2
  40. judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +5 -2
  41. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +2 -1
  42. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +17 -8
  43. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +3 -2
  44. judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +3 -2
  45. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +3 -2
  46. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +3 -2
  47. judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +8 -9
  48. judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +4 -4
  49. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +5 -5
  50. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +5 -2
  51. judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +9 -10
  52. judgeval/scorers/prompt_scorer.py +48 -37
  53. judgeval/scorers/score.py +86 -53
  54. judgeval/scorers/utils.py +11 -7
  55. judgeval/tracer/__init__.py +1 -1
  56. judgeval/utils/alerts.py +23 -12
  57. judgeval/utils/{data_utils.py → file_utils.py} +5 -9
  58. judgeval/utils/requests.py +29 -0
  59. judgeval/version_check.py +5 -2
  60. {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/METADATA +79 -135
  61. judgeval-0.0.46.dist-info/RECORD +69 -0
  62. judgeval-0.0.44.dist-info/RECORD +0 -68
  63. {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/WHEEL +0 -0
  64. {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/licenses/LICENSE.md +0 -0
judgeval/scorers/score.py CHANGED
@@ -2,15 +2,14 @@
  Infrastructure for executing evaluations of `Example`s using one or more `JudgevalScorer`s.
  """
 
-
  import asyncio
- import time
+ import time
  from tqdm.asyncio import tqdm_asyncio
  from typing import List, Union, Optional, Callable
  from rich.progress import Progress, SpinnerColumn, TextColumn
 
  from judgeval.data import (
- Example,
+ Example,
  CustomExample,
  ScoringResult,
  generate_scoring_result,
@@ -22,6 +21,7 @@ from judgeval.common.exceptions import MissingTestCaseParamsError
  from judgeval.common.logger import example_logging_context, debug, error, warning, info
  from judgeval.judges import JudgevalJudge
 
+
  async def safe_a_score_example(
  scorer: JudgevalScorer,
  example: Example,
@@ -35,32 +35,42 @@ async def safe_a_score_example(
  Args:
  scorer (JudgevalScorer): The `JudgevalScorer` to use for scoring the example.
  example (Example): The `Example` to be scored.
-
- ignore_errors (bool): Whether to ignore errors during the evaluation.
+
+ ignore_errors (bool): Whether to ignore errors during the evaluation.
  If set to false, any error will be raised and stop the evaluation.
  If set to true, the error will be stored in the `error` attribute of the `JudgevalScorer` and the `success` attribute will be set to False.
-
- skip_on_missing_params (bool): Whether to skip the test case if required parameters are missing.
+
+ skip_on_missing_params (bool): Whether to skip the test case if required parameters are missing.
  """
  debug(f"Starting safe_a_score_example for example {example.example_id}")
  try:
  await scorer.a_score_example(example, _show_indicator=False)
  info(f"Successfully scored example {example.example_id}")
  except MissingTestCaseParamsError as e:
- if skip_on_missing_params: # Skip the example if the scorer requires parameters that are missing
+ if (
+ skip_on_missing_params
+ ): # Skip the example if the scorer requires parameters that are missing
  with example_logging_context(example.created_at, example.example_id):
- warning(f"Skipping example {example.example_id} due to missing parameters")
+ warning(
+ f"Skipping example {example.example_id} due to missing parameters"
+ )
  scorer.skipped = True
  return
  else:
- if ignore_errors: # Gracefully handle the error, does not stop the evaluation
+ if (
+ ignore_errors
+ ): # Gracefully handle the error, does not stop the evaluation
  scorer.error = str(e)
  scorer.success = False
  with example_logging_context(example.created_at, example.example_id):
- warning(f"Ignoring errors for example {example.example_id}: {str(e)} due to missing parameters")
+ warning(
+ f"Ignoring errors for example {example.example_id}: {str(e)} due to missing parameters"
+ )
  else: # Raise the error and stop the evaluation
  with example_logging_context(example.created_at, example.example_id):
- error(f"Stopping example {example.example_id}: {str(e)} due to missing parameters")
+ error(
+ f"Stopping example {example.example_id}: {str(e)} due to missing parameters"
+ )
  raise
  except TypeError: # in case a_score_example does not accept _show_indicator
  try:
@@ -69,17 +79,27 @@ async def safe_a_score_example(
  if skip_on_missing_params:
  scorer.skipped = True
  with example_logging_context(example.created_at, example.example_id):
- warning(f"Skipping example {example.example_id} due to missing parameters")
+ warning(
+ f"Skipping example {example.example_id} due to missing parameters"
+ )
  return
  else:
  if ignore_errors:
  scorer.error = str(e)
- scorer.success = False
- with example_logging_context(example.created_at, example.example_id):
- warning(f"Ignoring errors for example {example.example_id}: {str(e)} due to missing parameters")
+ scorer.success = False
+ with example_logging_context(
+ example.created_at, example.example_id
+ ):
+ warning(
+ f"Ignoring errors for example {example.example_id}: {str(e)} due to missing parameters"
+ )
  else:
- with example_logging_context(example.created_at, example.example_id):
- error(f"Stopping example {example.example_id}: {str(e)} due to missing parameters")
+ with example_logging_context(
+ example.created_at, example.example_id
+ ):
+ error(
+ f"Stopping example {example.example_id}: {str(e)} due to missing parameters"
+ )
  raise
  except Exception as e:
  if ignore_errors:
@@ -121,7 +141,7 @@ async def score_task(
  """
  while not progress.finished:
  start_time = time.perf_counter()
-
+
  try:
  await scorer.a_score_example(example, _show_indicator=False)
  finish_text = "Completed"
@@ -129,7 +149,9 @@ async def score_task(
  if skip_on_missing_params:
  scorer.skipped = True
  with example_logging_context(example.created_at, example.example_id):
- debug(f"Skipping example {example.example_id} due to missing parameters")
+ debug(
+ f"Skipping example {example.example_id} due to missing parameters"
+ )
  return
  else:
  if ignore_errors:
@@ -137,8 +159,12 @@ async def score_task(
  scorer.success = False # Override success
  finish_text = "Failed"
  else:
- with example_logging_context(example.created_at, example.example_id):
- error(f"Stopping example {example.example_id}: {str(e)} due to missing parameters")
+ with example_logging_context(
+ example.created_at, example.example_id
+ ):
+ error(
+ f"Stopping example {example.example_id}: {str(e)} due to missing parameters"
+ )
  raise
  except TypeError:
  try:
@@ -147,8 +173,12 @@ async def score_task(
  except MissingTestCaseParamsError as e:
  if skip_on_missing_params:
  scorer.skipped = True
- with example_logging_context(example.created_at, example.example_id):
- debug(f"Skipping example {example.example_id} due to missing parameters")
+ with example_logging_context(
+ example.created_at, example.example_id
+ ):
+ debug(
+ f"Skipping example {example.example_id} due to missing parameters"
+ )
  return
  else:
  if ignore_errors:
@@ -156,8 +186,12 @@ async def score_task(
  scorer.success = False # Override success
  finish_text = "Failed"
  else:
- with example_logging_context(example.created_at, example.example_id):
- error(f"Stopping example {example.example_id}: {str(e)} due to missing parameters")
+ with example_logging_context(
+ example.created_at, example.example_id
+ ):
+ error(
+ f"Stopping example {example.example_id}: {str(e)} due to missing parameters"
+ )
  raise
  except Exception as e:
  if ignore_errors:
@@ -165,7 +199,9 @@ async def score_task(
  scorer.success = False # Override success
  finish_text = "Failed"
  with example_logging_context(example.created_at, example.example_id):
- warning(f"Ignoring errors for example {example.example_id}: {str(e)}")
+ warning(
+ f"Ignoring errors for example {example.example_id}: {str(e)}"
+ )
  else:
  with example_logging_context(example.created_at, example.example_id):
  error(f"Stopping example {example.example_id}: {str(e)}")
@@ -213,9 +249,7 @@ async def score_with_indicator(
  tasks = []
  for scorer in scorers:
  task_id = progress.add_task(
- description=scorer_console_msg(
- scorer, async_mode=True
- ),
+ description=scorer_console_msg(scorer, async_mode=True),
  total=100,
  ) # Add task to progress bar
  tasks.append(
@@ -231,9 +265,7 @@ async def score_with_indicator(
  await asyncio.gather(*tasks)
  else:
  tasks = [
- safe_a_score_example(
- scorer, example, ignore_errors, skip_on_missing_params
- )
+ safe_a_score_example(scorer, example, ignore_errors, skip_on_missing_params)
  for scorer in scorers
  ]
 
@@ -280,7 +312,7 @@ async def a_execute_scoring(
  return await func(*args, **kwargs)
  except Exception as e:
  print(f"Error executing function: {e}")
- if kwargs.get('ignore_errors', False):
+ if kwargs.get("ignore_errors", False):
  # Simply return None when ignoring errors, as expected by the test
  return None
  # If we're not ignoring errors, propagate the exception
@@ -290,12 +322,13 @@ async def a_execute_scoring(
  for scorer in scorers:
  scorer.verbose_mode = verbose_mode
 
- # Add model to scorers
+ # Add model to scorers
  for scorer in scorers:
  scorer._add_model(model)
 
  scoring_results: List[ScoringResult] = [None for _ in examples]
  tasks = []
+ cloned_scorers: List[JudgevalScorer]
 
  if show_indicator and _use_bar_indicator:
  with tqdm_asyncio(
@@ -311,18 +344,16 @@ async def a_execute_scoring(
  debug(f"Using {len(scorers)} scorers")
  for scorer in scorers:
  debug(f"Using scorer: {type(scorer).__name__}")
- if hasattr(scorer, 'threshold'):
+ if hasattr(scorer, "threshold"):
  debug(f"Scorer threshold: {scorer.threshold}")
- if hasattr(scorer, 'model'):
+ if hasattr(scorer, "model"):
  debug(f"Scorer model: {type(scorer.model).__name__}")
  if isinstance(ex, Example) or isinstance(ex, CustomExample):
  if len(scorers) == 0:
  pbar.update(1)
  continue
-
- cloned_scorers: List[JudgevalScorer] = clone_scorers(
- scorers
- )
+
+ cloned_scorers = clone_scorers(scorers)
  task = execute_with_semaphore(
  func=a_eval_examples_helper,
  scorers=cloned_scorers,
@@ -345,9 +376,7 @@ async def a_execute_scoring(
  if len(scorers) == 0:
  continue
 
- cloned_scorers: List[JudgevalScorer] = clone_scorers(
- scorers
- )
+ cloned_scorers = clone_scorers(scorers)
  task = execute_with_semaphore(
  func=a_eval_examples_helper,
  scorers=cloned_scorers,
@@ -376,10 +405,10 @@ async def a_eval_examples_helper(
  show_indicator: bool,
  _use_bar_indicator: bool,
  pbar: Optional[tqdm_asyncio] = None,
- ) -> None:
+ ) -> None:
  """
  Evaluate a single example asynchronously using a list of scorers.
-
+
  Args:
  scorers (List[JudgevalScorer]): List of JudgevalScorer objects to evaluate the example.
  example (Example): The example to be evaluated.
@@ -410,23 +439,27 @@ async def a_eval_examples_helper(
  show_indicator=show_metrics_indicator,
  ) # execute the scoring functions of each scorer on the example
 
- # Now that all the scoring functions of each scorer have executed, we collect
+ # Now that all the scoring functions of each scorer have executed, we collect
  # the results and update the ScoringResult with the scorer data
  success = True
  scorer_data_list = []
  for scorer in scorers:
  # At this point, the scorer has been executed and already contains data.
- if getattr(scorer, 'skipped', False):
+ if getattr(scorer, "skipped", False):
  continue
- scorer_data = create_scorer_data(scorer) # Fetch scorer data from completed scorer evaluation
+ scorer_data = create_scorer_data(
+ scorer
+ ) # Fetch scorer data from completed scorer evaluation
  success = success and scorer_data.success
  scorer_data_list.append(scorer_data)
-
+
  scoring_end_time = time.perf_counter()
  run_duration = scoring_end_time - scoring_start_time
-
+
- scoring_result = generate_scoring_result(example, scorer_data_list, run_duration, success)
+ scoring_result = generate_scoring_result(
+ example, scorer_data_list, run_duration, success
+ )
  scoring_results[score_index] = scoring_result
-
+
  if pbar is not None:
  pbar.update(1)
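
Note on the score.py changes above: most are mechanical (Black-style call wrapping and double-quote normalization), plus hoisting the `cloned_scorers` annotation out of the loop; the error-handling contract of `safe_a_score_example` is unchanged. As a rough sketch of that contract, condensed from the branches shown above rather than code shipped in the package, handling a `MissingTestCaseParamsError` reduces to:

    # Sketch only: the names mirror the diff above; `scorer` is assumed to expose
    # the error/success/skipped attributes used in score.py.
    def handle_missing_params(scorer, exc, skip_on_missing_params: bool, ignore_errors: bool):
        if skip_on_missing_params:
            scorer.skipped = True        # example is skipped and scoring returns early
            return
        if ignore_errors:
            scorer.error = str(exc)      # error recorded, evaluation continues
            scorer.success = False
            return
        raise exc                        # otherwise the evaluation stops here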
judgeval/scorers/utils.py CHANGED
@@ -83,7 +83,9 @@ def scorer_progress_meter(
  yield
 
 
- def parse_response_json(llm_response: str, scorer: Optional[JudgevalScorer] = None) -> dict:
+ def parse_response_json(
+ llm_response: str, scorer: Optional[JudgevalScorer] = None
+ ) -> dict:
  """
  Extracts JSON output from an LLM response and returns it as a dictionary.
 
@@ -100,8 +102,12 @@ def parse_response_json(llm_response: str, scorer: Optional[JudgevalScorer] = No
  llm_response = llm_response + "}"
  end = len(llm_response)
 
- json_str = llm_response[start:end] if start != -1 and end != 0 else "" # extract the JSON string
- json_str = re.sub(r",\s*([\]}])", r"\1", json_str) # Remove trailing comma if present
+ json_str = (
+ llm_response[start:end] if start != -1 and end != 0 else ""
+ ) # extract the JSON string
+ json_str = re.sub(
+ r",\s*([\]}])", r"\1", json_str
+ ) # Remove trailing comma if present
 
  try:
  return json.loads(json_str)
@@ -131,7 +137,7 @@ def create_verbose_logs(metric: JudgevalScorer, steps: List[str]) -> str:
  Args:
  metric (JudgevalScorer): The scorer object.
  steps (List[str]): The steps to be included in the verbose logs.
-
+
  Returns:
  str: The verbose logs (Concatenated steps).
  """
@@ -157,7 +163,7 @@ def get_or_create_event_loop() -> asyncio.AbstractEventLoop:
 
  Returns:
  asyncio.AbstractEventLoop: The current or newly created event loop.
-
+
  Raises:
  RuntimeError: If the event loop is closed.
  """
@@ -205,5 +211,3 @@ def check_example_params(
  error_str = f"{missing_params_str} fields in example cannot be None for the '{scorer.__name__}' scorer"
  scorer.error = error_str
  raise MissingExampleParamsError(error_str)
-
-
judgeval/tracer/__init__.py CHANGED
@@ -1,3 +1,3 @@
  from judgeval.common.tracer import Tracer, wrap, TraceClient, TraceManagerClient
 
- __all__ = ["Tracer", "wrap", "TraceClient", "TraceManagerClient"]
+ __all__ = ["Tracer", "wrap", "TraceClient", "TraceManagerClient"]
judgeval/utils/alerts.py CHANGED
@@ -1,19 +1,23 @@
  """
  Handling alerts in Judgeval.
  """
+
  from enum import Enum
  from typing import Dict, Any, List, Optional
  from pydantic import BaseModel
 
+
  class AlertStatus(str, Enum):
  """Status of an alert evaluation."""
+
  TRIGGERED = "triggered"
  NOT_TRIGGERED = "not_triggered"
 
+
  class AlertResult(BaseModel):
  """
  Result of a rule evaluation.
-
+
  Attributes:
  rule_name: Name of the rule that was evaluated
  rule_id: Unique identifier of the rule
@@ -25,26 +29,29 @@ class AlertResult(BaseModel):
  project_id: Optional project identifier
  trace_span_id: Optional trace span identifier
  """
+
  rule_name: str
  rule_id: Optional[str] = None # The unique identifier of the rule
  status: AlertStatus
  conditions_result: List[Dict[str, Any]] = []
  metadata: Dict[str, Any] = {}
- notification: Optional[Any] = None # NotificationConfig when triggered, None otherwise
+ notification: Optional[Any] = (
+ None # NotificationConfig when triggered, None otherwise
+ )
  combine_type: Optional[str] = None # "all" or "any"
  project_id: Optional[str] = None # Project identifier
  trace_span_id: Optional[str] = None # Trace span identifier
-
+
  @property
  def example_id(self) -> Optional[str]:
  """Get example_id from metadata for backward compatibility"""
  return self.metadata.get("example_id")
-
+
  @property
  def timestamp(self) -> Optional[str]:
  """Get timestamp from metadata for backward compatibility"""
  return self.metadata.get("timestamp")
-
+
  @property
  def conditions_results(self) -> List[Dict[str, Any]]:
  """Backwards compatibility property for the conditions_result field"""
@@ -53,15 +60,19 @@ class AlertResult(BaseModel):
  def model_dump(self, **kwargs):
  """
  Convert the AlertResult to a dictionary for JSON serialization.
-
+
  Args:
  **kwargs: Additional arguments to pass to Pydantic's model_dump
-
+
  Returns:
  dict: Dictionary representation of the AlertResult
  """
- data = super().model_dump(**kwargs) if hasattr(super(), "model_dump") else super().dict(**kwargs)
-
+ data = (
+ super().model_dump(**kwargs)
+ if hasattr(super(), "model_dump")
+ else super().dict(**kwargs)
+ )
+
  # Handle the NotificationConfig object if it exists
  if hasattr(self, "notification") and self.notification is not None:
  if hasattr(self.notification, "model_dump"):
@@ -76,7 +87,7 @@ class AlertResult(BaseModel):
  "communication_methods": notif.communication_methods,
  "email_addresses": notif.email_addresses,
  "slack_channels": getattr(notif, "slack_channels", []),
- "send_at": notif.send_at
+ "send_at": notif.send_at,
  }
-
- return data
+
+ return data
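
The alerts.py changes are formatting-only; the `model_dump` override keeps its dual path, using Pydantic v2's `model_dump` when available and falling back to `.dict()` otherwise. A minimal usage sketch, assuming the 0.0.46 field set shown above (the rule name and metadata values are made up for illustration):

    from judgeval.utils.alerts import AlertResult, AlertStatus

    result = AlertResult(
        rule_name="latency-threshold",   # hypothetical rule name
        status=AlertStatus.NOT_TRIGGERED,
        metadata={"example_id": "ex-123", "timestamp": "2024-01-01T00:00:00Z"},
    )
    payload = result.model_dump()        # falls back to .dict() on Pydantic v1
    print(payload["rule_name"], result.example_id, result.timestamp)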
judgeval/utils/{data_utils.py → file_utils.py} CHANGED
@@ -1,15 +1,11 @@
  import yaml
- from judgeval.common.logger import (
- debug,
- info,
- error,
- example_logging_context
- )
+ from typing import List
+ from judgeval.common.logger import debug, info, error
 
  from judgeval.data import Example
 
 
- def add_from_yaml(file_path: str) -> None:
+ def get_examples_from_yaml(file_path: str) -> List[Example] | None:
  debug(f"Loading dataset from YAML file: {file_path}")
  """
  Adds examples from a YAML file.
@@ -51,7 +47,7 @@ def add_from_yaml(file_path: str) -> None:
  except yaml.YAMLError:
  error(f"Invalid YAML file: {file_path}")
  raise ValueError(f"The file {file_path} is not a valid YAML file.")
-
+
  info(f"Added {len(examples)} examples from YAML")
  new_examples = [Example(**e) for e in examples]
- return new_examples
+ return new_examples
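
With the rename to file_utils.py, the YAML loader is now exposed as `get_examples_from_yaml`, and its return annotation reflects the existing `return new_examples` behaviour. A minimal sketch of the new call site (the YAML file name is illustrative):

    from judgeval.utils.file_utils import get_examples_from_yaml

    examples = get_examples_from_yaml("examples.yaml")  # -> List[Example] | None
    print(len(examples or []))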
judgeval/utils/requests.py ADDED
@@ -0,0 +1,29 @@
+ import requests as requests_original
+ from requests.adapters import HTTPAdapter
+ from urllib3.util.retry import Retry
+ from http import HTTPStatus
+
+
+ class RetrySession(requests_original.Session):
+ def __init__(
+ self,
+ retries=3,
+ backoff_factor=0.5,
+ status_forcelist=[HTTPStatus.BAD_GATEWAY, HTTPStatus.SERVICE_UNAVAILABLE],
+ ):
+ super().__init__()
+
+ retry_strategy = Retry(
+ total=retries,
+ read=retries,
+ connect=retries,
+ backoff_factor=backoff_factor,
+ status_forcelist=status_forcelist,
+ )
+
+ adapter = HTTPAdapter(max_retries=retry_strategy)
+ self.mount("http://", adapter)
+ self.mount("https://", adapter)
+
+
+ requests = RetrySession()
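
judgeval/utils/requests.py is new in 0.0.46: it exposes a module-level `RetrySession` instance named `requests`, so callers that import it get automatic retries on 502/503 responses with exponential backoff (version_check.py below switches to this import). A minimal sketch of how a caller might use it; the helper function name is hypothetical:

    from judgeval.utils.requests import requests  # RetrySession instance, not the stock requests module

    def fetch_pypi_metadata(package_name: str = "judgeval") -> dict:
        # Hypothetical helper; same call pattern as version_check.py below.
        # Retries on BAD_GATEWAY/SERVICE_UNAVAILABLE are handled by the mounted HTTPAdapter.
        response = requests.get(f"https://pypi.org/pypi/{package_name}/json", timeout=2)
        response.raise_for_status()
        return response.json()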
judgeval/version_check.py CHANGED
@@ -1,12 +1,15 @@
  import importlib.metadata
- import requests
+ from judgeval.utils.requests import requests
  import threading
 
+
  def check_latest_version(package_name: str = "judgeval"):
  def _check():
  try:
  current_version = importlib.metadata.version(package_name)
- response = requests.get(f"https://pypi.org/pypi/{package_name}/json", timeout=2)
+ response = requests.get(
+ f"https://pypi.org/pypi/{package_name}/json", timeout=2
+ )
  latest_version = response.json()["info"]["version"]
 
  if current_version != latest_version: