judgeval 0.7.1__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. judgeval/__init__.py +139 -12
  2. judgeval/api/__init__.py +501 -0
  3. judgeval/api/api_types.py +344 -0
  4. judgeval/cli.py +2 -4
  5. judgeval/constants.py +10 -26
  6. judgeval/data/evaluation_run.py +49 -26
  7. judgeval/data/example.py +2 -2
  8. judgeval/data/judgment_types.py +266 -82
  9. judgeval/data/result.py +4 -5
  10. judgeval/data/scorer_data.py +4 -2
  11. judgeval/data/tool.py +2 -2
  12. judgeval/data/trace.py +7 -50
  13. judgeval/data/trace_run.py +7 -4
  14. judgeval/{dataset.py → dataset/__init__.py} +43 -28
  15. judgeval/env.py +67 -0
  16. judgeval/{run_evaluation.py → evaluation/__init__.py} +29 -95
  17. judgeval/exceptions.py +27 -0
  18. judgeval/integrations/langgraph/__init__.py +788 -0
  19. judgeval/judges/__init__.py +2 -2
  20. judgeval/judges/litellm_judge.py +75 -15
  21. judgeval/judges/together_judge.py +86 -18
  22. judgeval/judges/utils.py +7 -21
  23. judgeval/{common/logger.py → logger.py} +8 -6
  24. judgeval/scorers/__init__.py +0 -4
  25. judgeval/scorers/agent_scorer.py +3 -7
  26. judgeval/scorers/api_scorer.py +8 -13
  27. judgeval/scorers/base_scorer.py +52 -32
  28. judgeval/scorers/example_scorer.py +1 -3
  29. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -14
  30. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +45 -20
  31. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +2 -2
  32. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +3 -3
  33. judgeval/scorers/score.py +21 -31
  34. judgeval/scorers/trace_api_scorer.py +5 -0
  35. judgeval/scorers/utils.py +1 -103
  36. judgeval/tracer/__init__.py +1075 -2
  37. judgeval/tracer/constants.py +1 -0
  38. judgeval/tracer/exporters/__init__.py +37 -0
  39. judgeval/tracer/exporters/s3.py +119 -0
  40. judgeval/tracer/exporters/store.py +43 -0
  41. judgeval/tracer/exporters/utils.py +32 -0
  42. judgeval/tracer/keys.py +67 -0
  43. judgeval/tracer/llm/__init__.py +1233 -0
  44. judgeval/{common/tracer → tracer/llm}/providers.py +5 -10
  45. judgeval/{local_eval_queue.py → tracer/local_eval_queue.py} +15 -10
  46. judgeval/tracer/managers.py +188 -0
  47. judgeval/tracer/processors/__init__.py +181 -0
  48. judgeval/tracer/utils.py +20 -0
  49. judgeval/trainer/__init__.py +5 -0
  50. judgeval/{common/trainer → trainer}/config.py +12 -9
  51. judgeval/{common/trainer → trainer}/console.py +2 -9
  52. judgeval/{common/trainer → trainer}/trainable_model.py +12 -7
  53. judgeval/{common/trainer → trainer}/trainer.py +119 -17
  54. judgeval/utils/async_utils.py +2 -3
  55. judgeval/utils/decorators.py +24 -0
  56. judgeval/utils/file_utils.py +37 -4
  57. judgeval/utils/guards.py +32 -0
  58. judgeval/utils/meta.py +14 -0
  59. judgeval/{common/api/json_encoder.py → utils/serialize.py} +7 -1
  60. judgeval/utils/testing.py +88 -0
  61. judgeval/utils/url.py +10 -0
  62. judgeval/{version_check.py → utils/version_check.py} +3 -3
  63. judgeval/version.py +5 -0
  64. judgeval/warnings.py +4 -0
  65. {judgeval-0.7.1.dist-info → judgeval-0.9.0.dist-info}/METADATA +12 -14
  66. judgeval-0.9.0.dist-info/RECORD +80 -0
  67. judgeval/clients.py +0 -35
  68. judgeval/common/__init__.py +0 -13
  69. judgeval/common/api/__init__.py +0 -3
  70. judgeval/common/api/api.py +0 -375
  71. judgeval/common/api/constants.py +0 -186
  72. judgeval/common/exceptions.py +0 -27
  73. judgeval/common/storage/__init__.py +0 -6
  74. judgeval/common/storage/s3_storage.py +0 -97
  75. judgeval/common/tracer/__init__.py +0 -31
  76. judgeval/common/tracer/constants.py +0 -22
  77. judgeval/common/tracer/core.py +0 -2427
  78. judgeval/common/tracer/otel_exporter.py +0 -108
  79. judgeval/common/tracer/otel_span_processor.py +0 -188
  80. judgeval/common/tracer/span_processor.py +0 -37
  81. judgeval/common/tracer/span_transformer.py +0 -207
  82. judgeval/common/tracer/trace_manager.py +0 -101
  83. judgeval/common/trainer/__init__.py +0 -5
  84. judgeval/common/utils.py +0 -948
  85. judgeval/integrations/langgraph.py +0 -844
  86. judgeval/judges/mixture_of_judges.py +0 -287
  87. judgeval/judgment_client.py +0 -267
  88. judgeval/rules.py +0 -521
  89. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  90. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  91. judgeval/utils/alerts.py +0 -93
  92. judgeval/utils/requests.py +0 -50
  93. judgeval-0.7.1.dist-info/RECORD +0 -82
  94. {judgeval-0.7.1.dist-info → judgeval-0.9.0.dist-info}/WHEEL +0 -0
  95. {judgeval-0.7.1.dist-info → judgeval-0.9.0.dist-info}/entry_points.txt +0 -0
  96. {judgeval-0.7.1.dist-info → judgeval-0.9.0.dist-info}/licenses/LICENSE.md +0 -0
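Before the per-file hunks, the renames and deletions listed above imply a set of import-path changes for 0.9.0: the `judgeval.common` namespace is flattened and the hand-written `JudgmentApiClient` is replaced by what appears to be a generated client (`judgeval/api/__init__.py`, `api_types.py`). A minimal sketch of the new top-level imports, using only modules that appear in this diff (the commented lines show the 0.7.1 equivalents):

```python
# 0.7.1 import paths (modules removed in 0.9.0):
# from judgeval.common.logger import judgeval_logger
# from judgeval.common.api import JudgmentApiClient
# from judgeval.common.exceptions import JudgmentAPIError

# 0.9.0 import paths (modules added or renamed in this diff):
from judgeval.logger import judgeval_logger
from judgeval.api import JudgmentSyncClient
from judgeval.exceptions import JudgmentAPIError, JudgmentRuntimeError
from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_ORG_ID
```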
judgeval/{dataset.py → dataset/__init__.py} RENAMED
@@ -7,8 +7,9 @@ from typing import List, Literal, Optional
 
 from judgeval.data import Example, Trace
 from judgeval.utils.file_utils import get_examples_from_yaml, get_examples_from_json
-from judgeval.common.api.api import JudgmentApiClient
-from judgeval.common.logger import judgeval_logger
+from judgeval.api import JudgmentSyncClient
+from judgeval.logger import judgeval_logger
+from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_ORG_ID
 
 
 @dataclass
@@ -17,8 +18,8 @@ class Dataset:
     traces: List[Trace]
     name: str
     project_name: str
-    judgment_api_key: str = os.getenv("JUDGMENT_API_KEY") or ""
-    organization_id: str = os.getenv("JUDGMENT_ORG_ID") or ""
+    judgment_api_key: str = JUDGMENT_API_KEY or ""
+    organization_id: str = JUDGMENT_ORG_ID or ""
 
     @classmethod
     def get(
@@ -26,10 +27,14 @@ class Dataset:
         name: str,
         project_name: str,
     ):
-        client = JudgmentApiClient(cls.judgment_api_key, cls.organization_id)
-        dataset = client.pull_dataset(name, project_name)
+        client = JudgmentSyncClient(cls.judgment_api_key, cls.organization_id)
+        dataset = client.datasets_pull_for_judgeval(
+            {
+                "dataset_alias": name,
+                "project_name": project_name,
+            },
+        )
         if not dataset:
-            judgeval_logger.error(f"Dataset {name} not found in project {project_name}")
             raise ValueError(f"Dataset {name} not found in project {project_name}")
         examples = dataset.get("examples", [])
         for e in examples:
@@ -61,14 +66,17 @@ class Dataset:
         if not traces:
             traces = []
 
-        client = JudgmentApiClient(cls.judgment_api_key, cls.organization_id)
-        client.push_dataset(
-            name,
-            project_name,
-            examples=[e.model_dump() for e in examples],
-            traces=[t.model_dump() for t in traces],
-            overwrite=overwrite,
+        client = JudgmentSyncClient(cls.judgment_api_key, cls.organization_id)
+        client.datasets_push(
+            {
+                "dataset_alias": name,
+                "project_name": project_name,
+                "examples": [e.model_dump() for e in examples],  # type: ignore
+                "traces": [t.model_dump() for t in traces],  # type: ignore
+                "overwrite": overwrite,
+            }
         )
+
         judgeval_logger.info(f"Succesfull created dataset {name}!")
         return cls(
             name=name,
@@ -115,19 +123,30 @@ class Dataset:
         self.add_examples(examples)
 
     def add_examples(self, examples: List[Example]) -> None:
-        client = JudgmentApiClient(self.judgment_api_key, self.organization_id)
-        client.append_examples(
-            dataset_alias=self.name,
-            project_name=self.project_name,
-            examples=[e.model_dump() for e in examples],
+        client = JudgmentSyncClient(self.judgment_api_key, self.organization_id)
+        client.datasets_insert_examples(
+            {
+                "dataset_alias": self.name,
+                "project_name": self.project_name,
+                "examples": [
+                    {
+                        "name": e.name,
+                        "created_at": e.created_at,
+                        "example_id": e.example_id,
+                    }
+                    for e in examples
+                ],
+            }
         )
 
     def add_traces(self, traces: List[Trace]) -> None:
-        client = JudgmentApiClient(self.judgment_api_key, self.organization_id)
-        client.append_traces(
-            dataset_alias=self.name,
-            project_name=self.project_name,
-            traces=[t.model_dump() for t in traces],
+        client = JudgmentSyncClient(self.judgment_api_key, self.organization_id)
+        client.traces_add_to_dataset(
+            {
+                "dataset_alias": self.name,
+                "project_name": self.project_name,
+                "traces": [t.model_dump() for t in traces],  # type: ignore
+            }
        )
 
     def save_as(
@@ -174,10 +193,6 @@ class Dataset:
                 f"Invalid file type: {file_type}. Please choose from {ACCEPTABLE_FILE_TYPES}"
             )
 
-    def delete(self):
-        client = JudgmentApiClient(self.judgment_api_key, self.organization_id)
-        client.delete_dataset(self.name, self.project_name)
-
     def __iter__(self):
         return iter(self.examples)
 
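Every dataset operation above switches from keyword arguments on `JudgmentApiClient` to a single request-body dict passed to `JudgmentSyncClient`. A minimal sketch of the new call shape, using only methods visible in this hunk; the dataset and project names are placeholders:

```python
from judgeval.api import JudgmentSyncClient
from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_ORG_ID

client = JudgmentSyncClient(JUDGMENT_API_KEY or "", JUDGMENT_ORG_ID or "")

# 0.7.1: client.pull_dataset("my_dataset", "my_project")
# 0.9.0: one request-body dict per endpoint.
dataset = client.datasets_pull_for_judgeval(
    {
        "dataset_alias": "my_dataset",   # placeholder
        "project_name": "my_project",    # placeholder
    }
)
examples = dataset.get("examples", [])  # the payload carries an "examples" list
```

Note also that the standalone `delete()` method is removed in 0.9.0, so deleting a dataset is no longer part of the surface shown in this file.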
judgeval/env.py ADDED
@@ -0,0 +1,67 @@
+from __future__ import annotations
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import os
+from typing import overload
+
+
+@overload
+def optional_env_var(var_name: str) -> str | None: ...
+
+
+@overload
+def optional_env_var(var_name: str, default: str) -> str: ...
+
+
+def optional_env_var(var_name: str, default: str | None = None) -> str | None:
+    return os.getenv(var_name, default)
+
+
+JUDGMENT_API_KEY = optional_env_var("JUDGMENT_API_KEY")
+JUDGMENT_ORG_ID = optional_env_var("JUDGMENT_ORG_ID")
+JUDGMENT_API_URL = optional_env_var("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")
+
+JUDGMENT_DEFAULT_GPT_MODEL = optional_env_var("JUDGMENT_DEFAULT_GPT_MODEL", "gpt-4.1")
+JUDGMENT_DEFAULT_TOGETHER_MODEL = optional_env_var(
+    "JUDGMENT_DEFAULT_TOGETHER_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct-Lite"
+)
+JUDGMENT_MAX_CONCURRENT_EVALUATIONS = int(
+    optional_env_var("JUDGMENT_MAX_CONCURRENT_EVALUATIONS", "10")
+)
+
+JUDGMENT_S3_ACCESS_KEY_ID = optional_env_var("JUDGMENT_S3_ACCESS_KEY_ID")
+JUDGMENT_S3_SECRET_ACCESS_KEY = optional_env_var("JUDGMENT_S3_SECRET_ACCESS_KEY")
+JUDGMENT_S3_REGION_NAME = optional_env_var("JUDGMENT_S3_REGION_NAME")
+JUDGMENT_S3_BUCKET_NAME = optional_env_var("JUDGMENT_S3_BUCKET_NAME")
+JUDGMENT_S3_PREFIX = optional_env_var("JUDGMENT_S3_PREFIX", "spans/")
+JUDGMENT_S3_ENDPOINT_URL = optional_env_var("JUDGMENT_S3_ENDPOINT_URL")
+JUDGMENT_S3_SIGNATURE_VERSION = optional_env_var("JUDGMENT_S3_SIGNATURE_VERSION", "s3")
+JUDGMENT_S3_ADDRESSING_STYLE = optional_env_var("JUDGMENT_S3_ADDRESSING_STYLE", "auto")
+
+
+JUDGMENT_NO_COLOR = optional_env_var("JUDGMENT_NO_COLOR")
+
+
+TOGETHERAI_API_KEY = optional_env_var("TOGETHERAI_API_KEY")
+TOGETHER_API_KEY = optional_env_var("TOGETHER_API_KEY")
+
+__all__ = (
+    "JUDGMENT_API_KEY",
+    "JUDGMENT_ORG_ID",
+    "JUDGMENT_API_URL",
+    "JUDGMENT_DEFAULT_GPT_MODEL",
+    "JUDGMENT_DEFAULT_TOGETHER_MODEL",
+    "JUDGMENT_MAX_CONCURRENT_EVALUATIONS",
+    "JUDGMENT_S3_ACCESS_KEY_ID",
+    "JUDGMENT_S3_SECRET_ACCESS_KEY",
+    "JUDGMENT_S3_REGION_NAME",
+    "JUDGMENT_S3_BUCKET_NAME",
+    "JUDGMENT_S3_PREFIX",
+    "JUDGMENT_S3_ENDPOINT_URL",
+    "JUDGMENT_S3_ADDRESSING_STYLE",
+    "JUDGMENT_NO_COLOR",
+    "TOGETHERAI_API_KEY",
+    "TOGETHER_API_KEY",
+)
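`judgeval/env.py` centralizes environment configuration behind `optional_env_var`, whose two `@overload`s make variables declared with a default resolve to `str` while those without stay `str | None`. A short illustration of how that typing plays out for callers; the `MY_TIMEOUT`/`MY_TOKEN` names are hypothetical and not part of judgeval:

```python
from __future__ import annotations

from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_API_URL, optional_env_var

base_url: str = JUDGMENT_API_URL          # default provided above -> typed str
api_key: str | None = JUDGMENT_API_KEY    # no default -> typed str | None

# Hypothetical variables, showing the two overloads directly:
timeout: str = optional_env_var("MY_TIMEOUT", "30")      # default given -> str
maybe_token: str | None = optional_env_var("MY_TOKEN")   # no default -> str | None
```

Note that `load_dotenv()` runs at import time, before any of the constants are read, so values from a local `.env` file participate in this resolution; `JUDGMENT_S3_SIGNATURE_VERSION` is defined but not listed in `__all__`.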
judgeval/{run_evaluation.py → evaluation/__init__.py} RENAMED
@@ -6,19 +6,18 @@ import time
 import orjson
 import sys
 import threading
-from typing import List, Dict, Union, Tuple, Any, TYPE_CHECKING
+from typing import List, Dict, Union, Tuple, TYPE_CHECKING
 from rich import print as rprint
 
 from judgeval.data import ScorerData, ScoringResult, Example
 from judgeval.scorers import BaseScorer, APIScorerConfig
 from judgeval.scorers.score import a_execute_scoring
-from judgeval.common.api import JudgmentApiClient
-from judgeval.constants import (
-    MAX_CONCURRENT_EVALUATIONS,
+from judgeval.api import JudgmentSyncClient
+from judgeval.env import (
+    JUDGMENT_MAX_CONCURRENT_EVALUATIONS,
 )
-from judgeval.common.exceptions import JudgmentAPIError
-from judgeval.common.api.api import JudgmentAPIException
-from judgeval.common.logger import judgeval_logger
+from judgeval.exceptions import JudgmentAPIError, JudgmentRuntimeError
+from judgeval.logger import judgeval_logger
 
 
 if TYPE_CHECKING:
@@ -48,72 +47,6 @@ def safe_run_async(coro):
     return asyncio.run(coro)
 
 
-def send_to_rabbitmq(evaluation_run: EvaluationRun) -> Dict[str, Any]:
-    """
-    Sends an evaluation run to the RabbitMQ evaluation queue.
-    """
-    if not evaluation_run.judgment_api_key or not evaluation_run.organization_id:
-        raise ValueError("API key and organization ID are required")
-    if not evaluation_run.eval_name or not evaluation_run.project_name:
-        raise ValueError("Eval name and project name are required")
-    api_client = JudgmentApiClient(
-        evaluation_run.judgment_api_key, evaluation_run.organization_id
-    )
-    return api_client.add_to_evaluation_queue(
-        evaluation_run.eval_name, evaluation_run.project_name
-    )
-
-
-def execute_api_eval(evaluation_run: EvaluationRun) -> Dict:
-    """
-    Executes an evaluation of a list of `Example`s using one or more `JudgmentScorer`s via the Judgment API.
-
-    Args:
-        evaluation_run (EvaluationRun): The evaluation run object containing the examples, scorers, and metadata
-
-    Returns:
-        List[Dict]: The results of the evaluation. Each result is a dictionary containing the fields of a `ScoringResult`
-        object.
-    """
-
-    try:
-        # submit API request to execute evals
-        if not evaluation_run.judgment_api_key or not evaluation_run.organization_id:
-            raise ValueError("API key and organization ID are required")
-        api_client = JudgmentApiClient(
-            evaluation_run.judgment_api_key, evaluation_run.organization_id
-        )
-        return api_client.run_evaluation(evaluation_run.model_dump())
-    except Exception as e:
-        judgeval_logger.error(f"Error: {e}")
-
-        details = "No details provided"
-        if isinstance(e, JudgmentAPIException):
-            details = e.response_json.get("detail", "No details provided")
-
-        raise JudgmentAPIError(
-            "An error occurred while executing the Judgment API request: " + details
-        )
-
-
-def check_missing_scorer_data(results: List[ScoringResult]) -> List[ScoringResult]:
-    """
-    Checks if any `ScoringResult` objects are missing `scorers_data`.
-
-    If any are missing, logs an error and returns the results.
-    """
-    for i, result in enumerate(results):
-        if not result.scorers_data:
-            judgeval_logger.error(
-                f"Scorer data is missing for example {i}. "
-                "This is usually caused when the example does not contain "
-                "the fields required by the scorer. "
-                "Check that your example contains the fields required by the scorers. "
-                "TODO add docs link here for reference."
-            )
-    return results
-
-
 def log_evaluation_results(
     scoring_results: List[ScoringResult],
     run: EvaluationRun,
@@ -135,17 +68,19 @@ def log_evaluation_results(
         if not judgment_api_key or not run.organization_id:
             raise ValueError("API key and organization ID are required")
 
-        api_client = JudgmentApiClient(judgment_api_key, run.organization_id)
-        response = api_client.log_evaluation_results(
-            scoring_results,
-            run.model_dump(warnings=False),
+        api_client = JudgmentSyncClient(judgment_api_key, run.organization_id)
+        response = api_client.log_eval_results(
+            {
+                "results": scoring_results,  # type: ignore
+                "run": run.model_dump(warnings=False),  # type: ignore
+            }
         )
         url = response.get("ui_results_url")
         return url
 
     except Exception as e:
         judgeval_logger.error(f"Failed to save evaluation results to DB: {str(e)}")
-        raise JudgmentAPIError(
+        raise JudgmentRuntimeError(
            f"Request failed while saving evaluation results to DB: {str(e)}"
         )
 
@@ -209,7 +144,7 @@ def _poll_evaluation_until_complete(
     """
     poll_count = 0
     exception_count = 0
-    api_client = JudgmentApiClient(judgment_api_key, organization_id)
+    api_client = JudgmentSyncClient(judgment_api_key, organization_id)
     while poll_count < max_poll_count:
         poll_count += 1
         try:
@@ -222,8 +157,11 @@
                 time.sleep(poll_interval_seconds)
                 continue
 
-            results_response = api_client.fetch_evaluation_results(
-                experiment_run_id, project_name
+            results_response = api_client.fetch_experiment_run(
+                {
+                    "experiment_run_id": experiment_run_id,
+                    "project_name": project_name,
+                }
             )
             url = results_response.get("ui_results_url")
 
@@ -264,13 +202,13 @@
 
             judgeval_logger.error(f"Error checking evaluation status: {str(e)}")
             if exception_count > max_failures:
-                raise JudgmentAPIError(
+                raise JudgmentRuntimeError(
                     f"Error checking evaluation status after {poll_count} attempts: {str(e)}"
                 )
 
             time.sleep(poll_interval_seconds)
 
-    raise JudgmentAPIError(
+    raise JudgmentRuntimeError(
        f"Error checking evaluation status after {poll_count} attempts"
    )
 
@@ -286,15 +224,12 @@ def progress_logger(stop_event, msg="Working...", interval=5):
 def run_eval(
     evaluation_run: EvaluationRun,
     judgment_api_key: str,
-    show_url: bool = True,
 ) -> List[ScoringResult]:
     """
     Executes an evaluation of `Example`s using one or more `Scorer`s
 
     Args:
         evaluation_run (EvaluationRun): Stores example and evaluation together for running
-        judgment_api_key (str): API key for authentication
-        show_url (bool): Whether to display the evaluation results URL. Defaults to True.
 
     Returns:
         List[ScoringResult]: A list of ScoringResult objects
@@ -339,11 +274,11 @@
         )
         t.start()
         try:
-            api_client = JudgmentApiClient(
+            api_client = JudgmentSyncClient(
                 judgment_api_key, evaluation_run.organization_id
             )
-            response = api_client.add_to_evaluation_queue(
-                evaluation_run.model_dump(warnings=False)
+            response = api_client.add_to_run_eval_queue_examples(
+                evaluation_run.model_dump(warnings=False)  # type: ignore
             )
 
             if not response.get("success", False):
@@ -351,7 +286,7 @@
                 judgeval_logger.error(
                     f"Error adding evaluation to queue: {error_message}"
                 )
-                raise JudgmentAPIError(error_message)
+                raise JudgmentRuntimeError(error_message)
 
             num_scorers = (
                 len(evaluation_run.judgment_scorers)
@@ -375,7 +310,7 @@
                 evaluation_run.custom_scorers,
                 model=evaluation_run.model,
                 throttle_value=0,
-                max_concurrent=MAX_CONCURRENT_EVALUATIONS,
+                max_concurrent=JUDGMENT_MAX_CONCURRENT_EVALUATIONS,
             )
         )
 
@@ -383,10 +318,9 @@
         scoring_result.model_dump(warnings=False) for scoring_result in results
     ]
     url = log_evaluation_results(send_results, evaluation_run, judgment_api_key)
-    if show_url:
-        rprint(
-            f"\n🔍 You can view your evaluation results here: [rgb(106,0,255)][link={url}]View Results[/link]\n"
-        )
+    rprint(
+        f"\n🔍 You can view your evaluation results here: [rgb(106,0,255)][link={url}]View Results[/link]\n"
+    )
     return results
 
 
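The polling and logging paths now talk to `JudgmentSyncClient` with request-body dicts and raise `JudgmentRuntimeError` for client-side failures. A rough sketch of the polling request shape, based only on the calls in these hunks; `wait_for_results_url` is a hypothetical helper, and the status-check call that precedes the fetch in `_poll_evaluation_until_complete` is omitted:

```python
import time

from judgeval.api import JudgmentSyncClient
from judgeval.exceptions import JudgmentRuntimeError


def wait_for_results_url(
    client: JudgmentSyncClient,
    experiment_run_id: str,
    project_name: str,
    attempts: int = 10,
    poll_interval_seconds: float = 5.0,
) -> str:
    for _ in range(attempts):
        # Same request-body shape as the fetch_experiment_run call above.
        response = client.fetch_experiment_run(
            {
                "experiment_run_id": experiment_run_id,
                "project_name": project_name,
            }
        )
        url = response.get("ui_results_url")
        if url:
            return url
        time.sleep(poll_interval_seconds)
    raise JudgmentRuntimeError(f"No results URL after {attempts} attempts")
```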
judgeval/exceptions.py ADDED
@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+from httpx import HTTPError, Response
+
+
+class JudgmentAPIError(HTTPError):
+    status_code: int
+    detail: str
+    response: Response
+
+    def __init__(self, status_code: int, detail: str, response: Response):
+        self.status_code = status_code
+        self.detail = detail
+        self.response = response
+        super().__init__(f"{status_code}: {detail}")
+
+
+class JudgmentTestError(Exception): ...
+
+
+class JudgmentRuntimeError(RuntimeError): ...
+
+
+class InvalidJudgeModelError(Exception): ...
+
+
+__all__ = ("JudgmentAPIError", "JudgmentRuntimeError", "InvalidJudgeModelError")