judgeval 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. judgeval/__init__.py +4 -4
  2. judgeval/api/__init__.py +22 -8
  3. judgeval/api/api_types.py +30 -17
  4. judgeval/data/evaluation_run.py +10 -11
  5. judgeval/data/judgment_types.py +25 -14
  6. judgeval/data/result.py +1 -0
  7. judgeval/data/scorer_data.py +1 -26
  8. judgeval/dataset/__init__.py +32 -8
  9. judgeval/env.py +11 -2
  10. judgeval/evaluation/__init__.py +20 -63
  11. judgeval/integrations/langgraph/__init__.py +2 -1
  12. judgeval/scorers/__init__.py +0 -4
  13. judgeval/scorers/agent_scorer.py +15 -15
  14. judgeval/scorers/api_scorer.py +0 -8
  15. judgeval/scorers/base_scorer.py +2 -2
  16. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -2
  17. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
  18. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -2
  19. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -2
  20. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +3 -5
  21. judgeval/scorers/score.py +1 -1
  22. judgeval/tracer/__init__.py +7 -10
  23. judgeval/tracer/local_eval_queue.py +11 -7
  24. judgeval/tracer/utils.py +2 -2
  25. judgeval/trainer/config.py +1 -1
  26. judgeval/trainer/trainable_model.py +1 -1
  27. judgeval/trainer/trainer.py +8 -6
  28. judgeval/utils/async_utils.py +7 -3
  29. judgeval/utils/testing.py +0 -4
  30. {judgeval-0.10.0.dist-info → judgeval-0.11.0.dist-info}/METADATA +1 -1
  31. {judgeval-0.10.0.dist-info → judgeval-0.11.0.dist-info}/RECORD +34 -35
  32. judgeval/data/tool.py +0 -5
  33. {judgeval-0.10.0.dist-info → judgeval-0.11.0.dist-info}/WHEEL +0 -0
  34. {judgeval-0.10.0.dist-info → judgeval-0.11.0.dist-info}/entry_points.txt +0 -0
  35. {judgeval-0.10.0.dist-info → judgeval-0.11.0.dist-info}/licenses/LICENSE.md +0 -0
judgeval/__init__.py CHANGED
@@ -6,7 +6,8 @@ from judgeval.data.evaluation_run import ExampleEvaluationRun
 
 
 from typing import List, Optional, Union
-from judgeval.scorers import BaseScorer, ExampleAPIScorerConfig
+from judgeval.scorers import APIScorerConfig
+from judgeval.scorers.example_scorer import ExampleScorer
 from judgeval.data.example import Example
 from judgeval.logger import judgeval_logger
 from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_DEFAULT_GPT_MODEL, JUDGMENT_ORG_ID
@@ -38,7 +39,7 @@ class JudgmentClient(metaclass=SingletonMeta):
     def run_evaluation(
         self,
         examples: List[Example],
-        scorers: List[Union[ExampleAPIScorerConfig, BaseScorer]],
+        scorers: List[Union[APIScorerConfig, ExampleScorer]],
         project_name: str = "default_project",
         eval_run_name: str = "default_eval_run",
         model: str = JUDGMENT_DEFAULT_GPT_MODEL,
@@ -51,10 +52,9 @@ class JudgmentClient(metaclass=SingletonMeta):
             examples=examples,
             scorers=scorers,
             model=model,
-            organization_id=self.organization_id,
         )
 
-        results = run_eval(eval, self.api_key)
+        results = run_eval(eval)
         if assert_test:
             assert_test_results(results)
 
judgeval/api/__init__.py CHANGED
@@ -137,6 +137,13 @@ class JudgmentSyncClient:
             payload,
         )
 
+    def datasets_pull_all_for_judgeval(self, payload: DatasetsFetch) -> Any:
+        return self._request(
+            "POST",
+            url_for("/datasets/pull_all_for_judgeval/"),
+            payload,
+        )
+
     def datasets_create_for_judgeval(self, payload: DatasetCreate) -> Any:
         return self._request(
             "POST",
@@ -174,12 +181,12 @@ class JudgmentSyncClient:
             payload,
         )
 
-    def fetch_scorer(
-        self, payload: FetchPromptScorerRequest
-    ) -> FetchPromptScorerResponse:
+    def fetch_scorers(
+        self, payload: FetchPromptScorersRequest
+    ) -> FetchPromptScorersResponse:
         return self._request(
             "POST",
-            url_for("/fetch_scorer/"),
+            url_for("/fetch_scorers/"),
             payload,
         )
 
@@ -339,6 +346,13 @@ class JudgmentAsyncClient:
             payload,
         )
 
+    async def datasets_pull_all_for_judgeval(self, payload: DatasetsFetch) -> Any:
+        return await self._request(
+            "POST",
+            url_for("/datasets/pull_all_for_judgeval/"),
+            payload,
+        )
+
     async def datasets_create_for_judgeval(self, payload: DatasetCreate) -> Any:
         return await self._request(
             "POST",
@@ -378,12 +392,12 @@ class JudgmentAsyncClient:
             payload,
         )
 
-    async def fetch_scorer(
-        self, payload: FetchPromptScorerRequest
-    ) -> FetchPromptScorerResponse:
+    async def fetch_scorers(
+        self, payload: FetchPromptScorersRequest
+    ) -> FetchPromptScorersResponse:
         return await self._request(
             "POST",
-            url_for("/fetch_scorer/"),
+            url_for("/fetch_scorers/"),
             payload,
         )
 
judgeval/api/api_types.py CHANGED
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 # filename: .openapi.json
-# timestamp: 2025-09-10T17:42:12+00:00
+# timestamp: 2025-09-12T16:54:35+00:00
 
 from __future__ import annotations
 from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -20,6 +20,10 @@ class DatasetFetch(TypedDict):
     project_name: str
 
 
+class DatasetsFetch(TypedDict):
+    project_name: str
+
+
 class ProjectAdd(TypedDict):
     project_name: str
 
@@ -57,8 +61,8 @@ class SavePromptScorerResponse(TypedDict):
     name: str
 
 
-class FetchPromptScorerRequest(TypedDict):
-    name: str
+class FetchPromptScorersRequest(TypedDict):
+    names: NotRequired[Optional[List[str]]]
 
 
 class CustomScorerUploadPayload(TypedDict):
@@ -151,7 +155,7 @@ class ScorerData(TypedDict):
     score: NotRequired[Optional[float]]
     reason: NotRequired[Optional[str]]
     strict_mode: NotRequired[Optional[bool]]
-    evaluation_model: NotRequired[str]
+    evaluation_model: NotRequired[Optional[str]]
     error: NotRequired[Optional[str]]
     additional_metadata: NotRequired[Optional[Dict[str, Any]]]
 
@@ -186,13 +190,13 @@ class OtelTraceSpan(TypedDict):
 
 
 class ExampleEvaluationRun(TypedDict):
-    id: NotRequired[Optional[str]]
-    project_name: NotRequired[Optional[str]]
-    eval_name: NotRequired[Optional[str]]
+    id: NotRequired[str]
+    project_name: str
+    eval_name: str
     custom_scorers: NotRequired[List[BaseScorer]]
     judgment_scorers: NotRequired[List[ScorerConfig]]
     model: str
-    created_at: NotRequired[Optional[str]]
+    created_at: NotRequired[str]
     examples: List[Example]
     trace_span_id: NotRequired[Optional[str]]
     trace_id: NotRequired[Optional[str]]
@@ -203,13 +207,13 @@ class HTTPValidationError(TypedDict):
 
 
 class TraceEvaluationRun(TypedDict):
-    id: NotRequired[Optional[str]]
-    project_name: NotRequired[Optional[str]]
-    eval_name: NotRequired[Optional[str]]
+    id: NotRequired[str]
+    project_name: str
+    eval_name: str
     custom_scorers: NotRequired[List[BaseScorer]]
     judgment_scorers: NotRequired[List[ScorerConfig]]
     model: str
-    created_at: NotRequired[Optional[str]]
+    created_at: NotRequired[str]
     trace_and_span_ids: List[TraceAndSpanId]
     is_offline: NotRequired[bool]
 
@@ -226,21 +230,30 @@ class DatasetReturn(TypedDict):
     examples: NotRequired[Optional[List[Example]]]
 
 
+class DatasetInfo(TypedDict):
+    dataset_id: str
+    name: str
+    created_at: str
+    dataset_kind: DatasetKind
+    entries: int
+    creator: str
+
+
 class DatasetCreate(TypedDict):
     name: str
     dataset_kind: DatasetKind
     project_name: str
-    examples: NotRequired[Optional[List[Example]]]
-    overwrite: NotRequired[Optional[bool]]
+    examples: List[Example]
+    overwrite: bool
 
 
-class FetchPromptScorerResponse(TypedDict):
-    scorer: PromptScorer
+class FetchPromptScorersResponse(TypedDict):
+    scorers: List[PromptScorer]
 
 
 class ScoringResult(TypedDict):
     success: bool
-    scorers_data: Optional[List[ScorerData]]
+    scorers_data: List[ScorerData]
     name: NotRequired[Optional[str]]
     data_object: NotRequired[Optional[Union[OtelTraceSpan, Example]]]
     trace_id: NotRequired[Optional[str]]
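Note: the renamed request/response types above pair with the fetch_scorers and datasets_pull_all_for_judgeval methods added in judgeval/api/__init__.py. A hedged sketch of exercising them with the low-level sync client; the constructor arguments follow the (api_key, organization_id) pattern used by Dataset.list further down, and the project and scorer names are illustrative:

from judgeval.api import JudgmentSyncClient
from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_ORG_ID

client = JudgmentSyncClient(JUDGMENT_API_KEY, JUDGMENT_ORG_ID)

# New in 0.11.0: pull every dataset in a project (DatasetsFetch payload).
datasets = client.datasets_pull_all_for_judgeval({"project_name": "default_project"})

# fetch_scorer -> fetch_scorers: the request is batched and "names" is optional.
response = client.fetch_scorers({"names": ["helpfulness"]})
prompt_scorers = response["scorers"]  # FetchPromptScorersResponse now carries a list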
judgeval/data/evaluation_run.py CHANGED
@@ -1,11 +1,11 @@
 from typing import List, Optional, Union, Tuple
-from litellm.files.main import BaseModel
-from pydantic import field_validator, model_validator, Field
+from pydantic import field_validator, model_validator, Field, BaseModel
 from datetime import datetime, timezone
 import uuid
 
 from judgeval.data import Example
-from judgeval.scorers import BaseScorer, APIScorerConfig
+from judgeval.scorers import APIScorerConfig
+from judgeval.scorers.example_scorer import ExampleScorer
 from judgeval.constants import ACCEPTABLE_MODELS
 from judgeval.data.judgment_types import (
     ExampleEvaluationRun as ExampleEvaluationRunJudgmentType,
@@ -14,19 +14,18 @@ from judgeval.data.judgment_types import (
 
 
 class EvaluationRun(BaseModel):
-    id: Optional[str] = Field(default_factory=lambda: str(uuid.uuid4()))
-    created_at: Optional[str] = Field(
+    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
+    created_at: str = Field(
         default_factory=lambda: datetime.now(timezone.utc).isoformat()
     )
-    organization_id: Optional[str] = None
-    custom_scorers: Optional[List[BaseScorer]] = None
-    judgment_scorers: Optional[List[APIScorerConfig]] = None
-    scorers: Optional[List[Union[BaseScorer, APIScorerConfig]]] = None
+    custom_scorers: List[ExampleScorer] = Field(default_factory=list)
+    judgment_scorers: List[APIScorerConfig] = Field(default_factory=list)
+    scorers: List[Union[ExampleScorer, APIScorerConfig]] = Field(default_factory=list)
     model: str
 
     def __init__(
         self,
-        scorers: Optional[List[Union[BaseScorer, APIScorerConfig]]] = None,
+        scorers: Optional[List[Union[ExampleScorer, APIScorerConfig]]] = None,
         **kwargs,
     ):
         """
@@ -38,7 +37,7 @@ class EvaluationRun(BaseModel):
         """
         if scorers is not None:
             # Automatically sort scorers into appropriate fields
-            custom_scorers = [s for s in scorers if isinstance(s, BaseScorer)]
+            custom_scorers = [s for s in scorers if isinstance(s, ExampleScorer)]
             judgment_scorers = [s for s in scorers if isinstance(s, APIScorerConfig)]
 
             # Always set both fields as lists (even if empty) to satisfy validation
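Note: the constructor above routes a mixed scorers list into custom_scorers (ExampleScorer instances) and judgment_scorers (APIScorerConfig instances), and both fields now default to empty lists instead of None. A rough sketch of that routing, assuming ExampleScorer can be subclassed with no extra required fields and that the model validators accept this minimal configuration:

from judgeval.data.evaluation_run import EvaluationRun
from judgeval.scorers.example_scorer import ExampleScorer

class LengthScorer(ExampleScorer):  # hypothetical custom scorer, illustration only
    pass

run = EvaluationRun(scorers=[LengthScorer()], model="gpt-4.1")
# The ExampleScorer instance is routed to run.custom_scorers; any APIScorerConfig
# instances in the same list would land in run.judgment_scorers instead.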
judgeval/data/judgment_types.py CHANGED
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 # filename: .openapi.json
-# timestamp: 2025-09-10T17:42:11+00:00
+# timestamp: 2025-09-12T16:54:34+00:00
 
 from __future__ import annotations
 from typing import Annotated, Any, Dict, List, Optional, Union
@@ -22,6 +22,10 @@ class DatasetFetch(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]
 
 
+class DatasetsFetch(BaseModel):
+    project_name: Annotated[str, Field(title="Project Name")]
+
+
 class ProjectAdd(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]
 
@@ -59,8 +63,8 @@ class SavePromptScorerResponse(BaseModel):
     name: Annotated[str, Field(title="Name")]
 
 
-class FetchPromptScorerRequest(BaseModel):
-    name: Annotated[str, Field(title="Name")]
+class FetchPromptScorersRequest(BaseModel):
+    names: Annotated[Optional[List[str]], Field(title="Names")] = None
 
 
 class CustomScorerUploadPayload(BaseModel):
@@ -210,8 +214,8 @@ class OtelTraceSpan(BaseModel):
 
 class ExampleEvaluationRun(BaseModel):
     id: Annotated[Optional[str], Field(title="Id")] = None
-    project_name: Annotated[Optional[str], Field(title="Project Name")] = None
-    eval_name: Annotated[Optional[str], Field(title="Eval Name")] = None
+    project_name: Annotated[str, Field(title="Project Name")]
+    eval_name: Annotated[str, Field(title="Eval Name")]
     custom_scorers: Annotated[
         Optional[List[BaseScorer]], Field(title="Custom Scorers")
     ] = []
@@ -231,8 +235,8 @@ class HTTPValidationError(BaseModel):
 
 class TraceEvaluationRun(BaseModel):
     id: Annotated[Optional[str], Field(title="Id")] = None
-    project_name: Annotated[Optional[str], Field(title="Project Name")] = None
-    eval_name: Annotated[Optional[str], Field(title="Eval Name")] = None
+    project_name: Annotated[str, Field(title="Project Name")]
+    eval_name: Annotated[str, Field(title="Eval Name")]
     custom_scorers: Annotated[
         Optional[List[BaseScorer]], Field(title="Custom Scorers")
     ] = []
@@ -259,23 +263,30 @@ class DatasetReturn(BaseModel):
     examples: Annotated[Optional[List[Example]], Field(title="Examples")] = None
 
 
+class DatasetInfo(BaseModel):
+    dataset_id: Annotated[str, Field(title="Dataset Id")]
+    name: Annotated[str, Field(title="Name")]
+    created_at: Annotated[str, Field(title="Created At")]
+    dataset_kind: DatasetKind
+    entries: Annotated[int, Field(title="Entries")]
+    creator: Annotated[str, Field(title="Creator")]
+
+
 class DatasetCreate(BaseModel):
     name: Annotated[str, Field(title="Name")]
     dataset_kind: DatasetKind
     project_name: Annotated[str, Field(title="Project Name")]
-    examples: Annotated[Optional[List[Example]], Field(title="Examples")] = None
-    overwrite: Annotated[Optional[bool], Field(title="Overwrite")] = False
+    examples: Annotated[List[Example], Field(title="Examples")]
+    overwrite: Annotated[bool, Field(title="Overwrite")]
 
 
-class FetchPromptScorerResponse(BaseModel):
-    scorer: PromptScorer
+class FetchPromptScorersResponse(BaseModel):
+    scorers: Annotated[List[PromptScorer], Field(title="Scorers")]
 
 
 class ScoringResult(BaseModel):
     success: Annotated[bool, Field(title="Success")]
-    scorers_data: Annotated[Optional[List[ScorerData]], Field(title="Scorers Data")] = (
-        None
-    )
+    scorers_data: Annotated[List[ScorerData], Field(title="Scorers Data")]
     name: Annotated[Optional[str], Field(title="Name")] = None
     data_object: Annotated[
         Optional[Union[OtelTraceSpan, Example]], Field(title="Data Object")
judgeval/data/result.py CHANGED
@@ -18,6 +18,7 @@ class ScoringResult(JudgmentScoringResult):
 
     # Need to override this so that it uses this repo's Example class
     data_object: Example
+    scorers_data: List[ScorerData]
 
     def model_dump(self, **kwargs):
         data = super().model_dump(**kwargs)
judgeval/data/scorer_data.py CHANGED
@@ -6,36 +6,11 @@ ScorerData holds the information related to a single, completed Scorer evaluatio
 
 from __future__ import annotations
 
-from judgeval.data.judgment_types import ScorerData as JudgmentScorerData
+from judgeval.data.judgment_types import ScorerData
 from judgeval.scorers import BaseScorer
 from typing import List
 
 
-class ScorerData(JudgmentScorerData):
-    """
-    ScorerData holds the information related to a single, completed Scorer evaluation run.
-
-    For example, if running the Judgment Faithfulness scorer on an example, the ScorerData
-    object will contain whether the example passed its threshold expectation, as well as more detailed
-    information surrounding the evaluation run such as the claims and verdicts generated by the
-    judge model(s).
-    """
-
-    def to_dict(self) -> dict:
-        """Convert the ScorerData instance to a JSON-serializable dictionary."""
-        return {
-            "name": self.name,
-            "threshold": self.threshold,
-            "success": self.success,
-            "score": self.score,
-            "reason": self.reason,
-            "strict_mode": self.strict_mode,
-            "evaluation_model": self.evaluation_model,
-            "error": self.error,
-            "additional_metadata": self.additional_metadata,
-        }
-
-
 def create_scorer_data(scorer: BaseScorer) -> List[ScorerData]:
     """
     After a `scorer` is run, it contains information about the example that was evaluated
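Note: the ScorerData subclass and its to_dict() helper were removed; judgeval.data.scorer_data now re-exports the generated pydantic model from judgment_types. A hedged sketch of the replacement for code that called to_dict(); `sd` stands for any ScorerData instance you already have, for example from a ScoringResult:

from judgeval.data.scorer_data import ScorerData

def scorer_data_as_dict(sd: ScorerData) -> dict:
    # pydantic's model_dump() covers the fields the old to_dict() returned
    # (name, threshold, success, score, reason, strict_mode, evaluation_model,
    # error, additional_metadata).
    return sd.model_dump()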
judgeval/dataset/__init__.py CHANGED
@@ -3,7 +3,7 @@ import orjson
 import os
 import yaml
 from dataclasses import dataclass
-from typing import List, Literal, Optional
+from typing import List, Literal
 
 from judgeval.data import Example
 from judgeval.utils.file_utils import get_examples_from_yaml, get_examples_from_json
@@ -11,6 +11,18 @@ from judgeval.api import JudgmentSyncClient
 from judgeval.logger import judgeval_logger
 from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_ORG_ID
 
+from judgeval.api.api_types import DatasetKind
+
+
+@dataclass
+class DatasetInfo:
+    dataset_id: str
+    name: str
+    created_at: str
+    dataset_kind: DatasetKind
+    entries: int
+    creator: str
+
 
 @dataclass
 class Dataset:
@@ -36,13 +48,16 @@ class Dataset:
         if not dataset:
             raise ValueError(f"Dataset {name} not found in project {project_name}")
         examples = dataset.get("examples", [])
+        if examples is None:
+            examples = []
+
         for e in examples:
-            if isinstance(e, dict) and isinstance(e.get("data"), dict):
-                e.update(e.pop("data"))
+            if isinstance(e, dict) and isinstance(e.get("data", {}), dict):
+                e.update(e.pop("data"))  # type: ignore
             e.pop(
                 "example_id"
             )  # TODO: remove once scorer data migraiton is complete
-        judgeval_logger.info(f"Succesfully retrieved dataset {name}!")
+        judgeval_logger.info(f"Successfully retrieved dataset {name}!")
         return cls(
             name=name,
             project_name=project_name,
@@ -54,7 +69,7 @@ class Dataset:
         cls,
         name: str,
         project_name: str,
-        examples: Optional[List[Example]] = None,
+        examples: List[Example] = [],
         overwrite: bool = False,
     ):
         if not examples:
@@ -65,19 +80,28 @@ class Dataset:
             {
                 "name": name,
                 "project_name": project_name,
-                "examples": [e.model_dump() for e in examples],
+                "examples": examples,  # type: ignore
                 "dataset_kind": "example",
                 "overwrite": overwrite,
             }
         )
 
-        judgeval_logger.info(f"Succesfull created dataset {name}!")
+        judgeval_logger.info(f"Successfully created dataset {name}!")
         return cls(
             name=name,
             project_name=project_name,
            examples=examples,
        )
 
+    @classmethod
+    def list(cls, project_name: str):
+        client = JudgmentSyncClient(cls.judgment_api_key, cls.organization_id)
+        datasets = client.datasets_pull_all_for_judgeval({"project_name": project_name})
+
+        judgeval_logger.info(f"Fetched all datasets for project {project_name}!")
+
+        return [DatasetInfo(**dataset_info) for dataset_info in datasets]
+
     def add_from_json(self, file_path: str) -> None:
         """
         Adds examples from a JSON file.
@@ -124,7 +148,7 @@ class Dataset:
             {
                 "dataset_name": self.name,
                 "project_name": self.project_name,
-                "examples": [e.model_dump() for e in examples],
+                "examples": examples,  # type: ignore
             }
         )
 
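Note: Dataset.list is new in 0.11.0 and returns the DatasetInfo entries defined at the top of this file. A small sketch of using it; the project name is illustrative:

from judgeval.dataset import Dataset

for info in Dataset.list(project_name="default_project"):
    print(info.name, info.dataset_kind, info.entries, info.creator)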
judgeval/env.py CHANGED
@@ -19,8 +19,17 @@ def optional_env_var(var_name: str, default: str | None = None) -> str | None:
     return os.getenv(var_name, default)
 
 
-JUDGMENT_API_KEY = optional_env_var("JUDGMENT_API_KEY")
-JUDGMENT_ORG_ID = optional_env_var("JUDGMENT_ORG_ID")
+def required_env_var(var_name: str) -> str:
+    value = os.getenv(var_name)
+    if value is None:
+        raise EnvironmentError(
+            f"Environment variable '{var_name}' is required but not set."
+        )
+    return value
+
+
+JUDGMENT_API_KEY = required_env_var("JUDGMENT_API_KEY")
+JUDGMENT_ORG_ID = required_env_var("JUDGMENT_ORG_ID")
 JUDGMENT_API_URL = optional_env_var("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")
 
 JUDGMENT_DEFAULT_GPT_MODEL = optional_env_var("JUDGMENT_DEFAULT_GPT_MODEL", "gpt-4.1")
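Note: JUDGMENT_API_KEY and JUDGMENT_ORG_ID are now resolved with required_env_var at import time, so importing judgeval without them set raises EnvironmentError. One hedged way to surface the new requirement early in a script (the message and exit style are just one option):

import os

# judgeval 0.11.0 reads these eagerly in judgeval/env.py and raises if they are missing,
# so check before importing anything from the package.
for var in ("JUDGMENT_API_KEY", "JUDGMENT_ORG_ID"):
    if not os.getenv(var):
        raise SystemExit(f"Set {var} before running this script (required since judgeval 0.11.0).")

from judgeval import JudgmentClient  # safe to import once the variables are set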