judgeval 0.19.0__py3-none-any.whl → 0.22.0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective registries, and is provided for informational purposes only.


judgeval/__init__.py CHANGED
@@ -146,6 +146,8 @@ class JudgmentClient(metaclass=SingletonMeta):
             requirements_text = f.read()

         try:
+            if not self.api_key or not self.organization_id:
+                raise ValueError("Judgment API key and organization ID are required")
             client = JudgmentSyncClient(
                 api_key=self.api_key,
                 organization_id=self.organization_id,
@@ -168,8 +170,7 @@ class JudgmentClient(metaclass=SingletonMeta):
                 judgeval_logger.error(f"Failed to upload custom scorer: {unique_name}")
                 return False

-        except Exception as e:
-            judgeval_logger.error(f"Error uploading custom scorer: {e}")
+        except Exception:
             raise

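The new guard makes the scorer-upload path fail fast: if either credential is missing, a ValueError is raised before JudgmentSyncClient is ever constructed. A minimal sketch of the same pattern (the helper name is illustrative, not part of the package):

def _require_credentials(api_key: str | None, organization_id: str | None) -> tuple[str, str]:
    # Mirrors the check added above: empty or None credentials are rejected up front
    # instead of being passed to JudgmentSyncClient.
    if not api_key or not organization_id:
        raise ValueError("Judgment API key and organization ID are required")
    return api_key, organization_id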
judgeval/api/api_types.py CHANGED
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename: .openapi.json
-#   timestamp: 2025-10-21T01:37:42+00:00
+#   timestamp: 2025-10-25T22:30:20+00:00

 from __future__ import annotations
 from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -24,15 +24,6 @@ class DatasetsFetch(TypedDict):
     project_name: str


-class DatasetsTableRow(TypedDict):
-    dataset_id: str
-    name: str
-    created_at: str
-    kind: Literal["trace", "example"]
-    entries: int
-    creator: str
-
-
 class ProjectAdd(TypedDict):
     project_name: str

@@ -67,13 +58,9 @@ class SavePromptScorerRequest(TypedDict):
     description: NotRequired[Optional[str]]


-class SavePromptScorerResponse(TypedDict):
-    message: str
-    name: str
-
-
 class FetchPromptScorersRequest(TypedDict):
     names: NotRequired[Optional[List[str]]]
+    is_trace: NotRequired[Optional[bool]]


 class CustomScorerUploadPayload(TypedDict):
@@ -193,6 +180,9 @@ DatasetKind = Literal["trace", "example"]


 class PromptScorer(TypedDict):
+    id: str
+    user_id: str
+    organization_id: str
     name: str
     prompt: str
     threshold: float
@@ -202,6 +192,7 @@ class PromptScorer(TypedDict):
     created_at: NotRequired[Optional[str]]
     updated_at: NotRequired[Optional[str]]
     is_trace: NotRequired[Optional[bool]]
+    is_bucket_rubric: NotRequired[Optional[bool]]


 class PromptCommitInfo(TypedDict):
@@ -292,6 +283,7 @@ class TraceEvaluationRun(TypedDict):
     created_at: NotRequired[str]
     trace_and_span_ids: List[TraceAndSpanId]
     is_offline: NotRequired[bool]
+    is_bucket_run: NotRequired[bool]


 class DatasetInsertExamples(TypedDict):
@@ -300,6 +292,15 @@ class DatasetInsertExamples(TypedDict):
     project_name: str


+class DatasetInfo(TypedDict):
+    dataset_id: str
+    name: str
+    created_at: str
+    kind: DatasetKind
+    entries: int
+    creator: str
+
+
 class DatasetCreate(TypedDict):
     name: str
     dataset_kind: DatasetKind
@@ -308,6 +309,10 @@ class DatasetCreate(TypedDict):
     overwrite: bool


+class SavePromptScorerResponse(TypedDict):
+    scorer_response: PromptScorer
+
+
 class FetchPromptScorersResponse(TypedDict):
     scorers: List[PromptScorer]

@@ -342,6 +347,7 @@ class OtelTraceListItem(TypedDict):
     llm_cost: NotRequired[Optional[float]]
     error: NotRequired[str]
     scores: NotRequired[List[OtelSpanListItemScores]]
+    rules_invoked: NotRequired[List[str]]
     customer_id: NotRequired[Optional[str]]
     input: NotRequired[Optional[str]]
     output: NotRequired[Optional[str]]
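Two shapes change here: DatasetsTableRow is replaced by DatasetInfo (reusing the shared DatasetKind literal), and SavePromptScorerResponse now wraps a full PromptScorer instead of a bare message/name pair. A hypothetical DatasetInfo value, assuming the generated module is importable as judgeval.api.api_types:

from judgeval.api.api_types import DatasetInfo

# Illustrative values only.
info: DatasetInfo = {
    "dataset_id": "ds_123",
    "name": "regression-suite",
    "created_at": "2025-10-25T22:30:20+00:00",
    "kind": "example",
    "entries": 42,
    "creator": "someone@example.com",
}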
judgeval/cli.py CHANGED
@@ -6,6 +6,7 @@ from dotenv import load_dotenv
 from judgeval.logger import judgeval_logger
 from judgeval import JudgmentClient
 from judgeval.version import get_version
+from judgeval.exceptions import JudgmentAPIError

 load_dotenv()

@@ -56,8 +57,15 @@ def upload_scorer(
             judgeval_logger.error("Failed to upload custom scorer")
             raise typer.Exit(1)

+        judgeval_logger.info("Custom scorer uploaded successfully!")
         raise typer.Exit(0)
-    except Exception:
+    except Exception as e:
+        if isinstance(e, JudgmentAPIError) and e.status_code == 409:
+            judgeval_logger.error(
+                "Duplicate scorer detected. Use --overwrite flag to replace the existing scorer"
+            )
+            raise typer.Exit(1)
+        # Re-raise other exceptions
         raise

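The CLI now reports a successful upload and treats an HTTP 409 from the backend as a duplicate-scorer error instead of re-raising it unformatted. A rough sketch of the same handling in user code (do_upload is a stand-in callable, not a judgeval API):

from judgeval.exceptions import JudgmentAPIError

def upload_or_explain(do_upload) -> bool:
    try:
        do_upload()
        return True
    except JudgmentAPIError as e:
        if e.status_code == 409:
            # Same message the CLI now prints for a duplicate scorer.
            print("Duplicate scorer detected. Use --overwrite flag to replace the existing scorer")
            return False
        raise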
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename: .openapi.json
-#   timestamp: 2025-10-21T01:37:41+00:00
+#   timestamp: 2025-10-25T22:30:19+00:00

 from __future__ import annotations
 from typing import Annotated, Any, Dict, List, Optional, Union
@@ -26,20 +26,6 @@ class DatasetsFetch(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]


-class Kind(Enum):
-    trace = "trace"
-    example = "example"
-
-
-class DatasetsTableRow(BaseModel):
-    dataset_id: Annotated[str, Field(title="Dataset Id")]
-    name: Annotated[str, Field(title="Name")]
-    created_at: Annotated[str, Field(title="Created At")]
-    kind: Annotated[Kind, Field(title="Kind")]
-    entries: Annotated[int, Field(title="Entries")]
-    creator: Annotated[str, Field(title="Creator")]
-
-
 class ProjectAdd(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]

@@ -74,13 +60,9 @@ class SavePromptScorerRequest(BaseModel):
     description: Annotated[Optional[str], Field(title="Description")] = None


-class SavePromptScorerResponse(BaseModel):
-    message: Annotated[str, Field(title="Message")]
-    name: Annotated[str, Field(title="Name")]
-
-
 class FetchPromptScorersRequest(BaseModel):
     names: Annotated[Optional[List[str]], Field(title="Names")] = None
+    is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = None


 class CustomScorerUploadPayload(BaseModel):
@@ -211,6 +193,9 @@ class DatasetKind(Enum):


 class PromptScorer(BaseModel):
+    id: Annotated[str, Field(title="Id")]
+    user_id: Annotated[str, Field(title="User Id")]
+    organization_id: Annotated[str, Field(title="Organization Id")]
     name: Annotated[str, Field(title="Name")]
     prompt: Annotated[str, Field(title="Prompt")]
     threshold: Annotated[float, Field(title="Threshold")]
@@ -220,6 +205,7 @@ class PromptScorer(BaseModel):
     created_at: Annotated[Optional[AwareDatetime], Field(title="Created At")] = None
     updated_at: Annotated[Optional[AwareDatetime], Field(title="Updated At")] = None
     is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False
+    is_bucket_rubric: Annotated[Optional[bool], Field(title="Is Bucket Rubric")] = None


 class PromptCommitInfo(BaseModel):
@@ -326,6 +312,7 @@ class TraceEvaluationRun(BaseModel):
         List[TraceAndSpanId], Field(title="Trace And Span Ids")
     ]
     is_offline: Annotated[Optional[bool], Field(title="Is Offline")] = False
+    is_bucket_run: Annotated[Optional[bool], Field(title="Is Bucket Run")] = False


 class DatasetInsertExamples(BaseModel):
@@ -334,6 +321,15 @@ class DatasetInsertExamples(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]


+class DatasetInfo(BaseModel):
+    dataset_id: Annotated[str, Field(title="Dataset Id")]
+    name: Annotated[str, Field(title="Name")]
+    created_at: Annotated[str, Field(title="Created At")]
+    kind: DatasetKind
+    entries: Annotated[int, Field(title="Entries")]
+    creator: Annotated[str, Field(title="Creator")]
+
+
 class DatasetCreate(BaseModel):
     name: Annotated[str, Field(title="Name")]
     dataset_kind: DatasetKind
@@ -342,6 +338,10 @@ class DatasetCreate(BaseModel):
     overwrite: Annotated[bool, Field(title="Overwrite")]


+class SavePromptScorerResponse(BaseModel):
+    scorer_response: PromptScorer
+
+
 class FetchPromptScorersResponse(BaseModel):
     scorers: Annotated[List[PromptScorer], Field(title="Scorers")]

@@ -380,6 +380,7 @@ class OtelTraceListItem(BaseModel):
     scores: Annotated[
         Optional[List[OtelSpanListItemScores]], Field(title="Scores")
     ] = []
+    rules_invoked: Annotated[Optional[List[str]], Field(title="Rules Invoked")] = []
     customer_id: Annotated[Optional[str], Field(title="Customer Id")] = None
     input: Annotated[Optional[str], Field(title="Input")] = None
     output: Annotated[Optional[str], Field(title="Output")] = None
@@ -32,8 +32,8 @@ class Dataset:
     dataset_kind: DatasetKind = DatasetKind.example
     examples: Optional[List[Example]] = None
     traces: Optional[List[Trace]] = None
-    judgment_api_key: str = JUDGMENT_API_KEY or ""
-    organization_id: str = JUDGMENT_ORG_ID or ""
+    judgment_api_key: str | None = JUDGMENT_API_KEY
+    organization_id: str | None = JUDGMENT_ORG_ID

     @classmethod
     def get(
@@ -41,6 +41,8 @@ class Dataset:
         name: str,
         project_name: str,
     ):
+        if not cls.judgment_api_key or not cls.organization_id:
+            raise ValueError("Judgment API key and organization ID are required")
         client = JudgmentSyncClient(cls.judgment_api_key, cls.organization_id)
         dataset = client.datasets_pull_for_judgeval(
             {
@@ -102,6 +104,8 @@ class Dataset:
         examples: List[Example] = [],
         overwrite: bool = False,
     ):
+        if not cls.judgment_api_key or not cls.organization_id:
+            raise ValueError("Judgment API key and organization ID are required")
         if not examples:
             examples = []

@@ -125,6 +129,8 @@ class Dataset:

     @classmethod
     def list(cls, project_name: str):
+        if not cls.judgment_api_key or not cls.organization_id:
+            raise ValueError("Judgment API key and organization ID are required")
         client = JudgmentSyncClient(cls.judgment_api_key, cls.organization_id)
         datasets = client.datasets_pull_all_for_judgeval({"project_name": project_name})

@@ -173,6 +179,9 @@ class Dataset:
         if not isinstance(examples, list):
             raise TypeError("examples must be a list")

+        if not self.judgment_api_key or not self.organization_id:
+            raise ValueError("Judgment API key and organization ID are required")
+
         client = JudgmentSyncClient(self.judgment_api_key, self.organization_id)
         client.datasets_insert_examples_for_judgeval(
             {
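Because the class-level credentials can now be None, each Dataset entry point raises ValueError when they are unset instead of sending empty strings to the API. A hedged usage sketch (the judgeval.dataset import path is assumed, not shown in this diff):

from judgeval.dataset import Dataset  # assumed module path for the Dataset class above

try:
    dataset = Dataset.get(name="my-dataset", project_name="my-project")
except ValueError:
    # Raised when JUDGMENT_API_KEY / JUDGMENT_ORG_ID are not configured.
    dataset = None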
judgeval/env.py CHANGED
@@ -19,17 +19,8 @@ def optional_env_var(var_name: str, default: str | None = None) -> str | None:
     return os.getenv(var_name, default)


-def required_env_var(var_name: str) -> str:
-    value = os.getenv(var_name)
-    if value is None:
-        raise EnvironmentError(
-            f"Environment variable '{var_name}' is required but not set."
-        )
-    return value
-
-
-JUDGMENT_API_KEY = required_env_var("JUDGMENT_API_KEY")
-JUDGMENT_ORG_ID = required_env_var("JUDGMENT_ORG_ID")
+JUDGMENT_API_KEY = optional_env_var("JUDGMENT_API_KEY")
+JUDGMENT_ORG_ID = optional_env_var("JUDGMENT_ORG_ID")
 JUDGMENT_API_URL = optional_env_var("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")

 JUDGMENT_DEFAULT_GPT_MODEL = optional_env_var("JUDGMENT_DEFAULT_GPT_MODEL", "gpt-5")
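With required_env_var gone, importing judgeval no longer raises when the credentials are absent; both constants may simply be None, which is why the call sites elsewhere in this release add explicit checks. The guard that callers are now expected to perform looks like this:

from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_ORG_ID

# Both constants are plain Optional[str] values read once at import time.
if not JUDGMENT_API_KEY or not JUDGMENT_ORG_ID:
    raise ValueError("Judgment API key and organization ID are required")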
@@ -112,6 +112,8 @@ def _poll_evaluation_until_complete(

     poll_count = 0
     exception_count = 0
+    if not JUDGMENT_API_KEY or not JUDGMENT_ORG_ID:
+        raise ValueError("Judgment API key and organization ID are required")
     api_client = JudgmentSyncClient(JUDGMENT_API_KEY, JUDGMENT_ORG_ID)
     while poll_count < max_poll_count:
         poll_count += 1
@@ -222,6 +224,8 @@ def run_eval(
     )
     t.start()
     try:
+        if not JUDGMENT_API_KEY or not JUDGMENT_ORG_ID:
+            raise ValueError("Judgment API key and organization ID are required")
         api_client = JudgmentSyncClient(JUDGMENT_API_KEY, JUDGMENT_ORG_ID)
         response = api_client.add_to_run_eval_queue_examples(
             evaluation_run.model_dump(warnings=False)  # type: ignore
@@ -19,9 +19,11 @@ def push_prompt(
     name: str,
     prompt: str,
     tags: List[str],
-    judgment_api_key: str = JUDGMENT_API_KEY,
-    organization_id: str = JUDGMENT_ORG_ID,
+    judgment_api_key: str | None = JUDGMENT_API_KEY,
+    organization_id: str | None = JUDGMENT_ORG_ID,
 ) -> tuple[str, Optional[str], str]:
+    if not judgment_api_key or not organization_id:
+        raise ValueError("Judgment API key and organization ID are required")
     client = JudgmentSyncClient(judgment_api_key, organization_id)
     try:
         project_id = _resolve_project_id(
@@ -55,9 +57,11 @@ def fetch_prompt(
     name: str,
     commit_id: Optional[str] = None,
     tag: Optional[str] = None,
-    judgment_api_key: str = JUDGMENT_API_KEY,
-    organization_id: str = JUDGMENT_ORG_ID,
+    judgment_api_key: str | None = JUDGMENT_API_KEY,
+    organization_id: str | None = JUDGMENT_ORG_ID,
 ) -> Optional[PromptCommitInfo]:
+    if not judgment_api_key or not organization_id:
+        raise ValueError("Judgment API key and organization ID are required")
     client = JudgmentSyncClient(judgment_api_key, organization_id)
     try:
         project_id = _resolve_project_id(
@@ -89,9 +93,11 @@ def tag_prompt(
     name: str,
     commit_id: str,
     tags: List[str],
-    judgment_api_key: str = JUDGMENT_API_KEY,
-    organization_id: str = JUDGMENT_ORG_ID,
+    judgment_api_key: str | None = JUDGMENT_API_KEY,
+    organization_id: str | None = JUDGMENT_ORG_ID,
 ) -> PromptTagResponse:
+    if not judgment_api_key or not organization_id:
+        raise ValueError("Judgment API key and organization ID are required")
     client = JudgmentSyncClient(judgment_api_key, organization_id)
     try:
         project_id = _resolve_project_id(
@@ -124,9 +130,11 @@ def untag_prompt(
     project_name: str,
     name: str,
     tags: List[str],
-    judgment_api_key: str = JUDGMENT_API_KEY,
-    organization_id: str = JUDGMENT_ORG_ID,
+    judgment_api_key: str | None = JUDGMENT_API_KEY,
+    organization_id: str | None = JUDGMENT_ORG_ID,
 ) -> PromptUntagResponse:
+    if not judgment_api_key or not organization_id:
+        raise ValueError("Judgment API key and organization ID are required")
     client = JudgmentSyncClient(judgment_api_key, organization_id)
     try:
         project_id = _resolve_project_id(
@@ -153,9 +161,11 @@ def untag_prompt(
 def list_prompt(
     project_name: str,
     name: str,
-    judgment_api_key: str = JUDGMENT_API_KEY,
-    organization_id: str = JUDGMENT_ORG_ID,
+    judgment_api_key: str | None = JUDGMENT_API_KEY,
+    organization_id: str | None = JUDGMENT_ORG_ID,
 ) -> PromptVersionsResponse:
+    if not judgment_api_key or not organization_id:
+        raise ValueError("Judgment API key and organization ID are required")
     client = JudgmentSyncClient(judgment_api_key, organization_id)
     try:
         project_id = _resolve_project_id(
@@ -201,8 +211,8 @@ class Prompt:
         name: str,
         prompt: str,
         tags: Optional[List[str]] = None,
-        judgment_api_key: str = JUDGMENT_API_KEY,
-        organization_id: str = JUDGMENT_ORG_ID,
+        judgment_api_key: str | None = JUDGMENT_API_KEY,
+        organization_id: str | None = JUDGMENT_ORG_ID,
     ):
         if tags is None:
             tags = []
@@ -225,8 +235,8 @@ class Prompt:
         name: str,
         commit_id: Optional[str] = None,
         tag: Optional[str] = None,
-        judgment_api_key: str = JUDGMENT_API_KEY,
-        organization_id: str = JUDGMENT_ORG_ID,
+        judgment_api_key: str | None = JUDGMENT_API_KEY,
+        organization_id: str | None = JUDGMENT_ORG_ID,
     ):
         if commit_id is not None and tag is not None:
             raise ValueError(
@@ -262,8 +272,8 @@ class Prompt:
         name: str,
         commit_id: str,
         tags: List[str],
-        judgment_api_key: str = JUDGMENT_API_KEY,
-        organization_id: str = JUDGMENT_ORG_ID,
+        judgment_api_key: str | None = JUDGMENT_API_KEY,
+        organization_id: str | None = JUDGMENT_ORG_ID,
     ):
         prompt_config = tag_prompt(
             project_name, name, commit_id, tags, judgment_api_key, organization_id
@@ -276,8 +286,8 @@ class Prompt:
         project_name: str,
         name: str,
         tags: List[str],
-        judgment_api_key: str = JUDGMENT_API_KEY,
-        organization_id: str = JUDGMENT_ORG_ID,
+        judgment_api_key: str | None = JUDGMENT_API_KEY,
+        organization_id: str | None = JUDGMENT_ORG_ID,
     ):
         prompt_config = untag_prompt(
             project_name, name, tags, judgment_api_key, organization_id
@@ -289,8 +299,8 @@ class Prompt:
         cls,
         project_name: str,
         name: str,
-        judgment_api_key: str = JUDGMENT_API_KEY,
-        organization_id: str = JUDGMENT_ORG_ID,
+        judgment_api_key: str | None = JUDGMENT_API_KEY,
+        organization_id: str | None = JUDGMENT_ORG_ID,
     ):
         prompt_configs = list_prompt(
             project_name, name, judgment_api_key, organization_id
@@ -45,7 +45,7 @@ def push_prompt_scorer(
             detail=f"Failed to save prompt scorer: {e.detail}",
             response=e.response,
         )
-    return r["name"]
+    return r["scorer_response"]["name"]


 def fetch_prompt_scorer(
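push_prompt_scorer adapts to the new SavePromptScorerResponse, which nests a full PromptScorer under scorer_response instead of returning a bare message/name pair. A small illustration of the shape change (all values are made up):

# Previously the save endpoint returned something like {"message": "...", "name": "helpfulness"}.
response = {
    "scorer_response": {
        "id": "ps_123",
        "user_id": "u_1",
        "organization_id": "org_1",
        "name": "helpfulness",
        "prompt": "Rate the response for helpfulness.",
        "threshold": 0.7,
    }
}
name = response["scorer_response"]["name"]
assert name == "helpfulness"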
@@ -106,8 +106,8 @@ class Tracer(metaclass=SingletonMeta):
         "_initialized",
     )

-    api_key: str
-    organization_id: str
+    api_key: str | None
+    organization_id: str | None
     project_name: str
     enable_monitoring: bool
     enable_evaluation: bool
@@ -124,8 +124,8 @@ class Tracer(metaclass=SingletonMeta):
         /,
         *,
         project_name: str,
-        api_key: Optional[str] = None,
-        organization_id: Optional[str] = None,
+        api_key: str | None = None,
+        organization_id: str | None = None,
         enable_monitoring: bool = JUDGMENT_ENABLE_MONITORING.lower() == "true",
         enable_evaluation: bool = JUDGMENT_ENABLE_EVALUATIONS.lower() == "true",
         resource_attributes: Optional[Dict[str, Any]] = None,
@@ -145,10 +145,14 @@ class Tracer(metaclass=SingletonMeta):
         self.enable_evaluation = enable_evaluation
         self.resource_attributes = resource_attributes

-        self.api_client = JudgmentSyncClient(
-            api_key=self.api_key,
-            organization_id=self.organization_id,
-        )
+        if self.api_key and self.organization_id:
+            self.api_client = JudgmentSyncClient(
+                api_key=self.api_key, organization_id=self.organization_id
+            )
+        else:
+            judgeval_logger.error(
+                "API Key or Organization ID is not set. You must set them in the environment variables to use the tracer."
+            )

         if initialize:
             self.initialize()
@@ -162,7 +166,7 @@ class Tracer(metaclass=SingletonMeta):
         project_id = _resolve_project_id(
             self.project_name, self.api_key, self.organization_id
         )
-        if project_id:
+        if self.api_key and self.organization_id and project_id:
             self.judgment_processor = self.get_processor(
                 tracer=self,
                 project_name=self.project_name,
@@ -177,9 +181,10 @@ class Tracer(metaclass=SingletonMeta):
             provider.add_span_processor(self.judgment_processor)
             set_tracer_provider(provider)
         else:
-            judgeval_logger.error(
-                f"Failed to resolve or autocreate project {self.project_name}, please create it first at https://app.judgmentlabs.ai/org/{self.organization_id}/projects. Skipping Judgment export."
-            )
+            if self.api_key and self.organization_id:
+                judgeval_logger.error(
+                    f"Failed to resolve or autocreate project {self.project_name}, please create it first at https://app.judgmentlabs.ai/org/{self.organization_id}/projects. Skipping Judgment export."
+                )

         self.tracer = get_tracer_provider().get_tracer(
             JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME,
@@ -198,10 +203,19 @@ class Tracer(metaclass=SingletonMeta):
     ):
         from judgeval.tracer.exporters import JudgmentSpanExporter

+        api_key = api_key or JUDGMENT_API_KEY
+        organization_id = organization_id or JUDGMENT_ORG_ID
+
+        if not api_key or not organization_id:
+            judgeval_logger.error(
+                "API Key or Organization ID is not set. You must set them in the environment variables to use the tracer."
+            )
+            return None
+
         return JudgmentSpanExporter(
             endpoint=url_for("/otel/v1/traces"),
-            api_key=api_key or JUDGMENT_API_KEY,
-            organization_id=organization_id or JUDGMENT_ORG_ID,
+            api_key=api_key,
+            organization_id=organization_id,
             project_id=project_id,
         )

@@ -217,12 +231,19 @@ class Tracer(metaclass=SingletonMeta):
         resource_attributes: Optional[Dict[str, Any]] = None,
     ) -> JudgmentSpanProcessor:
         """Create a JudgmentSpanProcessor using the correct constructor."""
+        api_key = api_key or JUDGMENT_API_KEY
+        organization_id = organization_id or JUDGMENT_ORG_ID
+        if not api_key or not organization_id:
+            judgeval_logger.error(
+                "API Key or Organization ID is not set. You must set them in the environment variables to use the tracer."
+            )
+            return NoOpJudgmentSpanProcessor()
         return JudgmentSpanProcessor(
             tracer,
             project_name,
             project_id,
-            api_key or JUDGMENT_API_KEY,
-            organization_id or JUDGMENT_ORG_ID,
+            api_key,
+            organization_id,
             max_queue_size=max_queue_size,
             export_timeout_millis=export_timeout_millis,
             resource_attributes=resource_attributes,
@@ -244,6 +265,7 @@ class Tracer(metaclass=SingletonMeta):
         """Get the internal span processor of this tracer instance."""
         return self.judgment_processor

+    @dont_throw
     def set_customer_id(self, customer_id: str) -> None:
         if not customer_id:
             judgeval_logger.warning("Customer ID is empty, skipping.")
@@ -1 +1 @@
-JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME = "opentelemetry.instrumentation.judgeval"
+JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME = "judgeval"
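Taken together, the tracer changes make missing credentials non-fatal: the constructor logs an error instead of building an API client, get_exporter returns None, get_processor falls back to NoOpJudgmentSpanProcessor, and the instrumenting module name is simplified to "judgeval". A hypothetical initialization, assuming Tracer is exposed from judgeval.tracer and accepts the keyword-only arguments shown above:

from judgeval.tracer import Tracer  # assumed import path

# Without JUDGMENT_API_KEY / JUDGMENT_ORG_ID set, this logs an error and traces
# with a no-op Judgment processor rather than raising at construction time.
tracer = Tracer(project_name="demo-project")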
judgeval/tracer/keys.py CHANGED
@@ -26,18 +26,19 @@ class AttributeKeys(str, Enum):

     PENDING_TRACE_EVAL = "judgment.pending_trace_eval"

+    JUDGMENT_LLM_PROVIDER = "judgment.llm.provider"
+    JUDGMENT_LLM_MODEL_NAME = "judgment.llm.model"
+    JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS = "judgment.usage.non_cached_input_tokens"
+    JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS = (
+        "judgment.usage.cache_creation_input_tokens"
+    )
+    JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS = "judgment.usage.cache_read_input_tokens"
+    JUDGMENT_USAGE_OUTPUT_TOKENS = "judgment.usage.output_tokens"
+    JUDGMENT_USAGE_TOTAL_COST_USD = "judgment.usage.total_cost_usd"
+
     GEN_AI_PROMPT = "gen_ai.prompt"
     GEN_AI_COMPLETION = "gen_ai.completion"
-    GEN_AI_REQUEST_MODEL = "gen_ai.request.model"
-    GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"
     GEN_AI_SYSTEM = "gen_ai.system"
-    GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
-    GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
-    GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS = (
-        "gen_ai.usage.cache_creation_input_tokens"
-    )
-    GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS = "gen_ai.usage.cache_read_input_tokens"
-
     GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
     GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
     GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"
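The model and token-usage attributes move from the gen_ai.* namespace to judgment.*-prefixed keys. A sketch of recording them on an OpenTelemetry span, assuming AttributeKeys is used as a plain string enum:

from opentelemetry import trace
from judgeval.tracer.keys import AttributeKeys

tracer = trace.get_tracer("example")
with tracer.start_as_current_span("llm-call") as span:
    # New judgment.* keys replace the removed gen_ai.request.model / gen_ai.usage.* keys above.
    span.set_attribute(AttributeKeys.JUDGMENT_LLM_MODEL_NAME.value, "gpt-5")
    span.set_attribute(AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS.value, 128)
    span.set_attribute(AttributeKeys.JUDGMENT_USAGE_TOTAL_COST_USD.value, 0.0042)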