judgeval 0.20.0__py3-none-any.whl → 0.22.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.

Potentially problematic release: this version of judgeval has been flagged as possibly problematic.

judgeval/__init__.py CHANGED
@@ -170,8 +170,7 @@ class JudgmentClient(metaclass=SingletonMeta):
                 judgeval_logger.error(f"Failed to upload custom scorer: {unique_name}")
                 return False
 
-        except Exception as e:
-            judgeval_logger.error(f"Error uploading custom scorer: {e}")
+        except Exception:
             raise
 
 
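Why the logging was dropped: a bare raise inside an except block re-raises the active exception with its original traceback intact, so the failure can be reported once at the CLI boundary instead of being logged twice. A minimal sketch of the pattern (names hypothetical, not the package's actual method):

    def upload(client, scorer):
        try:
            client.upload(scorer)
        except Exception:
            # No logging here: the caller (e.g. the CLI) reports the failure once.
            raise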
judgeval/cli.py CHANGED
@@ -6,6 +6,7 @@ from dotenv import load_dotenv
 from judgeval.logger import judgeval_logger
 from judgeval import JudgmentClient
 from judgeval.version import get_version
+from judgeval.exceptions import JudgmentAPIError
 
 load_dotenv()
 
@@ -56,8 +57,15 @@ def upload_scorer(
             judgeval_logger.error("Failed to upload custom scorer")
             raise typer.Exit(1)
 
+        judgeval_logger.info("Custom scorer uploaded successfully!")
         raise typer.Exit(0)
-    except Exception:
+    except Exception as e:
+        if isinstance(e, JudgmentAPIError) and e.status_code == 409:
+            judgeval_logger.error(
+                "Duplicate scorer detected. Use --overwrite flag to replace the existing scorer"
+            )
+            raise typer.Exit(1)
+        # Re-raise other exceptions
        raise
 
 
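The CLI now treats an HTTP 409 conflict from the backend as a duplicate-scorer error with a dedicated message, and re-raises everything else. A rough sketch of the same check in user code, assuming (as the diff implies) that JudgmentAPIError carries a status_code attribute; do_upload is a hypothetical helper:

    from judgeval.exceptions import JudgmentAPIError

    try:
        do_upload()  # hypothetical helper that calls the upload endpoint
    except JudgmentAPIError as e:
        if e.status_code == 409:
            print("Scorer already exists; pass --overwrite to replace it")
            raise SystemExit(1)
        raise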
judgeval/tracer/constants.py CHANGED
@@ -1 +1 @@
-JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME = "opentelemetry.instrumentation.judgeval"
+JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME = "judgeval"
judgeval/tracer/keys.py CHANGED
@@ -26,18 +26,19 @@ class AttributeKeys(str, Enum):
 
     PENDING_TRACE_EVAL = "judgment.pending_trace_eval"
 
+    JUDGMENT_LLM_PROVIDER = "judgment.llm.provider"
+    JUDGMENT_LLM_MODEL_NAME = "judgment.llm.model"
+    JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS = "judgment.usage.non_cached_input_tokens"
+    JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS = (
+        "judgment.usage.cache_creation_input_tokens"
+    )
+    JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS = "judgment.usage.cache_read_input_tokens"
+    JUDGMENT_USAGE_OUTPUT_TOKENS = "judgment.usage.output_tokens"
+    JUDGMENT_USAGE_TOTAL_COST_USD = "judgment.usage.total_cost_usd"
+
     GEN_AI_PROMPT = "gen_ai.prompt"
     GEN_AI_COMPLETION = "gen_ai.completion"
-    GEN_AI_REQUEST_MODEL = "gen_ai.request.model"
-    GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"
     GEN_AI_SYSTEM = "gen_ai.system"
-    GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
-    GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
-    GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS = (
-        "gen_ai.usage.cache_creation_input_tokens"
-    )
-    GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS = "gen_ai.usage.cache_read_input_tokens"
-
     GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
     GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
     GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"
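The renamed judgment.* keys split input usage into explicit buckets: non-cached input, cache reads, and cache creation, alongside output tokens and total cost. Based on the key names and the openai_tokens_converter added below, the intended invariant appears to be that the buckets sum to the provider's total (the values here are invented for illustration):

    non_cached = 70      # judgment.usage.non_cached_input_tokens
    cache_read = 30      # judgment.usage.cache_read_input_tokens
    cache_creation = 0   # judgment.usage.cache_creation_input_tokens
    output = 50          # judgment.usage.output_tokens
    total = non_cached + cache_read + cache_creation + output  # 150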
judgeval/tracer/llm/llm_anthropic/messages.py CHANGED
@@ -95,7 +95,7 @@ def _wrap_non_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: Message) -> None:
@@ -112,17 +112,19 @@ def _wrap_non_streaming_sync(
             _extract_anthropic_tokens(result.usage)
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
@@ -133,7 +135,7 @@ def _wrap_non_streaming_sync(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model,
         )
 
@@ -169,7 +171,7 @@ def _wrap_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -197,17 +199,21 @@ def _wrap_streaming_sync(
             _extract_anthropic_tokens(usage_data)
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
+            cache_read,
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
@@ -279,7 +285,7 @@ def _wrap_non_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: Message) -> None:
@@ -296,17 +302,19 @@ def _wrap_non_streaming_async(
             _extract_anthropic_tokens(result.usage)
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
@@ -317,7 +325,7 @@ def _wrap_non_streaming_async(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model,
         )
 
@@ -354,7 +362,7 @@ def _wrap_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -382,17 +390,21 @@ def _wrap_streaming_async(
             _extract_anthropic_tokens(usage_data)
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
        )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
+            cache_read,
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
judgeval/tracer/llm/llm_anthropic/messages_stream.py CHANGED
@@ -44,7 +44,7 @@ def wrap_messages_stream_sync(tracer: Tracer, client: Anthropic) -> None:
 
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -125,22 +125,22 @@ def wrap_messages_stream_sync(tracer: Tracer, client: Anthropic) -> None:
         ) = _extract_anthropic_tokens(final_message.usage)
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
             prompt_tokens,
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
             completion_tokens,
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
             cache_read,
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
@@ -151,7 +151,7 @@ def wrap_messages_stream_sync(tracer: Tracer, client: Anthropic) -> None:
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             final_message.model,
         )
     except Exception:
@@ -190,7 +190,7 @@ def wrap_messages_stream_async(tracer: Tracer, client: AsyncAnthropic) -> None:
 
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -271,22 +271,22 @@ def wrap_messages_stream_async(tracer: Tracer, client: AsyncAnthropic) -> None:
         ) = _extract_anthropic_tokens(final_message.usage)
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
             prompt_tokens,
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
             completion_tokens,
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
             cache_read,
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
@@ -297,7 +297,7 @@ def wrap_messages_stream_async(tracer: Tracer, client: AsyncAnthropic) -> None:
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             final_message.model,
         )
     except Exception:
judgeval/tracer/llm/llm_google/generate_content.py CHANGED
@@ -63,7 +63,7 @@ def wrap_generate_content_sync(tracer: Tracer, client: Client) -> None:
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: GenerateContentResponse) -> None:
@@ -79,17 +79,19 @@ def wrap_generate_content_sync(tracer: Tracer, client: Client) -> None:
             _extract_google_tokens(usage_data)
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
@@ -100,7 +102,7 @@ def wrap_generate_content_sync(tracer: Tracer, client: Client) -> None:
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model_version if result.model_version else ctx["model_name"],
         )
 
judgeval/tracer/llm/llm_openai/beta_chat_completions.py CHANGED
@@ -16,6 +16,7 @@ from judgeval.utils.wrappers import (
     immutable_wrap_sync,
     immutable_wrap_async,
 )
+from judgeval.tracer.llm.llm_openai.utils import openai_tokens_converter
 
 if TYPE_CHECKING:
     from judgeval.tracer import Tracer
@@ -45,7 +46,7 @@ def _wrap_beta_non_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: ParsedChatCompletion[T]) -> None:
@@ -66,17 +67,29 @@ def _wrap_beta_non_streaming_sync(
         if prompt_tokens_details:
             cache_read = prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -86,7 +99,7 @@ def _wrap_beta_non_streaming_sync(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model or ctx["model_name"],
         )
 
@@ -128,7 +141,7 @@ def _wrap_beta_non_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: ParsedChatCompletion[T]) -> None:
@@ -149,17 +162,28 @@ def _wrap_beta_non_streaming_async(
         if prompt_tokens_details:
             cache_read = prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
        )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -169,7 +193,7 @@ def _wrap_beta_non_streaming_async(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model or ctx["model_name"],
         )
 
judgeval/tracer/llm/llm_openai/chat_completions.py CHANGED
@@ -25,6 +25,7 @@ from judgeval.utils.wrappers import (
     immutable_wrap_sync_iterator,
     immutable_wrap_async_iterator,
 )
+from judgeval.tracer.llm.llm_openai.utils import openai_tokens_converter
 
 if TYPE_CHECKING:
     from judgeval.tracer import Tracer
@@ -68,7 +69,7 @@ def _wrap_non_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: ChatCompletion) -> None:
@@ -89,17 +90,29 @@ def _wrap_non_streaming_sync(
         if prompt_tokens_details:
             cache_read = prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -109,7 +122,7 @@ def _wrap_non_streaming_sync(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model or ctx["model_name"],
         )
 
@@ -145,7 +158,7 @@ def _wrap_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -182,17 +195,31 @@ def _wrap_streaming_sync(
         if chunk.usage.prompt_tokens_details:
             cache_read = chunk.usage.prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                chunk.usage.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
+            cache_read,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -264,7 +291,7 @@ def _wrap_non_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: ChatCompletion) -> None:
@@ -285,17 +312,29 @@ def _wrap_non_streaming_async(
         if prompt_tokens_details:
             cache_read = prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -305,7 +344,7 @@ def _wrap_non_streaming_async(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model or ctx["model_name"],
         )
 
@@ -342,7 +381,7 @@ def _wrap_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -379,17 +418,31 @@ def _wrap_streaming_async(
         if chunk.usage.prompt_tokens_details:
             cache_read = chunk.usage.prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                chunk.usage.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
+            cache_read,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
judgeval/tracer/llm/llm_openai/responses.py CHANGED
@@ -24,6 +24,7 @@ from judgeval.utils.wrappers import (
     immutable_wrap_sync_iterator,
     immutable_wrap_async_iterator,
 )
+from judgeval.tracer.llm.llm_openai.utils import openai_tokens_converter
 
 if TYPE_CHECKING:
     from judgeval.tracer import Tracer
@@ -62,7 +63,7 @@ def _wrap_responses_non_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: Response) -> None:
@@ -80,17 +81,29 @@ def _wrap_responses_non_streaming_sync(
         completion_tokens = usage_data.output_tokens or 0
         cache_read = usage_data.input_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
        )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -101,7 +114,7 @@ def _wrap_responses_non_streaming_sync(
         if hasattr(result, "model"):
             set_span_attribute(
                 span,
-                AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
                 result.model or ctx["model_name"],
             )
 
@@ -137,7 +150,7 @@ def _wrap_responses_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -167,6 +180,7 @@ def _wrap_responses_streaming_sync(
         ):
             prompt_tokens = chunk.response.usage.input_tokens or 0
             completion_tokens = chunk.response.usage.output_tokens or 0
+            total_tokens = chunk.response.usage.total_tokens or 0
             # Safely access nested cached_tokens
             input_tokens_details = getattr(
                 chunk.response.usage, "input_tokens_details", None
@@ -177,21 +191,35 @@ def _wrap_responses_streaming_sync(
                 else 0
             )
 
+            prompt_tokens, completion_tokens, cache_read, cache_creation = (
+                openai_tokens_converter(
+                    prompt_tokens,
+                    completion_tokens,
+                    cache_read,
+                    0,
+                    total_tokens,
+                )
+            )
+
             set_span_attribute(
-                span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+                span,
+                AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+                prompt_tokens,
             )
             set_span_attribute(
                 span,
-                AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
                 completion_tokens,
             )
             set_span_attribute(
                 span,
-                AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
                 cache_read,
             )
             set_span_attribute(
-                span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+                span,
+                AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
+                0,
             )
             set_span_attribute(
                 span,
@@ -266,7 +294,7 @@ def _wrap_responses_non_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: Response) -> None:
@@ -284,17 +312,29 @@ def _wrap_responses_non_streaming_async(
         completion_tokens = usage_data.output_tokens or 0
         cache_read = usage_data.input_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
        )
         set_span_attribute(
             span,
@@ -305,7 +345,7 @@ def _wrap_responses_non_streaming_async(
         if hasattr(result, "model"):
             set_span_attribute(
                 span,
-                AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
                 result.model or ctx["model_name"],
             )
 
@@ -341,7 +381,7 @@ def _wrap_responses_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -373,6 +413,7 @@ def _wrap_responses_streaming_async(
         ):
             prompt_tokens = chunk.response.usage.input_tokens or 0
             completion_tokens = chunk.response.usage.output_tokens or 0
+            total_tokens = chunk.response.usage.total_tokens or 0
             # Safely access nested cached_tokens
             input_tokens_details = getattr(
                 chunk.response.usage, "input_tokens_details", None
@@ -383,21 +424,35 @@ def _wrap_responses_streaming_async(
                 else 0
             )
 
+            prompt_tokens, completion_tokens, cache_read, cache_creation = (
+                openai_tokens_converter(
+                    prompt_tokens,
+                    completion_tokens,
+                    cache_read,
+                    0,
+                    total_tokens,
+                )
+            )
+
             set_span_attribute(
-                span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+                span,
+                AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+                prompt_tokens,
            )
             set_span_attribute(
                 span,
-                AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
                 completion_tokens,
             )
             set_span_attribute(
                 span,
-                AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
                 cache_read,
             )
             set_span_attribute(
-                span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+                span,
+                AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
+                0,
             )
             set_span_attribute(
                 span,
judgeval/tracer/llm/llm_openai/utils.py ADDED
@@ -0,0 +1,22 @@
+def openai_tokens_converter(
+    prompt_tokens: int,
+    completion_tokens: int,
+    cache_read: int,
+    cache_creation: int,
+    total_tokens: int,
+) -> tuple[int, int, int, int]:
+    """
+    Returns:
+        tuple[int, int, int, int]:
+        - judgment.usage.non_cached_input
+        - judgment.usage.output_tokens
+        - judgment.usage.cached_input_tokens
+        - judgment.usage.cache_creation_tokens
+    """
+    manual_tokens = prompt_tokens + completion_tokens + cache_read + cache_creation
+
+    if manual_tokens > total_tokens:
+        # This is the openAI case where we need to subtract the cached tokens from the input tokens
+        return prompt_tokens - cache_read, completion_tokens, cache_read, cache_creation
+    else:
+        return prompt_tokens, completion_tokens, cache_read, cache_creation
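openai_tokens_converter reconciles two reporting conventions: OpenAI reports prompt/input tokens inclusive of cached tokens, so when the manual sum of the four buckets exceeds total_tokens the cached portion is subtracted from the input count; providers that already report non-cached input separately fall through unchanged. A worked example (values invented):

    # OpenAI-style usage: prompt_tokens=100 already includes cache_read=30,
    # so 100 + 50 + 30 + 0 = 180 > total_tokens=150 and the overlap is removed.
    openai_tokens_converter(100, 50, 30, 0, 150)  # -> (70, 50, 30, 0)

    # Already-split usage: 100 + 50 + 30 + 0 = 180 <= total_tokens=180,
    # so the values pass through as-is.
    openai_tokens_converter(100, 50, 30, 0, 180)  # -> (100, 50, 30, 0)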
judgeval/tracer/llm/llm_together/chat_completions.py CHANGED
@@ -73,7 +73,7 @@ def _wrap_non_streaming_sync(
         )
         ctx["model_name"] = prefixed_model_name
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
         )
 
     def post_hook(ctx: Dict[str, Any], result: ChatCompletionResponse) -> None:
@@ -90,10 +90,12 @@ def _wrap_non_streaming_sync(
             result.usage
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
             span,
@@ -103,7 +105,7 @@ def _wrap_non_streaming_sync(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             ctx["model_name"],
         )
 
@@ -143,7 +145,7 @@ def _wrap_streaming_sync(
         )
         ctx["model_name"] = prefixed_model_name
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
         )
         ctx["accumulated_content"] = ""
 
@@ -171,10 +173,12 @@ def _wrap_streaming_sync(
             chunk.usage
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
             span,
@@ -249,7 +253,7 @@ def _wrap_non_streaming_async(
         )
         ctx["model_name"] = prefixed_model_name
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
         )
 
     def post_hook(ctx: Dict[str, Any], result: ChatCompletionResponse) -> None:
@@ -266,10 +270,12 @@ def _wrap_non_streaming_async(
             result.usage
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
             span,
@@ -279,7 +285,7 @@ def _wrap_non_streaming_async(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             ctx["model_name"],
         )
 
@@ -320,7 +326,7 @@ def _wrap_streaming_async(
         )
         ctx["model_name"] = prefixed_model_name
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
         )
         ctx["accumulated_content"] = ""
 
@@ -348,10 +354,12 @@ def _wrap_streaming_async(
             chunk.usage
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
        )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
             span,
judgeval/utils/serialize.py CHANGED
@@ -247,7 +247,7 @@ encoders_by_class_tuples = generate_encoders_by_class_tuples(ENCODERS_BY_TYPE)
 # Seralize arbitrary object to a json string
 def safe_serialize(obj: Any) -> str:
     try:
-        return orjson.dumps(json_encoder(obj)).decode()
+        return orjson.dumps(json_encoder(obj), option=orjson.OPT_NON_STR_KEYS).decode()
     except Exception as e:
         judgeval_logger.warning(f"Error serializing object: {e}")
-        return orjson.dumps(repr(obj)).decode()
+        return repr(obj)
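Two behavior changes here: orjson.dumps rejects non-string dict keys by default, and OPT_NON_STR_KEYS makes it coerce supported key types (int, float, UUID, datetime, and so on) to strings instead of raising; and on failure the function now returns the plain repr rather than a JSON-quoted repr. A quick illustration:

    import orjson

    # orjson.dumps({1: "a"}) raises JSONEncodeError by default
    orjson.dumps({1: "a"}, option=orjson.OPT_NON_STR_KEYS)  # b'{"1":"a"}'

    # Old fallback double-encoded the repr; the new one returns it directly:
    # orjson.dumps(repr([1])).decode() == '"[1]"'  vs  repr([1]) == '[1]'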
judgeval/version.py CHANGED
@@ -1,4 +1,4 @@
-__version__ = "0.20.0"
+__version__ = "0.22.0"
 
 
 def get_version() -> str:
judgeval-0.22.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.20.0
+Version: 0.22.0
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
judgeval-0.22.0.dist-info/RECORD CHANGED
@@ -1,10 +1,10 @@
-judgeval/__init__.py,sha256=GzRWhiT4QKa8bSAF3wMfPfGaN-_WwkRmtfie03vnLLc,6748
-judgeval/cli.py,sha256=bkwsDqX0sdfChLxm9aTLAIw0sPYv-fUbjmaFeBgPgk8,1803
+judgeval/__init__.py,sha256=RRiBbXUj7M1VW3NqFvMZlXyI72duh3VA5bfIWqPmKNw,6670
+judgeval/cli.py,sha256=T9nKO9eHMOiLCgxaxuihqtRHsG_dMT06sW6X873MmnI,2209
 judgeval/constants.py,sha256=JZZJ1MqzZZDVk-5PRPRbmLnM8mXI-RDL5vxa1JFuscs,3408
 judgeval/env.py,sha256=uFggNNKmfDaa5dmZMwwXVIDdHAHe524jDWUpByV4hm4,1879
 judgeval/exceptions.py,sha256=tTbfe4yoOtPXmn22UQz9-6a-5PT9uOko85xaRRwr0Sw,621
 judgeval/logger.py,sha256=VP5blbsJ53mvJbNHfBf5p2KrARUrkrErpPkB-__Hh3U,1562
-judgeval/version.py,sha256=Gd2zP_LUu1hS1WAozlH-tSGeW3sqNN8PHFKbLHXGpoY,74
+judgeval/version.py,sha256=JT1Ltu1j1xi9sHf7Inhosvl-1tp0QPms04o2RCvwJUI,74
 judgeval/warnings.py,sha256=LbGte14ppiFjrkp-JJYueZ40NWFvMkWRvPXr6r-fUWw,73
 judgeval/api/__init__.py,sha256=dGZm9KtgLMnmbiyDEJ_D7suuVqmsibR_Cd0YZRJ7qHI,15210
 judgeval/api/api_types.py,sha256=PJ5ZQWuvCl5GXFzhcpOw6Iuktr50lo5BaILmZcAKWfc,10085
@@ -43,8 +43,8 @@ judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=ucYOI6ztAjf
 judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=V3RdrWhnR_vLBrtWw7QbgN9K_A-Och7-v9I2fN4z8gY,506
 judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=WhSkPs8tWyQ_cS-y-VTzrKAPlizKp-6zi_DmfgW4AgM,10773
 judgeval/tracer/__init__.py,sha256=E1rcegwIQyqEIvM4yfPLm2M0B6e27dhPrJPhMotBjtw,37432
-judgeval/tracer/constants.py,sha256=ae8tivAW97awJQxdRB9OMqX50wOLX3zqChT_AGkPBu0,85
-judgeval/tracer/keys.py,sha256=G2Qgb5ZlFsZvXPMylh-OLhHSnWYQ23g0GdtY9n9XuoE,2280
+judgeval/tracer/constants.py,sha256=tLR5ClDaNlNg_MAv2XRdk62uQW4KyBnWaNbG_YYblTc,55
+judgeval/tracer/keys.py,sha256=mYBo_X6-rC9xfiI-WpjHlO7rUtcMORtQXCQyO1F3Ycc,2387
 judgeval/tracer/managers.py,sha256=NEkovnK8Qaod1U5msT0_hyHUqo9C2uFro2IzNlC8jCo,5071
 judgeval/tracer/utils.py,sha256=xWha5iwC733wCf2HKbNqzxOPS1ovO1OymWIUFLz-UpQ,537
 judgeval/tracer/exporters/__init__.py,sha256=3WDXC28iY5gYMM5s7ejmy7P-DVDQ_iIuzwovZxUKJXg,1295
@@ -57,21 +57,22 @@ judgeval/tracer/llm/constants.py,sha256=IWa3CMes8wIt_UG7jrGEOztg2sHz54fdOMWIOOr-
 judgeval/tracer/llm/providers.py,sha256=VAimkmChOOjhC1cUv-0iG8pa5PhOw1HIOyt3zrIrbcM,628
 judgeval/tracer/llm/llm_anthropic/__init__.py,sha256=HG0gIlTgaRt-Y0u1ERPQ19pUgb4YHkTh7tZQPeyR4oM,80
 judgeval/tracer/llm/llm_anthropic/config.py,sha256=ICfKODPQvZsRxpK4xWQ-YE79pmWJTmY2wryddxpNdpM,153
-judgeval/tracer/llm/llm_anthropic/messages.py,sha256=U11364nrTt6M58K218uj8AxGPrNwzJ4idhEmZQtFuik,15152
-judgeval/tracer/llm/llm_anthropic/messages_stream.py,sha256=ZhHigQujU-zHhklgwSVoQYtSsL_7yC5Rwpq9vozekMc,12140
+judgeval/tracer/llm/llm_anthropic/messages.py,sha256=T7dApxJCsOWEpquYSZICACwTioZG3ZcxHdJjvF04T2E,15474
+judgeval/tracer/llm/llm_anthropic/messages_stream.py,sha256=DKlZZnfK_yv_tEMwF2XxvsjgUjOFI3c5JUMQwERNV7k,12188
 judgeval/tracer/llm/llm_anthropic/wrapper.py,sha256=JILcyC4NvjXZSqlFoZp-VB-JsCYZkQPMFEYaB4AysrA,1849
 judgeval/tracer/llm/llm_google/__init__.py,sha256=otBZETsAfVZjtZaN5N36Ln0kw-I9jVB4tFGrV6novHo,74
 judgeval/tracer/llm/llm_google/config.py,sha256=S3yCAE9oHbXjLVYiz5mGD16yIgXMBBUu5UN4lBjoCNQ,162
-judgeval/tracer/llm/llm_google/generate_content.py,sha256=meLWeoZ7J2JtSkpt2Lt8qapYi_mxv0204cXWaFZ0FKs,3973
+judgeval/tracer/llm/llm_google/generate_content.py,sha256=w1rIh1cTBYnkfBQTL4qHntwsKfBcSrf2VSS2y-BOMRU,4030
 judgeval/tracer/llm/llm_google/wrapper.py,sha256=jqaMXGoM9dlPBbCFadMI5EqFrNHzBt0h9VkNn7KPVLk,901
 judgeval/tracer/llm/llm_openai/__init__.py,sha256=CyzwhY0-zmqWKlEno7JPBcvO7G_hI8dp6-_5_KEzFqg,74
-judgeval/tracer/llm/llm_openai/beta_chat_completions.py,sha256=KwetlVexleDSSyRBEezC7Fk5do1Vub3FwLbRhCPgktc,6490
-judgeval/tracer/llm/llm_openai/chat_completions.py,sha256=NWPE_BQTGfTRfsqhzXtNlQAv1Cr2GymolrTGzIbr9Ok,15625
+judgeval/tracer/llm/llm_openai/beta_chat_completions.py,sha256=IXw-Gu-WUxQ-gaBUIe-aAKOn1Pakn_RFl0b1C_1toP8,7326
+judgeval/tracer/llm/llm_openai/chat_completions.py,sha256=HaZEZMla-VbacJTyOYeVNNXsRSSqE2di36NIpyzGBVY,17394
 judgeval/tracer/llm/llm_openai/config.py,sha256=NE0ixKhd4WVeAVjY8jNTncuKYH6R4MQDLPmcCsd3zWY,144
-judgeval/tracer/llm/llm_openai/responses.py,sha256=lhs4yS-rJU255vo5gsJkGFRloYurlfnXIkstjMwR3vA,15875
+judgeval/tracer/llm/llm_openai/responses.py,sha256=5le9rmUtLeVnz10DKS5ajkVcVCHQL3hi1b89pihmoL4,17872
+judgeval/tracer/llm/llm_openai/utils.py,sha256=H32OYE9JeN2Hmyf8qRjqz2ky9QCjSV7KAEKXNAtDWpE,832
 judgeval/tracer/llm/llm_openai/wrapper.py,sha256=Z5Ndib228yd1pXEQ4xIu7_CJHxpW_t0ofZAC6FLc5eU,2055
 judgeval/tracer/llm/llm_together/__init__.py,sha256=MEnsF77IgFD4h73hNCMpo-9a1PHHdm-OxPlOalXOMac,78
-judgeval/tracer/llm/llm_together/chat_completions.py,sha256=KC8sk40l1VDuFStuVGIV1GLLx3vrtPDk5Y2vJsnRe70,14156
+judgeval/tracer/llm/llm_together/chat_completions.py,sha256=RySsK3tqG0NpJHPlVQ705bXxIfseSQUhvIoS-sz4rOg,14380
 judgeval/tracer/llm/llm_together/config.py,sha256=jCJY0KQcHJZZJk2vq038GKIDUMusqgvRjQ0B6OV5uEc,150
 judgeval/tracer/llm/llm_together/wrapper.py,sha256=HFqy_MabQeSq8oj2diZhEuk1SDt_hDfk5MFdPn9MFhg,1733
 judgeval/tracer/processors/__init__.py,sha256=BdOOPOD1RfMI5YHW76DNPKR07EAev-JxoolZ3KaXNNU,7100
@@ -87,7 +88,7 @@ judgeval/utils/file_utils.py,sha256=vq-n5WZEZjVbZ5S9QTkW8nSH6Pvw-Jx0ttsQ1t0wnPQ,
 judgeval/utils/guards.py,sha256=_DaKZxvjD10J97Ze2paHhbCiV2MpDz3FZQmNwaL5k0w,945
 judgeval/utils/meta.py,sha256=RAqZuvOlymqMwFoS0joBW_r65lcN9bY8BpNYHoytKps,773
 judgeval/utils/project.py,sha256=kGpYmp6QGTD6h-GjQ-ovT7kBmGnyb99MWDJmRGFQHOg,527
-judgeval/utils/serialize.py,sha256=QXR-8Nj5rqOrI9zLx0oRLdk6DW6Bc7j8eyF4zQ7PLxA,6256
+judgeval/utils/serialize.py,sha256=WbforbVFGINuk68T2YtWhj-ECMC6rWol3g5dxz9nsm8,6265
 judgeval/utils/testing.py,sha256=m5Nexv65tmfSj1XvAPK5Ear7aJ7w5xjDtZN0tLZ_RBk,2939
 judgeval/utils/url.py,sha256=Shf0v3XcbaWpL0m1eGJEEO_z4TsQCnDB2Rl25OTUmiI,195
 judgeval/utils/version_check.py,sha256=se4Ft8rjcl5u7fHMxSGQpka844V2AcZpOYl6StLWTio,1081
@@ -104,8 +105,8 @@ judgeval/utils/wrappers/mutable_wrap_async.py,sha256=stHISOUCGFUJXY8seXmxUo4ZpMF
 judgeval/utils/wrappers/mutable_wrap_sync.py,sha256=t5jygAQ1vqhy8s1GfiLeYygYgaLTgfoYASN47U5JiPs,2888
 judgeval/utils/wrappers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 judgeval/utils/wrappers/utils.py,sha256=j18vaa6JWDw2s3nQy1z5PfV_9Xxio-bVARaHG_0XyL0,1228
-judgeval-0.20.0.dist-info/METADATA,sha256=dOkZ3SyecH3UHCerW1wwdVpfkzOAjCOkSHtl_D0mGqY,11483
-judgeval-0.20.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-judgeval-0.20.0.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
-judgeval-0.20.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
-judgeval-0.20.0.dist-info/RECORD,,
+judgeval-0.22.0.dist-info/METADATA,sha256=xPrltYImsdj9e2CAxKmwop-VJCf9ZPXzgKi83qkUm8E,11483
+judgeval-0.22.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.22.0.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
+judgeval-0.22.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.22.0.dist-info/RECORD,,