judgeval 0.20.0__py3-none-any.whl → 0.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of judgeval might be problematic.
- judgeval/__init__.py +1 -2
- judgeval/cli.py +9 -1
- judgeval/tracer/constants.py +1 -1
- judgeval/tracer/keys.py +10 -9
- judgeval/tracer/llm/llm_anthropic/messages.py +34 -22
- judgeval/tracer/llm/llm_anthropic/messages_stream.py +12 -12
- judgeval/tracer/llm/llm_google/generate_content.py +8 -6
- judgeval/tracer/llm/llm_openai/beta_chat_completions.py +36 -12
- judgeval/tracer/llm/llm_openai/chat_completions.py +75 -22
- judgeval/tracer/llm/llm_openai/responses.py +77 -22
- judgeval/tracer/llm/llm_openai/utils.py +22 -0
- judgeval/tracer/llm/llm_together/chat_completions.py +22 -14
- judgeval/utils/serialize.py +2 -2
- judgeval/version.py +1 -1
- {judgeval-0.20.0.dist-info → judgeval-0.22.0.dist-info}/METADATA +1 -1
- {judgeval-0.20.0.dist-info → judgeval-0.22.0.dist-info}/RECORD +19 -18
- {judgeval-0.20.0.dist-info → judgeval-0.22.0.dist-info}/WHEEL +0 -0
- {judgeval-0.20.0.dist-info → judgeval-0.22.0.dist-info}/entry_points.txt +0 -0
- {judgeval-0.20.0.dist-info → judgeval-0.22.0.dist-info}/licenses/LICENSE.md +0 -0
judgeval/__init__.py
CHANGED
@@ -170,8 +170,7 @@ class JudgmentClient(metaclass=SingletonMeta):
                 judgeval_logger.error(f"Failed to upload custom scorer: {unique_name}")
                 return False
 
-        except Exception
-            judgeval_logger.error(f"Error uploading custom scorer: {e}")
+        except Exception:
             raise
 
 
judgeval/cli.py
CHANGED
@@ -6,6 +6,7 @@ from dotenv import load_dotenv
 from judgeval.logger import judgeval_logger
 from judgeval import JudgmentClient
 from judgeval.version import get_version
+from judgeval.exceptions import JudgmentAPIError
 
 load_dotenv()
 
@@ -56,8 +57,15 @@ def upload_scorer(
             judgeval_logger.error("Failed to upload custom scorer")
             raise typer.Exit(1)
 
+        judgeval_logger.info("Custom scorer uploaded successfully!")
         raise typer.Exit(0)
-    except Exception:
+    except Exception as e:
+        if isinstance(e, JudgmentAPIError) and e.status_code == 409:
+            judgeval_logger.error(
+                "Duplicate scorer detected. Use --overwrite flag to replace the existing scorer"
+            )
+            raise typer.Exit(1)
+        # Re-raise other exceptions
         raise
 
 
judgeval/tracer/constants.py
CHANGED
@@ -1 +1 @@
-JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME = "
+JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME = "judgeval"
judgeval/tracer/keys.py
CHANGED
@@ -26,18 +26,19 @@ class AttributeKeys(str, Enum):
 
     PENDING_TRACE_EVAL = "judgment.pending_trace_eval"
 
+    JUDGMENT_LLM_PROVIDER = "judgment.llm.provider"
+    JUDGMENT_LLM_MODEL_NAME = "judgment.llm.model"
+    JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS = "judgment.usage.non_cached_input_tokens"
+    JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS = (
+        "judgment.usage.cache_creation_input_tokens"
+    )
+    JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS = "judgment.usage.cache_read_input_tokens"
+    JUDGMENT_USAGE_OUTPUT_TOKENS = "judgment.usage.output_tokens"
+    JUDGMENT_USAGE_TOTAL_COST_USD = "judgment.usage.total_cost_usd"
+
     GEN_AI_PROMPT = "gen_ai.prompt"
     GEN_AI_COMPLETION = "gen_ai.completion"
-    GEN_AI_REQUEST_MODEL = "gen_ai.request.model"
-    GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"
     GEN_AI_SYSTEM = "gen_ai.system"
-    GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
-    GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
-    GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS = (
-        "gen_ai.usage.cache_creation_input_tokens"
-    )
-    GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS = "gen_ai.usage.cache_read_input_tokens"
-
     GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
     GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
     GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"
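For orientation, a minimal sketch of how the renamed judgment.* usage keys might be read back off a span's attributes. This is hypothetical consumer code, not part of this release; the helper name and the plain-dict attributes are assumptions:

    # Hypothetical helper; assumes span attributes are exposed as a plain dict.
    from judgeval.tracer.keys import AttributeKeys

    def summarize_usage(attributes: dict) -> dict:
        # AttributeKeys subclasses str, so enum members work as string keys.
        return {
            "non_cached_input": attributes.get(AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS, 0),
            "cache_read_input": attributes.get(AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, 0),
            "output": attributes.get(AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, 0),
        }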
judgeval/tracer/llm/llm_anthropic/messages.py
CHANGED
@@ -95,7 +95,7 @@ def _wrap_non_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: Message) -> None:
@@ -112,17 +112,19 @@ def _wrap_non_streaming_sync(
             _extract_anthropic_tokens(result.usage)
         )
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
             span,
-            AttributeKeys.
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
@@ -133,7 +135,7 @@ def _wrap_non_streaming_sync(
 
         set_span_attribute(
             span,
-            AttributeKeys.
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model,
         )
 
@@ -169,7 +171,7 @@ def _wrap_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -197,17 +199,21 @@ def _wrap_streaming_sync(
             _extract_anthropic_tokens(usage_data)
         )
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
+            cache_read,
         )
         set_span_attribute(
             span,
-            AttributeKeys.
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
@@ -279,7 +285,7 @@ def _wrap_non_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: Message) -> None:
@@ -296,17 +302,19 @@ def _wrap_non_streaming_async(
             _extract_anthropic_tokens(result.usage)
         )
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
             span,
-            AttributeKeys.
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
@@ -317,7 +325,7 @@ def _wrap_non_streaming_async(
 
         set_span_attribute(
             span,
-            AttributeKeys.
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model,
         )
 
@@ -354,7 +362,7 @@ def _wrap_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -382,17 +390,21 @@ def _wrap_streaming_async(
             _extract_anthropic_tokens(usage_data)
         )
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
        )
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
+            cache_read,
         )
         set_span_attribute(
             span,
-            AttributeKeys.
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
judgeval/tracer/llm/llm_anthropic/messages_stream.py
CHANGED
@@ -44,7 +44,7 @@ def wrap_messages_stream_sync(tracer: Tracer, client: Anthropic) -> None:
 
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -125,22 +125,22 @@ def wrap_messages_stream_sync(tracer: Tracer, client: Anthropic) -> None:
             ) = _extract_anthropic_tokens(final_message.usage)
             set_span_attribute(
                 span,
-                AttributeKeys.
+                AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
                 prompt_tokens,
             )
             set_span_attribute(
                 span,
-                AttributeKeys.
+                AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
                 completion_tokens,
             )
             set_span_attribute(
                 span,
-                AttributeKeys.
+                AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
                 cache_read,
             )
             set_span_attribute(
                 span,
-                AttributeKeys.
+                AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
                 cache_creation,
             )
             set_span_attribute(
@@ -151,7 +151,7 @@ def wrap_messages_stream_sync(tracer: Tracer, client: Anthropic) -> None:
 
             set_span_attribute(
                 span,
-                AttributeKeys.
+                AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
                 final_message.model,
             )
         except Exception:
@@ -190,7 +190,7 @@ def wrap_messages_stream_async(tracer: Tracer, client: AsyncAnthropic) -> None:
 
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -271,22 +271,22 @@ def wrap_messages_stream_async(tracer: Tracer, client: AsyncAnthropic) -> None:
             ) = _extract_anthropic_tokens(final_message.usage)
             set_span_attribute(
                 span,
-                AttributeKeys.
+                AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
                 prompt_tokens,
             )
             set_span_attribute(
                 span,
-                AttributeKeys.
+                AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
                 completion_tokens,
             )
             set_span_attribute(
                 span,
-                AttributeKeys.
+                AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
                 cache_read,
             )
             set_span_attribute(
                 span,
-                AttributeKeys.
+                AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
                 cache_creation,
             )
             set_span_attribute(
@@ -297,7 +297,7 @@ def wrap_messages_stream_async(tracer: Tracer, client: AsyncAnthropic) -> None:
 
             set_span_attribute(
                 span,
-                AttributeKeys.
+                AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
                 final_message.model,
             )
         except Exception:
judgeval/tracer/llm/llm_google/generate_content.py
CHANGED
@@ -63,7 +63,7 @@ def wrap_generate_content_sync(tracer: Tracer, client: Client) -> None:
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: GenerateContentResponse) -> None:
@@ -79,17 +79,19 @@ def wrap_generate_content_sync(tracer: Tracer, client: Client) -> None:
             _extract_google_tokens(usage_data)
         )
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
             span,
-            AttributeKeys.
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
@@ -100,7 +102,7 @@ def wrap_generate_content_sync(tracer: Tracer, client: Client) -> None:
 
         set_span_attribute(
             span,
-            AttributeKeys.
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model_version if result.model_version else ctx["model_name"],
         )
 
judgeval/tracer/llm/llm_openai/beta_chat_completions.py
CHANGED
@@ -16,6 +16,7 @@ from judgeval.utils.wrappers import (
     immutable_wrap_sync,
     immutable_wrap_async,
 )
+from judgeval.tracer.llm.llm_openai.utils import openai_tokens_converter
 
 if TYPE_CHECKING:
     from judgeval.tracer import Tracer
@@ -45,7 +46,7 @@ def _wrap_beta_non_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: ParsedChatCompletion[T]) -> None:
@@ -66,17 +67,29 @@ def _wrap_beta_non_streaming_sync(
         if prompt_tokens_details:
             cache_read = prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -86,7 +99,7 @@ def _wrap_beta_non_streaming_sync(
 
         set_span_attribute(
             span,
-            AttributeKeys.
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model or ctx["model_name"],
         )
 
@@ -128,7 +141,7 @@ def _wrap_beta_non_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: ParsedChatCompletion[T]) -> None:
@@ -149,17 +162,28 @@ def _wrap_beta_non_streaming_async(
         if prompt_tokens_details:
             cache_read = prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -169,7 +193,7 @@ def _wrap_beta_non_streaming_async(
 
         set_span_attribute(
             span,
-            AttributeKeys.
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model or ctx["model_name"],
         )
 
judgeval/tracer/llm/llm_openai/chat_completions.py
CHANGED
@@ -25,6 +25,7 @@ from judgeval.utils.wrappers import (
     immutable_wrap_sync_iterator,
     immutable_wrap_async_iterator,
 )
+from judgeval.tracer.llm.llm_openai.utils import openai_tokens_converter
 
 if TYPE_CHECKING:
     from judgeval.tracer import Tracer
@@ -68,7 +69,7 @@ def _wrap_non_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: ChatCompletion) -> None:
@@ -89,17 +90,29 @@ def _wrap_non_streaming_sync(
         if prompt_tokens_details:
             cache_read = prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -109,7 +122,7 @@ def _wrap_non_streaming_sync(
 
         set_span_attribute(
             span,
-            AttributeKeys.
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model or ctx["model_name"],
         )
 
@@ -145,7 +158,7 @@ def _wrap_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -182,17 +195,31 @@ def _wrap_streaming_sync(
         if chunk.usage.prompt_tokens_details:
             cache_read = chunk.usage.prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                chunk.usage.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
+            cache_read,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -264,7 +291,7 @@ def _wrap_non_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: ChatCompletion) -> None:
@@ -285,17 +312,29 @@ def _wrap_non_streaming_async(
         if prompt_tokens_details:
             cache_read = prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
        )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -305,7 +344,7 @@ def _wrap_non_streaming_async(
 
         set_span_attribute(
             span,
-            AttributeKeys.
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model or ctx["model_name"],
         )
 
@@ -342,7 +381,7 @@ def _wrap_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -379,17 +418,31 @@ def _wrap_streaming_async(
         if chunk.usage.prompt_tokens_details:
             cache_read = chunk.usage.prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                chunk.usage.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
+            cache_read,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
judgeval/tracer/llm/llm_openai/responses.py
CHANGED
@@ -24,6 +24,7 @@ from judgeval.utils.wrappers import (
     immutable_wrap_sync_iterator,
     immutable_wrap_async_iterator,
 )
+from judgeval.tracer.llm.llm_openai.utils import openai_tokens_converter
 
 if TYPE_CHECKING:
     from judgeval.tracer import Tracer
@@ -62,7 +63,7 @@ def _wrap_responses_non_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: Response) -> None:
@@ -80,17 +81,29 @@ def _wrap_responses_non_streaming_sync(
         completion_tokens = usage_data.output_tokens or 0
         cache_read = usage_data.input_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -101,7 +114,7 @@ def _wrap_responses_non_streaming_sync(
         if hasattr(result, "model"):
             set_span_attribute(
                 span,
-                AttributeKeys.
+                AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
                 result.model or ctx["model_name"],
             )
 
@@ -137,7 +150,7 @@ def _wrap_responses_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -167,6 +180,7 @@ def _wrap_responses_streaming_sync(
         ):
             prompt_tokens = chunk.response.usage.input_tokens or 0
             completion_tokens = chunk.response.usage.output_tokens or 0
+            total_tokens = chunk.response.usage.total_tokens or 0
             # Safely access nested cached_tokens
             input_tokens_details = getattr(
                 chunk.response.usage, "input_tokens_details", None
@@ -177,21 +191,35 @@ def _wrap_responses_streaming_sync(
                 else 0
             )
 
+            prompt_tokens, completion_tokens, cache_read, cache_creation = (
+                openai_tokens_converter(
+                    prompt_tokens,
+                    completion_tokens,
+                    cache_read,
+                    0,
+                    total_tokens,
+                )
+            )
+
             set_span_attribute(
-                span,
+                span,
+                AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+                prompt_tokens,
             )
             set_span_attribute(
                 span,
-                AttributeKeys.
+                AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
                 completion_tokens,
             )
             set_span_attribute(
                 span,
-                AttributeKeys.
+                AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
                 cache_read,
             )
             set_span_attribute(
-                span,
+                span,
+                AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
+                0,
             )
             set_span_attribute(
                 span,
@@ -266,7 +294,7 @@ def _wrap_responses_non_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: Response) -> None:
@@ -284,17 +312,29 @@ def _wrap_responses_non_streaming_async(
         completion_tokens = usage_data.output_tokens or 0
         cache_read = usage_data.input_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -305,7 +345,7 @@ def _wrap_responses_non_streaming_async(
         if hasattr(result, "model"):
             set_span_attribute(
                 span,
-                AttributeKeys.
+                AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
                 result.model or ctx["model_name"],
             )
 
@@ -341,7 +381,7 @@ def _wrap_responses_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -373,6 +413,7 @@ def _wrap_responses_streaming_async(
         ):
             prompt_tokens = chunk.response.usage.input_tokens or 0
             completion_tokens = chunk.response.usage.output_tokens or 0
+            total_tokens = chunk.response.usage.total_tokens or 0
             # Safely access nested cached_tokens
             input_tokens_details = getattr(
                 chunk.response.usage, "input_tokens_details", None
@@ -383,21 +424,35 @@ def _wrap_responses_streaming_async(
                 else 0
             )
 
+            prompt_tokens, completion_tokens, cache_read, cache_creation = (
+                openai_tokens_converter(
+                    prompt_tokens,
+                    completion_tokens,
+                    cache_read,
+                    0,
+                    total_tokens,
+                )
+            )
+
             set_span_attribute(
-                span,
+                span,
+                AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+                prompt_tokens,
             )
             set_span_attribute(
                 span,
-                AttributeKeys.
+                AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
                 completion_tokens,
             )
             set_span_attribute(
                 span,
-                AttributeKeys.
+                AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
                 cache_read,
             )
             set_span_attribute(
-                span,
+                span,
+                AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
+                0,
             )
             set_span_attribute(
                 span,
judgeval/tracer/llm/llm_openai/utils.py
ADDED
@@ -0,0 +1,22 @@
+def openai_tokens_converter(
+    prompt_tokens: int,
+    completion_tokens: int,
+    cache_read: int,
+    cache_creation: int,
+    total_tokens: int,
+) -> tuple[int, int, int, int]:
+    """
+    Returns:
+        tuple[int, int, int, int]:
+            - judgment.usage.non_cached_input
+            - judgment.usage.output_tokens
+            - judgment.usage.cached_input_tokens
+            - judgment.usage.cache_creation_tokens
+    """
+    manual_tokens = prompt_tokens + completion_tokens + cache_read + cache_creation
+
+    if manual_tokens > total_tokens:
+        # This is the openAI case where we need to subtract the cached tokens from the input tokens
+        return prompt_tokens - cache_read, completion_tokens, cache_read, cache_creation
+    else:
+        return prompt_tokens, completion_tokens, cache_read, cache_creation
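To make the converter's heuristic concrete, a quick sanity check (example values only): OpenAI-style usage reports prompt_tokens inclusive of cached tokens, so the component sum overshoots total_tokens and the cached portion is subtracted from the input count; usage that already reports non-cached input passes through unchanged.

    from judgeval.tracer.llm.llm_openai.utils import openai_tokens_converter

    # Prompt count includes 40 cached tokens: 100 + 50 + 40 + 0 = 190 > 150,
    # so the 40 cached tokens are subtracted from the input count.
    assert openai_tokens_converter(100, 50, 40, 0, 150) == (60, 50, 40, 0)

    # Components already consistent with the total: returned as-is.
    assert openai_tokens_converter(60, 50, 40, 0, 150) == (60, 50, 40, 0)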
judgeval/tracer/llm/llm_together/chat_completions.py
CHANGED
@@ -73,7 +73,7 @@ def _wrap_non_streaming_sync(
         )
         ctx["model_name"] = prefixed_model_name
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
         )
 
     def post_hook(ctx: Dict[str, Any], result: ChatCompletionResponse) -> None:
@@ -90,10 +90,12 @@ def _wrap_non_streaming_sync(
             result.usage
         )
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
             span,
@@ -103,7 +105,7 @@ def _wrap_non_streaming_sync(
 
         set_span_attribute(
             span,
-            AttributeKeys.
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             ctx["model_name"],
         )
 
@@ -143,7 +145,7 @@ def _wrap_streaming_sync(
         )
         ctx["model_name"] = prefixed_model_name
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
         )
         ctx["accumulated_content"] = ""
 
@@ -171,10 +173,12 @@ def _wrap_streaming_sync(
             chunk.usage
         )
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
             span,
@@ -249,7 +253,7 @@ def _wrap_non_streaming_async(
         )
         ctx["model_name"] = prefixed_model_name
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
         )
 
     def post_hook(ctx: Dict[str, Any], result: ChatCompletionResponse) -> None:
@@ -266,10 +270,12 @@ def _wrap_non_streaming_async(
             result.usage
         )
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
             span,
@@ -279,7 +285,7 @@ def _wrap_non_streaming_async(
 
         set_span_attribute(
             span,
-            AttributeKeys.
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             ctx["model_name"],
         )
 
@@ -320,7 +326,7 @@ def _wrap_streaming_async(
         )
         ctx["model_name"] = prefixed_model_name
         set_span_attribute(
-            ctx["span"], AttributeKeys.
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
         )
         ctx["accumulated_content"] = ""
 
@@ -348,10 +354,12 @@ def _wrap_streaming_async(
            chunk.usage
         )
         set_span_attribute(
-            span,
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
             span,
judgeval/utils/serialize.py
CHANGED
@@ -247,7 +247,7 @@ encoders_by_class_tuples = generate_encoders_by_class_tuples(ENCODERS_BY_TYPE)
 # Seralize arbitrary object to a json string
 def safe_serialize(obj: Any) -> str:
     try:
-        return orjson.dumps(json_encoder(obj)).decode()
+        return orjson.dumps(json_encoder(obj), option=orjson.OPT_NON_STR_KEYS).decode()
     except Exception as e:
         judgeval_logger.warning(f"Error serializing object: {e}")
-        return
+        return repr(obj)
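Both serialize.py changes are behavioral: orjson raises TypeError on dicts with non-string keys unless OPT_NON_STR_KEYS is set, and the failure path now returns repr(obj) instead of an implicit None. A quick illustration (example values only):

    import orjson

    # Without the option this raises TypeError; with it, keys are coerced to strings.
    print(orjson.dumps({1: "a", 2.5: "b"}, option=orjson.OPT_NON_STR_KEYS).decode())
    # -> {"1":"a","2.5":"b"}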
judgeval/version.py
CHANGED
{judgeval-0.20.0.dist-info → judgeval-0.22.0.dist-info}/RECORD
CHANGED
@@ -1,10 +1,10 @@
-judgeval/__init__.py,sha256=
-judgeval/cli.py,sha256=
+judgeval/__init__.py,sha256=RRiBbXUj7M1VW3NqFvMZlXyI72duh3VA5bfIWqPmKNw,6670
+judgeval/cli.py,sha256=T9nKO9eHMOiLCgxaxuihqtRHsG_dMT06sW6X873MmnI,2209
 judgeval/constants.py,sha256=JZZJ1MqzZZDVk-5PRPRbmLnM8mXI-RDL5vxa1JFuscs,3408
 judgeval/env.py,sha256=uFggNNKmfDaa5dmZMwwXVIDdHAHe524jDWUpByV4hm4,1879
 judgeval/exceptions.py,sha256=tTbfe4yoOtPXmn22UQz9-6a-5PT9uOko85xaRRwr0Sw,621
 judgeval/logger.py,sha256=VP5blbsJ53mvJbNHfBf5p2KrARUrkrErpPkB-__Hh3U,1562
-judgeval/version.py,sha256=
+judgeval/version.py,sha256=JT1Ltu1j1xi9sHf7Inhosvl-1tp0QPms04o2RCvwJUI,74
 judgeval/warnings.py,sha256=LbGte14ppiFjrkp-JJYueZ40NWFvMkWRvPXr6r-fUWw,73
 judgeval/api/__init__.py,sha256=dGZm9KtgLMnmbiyDEJ_D7suuVqmsibR_Cd0YZRJ7qHI,15210
 judgeval/api/api_types.py,sha256=PJ5ZQWuvCl5GXFzhcpOw6Iuktr50lo5BaILmZcAKWfc,10085
@@ -43,8 +43,8 @@ judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=ucYOI6ztAjf
 judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=V3RdrWhnR_vLBrtWw7QbgN9K_A-Och7-v9I2fN4z8gY,506
 judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=WhSkPs8tWyQ_cS-y-VTzrKAPlizKp-6zi_DmfgW4AgM,10773
 judgeval/tracer/__init__.py,sha256=E1rcegwIQyqEIvM4yfPLm2M0B6e27dhPrJPhMotBjtw,37432
-judgeval/tracer/constants.py,sha256=
-judgeval/tracer/keys.py,sha256=
+judgeval/tracer/constants.py,sha256=tLR5ClDaNlNg_MAv2XRdk62uQW4KyBnWaNbG_YYblTc,55
+judgeval/tracer/keys.py,sha256=mYBo_X6-rC9xfiI-WpjHlO7rUtcMORtQXCQyO1F3Ycc,2387
 judgeval/tracer/managers.py,sha256=NEkovnK8Qaod1U5msT0_hyHUqo9C2uFro2IzNlC8jCo,5071
 judgeval/tracer/utils.py,sha256=xWha5iwC733wCf2HKbNqzxOPS1ovO1OymWIUFLz-UpQ,537
 judgeval/tracer/exporters/__init__.py,sha256=3WDXC28iY5gYMM5s7ejmy7P-DVDQ_iIuzwovZxUKJXg,1295
@@ -57,21 +57,22 @@ judgeval/tracer/llm/constants.py,sha256=IWa3CMes8wIt_UG7jrGEOztg2sHz54fdOMWIOOr-
 judgeval/tracer/llm/providers.py,sha256=VAimkmChOOjhC1cUv-0iG8pa5PhOw1HIOyt3zrIrbcM,628
 judgeval/tracer/llm/llm_anthropic/__init__.py,sha256=HG0gIlTgaRt-Y0u1ERPQ19pUgb4YHkTh7tZQPeyR4oM,80
 judgeval/tracer/llm/llm_anthropic/config.py,sha256=ICfKODPQvZsRxpK4xWQ-YE79pmWJTmY2wryddxpNdpM,153
-judgeval/tracer/llm/llm_anthropic/messages.py,sha256=
-judgeval/tracer/llm/llm_anthropic/messages_stream.py,sha256=
+judgeval/tracer/llm/llm_anthropic/messages.py,sha256=T7dApxJCsOWEpquYSZICACwTioZG3ZcxHdJjvF04T2E,15474
+judgeval/tracer/llm/llm_anthropic/messages_stream.py,sha256=DKlZZnfK_yv_tEMwF2XxvsjgUjOFI3c5JUMQwERNV7k,12188
 judgeval/tracer/llm/llm_anthropic/wrapper.py,sha256=JILcyC4NvjXZSqlFoZp-VB-JsCYZkQPMFEYaB4AysrA,1849
 judgeval/tracer/llm/llm_google/__init__.py,sha256=otBZETsAfVZjtZaN5N36Ln0kw-I9jVB4tFGrV6novHo,74
 judgeval/tracer/llm/llm_google/config.py,sha256=S3yCAE9oHbXjLVYiz5mGD16yIgXMBBUu5UN4lBjoCNQ,162
-judgeval/tracer/llm/llm_google/generate_content.py,sha256=
+judgeval/tracer/llm/llm_google/generate_content.py,sha256=w1rIh1cTBYnkfBQTL4qHntwsKfBcSrf2VSS2y-BOMRU,4030
 judgeval/tracer/llm/llm_google/wrapper.py,sha256=jqaMXGoM9dlPBbCFadMI5EqFrNHzBt0h9VkNn7KPVLk,901
 judgeval/tracer/llm/llm_openai/__init__.py,sha256=CyzwhY0-zmqWKlEno7JPBcvO7G_hI8dp6-_5_KEzFqg,74
-judgeval/tracer/llm/llm_openai/beta_chat_completions.py,sha256=
-judgeval/tracer/llm/llm_openai/chat_completions.py,sha256=
+judgeval/tracer/llm/llm_openai/beta_chat_completions.py,sha256=IXw-Gu-WUxQ-gaBUIe-aAKOn1Pakn_RFl0b1C_1toP8,7326
+judgeval/tracer/llm/llm_openai/chat_completions.py,sha256=HaZEZMla-VbacJTyOYeVNNXsRSSqE2di36NIpyzGBVY,17394
 judgeval/tracer/llm/llm_openai/config.py,sha256=NE0ixKhd4WVeAVjY8jNTncuKYH6R4MQDLPmcCsd3zWY,144
-judgeval/tracer/llm/llm_openai/responses.py,sha256=
+judgeval/tracer/llm/llm_openai/responses.py,sha256=5le9rmUtLeVnz10DKS5ajkVcVCHQL3hi1b89pihmoL4,17872
+judgeval/tracer/llm/llm_openai/utils.py,sha256=H32OYE9JeN2Hmyf8qRjqz2ky9QCjSV7KAEKXNAtDWpE,832
 judgeval/tracer/llm/llm_openai/wrapper.py,sha256=Z5Ndib228yd1pXEQ4xIu7_CJHxpW_t0ofZAC6FLc5eU,2055
 judgeval/tracer/llm/llm_together/__init__.py,sha256=MEnsF77IgFD4h73hNCMpo-9a1PHHdm-OxPlOalXOMac,78
-judgeval/tracer/llm/llm_together/chat_completions.py,sha256=
+judgeval/tracer/llm/llm_together/chat_completions.py,sha256=RySsK3tqG0NpJHPlVQ705bXxIfseSQUhvIoS-sz4rOg,14380
 judgeval/tracer/llm/llm_together/config.py,sha256=jCJY0KQcHJZZJk2vq038GKIDUMusqgvRjQ0B6OV5uEc,150
 judgeval/tracer/llm/llm_together/wrapper.py,sha256=HFqy_MabQeSq8oj2diZhEuk1SDt_hDfk5MFdPn9MFhg,1733
 judgeval/tracer/processors/__init__.py,sha256=BdOOPOD1RfMI5YHW76DNPKR07EAev-JxoolZ3KaXNNU,7100
@@ -87,7 +88,7 @@ judgeval/utils/file_utils.py,sha256=vq-n5WZEZjVbZ5S9QTkW8nSH6Pvw-Jx0ttsQ1t0wnPQ,
 judgeval/utils/guards.py,sha256=_DaKZxvjD10J97Ze2paHhbCiV2MpDz3FZQmNwaL5k0w,945
 judgeval/utils/meta.py,sha256=RAqZuvOlymqMwFoS0joBW_r65lcN9bY8BpNYHoytKps,773
 judgeval/utils/project.py,sha256=kGpYmp6QGTD6h-GjQ-ovT7kBmGnyb99MWDJmRGFQHOg,527
-judgeval/utils/serialize.py,sha256=
+judgeval/utils/serialize.py,sha256=WbforbVFGINuk68T2YtWhj-ECMC6rWol3g5dxz9nsm8,6265
 judgeval/utils/testing.py,sha256=m5Nexv65tmfSj1XvAPK5Ear7aJ7w5xjDtZN0tLZ_RBk,2939
 judgeval/utils/url.py,sha256=Shf0v3XcbaWpL0m1eGJEEO_z4TsQCnDB2Rl25OTUmiI,195
 judgeval/utils/version_check.py,sha256=se4Ft8rjcl5u7fHMxSGQpka844V2AcZpOYl6StLWTio,1081
@@ -104,8 +105,8 @@ judgeval/utils/wrappers/mutable_wrap_async.py,sha256=stHISOUCGFUJXY8seXmxUo4ZpMF
 judgeval/utils/wrappers/mutable_wrap_sync.py,sha256=t5jygAQ1vqhy8s1GfiLeYygYgaLTgfoYASN47U5JiPs,2888
 judgeval/utils/wrappers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 judgeval/utils/wrappers/utils.py,sha256=j18vaa6JWDw2s3nQy1z5PfV_9Xxio-bVARaHG_0XyL0,1228
-judgeval-0.
-judgeval-0.
-judgeval-0.
-judgeval-0.
-judgeval-0.
+judgeval-0.22.0.dist-info/METADATA,sha256=xPrltYImsdj9e2CAxKmwop-VJCf9ZPXzgKi83qkUm8E,11483
+judgeval-0.22.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.22.0.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
+judgeval-0.22.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.22.0.dist-info/RECORD,,
{judgeval-0.20.0.dist-info → judgeval-0.22.0.dist-info}/WHEEL: file without changes
{judgeval-0.20.0.dist-info → judgeval-0.22.0.dist-info}/entry_points.txt: file without changes
{judgeval-0.20.0.dist-info → judgeval-0.22.0.dist-info}/licenses/LICENSE.md: file without changes