judgeval 0.16.7__py3-none-any.whl → 0.16.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of judgeval might be problematic.
- judgeval/api/api_types.py +1 -2
- judgeval/data/judgment_types.py +1 -2
- judgeval/tracer/__init__.py +7 -52
- judgeval/tracer/llm/config.py +12 -44
- judgeval/tracer/llm/constants.py +0 -1
- judgeval/tracer/llm/llm_anthropic/config.py +3 -17
- judgeval/tracer/llm/llm_anthropic/messages.py +440 -0
- judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
- judgeval/tracer/llm/llm_anthropic/wrapper.py +40 -621
- judgeval/tracer/llm/llm_google/__init__.py +3 -0
- judgeval/tracer/llm/llm_google/config.py +3 -21
- judgeval/tracer/llm/llm_google/generate_content.py +125 -0
- judgeval/tracer/llm/llm_google/wrapper.py +19 -454
- judgeval/tracer/llm/llm_openai/beta_chat_completions.py +192 -0
- judgeval/tracer/llm/llm_openai/chat_completions.py +437 -0
- judgeval/tracer/llm/llm_openai/config.py +3 -29
- judgeval/tracer/llm/llm_openai/responses.py +444 -0
- judgeval/tracer/llm/llm_openai/wrapper.py +43 -641
- judgeval/tracer/llm/llm_together/__init__.py +3 -0
- judgeval/tracer/llm/llm_together/chat_completions.py +398 -0
- judgeval/tracer/llm/llm_together/config.py +3 -20
- judgeval/tracer/llm/llm_together/wrapper.py +34 -485
- judgeval/tracer/llm/providers.py +4 -48
- judgeval/utils/decorators/dont_throw.py +30 -14
- judgeval/utils/wrappers/README.md +3 -0
- judgeval/utils/wrappers/__init__.py +15 -0
- judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
- judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
- judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
- judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
- judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
- judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
- judgeval/utils/wrappers/utils.py +35 -0
- judgeval/version.py +1 -1
- {judgeval-0.16.7.dist-info → judgeval-0.16.8.dist-info}/METADATA +1 -1
- {judgeval-0.16.7.dist-info → judgeval-0.16.8.dist-info}/RECORD +40 -27
- judgeval/tracer/llm/llm_groq/config.py +0 -23
- judgeval/tracer/llm/llm_groq/wrapper.py +0 -498
- judgeval/tracer/local_eval_queue.py +0 -199
- /judgeval/{tracer/llm/llm_groq/__init__.py → utils/wrappers/py.typed} +0 -0
- {judgeval-0.16.7.dist-info → judgeval-0.16.8.dist-info}/WHEEL +0 -0
- {judgeval-0.16.7.dist-info → judgeval-0.16.8.dist-info}/entry_points.txt +0 -0
- {judgeval-0.16.7.dist-info → judgeval-0.16.8.dist-info}/licenses/LICENSE.md +0 -0
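The bulk of this release is a refactor of the LLM tracing layer: each provider's monolithic `wrapper.py` shrinks to a thin dispatcher, per-endpoint instrumentation moves into dedicated modules (`chat_completions.py`, `responses.py`, `beta_chat_completions.py`, and so on), Groq support and the local eval queue are dropped, and a shared `judgeval/utils/wrappers` package is introduced, as listed above. The helper bodies are not shown in this diff, so the following is only a plausible sketch of the pattern the `immutable_wrap_sync` module name suggests; the hook names and signature are assumptions.

```python
# Hypothetical sketch of an "immutable" wrap helper; the actual
# judgeval.utils.wrappers implementation is not shown in this diff.
from typing import Any, Callable, Optional, TypeVar

R = TypeVar("R")


def immutable_wrap_sync(
    func: Callable[..., R],
    before: Optional[Callable[..., None]] = None,  # assumed hook name
    after: Optional[Callable[[R], None]] = None,   # assumed hook name
) -> Callable[..., R]:
    """Run hooks around func without ever changing its arguments,
    return value, or raised exceptions."""

    def wrapper(*args: Any, **kwargs: Any) -> R:
        if before is not None:
            try:
                before(*args, **kwargs)
            except Exception:
                pass  # instrumentation failures must not break the call
        result = func(*args, **kwargs)  # exceptions propagate untouched
        if after is not None:
            try:
                after(result)
            except Exception:
                pass
        return result

    return wrapper
```

The reworked `dont_throw` decorator touched in the same release presumably serves the same goal: tracing code must never crash the wrapped user call.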
judgeval/tracer/llm/llm_openai/wrapper.py
@@ -1,661 +1,63 @@
 from __future__ import annotations
-import functools
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Optional,
-    Tuple,
-    Protocol,
-    TypeVar,
-    Union,
-    Sequence,
-    Callable,
-    Iterator,
-    AsyncIterator,
-    runtime_checkable,
-)
+from typing import TYPE_CHECKING, Union
+import typing
 
-from judgeval.tracer.llm.llm_openai.config import (
-    HAS_OPENAI,
-    openai_OpenAI,
-    openai_AsyncOpenAI,
+from judgeval.tracer.llm.llm_openai.chat_completions import (
+    wrap_chat_completions_create_sync,
+    wrap_chat_completions_create_async,
+)
+from judgeval.tracer.llm.llm_openai.responses import (
+    wrap_responses_create_sync,
+    wrap_responses_create_async,
+)
+from judgeval.tracer.llm.llm_openai.beta_chat_completions import (
+    wrap_beta_chat_completions_parse_sync,
+    wrap_beta_chat_completions_parse_async,
 )
-from judgeval.tracer.managers import sync_span_context, async_span_context
-from judgeval.logger import judgeval_logger
-from judgeval.tracer.keys import AttributeKeys
-from judgeval.tracer.utils import set_span_attribute
-from judgeval.utils.serialize import safe_serialize
 
 if TYPE_CHECKING:
     from judgeval.tracer import Tracer
-    from opentelemetry.trace import Span
-
-
-@runtime_checkable
-class OpenAIPromptTokensDetails(Protocol):
-    cached_tokens: Optional[int]
-
-
-@runtime_checkable
-class OpenAIUsage(Protocol):
-    prompt_tokens: Optional[int]
-    completion_tokens: Optional[int]
-    total_tokens: Optional[int]
-    prompt_tokens_details: Optional[OpenAIPromptTokensDetails]
-
-
-@runtime_checkable
-class OpenAIResponseUsage(Protocol):
-    input_tokens: Optional[int]
-    output_tokens: Optional[int]
-    total_tokens: Optional[int]
-
-
-@runtime_checkable
-class OpenAIUnifiedUsage(Protocol):
-    prompt_tokens: Optional[int]
-    completion_tokens: Optional[int]
-
-    input_tokens: Optional[int]
-    output_tokens: Optional[int]
-
-    total_tokens: Optional[int]
-    prompt_tokens_details: Optional[OpenAIPromptTokensDetails]
-
-
-@runtime_checkable
-class OpenAIMessage(Protocol):
-    content: Optional[str]
-    role: str
-
-
-@runtime_checkable
-class OpenAIParsedMessage(Protocol):
-    parsed: Optional[str]
-    content: Optional[str]
-    role: str
-
-
-@runtime_checkable
-class OpenAIChoice(Protocol):
-    index: int
-    message: OpenAIMessage
-    finish_reason: Optional[str]
-
-
-@runtime_checkable
-class OpenAIParsedChoice(Protocol):
-    index: int
-    message: OpenAIParsedMessage
-    finish_reason: Optional[str]
-
-
-@runtime_checkable
-class OpenAIResponseContent(Protocol):
-    text: str
-
-
-@runtime_checkable
-class OpenAIResponseOutput(Protocol):
-    content: Sequence[OpenAIResponseContent]
-
-
-@runtime_checkable
-class OpenAIChatCompletionBase(Protocol):
-    id: str
-    object: str
-    created: int
-    model: str
-    choices: Sequence[Union[OpenAIChoice, OpenAIParsedChoice]]
-    usage: Optional[OpenAIUnifiedUsage]
-
-
-OpenAIChatCompletion = OpenAIChatCompletionBase
-OpenAIParsedChatCompletion = OpenAIChatCompletionBase
-
-
-@runtime_checkable
-class OpenAIResponse(Protocol):
-    id: str
-    object: str
-    created: int
-    model: str
-    output: Sequence[OpenAIResponseOutput]
-    usage: Optional[OpenAIUnifiedUsage]
-
-
-@runtime_checkable
-class OpenAIStreamDelta(Protocol):
-    content: Optional[str]
-
-
-@runtime_checkable
-class OpenAIStreamChoice(Protocol):
-    index: int
-    delta: OpenAIStreamDelta
-
-
-@runtime_checkable
-class OpenAIStreamChunk(Protocol):
-    choices: Sequence[OpenAIStreamChoice]
-    usage: Optional[OpenAIUnifiedUsage]
-
-
-@runtime_checkable
-class OpenAIClient(Protocol):
-    pass
-
-
-@runtime_checkable
-class OpenAIAsyncClient(Protocol):
-    pass
-
-
-OpenAIResponseType = Union[OpenAIChatCompletionBase, OpenAIResponse]
-OpenAIStreamType = Union[Iterator[OpenAIStreamChunk], AsyncIterator[OpenAIStreamChunk]]
+    from openai import OpenAI, AsyncOpenAI
 
+    TClient = Union[OpenAI, AsyncOpenAI]
 
-def _extract_openai_content(chunk: OpenAIStreamChunk) -> str:
-    if chunk.choices and len(chunk.choices) > 0:
-        delta_content = chunk.choices[0].delta.content
-        if delta_content:
-            return delta_content
-    return ""
 
+def wrap_openai_client_sync(tracer: Tracer, client: OpenAI) -> OpenAI:
+    wrap_chat_completions_create_sync(tracer, client)
+    wrap_responses_create_sync(tracer, client)
+    wrap_beta_chat_completions_parse_sync(tracer, client)
+    return client
 
-def _extract_openai_tokens(usage_data: OpenAIUnifiedUsage) -> Tuple[int, int, int, int]:
-    if hasattr(usage_data, "prompt_tokens") and usage_data.prompt_tokens is not None:
-        prompt_tokens = usage_data.prompt_tokens
-        completion_tokens = usage_data.completion_tokens or 0
-
-    elif hasattr(usage_data, "input_tokens") and usage_data.input_tokens is not None:
-        prompt_tokens = usage_data.input_tokens
-        completion_tokens = usage_data.output_tokens or 0
-    else:
-        prompt_tokens = 0
-        completion_tokens = 0
-
-    # Extract cached tokens
-    cache_read_input_tokens = 0
-    if (
-        usage_data.prompt_tokens_details
-        and usage_data.prompt_tokens_details.cached_tokens
-    ):
-        cache_read_input_tokens = usage_data.prompt_tokens_details.cached_tokens
-
-    cache_creation_input_tokens = 0  # OpenAI doesn't have cache creation tokens
-
-    return (
-        prompt_tokens,
-        completion_tokens,
-        cache_read_input_tokens,
-        cache_creation_input_tokens,
-    )
-
-
-def _format_openai_output(
-    response: OpenAIResponseType,
-) -> Tuple[Optional[Union[str, list[dict[str, Any]]]], Optional[OpenAIUnifiedUsage]]:
-    message_content: Optional[Union[str, list[dict[str, Any]]]] = None
-    usage_data: Optional[OpenAIUnifiedUsage] = None
-
-    try:
-        if isinstance(response, OpenAIResponse):
-            usage_data = response.usage
-            if response.output and len(response.output) > 0:
-                output0 = response.output[0]
-                if output0.content and len(output0.content) > 0:
-                    try:
-                        content_blocks = []
-                        for seg in output0.content:
-                            if hasattr(seg, "type"):
-                                seg_type = getattr(seg, "type", None)
-                                if seg_type == "text" and hasattr(seg, "text"):
-                                    block_data = {
-                                        "type": "text",
-                                        "text": getattr(seg, "text", ""),
-                                    }
-                                elif seg_type == "function_call":
-                                    block_data = {
-                                        "type": "function_call",
-                                        "name": getattr(seg, "name", None),
-                                        "call_id": getattr(seg, "call_id", None),
-                                        "arguments": getattr(seg, "arguments", None),
-                                    }
-                                else:
-                                    # Handle unknown types
-                                    block_data = {"type": seg_type}
-                                    for attr in [
-                                        "text",
-                                        "name",
-                                        "call_id",
-                                        "arguments",
-                                        "content",
-                                    ]:
-                                        if hasattr(seg, attr):
-                                            block_data[attr] = getattr(seg, attr)
-                                content_blocks.append(block_data)
-                            elif hasattr(seg, "text") and seg.text:
-                                # Fallback for segments without type
-                                content_blocks.append(
-                                    {"type": "text", "text": seg.text}
-                                )
-
-                        message_content = (
-                            content_blocks if content_blocks else str(output0.content)
-                        )
-                    except (TypeError, AttributeError):
-                        message_content = str(output0.content)
-        elif isinstance(response, OpenAIChatCompletionBase):
-            usage_data = response.usage
-            if response.choices and len(response.choices) > 0:
-                message = response.choices[0].message
-
-                if (
-                    hasattr(message, "parsed")
-                    and getattr(message, "parsed", None) is not None
-                ):
-                    # For parsed responses, return as structured data
-                    parsed_data = getattr(message, "parsed")
-                    message_content = [{"type": "parsed", "content": parsed_data}]
-                else:
-                    content_blocks = []
-
-                    # Handle regular content
-                    if hasattr(message, "content") and message.content:
-                        content_blocks.append(
-                            {"type": "text", "text": str(message.content)}
-                        )
-
-                    # Handle tool calls (standard Chat Completions API)
-                    if hasattr(message, "tool_calls") and message.tool_calls:
-                        for tool_call in message.tool_calls:
-                            tool_call_data = {
-                                "type": "tool_call",
-                                "id": getattr(tool_call, "id", None),
-                                "function": {
-                                    "name": getattr(tool_call.function, "name", None)
-                                    if hasattr(tool_call, "function")
-                                    else None,
-                                    "arguments": getattr(
-                                        tool_call.function, "arguments", None
-                                    )
-                                    if hasattr(tool_call, "function")
-                                    else None,
-                                },
-                            }
-                            content_blocks.append(tool_call_data)
-
-                    message_content = content_blocks if content_blocks else None
-    except (AttributeError, IndexError, TypeError):
-        pass
-
-    return message_content, usage_data
-
-
-class TracedOpenAIGenerator:
-    def __init__(
-        self,
-        tracer: Tracer,
-        generator: Iterator[OpenAIStreamChunk],
-        client: OpenAIClient,
-        span: Span,
-        model_name: str,
-    ):
-        self.tracer = tracer
-        self.generator = generator
-        self.client = client
-        self.span = span
-        self.model_name = model_name
-        self.accumulated_content = ""
-
-    def __iter__(self) -> Iterator[OpenAIStreamChunk]:
-        return self
-
-    def __next__(self) -> OpenAIStreamChunk:
-        try:
-            chunk = next(self.generator)
-            content = _extract_openai_content(chunk)
-            if content:
-                self.accumulated_content += content
-            if chunk.usage:
-                prompt_tokens, completion_tokens, cache_read, cache_creation = (
-                    _extract_openai_tokens(chunk.usage)
-                )
-                set_span_attribute(
-                    self.span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
-                )
-                set_span_attribute(
-                    self.span,
-                    AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
-                    completion_tokens,
-                )
-                set_span_attribute(
-                    self.span,
-                    AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
-                    cache_read,
-                )
-                set_span_attribute(
-                    self.span,
-                    AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
-                    cache_creation,
-                )
-                set_span_attribute(
-                    self.span,
-                    AttributeKeys.JUDGMENT_USAGE_METADATA,
-                    safe_serialize(chunk.usage),
-                )
-            return chunk
-        except StopIteration:
-            set_span_attribute(
-                self.span, AttributeKeys.GEN_AI_COMPLETION, self.accumulated_content
-            )
-            self.span.end()
-            raise
-        except Exception as e:
-            if self.span:
-                self.span.record_exception(e)
-                self.span.end()
-            raise
-
-
-class TracedOpenAIAsyncGenerator:
-    def __init__(
-        self,
-        tracer: Tracer,
-        async_generator: AsyncIterator[OpenAIStreamChunk],
-        client: OpenAIAsyncClient,
-        span: Span,
-        model_name: str,
-    ):
-        self.tracer = tracer
-        self.async_generator = async_generator
-        self.client = client
-        self.span = span
-        self.model_name = model_name
-        self.accumulated_content = ""
-
-    def __aiter__(self) -> AsyncIterator[OpenAIStreamChunk]:
-        return self
-
-    async def __anext__(self) -> OpenAIStreamChunk:
-        try:
-            chunk = await self.async_generator.__anext__()
-            content = _extract_openai_content(chunk)
-            if content:
-                self.accumulated_content += content
-            if chunk.usage:
-                prompt_tokens, completion_tokens, cache_read, cache_creation = (
-                    _extract_openai_tokens(chunk.usage)
-                )
-                set_span_attribute(
-                    self.span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
-                )
-                set_span_attribute(
-                    self.span,
-                    AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
-                    completion_tokens,
-                )
-                set_span_attribute(
-                    self.span,
-                    AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
-                    cache_read,
-                )
-                set_span_attribute(
-                    self.span,
-                    AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
-                    cache_creation,
-                )
 
-                set_span_attribute(
-                    self.span,
-                    AttributeKeys.JUDGMENT_USAGE_METADATA,
-                    safe_serialize(chunk.usage),
-                )
-            return chunk
-        except StopAsyncIteration:
-            set_span_attribute(
-                self.span, AttributeKeys.GEN_AI_COMPLETION, self.accumulated_content
-            )
-            self.span.end()
-            raise
-        except Exception as e:
-            if self.span:
-                self.span.record_exception(e)
-                self.span.end()
-            raise
+def wrap_openai_client_async(tracer: Tracer, client: AsyncOpenAI) -> AsyncOpenAI:
+    wrap_chat_completions_create_async(tracer, client)
+    wrap_responses_create_async(tracer, client)
+    wrap_beta_chat_completions_parse_async(tracer, client)
+    return client
 
 
-
+@typing.overload
+def wrap_openai_client(tracer: Tracer, client: OpenAI) -> OpenAI: ...
+@typing.overload
+def wrap_openai_client(tracer: Tracer, client: AsyncOpenAI) -> AsyncOpenAI: ...
 
 
 def wrap_openai_client(tracer: Tracer, client: TClient) -> TClient:
+    from judgeval.tracer.llm.llm_openai.config import HAS_OPENAI
+    from judgeval.logger import judgeval_logger
+
     if not HAS_OPENAI:
+        judgeval_logger.error(
+            "Cannot wrap OpenAI client: 'openai' library not installed. "
+            "Install it with: pip install openai"
+        )
        return client
 
-    assert openai_OpenAI is not None
-    assert openai_AsyncOpenAI is not None
-
-    def wrapped(function: Callable, span_name: str):
-        @functools.wraps(function)
-        def wrapper(*args, **kwargs):
-            if kwargs.get("stream", False):
-                span = tracer.get_tracer().start_span(
-                    span_name, attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
-                )
-                tracer.add_agent_attributes_to_span(span)
-                set_span_attribute(
-                    span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
-                )
-                model_name = kwargs.get("model", "")
-                set_span_attribute(span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name)
-                stream_response = function(*args, **kwargs)
-                return TracedOpenAIGenerator(
-                    tracer, stream_response, client, span, model_name
-                )
-            else:
-                with sync_span_context(
-                    tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
-                ) as span:
-                    try:
-                        tracer.add_agent_attributes_to_span(span)
-                        set_span_attribute(
-                            span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
-                        )
-                        model_name = kwargs.get("model", "")
-                        set_span_attribute(
-                            span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name
-                        )
-                    except Exception as e:
-                        judgeval_logger.error(
-                            f"[openai wrapped] Error adding span metadata: {e}"
-                        )
-
-                    response = function(*args, **kwargs)
-
-                    try:
-                        if isinstance(
-                            response, (OpenAIChatCompletionBase, OpenAIResponse)
-                        ):
-                            output, usage_data = _format_openai_output(response)
-                            # Serialize structured data to JSON for span attribute
-                            if isinstance(output, list):
-                                output_str = safe_serialize(output)
-                            else:
-                                output_str = str(output) if output is not None else None
-                            set_span_attribute(
-                                span, AttributeKeys.GEN_AI_COMPLETION, output_str
-                            )
-                            if usage_data:
-                                (
-                                    prompt_tokens,
-                                    completion_tokens,
-                                    cache_read,
-                                    cache_creation,
-                                ) = _extract_openai_tokens(usage_data)
-                                set_span_attribute(
-                                    span,
-                                    AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
-                                    prompt_tokens,
-                                )
-                                set_span_attribute(
-                                    span,
-                                    AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                    completion_tokens,
-                                )
-                                set_span_attribute(
-                                    span,
-                                    AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
-                                    cache_read,
-                                )
-                                set_span_attribute(
-                                    span,
-                                    AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
-                                    cache_creation,
-                                )
-                                set_span_attribute(
-                                    span,
-                                    AttributeKeys.JUDGMENT_USAGE_METADATA,
-                                    safe_serialize(usage_data),
-                                )
-                            set_span_attribute(
-                                span,
-                                AttributeKeys.GEN_AI_RESPONSE_MODEL,
-                                getattr(response, "model", model_name),
-                            )
-                    except Exception as e:
-                        judgeval_logger.error(
-                            f"[openai wrapped] Error adding span metadata: {e}"
-                        )
-                    finally:
-                        return response
-
-        return wrapper
-
-    def wrapped_async(function: Callable, span_name: str):
-        @functools.wraps(function)
-        async def wrapper(*args, **kwargs):
-            if kwargs.get("stream", False):
-                span = tracer.get_tracer().start_span(
-                    span_name, attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
-                )
-                tracer.add_agent_attributes_to_span(span)
-                set_span_attribute(
-                    span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
-                )
-                model_name = kwargs.get("model", "")
-                set_span_attribute(span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name)
-                stream_response = await function(*args, **kwargs)
-                return TracedOpenAIAsyncGenerator(
-                    tracer, stream_response, client, span, model_name
-                )
-            else:
-                async with async_span_context(
-                    tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
-                ) as span:
-                    try:
-                        tracer.add_agent_attributes_to_span(span)
-                        set_span_attribute(
-                            span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
-                        )
-                        model_name = kwargs.get("model", "")
-                        set_span_attribute(
-                            span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name
-                        )
-                    except Exception as e:
-                        judgeval_logger.error(
-                            f"[openai wrapped_async] Error adding span metadata: {e}"
-                        )
-
-                    response = await function(*args, **kwargs)
+    from openai import OpenAI, AsyncOpenAI
 
-                    try:
-                        if isinstance(
-                            response, (OpenAIChatCompletionBase, OpenAIResponse)
-                        ):
-                            output, usage_data = _format_openai_output(response)
-                            # Serialize structured data to JSON for span attribute
-                            if isinstance(output, list):
-                                output_str = safe_serialize(output)
-                            else:
-                                output_str = str(output) if output is not None else None
-                            set_span_attribute(
-                                span, AttributeKeys.GEN_AI_COMPLETION, output_str
-                            )
-                            if usage_data:
-                                (
-                                    prompt_tokens,
-                                    completion_tokens,
-                                    cache_read,
-                                    cache_creation,
-                                ) = _extract_openai_tokens(usage_data)
-                                set_span_attribute(
-                                    span,
-                                    AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
-                                    prompt_tokens,
-                                )
-                                set_span_attribute(
-                                    span,
-                                    AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                    completion_tokens,
-                                )
-                                set_span_attribute(
-                                    span,
-                                    AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
-                                    cache_read,
-                                )
-                                set_span_attribute(
-                                    span,
-                                    AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
-                                    cache_creation,
-                                )
-                                set_span_attribute(
-                                    span,
-                                    AttributeKeys.JUDGMENT_USAGE_METADATA,
-                                    safe_serialize(usage_data),
-                                )
-                            set_span_attribute(
-                                span,
-                                AttributeKeys.GEN_AI_RESPONSE_MODEL,
-                                getattr(response, "model", model_name),
-                            )
-                    except Exception as e:
-                        judgeval_logger.error(
-                            f"[openai wrapped_async] Error adding span metadata: {e}"
-                        )
-                    finally:
-                        return response
-
-        return wrapper
-
-    span_name = "OPENAI_API_CALL"
-    if isinstance(client, openai_OpenAI):
-        setattr(
-            client.chat.completions,
-            "create",
-            wrapped(client.chat.completions.create, span_name),
-        )
-        setattr(client.responses, "create", wrapped(client.responses.create, span_name))
-        setattr(
-            client.beta.chat.completions,
-            "parse",
-            wrapped(client.beta.chat.completions.parse, span_name),
-        )
-    elif isinstance(client, openai_AsyncOpenAI):
-        setattr(
-            client.chat.completions,
-            "create",
-            wrapped_async(client.chat.completions.create, span_name),
-        )
-        setattr(
-            client.responses,
-            "create",
-            wrapped_async(client.responses.create, span_name),
-        )
-        setattr(
-            client.beta.chat.completions,
-            "parse",
-            wrapped_async(client.beta.chat.completions.parse, span_name),
-        )
-
-    return client
+    if isinstance(client, AsyncOpenAI):
+        return wrap_openai_client_async(tracer, client)
+    elif isinstance(client, OpenAI):
+        return wrap_openai_client_sync(tracer, client)
+    else:
+        raise TypeError(f"Invalid client type: {type(client)}")