judgeval 0.16.7__py3-none-any.whl → 0.16.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of judgeval might be problematic. Click here for more details.
- judgeval/api/api_types.py +1 -2
- judgeval/data/judgment_types.py +1 -2
- judgeval/tracer/__init__.py +7 -52
- judgeval/tracer/llm/config.py +12 -44
- judgeval/tracer/llm/constants.py +0 -1
- judgeval/tracer/llm/llm_anthropic/config.py +3 -17
- judgeval/tracer/llm/llm_anthropic/messages.py +440 -0
- judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
- judgeval/tracer/llm/llm_anthropic/wrapper.py +40 -621
- judgeval/tracer/llm/llm_google/__init__.py +3 -0
- judgeval/tracer/llm/llm_google/config.py +3 -21
- judgeval/tracer/llm/llm_google/generate_content.py +125 -0
- judgeval/tracer/llm/llm_google/wrapper.py +19 -454
- judgeval/tracer/llm/llm_openai/beta_chat_completions.py +192 -0
- judgeval/tracer/llm/llm_openai/chat_completions.py +437 -0
- judgeval/tracer/llm/llm_openai/config.py +3 -29
- judgeval/tracer/llm/llm_openai/responses.py +444 -0
- judgeval/tracer/llm/llm_openai/wrapper.py +43 -641
- judgeval/tracer/llm/llm_together/__init__.py +3 -0
- judgeval/tracer/llm/llm_together/chat_completions.py +398 -0
- judgeval/tracer/llm/llm_together/config.py +3 -20
- judgeval/tracer/llm/llm_together/wrapper.py +34 -485
- judgeval/tracer/llm/providers.py +4 -48
- judgeval/utils/decorators/dont_throw.py +30 -14
- judgeval/utils/wrappers/README.md +3 -0
- judgeval/utils/wrappers/__init__.py +15 -0
- judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
- judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
- judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
- judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
- judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
- judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
- judgeval/utils/wrappers/utils.py +35 -0
- judgeval/version.py +1 -1
- {judgeval-0.16.7.dist-info → judgeval-0.16.9.dist-info}/METADATA +1 -1
- {judgeval-0.16.7.dist-info → judgeval-0.16.9.dist-info}/RECORD +40 -27
- judgeval/tracer/llm/llm_groq/config.py +0 -23
- judgeval/tracer/llm/llm_groq/wrapper.py +0 -498
- judgeval/tracer/local_eval_queue.py +0 -199
- /judgeval/{tracer/llm/llm_groq/__init__.py → utils/wrappers/py.typed} +0 -0
- {judgeval-0.16.7.dist-info → judgeval-0.16.9.dist-info}/WHEEL +0 -0
- {judgeval-0.16.7.dist-info → judgeval-0.16.9.dist-info}/entry_points.txt +0 -0
- {judgeval-0.16.7.dist-info → judgeval-0.16.9.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -1,640 +1,59 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
import
|
|
3
|
-
|
|
4
|
-
TYPE_CHECKING,
|
|
5
|
-
Callable,
|
|
6
|
-
Optional,
|
|
7
|
-
Protocol,
|
|
8
|
-
TypeVar,
|
|
9
|
-
Tuple,
|
|
10
|
-
Union,
|
|
11
|
-
Iterator,
|
|
12
|
-
AsyncIterator,
|
|
13
|
-
Sequence,
|
|
14
|
-
runtime_checkable,
|
|
15
|
-
)
|
|
2
|
+
from typing import TYPE_CHECKING, Union
|
|
3
|
+
import typing
|
|
16
4
|
|
|
17
|
-
from judgeval.tracer.llm.llm_anthropic.
|
|
18
|
-
|
|
19
|
-
|
|
5
|
+
from judgeval.tracer.llm.llm_anthropic.messages import (
|
|
6
|
+
wrap_messages_create_sync,
|
|
7
|
+
wrap_messages_create_async,
|
|
8
|
+
)
|
|
9
|
+
from judgeval.tracer.llm.llm_anthropic.messages_stream import (
|
|
10
|
+
wrap_messages_stream_sync,
|
|
11
|
+
wrap_messages_stream_async,
|
|
20
12
|
)
|
|
21
|
-
from judgeval.tracer.managers import sync_span_context, async_span_context
|
|
22
|
-
from judgeval.logger import judgeval_logger
|
|
23
|
-
from judgeval.tracer.keys import AttributeKeys
|
|
24
|
-
from judgeval.tracer.utils import set_span_attribute
|
|
25
|
-
from judgeval.utils.serialize import safe_serialize
|
|
26
13
|
|
|
27
14
|
if TYPE_CHECKING:
|
|
28
15
|
from judgeval.tracer import Tracer
|
|
29
|
-
from
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
# Content block protocols
|
|
33
|
-
@runtime_checkable
|
|
34
|
-
class AnthropicContentBlock(Protocol):
|
|
35
|
-
text: str
|
|
36
|
-
type: str
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
# Usage protocols
|
|
40
|
-
@runtime_checkable
|
|
41
|
-
class AnthropicUsage(Protocol):
|
|
42
|
-
input_tokens: int
|
|
43
|
-
output_tokens: int
|
|
44
|
-
cache_read_input_tokens: Optional[int]
|
|
45
|
-
cache_creation_input_tokens: Optional[int]
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
# Message protocols
|
|
49
|
-
@runtime_checkable
|
|
50
|
-
class AnthropicMessage(Protocol):
|
|
51
|
-
content: Sequence[AnthropicContentBlock]
|
|
52
|
-
usage: AnthropicUsage
|
|
53
|
-
model: Optional[str]
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
# Stream event protocols
|
|
57
|
-
@runtime_checkable
|
|
58
|
-
class AnthropicStreamDelta(Protocol):
|
|
59
|
-
text: Optional[str]
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
@runtime_checkable
|
|
63
|
-
class AnthropicStreamEvent(Protocol):
|
|
64
|
-
type: str
|
|
65
|
-
delta: Optional[AnthropicStreamDelta]
|
|
66
|
-
message: Optional[AnthropicMessage]
|
|
67
|
-
usage: Optional[AnthropicUsage]
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
# Client protocols
|
|
71
|
-
@runtime_checkable
|
|
72
|
-
class AnthropicClient(Protocol):
|
|
73
|
-
pass
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
@runtime_checkable
|
|
77
|
-
class AnthropicAsyncClient(Protocol):
|
|
78
|
-
pass
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
# Generic client type bound to both sync and async client protocols
|
|
82
|
-
TClient = TypeVar("TClient", bound=Union[AnthropicClient, AnthropicAsyncClient])
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
# Union types
|
|
86
|
-
AnthropicResponseType = AnthropicMessage
|
|
87
|
-
AnthropicStreamType = Union[
|
|
88
|
-
Iterator[AnthropicStreamEvent], AsyncIterator[AnthropicStreamEvent]
|
|
89
|
-
]
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
def _extract_anthropic_content(chunk: AnthropicStreamEvent) -> str:
|
|
93
|
-
if hasattr(chunk, "delta") and chunk.delta and hasattr(chunk.delta, "text"):
|
|
94
|
-
return chunk.delta.text or ""
|
|
95
|
-
|
|
96
|
-
if isinstance(chunk, AnthropicStreamEvent) and chunk.type == "content_block_delta":
|
|
97
|
-
if chunk.delta and chunk.delta.text:
|
|
98
|
-
return chunk.delta.text
|
|
99
|
-
return ""
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
def _extract_anthropic_tokens(usage_data: AnthropicUsage) -> Tuple[int, int, int, int]:
|
|
103
|
-
prompt_tokens = usage_data.input_tokens or 0
|
|
104
|
-
completion_tokens = usage_data.output_tokens or 0
|
|
105
|
-
cache_read_input_tokens = usage_data.cache_read_input_tokens or 0
|
|
106
|
-
cache_creation_input_tokens = usage_data.cache_creation_input_tokens or 0
|
|
107
|
-
|
|
108
|
-
return (
|
|
109
|
-
prompt_tokens,
|
|
110
|
-
completion_tokens,
|
|
111
|
-
cache_read_input_tokens,
|
|
112
|
-
cache_creation_input_tokens,
|
|
113
|
-
)
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
def _extract_anthropic_chunk_usage(
|
|
117
|
-
chunk: AnthropicStreamEvent,
|
|
118
|
-
) -> Optional[AnthropicUsage]:
|
|
119
|
-
if hasattr(chunk, "usage") and chunk.usage:
|
|
120
|
-
return chunk.usage
|
|
121
|
-
|
|
122
|
-
if isinstance(chunk, AnthropicStreamEvent):
|
|
123
|
-
if chunk.type == "message_start" and chunk.message:
|
|
124
|
-
return chunk.message.usage
|
|
125
|
-
elif chunk.type in ("message_delta", "message_stop"):
|
|
126
|
-
return chunk.usage
|
|
127
|
-
return None
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
def _format_anthropic_output(
|
|
131
|
-
response: AnthropicMessage,
|
|
132
|
-
) -> Tuple[Optional[Union[str, list]], Optional[AnthropicUsage]]:
|
|
133
|
-
message_content: Optional[Union[str, list]] = None
|
|
134
|
-
usage_data: Optional[AnthropicUsage] = None
|
|
135
|
-
|
|
136
|
-
try:
|
|
137
|
-
if isinstance(response, AnthropicMessage):
|
|
138
|
-
usage_data = response.usage
|
|
139
|
-
if response.content:
|
|
140
|
-
content_blocks = []
|
|
141
|
-
for block in response.content:
|
|
142
|
-
if isinstance(block, AnthropicContentBlock):
|
|
143
|
-
block_type = getattr(block, "type", None)
|
|
144
|
-
if block_type == "text":
|
|
145
|
-
block_data = {
|
|
146
|
-
"type": "text",
|
|
147
|
-
"text": getattr(block, "text", ""),
|
|
148
|
-
}
|
|
149
|
-
# Add citations if present
|
|
150
|
-
if hasattr(block, "citations"):
|
|
151
|
-
block_data["citations"] = getattr(
|
|
152
|
-
block, "citations", None
|
|
153
|
-
)
|
|
154
|
-
elif block_type == "tool_use":
|
|
155
|
-
block_data = {
|
|
156
|
-
"type": "tool_use",
|
|
157
|
-
"id": getattr(block, "id", None),
|
|
158
|
-
"name": getattr(block, "name", None),
|
|
159
|
-
"input": getattr(block, "input", None),
|
|
160
|
-
}
|
|
161
|
-
elif block_type == "tool_result":
|
|
162
|
-
block_data = {
|
|
163
|
-
"type": "tool_result",
|
|
164
|
-
"tool_use_id": getattr(block, "tool_use_id", None),
|
|
165
|
-
"content": getattr(block, "content", None),
|
|
166
|
-
}
|
|
167
|
-
else:
|
|
168
|
-
# Handle unknown block types
|
|
169
|
-
block_data = {"type": block_type}
|
|
170
|
-
for attr in [
|
|
171
|
-
"id",
|
|
172
|
-
"text",
|
|
173
|
-
"name",
|
|
174
|
-
"input",
|
|
175
|
-
"content",
|
|
176
|
-
"tool_use_id",
|
|
177
|
-
"citations",
|
|
178
|
-
]:
|
|
179
|
-
if hasattr(block, attr):
|
|
180
|
-
block_data[attr] = getattr(block, attr)
|
|
181
|
-
|
|
182
|
-
content_blocks.append(block_data)
|
|
183
|
-
|
|
184
|
-
# Return structured data instead of string
|
|
185
|
-
message_content = content_blocks if content_blocks else None
|
|
186
|
-
except (AttributeError, IndexError, TypeError):
|
|
187
|
-
pass
|
|
188
|
-
|
|
189
|
-
return message_content, usage_data
|
|
16
|
+
from anthropic import Anthropic, AsyncAnthropic
|
|
190
17
|
|
|
18
|
+
TClient = Union[Anthropic, AsyncAnthropic]
|
|
191
19
|
|
|
192
|
-
class TracedAnthropicGenerator:
|
|
193
|
-
def __init__(
|
|
194
|
-
self,
|
|
195
|
-
tracer: Tracer,
|
|
196
|
-
generator: Iterator[AnthropicStreamEvent],
|
|
197
|
-
client: AnthropicClient,
|
|
198
|
-
span: Span,
|
|
199
|
-
model_name: str,
|
|
200
|
-
):
|
|
201
|
-
self.tracer = tracer
|
|
202
|
-
self.generator = generator
|
|
203
|
-
self.client = client
|
|
204
|
-
self.span = span
|
|
205
|
-
self.model_name = model_name
|
|
206
|
-
self.accumulated_content = ""
|
|
207
20
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
try:
|
|
213
|
-
chunk = next(self.generator)
|
|
214
|
-
content = _extract_anthropic_content(chunk)
|
|
215
|
-
if content:
|
|
216
|
-
self.accumulated_content += content
|
|
217
|
-
|
|
218
|
-
usage_data = _extract_anthropic_chunk_usage(chunk)
|
|
219
|
-
if usage_data:
|
|
220
|
-
prompt_tokens, completion_tokens, cache_read, cache_creation = (
|
|
221
|
-
_extract_anthropic_tokens(usage_data)
|
|
222
|
-
)
|
|
223
|
-
set_span_attribute(
|
|
224
|
-
self.span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
|
|
225
|
-
)
|
|
226
|
-
set_span_attribute(
|
|
227
|
-
self.span,
|
|
228
|
-
AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
|
|
229
|
-
completion_tokens,
|
|
230
|
-
)
|
|
231
|
-
set_span_attribute(
|
|
232
|
-
self.span,
|
|
233
|
-
AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
|
|
234
|
-
cache_read,
|
|
235
|
-
)
|
|
236
|
-
set_span_attribute(
|
|
237
|
-
self.span,
|
|
238
|
-
AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
|
|
239
|
-
cache_creation,
|
|
240
|
-
)
|
|
241
|
-
set_span_attribute(
|
|
242
|
-
self.span,
|
|
243
|
-
AttributeKeys.JUDGMENT_USAGE_METADATA,
|
|
244
|
-
safe_serialize(usage_data),
|
|
245
|
-
)
|
|
246
|
-
return chunk
|
|
247
|
-
except StopIteration:
|
|
248
|
-
set_span_attribute(
|
|
249
|
-
self.span, AttributeKeys.GEN_AI_COMPLETION, self.accumulated_content
|
|
250
|
-
)
|
|
251
|
-
self.span.end()
|
|
252
|
-
raise
|
|
253
|
-
except Exception as e:
|
|
254
|
-
if self.span:
|
|
255
|
-
self.span.record_exception(e)
|
|
256
|
-
self.span.end()
|
|
257
|
-
raise
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
class TracedAnthropicAsyncGenerator:
|
|
261
|
-
def __init__(
|
|
262
|
-
self,
|
|
263
|
-
tracer: Tracer,
|
|
264
|
-
async_generator: AsyncIterator[AnthropicStreamEvent],
|
|
265
|
-
client: AnthropicAsyncClient,
|
|
266
|
-
span: Span,
|
|
267
|
-
model_name: str,
|
|
268
|
-
):
|
|
269
|
-
self.tracer = tracer
|
|
270
|
-
self.async_generator = async_generator
|
|
271
|
-
self.client = client
|
|
272
|
-
self.span = span
|
|
273
|
-
self.model_name = model_name
|
|
274
|
-
self.accumulated_content = ""
|
|
275
|
-
|
|
276
|
-
def __aiter__(self) -> AsyncIterator[AnthropicStreamEvent]:
|
|
277
|
-
return self
|
|
278
|
-
|
|
279
|
-
async def __anext__(self) -> AnthropicStreamEvent:
|
|
280
|
-
try:
|
|
281
|
-
chunk = await self.async_generator.__anext__()
|
|
282
|
-
except StopAsyncIteration:
|
|
283
|
-
set_span_attribute(
|
|
284
|
-
self.span, AttributeKeys.GEN_AI_COMPLETION, self.accumulated_content
|
|
285
|
-
)
|
|
286
|
-
self.span.end()
|
|
287
|
-
raise
|
|
288
|
-
except Exception as e:
|
|
289
|
-
if self.span:
|
|
290
|
-
self.span.record_exception(e)
|
|
291
|
-
self.span.end()
|
|
292
|
-
raise
|
|
293
|
-
|
|
294
|
-
try:
|
|
295
|
-
content = _extract_anthropic_content(chunk)
|
|
296
|
-
if content:
|
|
297
|
-
self.accumulated_content += content
|
|
298
|
-
|
|
299
|
-
usage_data = _extract_anthropic_chunk_usage(chunk)
|
|
300
|
-
if usage_data:
|
|
301
|
-
prompt_tokens, completion_tokens, cache_read, cache_creation = (
|
|
302
|
-
_extract_anthropic_tokens(usage_data)
|
|
303
|
-
)
|
|
304
|
-
set_span_attribute(
|
|
305
|
-
self.span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
|
|
306
|
-
)
|
|
307
|
-
set_span_attribute(
|
|
308
|
-
self.span,
|
|
309
|
-
AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
|
|
310
|
-
completion_tokens,
|
|
311
|
-
)
|
|
312
|
-
set_span_attribute(
|
|
313
|
-
self.span,
|
|
314
|
-
AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
|
|
315
|
-
cache_read,
|
|
316
|
-
)
|
|
317
|
-
set_span_attribute(
|
|
318
|
-
self.span,
|
|
319
|
-
AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
|
|
320
|
-
cache_creation,
|
|
321
|
-
)
|
|
322
|
-
set_span_attribute(
|
|
323
|
-
self.span,
|
|
324
|
-
AttributeKeys.JUDGMENT_USAGE_METADATA,
|
|
325
|
-
safe_serialize(usage_data),
|
|
326
|
-
)
|
|
327
|
-
except Exception as e:
|
|
328
|
-
if self.span:
|
|
329
|
-
self.span.end()
|
|
330
|
-
judgeval_logger.error(
|
|
331
|
-
f"[anthropic wrapped_async] Error adding span metadata: {e}"
|
|
332
|
-
)
|
|
333
|
-
finally:
|
|
334
|
-
return chunk
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
class TracedAnthropicSyncContextManager:
|
|
338
|
-
def __init__(
|
|
339
|
-
self,
|
|
340
|
-
tracer: Tracer,
|
|
341
|
-
context_manager,
|
|
342
|
-
client: AnthropicClient,
|
|
343
|
-
span: Span,
|
|
344
|
-
model_name: str,
|
|
345
|
-
):
|
|
346
|
-
self.tracer = tracer
|
|
347
|
-
self.context_manager = context_manager
|
|
348
|
-
self.client = client
|
|
349
|
-
self.span = span
|
|
350
|
-
self.model_name = model_name
|
|
351
|
-
|
|
352
|
-
def __enter__(self):
|
|
353
|
-
self.stream = self.context_manager.__enter__()
|
|
354
|
-
return TracedAnthropicGenerator(
|
|
355
|
-
self.tracer, self.stream, self.client, self.span, self.model_name
|
|
356
|
-
)
|
|
357
|
-
|
|
358
|
-
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
359
|
-
return self.context_manager.__exit__(exc_type, exc_val, exc_tb)
|
|
21
|
+
def wrap_anthropic_client_sync(tracer: Tracer, client: Anthropic) -> Anthropic:
|
|
22
|
+
wrap_messages_create_sync(tracer, client)
|
|
23
|
+
wrap_messages_stream_sync(tracer, client)
|
|
24
|
+
return client
|
|
360
25
|
|
|
361
26
|
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
span: Span,
|
|
369
|
-
model_name: str,
|
|
370
|
-
):
|
|
371
|
-
self.tracer = tracer
|
|
372
|
-
self.context_manager = context_manager
|
|
373
|
-
self.client = client
|
|
374
|
-
self.span = span
|
|
375
|
-
self.model_name = model_name
|
|
27
|
+
def wrap_anthropic_client_async(
|
|
28
|
+
tracer: Tracer, client: AsyncAnthropic
|
|
29
|
+
) -> AsyncAnthropic:
|
|
30
|
+
wrap_messages_create_async(tracer, client)
|
|
31
|
+
wrap_messages_stream_async(tracer, client)
|
|
32
|
+
return client
|
|
376
33
|
|
|
377
|
-
async def __aenter__(self):
|
|
378
|
-
self.stream = await self.context_manager.__aenter__()
|
|
379
|
-
return TracedAnthropicAsyncGenerator(
|
|
380
|
-
self.tracer, self.stream, self.client, self.span, self.model_name
|
|
381
|
-
)
|
|
382
34
|
|
|
383
|
-
|
|
384
|
-
|
|
35
|
+
@typing.overload
|
|
36
|
+
def wrap_anthropic_client(tracer: Tracer, client: Anthropic) -> Anthropic: ...
|
|
37
|
+
@typing.overload
|
|
38
|
+
def wrap_anthropic_client(tracer: Tracer, client: AsyncAnthropic) -> AsyncAnthropic: ...
|
|
385
39
|
|
|
386
40
|
|
|
387
41
|
def wrap_anthropic_client(tracer: Tracer, client: TClient) -> TClient:
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
def wrapper(*args, **kwargs):
|
|
391
|
-
if kwargs.get("stream", False):
|
|
392
|
-
span = tracer.get_tracer().start_span(
|
|
393
|
-
span_name, attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
|
|
394
|
-
)
|
|
395
|
-
tracer.add_agent_attributes_to_span(span)
|
|
396
|
-
set_span_attribute(
|
|
397
|
-
span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
|
|
398
|
-
)
|
|
399
|
-
model_name = kwargs.get("model", "")
|
|
400
|
-
set_span_attribute(span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name)
|
|
401
|
-
stream_response = function(*args, **kwargs)
|
|
402
|
-
return TracedAnthropicGenerator(
|
|
403
|
-
tracer, stream_response, client, span, model_name
|
|
404
|
-
)
|
|
405
|
-
else:
|
|
406
|
-
with sync_span_context(
|
|
407
|
-
tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
|
|
408
|
-
) as span:
|
|
409
|
-
try:
|
|
410
|
-
tracer.add_agent_attributes_to_span(span)
|
|
411
|
-
set_span_attribute(
|
|
412
|
-
span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
|
|
413
|
-
)
|
|
414
|
-
model_name = kwargs.get("model", "")
|
|
415
|
-
set_span_attribute(
|
|
416
|
-
span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name
|
|
417
|
-
)
|
|
418
|
-
except Exception as e:
|
|
419
|
-
judgeval_logger.error(
|
|
420
|
-
f"[anthropic wrapped] Error adding span metadata: {e}"
|
|
421
|
-
)
|
|
422
|
-
|
|
423
|
-
response = function(*args, **kwargs)
|
|
424
|
-
|
|
425
|
-
try:
|
|
426
|
-
if isinstance(response, AnthropicMessage):
|
|
427
|
-
output, usage_data = _format_anthropic_output(response)
|
|
428
|
-
# Serialize structured data to JSON for span attribute
|
|
429
|
-
if isinstance(output, list):
|
|
430
|
-
output_str = safe_serialize(output)
|
|
431
|
-
else:
|
|
432
|
-
output_str = str(output) if output is not None else None
|
|
433
|
-
set_span_attribute(
|
|
434
|
-
span, AttributeKeys.GEN_AI_COMPLETION, output_str
|
|
435
|
-
)
|
|
42
|
+
from judgeval.tracer.llm.llm_anthropic.config import HAS_ANTHROPIC
|
|
43
|
+
from judgeval.logger import judgeval_logger
|
|
436
44
|
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
cache_read,
|
|
442
|
-
cache_creation,
|
|
443
|
-
) = _extract_anthropic_tokens(usage_data)
|
|
444
|
-
set_span_attribute(
|
|
445
|
-
span,
|
|
446
|
-
AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
|
|
447
|
-
prompt_tokens,
|
|
448
|
-
)
|
|
449
|
-
set_span_attribute(
|
|
450
|
-
span,
|
|
451
|
-
AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
|
|
452
|
-
completion_tokens,
|
|
453
|
-
)
|
|
454
|
-
set_span_attribute(
|
|
455
|
-
span,
|
|
456
|
-
AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
|
|
457
|
-
cache_read,
|
|
458
|
-
)
|
|
459
|
-
set_span_attribute(
|
|
460
|
-
span,
|
|
461
|
-
AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
|
|
462
|
-
cache_creation,
|
|
463
|
-
)
|
|
464
|
-
set_span_attribute(
|
|
465
|
-
span,
|
|
466
|
-
AttributeKeys.JUDGMENT_USAGE_METADATA,
|
|
467
|
-
safe_serialize(usage_data),
|
|
468
|
-
)
|
|
469
|
-
set_span_attribute(
|
|
470
|
-
span,
|
|
471
|
-
AttributeKeys.GEN_AI_RESPONSE_MODEL,
|
|
472
|
-
getattr(response, "model", model_name),
|
|
473
|
-
)
|
|
474
|
-
except Exception as e:
|
|
475
|
-
judgeval_logger.error(
|
|
476
|
-
f"[anthropic wrapped] Error adding span metadata: {e}"
|
|
477
|
-
)
|
|
478
|
-
finally:
|
|
479
|
-
return response
|
|
480
|
-
|
|
481
|
-
return wrapper
|
|
482
|
-
|
|
483
|
-
def wrapped_async(function: Callable, span_name: str):
|
|
484
|
-
@functools.wraps(function)
|
|
485
|
-
async def wrapper(*args, **kwargs):
|
|
486
|
-
if kwargs.get("stream", False):
|
|
487
|
-
span = tracer.get_tracer().start_span(
|
|
488
|
-
span_name, attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
|
|
489
|
-
)
|
|
490
|
-
tracer.add_agent_attributes_to_span(span)
|
|
491
|
-
set_span_attribute(
|
|
492
|
-
span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
|
|
493
|
-
)
|
|
494
|
-
model_name = kwargs.get("model", "")
|
|
495
|
-
set_span_attribute(span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name)
|
|
496
|
-
stream_response = await function(*args, **kwargs)
|
|
497
|
-
return TracedAnthropicAsyncGenerator(
|
|
498
|
-
tracer, stream_response, client, span, model_name
|
|
499
|
-
)
|
|
500
|
-
else:
|
|
501
|
-
async with async_span_context(
|
|
502
|
-
tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
|
|
503
|
-
) as span:
|
|
504
|
-
try:
|
|
505
|
-
tracer.add_agent_attributes_to_span(span)
|
|
506
|
-
set_span_attribute(
|
|
507
|
-
span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
|
|
508
|
-
)
|
|
509
|
-
model_name = kwargs.get("model", "")
|
|
510
|
-
set_span_attribute(
|
|
511
|
-
span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name
|
|
512
|
-
)
|
|
513
|
-
except Exception as e:
|
|
514
|
-
judgeval_logger.error(
|
|
515
|
-
f"[anthropic wrapped_async] Error adding span metadata: {e}"
|
|
516
|
-
)
|
|
517
|
-
|
|
518
|
-
response = await function(*args, **kwargs)
|
|
519
|
-
|
|
520
|
-
try:
|
|
521
|
-
if isinstance(response, AnthropicMessage):
|
|
522
|
-
output, usage_data = _format_anthropic_output(response)
|
|
523
|
-
# Serialize structured data to JSON for span attribute
|
|
524
|
-
if isinstance(output, list):
|
|
525
|
-
output_str = safe_serialize(output)
|
|
526
|
-
else:
|
|
527
|
-
output_str = str(output) if output is not None else None
|
|
528
|
-
set_span_attribute(
|
|
529
|
-
span, AttributeKeys.GEN_AI_COMPLETION, output_str
|
|
530
|
-
)
|
|
531
|
-
|
|
532
|
-
if usage_data:
|
|
533
|
-
(
|
|
534
|
-
prompt_tokens,
|
|
535
|
-
completion_tokens,
|
|
536
|
-
cache_read,
|
|
537
|
-
cache_creation,
|
|
538
|
-
) = _extract_anthropic_tokens(usage_data)
|
|
539
|
-
set_span_attribute(
|
|
540
|
-
span,
|
|
541
|
-
AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
|
|
542
|
-
prompt_tokens,
|
|
543
|
-
)
|
|
544
|
-
set_span_attribute(
|
|
545
|
-
span,
|
|
546
|
-
AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
|
|
547
|
-
completion_tokens,
|
|
548
|
-
)
|
|
549
|
-
set_span_attribute(
|
|
550
|
-
span,
|
|
551
|
-
AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
|
|
552
|
-
cache_read,
|
|
553
|
-
)
|
|
554
|
-
set_span_attribute(
|
|
555
|
-
span,
|
|
556
|
-
AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
|
|
557
|
-
cache_creation,
|
|
558
|
-
)
|
|
559
|
-
set_span_attribute(
|
|
560
|
-
span,
|
|
561
|
-
AttributeKeys.JUDGMENT_USAGE_METADATA,
|
|
562
|
-
safe_serialize(usage_data),
|
|
563
|
-
)
|
|
564
|
-
set_span_attribute(
|
|
565
|
-
span,
|
|
566
|
-
AttributeKeys.GEN_AI_RESPONSE_MODEL,
|
|
567
|
-
getattr(response, "model", model_name),
|
|
568
|
-
)
|
|
569
|
-
except Exception as e:
|
|
570
|
-
judgeval_logger.error(
|
|
571
|
-
f"[anthropic wrapped_async] Error adding span metadata: {e}"
|
|
572
|
-
)
|
|
573
|
-
finally:
|
|
574
|
-
return response
|
|
575
|
-
|
|
576
|
-
return wrapper
|
|
577
|
-
|
|
578
|
-
def wrapped_sync_context_manager(function, span_name: str):
|
|
579
|
-
@functools.wraps(function)
|
|
580
|
-
def wrapper(*args, **kwargs):
|
|
581
|
-
span = tracer.get_tracer().start_span(
|
|
582
|
-
span_name, attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
|
|
583
|
-
)
|
|
584
|
-
tracer.add_agent_attributes_to_span(span)
|
|
585
|
-
set_span_attribute(
|
|
586
|
-
span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
|
|
587
|
-
)
|
|
588
|
-
model_name = kwargs.get("model", "")
|
|
589
|
-
set_span_attribute(span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name)
|
|
590
|
-
|
|
591
|
-
original_context_manager = function(*args, **kwargs)
|
|
592
|
-
return TracedAnthropicSyncContextManager(
|
|
593
|
-
tracer, original_context_manager, client, span, model_name
|
|
594
|
-
)
|
|
595
|
-
|
|
596
|
-
return wrapper
|
|
597
|
-
|
|
598
|
-
def wrapped_async_context_manager(function, span_name: str):
|
|
599
|
-
@functools.wraps(function)
|
|
600
|
-
def wrapper(*args, **kwargs):
|
|
601
|
-
span = tracer.get_tracer().start_span(
|
|
602
|
-
span_name, attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
|
|
603
|
-
)
|
|
604
|
-
tracer.add_agent_attributes_to_span(span)
|
|
605
|
-
set_span_attribute(
|
|
606
|
-
span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
|
|
607
|
-
)
|
|
608
|
-
model_name = kwargs.get("model", "")
|
|
609
|
-
set_span_attribute(span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name)
|
|
610
|
-
|
|
611
|
-
original_context_manager = function(*args, **kwargs)
|
|
612
|
-
return TracedAnthropicAsyncContextManager(
|
|
613
|
-
tracer, original_context_manager, client, span, model_name
|
|
614
|
-
)
|
|
615
|
-
|
|
616
|
-
return wrapper
|
|
617
|
-
|
|
618
|
-
span_name = "ANTHROPIC_API_CALL"
|
|
619
|
-
if anthropic_Anthropic is not None and isinstance(client, anthropic_Anthropic):
|
|
620
|
-
setattr(client.messages, "create", wrapped(client.messages.create, span_name))
|
|
621
|
-
setattr(
|
|
622
|
-
client.messages,
|
|
623
|
-
"stream",
|
|
624
|
-
wrapped_sync_context_manager(client.messages.stream, span_name),
|
|
625
|
-
)
|
|
626
|
-
elif anthropic_AsyncAnthropic is not None and isinstance(
|
|
627
|
-
client, anthropic_AsyncAnthropic
|
|
628
|
-
):
|
|
629
|
-
setattr(
|
|
630
|
-
client.messages,
|
|
631
|
-
"create",
|
|
632
|
-
wrapped_async(client.messages.create, span_name),
|
|
633
|
-
)
|
|
634
|
-
setattr(
|
|
635
|
-
client.messages,
|
|
636
|
-
"stream",
|
|
637
|
-
wrapped_async_context_manager(client.messages.stream, span_name),
|
|
45
|
+
if not HAS_ANTHROPIC:
|
|
46
|
+
judgeval_logger.error(
|
|
47
|
+
"Cannot wrap Anthropic client: 'anthropic' library not installed. "
|
|
48
|
+
"Install it with: pip install anthropic"
|
|
638
49
|
)
|
|
50
|
+
return client
|
|
639
51
|
|
|
640
|
-
|
|
52
|
+
from anthropic import Anthropic, AsyncAnthropic
|
|
53
|
+
|
|
54
|
+
if isinstance(client, AsyncAnthropic):
|
|
55
|
+
return wrap_anthropic_client_async(tracer, client)
|
|
56
|
+
elif isinstance(client, Anthropic):
|
|
57
|
+
return wrap_anthropic_client_sync(tracer, client)
|
|
58
|
+
else:
|
|
59
|
+
raise TypeError(f"Invalid client type: {type(client)}")
|