judgeval 0.16.6__py3-none-any.whl → 0.16.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of judgeval might be problematic. Click here for more details.
- judgeval/api/api_types.py +1 -2
- judgeval/data/judgment_types.py +1 -2
- judgeval/tracer/__init__.py +7 -52
- judgeval/tracer/llm/config.py +12 -44
- judgeval/tracer/llm/constants.py +0 -1
- judgeval/tracer/llm/llm_anthropic/config.py +3 -17
- judgeval/tracer/llm/llm_anthropic/messages.py +440 -0
- judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
- judgeval/tracer/llm/llm_anthropic/wrapper.py +40 -621
- judgeval/tracer/llm/llm_google/__init__.py +3 -0
- judgeval/tracer/llm/llm_google/config.py +3 -21
- judgeval/tracer/llm/llm_google/generate_content.py +125 -0
- judgeval/tracer/llm/llm_google/wrapper.py +19 -454
- judgeval/tracer/llm/llm_openai/beta_chat_completions.py +192 -0
- judgeval/tracer/llm/llm_openai/chat_completions.py +437 -0
- judgeval/tracer/llm/llm_openai/config.py +3 -29
- judgeval/tracer/llm/llm_openai/responses.py +444 -0
- judgeval/tracer/llm/llm_openai/wrapper.py +43 -641
- judgeval/tracer/llm/llm_together/__init__.py +3 -0
- judgeval/tracer/llm/llm_together/chat_completions.py +398 -0
- judgeval/tracer/llm/llm_together/config.py +3 -20
- judgeval/tracer/llm/llm_together/wrapper.py +34 -485
- judgeval/tracer/llm/providers.py +4 -48
- judgeval/utils/decorators/dont_throw.py +30 -14
- judgeval/utils/wrappers/README.md +3 -0
- judgeval/utils/wrappers/__init__.py +15 -0
- judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
- judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
- judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
- judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
- judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
- judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
- judgeval/utils/wrappers/utils.py +35 -0
- judgeval/version.py +1 -1
- {judgeval-0.16.6.dist-info → judgeval-0.16.8.dist-info}/METADATA +1 -1
- {judgeval-0.16.6.dist-info → judgeval-0.16.8.dist-info}/RECORD +40 -27
- judgeval/tracer/llm/llm_groq/config.py +0 -23
- judgeval/tracer/llm/llm_groq/wrapper.py +0 -498
- judgeval/tracer/local_eval_queue.py +0 -199
- /judgeval/{tracer/llm/llm_groq/__init__.py → utils/wrappers/py.typed} +0 -0
- {judgeval-0.16.6.dist-info → judgeval-0.16.8.dist-info}/WHEEL +0 -0
- {judgeval-0.16.6.dist-info → judgeval-0.16.8.dist-info}/entry_points.txt +0 -0
- {judgeval-0.16.6.dist-info → judgeval-0.16.8.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import (
|
|
3
|
+
TYPE_CHECKING,
|
|
4
|
+
Any,
|
|
5
|
+
Dict,
|
|
6
|
+
Generator,
|
|
7
|
+
AsyncGenerator,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
from judgeval.tracer.keys import AttributeKeys
|
|
11
|
+
from judgeval.tracer.utils import set_span_attribute
|
|
12
|
+
from judgeval.utils.serialize import safe_serialize
|
|
13
|
+
from judgeval.utils.wrappers import (
|
|
14
|
+
mutable_wrap_sync,
|
|
15
|
+
immutable_wrap_sync_iterator,
|
|
16
|
+
immutable_wrap_async_iterator,
|
|
17
|
+
)
|
|
18
|
+
from judgeval.tracer.llm.llm_anthropic.messages import (
|
|
19
|
+
_extract_anthropic_tokens,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from judgeval.tracer import Tracer
|
|
24
|
+
from anthropic import Anthropic, AsyncAnthropic
|
|
25
|
+
from anthropic.lib.streaming import (
|
|
26
|
+
MessageStreamManager,
|
|
27
|
+
AsyncMessageStreamManager,
|
|
28
|
+
MessageStream,
|
|
29
|
+
AsyncMessageStream,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def wrap_messages_stream_sync(tracer: Tracer, client: Anthropic) -> None:
    """Patch ``client.messages.stream`` so each streamed call is traced.

    A span named ``ANTHROPIC_API_CALL`` is opened before the underlying call,
    text chunks consumed through ``text_stream`` are accumulated, and token
    usage / model metadata are attached when the stream's context manager
    exits.  The patched attribute keeps the original call signature, so
    callers are unaffected.

    Args:
        tracer: Judgeval tracer used to create spans and agent attributes.
        client: The synchronous Anthropic client to instrument (mutated in
            place).
    """
    original_func = client.messages.stream

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        # Open the LLM span and record the serialized request before the call.
        ctx["span"] = tracer.get_tracer().start_span(
            "ANTHROPIC_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        tracer.add_agent_attributes_to_span(ctx["span"])
        set_span_attribute(
            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
        )

        ctx["model_name"] = kwargs.get("model", "")
        set_span_attribute(
            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
        )
        ctx["accumulated_content"] = ""

    def mutate_hook(
        ctx: Dict[str, Any], result: MessageStreamManager
    ) -> MessageStreamManager:
        original_manager = result

        class WrappedMessageStreamManager:
            # Proxy that hooks __enter__/__exit__ while delegating every
            # other attribute access to the real manager.
            def __init__(self, manager: MessageStreamManager):
                self._manager = manager

            def __enter__(self) -> MessageStream:
                stream = self._manager.__enter__()
                post_hook_enter_impl(stream)
                return stream

            def __exit__(self, exc_type, exc_val, exc_tb):
                result = self._manager.__exit__(exc_type, exc_val, exc_tb)
                post_hook_exit_impl()
                return result

            def __getattr__(self, name):
                return getattr(self._manager, name)

        def post_hook_enter_impl(stream: MessageStream) -> None:
            # Swap text_stream for an instrumented generator that records
            # every yielded chunk into ctx["accumulated_content"].
            ctx["stream"] = stream
            original_text_stream = stream.text_stream

            def traced_text_stream() -> Generator[str, None, None]:
                for text_chunk in original_text_stream:
                    yield text_chunk

            def yield_hook(inner_ctx: Dict[str, Any], text_chunk: str) -> None:
                span = ctx.get("span")
                if span and text_chunk:
                    ctx["accumulated_content"] = (
                        ctx.get("accumulated_content", "") + text_chunk
                    )

            def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
                pass

            def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
                span = ctx.get("span")
                if span:
                    span.record_exception(error)

            def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
                pass

            wrapped_text_stream = immutable_wrap_sync_iterator(
                traced_text_stream,
                yield_hook=yield_hook,
                post_hook=post_hook_inner,
                error_hook=error_hook_inner,
                finally_hook=finally_hook_inner,
            )

            # NOTE(review): text_stream is a property on some anthropic SDK
            # versions — this assumes instance assignment is allowed; confirm
            # against the pinned anthropic version.
            stream.text_stream = wrapped_text_stream()

        def post_hook_exit_impl() -> None:
            # Called after the caller leaves the `with` block: attach the
            # accumulated completion plus usage metadata, then end the span.
            span = ctx.get("span")
            if span:
                accumulated = ctx.get("accumulated_content", "")
                set_span_attribute(span, AttributeKeys.GEN_AI_COMPLETION, accumulated)

                stream: MessageStream | None = ctx.get("stream")
                if stream:
                    # Best effort: the final message may be unavailable if the
                    # stream was abandoned early, so swallow any failure.
                    try:
                        final_message = stream.get_final_message()
                        if final_message.usage:
                            (
                                prompt_tokens,
                                completion_tokens,
                                cache_read,
                                cache_creation,
                            ) = _extract_anthropic_tokens(final_message.usage)
                            set_span_attribute(
                                span,
                                AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
                                prompt_tokens,
                            )
                            set_span_attribute(
                                span,
                                AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
                                completion_tokens,
                            )
                            set_span_attribute(
                                span,
                                AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
                                cache_read,
                            )
                            set_span_attribute(
                                span,
                                AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
                                cache_creation,
                            )
                            set_span_attribute(
                                span,
                                AttributeKeys.JUDGMENT_USAGE_METADATA,
                                safe_serialize(final_message.usage),
                            )

                        set_span_attribute(
                            span,
                            AttributeKeys.GEN_AI_RESPONSE_MODEL,
                            final_message.model,
                        )
                    except Exception:
                        pass

                span.end()

        return WrappedMessageStreamManager(original_manager)  # type: ignore[return-value]

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        # The underlying call raised before a stream manager existed, so
        # __exit__ will never run; record the error AND end the span here,
        # otherwise the span opened in pre_hook is leaked (never exported).
        span = ctx.pop("span", None)
        if span:
            span.record_exception(error)
            span.end()

    wrapped = mutable_wrap_sync(
        original_func,
        pre_hook=pre_hook,
        mutate_hook=mutate_hook,
        error_hook=error_hook,
    )

    setattr(client.messages, "stream", wrapped)
|
|
179
|
+
def wrap_messages_stream_async(tracer: Tracer, client: AsyncAnthropic) -> None:
    """Patch ``client.messages.stream`` on an async client so calls are traced.

    Mirrors :func:`wrap_messages_stream_sync`: opens an ``ANTHROPIC_API_CALL``
    span before the call, accumulates chunks consumed via ``text_stream``, and
    attaches usage / model metadata when the async context manager exits.
    ``client.messages.stream`` itself returns the manager synchronously, so
    the synchronous wrapper utility is used here as well.

    Args:
        tracer: Judgeval tracer used to create spans and agent attributes.
        client: The asynchronous Anthropic client to instrument (mutated in
            place).
    """
    original_func = client.messages.stream

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        # Open the LLM span and record the serialized request before the call.
        ctx["span"] = tracer.get_tracer().start_span(
            "ANTHROPIC_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        tracer.add_agent_attributes_to_span(ctx["span"])
        set_span_attribute(
            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
        )

        ctx["model_name"] = kwargs.get("model", "")
        set_span_attribute(
            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
        )
        ctx["accumulated_content"] = ""

    def mutate_hook(
        ctx: Dict[str, Any], result: AsyncMessageStreamManager
    ) -> AsyncMessageStreamManager:
        original_manager = result

        class WrappedAsyncMessageStreamManager:
            # Proxy that hooks __aenter__/__aexit__ while delegating every
            # other attribute access to the real manager.
            def __init__(self, manager: AsyncMessageStreamManager):
                self._manager = manager

            async def __aenter__(self) -> AsyncMessageStream:
                stream = await self._manager.__aenter__()
                post_hook_aenter_impl(stream)
                return stream

            async def __aexit__(self, exc_type, exc_val, exc_tb):
                result = await self._manager.__aexit__(exc_type, exc_val, exc_tb)
                await post_hook_aexit_impl()
                return result

            def __getattr__(self, name):
                return getattr(self._manager, name)

        def post_hook_aenter_impl(stream: AsyncMessageStream) -> None:
            # Swap text_stream for an instrumented async generator that
            # records every yielded chunk into ctx["accumulated_content"].
            ctx["stream"] = stream
            original_text_stream = stream.text_stream

            async def traced_text_stream() -> AsyncGenerator[str, None]:
                async for text_chunk in original_text_stream:
                    yield text_chunk

            def yield_hook(inner_ctx: Dict[str, Any], text_chunk: str) -> None:
                span = ctx.get("span")
                if span and text_chunk:
                    ctx["accumulated_content"] = (
                        ctx.get("accumulated_content", "") + text_chunk
                    )

            def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
                pass

            def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
                span = ctx.get("span")
                if span:
                    span.record_exception(error)

            def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
                pass

            wrapped_text_stream = immutable_wrap_async_iterator(
                traced_text_stream,
                yield_hook=yield_hook,
                post_hook=post_hook_inner,
                error_hook=error_hook_inner,
                finally_hook=finally_hook_inner,
            )

            # NOTE(review): text_stream is a property on some anthropic SDK
            # versions — this assumes instance assignment is allowed; confirm
            # against the pinned anthropic version.
            stream.text_stream = wrapped_text_stream()

        async def post_hook_aexit_impl() -> None:
            # Called after the caller leaves the `async with` block: attach
            # the accumulated completion plus usage metadata, end the span.
            span = ctx.get("span")
            if span:
                accumulated = ctx.get("accumulated_content", "")
                set_span_attribute(span, AttributeKeys.GEN_AI_COMPLETION, accumulated)

                stream: AsyncMessageStream | None = ctx.get("stream")
                if stream:
                    # Best effort: the final message may be unavailable if the
                    # stream was abandoned early, so swallow any failure.
                    try:
                        final_message = await stream.get_final_message()
                        if final_message.usage:
                            (
                                prompt_tokens,
                                completion_tokens,
                                cache_read,
                                cache_creation,
                            ) = _extract_anthropic_tokens(final_message.usage)
                            set_span_attribute(
                                span,
                                AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
                                prompt_tokens,
                            )
                            set_span_attribute(
                                span,
                                AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
                                completion_tokens,
                            )
                            set_span_attribute(
                                span,
                                AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
                                cache_read,
                            )
                            set_span_attribute(
                                span,
                                AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
                                cache_creation,
                            )
                            set_span_attribute(
                                span,
                                AttributeKeys.JUDGMENT_USAGE_METADATA,
                                safe_serialize(final_message.usage),
                            )

                        set_span_attribute(
                            span,
                            AttributeKeys.GEN_AI_RESPONSE_MODEL,
                            final_message.model,
                        )
                    except Exception:
                        pass

                span.end()

        return WrappedAsyncMessageStreamManager(original_manager)  # type: ignore[return-value]

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        # The underlying call raised before a stream manager existed, so
        # __aexit__ will never run; record the error AND end the span here,
        # otherwise the span opened in pre_hook is leaked (never exported).
        span = ctx.pop("span", None)
        if span:
            span.record_exception(error)
            span.end()

    wrapped = mutable_wrap_sync(
        original_func,
        pre_hook=pre_hook,
        mutate_hook=mutate_hook,
        error_hook=error_hook,
    )

    setattr(client.messages, "stream", wrapped)