judgeval 0.16.7__py3-none-any.whl → 0.16.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of judgeval has been flagged as potentially problematic; see the registry's advisory page for details.

Files changed (43):
  1. judgeval/api/api_types.py +1 -2
  2. judgeval/data/judgment_types.py +1 -2
  3. judgeval/tracer/__init__.py +7 -52
  4. judgeval/tracer/llm/config.py +12 -44
  5. judgeval/tracer/llm/constants.py +0 -1
  6. judgeval/tracer/llm/llm_anthropic/config.py +3 -17
  7. judgeval/tracer/llm/llm_anthropic/messages.py +440 -0
  8. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  9. judgeval/tracer/llm/llm_anthropic/wrapper.py +40 -621
  10. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  11. judgeval/tracer/llm/llm_google/config.py +3 -21
  12. judgeval/tracer/llm/llm_google/generate_content.py +125 -0
  13. judgeval/tracer/llm/llm_google/wrapper.py +19 -454
  14. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +192 -0
  15. judgeval/tracer/llm/llm_openai/chat_completions.py +437 -0
  16. judgeval/tracer/llm/llm_openai/config.py +3 -29
  17. judgeval/tracer/llm/llm_openai/responses.py +444 -0
  18. judgeval/tracer/llm/llm_openai/wrapper.py +43 -641
  19. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  20. judgeval/tracer/llm/llm_together/chat_completions.py +398 -0
  21. judgeval/tracer/llm/llm_together/config.py +3 -20
  22. judgeval/tracer/llm/llm_together/wrapper.py +34 -485
  23. judgeval/tracer/llm/providers.py +4 -48
  24. judgeval/utils/decorators/dont_throw.py +30 -14
  25. judgeval/utils/wrappers/README.md +3 -0
  26. judgeval/utils/wrappers/__init__.py +15 -0
  27. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  28. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  29. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  30. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  31. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  32. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  33. judgeval/utils/wrappers/utils.py +35 -0
  34. judgeval/version.py +1 -1
  35. {judgeval-0.16.7.dist-info → judgeval-0.16.8.dist-info}/METADATA +1 -1
  36. {judgeval-0.16.7.dist-info → judgeval-0.16.8.dist-info}/RECORD +40 -27
  37. judgeval/tracer/llm/llm_groq/config.py +0 -23
  38. judgeval/tracer/llm/llm_groq/wrapper.py +0 -498
  39. judgeval/tracer/local_eval_queue.py +0 -199
  40. /judgeval/{tracer/llm/llm_groq/__init__.py → utils/wrappers/py.typed} +0 -0
  41. {judgeval-0.16.7.dist-info → judgeval-0.16.8.dist-info}/WHEEL +0 -0
  42. {judgeval-0.16.7.dist-info → judgeval-0.16.8.dist-info}/entry_points.txt +0 -0
  43. {judgeval-0.16.7.dist-info → judgeval-0.16.8.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,440 @@
1
+ from __future__ import annotations
2
+ from typing import (
3
+ TYPE_CHECKING,
4
+ Any,
5
+ Awaitable,
6
+ Callable,
7
+ Dict,
8
+ Iterator,
9
+ AsyncIterator,
10
+ Generator,
11
+ AsyncGenerator,
12
+ Tuple,
13
+ )
14
+
15
+ from judgeval.tracer.keys import AttributeKeys
16
+ from judgeval.tracer.utils import set_span_attribute
17
+ from judgeval.utils.serialize import safe_serialize
18
+ from judgeval.utils.wrappers import (
19
+ immutable_wrap_sync,
20
+ immutable_wrap_async,
21
+ mutable_wrap_sync,
22
+ mutable_wrap_async,
23
+ immutable_wrap_sync_iterator,
24
+ immutable_wrap_async_iterator,
25
+ )
26
+
27
+ if TYPE_CHECKING:
28
+ from judgeval.tracer import Tracer
29
+ from anthropic import Anthropic, AsyncAnthropic
30
+ from anthropic.types import (
31
+ Message,
32
+ Usage,
33
+ MessageDeltaUsage,
34
+ RawMessageStreamEvent,
35
+ )
36
+
37
+
38
+ def _extract_anthropic_content(chunk: RawMessageStreamEvent) -> str:
39
+ if chunk.type == "content_block_delta":
40
+ delta = chunk.delta
41
+ if delta.type == "text_delta" and delta.text:
42
+ return delta.text
43
+ return ""
44
+
45
+
46
+ def _extract_anthropic_tokens(
47
+ usage: Usage | MessageDeltaUsage,
48
+ ) -> Tuple[int, int, int, int]:
49
+ input_tokens = usage.input_tokens if usage.input_tokens is not None else 0
50
+ output_tokens = usage.output_tokens if usage.output_tokens is not None else 0
51
+ cache_read = (
52
+ usage.cache_read_input_tokens
53
+ if usage.cache_read_input_tokens is not None
54
+ else 0
55
+ )
56
+ cache_creation = (
57
+ usage.cache_creation_input_tokens
58
+ if usage.cache_creation_input_tokens is not None
59
+ else 0
60
+ )
61
+ return (input_tokens, output_tokens, cache_read, cache_creation)
62
+
63
+
64
+ def _extract_anthropic_chunk_usage(
65
+ chunk: RawMessageStreamEvent,
66
+ ) -> Usage | MessageDeltaUsage | None:
67
+ if chunk.type == "message_start":
68
+ return chunk.message.usage if chunk.message else None
69
+ elif chunk.type == "message_delta":
70
+ return chunk.usage if hasattr(chunk, "usage") else None
71
+ return None
72
+
73
+
74
def wrap_messages_create_sync(tracer: Tracer, client: Anthropic) -> None:
    """Patch ``client.messages.create`` in place so every call is traced.

    Streaming and non-streaming calls need different instrumentation, so the
    replacement dispatches on the ``stream`` kwarg at call time.

    Args:
        tracer: The judgeval tracer used to create spans.
        client: A synchronous Anthropic client; mutated in place.
    """
    original_func = client.messages.create
    # Build both wrappers once at patch time; the original code rebuilt the
    # full wrapper (and every hook closure) on each individual call.
    streaming_func = _wrap_streaming_sync(tracer, original_func)
    non_streaming_func = _wrap_non_streaming_sync(tracer, original_func)

    def dispatcher(*args: Any, **kwargs: Any) -> Any:
        if kwargs.get("stream", False):
            return streaming_func(*args, **kwargs)
        return non_streaming_func(*args, **kwargs)

    setattr(client.messages, "create", dispatcher)
83
+
84
+
85
def _wrap_non_streaming_sync(
    tracer: Tracer, original_func: Callable[..., Message]
) -> Callable[..., Message]:
    """Wrap a blocking, non-streaming ``messages.create`` with an LLM span.

    The span opens before the request, records the prompt, completion,
    token usage, and response model, notes any exception, and always ends.
    """

    def open_span(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        # Start the span and stash it in the shared hook context.
        span = tracer.get_tracer().start_span(
            "ANTHROPIC_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        ctx["span"] = span
        tracer.add_agent_attributes_to_span(span)
        set_span_attribute(span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs))
        model = kwargs.get("model", "")
        ctx["model_name"] = model
        set_span_attribute(span, AttributeKeys.GEN_AI_REQUEST_MODEL, model)

    def record_result(ctx: Dict[str, Any], result: Message) -> None:
        span = ctx.get("span")
        if not span:
            return
        set_span_attribute(
            span, AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result)
        )
        usage = result.usage
        if usage:
            in_tok, out_tok, cache_read, cache_write = _extract_anthropic_tokens(usage)
            set_span_attribute(span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, in_tok)
            set_span_attribute(span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, out_tok)
            set_span_attribute(
                span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
            )
            set_span_attribute(
                span,
                AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
                cache_write,
            )
            set_span_attribute(
                span, AttributeKeys.JUDGMENT_USAGE_METADATA, safe_serialize(usage)
            )
        set_span_attribute(span, AttributeKeys.GEN_AI_RESPONSE_MODEL, result.model)

    def record_error(ctx: Dict[str, Any], error: Exception) -> None:
        span = ctx.get("span")
        if span:
            span.record_exception(error)

    def close_span(ctx: Dict[str, Any]) -> None:
        # Runs on both success and failure paths so the span never leaks.
        span = ctx.get("span")
        if span:
            span.end()

    return immutable_wrap_sync(
        original_func,
        pre_hook=open_span,
        post_hook=record_result,
        error_hook=record_error,
        finally_hook=close_span,
    )
157
+
158
+
159
def _wrap_streaming_sync(
    tracer: Tracer, original_func: Callable[..., Iterator[RawMessageStreamEvent]]
) -> Callable[..., Iterator[RawMessageStreamEvent]]:
    """Wrap a streaming ``messages.create`` call with an LLM span.

    The span is started before the request and ended only when the returned
    iterator is exhausted (or fails), so it covers the full stream lifetime.
    Content and usage are accumulated chunk-by-chunk as the caller iterates.
    """

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        # Open the span up front and record the request-side attributes.
        ctx["span"] = tracer.get_tracer().start_span(
            "ANTHROPIC_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        tracer.add_agent_attributes_to_span(ctx["span"])
        set_span_attribute(
            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
        )
        ctx["model_name"] = kwargs.get("model", "")
        set_span_attribute(
            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
        )
        # Running transcript of streamed text, grown in yield_hook below.
        ctx["accumulated_content"] = ""

    def mutate_hook(
        ctx: Dict[str, Any], result: Iterator[RawMessageStreamEvent]
    ) -> Iterator[RawMessageStreamEvent]:
        # Replace the provider's iterator with an instrumented one. NOTE:
        # every inner hook deliberately closes over the OUTER ctx (which
        # holds the span); the iterator wrapper's own inner_ctx is unused.
        def traced_generator() -> Generator[RawMessageStreamEvent, None, None]:
            for chunk in result:
                yield chunk

        def yield_hook(inner_ctx: Dict[str, Any], chunk: RawMessageStreamEvent) -> None:
            # Invoked per chunk (per the wrapper utility's contract):
            # accumulate text and record usage whenever an event carries it.
            span = ctx.get("span")
            if not span:
                return

            content = _extract_anthropic_content(chunk)
            if content:
                ctx["accumulated_content"] = (
                    ctx.get("accumulated_content", "") + content
                )

            usage_data = _extract_anthropic_chunk_usage(chunk)
            if usage_data:
                prompt_tokens, completion_tokens, cache_read, cache_creation = (
                    _extract_anthropic_tokens(usage_data)
                )
                set_span_attribute(
                    span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
                )
                set_span_attribute(
                    span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
                )
                set_span_attribute(
                    span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
                )
                set_span_attribute(
                    span,
                    AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
                    cache_creation,
                )
                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_USAGE_METADATA,
                    safe_serialize(usage_data),
                )

        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            # Stream finished cleanly: record the full accumulated completion.
            span = ctx.get("span")
            if span:
                accumulated = ctx.get("accumulated_content", "")
                set_span_attribute(span, AttributeKeys.GEN_AI_COMPLETION, accumulated)

        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
            span = ctx.get("span")
            if span:
                span.record_exception(error)

        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            # End the span here — not in the outer wrapper — so it stays
            # open for the whole time the caller consumes the stream.
            span = ctx.get("span")
            if span:
                span.end()

        wrapped_generator = immutable_wrap_sync_iterator(
            traced_generator,
            yield_hook=yield_hook,
            post_hook=post_hook_inner,
            error_hook=error_hook_inner,
            finally_hook=finally_hook_inner,
        )

        return wrapped_generator()

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        # Covers failures raised by the create() call itself, before any
        # iterator exists (the iterator has its own error hook above).
        span = ctx.get("span")
        if span:
            span.record_exception(error)

    return mutable_wrap_sync(
        original_func,
        pre_hook=pre_hook,
        mutate_hook=mutate_hook,
        error_hook=error_hook,
    )
256
+
257
+
258
def wrap_messages_create_async(tracer: Tracer, client: AsyncAnthropic) -> None:
    """Patch ``client.messages.create`` on an async client so calls are traced.

    Streaming and non-streaming calls need different instrumentation, so the
    replacement dispatches on the ``stream`` kwarg at call time.

    Args:
        tracer: The judgeval tracer used to create spans.
        client: An asynchronous Anthropic client; mutated in place.
    """
    original_func = client.messages.create
    # Build both wrappers once at patch time; the original code rebuilt the
    # full wrapper (and every hook closure) on each individual call.
    streaming_func = _wrap_streaming_async(tracer, original_func)
    non_streaming_func = _wrap_non_streaming_async(tracer, original_func)

    async def dispatcher(*args: Any, **kwargs: Any) -> Any:
        if kwargs.get("stream", False):
            return await streaming_func(*args, **kwargs)
        return await non_streaming_func(*args, **kwargs)

    setattr(client.messages, "create", dispatcher)
267
+
268
+
269
def _wrap_non_streaming_async(
    tracer: Tracer, original_func: Callable[..., Awaitable[Message]]
) -> Callable[..., Awaitable[Message]]:
    """Wrap an awaitable, non-streaming ``messages.create`` with an LLM span.

    The span opens before the request, records the prompt, completion,
    token usage, and response model, notes any exception, and always ends.
    """

    def open_span(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        # Span bookkeeping is synchronous even though the wrapped call is not.
        span = tracer.get_tracer().start_span(
            "ANTHROPIC_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        ctx["span"] = span
        tracer.add_agent_attributes_to_span(span)
        set_span_attribute(span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs))
        model = kwargs.get("model", "")
        ctx["model_name"] = model
        set_span_attribute(span, AttributeKeys.GEN_AI_REQUEST_MODEL, model)

    def record_result(ctx: Dict[str, Any], result: Message) -> None:
        span = ctx.get("span")
        if not span:
            return
        set_span_attribute(
            span, AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result)
        )
        usage = result.usage
        if usage:
            in_tok, out_tok, cache_read, cache_write = _extract_anthropic_tokens(usage)
            set_span_attribute(span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, in_tok)
            set_span_attribute(span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, out_tok)
            set_span_attribute(
                span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
            )
            set_span_attribute(
                span,
                AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
                cache_write,
            )
            set_span_attribute(
                span, AttributeKeys.JUDGMENT_USAGE_METADATA, safe_serialize(usage)
            )
        set_span_attribute(span, AttributeKeys.GEN_AI_RESPONSE_MODEL, result.model)

    def record_error(ctx: Dict[str, Any], error: Exception) -> None:
        span = ctx.get("span")
        if span:
            span.record_exception(error)

    def close_span(ctx: Dict[str, Any]) -> None:
        # Runs on both success and failure paths so the span never leaks.
        span = ctx.get("span")
        if span:
            span.end()

    return immutable_wrap_async(
        original_func,
        pre_hook=open_span,
        post_hook=record_result,
        error_hook=record_error,
        finally_hook=close_span,
    )
341
+
342
+
343
def _wrap_streaming_async(
    tracer: Tracer,
    original_func: Callable[..., Awaitable[AsyncIterator[RawMessageStreamEvent]]],
) -> Callable[..., Awaitable[AsyncIterator[RawMessageStreamEvent]]]:
    """Wrap an async streaming ``messages.create`` call with an LLM span.

    The span is started before the request and ended only when the returned
    async iterator is exhausted (or fails), so it covers the full stream
    lifetime. Content and usage are accumulated chunk-by-chunk as the caller
    iterates.
    """

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        # Open the span up front and record the request-side attributes.
        ctx["span"] = tracer.get_tracer().start_span(
            "ANTHROPIC_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        tracer.add_agent_attributes_to_span(ctx["span"])
        set_span_attribute(
            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
        )
        ctx["model_name"] = kwargs.get("model", "")
        set_span_attribute(
            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
        )
        # Running transcript of streamed text, grown in yield_hook below.
        ctx["accumulated_content"] = ""

    def mutate_hook(
        ctx: Dict[str, Any], result: AsyncIterator[RawMessageStreamEvent]
    ) -> AsyncIterator[RawMessageStreamEvent]:
        # Replace the provider's async iterator with an instrumented one.
        # NOTE: every inner hook deliberately closes over the OUTER ctx
        # (which holds the span); the iterator wrapper's inner_ctx is unused.
        async def traced_generator() -> AsyncGenerator[RawMessageStreamEvent, None]:
            async for chunk in result:
                yield chunk

        def yield_hook(inner_ctx: Dict[str, Any], chunk: RawMessageStreamEvent) -> None:
            # Invoked per chunk (per the wrapper utility's contract):
            # accumulate text and record usage whenever an event carries it.
            span = ctx.get("span")
            if not span:
                return

            content = _extract_anthropic_content(chunk)
            if content:
                ctx["accumulated_content"] = (
                    ctx.get("accumulated_content", "") + content
                )

            usage_data = _extract_anthropic_chunk_usage(chunk)
            if usage_data:
                prompt_tokens, completion_tokens, cache_read, cache_creation = (
                    _extract_anthropic_tokens(usage_data)
                )
                set_span_attribute(
                    span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
                )
                set_span_attribute(
                    span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
                )
                set_span_attribute(
                    span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
                )
                set_span_attribute(
                    span,
                    AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
                    cache_creation,
                )
                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_USAGE_METADATA,
                    safe_serialize(usage_data),
                )

        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            # Stream finished cleanly: record the full accumulated completion.
            span = ctx.get("span")
            if span:
                accumulated = ctx.get("accumulated_content", "")
                set_span_attribute(span, AttributeKeys.GEN_AI_COMPLETION, accumulated)

        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
            span = ctx.get("span")
            if span:
                span.record_exception(error)

        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            # End the span here — not in the outer wrapper — so it stays
            # open for the whole time the caller consumes the stream.
            span = ctx.get("span")
            if span:
                span.end()

        wrapped_generator = immutable_wrap_async_iterator(
            traced_generator,
            yield_hook=yield_hook,
            post_hook=post_hook_inner,
            error_hook=error_hook_inner,
            finally_hook=finally_hook_inner,
        )

        return wrapped_generator()

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        # Covers failures raised by the create() call itself, before any
        # iterator exists (the iterator has its own error hook above).
        span = ctx.get("span")
        if span:
            span.record_exception(error)

    return mutable_wrap_async(
        original_func,
        pre_hook=pre_hook,
        mutate_hook=mutate_hook,
        error_hook=error_hook,
    )