judgeval 0.16.6__py3-none-any.whl → 0.16.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of judgeval might be problematic.

Files changed (43)
  1. judgeval/api/api_types.py +1 -2
  2. judgeval/data/judgment_types.py +1 -2
  3. judgeval/tracer/__init__.py +7 -52
  4. judgeval/tracer/llm/config.py +12 -44
  5. judgeval/tracer/llm/constants.py +0 -1
  6. judgeval/tracer/llm/llm_anthropic/config.py +3 -17
  7. judgeval/tracer/llm/llm_anthropic/messages.py +440 -0
  8. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  9. judgeval/tracer/llm/llm_anthropic/wrapper.py +40 -621
  10. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  11. judgeval/tracer/llm/llm_google/config.py +3 -21
  12. judgeval/tracer/llm/llm_google/generate_content.py +125 -0
  13. judgeval/tracer/llm/llm_google/wrapper.py +19 -454
  14. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +192 -0
  15. judgeval/tracer/llm/llm_openai/chat_completions.py +437 -0
  16. judgeval/tracer/llm/llm_openai/config.py +3 -29
  17. judgeval/tracer/llm/llm_openai/responses.py +444 -0
  18. judgeval/tracer/llm/llm_openai/wrapper.py +43 -641
  19. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  20. judgeval/tracer/llm/llm_together/chat_completions.py +398 -0
  21. judgeval/tracer/llm/llm_together/config.py +3 -20
  22. judgeval/tracer/llm/llm_together/wrapper.py +34 -485
  23. judgeval/tracer/llm/providers.py +4 -48
  24. judgeval/utils/decorators/dont_throw.py +30 -14
  25. judgeval/utils/wrappers/README.md +3 -0
  26. judgeval/utils/wrappers/__init__.py +15 -0
  27. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  28. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  29. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  30. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  31. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  32. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  33. judgeval/utils/wrappers/utils.py +35 -0
  34. judgeval/version.py +1 -1
  35. {judgeval-0.16.6.dist-info → judgeval-0.16.8.dist-info}/METADATA +1 -1
  36. {judgeval-0.16.6.dist-info → judgeval-0.16.8.dist-info}/RECORD +40 -27
  37. judgeval/tracer/llm/llm_groq/config.py +0 -23
  38. judgeval/tracer/llm/llm_groq/wrapper.py +0 -498
  39. judgeval/tracer/local_eval_queue.py +0 -199
  40. /judgeval/{tracer/llm/llm_groq/__init__.py → utils/wrappers/py.typed} +0 -0
  41. {judgeval-0.16.6.dist-info → judgeval-0.16.8.dist-info}/WHEEL +0 -0
  42. {judgeval-0.16.6.dist-info → judgeval-0.16.8.dist-info}/entry_points.txt +0 -0
  43. {judgeval-0.16.6.dist-info → judgeval-0.16.8.dist-info}/licenses/LICENSE.md +0 -0
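
Beyond the per-provider rewrites (and the removal of the groq provider and local_eval_queue), the structural change in this release is the new judgeval/utils/wrappers package: each provider module now composes its instrumentation from generic immutable_wrap_sync / immutable_wrap_async helpers (observe-only hooks) and mutable_wrap_sync / mutable_wrap_async helpers (a mutate_hook may replace the result, used for streaming). The wrappers' implementation is not shown in this hunk; the following is only a sketch of the contract implied by the call sites in the diff below, with everything beyond the hook names being an assumption:

    # Hypothetical sketch of immutable_wrap_sync, inferred from its call sites
    # in the responses.py hunk below; the shipped implementation may differ
    # (e.g. hooks are likely guarded so they cannot raise, cf. dont_throw.py).
    from typing import Any, Callable, Dict, Optional, TypeVar

    T = TypeVar("T")

    def immutable_wrap_sync(
        func: Callable[..., T],
        pre_hook: Optional[Callable[..., None]] = None,
        post_hook: Optional[Callable[[Dict[str, Any], T], None]] = None,
        error_hook: Optional[Callable[[Dict[str, Any], Exception], None]] = None,
        finally_hook: Optional[Callable[[Dict[str, Any]], None]] = None,
    ) -> Callable[..., T]:
        def wrapper(*args: Any, **kwargs: Any) -> T:
            # A fresh per-call ctx dict is shared by all hooks.
            ctx: Dict[str, Any] = {}
            if pre_hook:
                pre_hook(ctx, *args, **kwargs)
            try:
                result = func(*args, **kwargs)
                if post_hook:
                    post_hook(ctx, result)
                # "Immutable": arguments and return value pass through unchanged.
                return result
            except Exception as e:
                if error_hook:
                    error_hook(ctx, e)
                raise
            finally:
                if finally_hook:
                    finally_hook(ctx)

        return wrapper

The shared ctx dict is what lets a pre_hook hand the started span to the post/error/finally hooks without altering the wrapped function's signature.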
--- /dev/null
+++ judgeval/tracer/llm/llm_openai/responses.py
@@ -0,0 +1,444 @@
+from __future__ import annotations
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Awaitable,
+    Callable,
+    Dict,
+    Iterator,
+    AsyncIterator,
+    Generator,
+    AsyncGenerator,
+    ParamSpec,
+    TypeVar,
+)
+
+from judgeval.tracer.keys import AttributeKeys
+from judgeval.tracer.utils import set_span_attribute
+from judgeval.utils.serialize import safe_serialize
+from judgeval.utils.wrappers import (
+    immutable_wrap_sync,
+    immutable_wrap_async,
+    mutable_wrap_sync,
+    mutable_wrap_async,
+    immutable_wrap_sync_iterator,
+    immutable_wrap_async_iterator,
+)
+
+if TYPE_CHECKING:
+    from judgeval.tracer import Tracer
+    from openai import OpenAI, AsyncOpenAI
+    from openai.types.responses import Response
+
+P = ParamSpec("P")
+T = TypeVar("T")
+
+
+def wrap_responses_create_sync(tracer: Tracer, client: OpenAI) -> None:
+    original_func = client.responses.create
+
+    def dispatcher(*args: Any, **kwargs: Any) -> Any:
+        if kwargs.get("stream", False):
+            return _wrap_responses_streaming_sync(tracer, original_func)(
+                *args, **kwargs
+            )
+        return _wrap_responses_non_streaming_sync(tracer, original_func)(
+            *args, **kwargs
+        )
+
+    setattr(client.responses, "create", dispatcher)
+
+
+def _wrap_responses_non_streaming_sync(
+    tracer: Tracer, original_func: Callable[..., Response]
+) -> Callable[..., Response]:
+    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
+        ctx["span"] = tracer.get_tracer().start_span(
+            "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+        )
+        tracer.add_agent_attributes_to_span(ctx["span"])
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+        )
+        ctx["model_name"] = kwargs.get("model", "")
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+        )
+
+    def post_hook(ctx: Dict[str, Any], result: Response) -> None:
+        span = ctx.get("span")
+        if not span:
+            return
+
+        set_span_attribute(
+            span, AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result)
+        )
+
+        usage_data = result.usage if hasattr(result, "usage") else None
+        if usage_data:
+            prompt_tokens = usage_data.input_tokens or 0
+            completion_tokens = usage_data.output_tokens or 0
+            cache_read = usage_data.input_tokens_details.cached_tokens or 0
+
+            set_span_attribute(
+                span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            )
+            set_span_attribute(
+                span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            )
+            set_span_attribute(
+                span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            )
+            set_span_attribute(
+                span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            )
+            set_span_attribute(
+                span,
+                AttributeKeys.JUDGMENT_USAGE_METADATA,
+                safe_serialize(usage_data),
+            )
+
+        if hasattr(result, "model"):
+            set_span_attribute(
+                span,
+                AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                result.model or ctx["model_name"],
+            )
+
+    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
+        span = ctx.get("span")
+        if span:
+            span.record_exception(error)
+
+    def finally_hook(ctx: Dict[str, Any]) -> None:
+        span = ctx.get("span")
+        if span:
+            span.end()
+
+    return immutable_wrap_sync(
+        original_func,
+        pre_hook=pre_hook,
+        post_hook=post_hook,
+        error_hook=error_hook,
+        finally_hook=finally_hook,
+    )
+
+
+def _wrap_responses_streaming_sync(
+    tracer: Tracer, original_func: Callable[..., Iterator[Any]]
+) -> Callable[..., Iterator[Any]]:
+    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
+        ctx["span"] = tracer.get_tracer().start_span(
+            "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+        )
+        tracer.add_agent_attributes_to_span(ctx["span"])
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+        )
+        ctx["model_name"] = kwargs.get("model", "")
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+        )
+        ctx["accumulated_content"] = ""
+
+    def mutate_hook(ctx: Dict[str, Any], result: Iterator[Any]) -> Iterator[Any]:
+        def traced_generator() -> Generator[Any, None, None]:
+            for chunk in result:
+                yield chunk
+
+        def yield_hook(inner_ctx: Dict[str, Any], chunk: Any) -> None:
+            span = ctx.get("span")
+            if not span:
+                return
+
+            if hasattr(chunk, "type") and chunk.type == "response.output_text.delta":
+                delta = getattr(chunk, "delta", None)
+                if delta:
+                    ctx["accumulated_content"] = (
+                        ctx.get("accumulated_content", "") + delta
+                    )
+
+            if hasattr(chunk, "type") and chunk.type == "response.completed":
+                if (
+                    hasattr(chunk, "response")
+                    and chunk.response
+                    and hasattr(chunk.response, "usage")
+                    and chunk.response.usage
+                ):
+                    prompt_tokens = chunk.response.usage.input_tokens or 0
+                    completion_tokens = chunk.response.usage.output_tokens or 0
+                    # Safely access nested cached_tokens
+                    input_tokens_details = getattr(
+                        chunk.response.usage, "input_tokens_details", None
+                    )
+                    cache_read = (
+                        getattr(input_tokens_details, "cached_tokens", 0)
+                        if input_tokens_details
+                        else 0
+                    )
+
+                    set_span_attribute(
+                        span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+                    )
+                    set_span_attribute(
+                        span,
+                        AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                        completion_tokens,
+                    )
+                    set_span_attribute(
+                        span,
+                        AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                        cache_read,
+                    )
+                    set_span_attribute(
+                        span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+                    )
+                    set_span_attribute(
+                        span,
+                        AttributeKeys.JUDGMENT_USAGE_METADATA,
+                        safe_serialize(chunk.response.usage),
+                    )
+
+        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
+            span = ctx.get("span")
+            if span:
+                accumulated = ctx.get("accumulated_content", "")
+                set_span_attribute(span, AttributeKeys.GEN_AI_COMPLETION, accumulated)
+
+        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
+            span = ctx.get("span")
+            if span:
+                span.record_exception(error)
+
+        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
+            span = ctx.get("span")
+            if span:
+                span.end()
+
+        wrapped_generator = immutable_wrap_sync_iterator(
+            traced_generator,
+            yield_hook=yield_hook,
+            post_hook=post_hook_inner,
+            error_hook=error_hook_inner,
+            finally_hook=finally_hook_inner,
+        )
+
+        return wrapped_generator()
+
+    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
+        span = ctx.get("span")
+        if span:
+            span.record_exception(error)
+
+    return mutable_wrap_sync(
+        original_func,
+        pre_hook=pre_hook,
+        mutate_hook=mutate_hook,
+        error_hook=error_hook,
+    )
+
+
+def wrap_responses_create_async(tracer: Tracer, client: AsyncOpenAI) -> None:
+    original_func = client.responses.create
+
+    async def dispatcher(*args: Any, **kwargs: Any) -> Any:
+        if kwargs.get("stream", False):
+            return await _wrap_responses_streaming_async(tracer, original_func)(
+                *args, **kwargs
+            )
+        return await _wrap_responses_non_streaming_async(tracer, original_func)(
+            *args, **kwargs
+        )
+
+    setattr(client.responses, "create", dispatcher)
+
+
+def _wrap_responses_non_streaming_async(
+    tracer: Tracer, original_func: Callable[..., Awaitable[Response]]
+) -> Callable[..., Awaitable[Response]]:
+    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
+        ctx["span"] = tracer.get_tracer().start_span(
+            "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+        )
+        tracer.add_agent_attributes_to_span(ctx["span"])
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+        )
+        ctx["model_name"] = kwargs.get("model", "")
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+        )
+
+    def post_hook(ctx: Dict[str, Any], result: Response) -> None:
+        span = ctx.get("span")
+        if not span:
+            return
+
+        set_span_attribute(
+            span, AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result)
+        )
+
+        usage_data = result.usage if hasattr(result, "usage") else None
+        if usage_data:
+            prompt_tokens = usage_data.input_tokens or 0
+            completion_tokens = usage_data.output_tokens or 0
+            cache_read = usage_data.input_tokens_details.cached_tokens or 0
+
+            set_span_attribute(
+                span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            )
+            set_span_attribute(
+                span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            )
+            set_span_attribute(
+                span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            )
+            set_span_attribute(
+                span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            )
+            set_span_attribute(
+                span,
+                AttributeKeys.JUDGMENT_USAGE_METADATA,
+                safe_serialize(usage_data),
+            )
+
+        if hasattr(result, "model"):
+            set_span_attribute(
+                span,
+                AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                result.model or ctx["model_name"],
+            )
+
+    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
+        span = ctx.get("span")
+        if span:
+            span.record_exception(error)
+
+    def finally_hook(ctx: Dict[str, Any]) -> None:
+        span = ctx.get("span")
+        if span:
+            span.end()
+
+    return immutable_wrap_async(
+        original_func,
+        pre_hook=pre_hook,
+        post_hook=post_hook,
+        error_hook=error_hook,
+        finally_hook=finally_hook,
+    )
+
+
+def _wrap_responses_streaming_async(
+    tracer: Tracer, original_func: Callable[..., Awaitable[AsyncIterator[Any]]]
+) -> Callable[..., Awaitable[AsyncIterator[Any]]]:
+    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
+        ctx["span"] = tracer.get_tracer().start_span(
+            "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+        )
+        tracer.add_agent_attributes_to_span(ctx["span"])
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+        )
+        ctx["model_name"] = kwargs.get("model", "")
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+        )
+        ctx["accumulated_content"] = ""
+
+    def mutate_hook(
+        ctx: Dict[str, Any], result: AsyncIterator[Any]
+    ) -> AsyncIterator[Any]:
+        async def traced_generator() -> AsyncGenerator[Any, None]:
+            async for chunk in result:
+                yield chunk
+
+        def yield_hook(inner_ctx: Dict[str, Any], chunk: Any) -> None:
+            span = ctx.get("span")
+            if not span:
+                return
+
+            if hasattr(chunk, "type") and chunk.type == "response.output_text.delta":
+                delta = getattr(chunk, "delta", None)
+                if delta:
+                    ctx["accumulated_content"] = (
+                        ctx.get("accumulated_content", "") + delta
+                    )
+
+            if hasattr(chunk, "type") and chunk.type == "response.completed":
+                if (
+                    hasattr(chunk, "response")
+                    and chunk.response
+                    and hasattr(chunk.response, "usage")
+                    and chunk.response.usage
+                ):
+                    prompt_tokens = chunk.response.usage.input_tokens or 0
+                    completion_tokens = chunk.response.usage.output_tokens or 0
+                    # Safely access nested cached_tokens
+                    input_tokens_details = getattr(
+                        chunk.response.usage, "input_tokens_details", None
+                    )
+                    cache_read = (
+                        getattr(input_tokens_details, "cached_tokens", 0)
+                        if input_tokens_details
+                        else 0
+                    )
+
+                    set_span_attribute(
+                        span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+                    )
+                    set_span_attribute(
+                        span,
+                        AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                        completion_tokens,
+                    )
+                    set_span_attribute(
+                        span,
+                        AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                        cache_read,
+                    )
+                    set_span_attribute(
+                        span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+                    )
+                    set_span_attribute(
+                        span,
+                        AttributeKeys.JUDGMENT_USAGE_METADATA,
+                        safe_serialize(chunk.response.usage),
+                    )
+
+        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
+            span = ctx.get("span")
+            if span:
+                accumulated = ctx.get("accumulated_content", "")
+                set_span_attribute(span, AttributeKeys.GEN_AI_COMPLETION, accumulated)
+
+        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
+            span = ctx.get("span")
+            if span:
+                span.record_exception(error)
+
+        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
+            span = ctx.get("span")
+            if span:
+                span.end()
+
+        wrapped_generator = immutable_wrap_async_iterator(
+            traced_generator,
+            yield_hook=yield_hook,
+            post_hook=post_hook_inner,
+            error_hook=error_hook_inner,
+            finally_hook=finally_hook_inner,
+        )
+
+        return wrapped_generator()
+
+    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
+        span = ctx.get("span")
+        if span:
+            span.record_exception(error)
+
+    return mutable_wrap_async(
+        original_func,
+        pre_hook=pre_hook,
+        mutate_hook=mutate_hook,
+        error_hook=error_hook,
+    )