judgeval 0.16.7__py3-none-any.whl → 0.16.9__py3-none-any.whl

This diff shows the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Note: this release of judgeval has been flagged as potentially problematic.
Files changed (43)
  1. judgeval/api/api_types.py +1 -2
  2. judgeval/data/judgment_types.py +1 -2
  3. judgeval/tracer/__init__.py +7 -52
  4. judgeval/tracer/llm/config.py +12 -44
  5. judgeval/tracer/llm/constants.py +0 -1
  6. judgeval/tracer/llm/llm_anthropic/config.py +3 -17
  7. judgeval/tracer/llm/llm_anthropic/messages.py +440 -0
  8. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  9. judgeval/tracer/llm/llm_anthropic/wrapper.py +40 -621
  10. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  11. judgeval/tracer/llm/llm_google/config.py +3 -21
  12. judgeval/tracer/llm/llm_google/generate_content.py +125 -0
  13. judgeval/tracer/llm/llm_google/wrapper.py +19 -454
  14. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +192 -0
  15. judgeval/tracer/llm/llm_openai/chat_completions.py +437 -0
  16. judgeval/tracer/llm/llm_openai/config.py +3 -29
  17. judgeval/tracer/llm/llm_openai/responses.py +444 -0
  18. judgeval/tracer/llm/llm_openai/wrapper.py +43 -641
  19. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  20. judgeval/tracer/llm/llm_together/chat_completions.py +398 -0
  21. judgeval/tracer/llm/llm_together/config.py +3 -20
  22. judgeval/tracer/llm/llm_together/wrapper.py +34 -485
  23. judgeval/tracer/llm/providers.py +4 -48
  24. judgeval/utils/decorators/dont_throw.py +30 -14
  25. judgeval/utils/wrappers/README.md +3 -0
  26. judgeval/utils/wrappers/__init__.py +15 -0
  27. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  28. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  29. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  30. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  31. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  32. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  33. judgeval/utils/wrappers/utils.py +35 -0
  34. judgeval/version.py +1 -1
  35. {judgeval-0.16.7.dist-info → judgeval-0.16.9.dist-info}/METADATA +1 -1
  36. {judgeval-0.16.7.dist-info → judgeval-0.16.9.dist-info}/RECORD +40 -27
  37. judgeval/tracer/llm/llm_groq/config.py +0 -23
  38. judgeval/tracer/llm/llm_groq/wrapper.py +0 -498
  39. judgeval/tracer/local_eval_queue.py +0 -199
  40. /judgeval/{tracer/llm/llm_groq/__init__.py → utils/wrappers/py.typed} +0 -0
  41. {judgeval-0.16.7.dist-info → judgeval-0.16.9.dist-info}/WHEEL +0 -0
  42. {judgeval-0.16.7.dist-info → judgeval-0.16.9.dist-info}/entry_points.txt +0 -0
  43. {judgeval-0.16.7.dist-info → judgeval-0.16.9.dist-info}/licenses/LICENSE.md +0 -0
judgeval/tracer/llm/llm_together/__init__.py
@@ -0,0 +1,3 @@
+from .wrapper import wrap_together_client
+
+__all__ = ["wrap_together_client"]
judgeval/tracer/llm/llm_together/chat_completions.py
@@ -0,0 +1,398 @@
+from __future__ import annotations
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Awaitable,
+    Callable,
+    Dict,
+    Iterator,
+    AsyncIterator,
+    Generator,
+    AsyncGenerator,
+)
+
+from judgeval.tracer.keys import AttributeKeys
+from judgeval.tracer.utils import set_span_attribute
+from judgeval.utils.serialize import safe_serialize
+from judgeval.utils.wrappers import (
+    immutable_wrap_async,
+    immutable_wrap_sync,
+    mutable_wrap_sync,
+    mutable_wrap_async,
+    immutable_wrap_sync_iterator,
+    immutable_wrap_async_iterator,
+)
+
+if TYPE_CHECKING:
+    from judgeval.tracer import Tracer
+    from together import Together, AsyncTogether  # type: ignore[import-untyped]
+    from together.types import ChatCompletionResponse, ChatCompletionChunk  # type: ignore[import-untyped]
+    from together.types.common import UsageData  # type: ignore[import-untyped]
+
+
+def _extract_together_tokens(usage: UsageData) -> tuple[int, int, int, int]:
+    prompt_tokens = usage.prompt_tokens if usage.prompt_tokens is not None else 0
+    completion_tokens = (
+        usage.completion_tokens if usage.completion_tokens is not None else 0
+    )
+    cache_read_input_tokens = 0
+    cache_creation_input_tokens = 0
+    return (
+        prompt_tokens,
+        completion_tokens,
+        cache_read_input_tokens,
+        cache_creation_input_tokens,
+    )
+
+
+def wrap_chat_completions_create_sync(tracer: Tracer, client: Together) -> None:
+    original_func = client.chat.completions.create
+
+    def dispatcher(*args: Any, **kwargs: Any) -> Any:
+        if kwargs.get("stream", False):
+            return _wrap_streaming_sync(tracer, original_func)(*args, **kwargs)  # type: ignore[arg-type]
+        return _wrap_non_streaming_sync(tracer, original_func)(*args, **kwargs)  # type: ignore[arg-type]
+
+    setattr(client.chat.completions, "create", dispatcher)
+
+
+def _wrap_non_streaming_sync(
+    tracer: Tracer, original_func: Callable[..., ChatCompletionResponse]
+) -> Callable[..., ChatCompletionResponse]:
+    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
+        ctx["span"] = tracer.get_tracer().start_span(
+            "TOGETHER_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+        )
+        tracer.add_agent_attributes_to_span(ctx["span"])
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+        )
+        ctx["model_name"] = kwargs.get("model", "")
+        prefixed_model_name = (
+            f"together_ai/{ctx['model_name']}" if ctx["model_name"] else ""
+        )
+        ctx["model_name"] = prefixed_model_name
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
+        )
+
+    def post_hook(ctx: Dict[str, Any], result: ChatCompletionResponse) -> None:
+        span = ctx.get("span")
+        if not span:
+            return
+
+        set_span_attribute(
+            span, AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result)
+        )
+
+        if result.usage:
+            prompt_tokens, completion_tokens, _, _ = _extract_together_tokens(
+                result.usage
+            )
+            set_span_attribute(
+                span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            )
+            set_span_attribute(
+                span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            )
+            set_span_attribute(
+                span,
+                AttributeKeys.JUDGMENT_USAGE_METADATA,
+                safe_serialize(result.usage),
+            )
+
+        set_span_attribute(
+            span,
+            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            ctx["model_name"],
+        )
+
+    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
+        span = ctx.get("span")
+        if span:
+            span.record_exception(error)
+
+    def finally_hook(ctx: Dict[str, Any]) -> None:
+        span = ctx.get("span")
+        if span:
+            span.end()
+
+    return immutable_wrap_sync(
+        original_func,
+        pre_hook=pre_hook,
+        post_hook=post_hook,
+        error_hook=error_hook,
+        finally_hook=finally_hook,
+    )
+
+
+def _wrap_streaming_sync(
+    tracer: Tracer, original_func: Callable[..., Iterator[ChatCompletionChunk]]
+) -> Callable[..., Iterator[ChatCompletionChunk]]:
+    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
+        ctx["span"] = tracer.get_tracer().start_span(
+            "TOGETHER_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+        )
+        tracer.add_agent_attributes_to_span(ctx["span"])
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+        )
+        ctx["model_name"] = kwargs.get("model", "")
+        prefixed_model_name = (
+            f"together_ai/{ctx['model_name']}" if ctx["model_name"] else ""
+        )
+        ctx["model_name"] = prefixed_model_name
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
+        )
+        ctx["accumulated_content"] = ""
+
+    def mutate_hook(
+        ctx: Dict[str, Any], result: Iterator[ChatCompletionChunk]
+    ) -> Iterator[ChatCompletionChunk]:
+        def traced_generator() -> Generator[ChatCompletionChunk, None, None]:
+            for chunk in result:
+                yield chunk
+
+        def yield_hook(inner_ctx: Dict[str, Any], chunk: ChatCompletionChunk) -> None:
+            span = ctx.get("span")
+            if not span:
+                return
+
+            if chunk.choices and len(chunk.choices) > 0:
+                delta = chunk.choices[0].delta
+                if delta and hasattr(delta, "content") and delta.content:
+                    ctx["accumulated_content"] = (
+                        ctx.get("accumulated_content", "") + delta.content
+                    )
+
+            if chunk.usage:
+                prompt_tokens, completion_tokens, _, _ = _extract_together_tokens(
+                    chunk.usage
+                )
+                set_span_attribute(
+                    span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+                )
+                set_span_attribute(
+                    span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+                )
+                set_span_attribute(
+                    span,
+                    AttributeKeys.JUDGMENT_USAGE_METADATA,
+                    safe_serialize(chunk.usage),
+                )
+
+        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
+            span = ctx.get("span")
+            if span:
+                accumulated = ctx.get("accumulated_content", "")
+                set_span_attribute(span, AttributeKeys.GEN_AI_COMPLETION, accumulated)
+
+        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
+            span = ctx.get("span")
+            if span:
+                span.record_exception(error)
+
+        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
+            span = ctx.get("span")
+            if span:
+                span.end()
+
+        wrapped_generator = immutable_wrap_sync_iterator(
+            traced_generator,
+            yield_hook=yield_hook,
+            post_hook=post_hook_inner,
+            error_hook=error_hook_inner,
+            finally_hook=finally_hook_inner,
+        )
+
+        return wrapped_generator()
+
+    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
+        span = ctx.get("span")
+        if span:
+            span.record_exception(error)
+
+    return mutable_wrap_sync(
+        original_func,
+        pre_hook=pre_hook,
+        mutate_hook=mutate_hook,
+        error_hook=error_hook,
+    )
+
+
+def wrap_chat_completions_create_async(tracer: Tracer, client: AsyncTogether) -> None:
+    original_func = client.chat.completions.create
+
+    async def dispatcher(*args: Any, **kwargs: Any) -> Any:
+        if kwargs.get("stream", False):
+            return await _wrap_streaming_async(tracer, original_func)(*args, **kwargs)  # type: ignore[arg-type]
+        return await _wrap_non_streaming_async(tracer, original_func)(*args, **kwargs)  # type: ignore[arg-type]
+
+    setattr(client.chat.completions, "create", dispatcher)
+
+
+def _wrap_non_streaming_async(
+    tracer: Tracer, original_func: Callable[..., Awaitable[ChatCompletionResponse]]
+) -> Callable[..., Awaitable[ChatCompletionResponse]]:
+    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
+        ctx["span"] = tracer.get_tracer().start_span(
+            "TOGETHER_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+        )
+        tracer.add_agent_attributes_to_span(ctx["span"])
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+        )
+        ctx["model_name"] = kwargs.get("model", "")
+        prefixed_model_name = (
+            f"together_ai/{ctx['model_name']}" if ctx["model_name"] else ""
+        )
+        ctx["model_name"] = prefixed_model_name
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
+        )
+
+    def post_hook(ctx: Dict[str, Any], result: ChatCompletionResponse) -> None:
+        span = ctx.get("span")
+        if not span:
+            return
+
+        set_span_attribute(
+            span, AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result)
+        )
+
+        if result.usage:
+            prompt_tokens, completion_tokens, _, _ = _extract_together_tokens(
+                result.usage
+            )
+            set_span_attribute(
+                span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            )
+            set_span_attribute(
+                span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            )
+            set_span_attribute(
+                span,
+                AttributeKeys.JUDGMENT_USAGE_METADATA,
+                safe_serialize(result.usage),
+            )
+
+        set_span_attribute(
+            span,
+            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            ctx["model_name"],
+        )
+
+    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
+        span = ctx.get("span")
+        if span:
+            span.record_exception(error)
+
+    def finally_hook(ctx: Dict[str, Any]) -> None:
+        span = ctx.get("span")
+        if span:
+            span.end()
+
+    return immutable_wrap_async(
+        original_func,
+        pre_hook=pre_hook,
+        post_hook=post_hook,
+        error_hook=error_hook,
+        finally_hook=finally_hook,
+    )
+
+
+def _wrap_streaming_async(
+    tracer: Tracer,
+    original_func: Callable[..., Awaitable[AsyncIterator[ChatCompletionChunk]]],
+) -> Callable[..., Awaitable[AsyncIterator[ChatCompletionChunk]]]:
+    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
+        ctx["span"] = tracer.get_tracer().start_span(
+            "TOGETHER_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+        )
+        tracer.add_agent_attributes_to_span(ctx["span"])
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+        )
+        ctx["model_name"] = kwargs.get("model", "")
+        prefixed_model_name = (
+            f"together_ai/{ctx['model_name']}" if ctx["model_name"] else ""
+        )
+        ctx["model_name"] = prefixed_model_name
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
+        )
+        ctx["accumulated_content"] = ""
+
+    def mutate_hook(
+        ctx: Dict[str, Any], result: AsyncIterator[ChatCompletionChunk]
+    ) -> AsyncIterator[ChatCompletionChunk]:
+        async def traced_generator() -> AsyncGenerator[ChatCompletionChunk, None]:
+            async for chunk in result:
+                yield chunk
+
+        def yield_hook(inner_ctx: Dict[str, Any], chunk: ChatCompletionChunk) -> None:
+            span = ctx.get("span")
+            if not span:
+                return
+
+            if chunk.choices and len(chunk.choices) > 0:
+                delta = chunk.choices[0].delta
+                if delta and hasattr(delta, "content") and delta.content:
+                    ctx["accumulated_content"] = (
+                        ctx.get("accumulated_content", "") + delta.content
+                    )
+
+            if chunk.usage:
+                prompt_tokens, completion_tokens, _, _ = _extract_together_tokens(
+                    chunk.usage
+                )
+                set_span_attribute(
+                    span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+                )
+                set_span_attribute(
+                    span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+                )
+                set_span_attribute(
+                    span,
+                    AttributeKeys.JUDGMENT_USAGE_METADATA,
+                    safe_serialize(chunk.usage),
+                )
+
+        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
+            span = ctx.get("span")
+            if span:
+                accumulated = ctx.get("accumulated_content", "")
+                set_span_attribute(span, AttributeKeys.GEN_AI_COMPLETION, accumulated)
+
+        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
+            span = ctx.get("span")
+            if span:
+                span.record_exception(error)
+
+        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
+            span = ctx.get("span")
+            if span:
+                span.end()
+
+        wrapped_generator = immutable_wrap_async_iterator(
+            traced_generator,
+            yield_hook=yield_hook,
+            post_hook=post_hook_inner,
+            error_hook=error_hook_inner,
+            finally_hook=finally_hook_inner,
+        )
+
+        return wrapped_generator()
+
+    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
+        span = ctx.get("span")
+        if span:
+            span.record_exception(error)
+
+    return mutable_wrap_async(
+        original_func,
+        pre_hook=pre_hook,
+        mutate_hook=mutate_hook,
+        error_hook=error_hook,
+    )
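These helpers lean on the hook contract of the new judgeval/utils/wrappers package (files 26-33 above), whose implementation is not part of this hunk. A minimal sketch of the assumed immutable_wrap_sync semantics: a per-call ctx dict shared across hooks, with the wrapped function's return value observed but never replaced.

# Illustrative sketch of the assumed hook contract, not the shipped
# immutable_wrap_sync. The real version presumably also guards each hook
# (cf. the dont_throw changes in this release) so a tracing failure can
# never break the user's call; that guard is omitted here for brevity.
import functools
from typing import Any, Callable, Dict, Optional, TypeVar

R = TypeVar("R")

def immutable_wrap_sync(
    func: Callable[..., R],
    pre_hook: Optional[Callable[..., None]] = None,
    post_hook: Optional[Callable[[Dict[str, Any], R], None]] = None,
    error_hook: Optional[Callable[[Dict[str, Any], Exception], None]] = None,
    finally_hook: Optional[Callable[[Dict[str, Any]], None]] = None,
) -> Callable[..., R]:
    @functools.wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> R:
        ctx: Dict[str, Any] = {}  # per-call state shared by all hooks
        if pre_hook:
            pre_hook(ctx, *args, **kwargs)  # e.g. start the span
        try:
            result = func(*args, **kwargs)
            if post_hook:
                post_hook(ctx, result)  # observe the result; cannot replace it
            return result
        except Exception as error:
            if error_hook:
                error_hook(ctx, error)  # e.g. record_exception on the span
            raise
        finally:
            if finally_hook:
                finally_hook(ctx)  # e.g. end the span

    return wrapper

The mutable_* variants used for the streaming paths differ in that their mutate_hook may substitute the result, here swapping the provider's iterator for a traced generator, while the iterator wrappers add a per-item yield_hook so the span can accumulate streamed content and usage before finally_hook ends it.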
judgeval/tracer/llm/llm_together/config.py
@@ -1,23 +1,6 @@
 from __future__ import annotations
-from typing import TYPE_CHECKING
+import importlib.util
 
-if TYPE_CHECKING:
-    from together import Together, AsyncTogether  # type: ignore[import-untyped]
+HAS_TOGETHER = importlib.util.find_spec("together") is not None
 
-try:
-    from together import Together, AsyncTogether  # type: ignore[import-untyped]
-
-    HAS_TOGETHER = True
-except ImportError:
-    HAS_TOGETHER = False
-    Together = AsyncTogether = None  # type: ignore[misc,assignment]
-
-# Export the classes for runtime use
-together_Together = Together
-together_AsyncTogether = AsyncTogether
-
-__all__ = [
-    "HAS_TOGETHER",
-    "together_Together",
-    "together_AsyncTogether",
-]
+__all__ = ["HAS_TOGETHER"]
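The config rewrite swaps an eager try/except import for a metadata probe: importlib.util.find_spec only checks whether the together distribution is importable, so judgeval no longer executes together's package import (and pays its import cost) just to set a flag. A sketch of the pattern, with a hypothetical lazy-import helper (not part of judgeval) showing how callers would defer the real import to first use:

import importlib
import importlib.util

# find_spec locates the top-level package without executing its __init__.
HAS_TOGETHER = importlib.util.find_spec("together") is not None

def _load_together():
    # Hypothetical helper for illustration: import only when the SDK
    # is actually needed, failing with a clear error otherwise.
    if not HAS_TOGETHER:
        raise ImportError("The 'together' package is not installed.")
    return importlib.import_module("together")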