judgeval 0.16.0__py3-none-any.whl → 0.16.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. judgeval/api/api_types.py +2 -1
  2. judgeval/data/judgment_types.py +2 -1
  3. judgeval/logger.py +1 -1
  4. judgeval/tracer/__init__.py +10 -7
  5. judgeval/tracer/keys.py +7 -3
  6. judgeval/tracer/llm/__init__.py +2 -1259
  7. judgeval/tracer/llm/config.py +110 -0
  8. judgeval/tracer/llm/constants.py +10 -0
  9. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  10. judgeval/tracer/llm/llm_anthropic/wrapper.py +611 -0
  11. judgeval/tracer/llm/llm_google/__init__.py +0 -0
  12. judgeval/tracer/llm/llm_google/config.py +24 -0
  13. judgeval/tracer/llm/llm_google/wrapper.py +426 -0
  14. judgeval/tracer/llm/llm_groq/__init__.py +0 -0
  15. judgeval/tracer/llm/llm_groq/config.py +23 -0
  16. judgeval/tracer/llm/llm_groq/wrapper.py +477 -0
  17. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  18. judgeval/tracer/llm/llm_openai/wrapper.py +637 -0
  19. judgeval/tracer/llm/llm_together/__init__.py +0 -0
  20. judgeval/tracer/llm/llm_together/config.py +23 -0
  21. judgeval/tracer/llm/llm_together/wrapper.py +478 -0
  22. judgeval/tracer/llm/providers.py +5 -5
  23. judgeval/tracer/processors/__init__.py +1 -1
  24. judgeval/trainer/console.py +1 -1
  25. judgeval/utils/decorators/__init__.py +0 -0
  26. judgeval/utils/decorators/dont_throw.py +21 -0
  27. judgeval/utils/{decorators.py → decorators/use_once.py} +0 -11
  28. judgeval/utils/meta.py +1 -1
  29. judgeval/utils/version_check.py +1 -1
  30. judgeval/version.py +1 -1
  31. {judgeval-0.16.0.dist-info → judgeval-0.16.1.dist-info}/METADATA +1 -1
  32. {judgeval-0.16.0.dist-info → judgeval-0.16.1.dist-info}/RECORD +37 -23
  33. judgeval/tracer/llm/google/__init__.py +0 -21
  34. judgeval/tracer/llm/groq/__init__.py +0 -20
  35. judgeval/tracer/llm/together/__init__.py +0 -20
  36. /judgeval/tracer/llm/{anthropic/__init__.py → llm_anthropic/config.py} +0 -0
  37. /judgeval/tracer/llm/{openai/__init__.py → llm_openai/config.py} +0 -0
  38. {judgeval-0.16.0.dist-info → judgeval-0.16.1.dist-info}/WHEEL +0 -0
  39. {judgeval-0.16.0.dist-info → judgeval-0.16.1.dist-info}/entry_points.txt +0 -0
  40. {judgeval-0.16.0.dist-info → judgeval-0.16.1.dist-info}/licenses/LICENSE.md +0 -0
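The headline change in 0.16.1 is structural: the monolithic judgeval/tracer/llm/__init__.py shrinks by 1,259 lines, and its contents move into per-provider packages (llm_anthropic, llm_google, llm_groq, llm_openai, llm_together), each with its own config.py and wrapper.py. A minimal sketch of how the new entry points could compose — wrap_llm_client is an illustrative name, not part of the judgeval API shown in this diff; only wrap_groq_client and wrap_openai_client are visible in the hunks below:

# Hypothetical composition of the per-provider wrappers introduced in 0.16.1.
from groq import Groq, AsyncGroq

from judgeval.tracer.llm.llm_groq.wrapper import wrap_groq_client
from judgeval.tracer.llm.llm_openai import wrap_openai_client

def wrap_llm_client(tracer, client):
    # wrap_groq_client patches chat.completions.create in place and returns
    # the same client object (see the hunk below); wrap_openai_client is
    # assumed to behave the same way.
    if isinstance(client, (Groq, AsyncGroq)):
        return wrap_groq_client(tracer, client)
    return wrap_openai_client(tracer, client)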
judgeval/tracer/llm/llm_groq/wrapper.py (new file, +477)
@@ -0,0 +1,477 @@
+from __future__ import annotations
+import functools
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Optional,
+    Protocol,
+    Tuple,
+    Union,
+    Iterator,
+    AsyncIterator,
+    Sequence,
+    runtime_checkable,
+)
+
+from judgeval.tracer.llm.llm_groq.config import (
+    groq_Groq,
+    groq_AsyncGroq,
+)
+from judgeval.tracer.managers import sync_span_context, async_span_context
+from judgeval.tracer.keys import AttributeKeys
+from judgeval.tracer.utils import set_span_attribute
+from judgeval.utils.serialize import safe_serialize
+
+if TYPE_CHECKING:
+    from judgeval.tracer import Tracer
+    from opentelemetry.trace import Span
+
+# Keep the original client type for runtime compatibility
+GroqClientType = Union[groq_Groq, groq_AsyncGroq]
+
+
+# Usage protocols
+@runtime_checkable
+class GroqPromptTokensDetails(Protocol):
+    cached_tokens: Optional[int]
+
+
+@runtime_checkable
+class GroqUsage(Protocol):
+    prompt_tokens: Optional[int]
+    completion_tokens: Optional[int]
+    total_tokens: Optional[int]
+    prompt_tokens_details: Optional[GroqPromptTokensDetails]
+
+
+# Message protocols
+@runtime_checkable
+class GroqMessage(Protocol):
+    content: Optional[str]
+    role: str
+
+
+@runtime_checkable
+class GroqChoice(Protocol):
+    index: int
+    message: GroqMessage
+    finish_reason: Optional[str]
+
+
+@runtime_checkable
+class GroqChatCompletion(Protocol):
+    id: str
+    object: str
+    created: int
+    model: str
+    choices: Sequence[GroqChoice]
+    usage: Optional[GroqUsage]
+
+
+# Stream protocols
+@runtime_checkable
+class GroqStreamDelta(Protocol):
+    content: Optional[str]
+
+
+@runtime_checkable
+class GroqStreamChoice(Protocol):
+    index: int
+    delta: GroqStreamDelta
+
+
+@runtime_checkable
+class GroqStreamChunk(Protocol):
+    choices: Sequence[GroqStreamChoice]
+    usage: Optional[GroqUsage]
+
+
+# Client protocols
+@runtime_checkable
+class GroqClient(Protocol):
+    pass
+
+
+@runtime_checkable
+class GroqAsyncClient(Protocol):
+    pass
+
+
+# Union types
+GroqResponseType = GroqChatCompletion
+GroqStreamType = Union[Iterator[GroqStreamChunk], AsyncIterator[GroqStreamChunk]]
+
+
+def _extract_groq_content(chunk: GroqStreamChunk) -> str:
+    if chunk.choices and len(chunk.choices) > 0:
+        delta_content = chunk.choices[0].delta.content
+        if delta_content:
+            return delta_content
+    return ""
+
+
+def _extract_groq_tokens(usage_data: GroqUsage) -> Tuple[int, int, int, int]:
+    prompt_tokens = usage_data.prompt_tokens or 0
+    completion_tokens = usage_data.completion_tokens or 0
+    cache_read_input_tokens = 0
+    if (
+        hasattr(usage_data, "prompt_tokens_details")
+        and usage_data.prompt_tokens_details
+        and hasattr(usage_data.prompt_tokens_details, "cached_tokens")
+        and usage_data.prompt_tokens_details.cached_tokens is not None
+    ):
+        cache_read_input_tokens = usage_data.prompt_tokens_details.cached_tokens
+    cache_creation_input_tokens = 0  # Groq doesn't have cache creation tokens
+    return (
+        prompt_tokens,
+        completion_tokens,
+        cache_read_input_tokens,
+        cache_creation_input_tokens,
+    )
+
+
+def _format_groq_output(
+    response: GroqChatCompletion,
+) -> Tuple[Optional[Union[str, list[dict[str, Any]]]], Optional[GroqUsage]]:
+    message_content: Optional[Union[str, list[dict[str, Any]]]] = None
+    usage_data: Optional[GroqUsage] = None
+
+    try:
+        if isinstance(response, GroqChatCompletion):
+            usage_data = response.usage
+            if response.choices and len(response.choices) > 0:
+                content = response.choices[0].message.content
+                if content:
+                    # Return structured data for consistency with other providers
+                    message_content = [{"type": "text", "text": str(content)}]
+    except (AttributeError, IndexError, TypeError):
+        pass
+
+    return message_content, usage_data
+
+
+class TracedGroqGenerator:
+    def __init__(
+        self,
+        tracer: Tracer,
+        generator: Iterator[GroqStreamChunk],
+        client: GroqClientType,
+        span: Span,
+        model_name: str,
+    ):
+        self.tracer = tracer
+        self.generator = generator
+        self.client = client
+        self.span = span
+        self.model_name = model_name
+        self.accumulated_content = ""
+
+    def __iter__(self) -> Iterator[GroqStreamChunk]:
+        return self
+
+    def __next__(self) -> GroqStreamChunk:
+        try:
+            chunk = next(self.generator)
+            content = _extract_groq_content(chunk)
+            if content:
+                self.accumulated_content += content
+            if chunk.usage:
+                prompt_tokens, completion_tokens, cache_read, cache_creation = (
+                    _extract_groq_tokens(chunk.usage)
+                )
+                set_span_attribute(
+                    self.span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+                )
+                set_span_attribute(
+                    self.span,
+                    AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                    completion_tokens,
+                )
+                set_span_attribute(
+                    self.span,
+                    AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                    cache_read,
+                )
+                set_span_attribute(
+                    self.span,
+                    AttributeKeys.JUDGMENT_USAGE_METADATA,
+                    safe_serialize(chunk.usage),
+                )
+            return chunk
+        except StopIteration:
+            set_span_attribute(
+                self.span, AttributeKeys.GEN_AI_COMPLETION, self.accumulated_content
+            )
+            self.span.end()
+            raise
+        except Exception as e:
+            if self.span:
+                self.span.record_exception(e)
+                self.span.end()
+            raise
+
+
+class TracedGroqAsyncGenerator:
+    def __init__(
+        self,
+        tracer: Tracer,
+        async_generator: AsyncIterator[GroqStreamChunk],
+        client: GroqClientType,
+        span: Span,
+        model_name: str,
+    ):
+        self.tracer = tracer
+        self.async_generator = async_generator
+        self.client = client
+        self.span = span
+        self.model_name = model_name
+        self.accumulated_content = ""
+
+    def __aiter__(self) -> AsyncIterator[GroqStreamChunk]:
+        return self
+
+    async def __anext__(self) -> GroqStreamChunk:
+        try:
+            chunk = await self.async_generator.__anext__()
+            content = _extract_groq_content(chunk)
+            if content:
+                self.accumulated_content += content
+            if chunk.usage:
+                prompt_tokens, completion_tokens, cache_read, cache_creation = (
+                    _extract_groq_tokens(chunk.usage)
+                )
+                set_span_attribute(
+                    self.span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+                )
+                set_span_attribute(
+                    self.span,
+                    AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                    completion_tokens,
+                )
+                set_span_attribute(
+                    self.span,
+                    AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                    cache_read,
+                )
+                set_span_attribute(
+                    self.span,
+                    AttributeKeys.JUDGMENT_USAGE_METADATA,
+                    safe_serialize(chunk.usage),
+                )
+            return chunk
+        except StopAsyncIteration:
+            set_span_attribute(
+                self.span, AttributeKeys.GEN_AI_COMPLETION, self.accumulated_content
+            )
+            self.span.end()
+            raise
+        except Exception as e:
+            if self.span:
+                self.span.record_exception(e)
+                self.span.end()
+            raise
+
+
+def wrap_groq_client(tracer: Tracer, client: GroqClientType) -> GroqClientType:
+    def wrapped(function: Callable, span_name: str):
+        @functools.wraps(function)
+        def wrapper(*args, **kwargs):
+            if kwargs.get("stream", False):
+                span = tracer.get_tracer().start_span(
+                    span_name, attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+                )
+                tracer.add_agent_attributes_to_span(span)
+                set_span_attribute(
+                    span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+                )
+                model_name = kwargs.get("model", "")
+                set_span_attribute(span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name)
+                stream_response = function(*args, **kwargs)
+                return TracedGroqGenerator(
+                    tracer, stream_response, client, span, model_name
+                )
+            else:
+                with sync_span_context(
+                    tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+                ) as span:
+                    tracer.add_agent_attributes_to_span(span)
+                    set_span_attribute(
+                        span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+                    )
+                    model_name = kwargs.get("model", "")
+                    # Add groq/ prefix for server-side cost calculation
+                    prefixed_model_name = f"groq/{model_name}" if model_name else ""
+                    set_span_attribute(
+                        span, AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
+                    )
+                    response = function(*args, **kwargs)
+
+                    if isinstance(response, GroqChatCompletion):
+                        output, usage_data = _format_groq_output(response)
+                        # Serialize structured data to JSON for span attribute
+                        if output:
+                            if isinstance(output, list):
+                                import orjson
+
+                                output_str = orjson.dumps(
+                                    output, option=orjson.OPT_INDENT_2
+                                ).decode()
+                            else:
+                                output_str = str(output)
+                            set_span_attribute(
+                                span, AttributeKeys.GEN_AI_COMPLETION, output_str
+                            )
+                        if usage_data:
+                            (
+                                prompt_tokens,
+                                completion_tokens,
+                                cache_read,
+                                cache_creation,
+                            ) = _extract_groq_tokens(usage_data)
+                            set_span_attribute(
+                                span,
+                                AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
+                                prompt_tokens,
+                            )
+                            set_span_attribute(
+                                span,
+                                AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                completion_tokens,
+                            )
+                            set_span_attribute(
+                                span,
+                                AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                                cache_read,
+                            )
+                            set_span_attribute(
+                                span,
+                                AttributeKeys.JUDGMENT_USAGE_METADATA,
+                                safe_serialize(usage_data),
+                            )
+                        # Add groq/ prefix to response model for server-side cost calculation
+                        response_model = getattr(response, "model", model_name)
+                        prefixed_response_model = (
+                            f"groq/{response_model}" if response_model else ""
+                        )
+                        set_span_attribute(
+                            span,
+                            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                            prefixed_response_model,
+                        )
+                    return response
+
+        return wrapper
+
+    def wrapped_async(function: Callable, span_name: str):
+        @functools.wraps(function)
+        async def wrapper(*args, **kwargs):
+            if kwargs.get("stream", False):
+                span = tracer.get_tracer().start_span(
+                    span_name, attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+                )
+                tracer.add_agent_attributes_to_span(span)
+                set_span_attribute(
+                    span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+                )
+                model_name = kwargs.get("model", "")
+                # Add groq/ prefix for server-side cost calculation
+                prefixed_model_name = f"groq/{model_name}" if model_name else ""
+                set_span_attribute(
+                    span, AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
+                )
+                stream_response = await function(*args, **kwargs)
+                return TracedGroqAsyncGenerator(
+                    tracer, stream_response, client, span, model_name
+                )
+            else:
+                async with async_span_context(
+                    tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+                ) as span:
+                    tracer.add_agent_attributes_to_span(span)
+                    set_span_attribute(
+                        span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+                    )
+                    model_name = kwargs.get("model", "")
+                    # Add groq/ prefix for server-side cost calculation
+                    prefixed_model_name = f"groq/{model_name}" if model_name else ""
+                    set_span_attribute(
+                        span, AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
+                    )
+                    response = await function(*args, **kwargs)
+
+                    if isinstance(response, GroqChatCompletion):
+                        output, usage_data = _format_groq_output(response)
+                        # Serialize structured data to JSON for span attribute
+                        if output:
+                            if isinstance(output, list):
+                                import orjson
+
+                                output_str = orjson.dumps(
+                                    output, option=orjson.OPT_INDENT_2
+                                ).decode()
+                            else:
+                                output_str = str(output)
+                            set_span_attribute(
+                                span, AttributeKeys.GEN_AI_COMPLETION, output_str
+                            )
+                        if usage_data:
+                            (
+                                prompt_tokens,
+                                completion_tokens,
+                                cache_read,
+                                cache_creation,
+                            ) = _extract_groq_tokens(usage_data)
+                            set_span_attribute(
+                                span,
+                                AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
+                                prompt_tokens,
+                            )
+                            set_span_attribute(
+                                span,
+                                AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                completion_tokens,
+                            )
+                            set_span_attribute(
+                                span,
+                                AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                                cache_read,
+                            )
+                            set_span_attribute(
+                                span,
+                                AttributeKeys.JUDGMENT_USAGE_METADATA,
+                                safe_serialize(usage_data),
+                            )
+                        # Add groq/ prefix to response model for server-side cost calculation
+                        response_model = getattr(response, "model", model_name)
+                        prefixed_response_model = (
+                            f"groq/{response_model}" if response_model else ""
+                        )
+                        set_span_attribute(
+                            span,
+                            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                            prefixed_response_model,
+                        )
+                    return response
+
+        return wrapper
+
+    span_name = "GROQ_API_CALL"
+    if groq_Groq is not None and isinstance(client, groq_Groq):
+        # Type narrowing for mypy
+        groq_client = client  # type: ignore[assignment]
+        setattr(
+            groq_client.chat.completions,
+            "create",
+            wrapped(groq_client.chat.completions.create, span_name),
+        )
+    elif groq_AsyncGroq is not None and isinstance(client, groq_AsyncGroq):
+        # Type narrowing for mypy
+        async_groq_client = client  # type: ignore[assignment]
+        setattr(
+            async_groq_client.chat.completions,
+            "create",
+            wrapped_async(async_groq_client.chat.completions.create, span_name),
+        )
+
+    return client
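For reference, a usage sketch of the new Groq wrapper — the Tracer constructor arguments below are illustrative, not taken from this diff. wrap_groq_client mutates the client (patching chat.completions.create) and returns it; streaming calls are wrapped in TracedGroqGenerator / TracedGroqAsyncGenerator so token usage and the accumulated completion are recorded when the stream is exhausted. Note that the sync streaming branch records the unprefixed model name, while the other three branches add the groq/ prefix used for server-side cost calculation.

# Usage sketch; assumes a configured judgeval Tracer and the groq SDK installed.
from groq import Groq
from judgeval.tracer import Tracer
from judgeval.tracer.llm.llm_groq.wrapper import wrap_groq_client

tracer = Tracer(project_name="my-project")  # illustrative constructor args
client = wrap_groq_client(tracer, Groq())

# Non-streaming: a span opens and closes around the call, recording
# GEN_AI_PROMPT, GEN_AI_COMPLETION, and token-usage attributes.
resp = client.chat.completions.create(
    model="llama-3.1-8b-instant",
    messages=[{"role": "user", "content": "Hello"}],
)
print(resp.choices[0].message.content)

# Streaming: the span stays open until StopIteration ends the generator.
for chunk in client.chat.completions.create(
    model="llama-3.1-8b-instant",
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
):
    pass  # chunks pass through unchanged while content is accumulated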
judgeval/tracer/llm/llm_openai/__init__.py (new file, +3)
@@ -0,0 +1,3 @@
+from .wrapper import wrap_openai_client
+
+__all__ = ["wrap_openai_client"]
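Each provider package keeps its public surface to a single re-export like the one above; per the file list, llm_anthropic/__init__.py gains a matching three-line stanza (presumably re-exporting its own wrap function, though that hunk is not shown). The import paths visible in this diff after upgrading:

# Entry points confirmed by the hunks above.
from judgeval.tracer.llm.llm_openai import wrap_openai_client
from judgeval.tracer.llm.llm_groq.wrapper import wrap_groq_client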