judgeval 0.16.0__py3-none-any.whl → 0.16.2__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (40)
  1. judgeval/api/api_types.py +2 -1
  2. judgeval/data/judgment_types.py +2 -1
  3. judgeval/logger.py +1 -1
  4. judgeval/tracer/__init__.py +10 -7
  5. judgeval/tracer/keys.py +7 -3
  6. judgeval/tracer/llm/__init__.py +2 -1259
  7. judgeval/tracer/llm/config.py +110 -0
  8. judgeval/tracer/llm/constants.py +10 -0
  9. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  10. judgeval/tracer/llm/llm_anthropic/wrapper.py +611 -0
  11. judgeval/tracer/llm/llm_google/__init__.py +0 -0
  12. judgeval/tracer/llm/llm_google/config.py +24 -0
  13. judgeval/tracer/llm/llm_google/wrapper.py +426 -0
  14. judgeval/tracer/llm/llm_groq/__init__.py +0 -0
  15. judgeval/tracer/llm/llm_groq/config.py +23 -0
  16. judgeval/tracer/llm/llm_groq/wrapper.py +477 -0
  17. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  18. judgeval/tracer/llm/llm_openai/wrapper.py +637 -0
  19. judgeval/tracer/llm/llm_together/__init__.py +0 -0
  20. judgeval/tracer/llm/llm_together/config.py +23 -0
  21. judgeval/tracer/llm/llm_together/wrapper.py +478 -0
  22. judgeval/tracer/llm/providers.py +5 -5
  23. judgeval/tracer/processors/__init__.py +1 -1
  24. judgeval/trainer/console.py +1 -1
  25. judgeval/utils/decorators/__init__.py +0 -0
  26. judgeval/utils/decorators/dont_throw.py +21 -0
  27. judgeval/utils/{decorators.py → decorators/use_once.py} +0 -11
  28. judgeval/utils/meta.py +1 -1
  29. judgeval/utils/version_check.py +1 -1
  30. judgeval/version.py +1 -1
  31. {judgeval-0.16.0.dist-info → judgeval-0.16.2.dist-info}/METADATA +1 -1
  32. {judgeval-0.16.0.dist-info → judgeval-0.16.2.dist-info}/RECORD +37 -23
  33. judgeval/tracer/llm/google/__init__.py +0 -21
  34. judgeval/tracer/llm/groq/__init__.py +0 -20
  35. judgeval/tracer/llm/together/__init__.py +0 -20
  36. /judgeval/tracer/llm/{anthropic/__init__.py → llm_anthropic/config.py} +0 -0
  37. /judgeval/tracer/llm/{openai/__init__.py → llm_openai/config.py} +0 -0
  38. {judgeval-0.16.0.dist-info → judgeval-0.16.2.dist-info}/WHEEL +0 -0
  39. {judgeval-0.16.0.dist-info → judgeval-0.16.2.dist-info}/entry_points.txt +0 -0
  40. {judgeval-0.16.0.dist-info → judgeval-0.16.2.dist-info}/licenses/LICENSE.md +0 -0
judgeval/tracer/llm/llm_google/wrapper.py
@@ -0,0 +1,426 @@
+ from __future__ import annotations
+ import functools
+ from typing import (
+     TYPE_CHECKING,
+     Callable,
+     Optional,
+     Protocol,
+     Tuple,
+     Union,
+     Iterator,
+     AsyncIterator,
+     Sequence,
+     runtime_checkable,
+ )
+
+ from judgeval.tracer.llm.llm_google.config import (
+     google_genai_Client,
+     google_genai_AsyncClient,
+ )
+ from judgeval.tracer.managers import sync_span_context, async_span_context
+ from judgeval.tracer.keys import AttributeKeys
+ from judgeval.tracer.utils import set_span_attribute
+ from judgeval.utils.serialize import safe_serialize
+
+ if TYPE_CHECKING:
+     from judgeval.tracer import Tracer
+     from opentelemetry.trace import Span
+
+ # Keep the original client type for runtime compatibility
+ GoogleClientType = Union[google_genai_Client, google_genai_AsyncClient]
+
+
+ # Usage protocols
+ @runtime_checkable
+ class GoogleUsageMetadata(Protocol):
+     prompt_token_count: Optional[int]
+     candidates_token_count: Optional[int]
+     total_token_count: Optional[int]
+     cached_content_token_count: Optional[int]
+
+
+ # Content protocols
+ @runtime_checkable
+ class GooglePart(Protocol):
+     text: str
+
+
+ @runtime_checkable
+ class GoogleContent(Protocol):
+     parts: Sequence[GooglePart]
+
+
+ @runtime_checkable
+ class GoogleCandidate(Protocol):
+     content: GoogleContent
+     finish_reason: Optional[str]
+
+
+ @runtime_checkable
+ class GoogleGenerateContentResponse(Protocol):
+     candidates: Sequence[GoogleCandidate]
+     usage_metadata: Optional[GoogleUsageMetadata]
+     model_version: Optional[str]
+
+
+ # Stream protocols
+ @runtime_checkable
+ class GoogleStreamChunk(Protocol):
+     candidates: Sequence[GoogleCandidate]
+     usage_metadata: Optional[GoogleUsageMetadata]
+
+
+ # Client protocols
+ @runtime_checkable
+ class GoogleClient(Protocol):
+     pass
+
+
+ @runtime_checkable
+ class GoogleAsyncClient(Protocol):
+     pass
+
+
+ # Union types
+ GoogleResponseType = GoogleGenerateContentResponse
+ GoogleStreamType = Union[Iterator[GoogleStreamChunk], AsyncIterator[GoogleStreamChunk]]
+
+
+ def _extract_google_content(chunk: GoogleStreamChunk) -> str:
+     if chunk.candidates and len(chunk.candidates) > 0:
+         candidate = chunk.candidates[0]
+         if (
+             candidate.content
+             and candidate.content.parts
+             and len(candidate.content.parts) > 0
+         ):
+             return candidate.content.parts[0].text or ""
+     return ""
+
+
+ def _extract_google_tokens(
+     usage_data: GoogleUsageMetadata,
+ ) -> Tuple[int, int, int, int]:
+     prompt_tokens = usage_data.prompt_token_count or 0
+     completion_tokens = usage_data.candidates_token_count or 0
+     cache_read_input_tokens = usage_data.cached_content_token_count or 0
+     cache_creation_input_tokens = 0  # Google GenAI doesn't have cache creation tokens
+     return (
+         prompt_tokens,
+         completion_tokens,
+         cache_read_input_tokens,
+         cache_creation_input_tokens,
+     )
+
+
+ def _format_google_output(
+     response: GoogleGenerateContentResponse,
+ ) -> Tuple[Optional[str], Optional[GoogleUsageMetadata]]:
+     message_content: Optional[str] = None
+     usage_data: Optional[GoogleUsageMetadata] = None
+
+     try:
+         if isinstance(response, GoogleGenerateContentResponse):
+             usage_data = response.usage_metadata
+             if response.candidates and len(response.candidates) > 0:
+                 candidate = response.candidates[0]
+                 if (
+                     candidate.content
+                     and candidate.content.parts
+                     and len(candidate.content.parts) > 0
+                 ):
+                     message_content = candidate.content.parts[0].text
+     except (AttributeError, IndexError, TypeError):
+         pass
+
+     return message_content, usage_data
+
+
+ class TracedGoogleGenerator:
+     def __init__(
+         self,
+         tracer: Tracer,
+         generator: Iterator[GoogleStreamChunk],
+         client: GoogleClientType,
+         span: Span,
+         model_name: str,
+     ):
+         self.tracer = tracer
+         self.generator = generator
+         self.client = client
+         self.span = span
+         self.model_name = model_name
+         self.accumulated_content = ""
+
+     def __iter__(self) -> Iterator[GoogleStreamChunk]:
+         return self
+
+     def __next__(self) -> GoogleStreamChunk:
+         try:
+             chunk = next(self.generator)
+             content = _extract_google_content(chunk)
+             if content:
+                 self.accumulated_content += content
+             if chunk.usage_metadata:
+                 prompt_tokens, completion_tokens, cache_read, cache_creation = (
+                     _extract_google_tokens(chunk.usage_metadata)
+                 )
+                 set_span_attribute(
+                     self.span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+                 )
+                 set_span_attribute(
+                     self.span,
+                     AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                     completion_tokens,
+                 )
+                 set_span_attribute(
+                     self.span,
+                     AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                     cache_read,
+                 )
+                 set_span_attribute(
+                     self.span,
+                     AttributeKeys.JUDGMENT_USAGE_METADATA,
+                     safe_serialize(chunk.usage_metadata),
+                 )
+             return chunk
+         except StopIteration:
+             set_span_attribute(
+                 self.span, AttributeKeys.GEN_AI_COMPLETION, self.accumulated_content
+             )
+             self.span.end()
+             raise
+         except Exception as e:
+             if self.span:
+                 self.span.record_exception(e)
+                 self.span.end()
+             raise
+
+
+ class TracedGoogleAsyncGenerator:
+     def __init__(
+         self,
+         tracer: Tracer,
+         async_generator: AsyncIterator[GoogleStreamChunk],
+         client: GoogleClientType,
+         span: Span,
+         model_name: str,
+     ):
+         self.tracer = tracer
+         self.async_generator = async_generator
+         self.client = client
+         self.span = span
+         self.model_name = model_name
+         self.accumulated_content = ""
+
+     def __aiter__(self) -> AsyncIterator[GoogleStreamChunk]:
+         return self
+
+     async def __anext__(self) -> GoogleStreamChunk:
+         try:
+             chunk = await self.async_generator.__anext__()
+             content = _extract_google_content(chunk)
+             if content:
+                 self.accumulated_content += content
+             if chunk.usage_metadata:
+                 prompt_tokens, completion_tokens, cache_read, cache_creation = (
+                     _extract_google_tokens(chunk.usage_metadata)
+                 )
+                 set_span_attribute(
+                     self.span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+                 )
+                 set_span_attribute(
+                     self.span,
+                     AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                     completion_tokens,
+                 )
+                 set_span_attribute(
+                     self.span,
+                     AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                     cache_read,
+                 )
+                 set_span_attribute(
+                     self.span,
+                     AttributeKeys.JUDGMENT_USAGE_METADATA,
+                     safe_serialize(chunk.usage_metadata),
+                 )
+             return chunk
+         except StopAsyncIteration:
+             set_span_attribute(
+                 self.span, AttributeKeys.GEN_AI_COMPLETION, self.accumulated_content
+             )
+             self.span.end()
+             raise
+         except Exception as e:
+             if self.span:
+                 self.span.record_exception(e)
+                 self.span.end()
+             raise
+
+
+ def wrap_google_client(tracer: Tracer, client: GoogleClientType) -> GoogleClientType:
+     def wrapped(function: Callable, span_name: str):
+         @functools.wraps(function)
+         def wrapper(*args, **kwargs):
+             if kwargs.get("stream", False):
+                 span = tracer.get_tracer().start_span(
+                     span_name, attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+                 )
+                 tracer.add_agent_attributes_to_span(span)
+                 set_span_attribute(
+                     span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+                 )
+                 model_name = kwargs.get("model", "")
+                 set_span_attribute(span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name)
+                 stream_response = function(*args, **kwargs)
+                 return TracedGoogleGenerator(
+                     tracer, stream_response, client, span, model_name
+                 )
+             else:
+                 with sync_span_context(
+                     tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+                 ) as span:
+                     tracer.add_agent_attributes_to_span(span)
+                     set_span_attribute(
+                         span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+                     )
+                     model_name = kwargs.get("model", "")
+                     set_span_attribute(
+                         span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name
+                     )
+                     response = function(*args, **kwargs)
+
+                     if isinstance(response, GoogleGenerateContentResponse):
+                         output, usage_data = _format_google_output(response)
+                         set_span_attribute(
+                             span, AttributeKeys.GEN_AI_COMPLETION, output
+                         )
+                         if usage_data:
+                             (
+                                 prompt_tokens,
+                                 completion_tokens,
+                                 cache_read,
+                                 cache_creation,
+                             ) = _extract_google_tokens(usage_data)
+                             set_span_attribute(
+                                 span,
+                                 AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
+                                 prompt_tokens,
+                             )
+                             set_span_attribute(
+                                 span,
+                                 AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                 completion_tokens,
+                             )
+                             set_span_attribute(
+                                 span,
+                                 AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                                 cache_read,
+                             )
+                             set_span_attribute(
+                                 span,
+                                 AttributeKeys.JUDGMENT_USAGE_METADATA,
+                                 safe_serialize(usage_data),
+                             )
+                         set_span_attribute(
+                             span,
+                             AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                             getattr(response, "model_version", model_name),
+                         )
+                     return response
+
+         return wrapper
+
+     def wrapped_async(function: Callable, span_name: str):
+         @functools.wraps(function)
+         async def wrapper(*args, **kwargs):
+             if kwargs.get("stream", False):
+                 span = tracer.get_tracer().start_span(
+                     span_name, attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+                 )
+                 tracer.add_agent_attributes_to_span(span)
+                 set_span_attribute(
+                     span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+                 )
+                 model_name = kwargs.get("model", "")
+                 set_span_attribute(span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name)
+                 stream_response = await function(*args, **kwargs)
+                 return TracedGoogleAsyncGenerator(
+                     tracer, stream_response, client, span, model_name
+                 )
+             else:
+                 async with async_span_context(
+                     tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+                 ) as span:
+                     tracer.add_agent_attributes_to_span(span)
+                     set_span_attribute(
+                         span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+                     )
+                     model_name = kwargs.get("model", "")
+                     set_span_attribute(
+                         span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name
+                     )
+                     response = await function(*args, **kwargs)
+
+                     if isinstance(response, GoogleGenerateContentResponse):
+                         output, usage_data = _format_google_output(response)
+                         set_span_attribute(
+                             span, AttributeKeys.GEN_AI_COMPLETION, output
+                         )
+                         if usage_data:
+                             (
+                                 prompt_tokens,
+                                 completion_tokens,
+                                 cache_read,
+                                 cache_creation,
+                             ) = _extract_google_tokens(usage_data)
+                             set_span_attribute(
+                                 span,
+                                 AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
+                                 prompt_tokens,
+                             )
+                             set_span_attribute(
+                                 span,
+                                 AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                 completion_tokens,
+                             )
+                             set_span_attribute(
+                                 span,
+                                 AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                                 cache_read,
+                             )
+                             set_span_attribute(
+                                 span,
+                                 AttributeKeys.JUDGMENT_USAGE_METADATA,
+                                 safe_serialize(usage_data),
+                             )
+                         set_span_attribute(
+                             span,
+                             AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                             getattr(response, "model_version", model_name),
+                         )
+                     return response
+
+         return wrapper
+
+     span_name = "GOOGLE_API_CALL"
+     if google_genai_Client is not None and isinstance(client, google_genai_Client):
+         # Type narrowing for mypy
+         google_client = client  # type: ignore[assignment]
+         setattr(
+             google_client.models,
+             "generate_content",
+             wrapped(google_client.models.generate_content, span_name),
+         )
+     elif google_genai_AsyncClient is not None and isinstance(
+         client, google_genai_AsyncClient
+     ):
+         # Type narrowing for mypy
+         async_google_client = client  # type: ignore[assignment]
+         setattr(
+             async_google_client.models,
+             "generate_content",
+             wrapped_async(async_google_client.models.generate_content, span_name),
+         )
+
+     return client
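
For reference, a minimal sketch of how this new wrapper might be applied, assuming a configured judgeval Tracer and the google-genai package installed; the Tracer constructor arguments and the prompt below are illustrative, not taken from this diff:

from google import genai
from judgeval.tracer import Tracer
from judgeval.tracer.llm.llm_google.wrapper import wrap_google_client

tracer = Tracer(project_name="my-project")  # illustrative constructor arguments
# wrap_google_client patches client.models.generate_content in place and
# returns the same client object.
client = wrap_google_client(tracer, genai.Client())

# Non-streaming calls are traced inside a span context; streaming calls
# (stream=True) return a TracedGoogleGenerator whose span ends when the
# stream is exhausted.
response = client.models.generate_content(model="gemini-2.0-flash", contents="Hello")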
judgeval/tracer/llm/llm_groq/__init__.py
File without changes
judgeval/tracer/llm/llm_groq/config.py
@@ -0,0 +1,23 @@
+ from __future__ import annotations
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     from groq import Groq, AsyncGroq
+
+ try:
+     from groq import Groq, AsyncGroq
+
+     HAS_GROQ = True
+ except ImportError:
+     HAS_GROQ = False
+     Groq = AsyncGroq = None  # type: ignore[misc,assignment]
+
+ # Export the classes for runtime use
+ groq_Groq = Groq
+ groq_AsyncGroq = AsyncGroq
+
+ __all__ = [
+     "HAS_GROQ",
+     "groq_Groq",
+     "groq_AsyncGroq",
+ ]
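
This config module follows the optional-dependency pattern used by the other provider packages in this release: the import is declared under TYPE_CHECKING for annotations, retried in a try/except at runtime, and re-exported under aliased names alongside a HAS_GROQ flag. A sketch of how calling code might guard on it; wrap_groq_client is an assumed export of llm_groq/wrapper.py (listed in the file changes above), since its public name is not visible in this diff:

from judgeval.tracer.llm.llm_groq.config import HAS_GROQ, groq_Groq

def instrument_if_groq(tracer, client):
    # Skip instrumentation entirely when the optional `groq` package is absent
    if HAS_GROQ and isinstance(client, groq_Groq):
        # Assumed export name; see judgeval/tracer/llm/llm_groq/wrapper.py
        from judgeval.tracer.llm.llm_groq.wrapper import wrap_groq_client
        return wrap_groq_client(tracer, client)
    return client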