judgeval 0.15.0__py3-none-any.whl → 0.16.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- judgeval/api/__init__.py +4 -18
- judgeval/api/api_types.py +18 -2
- judgeval/data/judgment_types.py +18 -2
- judgeval/logger.py +1 -1
- judgeval/tracer/__init__.py +10 -7
- judgeval/tracer/keys.py +7 -3
- judgeval/tracer/llm/__init__.py +2 -1227
- judgeval/tracer/llm/config.py +110 -0
- judgeval/tracer/llm/constants.py +10 -0
- judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
- judgeval/tracer/llm/llm_anthropic/wrapper.py +611 -0
- judgeval/tracer/llm/llm_google/__init__.py +0 -0
- judgeval/tracer/llm/llm_google/config.py +24 -0
- judgeval/tracer/llm/llm_google/wrapper.py +426 -0
- judgeval/tracer/llm/llm_groq/__init__.py +0 -0
- judgeval/tracer/llm/llm_groq/config.py +23 -0
- judgeval/tracer/llm/llm_groq/wrapper.py +477 -0
- judgeval/tracer/llm/llm_openai/__init__.py +3 -0
- judgeval/tracer/llm/llm_openai/wrapper.py +637 -0
- judgeval/tracer/llm/llm_together/__init__.py +0 -0
- judgeval/tracer/llm/llm_together/config.py +23 -0
- judgeval/tracer/llm/llm_together/wrapper.py +478 -0
- judgeval/tracer/llm/providers.py +5 -5
- judgeval/tracer/processors/__init__.py +1 -1
- judgeval/trainer/console.py +1 -1
- judgeval/utils/decorators/__init__.py +0 -0
- judgeval/utils/decorators/dont_throw.py +21 -0
- judgeval/utils/{decorators.py → decorators/use_once.py} +0 -11
- judgeval/utils/meta.py +1 -1
- judgeval/utils/version_check.py +1 -1
- judgeval/version.py +1 -1
- judgeval-0.16.1.dist-info/METADATA +266 -0
- {judgeval-0.15.0.dist-info → judgeval-0.16.1.dist-info}/RECORD +38 -24
- judgeval/tracer/llm/google/__init__.py +0 -21
- judgeval/tracer/llm/groq/__init__.py +0 -20
- judgeval/tracer/llm/together/__init__.py +0 -20
- judgeval-0.15.0.dist-info/METADATA +0 -158
- /judgeval/tracer/llm/{anthropic/__init__.py → llm_anthropic/config.py} +0 -0
- /judgeval/tracer/llm/{openai/__init__.py → llm_openai/config.py} +0 -0
- {judgeval-0.15.0.dist-info → judgeval-0.16.1.dist-info}/WHEEL +0 -0
- {judgeval-0.15.0.dist-info → judgeval-0.16.1.dist-info}/entry_points.txt +0 -0
- {judgeval-0.15.0.dist-info → judgeval-0.16.1.dist-info}/licenses/LICENSE.md +0 -0
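The bulk of this release is a restructuring of `judgeval/tracer/llm`: the monolithic `__init__.py` (down 1,227 lines) is split into per-provider subpackages (`llm_anthropic`, `llm_google`, `llm_groq`, `llm_openai`, `llm_together`), each with a `config.py` holding an optional-import guard and a `wrapper.py` holding the client instrumentation; `utils/decorators.py` likewise becomes a `decorators/` package. A minimal sketch of the resulting module layout, using only paths from the file list above (how the package re-exports or dispatches to these modules is not shown in this diff):

```python
# Illustrative import paths only, taken from the file list above; the
# public entry points judgeval actually exposes may differ.
from judgeval.tracer.llm.llm_google.wrapper import wrap_google_client
from judgeval.tracer.llm.llm_groq.config import HAS_GROQ, groq_Groq

# Function names below are assumed from the module names, not confirmed
# by this diff.
from judgeval.utils.decorators.dont_throw import dont_throw
from judgeval.utils.decorators.use_once import use_once
```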
judgeval/tracer/llm/llm_google/wrapper.py
@@ -0,0 +1,426 @@
+from __future__ import annotations
+import functools
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+    Optional,
+    Protocol,
+    Tuple,
+    Union,
+    Iterator,
+    AsyncIterator,
+    Sequence,
+    runtime_checkable,
+)
+
+from judgeval.tracer.llm.llm_google.config import (
+    google_genai_Client,
+    google_genai_AsyncClient,
+)
+from judgeval.tracer.managers import sync_span_context, async_span_context
+from judgeval.tracer.keys import AttributeKeys
+from judgeval.tracer.utils import set_span_attribute
+from judgeval.utils.serialize import safe_serialize
+
+if TYPE_CHECKING:
+    from judgeval.tracer import Tracer
+    from opentelemetry.trace import Span
+
+# Keep the original client type for runtime compatibility
+GoogleClientType = Union[google_genai_Client, google_genai_AsyncClient]
+
+
+# Usage protocols
+@runtime_checkable
+class GoogleUsageMetadata(Protocol):
+    prompt_token_count: Optional[int]
+    candidates_token_count: Optional[int]
+    total_token_count: Optional[int]
+    cached_content_token_count: Optional[int]
+
+
+# Content protocols
+@runtime_checkable
+class GooglePart(Protocol):
+    text: str
+
+
+@runtime_checkable
+class GoogleContent(Protocol):
+    parts: Sequence[GooglePart]
+
+
+@runtime_checkable
+class GoogleCandidate(Protocol):
+    content: GoogleContent
+    finish_reason: Optional[str]
+
+
+@runtime_checkable
+class GoogleGenerateContentResponse(Protocol):
+    candidates: Sequence[GoogleCandidate]
+    usage_metadata: Optional[GoogleUsageMetadata]
+    model_version: Optional[str]
+
+
+# Stream protocols
+@runtime_checkable
+class GoogleStreamChunk(Protocol):
+    candidates: Sequence[GoogleCandidate]
+    usage_metadata: Optional[GoogleUsageMetadata]
+
+
+# Client protocols
+@runtime_checkable
+class GoogleClient(Protocol):
+    pass
+
+
+@runtime_checkable
+class GoogleAsyncClient(Protocol):
+    pass
+
+
+# Union types
+GoogleResponseType = GoogleGenerateContentResponse
+GoogleStreamType = Union[Iterator[GoogleStreamChunk], AsyncIterator[GoogleStreamChunk]]
+
+
+def _extract_google_content(chunk: GoogleStreamChunk) -> str:
+    if chunk.candidates and len(chunk.candidates) > 0:
+        candidate = chunk.candidates[0]
+        if (
+            candidate.content
+            and candidate.content.parts
+            and len(candidate.content.parts) > 0
+        ):
+            return candidate.content.parts[0].text or ""
+    return ""
+
+
+def _extract_google_tokens(
+    usage_data: GoogleUsageMetadata,
+) -> Tuple[int, int, int, int]:
+    prompt_tokens = usage_data.prompt_token_count or 0
+    completion_tokens = usage_data.candidates_token_count or 0
+    cache_read_input_tokens = usage_data.cached_content_token_count or 0
+    cache_creation_input_tokens = 0  # Google GenAI doesn't have cache creation tokens
+    return (
+        prompt_tokens,
+        completion_tokens,
+        cache_read_input_tokens,
+        cache_creation_input_tokens,
+    )
+
+
+def _format_google_output(
+    response: GoogleGenerateContentResponse,
+) -> Tuple[Optional[str], Optional[GoogleUsageMetadata]]:
+    message_content: Optional[str] = None
+    usage_data: Optional[GoogleUsageMetadata] = None
+
+    try:
+        if isinstance(response, GoogleGenerateContentResponse):
+            usage_data = response.usage_metadata
+            if response.candidates and len(response.candidates) > 0:
+                candidate = response.candidates[0]
+                if (
+                    candidate.content
+                    and candidate.content.parts
+                    and len(candidate.content.parts) > 0
+                ):
+                    message_content = candidate.content.parts[0].text
+    except (AttributeError, IndexError, TypeError):
+        pass
+
+    return message_content, usage_data
+
+
+class TracedGoogleGenerator:
+    def __init__(
+        self,
+        tracer: Tracer,
+        generator: Iterator[GoogleStreamChunk],
+        client: GoogleClientType,
+        span: Span,
+        model_name: str,
+    ):
+        self.tracer = tracer
+        self.generator = generator
+        self.client = client
+        self.span = span
+        self.model_name = model_name
+        self.accumulated_content = ""
+
+    def __iter__(self) -> Iterator[GoogleStreamChunk]:
+        return self
+
+    def __next__(self) -> GoogleStreamChunk:
+        try:
+            chunk = next(self.generator)
+            content = _extract_google_content(chunk)
+            if content:
+                self.accumulated_content += content
+            if chunk.usage_metadata:
+                prompt_tokens, completion_tokens, cache_read, cache_creation = (
+                    _extract_google_tokens(chunk.usage_metadata)
+                )
+                set_span_attribute(
+                    self.span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+                )
+                set_span_attribute(
+                    self.span,
+                    AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                    completion_tokens,
+                )
+                set_span_attribute(
+                    self.span,
+                    AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                    cache_read,
+                )
+                set_span_attribute(
+                    self.span,
+                    AttributeKeys.JUDGMENT_USAGE_METADATA,
+                    safe_serialize(chunk.usage_metadata),
+                )
+            return chunk
+        except StopIteration:
+            set_span_attribute(
+                self.span, AttributeKeys.GEN_AI_COMPLETION, self.accumulated_content
+            )
+            self.span.end()
+            raise
+        except Exception as e:
+            if self.span:
+                self.span.record_exception(e)
+                self.span.end()
+            raise
+
+
+class TracedGoogleAsyncGenerator:
+    def __init__(
+        self,
+        tracer: Tracer,
+        async_generator: AsyncIterator[GoogleStreamChunk],
+        client: GoogleClientType,
+        span: Span,
+        model_name: str,
+    ):
+        self.tracer = tracer
+        self.async_generator = async_generator
+        self.client = client
+        self.span = span
+        self.model_name = model_name
+        self.accumulated_content = ""
+
+    def __aiter__(self) -> AsyncIterator[GoogleStreamChunk]:
+        return self
+
+    async def __anext__(self) -> GoogleStreamChunk:
+        try:
+            chunk = await self.async_generator.__anext__()
+            content = _extract_google_content(chunk)
+            if content:
+                self.accumulated_content += content
+            if chunk.usage_metadata:
+                prompt_tokens, completion_tokens, cache_read, cache_creation = (
+                    _extract_google_tokens(chunk.usage_metadata)
+                )
+                set_span_attribute(
+                    self.span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+                )
+                set_span_attribute(
+                    self.span,
+                    AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                    completion_tokens,
+                )
+                set_span_attribute(
+                    self.span,
+                    AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                    cache_read,
+                )
+                set_span_attribute(
+                    self.span,
+                    AttributeKeys.JUDGMENT_USAGE_METADATA,
+                    safe_serialize(chunk.usage_metadata),
+                )
+            return chunk
+        except StopAsyncIteration:
+            set_span_attribute(
+                self.span, AttributeKeys.GEN_AI_COMPLETION, self.accumulated_content
+            )
+            self.span.end()
+            raise
+        except Exception as e:
+            if self.span:
+                self.span.record_exception(e)
+                self.span.end()
+            raise
+
+
+def wrap_google_client(tracer: Tracer, client: GoogleClientType) -> GoogleClientType:
+    def wrapped(function: Callable, span_name: str):
+        @functools.wraps(function)
+        def wrapper(*args, **kwargs):
+            if kwargs.get("stream", False):
+                span = tracer.get_tracer().start_span(
+                    span_name, attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+                )
+                tracer.add_agent_attributes_to_span(span)
+                set_span_attribute(
+                    span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+                )
+                model_name = kwargs.get("model", "")
+                set_span_attribute(span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name)
+                stream_response = function(*args, **kwargs)
+                return TracedGoogleGenerator(
+                    tracer, stream_response, client, span, model_name
+                )
+            else:
+                with sync_span_context(
+                    tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+                ) as span:
+                    tracer.add_agent_attributes_to_span(span)
+                    set_span_attribute(
+                        span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+                    )
+                    model_name = kwargs.get("model", "")
+                    set_span_attribute(
+                        span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name
+                    )
+                    response = function(*args, **kwargs)
+
+                    if isinstance(response, GoogleGenerateContentResponse):
+                        output, usage_data = _format_google_output(response)
+                        set_span_attribute(
+                            span, AttributeKeys.GEN_AI_COMPLETION, output
+                        )
+                        if usage_data:
+                            (
+                                prompt_tokens,
+                                completion_tokens,
+                                cache_read,
+                                cache_creation,
+                            ) = _extract_google_tokens(usage_data)
+                            set_span_attribute(
+                                span,
+                                AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
+                                prompt_tokens,
+                            )
+                            set_span_attribute(
+                                span,
+                                AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                completion_tokens,
+                            )
+                            set_span_attribute(
+                                span,
+                                AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                                cache_read,
+                            )
+                            set_span_attribute(
+                                span,
+                                AttributeKeys.JUDGMENT_USAGE_METADATA,
+                                safe_serialize(usage_data),
+                            )
+                        set_span_attribute(
+                            span,
+                            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                            getattr(response, "model_version", model_name),
+                        )
+                    return response
+
+        return wrapper
+
+    def wrapped_async(function: Callable, span_name: str):
+        @functools.wraps(function)
+        async def wrapper(*args, **kwargs):
+            if kwargs.get("stream", False):
+                span = tracer.get_tracer().start_span(
+                    span_name, attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+                )
+                tracer.add_agent_attributes_to_span(span)
+                set_span_attribute(
+                    span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+                )
+                model_name = kwargs.get("model", "")
+                set_span_attribute(span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name)
+                stream_response = await function(*args, **kwargs)
+                return TracedGoogleAsyncGenerator(
+                    tracer, stream_response, client, span, model_name
+                )
+            else:
+                async with async_span_context(
+                    tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+                ) as span:
+                    tracer.add_agent_attributes_to_span(span)
+                    set_span_attribute(
+                        span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+                    )
+                    model_name = kwargs.get("model", "")
+                    set_span_attribute(
+                        span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name
+                    )
+                    response = await function(*args, **kwargs)
+
+                    if isinstance(response, GoogleGenerateContentResponse):
+                        output, usage_data = _format_google_output(response)
+                        set_span_attribute(
+                            span, AttributeKeys.GEN_AI_COMPLETION, output
+                        )
+                        if usage_data:
+                            (
+                                prompt_tokens,
+                                completion_tokens,
+                                cache_read,
+                                cache_creation,
+                            ) = _extract_google_tokens(usage_data)
+                            set_span_attribute(
+                                span,
+                                AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
+                                prompt_tokens,
+                            )
+                            set_span_attribute(
+                                span,
+                                AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                completion_tokens,
+                            )
+                            set_span_attribute(
+                                span,
+                                AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                                cache_read,
+                            )
+                            set_span_attribute(
+                                span,
+                                AttributeKeys.JUDGMENT_USAGE_METADATA,
+                                safe_serialize(usage_data),
+                            )
+                        set_span_attribute(
+                            span,
+                            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                            getattr(response, "model_version", model_name),
+                        )
+                    return response
+
+        return wrapper
+
+    span_name = "GOOGLE_API_CALL"
+    if google_genai_Client is not None and isinstance(client, google_genai_Client):
+        # Type narrowing for mypy
+        google_client = client  # type: ignore[assignment]
+        setattr(
+            google_client.models,
+            "generate_content",
+            wrapped(google_client.models.generate_content, span_name),
+        )
+    elif google_genai_AsyncClient is not None and isinstance(
+        client, google_genai_AsyncClient
+    ):
+        # Type narrowing for mypy
+        async_google_client = client  # type: ignore[assignment]
+        setattr(
+            async_google_client.models,
+            "generate_content",
+            wrapped_async(async_google_client.models.generate_content, span_name),
+        )
+
+    return client
judgeval/tracer/llm/llm_groq/__init__.py (file without changes)
judgeval/tracer/llm/llm_groq/config.py
@@ -0,0 +1,23 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from groq import Groq, AsyncGroq
+
+try:
+    from groq import Groq, AsyncGroq
+
+    HAS_GROQ = True
+except ImportError:
+    HAS_GROQ = False
+    Groq = AsyncGroq = None  # type: ignore[misc,assignment]
+
+# Export the classes for runtime use
+groq_Groq = Groq
+groq_AsyncGroq = AsyncGroq
+
+__all__ = [
+    "HAS_GROQ",
+    "groq_Groq",
+    "groq_AsyncGroq",
+]
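This `config.py` is the optional-dependency guard each provider package uses: when `groq` isn't installed, `HAS_GROQ` is `False` and the aliased `groq_Groq` / `groq_AsyncGroq` names are `None`, so callers can gate on them instead of importing `groq` directly. A hedged sketch of a caller, mirroring the `isinstance` gating visible in the Google wrapper above (`wrap_groq_client` and its signature are assumed by analogy with `wrap_google_client`; only the file `llm_groq/wrapper.py` is confirmed by this diff):

```python
# Sketch of consuming the guard; wrap_groq_client is an assumed name,
# inferred by analogy with wrap_google_client, not confirmed by this diff.
from judgeval.tracer.llm.llm_groq.config import HAS_GROQ, groq_Groq

def maybe_wrap_groq(tracer, client):
    # Only touch the client if the groq SDK is importable and the object
    # really is a groq.Groq instance; otherwise return it unchanged.
    if HAS_GROQ and groq_Groq is not None and isinstance(client, groq_Groq):
        from judgeval.tracer.llm.llm_groq.wrapper import wrap_groq_client  # assumed
        return wrap_groq_client(tracer, client)
    return client
```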