judgeval 0.16.7__py3-none-any.whl → 0.16.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of judgeval might be problematic. Click here for more details.
- judgeval/api/api_types.py +1 -2
- judgeval/data/judgment_types.py +1 -2
- judgeval/tracer/__init__.py +7 -52
- judgeval/tracer/llm/config.py +12 -44
- judgeval/tracer/llm/constants.py +0 -1
- judgeval/tracer/llm/llm_anthropic/config.py +3 -17
- judgeval/tracer/llm/llm_anthropic/messages.py +440 -0
- judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
- judgeval/tracer/llm/llm_anthropic/wrapper.py +40 -621
- judgeval/tracer/llm/llm_google/__init__.py +3 -0
- judgeval/tracer/llm/llm_google/config.py +3 -21
- judgeval/tracer/llm/llm_google/generate_content.py +125 -0
- judgeval/tracer/llm/llm_google/wrapper.py +19 -454
- judgeval/tracer/llm/llm_openai/beta_chat_completions.py +192 -0
- judgeval/tracer/llm/llm_openai/chat_completions.py +437 -0
- judgeval/tracer/llm/llm_openai/config.py +3 -29
- judgeval/tracer/llm/llm_openai/responses.py +444 -0
- judgeval/tracer/llm/llm_openai/wrapper.py +43 -641
- judgeval/tracer/llm/llm_together/__init__.py +3 -0
- judgeval/tracer/llm/llm_together/chat_completions.py +398 -0
- judgeval/tracer/llm/llm_together/config.py +3 -20
- judgeval/tracer/llm/llm_together/wrapper.py +34 -485
- judgeval/tracer/llm/providers.py +4 -48
- judgeval/utils/decorators/dont_throw.py +30 -14
- judgeval/utils/wrappers/README.md +3 -0
- judgeval/utils/wrappers/__init__.py +15 -0
- judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
- judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
- judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
- judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
- judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
- judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
- judgeval/utils/wrappers/utils.py +35 -0
- judgeval/version.py +1 -1
- {judgeval-0.16.7.dist-info → judgeval-0.16.9.dist-info}/METADATA +1 -1
- {judgeval-0.16.7.dist-info → judgeval-0.16.9.dist-info}/RECORD +40 -27
- judgeval/tracer/llm/llm_groq/config.py +0 -23
- judgeval/tracer/llm/llm_groq/wrapper.py +0 -498
- judgeval/tracer/local_eval_queue.py +0 -199
- /judgeval/{tracer/llm/llm_groq/__init__.py → utils/wrappers/py.typed} +0 -0
- {judgeval-0.16.7.dist-info → judgeval-0.16.9.dist-info}/WHEEL +0 -0
- {judgeval-0.16.7.dist-info → judgeval-0.16.9.dist-info}/entry_points.txt +0 -0
- {judgeval-0.16.7.dist-info → judgeval-0.16.9.dist-info}/licenses/LICENSE.md +0 -0
judgeval/api/api_types.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# generated by datamodel-codegen:
|
|
2
2
|
# filename: .openapi.json
|
|
3
|
-
# timestamp: 2025-10-
|
|
3
|
+
# timestamp: 2025-10-15T19:25:00+00:00
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
|
|
@@ -94,7 +94,6 @@ class ResolveProjectNameRequest(TypedDict):
|
|
|
94
94
|
|
|
95
95
|
class ResolveProjectNameResponse(TypedDict):
|
|
96
96
|
project_id: str
|
|
97
|
-
project_created: bool
|
|
98
97
|
|
|
99
98
|
|
|
100
99
|
class TraceIdRequest(TypedDict):
|
judgeval/data/judgment_types.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# generated by datamodel-codegen:
|
|
2
2
|
# filename: .openapi.json
|
|
3
|
-
# timestamp: 2025-10-
|
|
3
|
+
# timestamp: 2025-10-15T19:24:59+00:00
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
from typing import Annotated, Any, Dict, List, Optional, Union
|
|
@@ -101,7 +101,6 @@ class ResolveProjectNameRequest(BaseModel):
|
|
|
101
101
|
|
|
102
102
|
class ResolveProjectNameResponse(BaseModel):
|
|
103
103
|
project_id: Annotated[str, Field(title="Project Id")]
|
|
104
|
-
project_created: Annotated[bool, Field(title="Project Created")]
|
|
105
104
|
|
|
106
105
|
|
|
107
106
|
class TraceIdRequest(BaseModel):
|
judgeval/tracer/__init__.py
CHANGED
|
@@ -66,7 +66,6 @@ from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys
|
|
|
66
66
|
from judgeval.api import JudgmentSyncClient
|
|
67
67
|
from judgeval.tracer.llm import wrap_provider
|
|
68
68
|
from judgeval.utils.url import url_for
|
|
69
|
-
from judgeval.tracer.local_eval_queue import LocalEvaluationQueue
|
|
70
69
|
from judgeval.tracer.processors import (
|
|
71
70
|
JudgmentSpanProcessor,
|
|
72
71
|
NoOpJudgmentSpanProcessor,
|
|
@@ -99,7 +98,6 @@ class Tracer(metaclass=SingletonMeta):
|
|
|
99
98
|
"enable_evaluation",
|
|
100
99
|
"resource_attributes",
|
|
101
100
|
"api_client",
|
|
102
|
-
"local_eval_queue",
|
|
103
101
|
"judgment_processor",
|
|
104
102
|
"tracer",
|
|
105
103
|
"agent_context",
|
|
@@ -113,7 +111,6 @@ class Tracer(metaclass=SingletonMeta):
|
|
|
113
111
|
enable_evaluation: bool
|
|
114
112
|
resource_attributes: Optional[Dict[str, Any]]
|
|
115
113
|
api_client: JudgmentSyncClient
|
|
116
|
-
local_eval_queue: LocalEvaluationQueue
|
|
117
114
|
judgment_processor: JudgmentSpanProcessor
|
|
118
115
|
tracer: ABCTracer
|
|
119
116
|
agent_context: ContextVar[Optional[AgentContext]]
|
|
@@ -148,7 +145,6 @@ class Tracer(metaclass=SingletonMeta):
|
|
|
148
145
|
api_key=self.api_key,
|
|
149
146
|
organization_id=self.organization_id,
|
|
150
147
|
)
|
|
151
|
-
self.local_eval_queue = LocalEvaluationQueue()
|
|
152
148
|
|
|
153
149
|
if initialize:
|
|
154
150
|
self.initialize()
|
|
@@ -159,14 +155,10 @@ class Tracer(metaclass=SingletonMeta):
|
|
|
159
155
|
|
|
160
156
|
self.judgment_processor = NoOpJudgmentSpanProcessor()
|
|
161
157
|
if self.enable_monitoring:
|
|
162
|
-
project_id
|
|
158
|
+
project_id = Tracer._resolve_project_id(
|
|
163
159
|
self.project_name, self.api_key, self.organization_id
|
|
164
|
-
)
|
|
160
|
+
)
|
|
165
161
|
if project_id:
|
|
166
|
-
if project_created:
|
|
167
|
-
judgeval_logger.info(
|
|
168
|
-
f"Project {self.project_name} was autocreated successfully."
|
|
169
|
-
)
|
|
170
162
|
self.judgment_processor = self.get_processor(
|
|
171
163
|
tracer=self,
|
|
172
164
|
project_name=self.project_name,
|
|
@@ -190,9 +182,6 @@ class Tracer(metaclass=SingletonMeta):
|
|
|
190
182
|
get_version(),
|
|
191
183
|
)
|
|
192
184
|
|
|
193
|
-
if self.enable_evaluation and self.enable_monitoring:
|
|
194
|
-
self.local_eval_queue.start_workers()
|
|
195
|
-
|
|
196
185
|
self._initialized = True
|
|
197
186
|
atexit.register(self._atexit_flush)
|
|
198
187
|
return self
|
|
@@ -240,14 +229,14 @@ class Tracer(metaclass=SingletonMeta):
|
|
|
240
229
|
@staticmethod
|
|
241
230
|
def _resolve_project_id(
|
|
242
231
|
project_name: str, api_key: str, organization_id: str
|
|
243
|
-
) ->
|
|
232
|
+
) -> str:
|
|
244
233
|
"""Resolve project_id from project_name using the API."""
|
|
245
234
|
client = JudgmentSyncClient(
|
|
246
235
|
api_key=api_key,
|
|
247
236
|
organization_id=organization_id,
|
|
248
237
|
)
|
|
249
238
|
response = client.projects_resolve({"project_name": project_name})
|
|
250
|
-
return response["project_id"]
|
|
239
|
+
return response["project_id"]
|
|
251
240
|
|
|
252
241
|
def get_current_span(self):
|
|
253
242
|
return get_current_span()
|
|
@@ -299,6 +288,7 @@ class Tracer(metaclass=SingletonMeta):
|
|
|
299
288
|
)
|
|
300
289
|
current_agent_context["is_agent_entry_point"] = False
|
|
301
290
|
|
|
291
|
+
@dont_throw
|
|
302
292
|
def record_instance_state(self, record_point: Literal["before", "after"], span):
|
|
303
293
|
current_agent_context = self.agent_context.get()
|
|
304
294
|
|
|
@@ -955,45 +945,10 @@ class Tracer(metaclass=SingletonMeta):
|
|
|
955
945
|
eval_run.model_dump(warnings=False) # type: ignore
|
|
956
946
|
)
|
|
957
947
|
else:
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
def wait_for_completion(self, timeout: Optional[float] = 30.0) -> bool:
|
|
962
|
-
"""Wait for all evaluations and span processing to complete.
|
|
963
|
-
|
|
964
|
-
This method blocks until all queued evaluations are processed and
|
|
965
|
-
all pending spans are flushed to the server.
|
|
966
|
-
|
|
967
|
-
Args:
|
|
968
|
-
timeout: Maximum time to wait in seconds. Defaults to 30 seconds.
|
|
969
|
-
None means wait indefinitely.
|
|
970
|
-
|
|
971
|
-
Returns:
|
|
972
|
-
True if all processing completed within the timeout, False otherwise.
|
|
973
|
-
|
|
974
|
-
"""
|
|
975
|
-
try:
|
|
976
|
-
judgeval_logger.debug(
|
|
977
|
-
"Waiting for all evaluations and spans to complete..."
|
|
948
|
+
judgeval_logger.warning(
|
|
949
|
+
"The scorer provided is not hosted, skipping evaluation."
|
|
978
950
|
)
|
|
979
951
|
|
|
980
|
-
# Wait for all queued evaluation work to complete
|
|
981
|
-
eval_completed = self.local_eval_queue.wait_for_completion()
|
|
982
|
-
if not eval_completed:
|
|
983
|
-
judgeval_logger.warning(
|
|
984
|
-
f"Local evaluation queue did not complete within {timeout} seconds"
|
|
985
|
-
)
|
|
986
|
-
return False
|
|
987
|
-
|
|
988
|
-
self.force_flush()
|
|
989
|
-
|
|
990
|
-
judgeval_logger.debug("All evaluations and spans completed successfully")
|
|
991
|
-
return True
|
|
992
|
-
|
|
993
|
-
except Exception as e:
|
|
994
|
-
judgeval_logger.warning(f"Error while waiting for completion: {e}")
|
|
995
|
-
return False
|
|
996
|
-
|
|
997
952
|
|
|
998
953
|
def wrap(client: ApiClient) -> ApiClient:
|
|
999
954
|
try:
|
judgeval/tracer/llm/config.py
CHANGED
|
@@ -8,7 +8,6 @@ from judgeval.tracer.llm.providers import (
|
|
|
8
8
|
HAS_TOGETHER,
|
|
9
9
|
HAS_ANTHROPIC,
|
|
10
10
|
HAS_GOOGLE_GENAI,
|
|
11
|
-
HAS_GROQ,
|
|
12
11
|
ApiClient,
|
|
13
12
|
)
|
|
14
13
|
|
|
@@ -18,55 +17,28 @@ if TYPE_CHECKING:
|
|
|
18
17
|
|
|
19
18
|
def _detect_provider(client: ApiClient) -> ProviderType:
|
|
20
19
|
if HAS_OPENAI:
|
|
21
|
-
from
|
|
20
|
+
from openai import OpenAI, AsyncOpenAI
|
|
22
21
|
|
|
23
|
-
|
|
24
|
-
assert openai_AsyncOpenAI is not None, "OpenAI async client not found"
|
|
25
|
-
if isinstance(client, (openai_OpenAI, openai_AsyncOpenAI)):
|
|
22
|
+
if isinstance(client, (OpenAI, AsyncOpenAI)):
|
|
26
23
|
return ProviderType.OPENAI
|
|
27
24
|
|
|
28
25
|
if HAS_ANTHROPIC:
|
|
29
|
-
from
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
assert anthropic_Anthropic is not None, "Anthropic client not found"
|
|
35
|
-
assert anthropic_AsyncAnthropic is not None, "Anthropic async client not found"
|
|
36
|
-
if isinstance(client, (anthropic_Anthropic, anthropic_AsyncAnthropic)):
|
|
26
|
+
from anthropic import Anthropic, AsyncAnthropic
|
|
27
|
+
|
|
28
|
+
if isinstance(client, (Anthropic, AsyncAnthropic)):
|
|
37
29
|
return ProviderType.ANTHROPIC
|
|
38
30
|
|
|
39
31
|
if HAS_TOGETHER:
|
|
40
|
-
from
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
assert together_Together is not None, "Together client not found"
|
|
46
|
-
assert together_AsyncTogether is not None, "Together async client not found"
|
|
47
|
-
if isinstance(client, (together_Together, together_AsyncTogether)):
|
|
32
|
+
from together import Together, AsyncTogether # type: ignore[import-untyped]
|
|
33
|
+
|
|
34
|
+
if isinstance(client, (Together, AsyncTogether)):
|
|
48
35
|
return ProviderType.TOGETHER
|
|
49
36
|
|
|
50
37
|
if HAS_GOOGLE_GENAI:
|
|
51
|
-
from
|
|
52
|
-
google_genai_Client,
|
|
53
|
-
google_genai_AsyncClient,
|
|
54
|
-
)
|
|
55
|
-
|
|
56
|
-
assert google_genai_Client is not None, "Google GenAI client not found"
|
|
57
|
-
assert google_genai_AsyncClient is not None, (
|
|
58
|
-
"Google GenAI async client not found"
|
|
59
|
-
)
|
|
60
|
-
if isinstance(client, (google_genai_Client, google_genai_AsyncClient)):
|
|
61
|
-
return ProviderType.GOOGLE
|
|
38
|
+
from google.genai import Client as GoogleClient
|
|
62
39
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
assert groq_Groq is not None, "Groq client not found"
|
|
67
|
-
assert groq_AsyncGroq is not None, "Groq async client not found"
|
|
68
|
-
if isinstance(client, (groq_Groq, groq_AsyncGroq)):
|
|
69
|
-
return ProviderType.GROQ
|
|
40
|
+
if isinstance(client, GoogleClient):
|
|
41
|
+
return ProviderType.GOOGLE
|
|
70
42
|
|
|
71
43
|
judgeval_logger.warning(
|
|
72
44
|
f"Unknown client type {type(client)}, Trying to wrap as OpenAI-compatible. "
|
|
@@ -79,7 +51,7 @@ def _detect_provider(client: ApiClient) -> ProviderType:
|
|
|
79
51
|
def wrap_provider(tracer: Tracer, client: ApiClient) -> ApiClient:
|
|
80
52
|
"""
|
|
81
53
|
Wraps an API client to add tracing capabilities.
|
|
82
|
-
Supports OpenAI, Together, Anthropic, Google GenAI
|
|
54
|
+
Supports OpenAI, Together, Anthropic, and Google GenAI clients.
|
|
83
55
|
"""
|
|
84
56
|
provider_type = _detect_provider(client)
|
|
85
57
|
|
|
@@ -99,10 +71,6 @@ def wrap_provider(tracer: Tracer, client: ApiClient) -> ApiClient:
|
|
|
99
71
|
from .llm_google.wrapper import wrap_google_client
|
|
100
72
|
|
|
101
73
|
return wrap_google_client(tracer, client)
|
|
102
|
-
elif provider_type == ProviderType.GROQ:
|
|
103
|
-
from .llm_groq.wrapper import wrap_groq_client
|
|
104
|
-
|
|
105
|
-
return wrap_groq_client(tracer, client)
|
|
106
74
|
else:
|
|
107
75
|
# Default to OpenAI-compatible wrapping for unknown clients
|
|
108
76
|
from .llm_openai.wrapper import wrap_openai_client
|
judgeval/tracer/llm/constants.py
CHANGED
judgeval/tracer/llm/llm_anthropic/config.py
CHANGED
|
@@ -1,20 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
import importlib.util
|
|
2
3
|
|
|
3
|
-
HAS_ANTHROPIC =
|
|
4
|
-
anthropic_Anthropic = None
|
|
5
|
-
anthropic_AsyncAnthropic = None
|
|
4
|
+
HAS_ANTHROPIC = importlib.util.find_spec("anthropic") is not None
|
|
6
5
|
|
|
7
|
-
|
|
8
|
-
from anthropic import Anthropic, AsyncAnthropic # type: ignore[import-untyped]
|
|
9
|
-
|
|
10
|
-
anthropic_Anthropic = Anthropic
|
|
11
|
-
anthropic_AsyncAnthropic = AsyncAnthropic
|
|
12
|
-
HAS_ANTHROPIC = True
|
|
13
|
-
except ImportError:
|
|
14
|
-
pass
|
|
15
|
-
|
|
16
|
-
__all__ = [
|
|
17
|
-
"HAS_ANTHROPIC",
|
|
18
|
-
"anthropic_Anthropic",
|
|
19
|
-
"anthropic_AsyncAnthropic",
|
|
20
|
-
]
|
|
6
|
+
__all__ = ["HAS_ANTHROPIC"]
|