judgeval 0.16.7__py3-none-any.whl → 0.16.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of judgeval might be problematic. Click here for more details.

Files changed (43)
  1. judgeval/api/api_types.py +1 -2
  2. judgeval/data/judgment_types.py +1 -2
  3. judgeval/tracer/__init__.py +7 -52
  4. judgeval/tracer/llm/config.py +12 -44
  5. judgeval/tracer/llm/constants.py +0 -1
  6. judgeval/tracer/llm/llm_anthropic/config.py +3 -17
  7. judgeval/tracer/llm/llm_anthropic/messages.py +440 -0
  8. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  9. judgeval/tracer/llm/llm_anthropic/wrapper.py +40 -621
  10. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  11. judgeval/tracer/llm/llm_google/config.py +3 -21
  12. judgeval/tracer/llm/llm_google/generate_content.py +125 -0
  13. judgeval/tracer/llm/llm_google/wrapper.py +19 -454
  14. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +192 -0
  15. judgeval/tracer/llm/llm_openai/chat_completions.py +437 -0
  16. judgeval/tracer/llm/llm_openai/config.py +3 -29
  17. judgeval/tracer/llm/llm_openai/responses.py +444 -0
  18. judgeval/tracer/llm/llm_openai/wrapper.py +43 -641
  19. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  20. judgeval/tracer/llm/llm_together/chat_completions.py +398 -0
  21. judgeval/tracer/llm/llm_together/config.py +3 -20
  22. judgeval/tracer/llm/llm_together/wrapper.py +34 -485
  23. judgeval/tracer/llm/providers.py +4 -48
  24. judgeval/utils/decorators/dont_throw.py +30 -14
  25. judgeval/utils/wrappers/README.md +3 -0
  26. judgeval/utils/wrappers/__init__.py +15 -0
  27. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  28. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  29. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  30. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  31. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  32. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  33. judgeval/utils/wrappers/utils.py +35 -0
  34. judgeval/version.py +1 -1
  35. {judgeval-0.16.7.dist-info → judgeval-0.16.8.dist-info}/METADATA +1 -1
  36. {judgeval-0.16.7.dist-info → judgeval-0.16.8.dist-info}/RECORD +40 -27
  37. judgeval/tracer/llm/llm_groq/config.py +0 -23
  38. judgeval/tracer/llm/llm_groq/wrapper.py +0 -498
  39. judgeval/tracer/local_eval_queue.py +0 -199
  40. /judgeval/{tracer/llm/llm_groq/__init__.py → utils/wrappers/py.typed} +0 -0
  41. {judgeval-0.16.7.dist-info → judgeval-0.16.8.dist-info}/WHEEL +0 -0
  42. {judgeval-0.16.7.dist-info → judgeval-0.16.8.dist-info}/entry_points.txt +0 -0
  43. {judgeval-0.16.7.dist-info → judgeval-0.16.8.dist-info}/licenses/LICENSE.md +0 -0
judgeval/api/api_types.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # generated by datamodel-codegen:
2
2
  # filename: .openapi.json
3
- # timestamp: 2025-10-09T00:16:42+00:00
3
+ # timestamp: 2025-10-15T19:25:00+00:00
4
4
 
5
5
  from __future__ import annotations
6
6
  from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -94,7 +94,6 @@ class ResolveProjectNameRequest(TypedDict):
94
94
 
95
95
  class ResolveProjectNameResponse(TypedDict):
96
96
  project_id: str
97
- project_created: bool
98
97
 
99
98
 
100
99
  class TraceIdRequest(TypedDict):
judgeval/data/judgment_types.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # generated by datamodel-codegen:
2
2
  # filename: .openapi.json
3
- # timestamp: 2025-10-09T00:16:41+00:00
3
+ # timestamp: 2025-10-15T19:24:59+00:00
4
4
 
5
5
  from __future__ import annotations
6
6
  from typing import Annotated, Any, Dict, List, Optional, Union
@@ -101,7 +101,6 @@ class ResolveProjectNameRequest(BaseModel):
101
101
 
102
102
  class ResolveProjectNameResponse(BaseModel):
103
103
  project_id: Annotated[str, Field(title="Project Id")]
104
- project_created: Annotated[bool, Field(title="Project Created")]
105
104
 
106
105
 
107
106
  class TraceIdRequest(BaseModel):
judgeval/tracer/__init__.py CHANGED
@@ -66,7 +66,6 @@ from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys
66
66
  from judgeval.api import JudgmentSyncClient
67
67
  from judgeval.tracer.llm import wrap_provider
68
68
  from judgeval.utils.url import url_for
69
- from judgeval.tracer.local_eval_queue import LocalEvaluationQueue
70
69
  from judgeval.tracer.processors import (
71
70
  JudgmentSpanProcessor,
72
71
  NoOpJudgmentSpanProcessor,
@@ -99,7 +98,6 @@ class Tracer(metaclass=SingletonMeta):
99
98
  "enable_evaluation",
100
99
  "resource_attributes",
101
100
  "api_client",
102
- "local_eval_queue",
103
101
  "judgment_processor",
104
102
  "tracer",
105
103
  "agent_context",
@@ -113,7 +111,6 @@ class Tracer(metaclass=SingletonMeta):
113
111
  enable_evaluation: bool
114
112
  resource_attributes: Optional[Dict[str, Any]]
115
113
  api_client: JudgmentSyncClient
116
- local_eval_queue: LocalEvaluationQueue
117
114
  judgment_processor: JudgmentSpanProcessor
118
115
  tracer: ABCTracer
119
116
  agent_context: ContextVar[Optional[AgentContext]]
@@ -148,7 +145,6 @@ class Tracer(metaclass=SingletonMeta):
148
145
  api_key=self.api_key,
149
146
  organization_id=self.organization_id,
150
147
  )
151
- self.local_eval_queue = LocalEvaluationQueue()
152
148
 
153
149
  if initialize:
154
150
  self.initialize()
@@ -159,14 +155,10 @@ class Tracer(metaclass=SingletonMeta):
159
155
 
160
156
  self.judgment_processor = NoOpJudgmentSpanProcessor()
161
157
  if self.enable_monitoring:
162
- project_id, project_created = Tracer._resolve_project_id(
158
+ project_id = Tracer._resolve_project_id(
163
159
  self.project_name, self.api_key, self.organization_id
164
- ) or (None, False)
160
+ )
165
161
  if project_id:
166
- if project_created:
167
- judgeval_logger.info(
168
- f"Project {self.project_name} was autocreated successfully."
169
- )
170
162
  self.judgment_processor = self.get_processor(
171
163
  tracer=self,
172
164
  project_name=self.project_name,
@@ -190,9 +182,6 @@ class Tracer(metaclass=SingletonMeta):
190
182
  get_version(),
191
183
  )
192
184
 
193
- if self.enable_evaluation and self.enable_monitoring:
194
- self.local_eval_queue.start_workers()
195
-
196
185
  self._initialized = True
197
186
  atexit.register(self._atexit_flush)
198
187
  return self
@@ -240,14 +229,14 @@ class Tracer(metaclass=SingletonMeta):
240
229
  @staticmethod
241
230
  def _resolve_project_id(
242
231
  project_name: str, api_key: str, organization_id: str
243
- ) -> Tuple[str, bool]:
232
+ ) -> str:
244
233
  """Resolve project_id from project_name using the API."""
245
234
  client = JudgmentSyncClient(
246
235
  api_key=api_key,
247
236
  organization_id=organization_id,
248
237
  )
249
238
  response = client.projects_resolve({"project_name": project_name})
250
- return response["project_id"], response["project_created"]
239
+ return response["project_id"]
251
240
 
252
241
  def get_current_span(self):
253
242
  return get_current_span()
@@ -299,6 +288,7 @@ class Tracer(metaclass=SingletonMeta):
299
288
  )
300
289
  current_agent_context["is_agent_entry_point"] = False
301
290
 
291
+ @dont_throw
302
292
  def record_instance_state(self, record_point: Literal["before", "after"], span):
303
293
  current_agent_context = self.agent_context.get()
304
294
 
@@ -955,45 +945,10 @@ class Tracer(metaclass=SingletonMeta):
955
945
  eval_run.model_dump(warnings=False) # type: ignore
956
946
  )
957
947
  else:
958
- # Enqueue the evaluation run to the local evaluation queue
959
- self.local_eval_queue.enqueue(eval_run)
960
-
961
- def wait_for_completion(self, timeout: Optional[float] = 30.0) -> bool:
962
- """Wait for all evaluations and span processing to complete.
963
-
964
- This method blocks until all queued evaluations are processed and
965
- all pending spans are flushed to the server.
966
-
967
- Args:
968
- timeout: Maximum time to wait in seconds. Defaults to 30 seconds.
969
- None means wait indefinitely.
970
-
971
- Returns:
972
- True if all processing completed within the timeout, False otherwise.
973
-
974
- """
975
- try:
976
- judgeval_logger.debug(
977
- "Waiting for all evaluations and spans to complete..."
948
+ judgeval_logger.warning(
949
+ "The scorer provided is not hosted, skipping evaluation."
978
950
  )
979
951
 
980
- # Wait for all queued evaluation work to complete
981
- eval_completed = self.local_eval_queue.wait_for_completion()
982
- if not eval_completed:
983
- judgeval_logger.warning(
984
- f"Local evaluation queue did not complete within {timeout} seconds"
985
- )
986
- return False
987
-
988
- self.force_flush()
989
-
990
- judgeval_logger.debug("All evaluations and spans completed successfully")
991
- return True
992
-
993
- except Exception as e:
994
- judgeval_logger.warning(f"Error while waiting for completion: {e}")
995
- return False
996
-
997
952
 
998
953
  def wrap(client: ApiClient) -> ApiClient:
999
954
  try:
judgeval/tracer/llm/config.py CHANGED
@@ -8,7 +8,6 @@ from judgeval.tracer.llm.providers import (
8
8
  HAS_TOGETHER,
9
9
  HAS_ANTHROPIC,
10
10
  HAS_GOOGLE_GENAI,
11
- HAS_GROQ,
12
11
  ApiClient,
13
12
  )
14
13
 
@@ -18,55 +17,28 @@ if TYPE_CHECKING:
18
17
 
19
18
  def _detect_provider(client: ApiClient) -> ProviderType:
20
19
  if HAS_OPENAI:
21
- from judgeval.tracer.llm.providers import openai_OpenAI, openai_AsyncOpenAI
20
+ from openai import OpenAI, AsyncOpenAI
22
21
 
23
- assert openai_OpenAI is not None, "OpenAI client not found"
24
- assert openai_AsyncOpenAI is not None, "OpenAI async client not found"
25
- if isinstance(client, (openai_OpenAI, openai_AsyncOpenAI)):
22
+ if isinstance(client, (OpenAI, AsyncOpenAI)):
26
23
  return ProviderType.OPENAI
27
24
 
28
25
  if HAS_ANTHROPIC:
29
- from judgeval.tracer.llm.providers import (
30
- anthropic_Anthropic,
31
- anthropic_AsyncAnthropic,
32
- )
33
-
34
- assert anthropic_Anthropic is not None, "Anthropic client not found"
35
- assert anthropic_AsyncAnthropic is not None, "Anthropic async client not found"
36
- if isinstance(client, (anthropic_Anthropic, anthropic_AsyncAnthropic)):
26
+ from anthropic import Anthropic, AsyncAnthropic
27
+
28
+ if isinstance(client, (Anthropic, AsyncAnthropic)):
37
29
  return ProviderType.ANTHROPIC
38
30
 
39
31
  if HAS_TOGETHER:
40
- from judgeval.tracer.llm.providers import (
41
- together_Together,
42
- together_AsyncTogether,
43
- )
44
-
45
- assert together_Together is not None, "Together client not found"
46
- assert together_AsyncTogether is not None, "Together async client not found"
47
- if isinstance(client, (together_Together, together_AsyncTogether)):
32
+ from together import Together, AsyncTogether # type: ignore[import-untyped]
33
+
34
+ if isinstance(client, (Together, AsyncTogether)):
48
35
  return ProviderType.TOGETHER
49
36
 
50
37
  if HAS_GOOGLE_GENAI:
51
- from judgeval.tracer.llm.providers import (
52
- google_genai_Client,
53
- google_genai_AsyncClient,
54
- )
55
-
56
- assert google_genai_Client is not None, "Google GenAI client not found"
57
- assert google_genai_AsyncClient is not None, (
58
- "Google GenAI async client not found"
59
- )
60
- if isinstance(client, (google_genai_Client, google_genai_AsyncClient)):
61
- return ProviderType.GOOGLE
38
+ from google.genai import Client as GoogleClient
62
39
 
63
- if HAS_GROQ:
64
- from judgeval.tracer.llm.providers import groq_Groq, groq_AsyncGroq
65
-
66
- assert groq_Groq is not None, "Groq client not found"
67
- assert groq_AsyncGroq is not None, "Groq async client not found"
68
- if isinstance(client, (groq_Groq, groq_AsyncGroq)):
69
- return ProviderType.GROQ
40
+ if isinstance(client, GoogleClient):
41
+ return ProviderType.GOOGLE
70
42
 
71
43
  judgeval_logger.warning(
72
44
  f"Unknown client type {type(client)}, Trying to wrap as OpenAI-compatible. "
@@ -79,7 +51,7 @@ def _detect_provider(client: ApiClient) -> ProviderType:
79
51
  def wrap_provider(tracer: Tracer, client: ApiClient) -> ApiClient:
80
52
  """
81
53
  Wraps an API client to add tracing capabilities.
82
- Supports OpenAI, Together, Anthropic, Google GenAI, and Groq clients.
54
+ Supports OpenAI, Together, Anthropic, and Google GenAI clients.
83
55
  """
84
56
  provider_type = _detect_provider(client)
85
57
 
@@ -99,10 +71,6 @@ def wrap_provider(tracer: Tracer, client: ApiClient) -> ApiClient:
99
71
  from .llm_google.wrapper import wrap_google_client
100
72
 
101
73
  return wrap_google_client(tracer, client)
102
- elif provider_type == ProviderType.GROQ:
103
- from .llm_groq.wrapper import wrap_groq_client
104
-
105
- return wrap_groq_client(tracer, client)
106
74
  else:
107
75
  # Default to OpenAI-compatible wrapping for unknown clients
108
76
  from .llm_openai.wrapper import wrap_openai_client
judgeval/tracer/llm/constants.py CHANGED
@@ -6,5 +6,4 @@ class ProviderType(Enum):
6
6
  ANTHROPIC = "anthropic"
7
7
  TOGETHER = "together"
8
8
  GOOGLE = "google"
9
- GROQ = "groq"
10
9
  DEFAULT = "default"
judgeval/tracer/llm/llm_anthropic/config.py CHANGED
@@ -1,20 +1,6 @@
1
1
  from __future__ import annotations
2
+ import importlib.util
2
3
 
3
- HAS_ANTHROPIC = False
4
- anthropic_Anthropic = None
5
- anthropic_AsyncAnthropic = None
4
+ HAS_ANTHROPIC = importlib.util.find_spec("anthropic") is not None
6
5
 
7
- try:
8
- from anthropic import Anthropic, AsyncAnthropic # type: ignore[import-untyped]
9
-
10
- anthropic_Anthropic = Anthropic
11
- anthropic_AsyncAnthropic = AsyncAnthropic
12
- HAS_ANTHROPIC = True
13
- except ImportError:
14
- pass
15
-
16
- __all__ = [
17
- "HAS_ANTHROPIC",
18
- "anthropic_Anthropic",
19
- "anthropic_AsyncAnthropic",
20
- ]
6
+ __all__ = ["HAS_ANTHROPIC"]