judgeval 0.0.11__py3-none-any.whl → 0.22.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of judgeval might be problematic. Click here for more details.

Files changed (171) hide show
  1. judgeval/__init__.py +177 -12
  2. judgeval/api/__init__.py +519 -0
  3. judgeval/api/api_types.py +407 -0
  4. judgeval/cli.py +79 -0
  5. judgeval/constants.py +76 -47
  6. judgeval/data/__init__.py +3 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +15 -56
  9. judgeval/data/judgment_types.py +450 -0
  10. judgeval/data/result.py +29 -73
  11. judgeval/data/scorer_data.py +29 -62
  12. judgeval/data/scripts/fix_default_factory.py +23 -0
  13. judgeval/data/scripts/openapi_transform.py +123 -0
  14. judgeval/data/trace.py +121 -0
  15. judgeval/dataset/__init__.py +264 -0
  16. judgeval/env.py +52 -0
  17. judgeval/evaluation/__init__.py +344 -0
  18. judgeval/exceptions.py +27 -0
  19. judgeval/integrations/langgraph/__init__.py +13 -0
  20. judgeval/integrations/openlit/__init__.py +50 -0
  21. judgeval/judges/__init__.py +2 -3
  22. judgeval/judges/base_judge.py +2 -3
  23. judgeval/judges/litellm_judge.py +100 -20
  24. judgeval/judges/together_judge.py +101 -20
  25. judgeval/judges/utils.py +20 -24
  26. judgeval/logger.py +62 -0
  27. judgeval/prompt/__init__.py +330 -0
  28. judgeval/scorers/__init__.py +18 -25
  29. judgeval/scorers/agent_scorer.py +17 -0
  30. judgeval/scorers/api_scorer.py +45 -41
  31. judgeval/scorers/base_scorer.py +83 -38
  32. judgeval/scorers/example_scorer.py +17 -0
  33. judgeval/scorers/exceptions.py +1 -0
  34. judgeval/scorers/judgeval_scorers/__init__.py +0 -148
  35. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +19 -17
  36. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +13 -19
  37. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +12 -19
  38. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +13 -19
  39. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +15 -0
  40. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +327 -0
  41. judgeval/scorers/score.py +77 -306
  42. judgeval/scorers/utils.py +4 -199
  43. judgeval/tracer/__init__.py +1122 -2
  44. judgeval/tracer/constants.py +1 -0
  45. judgeval/tracer/exporters/__init__.py +40 -0
  46. judgeval/tracer/exporters/s3.py +119 -0
  47. judgeval/tracer/exporters/store.py +59 -0
  48. judgeval/tracer/exporters/utils.py +32 -0
  49. judgeval/tracer/keys.py +63 -0
  50. judgeval/tracer/llm/__init__.py +7 -0
  51. judgeval/tracer/llm/config.py +78 -0
  52. judgeval/tracer/llm/constants.py +9 -0
  53. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  54. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  55. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  56. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  57. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  58. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  59. judgeval/tracer/llm/llm_google/config.py +6 -0
  60. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  61. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  62. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  63. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  64. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  65. judgeval/tracer/llm/llm_openai/config.py +6 -0
  66. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  67. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  68. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  69. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  70. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  71. judgeval/tracer/llm/llm_together/config.py +6 -0
  72. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  73. judgeval/tracer/llm/providers.py +19 -0
  74. judgeval/tracer/managers.py +167 -0
  75. judgeval/tracer/processors/__init__.py +220 -0
  76. judgeval/tracer/utils.py +19 -0
  77. judgeval/trainer/__init__.py +14 -0
  78. judgeval/trainer/base_trainer.py +122 -0
  79. judgeval/trainer/config.py +128 -0
  80. judgeval/trainer/console.py +144 -0
  81. judgeval/trainer/fireworks_trainer.py +396 -0
  82. judgeval/trainer/trainable_model.py +243 -0
  83. judgeval/trainer/trainer.py +70 -0
  84. judgeval/utils/async_utils.py +39 -0
  85. judgeval/utils/decorators/__init__.py +0 -0
  86. judgeval/utils/decorators/dont_throw.py +37 -0
  87. judgeval/utils/decorators/use_once.py +13 -0
  88. judgeval/utils/file_utils.py +97 -0
  89. judgeval/utils/guards.py +36 -0
  90. judgeval/utils/meta.py +27 -0
  91. judgeval/utils/project.py +15 -0
  92. judgeval/utils/serialize.py +253 -0
  93. judgeval/utils/testing.py +70 -0
  94. judgeval/utils/url.py +10 -0
  95. judgeval/utils/version_check.py +28 -0
  96. judgeval/utils/wrappers/README.md +3 -0
  97. judgeval/utils/wrappers/__init__.py +15 -0
  98. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  99. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  100. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  101. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  102. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  103. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  104. judgeval/utils/wrappers/py.typed +0 -0
  105. judgeval/utils/wrappers/utils.py +35 -0
  106. judgeval/version.py +5 -0
  107. judgeval/warnings.py +4 -0
  108. judgeval-0.22.2.dist-info/METADATA +265 -0
  109. judgeval-0.22.2.dist-info/RECORD +112 -0
  110. judgeval-0.22.2.dist-info/entry_points.txt +2 -0
  111. judgeval/clients.py +0 -39
  112. judgeval/common/__init__.py +0 -8
  113. judgeval/common/exceptions.py +0 -28
  114. judgeval/common/logger.py +0 -189
  115. judgeval/common/tracer.py +0 -798
  116. judgeval/common/utils.py +0 -763
  117. judgeval/data/api_example.py +0 -111
  118. judgeval/data/datasets/__init__.py +0 -5
  119. judgeval/data/datasets/dataset.py +0 -286
  120. judgeval/data/datasets/eval_dataset_client.py +0 -193
  121. judgeval/data/datasets/ground_truth.py +0 -54
  122. judgeval/data/datasets/utils.py +0 -74
  123. judgeval/evaluation_run.py +0 -132
  124. judgeval/judges/mixture_of_judges.py +0 -248
  125. judgeval/judgment_client.py +0 -354
  126. judgeval/run_evaluation.py +0 -439
  127. judgeval/scorers/judgeval_scorer.py +0 -140
  128. judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -19
  129. judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -19
  130. judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -22
  131. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -19
  132. judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -32
  133. judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -20
  134. judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py +0 -19
  135. judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -3
  136. judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -3
  137. judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -54
  138. judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -24
  139. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -4
  140. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -277
  141. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -169
  142. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -4
  143. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -298
  144. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -174
  145. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -3
  146. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -264
  147. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -106
  148. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -3
  149. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -254
  150. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -142
  151. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -3
  152. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -245
  153. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -121
  154. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -3
  155. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -325
  156. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -268
  157. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -3
  158. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -263
  159. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -104
  160. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -5
  161. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -134
  162. judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -3
  163. judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -247
  164. judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -550
  165. judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py +0 -3
  166. judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py +0 -157
  167. judgeval/scorers/prompt_scorer.py +0 -439
  168. judgeval-0.0.11.dist-info/METADATA +0 -36
  169. judgeval-0.0.11.dist-info/RECORD +0 -84
  170. {judgeval-0.0.11.dist-info → judgeval-0.22.2.dist-info}/WHEEL +0 -0
  171. {judgeval-0.0.11.dist-info → judgeval-0.22.2.dist-info}/licenses/LICENSE.md +0 -0
@@ -1,3 +1,1123 @@
1
- from judgeval.common.tracer import Tracer, wrap, TraceClient, TraceManagerClient
1
+ from __future__ import annotations
2
+ from contextvars import ContextVar
3
+ import atexit
4
+ import functools
5
+ import inspect
6
+ import random
7
+ from typing import (
8
+ Any,
9
+ Union,
10
+ Callable,
11
+ Dict,
12
+ List,
13
+ Optional,
14
+ Tuple,
15
+ Type,
16
+ TypeVar,
17
+ overload,
18
+ Literal,
19
+ TypedDict,
20
+ Generator,
21
+ AsyncGenerator,
22
+ Iterable,
23
+ )
24
+ import contextvars
25
+ import asyncio
26
+ from functools import partial
27
+ from warnings import warn
2
28
 
3
- __all__ = ["Tracer", "wrap", "TraceClient", "TraceManagerClient"]
29
+ from opentelemetry.sdk.trace import TracerProvider
30
+ from opentelemetry.sdk.resources import Resource
31
+ from opentelemetry.trace import (
32
+ Status,
33
+ StatusCode,
34
+ Tracer as ABCTracer,
35
+ Span,
36
+ get_current_span,
37
+ get_tracer_provider,
38
+ set_tracer_provider,
39
+ INVALID_SPAN_CONTEXT,
40
+ )
41
+
42
+ from judgeval.data.evaluation_run import ExampleEvaluationRun, TraceEvaluationRun
43
+ from judgeval.data.example import Example
44
+ from judgeval.env import (
45
+ JUDGMENT_API_KEY,
46
+ JUDGMENT_DEFAULT_GPT_MODEL,
47
+ JUDGMENT_ORG_ID,
48
+ JUDGMENT_ENABLE_MONITORING,
49
+ JUDGMENT_ENABLE_EVALUATIONS,
50
+ )
51
+ from judgeval.logger import judgeval_logger
52
+ from judgeval.scorers.api_scorer import TraceAPIScorerConfig, ExampleAPIScorerConfig
53
+ from judgeval.scorers.example_scorer import ExampleScorer
54
+ from judgeval.tracer.constants import JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME
55
+ from judgeval.tracer.managers import (
56
+ sync_span_context,
57
+ async_span_context,
58
+ sync_agent_context,
59
+ async_agent_context,
60
+ )
61
+ from judgeval.utils.decorators.dont_throw import dont_throw
62
+ from judgeval.utils.guards import expect_api_key, expect_organization_id
63
+ from judgeval.utils.serialize import safe_serialize
64
+ from judgeval.utils.meta import SingletonMeta
65
+ from judgeval.version import get_version
66
+ from judgeval.warnings import JudgmentWarning
67
+
68
+ from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys
69
+ from judgeval.api import JudgmentSyncClient
70
+ from judgeval.tracer.llm import wrap_provider
71
+ from judgeval.utils.url import url_for
72
+ from judgeval.tracer.processors import (
73
+ JudgmentSpanProcessor,
74
+ NoOpJudgmentSpanProcessor,
75
+ )
76
+ from judgeval.tracer.utils import set_span_attribute, TraceScorerConfig
77
+ from judgeval.utils.project import _resolve_project_id
78
+ from opentelemetry.trace import use_span
79
+
80
+ C = TypeVar("C", bound=Callable)
81
+ Cls = TypeVar("Cls", bound=Type)
82
+ ApiClient = TypeVar("ApiClient", bound=Any)
83
+
84
+
85
class AgentContext(TypedDict):
    """Per-agent metadata kept in the tracer's ``agent_context`` variable.

    The tracer copies the identifying fields onto spans and uses the
    tracking fields to decide which instance state to snapshot.
    """

    # Identifier written to spans as the agent-id attribute.
    agent_id: str
    # Class name written to spans; may be absent.
    class_name: Optional[str]
    # Instance name written to spans; may be absent.
    instance_name: Optional[str]
    # When truthy, instance state is recorded before/after the wrapped call.
    track_state: bool
    # Attribute names to snapshot; ``None`` means every public attribute.
    track_attributes: Optional[List[str]]
    # Maps attribute names to the names used in the recorded snapshot.
    field_mappings: Dict[str, str]
    # The object whose state is snapshotted.
    instance: Any
    # Written to the span, then reset to ``False`` by the tracer.
    is_agent_entry_point: bool
    # Identifier of the parent agent, written to spans; may be absent.
    parent_agent_id: Optional[str]
95
+
96
+
97
+ class Tracer(metaclass=SingletonMeta):
98
+ __slots__ = (
99
+ "api_key",
100
+ "organization_id",
101
+ "project_name",
102
+ "enable_monitoring",
103
+ "enable_evaluation",
104
+ "resource_attributes",
105
+ "api_client",
106
+ "judgment_processor",
107
+ "tracer",
108
+ "agent_context",
109
+ "customer_id",
110
+ "_initialized",
111
+ )
112
+
113
+ api_key: str | None
114
+ organization_id: str | None
115
+ project_name: str
116
+ enable_monitoring: bool
117
+ enable_evaluation: bool
118
+ resource_attributes: Optional[Dict[str, Any]]
119
+ api_client: JudgmentSyncClient
120
+ judgment_processor: JudgmentSpanProcessor
121
+ tracer: ABCTracer
122
+ agent_context: ContextVar[Optional[AgentContext]]
123
+ customer_id: ContextVar[Optional[str]]
124
+ _initialized: bool
125
+
126
def __init__(
    self,
    /,
    *,
    project_name: str,
    api_key: str | None = None,
    organization_id: str | None = None,
    enable_monitoring: bool = JUDGMENT_ENABLE_MONITORING.lower() == "true",
    enable_evaluation: bool = JUDGMENT_ENABLE_EVALUATIONS.lower() == "true",
    resource_attributes: Optional[Dict[str, Any]] = None,
    initialize: bool = True,
):
    """Configure the tracer and, unless told otherwise, initialize it.

    Args:
        project_name: Project the tracer reports to.
        api_key: API key; falls back to ``JUDGMENT_API_KEY``.
        organization_id: Organization id; falls back to ``JUDGMENT_ORG_ID``.
        enable_monitoring: Whether tracing is active. The default is computed
            once, when this function is defined, from
            ``JUDGMENT_ENABLE_MONITORING``.
        enable_evaluation: Whether evaluations are attached to spans. The
            default is computed the same way from
            ``JUDGMENT_ENABLE_EVALUATIONS``.
        resource_attributes: Extra attributes forwarded to the span processor.
        initialize: When true, ``initialize()`` is called before returning.
    """
    # The context variables and the ``_initialized`` flag are created only
    # the first time this object is configured.
    first_configuration = not hasattr(self, "_initialized")
    if first_configuration:
        self._initialized = False
        self.agent_context = ContextVar("current_agent_context", default=None)
        self.customer_id = ContextVar("current_customer_id", default=None)

    self.project_name = project_name
    self.api_key = expect_api_key(api_key or JUDGMENT_API_KEY)
    self.organization_id = expect_organization_id(
        organization_id or JUDGMENT_ORG_ID
    )
    self.enable_monitoring = enable_monitoring
    self.enable_evaluation = enable_evaluation
    self.resource_attributes = resource_attributes

    credentials_missing = not (self.api_key and self.organization_id)
    if credentials_missing:
        # NOTE: ``api_client`` is left unassigned on this path.
        judgeval_logger.error(
            "API Key or Organization ID is not set. You must set them in the environment variables to use the tracer."
        )
    else:
        self.api_client = JudgmentSyncClient(
            api_key=self.api_key, organization_id=self.organization_id
        )

    if initialize:
        self.initialize()
163
+
164
def initialize(self) -> Tracer:
    """Set up the span processor and OpenTelemetry tracer once.

    Later calls return immediately. A no-op processor is installed first and
    replaced with a real one only when monitoring is enabled and the API key,
    organization id and resolved project id are all available; in that case a
    new ``TracerProvider`` is also registered globally.

    Returns:
        This tracer instance.
    """
    if self._initialized:
        return self

    self.judgment_processor = NoOpJudgmentSpanProcessor()
    if self.enable_monitoring:
        project_id = _resolve_project_id(
            self.project_name, self.api_key, self.organization_id
        )
        has_credentials = self.api_key and self.organization_id
        if has_credentials and project_id:
            processor = self.get_processor(
                tracer=self,
                project_name=self.project_name,
                project_id=project_id,
                api_key=self.api_key,
                organization_id=self.organization_id,
                resource_attributes=self.resource_attributes,
            )
            self.judgment_processor = processor

            provider = TracerProvider(
                resource=Resource.create(processor.resource_attributes)
            )
            provider.add_span_processor(processor)
            set_tracer_provider(provider)
        elif has_credentials:
            # Credentials exist, so the only missing piece is the project id.
            judgeval_logger.error(
                f"Failed to resolve or autocreate project {self.project_name}, please create it first at https://app.judgmentlabs.ai/org/{self.organization_id}/projects. Skipping Judgment export."
            )

    # Whatever provider is globally registered at this point supplies the tracer.
    self.tracer = get_tracer_provider().get_tracer(
        JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME,
        get_version(),
    )

    self._initialized = True
    atexit.register(self._atexit_flush)
    return self
201
+
202
@staticmethod
def get_exporter(
    project_id: str,
    api_key: Optional[str] = None,
    organization_id: Optional[str] = None,
):
    """Build a ``JudgmentSpanExporter`` for the given project.

    Args:
        project_id: Project the exporter is bound to.
        api_key: API key; falls back to ``JUDGMENT_API_KEY``.
        organization_id: Organization id; falls back to ``JUDGMENT_ORG_ID``.

    Returns:
        The exporter, or ``None`` (after logging an error) when either
        credential is missing.
    """
    # Imported here rather than at module level, as in the original code.
    from judgeval.tracer.exporters import JudgmentSpanExporter

    resolved_key = api_key or JUDGMENT_API_KEY
    resolved_org = organization_id or JUDGMENT_ORG_ID

    if resolved_key and resolved_org:
        return JudgmentSpanExporter(
            endpoint=url_for("/otel/v1/traces"),
            api_key=resolved_key,
            organization_id=resolved_org,
            project_id=project_id,
        )

    judgeval_logger.error(
        "API Key or Organization ID is not set. You must set them in the environment variables to use the tracer."
    )
    return None
225
+
226
@staticmethod
def get_processor(
    tracer: Tracer,
    project_name: str,
    project_id: str,
    api_key: Optional[str] = None,
    organization_id: Optional[str] = None,
    max_queue_size: int = 2**18,
    export_timeout_millis: int = 30000,
    resource_attributes: Optional[Dict[str, Any]] = None,
) -> JudgmentSpanProcessor:
    """Build the span processor for a tracer.

    Args:
        tracer: Tracer the processor is attached to.
        project_name: Name of the target project.
        project_id: Identifier of the target project.
        api_key: API key; falls back to ``JUDGMENT_API_KEY``.
        organization_id: Organization id; falls back to ``JUDGMENT_ORG_ID``.
        max_queue_size: Forwarded to the processor constructor.
        export_timeout_millis: Forwarded to the processor constructor.
        resource_attributes: Forwarded to the processor constructor.

    Returns:
        A ``JudgmentSpanProcessor``, or a ``NoOpJudgmentSpanProcessor``
        (after logging an error) when either credential is missing.
    """
    resolved_key = api_key or JUDGMENT_API_KEY
    resolved_org = organization_id or JUDGMENT_ORG_ID

    if resolved_key and resolved_org:
        return JudgmentSpanProcessor(
            tracer,
            project_name,
            project_id,
            resolved_key,
            resolved_org,
            max_queue_size=max_queue_size,
            export_timeout_millis=export_timeout_millis,
            resource_attributes=resource_attributes,
        )

    judgeval_logger.error(
        "API Key or Organization ID is not set. You must set them in the environment variables to use the tracer."
    )
    return NoOpJudgmentSpanProcessor()
255
+
256
def get_current_span(self):
    """Return the span OpenTelemetry currently reports as active."""
    active_span = get_current_span()
    return active_span
258
+
259
def get_tracer(self):
    """Return the OpenTelemetry tracer stored on this instance."""
    otel_tracer = self.tracer
    return otel_tracer
261
+
262
def get_current_agent_context(self):
    """Return the context variable holding the agent context.

    The variable itself is returned, not its value; call ``.get()`` on it
    to read the current agent context.
    """
    agent_context_var = self.agent_context
    return agent_context_var
264
+
265
def get_current_customer_context(self):
    """Return the context variable holding the customer id.

    The variable itself is returned, not its value; call ``.get()`` or
    ``.set()`` on it to read or change the current customer id.
    """
    customer_id_var = self.customer_id
    return customer_id_var
267
+
268
def get_span_processor(self) -> JudgmentSpanProcessor:
    """Return the span processor this tracer instance is using."""
    processor = self.judgment_processor
    return processor
271
+
272
@dont_throw
def set_customer_id(self, customer_id: str) -> None:
    """Attach a customer id to the current span and to the tracer context.

    Nothing is changed (a warning is logged instead) when ``customer_id`` is
    empty, when there is no recording span, or when a customer id is already
    present in the context.

    On success the current span is tagged with the id, the id is stored in
    the customer context variable, and the span is flagged as the owner of
    that context so the context can be cleared when the span finishes.

    Args:
        customer_id: Identifier to associate with the current span.
    """
    if not customer_id:
        judgeval_logger.warning("Customer ID is empty, skipping.")
        return

    span = self.get_current_span()

    if not span or not span.is_recording():
        judgeval_logger.warning(
            "No active span found. Customer ID will not be set."
        )
        return

    customer_context = self.get_current_customer_context()
    if customer_context.get():
        judgeval_logger.warning("Customer ID is already set, skipping.")
        return

    # The span was verified as recording above, so the attribute, the context
    # value and the ownership flag are always written together. Re-checking
    # the span here could only ever leave the ownership flag set without a
    # matching context value.
    set_span_attribute(span, AttributeKeys.JUDGMENT_CUSTOMER_ID, customer_id)
    customer_context.set(customer_id)

    self.get_span_processor().set_internal_attribute(
        span_context=span.get_span_context(),
        key=InternalAttributeKeys.IS_CUSTOMER_CONTEXT_OWNER,
        value=True,
    )
299
+
300
def _maybe_clear_customer_context(self, span: Span) -> None:
    """Reset the customer context if ``span`` is flagged as its owner.

    Args:
        span: Span whose internal ownership flag is consulted.
    """
    owns_customer_context = self.get_span_processor().get_internal_attribute(
        span_context=span.get_span_context(),
        key=InternalAttributeKeys.IS_CUSTOMER_CONTEXT_OWNER,
        default=False,
    )
    if not owns_customer_context:
        return
    self.get_current_customer_context().set(None)
307
+
308
@dont_throw
def _add_agent_attributes_to_span(self, span):
    """Copy the current agent context's identifying fields onto ``span``.

    Does nothing when no agent context is set. After the fields are written,
    ``is_agent_entry_point`` is reset to ``False`` in the context dictionary.
    """
    agent_ctx = self.agent_context.get()
    if not agent_ctx:
        return

    # (span attribute key, agent-context field) pairs, in write order.
    attribute_sources = (
        (AttributeKeys.JUDGMENT_AGENT_ID, "agent_id"),
        (AttributeKeys.JUDGMENT_AGENT_CLASS_NAME, "class_name"),
        (AttributeKeys.JUDGMENT_AGENT_INSTANCE_NAME, "instance_name"),
        (AttributeKeys.JUDGMENT_PARENT_AGENT_ID, "parent_agent_id"),
        (AttributeKeys.JUDGMENT_IS_AGENT_ENTRY_POINT, "is_agent_entry_point"),
    )
    for attribute_key, field_name in attribute_sources:
        set_span_attribute(span, attribute_key, agent_ctx[field_name])

    agent_ctx["is_agent_entry_point"] = False
339
+
340
@dont_throw
def _record_instance_state(self, record_point: Literal["before", "after"], span):
    """Snapshot the tracked agent instance's state onto ``span``.

    Only runs when an agent context exists and its ``track_state`` entry is
    truthy. With ``track_attributes`` set, exactly those attributes are read
    (missing ones become ``None``); otherwise every entry of the instance's
    ``__dict__`` whose name does not start with an underscore is used.
    Names are translated through ``field_mappings`` before serialization.

    Args:
        record_point: ``"before"`` selects the state-before attribute key;
            any other value selects the state-after key.
        span: Span that receives the serialized snapshot.
    """
    agent_ctx = self.agent_context.get()
    if not agent_ctx or not agent_ctx.get("track_state"):
        return

    instance = agent_ctx.get("instance")
    track_attributes = agent_ctx.get("track_attributes")
    field_mappings = agent_ctx.get("field_mappings", {})

    if track_attributes is None:
        snapshot = {
            field_mappings.get(key, key): value
            for key, value in instance.__dict__.items()
            if not key.startswith("_")
        }
    else:
        snapshot = {
            field_mappings.get(attr, attr): getattr(instance, attr, None)
            for attr in track_attributes
        }

    if record_point == "before":
        state_key = AttributeKeys.JUDGMENT_STATE_BEFORE
    else:
        state_key = AttributeKeys.JUDGMENT_STATE_AFTER

    set_span_attribute(span, state_key, safe_serialize(snapshot))
369
+
370
@dont_throw
def _add_customer_id_to_span(self, span):
    """Tag ``span`` with the customer id from the context, if one is set."""
    current_customer_id = self.get_current_customer_context().get()
    if not current_customer_id:
        return
    set_span_attribute(
        span, AttributeKeys.JUDGMENT_CUSTOMER_ID, current_customer_id
    )
375
+
376
@dont_throw
def _inject_judgment_context(self, span):
    """Apply agent attributes, then the customer id, to ``span``."""
    injectors = (
        self._add_agent_attributes_to_span,
        self._add_customer_id_to_span,
    )
    for inject in injectors:
        inject(span)
380
+
381
def _set_pending_trace_eval(
    self,
    span: Span,
    scorer_config: TraceScorerConfig,
    args: Tuple[Any, ...],
    kwargs: Dict[str, Any],
):
    """Mark ``span`` with a pending trace evaluation, subject to sampling.

    The evaluation is skipped when evaluations are disabled, the scorer is
    missing or of the wrong type, ``run_condition`` rejects the call
    arguments, the sampling rate is outside ``[0, 1]``, the random draw falls
    outside the sampling rate, or the span context is invalid.

    Args:
        span: Span that receives the serialized evaluation run.
        scorer_config: Supplies ``scorer``, ``model``, ``run_condition`` and
            ``sampling_rate``.
        args: Positional arguments of the traced call, passed to
            ``run_condition``.
        kwargs: Keyword arguments of the traced call, passed to
            ``run_condition``.
    """
    if not self.enable_evaluation:
        return

    scorer = scorer_config.scorer
    model = scorer_config.model
    run_condition = scorer_config.run_condition
    sampling_rate = scorer_config.sampling_rate

    if scorer is None:
        judgeval_logger.error("Prompt Scorer was not found, skipping evaluation.")
        return
    if not isinstance(scorer, TraceAPIScorerConfig):
        judgeval_logger.error(
            "Scorer must be an instance of TraceAPIScorerConfig, got %s, skipping evaluation."
            % type(scorer)
        )
        return

    if run_condition is not None and not run_condition(*args, **kwargs):
        return

    if sampling_rate < 0 or sampling_rate > 1:
        judgeval_logger.error(
            "Sampling rate must be between 0 and 1, got %s, skipping evaluation."
            % sampling_rate
        )
        return

    # random.random() draws from [0.0, 1.0), so a rate of 0 never evaluates
    # and a rate of 1 always does. An inclusive draw compared with ``>``
    # could let a rate of 0 through when the draw is exactly 0.0.
    if random.random() >= sampling_rate:
        judgeval_logger.info(
            "Sampling rate is %s, skipping evaluation." % sampling_rate
        )
        return

    span_context = span.get_span_context()
    if span_context == INVALID_SPAN_CONTEXT:
        return

    # Ids are rendered as zero-padded lowercase hex (32 and 16 digits).
    trace_id = format(span_context.trace_id, "032x")
    span_id = format(span_context.span_id, "016x")
    eval_run_name = f"async_trace_evaluate_{span_id}"

    eval_run = TraceEvaluationRun(
        project_name=self.project_name,
        eval_name=eval_run_name,
        scorers=[scorer],
        model=model,
        trace_and_span_ids=[(trace_id, span_id)],
    )
    span.set_attribute(
        AttributeKeys.PENDING_TRACE_EVAL,
        safe_serialize(eval_run.model_dump(warnings=False)),
    )
441
+
442
def _create_traced_sync_generator(
    self,
    generator: Generator,
    main_span: Span,
    disable_generator_yield_span: bool = False,
):
    """Wrap a synchronous generator so it keeps the current context.

    A copy of the calling context is captured here and handed, together with
    the generator and its span, to ``_ContextPreservedSyncGeneratorWrapper``.

    Args:
        generator: Generator returned by the traced function.
        main_span: Span opened for the traced call.
        disable_generator_yield_span: Forwarded to the wrapper unchanged.
    """
    context_snapshot = contextvars.copy_context()
    wrapper_args = (
        self,
        generator,
        context_snapshot,
        main_span,
        None,  # fifth positional slot is always passed as None here
        disable_generator_yield_span,
    )
    return _ContextPreservedSyncGeneratorWrapper(*wrapper_args)
458
+
459
def _create_traced_async_generator(
    self,
    async_generator: AsyncGenerator,
    main_span: Span,
    disable_generator_yield_span: bool = False,
):
    """Wrap an asynchronous generator so it keeps the current context.

    A copy of the calling context is captured here and handed, together with
    the generator and its span, to ``_ContextPreservedAsyncGeneratorWrapper``.

    Args:
        async_generator: Async generator returned by the traced function.
        main_span: Span opened for the traced call.
        disable_generator_yield_span: Forwarded to the wrapper unchanged.
    """
    context_snapshot = contextvars.copy_context()
    wrapper_args = (
        self,
        async_generator,
        context_snapshot,
        main_span,
        None,  # fifth positional slot is always passed as None here
        disable_generator_yield_span,
    )
    return _ContextPreservedAsyncGeneratorWrapper(*wrapper_args)
475
+
476
def _wrap_sync(
    self,
    f: Callable,
    name: Optional[str],
    attributes: Optional[Dict[str, Any]],
    scorer_config: TraceScorerConfig | None = None,
    disable_generator_yield_span: bool = False,
):
    """Return a traced wrapper around the synchronous callable ``f``.

    Each call opens a span, records inputs, optional instance state and the
    output, and records any exception before re-raising it. When ``f``
    returns a generator (sync or async) the span is handed to a generator
    wrapper instead of being ended here.

    Args:
        f: Callable to wrap.
        name: Span name; ``f.__qualname__`` is used when falsy.
        attributes: Attributes passed to the span context manager.
        scorer_config: When truthy, a pending trace evaluation is attached.
        disable_generator_yield_span: Forwarded to the generator wrappers.
    """

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        span_name = name or f.__qualname__
        with sync_span_context(self, span_name, attributes) as span:
            # Becomes True once the span's lifetime belongs to a generator wrapper.
            span_handed_off = False

            self._inject_judgment_context(span)
            self._record_instance_state("before", span)
            try:
                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_INPUT,
                    safe_serialize(format_inputs(f, args, kwargs)),
                )

                self.judgment_processor.emit_partial()

                if scorer_config:
                    self._set_pending_trace_eval(span, scorer_config, args, kwargs)

                result = f(*args, **kwargs)

                if inspect.isgenerator(result):
                    placeholder = "<generator>"
                    make_traced = self._create_traced_sync_generator
                elif inspect.isasyncgen(result):
                    placeholder = "<async_generator>"
                    make_traced = self._create_traced_async_generator
                else:
                    placeholder = None
                    make_traced = None

                if make_traced is not None:
                    span_handed_off = True
                    set_span_attribute(
                        span, AttributeKeys.JUDGMENT_OUTPUT, placeholder
                    )
                    self._record_instance_state("after", span)
                    return make_traced(result, span, disable_generator_yield_span)

                set_span_attribute(
                    span, AttributeKeys.JUDGMENT_OUTPUT, safe_serialize(result)
                )
                self._record_instance_state("after", span)
                self._maybe_clear_customer_context(span)
                return result
            except Exception as user_exc:
                span.record_exception(user_exc)
                span.set_status(Status(StatusCode.ERROR, str(user_exc)))
                self._maybe_clear_customer_context(span)
                raise
            finally:
                if not span_handed_off:
                    span.end()

    return wrapper
541
+
542
def _wrap_async(
    self,
    f: Callable,
    name: Optional[str],
    attributes: Optional[Dict[str, Any]],
    scorer_config: TraceScorerConfig | None = None,
    disable_generator_yield_span: bool = False,
):
    """Return a traced wrapper around the coroutine function ``f``.

    Each call opens a span, records inputs, optional instance state and the
    awaited output, and records any exception before re-raising it. When the
    awaited result is a generator (async or sync) the span is handed to a
    generator wrapper instead of being ended here.

    Args:
        f: Coroutine function to wrap.
        name: Span name; ``f.__qualname__`` is used when falsy.
        attributes: Attributes passed to the span context manager.
        scorer_config: When truthy, a pending trace evaluation is attached.
        disable_generator_yield_span: Forwarded to the generator wrappers.
    """

    @functools.wraps(f)
    async def wrapper(*args, **kwargs):
        span_name = name or f.__qualname__
        async with async_span_context(self, span_name, attributes) as span:
            # Becomes True once the span's lifetime belongs to a generator wrapper.
            span_handed_off = False
            self._inject_judgment_context(span)
            self._record_instance_state("before", span)
            try:
                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_INPUT,
                    safe_serialize(format_inputs(f, args, kwargs)),
                )

                self.judgment_processor.emit_partial()

                if scorer_config:
                    self._set_pending_trace_eval(span, scorer_config, args, kwargs)

                result = await f(*args, **kwargs)

                if inspect.isasyncgen(result):
                    placeholder = "<async_generator>"
                    make_traced = self._create_traced_async_generator
                elif inspect.isgenerator(result):
                    placeholder = "<generator>"
                    make_traced = self._create_traced_sync_generator
                else:
                    placeholder = None
                    make_traced = None

                if make_traced is not None:
                    span_handed_off = True
                    set_span_attribute(
                        span, AttributeKeys.JUDGMENT_OUTPUT, placeholder
                    )
                    self._record_instance_state("after", span)
                    return make_traced(result, span, disable_generator_yield_span)

                set_span_attribute(
                    span, AttributeKeys.JUDGMENT_OUTPUT, safe_serialize(result)
                )
                self._record_instance_state("after", span)
                self._maybe_clear_customer_context(span)
                return result
            except Exception as user_exc:
                span.record_exception(user_exc)
                span.set_status(Status(StatusCode.ERROR, str(user_exc)))
                self._maybe_clear_customer_context(span)
                raise
            finally:
                if not span_handed_off:
                    span.end()

    return wrapper
605
+
606
@overload
def observe(
    self,
    func: C,
    /,
    *,
    span_type: str | None = "span",
    span_name: str | None = None,
    attributes: Optional[Dict[str, Any]] = None,
    scorer_config: TraceScorerConfig | None = None,
    disable_generator_yield_span: bool = False,
) -> C: ...

@overload
def observe(
    self,
    func: None = None,
    /,
    *,
    span_type: str | None = "span",
    span_name: str | None = None,
    attributes: Optional[Dict[str, Any]] = None,
    scorer_config: TraceScorerConfig | None = None,
    disable_generator_yield_span: bool = False,
) -> Callable[[C], C]: ...

def observe(
    self,
    func: Callable | None = None,
    /,
    *,
    span_type: str | None = "span",
    span_name: str | None = None,
    attributes: Optional[Dict[str, Any]] = None,
    scorer_config: TraceScorerConfig | None = None,
    disable_generator_yield_span: bool = False,
) -> Callable:
    """Decorate a function so each call is recorded as a span.

    Usable both bare (``@tracer.observe``) and with options
    (``@tracer.observe(span_name=...)``). When monitoring is disabled the
    function is returned unwrapped.

    Args:
        func: Function to wrap, or ``None`` when called with options only.
        span_type: Stored on the span as its kind attribute.
        span_name: Span name; defaults to the function's ``__qualname__``
            (or ``"function"`` when it has none).
        attributes: Extra span attributes; they override the span kind on
            key collision.
        scorer_config: Optional trace-evaluation configuration.
        disable_generator_yield_span: Forwarded to the generator wrappers.

    Returns:
        The wrapped function, the original function when monitoring is off,
        or a decorator when ``func`` is ``None``.
    """
    if func is None:
        return partial(
            self.observe,
            span_type=span_type,
            span_name=span_name,
            attributes=attributes,
            scorer_config=scorer_config,
            disable_generator_yield_span=disable_generator_yield_span,
        )

    if not self.enable_monitoring:
        return func

    # Handle functions (including generator functions) - detect generators at runtime
    name = span_name or getattr(func, "__qualname__", "function")
    func_attributes: Dict[str, Any] = {
        AttributeKeys.JUDGMENT_SPAN_KIND: span_type,
        **(attributes or {}),
    }

    if inspect.iscoroutinefunction(func):
        return self._wrap_async(
            func, name, func_attributes, scorer_config, disable_generator_yield_span
        )
    return self._wrap_sync(
        func, name, func_attributes, scorer_config, disable_generator_yield_span
    )
669
+
670
@overload
def agent(
    self,
    func: C,
    /,
    *,
    identifier: str | None = None,
    track_state: bool = False,
    track_attributes: List[str] | None = None,
    field_mappings: Optional[Dict[str, str]] = None,
) -> C: ...

@overload
def agent(
    self,
    func: None = None,
    /,
    *,
    identifier: str | None = None,
    track_state: bool = False,
    track_attributes: List[str] | None = None,
    field_mappings: Optional[Dict[str, str]] = None,
) -> Callable[[C], C]: ...

def agent(
    self,
    func: Callable | None = None,
    /,
    *,
    identifier: str | None = None,
    track_state: bool = False,
    track_attributes: List[str] | None = None,
    field_mappings: Optional[Dict[str, str]] = None,
) -> Callable | None:
    """
    Agent decorator that creates an agent ID and propagates it to child spans.
    Also captures and propagates the class name if the decorated function is a method.
    Optionally captures instance name based on the specified identifier attribute.

    This decorator should be used in combination with @observe decorator:

        class MyAgent:
            def __init__(self, name):
                self.name = name

            @judgment.agent(identifier="name")
            @judgment.observe(span_type="function")
            def my_agent_method(self):
                # This span and all child spans will have:
                # - agent_id: auto-generated UUID
                # - class_name: "MyAgent"
                # - instance_name: self.name value
                pass

    Args:
        func: The callable to wrap. When omitted, a ``functools.partial`` of
            this method with the given options bound is returned so it can be
            applied as a decorator afterwards.
        identifier: Name of the instance attribute to use as the instance name
        track_state: Forwarded unchanged to the agent context manager.
        track_attributes: Forwarded unchanged to the agent context manager.
        field_mappings: Forwarded to the agent context manager; ``None``
            (the default) is passed on as an empty dict.

    Returns:
        ``func`` itself when monitoring is disabled, otherwise a wrapper that
        runs ``func`` inside ``async_agent_context`` (coroutine functions) or
        ``sync_agent_context`` (everything else).
    """
    if func is None:
        return partial(
            self.agent,
            identifier=identifier,
            track_state=track_state,
            track_attributes=track_attributes,
            field_mappings=field_mappings,
        )

    if not self.enable_monitoring:
        return func

    # A ``{}`` default in the signature would be one dict shared by every
    # decorated function; build a fresh one per decoration instead.
    mappings: Dict[str, str] = {} if field_mappings is None else field_mappings

    # The qualified name of a method is "<Class>.<method>"; take the segment
    # before the function name as the class name. Functions nested in another
    # function have "<locals>" in that position, which is not a class.
    class_name = None
    qualname_parts = getattr(func, "__qualname__", "").split(".")
    if len(qualname_parts) >= 2 and qualname_parts[-2] != "<locals>":
        class_name = qualname_parts[-2]

    if inspect.iscoroutinefunction(func):

        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            async with async_agent_context(
                tracer=self,
                args=args,
                class_name=class_name,
                identifier=identifier,
                track_state=track_state,
                track_attributes=track_attributes,
                field_mappings=mappings,
            ):
                return await func(*args, **kwargs)

        return async_wrapper

    @functools.wraps(func)
    def sync_wrapper(*args, **kwargs):
        with sync_agent_context(
            tracer=self,
            args=args,
            class_name=class_name,
            identifier=identifier,
            track_state=track_state,
            track_attributes=track_attributes,
            field_mappings=mappings,
        ):
            return func(*args, **kwargs)

    return sync_wrapper
777
+
778
def wrap(self, client: ApiClient) -> ApiClient:
    """Instrument ``client`` by delegating to ``wrap_provider`` with this tracer.

    Returns:
        Whatever ``wrap_provider(self, client)`` returns.
    """
    wrapped_client = wrap_provider(self, client)
    return wrapped_client
780
+
781
def force_flush(self, timeout_millis: int = 30000) -> bool:
    """Flush pending spans through ``self.judgment_processor``.

    Args:
        timeout_millis: Maximum time, in milliseconds, handed to the
            processor's ``force_flush``.

    Returns:
        The processor's own result, or ``False`` when the processor raised
        (the error is logged as a warning instead of propagating).
    """
    try:
        flushed = self.judgment_processor.force_flush(timeout_millis)
    except Exception as e:
        judgeval_logger.warning(f"Error flushing processor: {e}")
        return False
    return flushed
795
+
796
def _atexit_flush(self, timeout_millis: int = 30000) -> None:
    """Flush remaining spans at interpreter exit.

    Delegates to ``self.force_flush`` and discards its result. Any exception
    is logged as a warning rather than raised, so program shutdown is never
    interrupted by a failed flush.

    Args:
        timeout_millis: Maximum time, in milliseconds, passed to
            ``force_flush``.
    """
    try:
        _ = self.force_flush(timeout_millis=timeout_millis)
    except Exception as e:
        judgeval_logger.warning(f"Error during atexit flush: {e}")
806
+
807
@dont_throw
def async_evaluate(
    self,
    /,
    *,
    scorer: Union[ExampleAPIScorerConfig, ExampleScorer, None],
    example: Example,
    model: Optional[str] = None,
    sampling_rate: float = 1.0,
):
    """Queue a hosted evaluation of ``example`` tied to the current span.

    Every failed precondition is logged and the call returns ``None`` without
    raising. The checks run in this order: evaluation and monitoring enabled,
    scorer present and of a supported type, example of type ``Example``,
    sampling rate within ``[0, 1]``, random sampling, and a valid current
    span context.

    Args:
        scorer: Scorer to run; must be an ``ExampleAPIScorerConfig`` or an
            ``ExampleScorer``.
        example: The example to evaluate.
        model: Model name; defaults to ``scorer.model`` and then to
            ``JUDGMENT_DEFAULT_GPT_MODEL``.
        sampling_rate: The evaluation is skipped when a uniform draw from
            ``[0, 1]`` exceeds this value.
    """
    if not (self.enable_evaluation and self.enable_monitoring):
        judgeval_logger.info("Evaluation is not enabled, skipping evaluation")
        return

    if scorer is None:
        judgeval_logger.error("Prompt Scorer was not found, skipping evaluation.")
        return

    if not isinstance(scorer, (ExampleAPIScorerConfig, ExampleScorer)):
        judgeval_logger.error(
            "Scorer must be an instance of ExampleAPIScorerConfig or ExampleScorer, got %s, skipping evaluation."
            % type(scorer)
        )
        return

    if not isinstance(example, Example):
        judgeval_logger.error(
            "Example must be an instance of Example, got %s, skipping evaluation."
            % type(example)
        )
        return

    # Explicit argument wins, then the scorer's own model, then the default.
    if model is None:
        scorer_model = scorer.model
        model = JUDGMENT_DEFAULT_GPT_MODEL if scorer_model is None else scorer_model

    if sampling_rate < 0 or sampling_rate > 1:
        judgeval_logger.error(
            "Sampling rate must be between 0 and 1, got %s, skipping evaluation."
            % sampling_rate
        )
        return

    if random.uniform(0, 1) > sampling_rate:
        judgeval_logger.info(
            "Sampling rate is %s, skipping evaluation." % sampling_rate
        )
        return

    span_context = self.get_current_span().get_span_context()
    if span_context == INVALID_SPAN_CONTEXT:
        judgeval_logger.warning(
            "No span context was found for async_evaluate, skipping evaluation. Please make sure to use the @observe decorator on the function you are evaluating."
        )
        return

    # Render the ids as zero-padded lowercase hex (32 and 16 digits).
    trace_id = f"{span_context.trace_id:032x}"
    span_id = f"{span_context.span_id:016x}"
    hosted_scoring = isinstance(scorer, ExampleAPIScorerConfig) or (
        isinstance(scorer, ExampleScorer) and scorer.server_hosted
    )
    eval_run = ExampleEvaluationRun(
        project_name=self.project_name,
        # The run name is irrelevant: the experiment itself is not saved,
        # only the example and its scorer data.
        eval_name=f"async_evaluate_{span_id}",
        examples=[example],
        scorers=[scorer],
        model=model,
        trace_span_id=span_id,
        trace_id=trace_id,
    )
    if not hosted_scoring:
        judgeval_logger.warning(
            "The scorer provided is not hosted, skipping evaluation."
        )
        return

    payload = eval_run.model_dump(warnings=False)  # type: ignore
    self.api_client.add_to_run_eval_queue_examples(payload)
889
+
890
+
891
def wrap(client: ApiClient) -> ApiClient:
    """Wrap ``client`` using the ``Tracer`` singleton, if one is usable.

    When no ``Tracer`` instance exists, the instance is not initialized, or
    looking it up fails, a ``JudgmentWarning`` is issued and ``client`` is
    returned unchanged.

    Returns:
        The result of ``tracer.wrap(client)``, or ``client`` itself when the
        tracer cannot be used.
    """
    try:
        tracer = Tracer.get_instance()

        # Pick the reason (if any) why the tracer cannot be used.
        problem = None
        if tracer is None or not isinstance(tracer, Tracer):
            problem = (
                "No Tracer instance found, client will not be wrapped. "
                "Create a Tracer instance first."
            )
        elif not tracer._initialized:
            problem = (
                "Tracer not initialized, client will not be wrapped. "
                "Call Tracer.initialize() first to setup the tracer."
            )

        if problem is None:
            return tracer.wrap(client)

        warn(problem, JudgmentWarning, stacklevel=2)
        return client
    except Exception:
        warn(
            "Error accessing tracer singleton, client will not be wrapped.",
            JudgmentWarning,
            stacklevel=2,
        )
        return client
918
+
919
+
920
def format_inputs(
    f: Callable, args: Tuple[Any, ...], kwargs: Dict[str, Any]
) -> Dict[str, Any]:
    """Map a call's arguments onto the parameter names of ``f``.

    The mapping is lenient: parameters that received no argument are simply
    left out (defaults are not filled in), and surplus arguments are ignored
    instead of raising.

    Args:
        f: The callable whose signature is inspected.
        args: Positional arguments of the call.
        kwargs: Keyword arguments of the call.

    Returns:
        A dict from parameter name to the supplied value, in signature order.
        A ``*args`` parameter maps to the tuple of remaining positionals and a
        ``**kwargs`` parameter to the keywords not bound to a named
        parameter. An empty dict is returned when the signature cannot be
        inspected or anything else goes wrong.
    """
    try:
        kinds = inspect.Parameter
        inputs: Dict[str, Any] = {}
        bound_by_keyword = set()
        next_positional = 0

        for param in inspect.signature(f).parameters.values():
            kind = param.kind
            accepts_positional = kind in (
                kinds.POSITIONAL_ONLY,
                kinds.POSITIONAL_OR_KEYWORD,
            )
            accepts_keyword = kind in (
                kinds.POSITIONAL_OR_KEYWORD,
                kinds.KEYWORD_ONLY,
            )

            if accepts_positional and next_positional < len(args):
                # Positional-only parameters must consume their slot too,
                # otherwise every later parameter is shifted by one.
                inputs[param.name] = args[next_positional]
                next_positional += 1
            elif accepts_keyword and param.name in kwargs:
                inputs[param.name] = kwargs[param.name]
                bound_by_keyword.add(param.name)
            elif kind == kinds.VAR_POSITIONAL:
                inputs[param.name] = args[next_positional:]
                next_positional = len(args)
            elif kind == kinds.VAR_KEYWORD:
                # ``**kwargs`` is always last, so every named parameter has
                # already claimed its keyword by now.
                inputs[param.name] = {
                    key: value
                    for key, value in kwargs.items()
                    if key not in bound_by_keyword
                }
        return inputs
    except Exception:
        # Best effort only: input capture must never break the traced call.
        return {}
942
+
943
+
944
class _ContextPreservedSyncGeneratorWrapper:
    """Iterator around a sync generator that advances it inside a saved context.

    Each item is fetched with ``context.run(next, generator)``. Unless
    ``disable_generator_yield_span`` is set, every yielded item is also
    recorded in a child span of ``span``. ``span`` is ended exactly once:
    when the generator is exhausted, when it raises, or when ``close()`` is
    called.
    """

    def __init__(
        self,
        tracer: "Tracer",
        generator: Generator,
        context: contextvars.Context,
        span: "Span",
        transform_fn: Optional[Callable[[Iterable], str]],
        disable_generator_yield_span: bool = False,
    ) -> None:
        self.tracer = tracer
        self.generator = generator
        self.context = context
        self.span = span
        # NOTE(review): stored but never read anywhere in this class.
        self.transform_fn = transform_fn
        self.disable_generator_yield_span = disable_generator_yield_span
        # Becomes True once ``span`` has been ended, so it is ended only once.
        self._finished = False

    def __iter__(self) -> "_ContextPreservedSyncGeneratorWrapper":
        return self

    def __next__(self) -> Any:
        try:
            # Advance the wrapped generator inside the preserved context.
            item = self.context.run(next, self.generator)

            if self.disable_generator_yield_span:
                return item

            # Record the yielded item in a child span of the generator span.
            with use_span(self.span):
                span_name = str(getattr(self.span, "name", "generator_item"))
                with self.tracer.get_tracer().start_as_current_span(
                    span_name,
                    attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "generator_item"},
                    end_on_exit=True,
                ) as child_span:
                    set_span_attribute(
                        child_span,
                        AttributeKeys.JUDGMENT_OUTPUT,
                        safe_serialize(item),
                    )

            return item

        except StopIteration:
            # Generator exhausted: finish the span, then let iteration stop.
            if self._finished:
                raise
            # NOTE(review): unlike the error path and close(), this path does
            # not call tracer._maybe_clear_customer_context; confirm intent.
            set_span_attribute(
                self.span, AttributeKeys.JUDGMENT_SPAN_KIND, "generator"
            )
            self.span.end()
            self._finished = True
            raise

        except Exception as e:
            # Generator (or item recording) failed: mark the span as errored.
            if self._finished:
                raise
            self.span.record_exception(e)
            self.span.set_status(
                Status(StatusCode.ERROR, str(e) or type(e).__name__)
            )
            self.tracer._maybe_clear_customer_context(self.span)
            self.span.end()
            self._finished = True
            raise

    def close(self) -> None:
        """Close the wrapped generator and end the span if still open."""
        try:
            self.generator.close()
        finally:
            # No early return here: returning from ``finally`` would swallow
            # an exception raised by ``generator.close()``.
            if not self._finished:
                set_span_attribute(
                    self.span, AttributeKeys.JUDGMENT_SPAN_KIND, "generator"
                )
                self.tracer._maybe_clear_customer_context(self.span)
                self.span.end()
                self._finished = True
1027
+
1028
+
1029
class _ContextPreservedAsyncGeneratorWrapper:
    """Async iterator around an async generator that advances it in a saved context.

    On Python 3.11+ each step runs in a task created with
    ``context=self.context``; on older versions the step is awaited directly
    in the caller's context. Unless ``disable_generator_yield_span`` is set,
    every yielded item is also recorded in a child span of ``span``. ``span``
    is ended exactly once: when the generator is exhausted, when it raises,
    or when ``aclose()`` is called.
    """

    def __init__(
        self,
        tracer: "Tracer",
        generator: AsyncGenerator,
        context: contextvars.Context,
        span: "Span",
        transform_fn: Optional[Callable[[Iterable], str]],
        disable_generator_yield_span: bool = False,
    ) -> None:
        self.tracer = tracer
        self.generator = generator
        self.context = context
        self.span = span
        # NOTE(review): stored but never read anywhere in this class.
        self.transform_fn = transform_fn
        # Becomes True once ``span`` has been ended, so it is ended only once.
        self._finished = False
        self.disable_generator_yield_span = disable_generator_yield_span

    def __aiter__(self) -> "_ContextPreservedAsyncGeneratorWrapper":
        return self

    async def __anext__(self) -> Any:
        try:
            # Run the generator's __anext__ in the preserved context.
            step = self.generator.__anext__()
            try:
                # Python 3.11+: create_task() accepts a ``context`` argument.
                task = asyncio.create_task(
                    step,  # type: ignore
                    context=self.context,
                )  # type: ignore
            except TypeError:
                # Older Python: ``context`` is not supported. Only the task
                # creation is guarded, so a TypeError raised by the generator
                # itself is never mistaken for this case and swallowed.
                item = await step
            else:
                item = await task

            if not self.disable_generator_yield_span:
                # Record the yielded item in a child span of the generator span.
                with use_span(self.span):
                    span_name = (
                        str(self.span.name)
                        if hasattr(self.span, "name")
                        else "generator_item"
                    )  # type: ignore[attr-defined]
                    with self.tracer.get_tracer().start_as_current_span(
                        span_name,
                        attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "generator_item"},
                        end_on_exit=True,
                    ) as child_span:
                        set_span_attribute(
                            child_span,
                            AttributeKeys.JUDGMENT_OUTPUT,
                            safe_serialize(item),
                        )

            return item

        except StopAsyncIteration:
            # Generator exhausted: finish the span, then let iteration stop.
            if not self._finished:
                set_span_attribute(
                    self.span, AttributeKeys.JUDGMENT_SPAN_KIND, "generator"
                )
                self.span.end()
                self._finished = True
            raise  # Re-raise StopAsyncIteration
        except Exception as e:
            # Generator (or item recording) failed: mark the span as errored.
            if not self._finished:
                self.span.record_exception(e)
                self.span.set_status(
                    Status(StatusCode.ERROR, str(e) or type(e).__name__)
                )
                self.tracer._maybe_clear_customer_context(self.span)
                self.span.end()
                self._finished = True

            raise

    async def aclose(self) -> None:
        """Close the wrapped async generator and end the span if still open."""
        try:
            await self.generator.aclose()
        finally:
            if not self._finished:
                set_span_attribute(
                    self.span, AttributeKeys.JUDGMENT_SPAN_KIND, "generator"
                )
                self.tracer._maybe_clear_customer_context(self.span)
                self.span.end()
                self._finished = True
1118
+
1119
+
1120
# Public API of this module.
__all__ = ["Tracer", "wrap"]