judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234) hide show
  1. judgeval/__init__.py +173 -10
  2. judgeval/api/__init__.py +523 -0
  3. judgeval/api/api_types.py +413 -0
  4. judgeval/cli.py +112 -0
  5. judgeval/constants.py +7 -30
  6. judgeval/data/__init__.py +1 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +14 -40
  9. judgeval/data/judgment_types.py +396 -146
  10. judgeval/data/result.py +11 -18
  11. judgeval/data/scorer_data.py +3 -26
  12. judgeval/data/scripts/openapi_transform.py +5 -5
  13. judgeval/data/trace.py +115 -194
  14. judgeval/dataset/__init__.py +335 -0
  15. judgeval/env.py +55 -0
  16. judgeval/evaluation/__init__.py +346 -0
  17. judgeval/exceptions.py +28 -0
  18. judgeval/integrations/langgraph/__init__.py +13 -0
  19. judgeval/integrations/openlit/__init__.py +51 -0
  20. judgeval/judges/__init__.py +2 -2
  21. judgeval/judges/litellm_judge.py +77 -16
  22. judgeval/judges/together_judge.py +88 -17
  23. judgeval/judges/utils.py +7 -20
  24. judgeval/judgment_attribute_keys.py +55 -0
  25. judgeval/{common/logger.py → logger.py} +24 -8
  26. judgeval/prompt/__init__.py +330 -0
  27. judgeval/scorers/__init__.py +11 -11
  28. judgeval/scorers/agent_scorer.py +15 -19
  29. judgeval/scorers/api_scorer.py +21 -23
  30. judgeval/scorers/base_scorer.py +54 -36
  31. judgeval/scorers/example_scorer.py +1 -3
  32. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
  36. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
  37. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
  38. judgeval/scorers/score.py +64 -47
  39. judgeval/scorers/utils.py +2 -107
  40. judgeval/tracer/__init__.py +1111 -2
  41. judgeval/tracer/constants.py +1 -0
  42. judgeval/tracer/exporters/__init__.py +40 -0
  43. judgeval/tracer/exporters/s3.py +119 -0
  44. judgeval/tracer/exporters/store.py +59 -0
  45. judgeval/tracer/exporters/utils.py +32 -0
  46. judgeval/tracer/keys.py +63 -0
  47. judgeval/tracer/llm/__init__.py +7 -0
  48. judgeval/tracer/llm/config.py +78 -0
  49. judgeval/tracer/llm/constants.py +9 -0
  50. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  51. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  52. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  53. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  54. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  55. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  56. judgeval/tracer/llm/llm_google/config.py +6 -0
  57. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  58. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  59. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  60. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  61. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  62. judgeval/tracer/llm/llm_openai/config.py +6 -0
  63. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  64. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  65. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  66. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  67. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  68. judgeval/tracer/llm/llm_together/config.py +6 -0
  69. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  70. judgeval/tracer/llm/providers.py +19 -0
  71. judgeval/tracer/managers.py +167 -0
  72. judgeval/tracer/processors/__init__.py +220 -0
  73. judgeval/tracer/utils.py +19 -0
  74. judgeval/trainer/__init__.py +14 -0
  75. judgeval/trainer/base_trainer.py +122 -0
  76. judgeval/trainer/config.py +123 -0
  77. judgeval/trainer/console.py +144 -0
  78. judgeval/trainer/fireworks_trainer.py +392 -0
  79. judgeval/trainer/trainable_model.py +252 -0
  80. judgeval/trainer/trainer.py +70 -0
  81. judgeval/utils/async_utils.py +39 -0
  82. judgeval/utils/decorators/__init__.py +0 -0
  83. judgeval/utils/decorators/dont_throw.py +37 -0
  84. judgeval/utils/decorators/use_once.py +13 -0
  85. judgeval/utils/file_utils.py +74 -28
  86. judgeval/utils/guards.py +36 -0
  87. judgeval/utils/meta.py +27 -0
  88. judgeval/utils/project.py +15 -0
  89. judgeval/utils/serialize.py +253 -0
  90. judgeval/utils/testing.py +70 -0
  91. judgeval/utils/url.py +10 -0
  92. judgeval/{version_check.py → utils/version_check.py} +5 -3
  93. judgeval/utils/wrappers/README.md +3 -0
  94. judgeval/utils/wrappers/__init__.py +15 -0
  95. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  96. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  97. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  98. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  99. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  100. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  101. judgeval/utils/wrappers/py.typed +0 -0
  102. judgeval/utils/wrappers/utils.py +35 -0
  103. judgeval/v1/__init__.py +88 -0
  104. judgeval/v1/data/__init__.py +7 -0
  105. judgeval/v1/data/example.py +44 -0
  106. judgeval/v1/data/scorer_data.py +42 -0
  107. judgeval/v1/data/scoring_result.py +44 -0
  108. judgeval/v1/datasets/__init__.py +6 -0
  109. judgeval/v1/datasets/dataset.py +214 -0
  110. judgeval/v1/datasets/dataset_factory.py +94 -0
  111. judgeval/v1/evaluation/__init__.py +6 -0
  112. judgeval/v1/evaluation/evaluation.py +182 -0
  113. judgeval/v1/evaluation/evaluation_factory.py +17 -0
  114. judgeval/v1/instrumentation/__init__.py +6 -0
  115. judgeval/v1/instrumentation/llm/__init__.py +7 -0
  116. judgeval/v1/instrumentation/llm/config.py +78 -0
  117. judgeval/v1/instrumentation/llm/constants.py +11 -0
  118. judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
  119. judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
  120. judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
  121. judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
  122. judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
  123. judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
  124. judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
  125. judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
  126. judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
  127. judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
  128. judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
  129. judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
  130. judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
  131. judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
  132. judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
  133. judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
  134. judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
  135. judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
  136. judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
  137. judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
  138. judgeval/v1/instrumentation/llm/providers.py +19 -0
  139. judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
  140. judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
  141. judgeval/v1/integrations/langgraph/__init__.py +13 -0
  142. judgeval/v1/integrations/openlit/__init__.py +47 -0
  143. judgeval/v1/internal/api/__init__.py +525 -0
  144. judgeval/v1/internal/api/api_types.py +413 -0
  145. judgeval/v1/prompts/__init__.py +6 -0
  146. judgeval/v1/prompts/prompt.py +29 -0
  147. judgeval/v1/prompts/prompt_factory.py +189 -0
  148. judgeval/v1/py.typed +0 -0
  149. judgeval/v1/scorers/__init__.py +6 -0
  150. judgeval/v1/scorers/api_scorer.py +82 -0
  151. judgeval/v1/scorers/base_scorer.py +17 -0
  152. judgeval/v1/scorers/built_in/__init__.py +17 -0
  153. judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
  154. judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
  155. judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
  156. judgeval/v1/scorers/built_in/faithfulness.py +28 -0
  157. judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
  158. judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
  159. judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
  160. judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
  161. judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
  162. judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
  163. judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
  164. judgeval/v1/scorers/scorers_factory.py +49 -0
  165. judgeval/v1/tracer/__init__.py +7 -0
  166. judgeval/v1/tracer/base_tracer.py +520 -0
  167. judgeval/v1/tracer/exporters/__init__.py +14 -0
  168. judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
  169. judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
  170. judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
  171. judgeval/v1/tracer/exporters/span_store.py +50 -0
  172. judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
  173. judgeval/v1/tracer/processors/__init__.py +6 -0
  174. judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
  175. judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
  176. judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
  177. judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
  178. judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
  179. judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
  180. judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
  181. judgeval/v1/tracer/tracer.py +67 -0
  182. judgeval/v1/tracer/tracer_factory.py +38 -0
  183. judgeval/v1/trainers/__init__.py +5 -0
  184. judgeval/v1/trainers/base_trainer.py +62 -0
  185. judgeval/v1/trainers/config.py +123 -0
  186. judgeval/v1/trainers/console.py +144 -0
  187. judgeval/v1/trainers/fireworks_trainer.py +392 -0
  188. judgeval/v1/trainers/trainable_model.py +252 -0
  189. judgeval/v1/trainers/trainers_factory.py +37 -0
  190. judgeval/v1/utils.py +18 -0
  191. judgeval/version.py +5 -0
  192. judgeval/warnings.py +4 -0
  193. judgeval-0.23.0.dist-info/METADATA +266 -0
  194. judgeval-0.23.0.dist-info/RECORD +201 -0
  195. judgeval-0.23.0.dist-info/entry_points.txt +2 -0
  196. judgeval/clients.py +0 -34
  197. judgeval/common/__init__.py +0 -13
  198. judgeval/common/api/__init__.py +0 -3
  199. judgeval/common/api/api.py +0 -352
  200. judgeval/common/api/constants.py +0 -165
  201. judgeval/common/exceptions.py +0 -27
  202. judgeval/common/storage/__init__.py +0 -6
  203. judgeval/common/storage/s3_storage.py +0 -98
  204. judgeval/common/tracer/__init__.py +0 -31
  205. judgeval/common/tracer/constants.py +0 -22
  206. judgeval/common/tracer/core.py +0 -1916
  207. judgeval/common/tracer/otel_exporter.py +0 -108
  208. judgeval/common/tracer/otel_span_processor.py +0 -234
  209. judgeval/common/tracer/span_processor.py +0 -37
  210. judgeval/common/tracer/span_transformer.py +0 -211
  211. judgeval/common/tracer/trace_manager.py +0 -92
  212. judgeval/common/utils.py +0 -940
  213. judgeval/data/datasets/__init__.py +0 -4
  214. judgeval/data/datasets/dataset.py +0 -341
  215. judgeval/data/datasets/eval_dataset_client.py +0 -214
  216. judgeval/data/tool.py +0 -5
  217. judgeval/data/trace_run.py +0 -37
  218. judgeval/evaluation_run.py +0 -75
  219. judgeval/integrations/langgraph.py +0 -843
  220. judgeval/judges/mixture_of_judges.py +0 -286
  221. judgeval/judgment_client.py +0 -369
  222. judgeval/rules.py +0 -521
  223. judgeval/run_evaluation.py +0 -684
  224. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
  225. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  226. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  227. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
  228. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
  229. judgeval/utils/alerts.py +0 -93
  230. judgeval/utils/requests.py +0 -50
  231. judgeval-0.1.0.dist-info/METADATA +0 -202
  232. judgeval-0.1.0.dist-info/RECORD +0 -73
  233. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
  234. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -1,3 +1,1112 @@
1
- from judgeval.common.tracer import Tracer, wrap, TraceClient, TraceManagerClient
1
+ from __future__ import annotations
2
+ from contextvars import ContextVar
3
+ import atexit
4
+ import functools
5
+ import inspect
6
+ import random
7
+ from typing import (
8
+ Any,
9
+ Union,
10
+ Callable,
11
+ Dict,
12
+ List,
13
+ Optional,
14
+ Tuple,
15
+ Type,
16
+ TypeVar,
17
+ overload,
18
+ Literal,
19
+ TypedDict,
20
+ Generator,
21
+ AsyncGenerator,
22
+ Iterable,
23
+ )
24
+ import contextvars
25
+ import asyncio
26
+ from functools import partial
27
+ from warnings import warn
2
28
 
3
- __all__ = ["Tracer", "wrap", "TraceClient", "TraceManagerClient"]
29
+ from opentelemetry.sdk.trace import TracerProvider
30
+ from opentelemetry.sdk.resources import Resource
31
+ from opentelemetry.trace import (
32
+ Status,
33
+ StatusCode,
34
+ Tracer as ABCTracer,
35
+ Span,
36
+ get_current_span,
37
+ get_tracer_provider,
38
+ set_tracer_provider,
39
+ INVALID_SPAN_CONTEXT,
40
+ )
41
+
42
+ from judgeval.data.evaluation_run import ExampleEvaluationRun, TraceEvaluationRun
43
+ from judgeval.data.example import Example
44
+ from judgeval.env import (
45
+ JUDGMENT_API_KEY,
46
+ JUDGMENT_ORG_ID,
47
+ JUDGMENT_ENABLE_MONITORING,
48
+ JUDGMENT_ENABLE_EVALUATIONS,
49
+ )
50
+ from judgeval.logger import judgeval_logger
51
+ from judgeval.scorers.api_scorer import TraceAPIScorerConfig, ExampleAPIScorerConfig
52
+ from judgeval.scorers.example_scorer import ExampleScorer
53
+ from judgeval.tracer.constants import JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME
54
+ from judgeval.tracer.managers import (
55
+ sync_span_context,
56
+ async_span_context,
57
+ sync_agent_context,
58
+ async_agent_context,
59
+ )
60
+ from judgeval.utils.decorators.dont_throw import dont_throw
61
+ from judgeval.utils.guards import expect_api_key, expect_organization_id
62
+ from judgeval.utils.serialize import safe_serialize
63
+ from judgeval.utils.meta import SingletonMeta
64
+ from judgeval.version import get_version
65
+ from judgeval.warnings import JudgmentWarning
66
+
67
+ from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys
68
+ from judgeval.api import JudgmentSyncClient
69
+ from judgeval.tracer.llm import wrap_provider
70
+ from judgeval.utils.url import url_for
71
+ from judgeval.tracer.processors import (
72
+ JudgmentSpanProcessor,
73
+ NoOpJudgmentSpanProcessor,
74
+ )
75
+ from judgeval.tracer.utils import set_span_attribute, TraceScorerConfig
76
+ from judgeval.utils.project import _resolve_project_id
77
+ from opentelemetry.trace import use_span
78
+
79
+ C = TypeVar("C", bound=Callable)
80
+ Cls = TypeVar("Cls", bound=Type)
81
+ ApiClient = TypeVar("ApiClient", bound=Any)
82
+
83
+
84
class AgentContext(TypedDict):
    """Metadata describing the agent whose code is currently being traced.

    A value of this shape is stored in the tracer's ``agent_context``
    context variable and read back when spans are annotated.
    """

    # Identity fields copied onto spans as agent attributes.
    agent_id: str
    class_name: Optional[str]
    instance_name: Optional[str]
    # State capture: whether to snapshot the instance, which attributes to
    # read (None means every public entry of the instance ``__dict__``), and
    # how attribute names are renamed in the recorded snapshot.
    track_state: bool
    track_attributes: Optional[List[str]]
    field_mappings: Dict[str, str]
    # The live object whose state is snapshotted.
    instance: Any
    # Written to the span, then reset to False after the first span that
    # reads this context.
    is_agent_entry_point: bool
    parent_agent_id: Optional[str]
94
+
95
+
96
+ class Tracer(metaclass=SingletonMeta):
97
+ __slots__ = (
98
+ "api_key",
99
+ "organization_id",
100
+ "project_name",
101
+ "enable_monitoring",
102
+ "enable_evaluation",
103
+ "resource_attributes",
104
+ "api_client",
105
+ "judgment_processor",
106
+ "tracer",
107
+ "agent_context",
108
+ "customer_id",
109
+ "_initialized",
110
+ )
111
+
112
+ api_key: str | None
113
+ organization_id: str | None
114
+ project_name: str
115
+ enable_monitoring: bool
116
+ enable_evaluation: bool
117
+ resource_attributes: Optional[Dict[str, Any]]
118
+ api_client: JudgmentSyncClient
119
+ judgment_processor: JudgmentSpanProcessor
120
+ tracer: ABCTracer
121
+ agent_context: ContextVar[Optional[AgentContext]]
122
+ customer_id: ContextVar[Optional[str]]
123
+ _initialized: bool
124
+
125
+ def __init__(
126
+ self,
127
+ /,
128
+ *,
129
+ project_name: str,
130
+ api_key: str | None = None,
131
+ organization_id: str | None = None,
132
+ enable_monitoring: bool = JUDGMENT_ENABLE_MONITORING.lower() == "true",  # default is computed once, when the function is defined
133
+ enable_evaluation: bool = JUDGMENT_ENABLE_EVALUATIONS.lower() == "true",  # default is computed once, when the function is defined
134
+ resource_attributes: Optional[Dict[str, Any]] = None,
135
+ initialize: bool = True,
136
+ ):  # Store the tracer configuration, build the API client when credentials exist, then optionally call initialize().
137
+ if not hasattr(self, "_initialized"):  # NOTE(review): this listing lost its indentation, so how many of the following statements belong to this guard cannot be read off here - confirm against the real file
138
+ self._initialized = False
139
+ self.agent_context = ContextVar("current_agent_context", default=None)
140
+ self.customer_id = ContextVar("current_customer_id", default=None)
141
+
142
+ self.project_name = project_name
143
+ self.api_key = expect_api_key(api_key or JUDGMENT_API_KEY)  # explicit argument wins; the env-derived constant is the fallback
144
+ self.organization_id = expect_organization_id(
145
+ organization_id or JUDGMENT_ORG_ID
146
+ )
147
+ self.enable_monitoring = enable_monitoring
148
+ self.enable_evaluation = enable_evaluation
149
+ self.resource_attributes = resource_attributes
150
+
151
+ if self.api_key and self.organization_id:
152
+ self.api_client = JudgmentSyncClient(
153
+ api_key=self.api_key, organization_id=self.organization_id
154
+ )
155
+ else:  # no api_client is assigned on this path; only the error below is logged
156
+ judgeval_logger.error(
157
+ "API Key or Organization ID is not set. You must set them in the environment variables to use the tracer."
158
+ )
159
+
160
+ if initialize:  # pass initialize=False to defer processor/provider setup to an explicit initialize() call
161
+ self.initialize()
162
+
163
def initialize(self) -> Tracer:
    """Install the span processor and acquire the OpenTelemetry tracer, once.

    Returns immediately when ``_initialized`` is already true. Otherwise a
    no-op processor is installed first. When monitoring is enabled and both
    credentials plus a resolved project id are available, the processor from
    ``get_processor`` replaces it and a new ``TracerProvider`` carrying that
    processor becomes the global provider. Finally the tracer is fetched
    from the global provider and ``_atexit_flush`` is registered with
    ``atexit``.

    Returns:
        This tracer instance.
    """
    if self._initialized:
        return self

    # Start from the no-op processor so the attribute is always set.
    self.judgment_processor = NoOpJudgmentSpanProcessor()

    if self.enable_monitoring:
        project_id = _resolve_project_id(
            self.project_name, self.api_key, self.organization_id
        )
        has_credentials = bool(self.api_key and self.organization_id)

        if has_credentials and project_id:
            self.judgment_processor = self.get_processor(
                tracer=self,
                project_name=self.project_name,
                project_id=project_id,
                api_key=self.api_key,
                organization_id=self.organization_id,
                resource_attributes=self.resource_attributes,
            )

            provider = TracerProvider(
                resource=Resource.create(self.judgment_processor.resource_attributes)
            )
            provider.add_span_processor(self.judgment_processor)
            set_tracer_provider(provider)
        elif has_credentials:
            # Credentials are present, so the project lookup is what failed.
            judgeval_logger.error(
                f"Failed to resolve or autocreate project {self.project_name}, please create it first at https://app.judgmentlabs.ai/org/{self.organization_id}/projects. Skipping Judgment export."
            )

    self.tracer = get_tracer_provider().get_tracer(
        JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME,
        get_version(),
    )

    self._initialized = True
    atexit.register(self._atexit_flush)
    return self
200
+
201
@staticmethod
def get_exporter(
    project_id: str,
    api_key: Optional[str] = None,
    organization_id: Optional[str] = None,
):
    """Build a ``JudgmentSpanExporter`` for the given project.

    Args:
        project_id: Project id passed to the exporter.
        api_key: API key; falls back to ``JUDGMENT_API_KEY`` when falsy.
        organization_id: Organization id; falls back to ``JUDGMENT_ORG_ID``
            when falsy.

    Returns:
        The exporter, or ``None`` (after logging an error) when either
        credential is still missing after the fallback.
    """
    # Imported inside the function, as in the original code.
    from judgeval.tracer.exporters import JudgmentSpanExporter

    resolved_key = api_key if api_key else JUDGMENT_API_KEY
    resolved_org = organization_id if organization_id else JUDGMENT_ORG_ID

    if resolved_key and resolved_org:
        return JudgmentSpanExporter(
            endpoint=url_for("/otel/v1/traces"),
            api_key=resolved_key,
            organization_id=resolved_org,
            project_id=project_id,
        )

    judgeval_logger.error(
        "API Key or Organization ID is not set. You must set them in the environment variables to use the tracer."
    )
    return None
224
+
225
@staticmethod
def get_processor(
    tracer: Tracer,
    project_name: str,
    project_id: str,
    api_key: Optional[str] = None,
    organization_id: Optional[str] = None,
    max_queue_size: int = 2**18,
    export_timeout_millis: int = 30000,
    resource_attributes: Optional[Dict[str, Any]] = None,
) -> JudgmentSpanProcessor:
    """Build the span processor used by a tracer.

    Args:
        tracer: Tracer the processor is attached to.
        project_name: Project name passed to the processor.
        project_id: Project id passed to the processor.
        api_key: API key; falls back to ``JUDGMENT_API_KEY`` when falsy.
        organization_id: Organization id; falls back to ``JUDGMENT_ORG_ID``
            when falsy.
        max_queue_size: Forwarded to ``JudgmentSpanProcessor``.
        export_timeout_millis: Forwarded to ``JudgmentSpanProcessor``.
        resource_attributes: Forwarded to ``JudgmentSpanProcessor``.

    Returns:
        A ``JudgmentSpanProcessor``, or a ``NoOpJudgmentSpanProcessor``
        (after logging an error) when either credential is missing.
    """
    resolved_key = api_key if api_key else JUDGMENT_API_KEY
    resolved_org = organization_id if organization_id else JUDGMENT_ORG_ID

    if resolved_key and resolved_org:
        return JudgmentSpanProcessor(
            tracer,
            project_name,
            project_id,
            resolved_key,
            resolved_org,
            max_queue_size=max_queue_size,
            export_timeout_millis=export_timeout_millis,
            resource_attributes=resource_attributes,
        )

    judgeval_logger.error(
        "API Key or Organization ID is not set. You must set them in the environment variables to use the tracer."
    )
    return NoOpJudgmentSpanProcessor()
254
+
255
def get_current_span(self):
    """Return the result of OpenTelemetry's module-level ``get_current_span``."""
    current = get_current_span()
    return current
257
+
258
def get_tracer(self):
    """Return the tracer object stored on this instance as ``tracer``."""
    tracer = self.tracer
    return tracer
260
+
261
def get_current_agent_context(self):
    """Return the ``agent_context`` context variable itself.

    Callers use ``.get()`` / ``.set()`` on the returned object to read or
    change the current value.
    """
    agent_var = self.agent_context
    return agent_var
263
+
264
def get_current_customer_context(self):
    """Return the ``customer_id`` context variable itself.

    Callers use ``.get()`` / ``.set()`` on the returned object to read or
    change the current value.
    """
    customer_var = self.customer_id
    return customer_var
266
+
267
def get_span_processor(self) -> JudgmentSpanProcessor:
    """Return the span processor stored on this tracer as ``judgment_processor``."""
    processor = self.judgment_processor
    return processor
270
+
271
@dont_throw
def set_customer_id(self, customer_id: str) -> None:
    """Attach a customer id to the current span and the customer context.

    Nothing is changed, and a warning is logged, when ``customer_id`` is
    falsy, when there is no recording span, or when the customer context
    already holds a value. On success the current span is also marked in
    the span processor as the owner of the customer context.

    Args:
        customer_id: Identifier to record.
    """
    if not customer_id:
        judgeval_logger.warning("Customer ID is empty, skipping.")
        return

    active_span = self.get_current_span()
    if not (active_span and active_span.is_recording()):
        judgeval_logger.warning(
            "No active span found. Customer ID will not be set."
        )
        return

    customer_var = self.get_current_customer_context()
    if customer_var.get():
        judgeval_logger.warning("Customer ID is already set, skipping.")
        return

    # Recording state is re-checked here, as in the original code.
    if active_span and active_span.is_recording():
        set_span_attribute(
            active_span, AttributeKeys.JUDGMENT_CUSTOMER_ID, customer_id
        )
        customer_var.set(customer_id)

        # Remember which span set the id so it can clear the context later.
        self.get_span_processor().set_internal_attribute(
            span_context=active_span.get_span_context(),
            key=InternalAttributeKeys.IS_CUSTOMER_CONTEXT_OWNER,
            value=True,
        )
298
+
299
def _maybe_clear_customer_context(self, span: Span) -> None:
    """Reset the customer context to ``None`` if ``span`` is marked as its owner.

    Ownership is read from the span processor's internal attribute
    ``IS_CUSTOMER_CONTEXT_OWNER`` for the span's context (default False).
    """
    owns_customer_context = self.get_span_processor().get_internal_attribute(
        span_context=span.get_span_context(),
        key=InternalAttributeKeys.IS_CUSTOMER_CONTEXT_OWNER,
        default=False,
    )
    if not owns_customer_context:
        return
    self.get_current_customer_context().set(None)
306
+
307
@dont_throw
def _add_agent_attributes_to_span(self, span):
    """Copy the current agent context onto ``span`` as attributes.

    Does nothing when no agent context is set. After the attributes are
    written, the context's ``is_agent_entry_point`` flag is set to False.
    """
    agent_ctx = self.agent_context.get()
    if not agent_ctx:
        return

    # (span attribute key, agent-context field), in the order they are written.
    attribute_sources = (
        (AttributeKeys.JUDGMENT_AGENT_ID, "agent_id"),
        (AttributeKeys.JUDGMENT_AGENT_CLASS_NAME, "class_name"),
        (AttributeKeys.JUDGMENT_AGENT_INSTANCE_NAME, "instance_name"),
        (AttributeKeys.JUDGMENT_PARENT_AGENT_ID, "parent_agent_id"),
        (AttributeKeys.JUDGMENT_IS_AGENT_ENTRY_POINT, "is_agent_entry_point"),
    )
    for attribute_key, context_field in attribute_sources:
        set_span_attribute(span, attribute_key, agent_ctx[context_field])

    agent_ctx["is_agent_entry_point"] = False
338
+
339
@dont_throw
def _record_instance_state(self, record_point: Literal["before", "after"], span):
    """Write a serialized snapshot of the tracked agent instance onto ``span``.

    Only acts when an agent context is set and its ``track_state`` is truthy.
    The snapshot holds the listed ``track_attributes`` (missing ones become
    ``None``), or, when that list is ``None``, every entry of the instance
    ``__dict__`` whose name does not start with an underscore. Names are
    renamed through ``field_mappings``.

    Args:
        record_point: ``"before"`` selects the state-before attribute key;
            any other value selects the state-after key.
        span: Span that receives the attribute.
    """
    agent_ctx = self.agent_context.get()
    if not agent_ctx or not agent_ctx.get("track_state"):
        return

    instance = agent_ctx.get("instance")
    tracked_names = agent_ctx.get("track_attributes")
    renames = agent_ctx.get("field_mappings", {})

    if tracked_names is None:
        snapshot = {
            renames.get(field, field): value
            for field, value in instance.__dict__.items()
            if not field.startswith("_")
        }
    else:
        snapshot = {
            renames.get(field, field): getattr(instance, field, None)
            for field in tracked_names
        }

    if record_point == "before":
        state_key = AttributeKeys.JUDGMENT_STATE_BEFORE
    else:
        state_key = AttributeKeys.JUDGMENT_STATE_AFTER
    set_span_attribute(span, state_key, safe_serialize(snapshot))
368
+
369
@dont_throw
def _add_customer_id_to_span(self, span):
    """Write the current customer id onto ``span`` when one is set."""
    current_customer = self.get_current_customer_context().get()
    if not current_customer:
        return
    set_span_attribute(span, AttributeKeys.JUDGMENT_CUSTOMER_ID, current_customer)
374
+
375
@dont_throw
def _inject_judgment_context(self, span):
    """Annotate ``span`` with agent attributes, then with the customer id."""
    injectors = (
        self._add_agent_attributes_to_span,
        self._add_customer_id_to_span,
    )
    for inject in injectors:
        inject(span)
379
+
380
+ def _set_pending_trace_eval(
381
+ self,
382
+ span: Span,
383
+ scorer_config: TraceScorerConfig,
384
+ args: Tuple[Any, ...],
385
+ kwargs: Dict[str, Any],
386
+ ):
387
+ if not self.enable_evaluation:
388
+ return
389
+
390
+ scorer = scorer_config.scorer
391
+ run_condition = scorer_config.run_condition
392
+ sampling_rate = scorer_config.sampling_rate
393
+
394
+ if scorer is None:
395
+ judgeval_logger.error("Prompt Scorer was not found, skipping evaluation.")
396
+ return
397
+ if not isinstance(scorer, (TraceAPIScorerConfig)):
398
+ judgeval_logger.error(
399
+ "Scorer must be an instance of TraceAPIScorerConfig, got %s, skipping evaluation."
400
+ % type(scorer)
401
+ )
402
+ return
403
+
404
+ if run_condition is not None and not run_condition(*args, **kwargs):
405
+ return
406
+
407
+ if sampling_rate < 0 or sampling_rate > 1:
408
+ judgeval_logger.error(
409
+ "Sampling rate must be between 0 and 1, got %s, skipping evaluation."
410
+ % sampling_rate
411
+ )
412
+ return
413
+
414
+ percentage = random.uniform(0, 1)
415
+ if percentage > sampling_rate:
416
+ judgeval_logger.info(
417
+ "Sampling rate is %s, skipping evaluation." % sampling_rate
418
+ )
419
+ return
420
+
421
+ span_context = span.get_span_context()
422
+ if span_context == INVALID_SPAN_CONTEXT:
423
+ return
424
+ trace_id = format(span_context.trace_id, "032x")
425
+ span_id = format(span_context.span_id, "016x")
426
+ eval_run_name = f"async_trace_evaluate_{span_id}"
427
+
428
+ eval_run = TraceEvaluationRun(
429
+ project_name=self.project_name,
430
+ eval_name=eval_run_name,
431
+ scorers=[scorer],
432
+ trace_and_span_ids=[(trace_id, span_id)],
433
+ )
434
+ span.set_attribute(
435
+ AttributeKeys.PENDING_TRACE_EVAL,
436
+ safe_serialize(eval_run.model_dump(warnings=False)),
437
+ )
438
+
439
def _create_traced_sync_generator(
    self,
    generator: Generator,
    main_span: Span,
    disable_generator_yield_span: bool = False,
):
    """Wrap a synchronous generator so that iterating it is traced.

    A copy of the current ``contextvars`` context is taken at call time and
    handed to the wrapper together with ``main_span``.

    Args:
        generator: Generator returned by the traced function.
        main_span: Span opened for the traced function call.
        disable_generator_yield_span: Forwarded to the wrapper.

    Returns:
        A ``_ContextPreservedSyncGeneratorWrapper`` around ``generator``.
    """
    return _ContextPreservedSyncGeneratorWrapper(
        self,
        generator,
        contextvars.copy_context(),
        main_span,
        None,
        disable_generator_yield_span,
    )
455
+
456
def _create_traced_async_generator(
    self,
    async_generator: AsyncGenerator,
    main_span: Span,
    disable_generator_yield_span: bool = False,
):
    """Wrap an asynchronous generator so that iterating it is traced.

    A copy of the current ``contextvars`` context is taken at call time and
    handed to the wrapper together with ``main_span``.

    Args:
        async_generator: Async generator returned by the traced function.
        main_span: Span opened for the traced function call.
        disable_generator_yield_span: Forwarded to the wrapper.

    Returns:
        A ``_ContextPreservedAsyncGeneratorWrapper`` around
        ``async_generator``.
    """
    return _ContextPreservedAsyncGeneratorWrapper(
        self,
        async_generator,
        contextvars.copy_context(),
        main_span,
        None,
        disable_generator_yield_span,
    )
472
+
473
+ def _wrap_sync(
474
+ self,
475
+ f: Callable,
476
+ name: Optional[str],
477
+ attributes: Optional[Dict[str, Any]],
478
+ scorer_config: TraceScorerConfig | None = None,
479
+ disable_generator_yield_span: bool = False,
480
+ ):
481
+ @functools.wraps(f)
482
+ def wrapper(*args, **kwargs):
483
+ n = name or f.__qualname__
484
+ with sync_span_context(self, n, attributes) as span:
485
+ is_return_type_generator = False
486
+
487
+ self._inject_judgment_context(span)
488
+ self._record_instance_state("before", span)
489
+ try:
490
+ set_span_attribute(
491
+ span,
492
+ AttributeKeys.JUDGMENT_INPUT,
493
+ safe_serialize(format_inputs(f, args, kwargs)),
494
+ )
495
+
496
+ self.judgment_processor.emit_partial()
497
+
498
+ if scorer_config:
499
+ self._set_pending_trace_eval(span, scorer_config, args, kwargs)
500
+
501
+ result = f(*args, **kwargs)
502
+
503
+ if inspect.isgenerator(result):
504
+ is_return_type_generator = True
505
+ set_span_attribute(
506
+ span, AttributeKeys.JUDGMENT_OUTPUT, "<generator>"
507
+ )
508
+ self._record_instance_state("after", span)
509
+ return self._create_traced_sync_generator(
510
+ result, span, disable_generator_yield_span
511
+ )
512
+ elif inspect.isasyncgen(result):
513
+ is_return_type_generator = True
514
+ set_span_attribute(
515
+ span, AttributeKeys.JUDGMENT_OUTPUT, "<async_generator>"
516
+ )
517
+ self._record_instance_state("after", span)
518
+ return self._create_traced_async_generator(
519
+ result, span, disable_generator_yield_span
520
+ )
521
+ else:
522
+ set_span_attribute(
523
+ span, AttributeKeys.JUDGMENT_OUTPUT, safe_serialize(result)
524
+ )
525
+ self._record_instance_state("after", span)
526
+ self._maybe_clear_customer_context(span)
527
+ return result
528
+ except Exception as user_exc:
529
+ span.record_exception(user_exc)
530
+ span.set_status(Status(StatusCode.ERROR, str(user_exc)))
531
+ self._maybe_clear_customer_context(span)
532
+ raise
533
+ finally:
534
+ if not is_return_type_generator:
535
+ span.end()
536
+
537
+ return wrapper
538
+
539
def _wrap_async(
    self,
    f: Callable,
    name: Optional[str],
    attributes: Optional[Dict[str, Any]],
    scorer_config: TraceScorerConfig | None = None,
    disable_generator_yield_span: bool = False,
):
    """Return an async wrapper that runs ``f`` inside a span.

    The wrapper serializes the call inputs onto the span, awaits ``f`` and
    records the result. When the awaited result is a (sync or async)
    generator object, the span is left open and handed to the matching
    ``_create_traced_*_generator`` helper together with
    ``disable_generator_yield_span``; otherwise the span is ended here.

    Args:
        f: Coroutine function to wrap.
        name: Span name; falls back to ``f.__qualname__`` when falsy.
        attributes: Attributes passed to ``async_span_context``.
        scorer_config: When truthy, registered for the span through
            ``_set_pending_trace_eval`` before ``f`` runs.
        disable_generator_yield_span: Forwarded to the generator helpers.
    """

    @functools.wraps(f)
    async def wrapper(*args, **kwargs):
        span_name = name or f.__qualname__
        async with async_span_context(self, span_name, attributes) as span:
            # Set once the span has been handed to a generator wrapper,
            # which then becomes responsible for ending it.
            span_handed_off = False
            self._inject_judgment_context(span)
            self._record_instance_state("before", span)
            try:
                serialized_inputs = safe_serialize(format_inputs(f, args, kwargs))
                set_span_attribute(
                    span, AttributeKeys.JUDGMENT_INPUT, serialized_inputs
                )

                self.judgment_processor.emit_partial()

                if scorer_config:
                    self._set_pending_trace_eval(span, scorer_config, args, kwargs)

                result = await f(*args, **kwargs)

                if inspect.isasyncgen(result):
                    span_handed_off = True
                    set_span_attribute(
                        span, AttributeKeys.JUDGMENT_OUTPUT, "<async_generator>"
                    )
                    self._record_instance_state("after", span)
                    return self._create_traced_async_generator(
                        result, span, disable_generator_yield_span
                    )

                if inspect.isgenerator(result):
                    span_handed_off = True
                    set_span_attribute(
                        span, AttributeKeys.JUDGMENT_OUTPUT, "<generator>"
                    )
                    self._record_instance_state("after", span)
                    return self._create_traced_sync_generator(
                        result, span, disable_generator_yield_span
                    )

                # Plain value: record it and finish the span in this call.
                serialized_output = safe_serialize(result)
                set_span_attribute(
                    span, AttributeKeys.JUDGMENT_OUTPUT, serialized_output
                )
                self._record_instance_state("after", span)
                self._maybe_clear_customer_context(span)
                return result
            except Exception as user_exc:
                span.record_exception(user_exc)
                span.set_status(Status(StatusCode.ERROR, str(user_exc)))
                self._maybe_clear_customer_context(span)
                raise
            finally:
                if not span_handed_off:
                    span.end()

    return wrapper
602
+
603
@overload
def observe(
    self,
    func: C,
    /,
    *,
    span_type: str | None = "span",
    span_name: str | None = None,
    attributes: Optional[Dict[str, Any]] = None,
    scorer_config: TraceScorerConfig | None = None,
    disable_generator_yield_span: bool = False,
) -> C: ...

@overload
def observe(
    self,
    func: None = None,
    /,
    *,
    span_type: str | None = "span",
    span_name: str | None = None,
    attributes: Optional[Dict[str, Any]] = None,
    scorer_config: TraceScorerConfig | None = None,
    disable_generator_yield_span: bool = False,
) -> Callable[[C], C]: ...

def observe(
    self,
    func: Callable | None = None,
    /,
    *,
    span_type: str | None = "span",
    span_name: str | None = None,
    attributes: Optional[Dict[str, Any]] = None,
    scorer_config: TraceScorerConfig | None = None,
    disable_generator_yield_span: bool = False,
) -> Callable | None:
    """Decorate ``func`` so that each call is recorded as a span.

    Usable bare (``@tracer.observe``) or with keyword options
    (``@tracer.observe(span_type=...)``); in the latter form a partial of
    this method is returned and applied to the function afterwards.

    Args:
        func: Function to wrap, or ``None`` when called with options only.
        span_type: Value stored under ``AttributeKeys.JUDGMENT_SPAN_KIND``.
        span_name: Span name; defaults to the function's ``__qualname__``
            (or ``"function"`` when it has none).
        attributes: Extra span attributes. They are merged after the span
            kind, so a matching key here overrides ``span_type``.
        scorer_config: Forwarded to the sync/async wrapper.
        disable_generator_yield_span: Forwarded to the sync/async wrapper.

    Returns:
        ``func`` unchanged when ``self.enable_monitoring`` is falsy,
        otherwise the wrapper built by ``_wrap_async`` (coroutine
        functions) or ``_wrap_sync`` (everything else).
    """
    if func is None:
        return partial(
            self.observe,
            span_type=span_type,
            span_name=span_name,
            attributes=attributes,
            scorer_config=scorer_config,
            disable_generator_yield_span=disable_generator_yield_span,
        )

    if not self.enable_monitoring:
        return func

    # Handle functions (including generator functions) - generators are
    # detected at runtime by the wrappers, from the returned object.
    name = span_name or getattr(func, "__qualname__", "function")
    func_attributes: Dict[str, Any] = {
        AttributeKeys.JUDGMENT_SPAN_KIND: span_type,
        **(attributes or {}),
    }

    if inspect.iscoroutinefunction(func):
        return self._wrap_async(
            func, name, func_attributes, scorer_config, disable_generator_yield_span
        )
    return self._wrap_sync(
        func, name, func_attributes, scorer_config, disable_generator_yield_span
    )
666
+
667
@overload
def agent(
    self,
    func: C,
    /,
    *,
    identifier: str | None = None,
    track_state: bool = False,
    track_attributes: List[str] | None = None,
    field_mappings: Dict[str, str] | None = None,
) -> C: ...

@overload
def agent(
    self,
    func: None = None,
    /,
    *,
    identifier: str | None = None,
    track_state: bool = False,
    track_attributes: List[str] | None = None,
    field_mappings: Dict[str, str] | None = None,
) -> Callable[[C], C]: ...

def agent(
    self,
    func: Callable | None = None,
    /,
    *,
    identifier: str | None = None,
    track_state: bool = False,
    track_attributes: List[str] | None = None,
    field_mappings: Dict[str, str] | None = None,
) -> Callable | None:
    """
    Agent decorator that creates an agent ID and propagates it to child spans.
    Also captures and propagates the class name if the decorated function is a method.
    Optionally captures instance name based on the specified identifier attribute.

    This decorator should be used in combination with @observe decorator:

        class MyAgent:
            def __init__(self, name):
                self.name = name

            @judgment.agent(identifier="name")
            @judgment.observe(span_type="function")
            def my_agent_method(self):
                # This span and all child spans will have:
                # - agent_id: auto-generated UUID
                # - class_name: "MyAgent"
                # - instance_name: self.name value
                pass

    Args:
        func: Function to wrap, or ``None`` when called with options only.
        identifier: Name of the instance attribute to use as the instance name
        track_state: Forwarded unchanged to the agent context manager.
        track_attributes: Forwarded unchanged to the agent context manager.
        field_mappings: Forwarded to the agent context manager; ``None``
            (the default) is replaced by a fresh empty dict per decoration.

    Returns:
        ``func`` unchanged when ``self.enable_monitoring`` is falsy,
        otherwise a wrapper that runs ``func`` inside
        ``async_agent_context`` (coroutine functions) or
        ``sync_agent_context`` (everything else).
    """
    # A fresh dict per decoration avoids sharing one mutable default
    # between every decorated function.
    mappings: Dict[str, str] = {} if field_mappings is None else field_mappings

    if func is None:
        return partial(
            self.agent,
            identifier=identifier,
            track_state=track_state,
            track_attributes=track_attributes,
            field_mappings=mappings,
        )

    if not self.enable_monitoring:
        return func

    class_name = None
    if hasattr(func, "__qualname__") and "." in func.__qualname__:
        owner = func.__qualname__.split(".")[-2]
        # "<locals>" marks a function nested in another function, not a
        # method, so there is no class name to report.
        if owner != "<locals>":
            class_name = owner

    if inspect.iscoroutinefunction(func):

        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            async with async_agent_context(
                tracer=self,
                args=args,
                class_name=class_name,
                identifier=identifier,
                track_state=track_state,
                track_attributes=track_attributes,
                field_mappings=mappings,
            ):
                return await func(*args, **kwargs)

        return async_wrapper

    @functools.wraps(func)
    def sync_wrapper(*args, **kwargs):
        with sync_agent_context(
            tracer=self,
            args=args,
            class_name=class_name,
            identifier=identifier,
            track_state=track_state,
            track_attributes=track_attributes,
            field_mappings=mappings,
        ):
            return func(*args, **kwargs)

    return sync_wrapper
774
+
775
def wrap(self, client: ApiClient) -> ApiClient:
    """Return the result of ``wrap_provider`` applied to ``client`` for this tracer."""
    wrapped_client = wrap_provider(self, client)
    return wrapped_client
777
+
778
def force_flush(self, timeout_millis: int = 30000) -> bool:
    """Flush pending spans through ``self.judgment_processor``.

    Args:
        timeout_millis: Maximum time to wait for flush completion in milliseconds

    Returns:
        Whatever the processor's ``force_flush`` returns, or ``False`` when
        it raises (the error is logged as a warning, not propagated).
    """
    try:
        flushed = self.judgment_processor.force_flush(timeout_millis)
    except Exception as e:
        judgeval_logger.warning(f"Error flushing processor: {e}")
        return False
    return flushed
792
+
793
+ def _atexit_flush(self, timeout_millis: int = 30000) -> None:
794
+ """Internal method called on program exit to flush remaining spans.
795
+
796
+ This blocks until all spans are flushed or timeout is reached to ensure
797
+ proper cleanup before program termination.
798
+ """
799
+ try:
800
+ self.force_flush(timeout_millis=timeout_millis)
801
+ except Exception as e:
802
+ judgeval_logger.warning(f"Error during atexit flush: {e}")
803
+
804
@dont_throw
def async_evaluate(
    self,
    /,
    *,
    scorer: Union[ExampleAPIScorerConfig, ExampleScorer, None],
    example: Example,
    sampling_rate: float = 1.0,
):
    """Queue ``example`` for scoring by ``scorer``, tied to the current span.

    Every precondition failure is logged and the call returns ``None``
    without queuing anything. Checks, in order: evaluation/monitoring
    enabled, scorer present and of a supported type, ``example`` is an
    ``Example``, ``sampling_rate`` within [0, 1], the random sampling draw,
    and a valid current span context. Only hosted scorers are queued.

    Args:
        scorer: Scorer to run; ``None`` is logged as an error and skipped.
        example: Example to evaluate.
        sampling_rate: Fraction of calls to evaluate, between 0 and 1.
    """
    if not (self.enable_evaluation and self.enable_monitoring):
        judgeval_logger.info("Evaluation is not enabled, skipping evaluation")
        return

    if scorer is None:
        judgeval_logger.error("Prompt Scorer was not found, skipping evaluation.")
        return

    if not isinstance(scorer, (ExampleAPIScorerConfig, ExampleScorer)):
        scorer_kind = type(scorer)
        judgeval_logger.error(
            "Scorer must be an instance of ExampleAPIScorerConfig or ExampleScorer, got %s, skipping evaluation."
            % scorer_kind
        )
        return

    if not isinstance(example, Example):
        example_kind = type(example)
        judgeval_logger.error(
            "Example must be an instance of Example, got %s, skipping evaluation."
            % example_kind
        )
        return

    if sampling_rate < 0 or sampling_rate > 1:
        judgeval_logger.error(
            "Sampling rate must be between 0 and 1, got %s, skipping evaluation."
            % sampling_rate
        )
        return

    # Random draw in [0, 1]; the call is skipped when it exceeds the rate.
    draw = random.uniform(0, 1)
    if draw > sampling_rate:
        judgeval_logger.info(
            "Sampling rate is %s, skipping evaluation." % sampling_rate
        )
        return

    current_span = self.get_current_span()
    span_context = current_span.get_span_context()
    if span_context == INVALID_SPAN_CONTEXT:
        judgeval_logger.warning(
            "No span context was found for async_evaluate, skipping evaluation. Please make sure to use the @observe decorator on the function you are evaluating."
        )
        return

    # Zero-padded lowercase hex renderings of the trace and span ids.
    trace_id = format(span_context.trace_id, "032x")
    span_id = format(span_context.span_id, "016x")
    hosted_scoring = isinstance(scorer, ExampleAPIScorerConfig) or (
        isinstance(scorer, ExampleScorer) and scorer.server_hosted
    )
    eval_run = ExampleEvaluationRun(
        project_name=self.project_name,
        # note this name doesnt matter because we don't save the experiment only the example and scorer_data
        eval_name=f"async_evaluate_{span_id}",
        examples=[example],
        scorers=[scorer],
        trace_span_id=span_id,
        trace_id=trace_id,
    )
    if not hosted_scoring:
        judgeval_logger.warning(
            "The scorer provided is not hosted, skipping evaluation."
        )
        return

    payload = eval_run.model_dump(warnings=False)  # type: ignore
    self.api_client.add_to_run_eval_queue_examples(payload)
878
+
879
+
880
def wrap(client: ApiClient) -> ApiClient:
    """Wrap ``client`` with the ``Tracer`` returned by ``Tracer.get_instance()``.

    Returns ``client`` unchanged, after emitting a ``JudgmentWarning``, when
    no tracer instance exists, when the tracer is not initialized, or when
    any exception occurs along the way.
    """
    try:
        tracer = Tracer.get_instance()
        if tracer is None or not isinstance(tracer, Tracer):
            problem = (
                "No Tracer instance found, client will not be wrapped. "
                "Create a Tracer instance first."
            )
        elif not tracer._initialized:
            problem = (
                "Tracer not initialized, client will not be wrapped. "
                "Call Tracer.initialize() first to setup the tracer."
            )
        else:
            return tracer.wrap(client)

        # Only reached when one of the preconditions above failed.
        warn(problem, JudgmentWarning, stacklevel=2)
        return client
    except Exception:
        warn(
            "Error accessing tracer singleton, client will not be wrapped.",
            JudgmentWarning,
            stacklevel=2,
        )
        return client
907
+
908
+
909
def format_inputs(
    f: Callable, args: Tuple[Any, ...], kwargs: Dict[str, Any]
) -> Dict[str, Any]:
    """Map the arguments of a call onto the parameter names of ``f``.

    Only arguments that were actually supplied are reported; parameter
    defaults are not filled in.

    Args:
        f: Callable whose signature provides the parameter names.
        args: Positional arguments of the call.
        kwargs: Keyword arguments of the call.

    Returns:
        A dict keyed by parameter name. ``*args``-style parameters map to
        the tuple of remaining positionals and ``**kwargs``-style
        parameters to the keywords not bound to a named parameter. An
        empty dict is returned if the signature cannot be inspected or any
        other error occurs.
    """
    try:
        params = list(inspect.signature(f).parameters.values())
        positional_kinds = (
            inspect.Parameter.POSITIONAL_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )
        keyword_kinds = (
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
            inspect.Parameter.KEYWORD_ONLY,
        )
        # Names a keyword argument can bind to directly; anything else in
        # kwargs belongs to the **kwargs parameter.
        keyword_names = {p.name for p in params if p.kind in keyword_kinds}

        inputs: Dict[str, Any] = {}
        arg_i = 0
        for param in params:
            if param.kind in positional_kinds:
                if arg_i < len(args):
                    # Positional-only parameters must consume a positional
                    # too, otherwise later names shift onto wrong values.
                    inputs[param.name] = args[arg_i]
                    arg_i += 1
                elif param.kind in keyword_kinds and param.name in kwargs:
                    inputs[param.name] = kwargs[param.name]
            elif param.kind == inspect.Parameter.VAR_POSITIONAL:
                inputs[param.name] = args[arg_i:]
                arg_i = len(args)
            elif param.kind == inspect.Parameter.KEYWORD_ONLY:
                if param.name in kwargs:
                    inputs[param.name] = kwargs[param.name]
            elif param.kind == inspect.Parameter.VAR_KEYWORD:
                inputs[param.name] = {
                    key: value
                    for key, value in kwargs.items()
                    if key not in keyword_names
                }
        return inputs
    except Exception:
        # Best effort: input capture must never break the traced call.
        return {}
931
+
932
+
933
class _ContextPreservedSyncGeneratorWrapper:
    """Sync generator wrapper that ensures each iteration runs in preserved context.

    The wrapper owns ``span``: it ends the span exactly once, when the
    generator is exhausted, raises, or is closed.
    """

    def __init__(
        self,
        tracer: Tracer,
        generator: Generator,
        context: contextvars.Context,
        span: Span,
        transform_fn: Optional[Callable[[Iterable], str]],
        disable_generator_yield_span: bool = False,
    ) -> None:
        self.tracer = tracer
        self.generator = generator
        self.context = context
        self.span = span
        # Stored but not used by any method of this class.
        self.transform_fn = transform_fn
        # True once the span has been ended; guards against ending it twice.
        self._finished = False
        self.disable_generator_yield_span = disable_generator_yield_span

    def __iter__(self) -> "_ContextPreservedSyncGeneratorWrapper":
        return self

    def _finish_span(self, error: Optional[BaseException] = None) -> None:
        """End the owned span once, recording ``error`` on it when given."""
        if self._finished:
            return
        if error is None:
            set_span_attribute(
                self.span, AttributeKeys.JUDGMENT_SPAN_KIND, "generator"
            )
        else:
            self.span.record_exception(error)
            self.span.set_status(
                Status(StatusCode.ERROR, str(error) or type(error).__name__)
            )
        # Cleared on every way out (exhaustion, error, close), matching what
        # the non-generator return path does before its span ends.
        self.tracer._maybe_clear_customer_context(self.span)
        self.span.end()
        self._finished = True

    def __next__(self) -> Any:
        try:
            # Run the generator's __next__ in the preserved context
            item = self.context.run(next, self.generator)

            if not self.disable_generator_yield_span:
                with use_span(self.span):
                    span_name = (
                        str(self.span.name)
                        if hasattr(self.span, "name")
                        else "generator_item"
                    )  # type: ignore[attr-defined]
                    # One child span per yielded item, carrying its value.
                    with self.tracer.get_tracer().start_as_current_span(
                        span_name,
                        attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "generator_item"},
                        end_on_exit=True,
                    ) as child_span:
                        set_span_attribute(
                            child_span,
                            AttributeKeys.JUDGMENT_OUTPUT,
                            safe_serialize(item),
                        )

            return item

        except StopIteration:
            # Handle span cleanup when generator is exhausted
            self._finish_span()
            raise  # Re-raise StopIteration

        except Exception as e:
            self._finish_span(e)
            raise

    def close(self) -> None:
        """Close the generator (minimal implementation)."""
        try:
            self.generator.close()
        finally:
            self._finish_span()
1016
+
1017
+
1018
class _ContextPreservedAsyncGeneratorWrapper:
    """Async generator wrapper that ensures each iteration runs in preserved context.

    The wrapper owns ``span``: it ends the span exactly once, when the
    generator is exhausted, raises, or is closed.
    """

    def __init__(
        self,
        tracer: Tracer,
        generator: AsyncGenerator,
        context: contextvars.Context,
        span: Span,
        transform_fn: Optional[Callable[[Iterable], str]],
        disable_generator_yield_span: bool = False,
    ) -> None:
        self.tracer = tracer
        self.generator = generator
        self.context = context
        self.span = span
        # Stored but not used by any method of this class.
        self.transform_fn = transform_fn
        # True once the span has been ended; guards against ending it twice.
        self._finished = False
        self.disable_generator_yield_span = disable_generator_yield_span

    def __aiter__(self) -> "_ContextPreservedAsyncGeneratorWrapper":
        return self

    def _finish_span(self, error: Optional[BaseException] = None) -> None:
        """End the owned span once, recording ``error`` on it when given."""
        if self._finished:
            return
        if error is None:
            set_span_attribute(
                self.span, AttributeKeys.JUDGMENT_SPAN_KIND, "generator"
            )
        else:
            self.span.record_exception(error)
            self.span.set_status(
                Status(StatusCode.ERROR, str(error) or type(error).__name__)
            )
        # Cleared on every way out (exhaustion, error, close), matching what
        # the non-generator return path does before its span ends.
        self.tracer._maybe_clear_customer_context(self.span)
        self.span.end()
        self._finished = True

    async def __anext__(self) -> Any:
        try:
            # Run the generator's __anext__ in the preserved context
            pending = self.generator.__anext__()
            try:
                # The ``context`` keyword of create_task exists from
                # Python 3.11. Only task *creation* is guarded: a TypeError
                # raised by the generator body while awaiting must reach
                # the caller instead of triggering the fallback.
                task = asyncio.create_task(
                    pending,  # type: ignore
                    context=self.context,
                )  # type: ignore
            except TypeError:
                # Fallback when the task could not be created with a
                # context; ``pending`` has not been started, so await it.
                item = await pending
            else:
                item = await task

            if not self.disable_generator_yield_span:
                with use_span(self.span):
                    span_name = (
                        str(self.span.name)
                        if hasattr(self.span, "name")
                        else "generator_item"
                    )  # type: ignore[attr-defined]
                    # One child span per yielded item, carrying its value.
                    with self.tracer.get_tracer().start_as_current_span(
                        span_name,
                        attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "generator_item"},
                        end_on_exit=True,
                    ) as child_span:
                        set_span_attribute(
                            child_span,
                            AttributeKeys.JUDGMENT_OUTPUT,
                            safe_serialize(item),
                        )

            return item

        except StopAsyncIteration:
            # Handle span cleanup when generator is exhausted
            self._finish_span()
            raise  # Re-raise StopAsyncIteration
        except Exception as e:
            self._finish_span(e)
            raise

    async def aclose(self) -> None:
        """Close the async generator (minimal implementation)."""
        try:
            await self.generator.aclose()
        finally:
            self._finish_span()
1107
+
1108
+
1109
# Names exported by ``from <module> import *``.
__all__ = ["Tracer", "wrap"]