judgeval 0.0.11__py3-none-any.whl → 0.22.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of judgeval might be problematic. Click here for more details.

Files changed (171)
  1. judgeval/__init__.py +177 -12
  2. judgeval/api/__init__.py +519 -0
  3. judgeval/api/api_types.py +407 -0
  4. judgeval/cli.py +79 -0
  5. judgeval/constants.py +76 -47
  6. judgeval/data/__init__.py +3 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +15 -56
  9. judgeval/data/judgment_types.py +450 -0
  10. judgeval/data/result.py +29 -73
  11. judgeval/data/scorer_data.py +29 -62
  12. judgeval/data/scripts/fix_default_factory.py +23 -0
  13. judgeval/data/scripts/openapi_transform.py +123 -0
  14. judgeval/data/trace.py +121 -0
  15. judgeval/dataset/__init__.py +264 -0
  16. judgeval/env.py +52 -0
  17. judgeval/evaluation/__init__.py +344 -0
  18. judgeval/exceptions.py +27 -0
  19. judgeval/integrations/langgraph/__init__.py +13 -0
  20. judgeval/integrations/openlit/__init__.py +50 -0
  21. judgeval/judges/__init__.py +2 -3
  22. judgeval/judges/base_judge.py +2 -3
  23. judgeval/judges/litellm_judge.py +100 -20
  24. judgeval/judges/together_judge.py +101 -20
  25. judgeval/judges/utils.py +20 -24
  26. judgeval/logger.py +62 -0
  27. judgeval/prompt/__init__.py +330 -0
  28. judgeval/scorers/__init__.py +18 -25
  29. judgeval/scorers/agent_scorer.py +17 -0
  30. judgeval/scorers/api_scorer.py +45 -41
  31. judgeval/scorers/base_scorer.py +83 -38
  32. judgeval/scorers/example_scorer.py +17 -0
  33. judgeval/scorers/exceptions.py +1 -0
  34. judgeval/scorers/judgeval_scorers/__init__.py +0 -148
  35. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +19 -17
  36. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +13 -19
  37. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +12 -19
  38. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +13 -19
  39. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +15 -0
  40. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +327 -0
  41. judgeval/scorers/score.py +77 -306
  42. judgeval/scorers/utils.py +4 -199
  43. judgeval/tracer/__init__.py +1122 -2
  44. judgeval/tracer/constants.py +1 -0
  45. judgeval/tracer/exporters/__init__.py +40 -0
  46. judgeval/tracer/exporters/s3.py +119 -0
  47. judgeval/tracer/exporters/store.py +59 -0
  48. judgeval/tracer/exporters/utils.py +32 -0
  49. judgeval/tracer/keys.py +63 -0
  50. judgeval/tracer/llm/__init__.py +7 -0
  51. judgeval/tracer/llm/config.py +78 -0
  52. judgeval/tracer/llm/constants.py +9 -0
  53. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  54. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  55. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  56. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  57. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  58. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  59. judgeval/tracer/llm/llm_google/config.py +6 -0
  60. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  61. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  62. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  63. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  64. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  65. judgeval/tracer/llm/llm_openai/config.py +6 -0
  66. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  67. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  68. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  69. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  70. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  71. judgeval/tracer/llm/llm_together/config.py +6 -0
  72. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  73. judgeval/tracer/llm/providers.py +19 -0
  74. judgeval/tracer/managers.py +167 -0
  75. judgeval/tracer/processors/__init__.py +220 -0
  76. judgeval/tracer/utils.py +19 -0
  77. judgeval/trainer/__init__.py +14 -0
  78. judgeval/trainer/base_trainer.py +122 -0
  79. judgeval/trainer/config.py +128 -0
  80. judgeval/trainer/console.py +144 -0
  81. judgeval/trainer/fireworks_trainer.py +396 -0
  82. judgeval/trainer/trainable_model.py +243 -0
  83. judgeval/trainer/trainer.py +70 -0
  84. judgeval/utils/async_utils.py +39 -0
  85. judgeval/utils/decorators/__init__.py +0 -0
  86. judgeval/utils/decorators/dont_throw.py +37 -0
  87. judgeval/utils/decorators/use_once.py +13 -0
  88. judgeval/utils/file_utils.py +97 -0
  89. judgeval/utils/guards.py +36 -0
  90. judgeval/utils/meta.py +27 -0
  91. judgeval/utils/project.py +15 -0
  92. judgeval/utils/serialize.py +253 -0
  93. judgeval/utils/testing.py +70 -0
  94. judgeval/utils/url.py +10 -0
  95. judgeval/utils/version_check.py +28 -0
  96. judgeval/utils/wrappers/README.md +3 -0
  97. judgeval/utils/wrappers/__init__.py +15 -0
  98. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  99. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  100. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  101. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  102. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  103. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  104. judgeval/utils/wrappers/py.typed +0 -0
  105. judgeval/utils/wrappers/utils.py +35 -0
  106. judgeval/version.py +5 -0
  107. judgeval/warnings.py +4 -0
  108. judgeval-0.22.2.dist-info/METADATA +265 -0
  109. judgeval-0.22.2.dist-info/RECORD +112 -0
  110. judgeval-0.22.2.dist-info/entry_points.txt +2 -0
  111. judgeval/clients.py +0 -39
  112. judgeval/common/__init__.py +0 -8
  113. judgeval/common/exceptions.py +0 -28
  114. judgeval/common/logger.py +0 -189
  115. judgeval/common/tracer.py +0 -798
  116. judgeval/common/utils.py +0 -763
  117. judgeval/data/api_example.py +0 -111
  118. judgeval/data/datasets/__init__.py +0 -5
  119. judgeval/data/datasets/dataset.py +0 -286
  120. judgeval/data/datasets/eval_dataset_client.py +0 -193
  121. judgeval/data/datasets/ground_truth.py +0 -54
  122. judgeval/data/datasets/utils.py +0 -74
  123. judgeval/evaluation_run.py +0 -132
  124. judgeval/judges/mixture_of_judges.py +0 -248
  125. judgeval/judgment_client.py +0 -354
  126. judgeval/run_evaluation.py +0 -439
  127. judgeval/scorers/judgeval_scorer.py +0 -140
  128. judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -19
  129. judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -19
  130. judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -22
  131. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -19
  132. judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -32
  133. judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -20
  134. judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py +0 -19
  135. judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -3
  136. judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -3
  137. judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -54
  138. judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -24
  139. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -4
  140. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -277
  141. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -169
  142. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -4
  143. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -298
  144. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -174
  145. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -3
  146. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -264
  147. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -106
  148. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -3
  149. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -254
  150. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -142
  151. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -3
  152. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -245
  153. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -121
  154. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -3
  155. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -325
  156. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -268
  157. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -3
  158. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -263
  159. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -104
  160. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -5
  161. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -134
  162. judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -3
  163. judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -247
  164. judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -550
  165. judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py +0 -3
  166. judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py +0 -157
  167. judgeval/scorers/prompt_scorer.py +0 -439
  168. judgeval-0.0.11.dist-info/METADATA +0 -36
  169. judgeval-0.0.11.dist-info/RECORD +0 -84
  170. {judgeval-0.0.11.dist-info → judgeval-0.22.2.dist-info}/WHEEL +0 -0
  171. {judgeval-0.0.11.dist-info → judgeval-0.22.2.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,501 @@
1
+ from __future__ import annotations
2
+ from typing import (
3
+ TYPE_CHECKING,
4
+ Any,
5
+ Awaitable,
6
+ Callable,
7
+ Dict,
8
+ Iterator,
9
+ AsyncIterator,
10
+ Generator,
11
+ AsyncGenerator,
12
+ ParamSpec,
13
+ TypeVar,
14
+ )
15
+ from packaging import version
16
+
17
+ from judgeval.tracer.keys import AttributeKeys
18
+ from judgeval.tracer.utils import set_span_attribute
19
+ from judgeval.utils.serialize import safe_serialize
20
+ from judgeval.utils.wrappers import (
21
+ immutable_wrap_async,
22
+ immutable_wrap_sync,
23
+ mutable_wrap_sync,
24
+ mutable_wrap_async,
25
+ immutable_wrap_sync_iterator,
26
+ immutable_wrap_async_iterator,
27
+ )
28
+ from judgeval.tracer.llm.llm_openai.utils import (
29
+ openai_tokens_converter,
30
+ set_cost_attribute,
31
+ )
32
+
33
+ if TYPE_CHECKING:
34
+ from judgeval.tracer import Tracer
35
+ from openai import OpenAI, AsyncOpenAI
36
+ from openai.types.chat import ChatCompletion, ChatCompletionChunk
37
+
38
+ P = ParamSpec("P")
39
+ T = TypeVar("T")
40
+
41
+
42
+ def _supports_stream_options() -> bool:
43
+ try:
44
+ import openai
45
+
46
+ return version.parse(openai.__version__) >= version.parse("1.26.0")
47
+ except Exception:
48
+ return False
49
+
50
+
51
def wrap_chat_completions_create_sync(tracer: Tracer, client: OpenAI) -> None:
    """Monkey-patch ``client.chat.completions.create`` with tracing.

    The original bound method is captured in a closure and replaced with a
    dispatcher that routes to a streaming-aware or non-streaming traced
    wrapper depending on the ``stream`` keyword argument.

    Args:
        tracer: Tracer used to start and annotate spans.
        client: A synchronous ``OpenAI`` client instance to patch in place.
    """
    original_func = client.chat.completions.create

    # Build each traced wrapper once. Previously the dispatcher re-invoked
    # _wrap_*_sync on every call, reconstructing all hook closures per request.
    streaming_wrapper = _wrap_streaming_sync(tracer, original_func)
    non_streaming_wrapper = _wrap_non_streaming_sync(tracer, original_func)

    def dispatcher(*args: Any, **kwargs: Any) -> Any:
        if kwargs.get("stream", False):
            return streaming_wrapper(*args, **kwargs)
        return non_streaming_wrapper(*args, **kwargs)

    setattr(client.chat.completions, "create", dispatcher)
60
+
61
+
62
def _wrap_non_streaming_sync(
    tracer: Tracer, original_func: Callable[..., ChatCompletion]
) -> Callable[..., ChatCompletion]:
    """Wrap a synchronous, non-streaming ``chat.completions.create`` call.

    Opens an LLM span before the request, records the serialized prompt and
    model name, then on success records the completion, token usage, and
    cost. The span is ended in a finally hook so it closes even on error.

    Args:
        tracer: Tracer used to start and annotate the span.
        original_func: The unpatched ``create`` callable.

    Returns:
        A callable with the same signature as ``original_func``.
    """

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        # Span is stashed in ctx so the post/error/finally hooks can reach it.
        ctx["span"] = tracer.get_tracer().start_span(
            "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        tracer._inject_judgment_context(ctx["span"])
        set_span_attribute(
            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
        )
        ctx["model_name"] = kwargs.get("model", "")
        set_span_attribute(
            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
        )

    def post_hook(ctx: Dict[str, Any], result: ChatCompletion) -> None:
        span = ctx.get("span")
        if not span:
            return

        set_span_attribute(
            span, AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result)
        )

        usage_data = result.usage
        if usage_data:
            prompt_tokens = usage_data.prompt_tokens or 0
            completion_tokens = usage_data.completion_tokens or 0
            cache_read = 0
            prompt_tokens_details = usage_data.prompt_tokens_details
            if prompt_tokens_details:
                cache_read = prompt_tokens_details.cached_tokens or 0

            set_cost_attribute(span, usage_data)

            # OpenAI does not report cache-creation tokens, so 0 is passed in.
            prompt_tokens, completion_tokens, cache_read, cache_creation = (
                openai_tokens_converter(
                    prompt_tokens,
                    completion_tokens,
                    cache_read,
                    0,
                    usage_data.total_tokens,
                )
            )

            set_span_attribute(
                span,
                AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
                prompt_tokens,
            )
            set_span_attribute(
                span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
            )
            set_span_attribute(
                span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
            )
            # NOTE(review): was a hard-coded 0, leaving the unpacked
            # cache_creation dead; use the converter's normalized value so the
            # attribute stays consistent with the other converted fields.
            # (Expected to remain 0 for OpenAI — confirm converter semantics.)
            set_span_attribute(
                span,
                AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
                cache_creation,
            )
            set_span_attribute(
                span,
                AttributeKeys.JUDGMENT_USAGE_METADATA,
                safe_serialize(usage_data),
            )

        # Prefer the model the server actually used over the requested one.
        set_span_attribute(
            span,
            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
            result.model or ctx["model_name"],
        )

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        span = ctx.get("span")
        if span:
            span.record_exception(error)

    def finally_hook(ctx: Dict[str, Any]) -> None:
        span = ctx.get("span")
        if span:
            span.end()

    return immutable_wrap_sync(
        original_func,
        pre_hook=pre_hook,
        post_hook=post_hook,
        error_hook=error_hook,
        finally_hook=finally_hook,
    )
151
+
152
+
153
def _wrap_streaming_sync(
    tracer: Tracer, original_func: Callable[..., Iterator[ChatCompletionChunk]]
) -> Callable[..., Iterator[ChatCompletionChunk]]:
    """Wrap a synchronous streaming ``chat.completions.create`` call.

    Opens an LLM span before the request, asks the API to include usage on
    the final chunk (when the installed SDK supports ``stream_options``),
    accumulates the streamed completion text chunk by chunk, records token
    usage and cost from any chunk carrying ``usage``, and ends the span when
    the stream is exhausted or fails.

    Args:
        tracer: Tracer used to start and annotate the span.
        original_func: The unpatched ``create`` callable (streaming mode).

    Returns:
        A callable with the same signature as ``original_func`` whose
        returned iterator is instrumented.
    """

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        ctx["span"] = tracer.get_tracer().start_span(
            "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        tracer._inject_judgment_context(ctx["span"])
        set_span_attribute(
            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
        )
        ctx["model_name"] = kwargs.get("model", "")
        set_span_attribute(
            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
        )
        ctx["accumulated_content"] = ""

    def mutate_kwargs_hook(ctx: Dict[str, Any], kwargs: Any) -> Any:
        # Request usage on the final chunk, but never override
        # caller-supplied stream_options.
        if "stream_options" not in kwargs and _supports_stream_options():
            modified_kwargs = dict(kwargs)
            modified_kwargs["stream_options"] = {"include_usage": True}
            return modified_kwargs
        return kwargs

    def mutate_hook(
        ctx: Dict[str, Any], result: Iterator[ChatCompletionChunk]
    ) -> Iterator[ChatCompletionChunk]:
        def traced_generator() -> Generator[ChatCompletionChunk, None, None]:
            for chunk in result:
                yield chunk

        def yield_hook(inner_ctx: Dict[str, Any], chunk: ChatCompletionChunk) -> None:
            # The outer ctx (not inner_ctx) carries the span and accumulator.
            span = ctx.get("span")
            if not span:
                return

            if chunk.choices and len(chunk.choices) > 0:
                delta = chunk.choices[0].delta
                if delta and delta.content:
                    ctx["accumulated_content"] = (
                        ctx.get("accumulated_content", "") + delta.content
                    )

            if hasattr(chunk, "usage") and chunk.usage:
                prompt_tokens = chunk.usage.prompt_tokens or 0
                completion_tokens = chunk.usage.completion_tokens or 0
                cache_read = 0
                if chunk.usage.prompt_tokens_details:
                    cache_read = chunk.usage.prompt_tokens_details.cached_tokens or 0

                set_cost_attribute(span, chunk.usage)

                prompt_tokens, completion_tokens, cache_read, cache_creation = (
                    openai_tokens_converter(
                        prompt_tokens,
                        completion_tokens,
                        cache_read,
                        0,
                        chunk.usage.total_tokens,
                    )
                )

                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
                    prompt_tokens,
                )
                set_span_attribute(
                    span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
                )
                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
                    cache_read,
                )
                # NOTE(review): was a hard-coded 0, leaving the unpacked
                # cache_creation dead; use the converter's value for
                # consistency (expected to remain 0 for OpenAI).
                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
                    cache_creation,
                )
                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_USAGE_METADATA,
                    safe_serialize(chunk.usage),
                )

        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            # Stream finished normally: record the full accumulated text.
            span = ctx.get("span")
            if span:
                accumulated = ctx.get("accumulated_content", "")
                set_span_attribute(span, AttributeKeys.GEN_AI_COMPLETION, accumulated)

        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
            span = ctx.get("span")
            if span:
                span.record_exception(error)

        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            # End the span only when the consumer finishes/abandons the stream.
            span = ctx.get("span")
            if span:
                span.end()

        wrapped_generator = immutable_wrap_sync_iterator(
            traced_generator,
            yield_hook=yield_hook,
            post_hook=post_hook_inner,
            error_hook=error_hook_inner,
            finally_hook=finally_hook_inner,
        )

        return wrapped_generator()

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        # Covers failures of the create() call itself, before any iteration.
        span = ctx.get("span")
        if span:
            span.record_exception(error)

    return mutable_wrap_sync(
        original_func,
        pre_hook=pre_hook,
        mutate_kwargs_hook=mutate_kwargs_hook,
        mutate_hook=mutate_hook,
        error_hook=error_hook,
    )
275
+
276
+
277
def wrap_chat_completions_create_async(tracer: Tracer, client: AsyncOpenAI) -> None:
    """Monkey-patch an async ``client.chat.completions.create`` with tracing.

    Mirrors :func:`wrap_chat_completions_create_sync`: the original bound
    method is captured in a closure and replaced with an async dispatcher
    that routes on the ``stream`` keyword argument.

    Args:
        tracer: Tracer used to start and annotate spans.
        client: An ``AsyncOpenAI`` client instance to patch in place.
    """
    original_func = client.chat.completions.create

    # Build each traced wrapper once. Previously the dispatcher re-invoked
    # _wrap_*_async on every call, reconstructing all hook closures per request.
    streaming_wrapper = _wrap_streaming_async(tracer, original_func)
    non_streaming_wrapper = _wrap_non_streaming_async(tracer, original_func)

    async def dispatcher(*args: Any, **kwargs: Any) -> Any:
        if kwargs.get("stream", False):
            return await streaming_wrapper(*args, **kwargs)
        return await non_streaming_wrapper(*args, **kwargs)

    setattr(client.chat.completions, "create", dispatcher)
286
+
287
+
288
def _wrap_non_streaming_async(
    tracer: Tracer, original_func: Callable[..., Awaitable[ChatCompletion]]
) -> Callable[..., Awaitable[ChatCompletion]]:
    """Wrap an async, non-streaming ``chat.completions.create`` call.

    Async counterpart of :func:`_wrap_non_streaming_sync`: opens an LLM span
    before the request, records prompt/model, then on success records the
    completion, token usage, and cost; the span ends in a finally hook.

    Args:
        tracer: Tracer used to start and annotate the span.
        original_func: The unpatched async ``create`` callable.

    Returns:
        An awaitable callable with the same signature as ``original_func``.
    """

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        ctx["span"] = tracer.get_tracer().start_span(
            "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        tracer._inject_judgment_context(ctx["span"])
        set_span_attribute(
            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
        )
        ctx["model_name"] = kwargs.get("model", "")
        set_span_attribute(
            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
        )

    def post_hook(ctx: Dict[str, Any], result: ChatCompletion) -> None:
        span = ctx.get("span")
        if not span:
            return

        set_span_attribute(
            span, AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result)
        )

        usage_data = result.usage
        if usage_data:
            prompt_tokens = usage_data.prompt_tokens or 0
            completion_tokens = usage_data.completion_tokens or 0
            cache_read = 0
            prompt_tokens_details = usage_data.prompt_tokens_details
            if prompt_tokens_details:
                cache_read = prompt_tokens_details.cached_tokens or 0

            set_cost_attribute(span, usage_data)

            # OpenAI does not report cache-creation tokens, so 0 is passed in.
            prompt_tokens, completion_tokens, cache_read, cache_creation = (
                openai_tokens_converter(
                    prompt_tokens,
                    completion_tokens,
                    cache_read,
                    0,
                    usage_data.total_tokens,
                )
            )

            set_span_attribute(
                span,
                AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
                prompt_tokens,
            )
            set_span_attribute(
                span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
            )
            set_span_attribute(
                span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
            )
            # NOTE(review): was a hard-coded 0, leaving the unpacked
            # cache_creation dead; use the converter's normalized value for
            # consistency (expected to remain 0 for OpenAI).
            set_span_attribute(
                span,
                AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
                cache_creation,
            )
            set_span_attribute(
                span,
                AttributeKeys.JUDGMENT_USAGE_METADATA,
                safe_serialize(usage_data),
            )

        # Prefer the model the server actually used over the requested one.
        set_span_attribute(
            span,
            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
            result.model or ctx["model_name"],
        )

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        span = ctx.get("span")
        if span:
            span.record_exception(error)

    def finally_hook(ctx: Dict[str, Any]) -> None:
        span = ctx.get("span")
        if span:
            span.end()

    return immutable_wrap_async(
        original_func,
        pre_hook=pre_hook,
        post_hook=post_hook,
        error_hook=error_hook,
        finally_hook=finally_hook,
    )
377
+
378
+
379
def _wrap_streaming_async(
    tracer: Tracer,
    original_func: Callable[..., Awaitable[AsyncIterator[ChatCompletionChunk]]],
) -> Callable[..., Awaitable[AsyncIterator[ChatCompletionChunk]]]:
    """Wrap an async streaming ``chat.completions.create`` call.

    Async counterpart of :func:`_wrap_streaming_sync`: opens an LLM span,
    requests usage on the final chunk when the SDK supports
    ``stream_options``, accumulates streamed text, records usage/cost from
    any chunk carrying ``usage``, and ends the span when the stream is
    exhausted or fails.

    Args:
        tracer: Tracer used to start and annotate the span.
        original_func: The unpatched async ``create`` callable (streaming).

    Returns:
        An awaitable callable with the same signature as ``original_func``
        whose returned async iterator is instrumented.
    """

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        ctx["span"] = tracer.get_tracer().start_span(
            "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        tracer._inject_judgment_context(ctx["span"])
        set_span_attribute(
            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
        )
        ctx["model_name"] = kwargs.get("model", "")
        set_span_attribute(
            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
        )
        ctx["accumulated_content"] = ""

    def mutate_kwargs_hook(ctx: Dict[str, Any], kwargs: Any) -> Any:
        # Request usage on the final chunk, but never override
        # caller-supplied stream_options.
        if "stream_options" not in kwargs and _supports_stream_options():
            modified_kwargs = dict(kwargs)
            modified_kwargs["stream_options"] = {"include_usage": True}
            return modified_kwargs
        return kwargs

    def mutate_hook(
        ctx: Dict[str, Any], result: AsyncIterator[ChatCompletionChunk]
    ) -> AsyncIterator[ChatCompletionChunk]:
        async def traced_generator() -> AsyncGenerator[ChatCompletionChunk, None]:
            async for chunk in result:
                yield chunk

        def yield_hook(inner_ctx: Dict[str, Any], chunk: ChatCompletionChunk) -> None:
            # The outer ctx (not inner_ctx) carries the span and accumulator.
            span = ctx.get("span")
            if not span:
                return

            if chunk.choices and len(chunk.choices) > 0:
                delta = chunk.choices[0].delta
                if delta and delta.content:
                    ctx["accumulated_content"] = (
                        ctx.get("accumulated_content", "") + delta.content
                    )

            if hasattr(chunk, "usage") and chunk.usage:
                prompt_tokens = chunk.usage.prompt_tokens or 0
                completion_tokens = chunk.usage.completion_tokens or 0
                cache_read = 0
                if chunk.usage.prompt_tokens_details:
                    cache_read = chunk.usage.prompt_tokens_details.cached_tokens or 0

                set_cost_attribute(span, chunk.usage)

                prompt_tokens, completion_tokens, cache_read, cache_creation = (
                    openai_tokens_converter(
                        prompt_tokens,
                        completion_tokens,
                        cache_read,
                        0,
                        chunk.usage.total_tokens,
                    )
                )

                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
                    prompt_tokens,
                )
                set_span_attribute(
                    span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
                )
                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
                    cache_read,
                )
                # NOTE(review): was a hard-coded 0, leaving the unpacked
                # cache_creation dead; use the converter's value for
                # consistency (expected to remain 0 for OpenAI).
                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
                    cache_creation,
                )
                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_USAGE_METADATA,
                    safe_serialize(chunk.usage),
                )

        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            # Stream finished normally: record the full accumulated text.
            span = ctx.get("span")
            if span:
                accumulated = ctx.get("accumulated_content", "")
                set_span_attribute(span, AttributeKeys.GEN_AI_COMPLETION, accumulated)

        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
            span = ctx.get("span")
            if span:
                span.record_exception(error)

        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            # End the span only when the consumer finishes/abandons the stream.
            span = ctx.get("span")
            if span:
                span.end()

        wrapped_generator = immutable_wrap_async_iterator(
            traced_generator,
            yield_hook=yield_hook,
            post_hook=post_hook_inner,
            error_hook=error_hook_inner,
            finally_hook=finally_hook_inner,
        )

        return wrapped_generator()

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        # Covers failures of the create() call itself, before any iteration.
        span = ctx.get("span")
        if span:
            span.record_exception(error)

    return mutable_wrap_async(
        original_func,
        pre_hook=pre_hook,
        mutate_kwargs_hook=mutate_kwargs_hook,
        mutate_hook=mutate_hook,
        error_hook=error_hook,
    )
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+ import importlib.util
3
+
4
+ HAS_OPENAI = importlib.util.find_spec("openai") is not None
5
+
6
+ __all__ = ["HAS_OPENAI"]