judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234) hide show
  1. judgeval/__init__.py +173 -10
  2. judgeval/api/__init__.py +523 -0
  3. judgeval/api/api_types.py +413 -0
  4. judgeval/cli.py +112 -0
  5. judgeval/constants.py +7 -30
  6. judgeval/data/__init__.py +1 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +14 -40
  9. judgeval/data/judgment_types.py +396 -146
  10. judgeval/data/result.py +11 -18
  11. judgeval/data/scorer_data.py +3 -26
  12. judgeval/data/scripts/openapi_transform.py +5 -5
  13. judgeval/data/trace.py +115 -194
  14. judgeval/dataset/__init__.py +335 -0
  15. judgeval/env.py +55 -0
  16. judgeval/evaluation/__init__.py +346 -0
  17. judgeval/exceptions.py +28 -0
  18. judgeval/integrations/langgraph/__init__.py +13 -0
  19. judgeval/integrations/openlit/__init__.py +51 -0
  20. judgeval/judges/__init__.py +2 -2
  21. judgeval/judges/litellm_judge.py +77 -16
  22. judgeval/judges/together_judge.py +88 -17
  23. judgeval/judges/utils.py +7 -20
  24. judgeval/judgment_attribute_keys.py +55 -0
  25. judgeval/{common/logger.py → logger.py} +24 -8
  26. judgeval/prompt/__init__.py +330 -0
  27. judgeval/scorers/__init__.py +11 -11
  28. judgeval/scorers/agent_scorer.py +15 -19
  29. judgeval/scorers/api_scorer.py +21 -23
  30. judgeval/scorers/base_scorer.py +54 -36
  31. judgeval/scorers/example_scorer.py +1 -3
  32. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
  36. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
  37. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
  38. judgeval/scorers/score.py +64 -47
  39. judgeval/scorers/utils.py +2 -107
  40. judgeval/tracer/__init__.py +1111 -2
  41. judgeval/tracer/constants.py +1 -0
  42. judgeval/tracer/exporters/__init__.py +40 -0
  43. judgeval/tracer/exporters/s3.py +119 -0
  44. judgeval/tracer/exporters/store.py +59 -0
  45. judgeval/tracer/exporters/utils.py +32 -0
  46. judgeval/tracer/keys.py +63 -0
  47. judgeval/tracer/llm/__init__.py +7 -0
  48. judgeval/tracer/llm/config.py +78 -0
  49. judgeval/tracer/llm/constants.py +9 -0
  50. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  51. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  52. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  53. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  54. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  55. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  56. judgeval/tracer/llm/llm_google/config.py +6 -0
  57. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  58. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  59. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  60. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  61. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  62. judgeval/tracer/llm/llm_openai/config.py +6 -0
  63. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  64. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  65. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  66. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  67. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  68. judgeval/tracer/llm/llm_together/config.py +6 -0
  69. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  70. judgeval/tracer/llm/providers.py +19 -0
  71. judgeval/tracer/managers.py +167 -0
  72. judgeval/tracer/processors/__init__.py +220 -0
  73. judgeval/tracer/utils.py +19 -0
  74. judgeval/trainer/__init__.py +14 -0
  75. judgeval/trainer/base_trainer.py +122 -0
  76. judgeval/trainer/config.py +123 -0
  77. judgeval/trainer/console.py +144 -0
  78. judgeval/trainer/fireworks_trainer.py +392 -0
  79. judgeval/trainer/trainable_model.py +252 -0
  80. judgeval/trainer/trainer.py +70 -0
  81. judgeval/utils/async_utils.py +39 -0
  82. judgeval/utils/decorators/__init__.py +0 -0
  83. judgeval/utils/decorators/dont_throw.py +37 -0
  84. judgeval/utils/decorators/use_once.py +13 -0
  85. judgeval/utils/file_utils.py +74 -28
  86. judgeval/utils/guards.py +36 -0
  87. judgeval/utils/meta.py +27 -0
  88. judgeval/utils/project.py +15 -0
  89. judgeval/utils/serialize.py +253 -0
  90. judgeval/utils/testing.py +70 -0
  91. judgeval/utils/url.py +10 -0
  92. judgeval/{version_check.py → utils/version_check.py} +5 -3
  93. judgeval/utils/wrappers/README.md +3 -0
  94. judgeval/utils/wrappers/__init__.py +15 -0
  95. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  96. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  97. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  98. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  99. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  100. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  101. judgeval/utils/wrappers/py.typed +0 -0
  102. judgeval/utils/wrappers/utils.py +35 -0
  103. judgeval/v1/__init__.py +88 -0
  104. judgeval/v1/data/__init__.py +7 -0
  105. judgeval/v1/data/example.py +44 -0
  106. judgeval/v1/data/scorer_data.py +42 -0
  107. judgeval/v1/data/scoring_result.py +44 -0
  108. judgeval/v1/datasets/__init__.py +6 -0
  109. judgeval/v1/datasets/dataset.py +214 -0
  110. judgeval/v1/datasets/dataset_factory.py +94 -0
  111. judgeval/v1/evaluation/__init__.py +6 -0
  112. judgeval/v1/evaluation/evaluation.py +182 -0
  113. judgeval/v1/evaluation/evaluation_factory.py +17 -0
  114. judgeval/v1/instrumentation/__init__.py +6 -0
  115. judgeval/v1/instrumentation/llm/__init__.py +7 -0
  116. judgeval/v1/instrumentation/llm/config.py +78 -0
  117. judgeval/v1/instrumentation/llm/constants.py +11 -0
  118. judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
  119. judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
  120. judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
  121. judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
  122. judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
  123. judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
  124. judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
  125. judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
  126. judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
  127. judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
  128. judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
  129. judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
  130. judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
  131. judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
  132. judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
  133. judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
  134. judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
  135. judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
  136. judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
  137. judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
  138. judgeval/v1/instrumentation/llm/providers.py +19 -0
  139. judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
  140. judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
  141. judgeval/v1/integrations/langgraph/__init__.py +13 -0
  142. judgeval/v1/integrations/openlit/__init__.py +47 -0
  143. judgeval/v1/internal/api/__init__.py +525 -0
  144. judgeval/v1/internal/api/api_types.py +413 -0
  145. judgeval/v1/prompts/__init__.py +6 -0
  146. judgeval/v1/prompts/prompt.py +29 -0
  147. judgeval/v1/prompts/prompt_factory.py +189 -0
  148. judgeval/v1/py.typed +0 -0
  149. judgeval/v1/scorers/__init__.py +6 -0
  150. judgeval/v1/scorers/api_scorer.py +82 -0
  151. judgeval/v1/scorers/base_scorer.py +17 -0
  152. judgeval/v1/scorers/built_in/__init__.py +17 -0
  153. judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
  154. judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
  155. judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
  156. judgeval/v1/scorers/built_in/faithfulness.py +28 -0
  157. judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
  158. judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
  159. judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
  160. judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
  161. judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
  162. judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
  163. judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
  164. judgeval/v1/scorers/scorers_factory.py +49 -0
  165. judgeval/v1/tracer/__init__.py +7 -0
  166. judgeval/v1/tracer/base_tracer.py +520 -0
  167. judgeval/v1/tracer/exporters/__init__.py +14 -0
  168. judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
  169. judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
  170. judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
  171. judgeval/v1/tracer/exporters/span_store.py +50 -0
  172. judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
  173. judgeval/v1/tracer/processors/__init__.py +6 -0
  174. judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
  175. judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
  176. judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
  177. judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
  178. judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
  179. judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
  180. judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
  181. judgeval/v1/tracer/tracer.py +67 -0
  182. judgeval/v1/tracer/tracer_factory.py +38 -0
  183. judgeval/v1/trainers/__init__.py +5 -0
  184. judgeval/v1/trainers/base_trainer.py +62 -0
  185. judgeval/v1/trainers/config.py +123 -0
  186. judgeval/v1/trainers/console.py +144 -0
  187. judgeval/v1/trainers/fireworks_trainer.py +392 -0
  188. judgeval/v1/trainers/trainable_model.py +252 -0
  189. judgeval/v1/trainers/trainers_factory.py +37 -0
  190. judgeval/v1/utils.py +18 -0
  191. judgeval/version.py +5 -0
  192. judgeval/warnings.py +4 -0
  193. judgeval-0.23.0.dist-info/METADATA +266 -0
  194. judgeval-0.23.0.dist-info/RECORD +201 -0
  195. judgeval-0.23.0.dist-info/entry_points.txt +2 -0
  196. judgeval/clients.py +0 -34
  197. judgeval/common/__init__.py +0 -13
  198. judgeval/common/api/__init__.py +0 -3
  199. judgeval/common/api/api.py +0 -352
  200. judgeval/common/api/constants.py +0 -165
  201. judgeval/common/exceptions.py +0 -27
  202. judgeval/common/storage/__init__.py +0 -6
  203. judgeval/common/storage/s3_storage.py +0 -98
  204. judgeval/common/tracer/__init__.py +0 -31
  205. judgeval/common/tracer/constants.py +0 -22
  206. judgeval/common/tracer/core.py +0 -1916
  207. judgeval/common/tracer/otel_exporter.py +0 -108
  208. judgeval/common/tracer/otel_span_processor.py +0 -234
  209. judgeval/common/tracer/span_processor.py +0 -37
  210. judgeval/common/tracer/span_transformer.py +0 -211
  211. judgeval/common/tracer/trace_manager.py +0 -92
  212. judgeval/common/utils.py +0 -940
  213. judgeval/data/datasets/__init__.py +0 -4
  214. judgeval/data/datasets/dataset.py +0 -341
  215. judgeval/data/datasets/eval_dataset_client.py +0 -214
  216. judgeval/data/tool.py +0 -5
  217. judgeval/data/trace_run.py +0 -37
  218. judgeval/evaluation_run.py +0 -75
  219. judgeval/integrations/langgraph.py +0 -843
  220. judgeval/judges/mixture_of_judges.py +0 -286
  221. judgeval/judgment_client.py +0 -369
  222. judgeval/rules.py +0 -521
  223. judgeval/run_evaluation.py +0 -684
  224. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
  225. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  226. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  227. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
  228. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
  229. judgeval/utils/alerts.py +0 -93
  230. judgeval/utils/requests.py +0 -50
  231. judgeval-0.1.0.dist-info/METADATA +0 -202
  232. judgeval-0.1.0.dist-info/RECORD +0 -73
  233. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
  234. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,452 @@
1
+ from __future__ import annotations
2
+ from typing import (
3
+ TYPE_CHECKING,
4
+ Any,
5
+ Awaitable,
6
+ Callable,
7
+ Dict,
8
+ Iterator,
9
+ AsyncIterator,
10
+ Generator,
11
+ AsyncGenerator,
12
+ Tuple,
13
+ )
14
+
15
+ from judgeval.tracer.keys import AttributeKeys
16
+ from judgeval.tracer.utils import set_span_attribute
17
+ from judgeval.utils.serialize import safe_serialize
18
+ from judgeval.utils.wrappers import (
19
+ immutable_wrap_sync,
20
+ immutable_wrap_async,
21
+ mutable_wrap_sync,
22
+ mutable_wrap_async,
23
+ immutable_wrap_sync_iterator,
24
+ immutable_wrap_async_iterator,
25
+ )
26
+
27
+ if TYPE_CHECKING:
28
+ from judgeval.tracer import Tracer
29
+ from anthropic import Anthropic, AsyncAnthropic
30
+ from anthropic.types import (
31
+ Message,
32
+ Usage,
33
+ MessageDeltaUsage,
34
+ RawMessageStreamEvent,
35
+ )
36
+
37
+
38
def _extract_anthropic_content(chunk: RawMessageStreamEvent) -> str:
    """Return the text fragment carried by one streaming event.

    Only ``content_block_delta`` events whose delta is a ``text_delta`` with
    non-empty text contribute anything; every other event produces an empty
    string.
    """
    if chunk.type != "content_block_delta":
        return ""

    piece = chunk.delta
    if piece.type != "text_delta":
        return ""

    # Falsy text (None or empty) is normalised to an empty string.
    return piece.text or ""
44
+
45
+
46
def _extract_anthropic_tokens(
    usage: Usage | MessageDeltaUsage,
) -> Tuple[int, int, int, int]:
    """Read the four token counters from an Anthropic usage object.

    Args:
        usage: The usage object attached to a message or to a streaming
            event.

    Returns:
        A tuple ``(input_tokens, output_tokens, cache_read_input_tokens,
        cache_creation_input_tokens)``. A counter that is ``None`` or is not
        present on ``usage`` at all is reported as ``0``.
    """

    def _count(field: str) -> int:
        # Read defensively: a missing attribute is treated exactly like None
        # instead of raising AttributeError.
        # NOTE(review): some anthropic SDK releases appear to define
        # MessageDeltaUsage with only ``output_tokens`` - confirm against the
        # SDK versions this package supports.
        value = getattr(usage, field, None)
        return value if value is not None else 0

    return (
        _count("input_tokens"),
        _count("output_tokens"),
        _count("cache_read_input_tokens"),
        _count("cache_creation_input_tokens"),
    )
62
+
63
+
64
def _extract_anthropic_chunk_usage(
    chunk: RawMessageStreamEvent,
) -> Usage | MessageDeltaUsage | None:
    """Return the usage object attached to a streaming event, if any.

    ``message_start`` events expose usage on their nested message;
    ``message_delta`` events expose it directly on the event. Any other
    event type, a falsy nested message, or a delta without a ``usage``
    attribute yields ``None``.
    """
    event_type = chunk.type

    if event_type == "message_start":
        message = chunk.message
        if not message:
            return None
        return message.usage

    if event_type == "message_delta":
        return getattr(chunk, "usage", None)

    return None
72
+
73
+
74
def wrap_messages_create_sync(tracer: Tracer, client: Anthropic) -> None:
    """Replace ``client.messages.create`` with a tracing dispatcher.

    The dispatcher inspects the ``stream`` keyword argument on every call and
    routes the call through the streaming or the non-streaming traced wrapper
    built around the client's original ``create``.
    """
    original_func = client.messages.create

    def dispatcher(*args: Any, **kwargs: Any) -> Any:
        # A fresh wrapper is built for every call, as in the original code.
        if kwargs.get("stream", False):
            build_wrapper = _wrap_streaming_sync
        else:
            build_wrapper = _wrap_non_streaming_sync
        traced_create = build_wrapper(tracer, original_func)
        return traced_create(*args, **kwargs)

    setattr(client.messages, "create", dispatcher)
83
+
84
+
85
def _wrap_non_streaming_sync(
    tracer: Tracer, original_func: Callable[..., Message]
) -> Callable[..., Message]:
    """Build a traced version of a blocking, non-streaming ``create`` call.

    Four hooks are defined and handed to ``immutable_wrap_sync`` together
    with ``original_func``:

    * ``pre_hook`` starts an ``ANTHROPIC_API_CALL`` span, stores it in the
      per-call ``ctx`` and records the serialized kwargs and requested model.
    * ``post_hook`` records the serialized result, token usage and the model
      name reported on the result.
    * ``error_hook`` records the exception on the span.
    * ``finally_hook`` ends the span.
    """

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        llm_span = tracer.get_tracer().start_span(
            "ANTHROPIC_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        ctx["span"] = llm_span
        tracer._inject_judgment_context(llm_span)
        set_span_attribute(
            llm_span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
        )

        requested_model = kwargs.get("model", "")
        ctx["model_name"] = requested_model
        set_span_attribute(
            llm_span, AttributeKeys.JUDGMENT_LLM_MODEL_NAME, requested_model
        )

    def post_hook(ctx: Dict[str, Any], result: Message) -> None:
        llm_span = ctx.get("span")
        if llm_span:
            set_span_attribute(
                llm_span, AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result)
            )

            usage = result.usage
            if usage:
                # Attribute keys listed in the same order as the tuple
                # returned by _extract_anthropic_tokens.
                token_keys = (
                    AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
                    AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
                    AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
                    AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
                )
                token_counts = _extract_anthropic_tokens(usage)
                for attribute_key, count in zip(token_keys, token_counts):
                    set_span_attribute(llm_span, attribute_key, count)
                set_span_attribute(
                    llm_span,
                    AttributeKeys.JUDGMENT_USAGE_METADATA,
                    safe_serialize(usage),
                )

            # The model named on the response replaces the requested one.
            set_span_attribute(
                llm_span, AttributeKeys.JUDGMENT_LLM_MODEL_NAME, result.model
            )

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        llm_span = ctx.get("span")
        if not llm_span:
            return
        llm_span.record_exception(error)

    def finally_hook(ctx: Dict[str, Any]) -> None:
        llm_span = ctx.get("span")
        if not llm_span:
            return
        llm_span.end()

    return immutable_wrap_sync(
        original_func,
        pre_hook=pre_hook,
        post_hook=post_hook,
        error_hook=error_hook,
        finally_hook=finally_hook,
    )
159
+
160
+
161
def _wrap_streaming_sync(
    tracer: Tracer, original_func: Callable[..., Iterator[RawMessageStreamEvent]]
) -> Callable[..., Iterator[RawMessageStreamEvent]]:
    """Build a traced version of a blocking, streaming ``create`` call.

    ``pre_hook`` starts an ``ANTHROPIC_API_CALL`` span and records the
    request. ``mutate_hook`` replaces the returned event iterator with a
    wrapped generator whose hooks accumulate text deltas, record token usage
    as events arrive, write the accumulated completion, and end the span.

    Two behaviours differ from a naive per-chunk overwrite:

    * Token counters are tracked per stream. A counter that a later event
      leaves out (``None`` or attribute absent) keeps the last value that was
      reported instead of being reset to 0.
      NOTE(review): ``message_delta`` usage is expected to often carry only
      some counters - confirm against the SDK version in use.
    * If the ``create`` call itself fails, no iterator is produced, so the
      outer ``error_hook`` ends the span after recording the exception.

    Args:
        tracer: Tracer used to start the span and inject judgment context.
        original_func: The client's original ``messages.create``.

    Returns:
        The wrapper produced by ``mutable_wrap_sync``.
    """
    # (usage field, span attribute) pairs, in the order they are written.
    token_attributes = (
        ("input_tokens", AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS),
        ("output_tokens", AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS),
        (
            "cache_read_input_tokens",
            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
        ),
        (
            "cache_creation_input_tokens",
            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
        ),
    )

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        ctx["span"] = tracer.get_tracer().start_span(
            "ANTHROPIC_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        tracer._inject_judgment_context(ctx["span"])
        set_span_attribute(
            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
        )
        ctx["model_name"] = kwargs.get("model", "")
        set_span_attribute(
            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
        )
        ctx["accumulated_content"] = ""
        # Last known value of every counter for this stream.
        ctx["usage_tokens"] = {field: 0 for field, _ in token_attributes}

    def mutate_hook(
        ctx: Dict[str, Any], result: Iterator[RawMessageStreamEvent]
    ) -> Iterator[RawMessageStreamEvent]:
        def traced_generator() -> Generator[RawMessageStreamEvent, None, None]:
            for chunk in result:
                yield chunk

        def yield_hook(inner_ctx: Dict[str, Any], chunk: RawMessageStreamEvent) -> None:
            span = ctx.get("span")
            if not span:
                return

            content = _extract_anthropic_content(chunk)
            if content:
                ctx["accumulated_content"] = (
                    ctx.get("accumulated_content", "") + content
                )

            usage_data = _extract_anthropic_chunk_usage(chunk)
            if usage_data:
                totals = ctx.setdefault("usage_tokens", {})
                for field, attribute_key in token_attributes:
                    reported = getattr(usage_data, field, None)
                    # Only a counter actually present on this event replaces
                    # the stored value; omitted ones keep their last value.
                    if reported is not None:
                        totals[field] = reported
                    set_span_attribute(span, attribute_key, totals.get(field, 0))
                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_USAGE_METADATA,
                    safe_serialize(usage_data),
                )

        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            span = ctx.get("span")
            if span:
                accumulated = ctx.get("accumulated_content", "")
                set_span_attribute(span, AttributeKeys.GEN_AI_COMPLETION, accumulated)

        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
            span = ctx.get("span")
            if span:
                span.record_exception(error)

        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            span = ctx.get("span")
            if span:
                span.end()

        wrapped_generator = immutable_wrap_sync_iterator(
            traced_generator,
            yield_hook=yield_hook,
            post_hook=post_hook_inner,
            error_hook=error_hook_inner,
            finally_hook=finally_hook_inner,
        )

        return wrapped_generator()

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        span = ctx.get("span")
        if span:
            span.record_exception(error)
            # No generator will run its finally hook for a failed call, so
            # the span has to be closed here or it is never ended.
            span.end()

    return mutable_wrap_sync(
        original_func,
        pre_hook=pre_hook,
        mutate_hook=mutate_hook,
        error_hook=error_hook,
    )
262
+
263
+
264
def wrap_messages_create_async(tracer: Tracer, client: AsyncAnthropic) -> None:
    """Replace ``client.messages.create`` with an async tracing dispatcher.

    The dispatcher inspects the ``stream`` keyword argument on every call and
    awaits either the streaming or the non-streaming traced wrapper built
    around the client's original ``create``.
    """
    original_func = client.messages.create

    async def dispatcher(*args: Any, **kwargs: Any) -> Any:
        # A fresh wrapper is built for every call, as in the original code.
        if kwargs.get("stream", False):
            build_wrapper = _wrap_streaming_async
        else:
            build_wrapper = _wrap_non_streaming_async
        traced_create = build_wrapper(tracer, original_func)
        return await traced_create(*args, **kwargs)

    setattr(client.messages, "create", dispatcher)
273
+
274
+
275
def _wrap_non_streaming_async(
    tracer: Tracer, original_func: Callable[..., Awaitable[Message]]
) -> Callable[..., Awaitable[Message]]:
    """Build a traced version of an awaitable, non-streaming ``create`` call.

    Four hooks are defined and handed to ``immutable_wrap_async`` together
    with ``original_func``:

    * ``pre_hook`` starts an ``ANTHROPIC_API_CALL`` span, stores it in the
      per-call ``ctx`` and records the serialized kwargs and requested model.
    * ``post_hook`` records the serialized result, token usage and the model
      name reported on the result.
    * ``error_hook`` records the exception on the span.
    * ``finally_hook`` ends the span.
    """

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        llm_span = tracer.get_tracer().start_span(
            "ANTHROPIC_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        ctx["span"] = llm_span
        tracer._inject_judgment_context(llm_span)
        set_span_attribute(
            llm_span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
        )

        requested_model = kwargs.get("model", "")
        ctx["model_name"] = requested_model
        set_span_attribute(
            llm_span, AttributeKeys.JUDGMENT_LLM_MODEL_NAME, requested_model
        )

    def post_hook(ctx: Dict[str, Any], result: Message) -> None:
        llm_span = ctx.get("span")
        if llm_span:
            set_span_attribute(
                llm_span, AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result)
            )

            usage = result.usage
            if usage:
                # Attribute keys listed in the same order as the tuple
                # returned by _extract_anthropic_tokens.
                token_keys = (
                    AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
                    AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
                    AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
                    AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
                )
                token_counts = _extract_anthropic_tokens(usage)
                for attribute_key, count in zip(token_keys, token_counts):
                    set_span_attribute(llm_span, attribute_key, count)
                set_span_attribute(
                    llm_span,
                    AttributeKeys.JUDGMENT_USAGE_METADATA,
                    safe_serialize(usage),
                )

            # The model named on the response replaces the requested one.
            set_span_attribute(
                llm_span, AttributeKeys.JUDGMENT_LLM_MODEL_NAME, result.model
            )

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        llm_span = ctx.get("span")
        if not llm_span:
            return
        llm_span.record_exception(error)

    def finally_hook(ctx: Dict[str, Any]) -> None:
        llm_span = ctx.get("span")
        if not llm_span:
            return
        llm_span.end()

    return immutable_wrap_async(
        original_func,
        pre_hook=pre_hook,
        post_hook=post_hook,
        error_hook=error_hook,
        finally_hook=finally_hook,
    )
349
+
350
+
351
def _wrap_streaming_async(
    tracer: Tracer,
    original_func: Callable[..., Awaitable[AsyncIterator[RawMessageStreamEvent]]],
) -> Callable[..., Awaitable[AsyncIterator[RawMessageStreamEvent]]]:
    """Build a traced version of an awaitable, streaming ``create`` call.

    ``pre_hook`` starts an ``ANTHROPIC_API_CALL`` span and records the
    request. ``mutate_hook`` replaces the returned async event iterator with
    a wrapped async generator whose hooks accumulate text deltas, record
    token usage as events arrive, write the accumulated completion, and end
    the span.

    Two behaviours differ from a naive per-chunk overwrite:

    * Token counters are tracked per stream. A counter that a later event
      leaves out (``None`` or attribute absent) keeps the last value that was
      reported instead of being reset to 0.
      NOTE(review): ``message_delta`` usage is expected to often carry only
      some counters - confirm against the SDK version in use.
    * If the ``create`` call itself fails, no iterator is produced, so the
      outer ``error_hook`` ends the span after recording the exception.

    Args:
        tracer: Tracer used to start the span and inject judgment context.
        original_func: The client's original async ``messages.create``.

    Returns:
        The wrapper produced by ``mutable_wrap_async``.
    """
    # (usage field, span attribute) pairs, in the order they are written.
    token_attributes = (
        ("input_tokens", AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS),
        ("output_tokens", AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS),
        (
            "cache_read_input_tokens",
            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
        ),
        (
            "cache_creation_input_tokens",
            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
        ),
    )

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        ctx["span"] = tracer.get_tracer().start_span(
            "ANTHROPIC_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        tracer._inject_judgment_context(ctx["span"])
        set_span_attribute(
            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
        )
        ctx["model_name"] = kwargs.get("model", "")
        set_span_attribute(
            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
        )
        ctx["accumulated_content"] = ""
        # Last known value of every counter for this stream.
        ctx["usage_tokens"] = {field: 0 for field, _ in token_attributes}

    def mutate_hook(
        ctx: Dict[str, Any], result: AsyncIterator[RawMessageStreamEvent]
    ) -> AsyncIterator[RawMessageStreamEvent]:
        async def traced_generator() -> AsyncGenerator[RawMessageStreamEvent, None]:
            async for chunk in result:
                yield chunk

        def yield_hook(inner_ctx: Dict[str, Any], chunk: RawMessageStreamEvent) -> None:
            span = ctx.get("span")
            if not span:
                return

            content = _extract_anthropic_content(chunk)
            if content:
                ctx["accumulated_content"] = (
                    ctx.get("accumulated_content", "") + content
                )

            usage_data = _extract_anthropic_chunk_usage(chunk)
            if usage_data:
                totals = ctx.setdefault("usage_tokens", {})
                for field, attribute_key in token_attributes:
                    reported = getattr(usage_data, field, None)
                    # Only a counter actually present on this event replaces
                    # the stored value; omitted ones keep their last value.
                    if reported is not None:
                        totals[field] = reported
                    set_span_attribute(span, attribute_key, totals.get(field, 0))
                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_USAGE_METADATA,
                    safe_serialize(usage_data),
                )

        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            span = ctx.get("span")
            if span:
                accumulated = ctx.get("accumulated_content", "")
                set_span_attribute(span, AttributeKeys.GEN_AI_COMPLETION, accumulated)

        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
            span = ctx.get("span")
            if span:
                span.record_exception(error)

        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            span = ctx.get("span")
            if span:
                span.end()

        wrapped_generator = immutable_wrap_async_iterator(
            traced_generator,
            yield_hook=yield_hook,
            post_hook=post_hook_inner,
            error_hook=error_hook_inner,
            finally_hook=finally_hook_inner,
        )

        return wrapped_generator()

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        span = ctx.get("span")
        if span:
            span.record_exception(error)
            # No generator will run its finally hook for a failed call, so
            # the span has to be closed here or it is never ended.
            span.end()

    return mutable_wrap_async(
        original_func,
        pre_hook=pre_hook,
        mutate_hook=mutate_hook,
        error_hook=error_hook,
    )