judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234) hide show
  1. judgeval/__init__.py +173 -10
  2. judgeval/api/__init__.py +523 -0
  3. judgeval/api/api_types.py +413 -0
  4. judgeval/cli.py +112 -0
  5. judgeval/constants.py +7 -30
  6. judgeval/data/__init__.py +1 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +14 -40
  9. judgeval/data/judgment_types.py +396 -146
  10. judgeval/data/result.py +11 -18
  11. judgeval/data/scorer_data.py +3 -26
  12. judgeval/data/scripts/openapi_transform.py +5 -5
  13. judgeval/data/trace.py +115 -194
  14. judgeval/dataset/__init__.py +335 -0
  15. judgeval/env.py +55 -0
  16. judgeval/evaluation/__init__.py +346 -0
  17. judgeval/exceptions.py +28 -0
  18. judgeval/integrations/langgraph/__init__.py +13 -0
  19. judgeval/integrations/openlit/__init__.py +51 -0
  20. judgeval/judges/__init__.py +2 -2
  21. judgeval/judges/litellm_judge.py +77 -16
  22. judgeval/judges/together_judge.py +88 -17
  23. judgeval/judges/utils.py +7 -20
  24. judgeval/judgment_attribute_keys.py +55 -0
  25. judgeval/{common/logger.py → logger.py} +24 -8
  26. judgeval/prompt/__init__.py +330 -0
  27. judgeval/scorers/__init__.py +11 -11
  28. judgeval/scorers/agent_scorer.py +15 -19
  29. judgeval/scorers/api_scorer.py +21 -23
  30. judgeval/scorers/base_scorer.py +54 -36
  31. judgeval/scorers/example_scorer.py +1 -3
  32. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
  36. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
  37. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
  38. judgeval/scorers/score.py +64 -47
  39. judgeval/scorers/utils.py +2 -107
  40. judgeval/tracer/__init__.py +1111 -2
  41. judgeval/tracer/constants.py +1 -0
  42. judgeval/tracer/exporters/__init__.py +40 -0
  43. judgeval/tracer/exporters/s3.py +119 -0
  44. judgeval/tracer/exporters/store.py +59 -0
  45. judgeval/tracer/exporters/utils.py +32 -0
  46. judgeval/tracer/keys.py +63 -0
  47. judgeval/tracer/llm/__init__.py +7 -0
  48. judgeval/tracer/llm/config.py +78 -0
  49. judgeval/tracer/llm/constants.py +9 -0
  50. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  51. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  52. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  53. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  54. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  55. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  56. judgeval/tracer/llm/llm_google/config.py +6 -0
  57. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  58. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  59. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  60. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  61. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  62. judgeval/tracer/llm/llm_openai/config.py +6 -0
  63. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  64. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  65. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  66. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  67. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  68. judgeval/tracer/llm/llm_together/config.py +6 -0
  69. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  70. judgeval/tracer/llm/providers.py +19 -0
  71. judgeval/tracer/managers.py +167 -0
  72. judgeval/tracer/processors/__init__.py +220 -0
  73. judgeval/tracer/utils.py +19 -0
  74. judgeval/trainer/__init__.py +14 -0
  75. judgeval/trainer/base_trainer.py +122 -0
  76. judgeval/trainer/config.py +123 -0
  77. judgeval/trainer/console.py +144 -0
  78. judgeval/trainer/fireworks_trainer.py +392 -0
  79. judgeval/trainer/trainable_model.py +252 -0
  80. judgeval/trainer/trainer.py +70 -0
  81. judgeval/utils/async_utils.py +39 -0
  82. judgeval/utils/decorators/__init__.py +0 -0
  83. judgeval/utils/decorators/dont_throw.py +37 -0
  84. judgeval/utils/decorators/use_once.py +13 -0
  85. judgeval/utils/file_utils.py +74 -28
  86. judgeval/utils/guards.py +36 -0
  87. judgeval/utils/meta.py +27 -0
  88. judgeval/utils/project.py +15 -0
  89. judgeval/utils/serialize.py +253 -0
  90. judgeval/utils/testing.py +70 -0
  91. judgeval/utils/url.py +10 -0
  92. judgeval/{version_check.py → utils/version_check.py} +5 -3
  93. judgeval/utils/wrappers/README.md +3 -0
  94. judgeval/utils/wrappers/__init__.py +15 -0
  95. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  96. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  97. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  98. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  99. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  100. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  101. judgeval/utils/wrappers/py.typed +0 -0
  102. judgeval/utils/wrappers/utils.py +35 -0
  103. judgeval/v1/__init__.py +88 -0
  104. judgeval/v1/data/__init__.py +7 -0
  105. judgeval/v1/data/example.py +44 -0
  106. judgeval/v1/data/scorer_data.py +42 -0
  107. judgeval/v1/data/scoring_result.py +44 -0
  108. judgeval/v1/datasets/__init__.py +6 -0
  109. judgeval/v1/datasets/dataset.py +214 -0
  110. judgeval/v1/datasets/dataset_factory.py +94 -0
  111. judgeval/v1/evaluation/__init__.py +6 -0
  112. judgeval/v1/evaluation/evaluation.py +182 -0
  113. judgeval/v1/evaluation/evaluation_factory.py +17 -0
  114. judgeval/v1/instrumentation/__init__.py +6 -0
  115. judgeval/v1/instrumentation/llm/__init__.py +7 -0
  116. judgeval/v1/instrumentation/llm/config.py +78 -0
  117. judgeval/v1/instrumentation/llm/constants.py +11 -0
  118. judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
  119. judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
  120. judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
  121. judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
  122. judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
  123. judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
  124. judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
  125. judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
  126. judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
  127. judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
  128. judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
  129. judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
  130. judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
  131. judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
  132. judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
  133. judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
  134. judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
  135. judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
  136. judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
  137. judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
  138. judgeval/v1/instrumentation/llm/providers.py +19 -0
  139. judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
  140. judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
  141. judgeval/v1/integrations/langgraph/__init__.py +13 -0
  142. judgeval/v1/integrations/openlit/__init__.py +47 -0
  143. judgeval/v1/internal/api/__init__.py +525 -0
  144. judgeval/v1/internal/api/api_types.py +413 -0
  145. judgeval/v1/prompts/__init__.py +6 -0
  146. judgeval/v1/prompts/prompt.py +29 -0
  147. judgeval/v1/prompts/prompt_factory.py +189 -0
  148. judgeval/v1/py.typed +0 -0
  149. judgeval/v1/scorers/__init__.py +6 -0
  150. judgeval/v1/scorers/api_scorer.py +82 -0
  151. judgeval/v1/scorers/base_scorer.py +17 -0
  152. judgeval/v1/scorers/built_in/__init__.py +17 -0
  153. judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
  154. judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
  155. judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
  156. judgeval/v1/scorers/built_in/faithfulness.py +28 -0
  157. judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
  158. judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
  159. judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
  160. judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
  161. judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
  162. judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
  163. judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
  164. judgeval/v1/scorers/scorers_factory.py +49 -0
  165. judgeval/v1/tracer/__init__.py +7 -0
  166. judgeval/v1/tracer/base_tracer.py +520 -0
  167. judgeval/v1/tracer/exporters/__init__.py +14 -0
  168. judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
  169. judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
  170. judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
  171. judgeval/v1/tracer/exporters/span_store.py +50 -0
  172. judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
  173. judgeval/v1/tracer/processors/__init__.py +6 -0
  174. judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
  175. judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
  176. judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
  177. judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
  178. judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
  179. judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
  180. judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
  181. judgeval/v1/tracer/tracer.py +67 -0
  182. judgeval/v1/tracer/tracer_factory.py +38 -0
  183. judgeval/v1/trainers/__init__.py +5 -0
  184. judgeval/v1/trainers/base_trainer.py +62 -0
  185. judgeval/v1/trainers/config.py +123 -0
  186. judgeval/v1/trainers/console.py +144 -0
  187. judgeval/v1/trainers/fireworks_trainer.py +392 -0
  188. judgeval/v1/trainers/trainable_model.py +252 -0
  189. judgeval/v1/trainers/trainers_factory.py +37 -0
  190. judgeval/v1/utils.py +18 -0
  191. judgeval/version.py +5 -0
  192. judgeval/warnings.py +4 -0
  193. judgeval-0.23.0.dist-info/METADATA +266 -0
  194. judgeval-0.23.0.dist-info/RECORD +201 -0
  195. judgeval-0.23.0.dist-info/entry_points.txt +2 -0
  196. judgeval/clients.py +0 -34
  197. judgeval/common/__init__.py +0 -13
  198. judgeval/common/api/__init__.py +0 -3
  199. judgeval/common/api/api.py +0 -352
  200. judgeval/common/api/constants.py +0 -165
  201. judgeval/common/exceptions.py +0 -27
  202. judgeval/common/storage/__init__.py +0 -6
  203. judgeval/common/storage/s3_storage.py +0 -98
  204. judgeval/common/tracer/__init__.py +0 -31
  205. judgeval/common/tracer/constants.py +0 -22
  206. judgeval/common/tracer/core.py +0 -1916
  207. judgeval/common/tracer/otel_exporter.py +0 -108
  208. judgeval/common/tracer/otel_span_processor.py +0 -234
  209. judgeval/common/tracer/span_processor.py +0 -37
  210. judgeval/common/tracer/span_transformer.py +0 -211
  211. judgeval/common/tracer/trace_manager.py +0 -92
  212. judgeval/common/utils.py +0 -940
  213. judgeval/data/datasets/__init__.py +0 -4
  214. judgeval/data/datasets/dataset.py +0 -341
  215. judgeval/data/datasets/eval_dataset_client.py +0 -214
  216. judgeval/data/tool.py +0 -5
  217. judgeval/data/trace_run.py +0 -37
  218. judgeval/evaluation_run.py +0 -75
  219. judgeval/integrations/langgraph.py +0 -843
  220. judgeval/judges/mixture_of_judges.py +0 -286
  221. judgeval/judgment_client.py +0 -369
  222. judgeval/rules.py +0 -521
  223. judgeval/run_evaluation.py +0 -684
  224. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
  225. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  226. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  227. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
  228. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
  229. judgeval/utils/alerts.py +0 -93
  230. judgeval/utils/requests.py +0 -50
  231. judgeval-0.1.0.dist-info/METADATA +0 -202
  232. judgeval-0.1.0.dist-info/RECORD +0 -73
  233. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
  234. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,78 @@
1
+ from __future__ import annotations
2
+ from typing import TYPE_CHECKING
3
+ from judgeval.logger import judgeval_logger
4
+
5
+ from judgeval.v1.instrumentation.llm.constants import ProviderType
6
+ from judgeval.v1.instrumentation.llm.providers import (
7
+ HAS_OPENAI,
8
+ HAS_TOGETHER,
9
+ HAS_ANTHROPIC,
10
+ HAS_GOOGLE_GENAI,
11
+ ApiClient,
12
+ )
13
+
14
+ if TYPE_CHECKING:
15
+ from judgeval.v1.tracer.base_tracer import BaseTracer
16
+
17
+
18
def _detect_provider(client: ApiClient) -> ProviderType:
    """Classify ``client`` by the provider SDK it belongs to.

    Each provider SDK is imported lazily, and only when its ``HAS_*`` flag is
    set, so a missing optional dependency is never imported. Providers are
    checked in the order OpenAI, Anthropic, Together, Google GenAI; the first
    ``isinstance`` match wins.

    Returns:
        The matching ``ProviderType``, or ``ProviderType.DEFAULT`` (after
        logging a warning) when the client matches none of the checked SDKs.
    """
    if HAS_OPENAI:
        from openai import OpenAI, AsyncOpenAI

        openai_clients = (OpenAI, AsyncOpenAI)
        if isinstance(client, openai_clients):
            return ProviderType.OPENAI

    if HAS_ANTHROPIC:
        from anthropic import Anthropic, AsyncAnthropic

        anthropic_clients = (Anthropic, AsyncAnthropic)
        if isinstance(client, anthropic_clients):
            return ProviderType.ANTHROPIC

    if HAS_TOGETHER:
        from together import Together, AsyncTogether  # type: ignore[import-untyped]

        together_clients = (Together, AsyncTogether)
        if isinstance(client, together_clients):
            return ProviderType.TOGETHER

    if HAS_GOOGLE_GENAI:
        from google.genai import Client as GoogleClient

        if isinstance(client, GoogleClient):
            return ProviderType.GOOGLE

    # Nothing matched: tell the user, then let the caller decide how to treat
    # the DEFAULT provider type.
    client_type = type(client)
    judgeval_logger.warning(
        f"Unknown client type {client_type}, Trying to wrap as OpenAI-compatible. "
        "If this is a mistake or you think we should support this client, please file an issue at https://github.com/JudgmentLabs/judgeval/issues!"
    )
    return ProviderType.DEFAULT
49
+
50
+
51
def wrap_provider(tracer: BaseTracer, client: ApiClient) -> ApiClient:
    """
    Add tracing to an LLM API client.

    The provider is detected with ``_detect_provider`` and the client is
    handed to that provider's wrapper (Anthropic, Together or Google GenAI).
    OpenAI clients and clients of unknown type both go through the OpenAI
    wrapper. Each wrapper module is imported only on the branch that needs it.

    Returns:
        Whatever the selected provider wrapper returns for ``client``.
    """
    detected = _detect_provider(client)

    if detected == ProviderType.ANTHROPIC:
        from .llm_anthropic.wrapper import wrap_anthropic_client

        return wrap_anthropic_client(tracer, client)

    if detected == ProviderType.TOGETHER:
        from .llm_together.wrapper import wrap_together_client

        return wrap_together_client(tracer, client)

    if detected == ProviderType.GOOGLE:
        from .llm_google.wrapper import wrap_google_client

        return wrap_google_client(tracer, client)

    # ProviderType.OPENAI, and the fallback for anything unrecognised, use the
    # OpenAI-compatible wrapper.
    from .llm_openai.wrapper import wrap_openai_client

    return wrap_openai_client(tracer, client)
@@ -0,0 +1,11 @@
1
+ from __future__ import annotations
2
+
3
+ from enum import Enum
4
+
5
+
6
class ProviderType(Enum):
    """Identifiers for the LLM client families that can be instrumented."""

    # One member per explicitly recognised SDK.
    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    TOGETHER = "together"
    GOOGLE = "google"

    # Catch-all for clients that match none of the members above.
    DEFAULT = "default"
@@ -0,0 +1,5 @@
1
+ from __future__ import annotations
2
+
3
+ from .wrapper import wrap_anthropic_client
4
+
5
+ __all__ = ["wrap_anthropic_client"]
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+ import importlib.util
3
+
4
+ HAS_ANTHROPIC = importlib.util.find_spec("anthropic") is not None
5
+
6
+ __all__ = ["HAS_ANTHROPIC"]
@@ -0,0 +1,414 @@
1
+ from __future__ import annotations
2
+ from typing import (
3
+ TYPE_CHECKING,
4
+ Any,
5
+ Awaitable,
6
+ Callable,
7
+ Dict,
8
+ Iterator,
9
+ AsyncIterator,
10
+ Generator,
11
+ AsyncGenerator,
12
+ Tuple,
13
+ )
14
+
15
+ from opentelemetry.trace import Status, StatusCode
16
+ from judgeval.judgment_attribute_keys import AttributeKeys
17
+ from judgeval.utils.serialize import safe_serialize
18
+ from judgeval.utils.wrappers import (
19
+ immutable_wrap_sync,
20
+ immutable_wrap_async,
21
+ mutable_wrap_sync,
22
+ mutable_wrap_async,
23
+ immutable_wrap_sync_iterator,
24
+ immutable_wrap_async_iterator,
25
+ )
26
+
27
+ if TYPE_CHECKING:
28
+ from judgeval.v1.tracer import BaseTracer
29
+ from anthropic import Anthropic, AsyncAnthropic
30
+ from anthropic.types import (
31
+ Message,
32
+ Usage,
33
+ MessageDeltaUsage,
34
+ RawMessageStreamEvent,
35
+ )
36
+
37
+
38
+ def _extract_anthropic_content(chunk: RawMessageStreamEvent) -> str:
39
+ if chunk.type == "content_block_delta":
40
+ delta = chunk.delta
41
+ if delta.type == "text_delta" and delta.text:
42
+ return delta.text
43
+ return ""
44
+
45
+
46
+ def _extract_anthropic_tokens(
47
+ usage: Usage | MessageDeltaUsage,
48
+ ) -> Tuple[int, int, int, int]:
49
+ input_tokens = usage.input_tokens if usage.input_tokens is not None else 0
50
+ output_tokens = usage.output_tokens if usage.output_tokens is not None else 0
51
+ cache_read = (
52
+ usage.cache_read_input_tokens
53
+ if usage.cache_read_input_tokens is not None
54
+ else 0
55
+ )
56
+ cache_creation = (
57
+ usage.cache_creation_input_tokens
58
+ if usage.cache_creation_input_tokens is not None
59
+ else 0
60
+ )
61
+ return (input_tokens, output_tokens, cache_read, cache_creation)
62
+
63
+
64
+ def _extract_anthropic_chunk_usage(
65
+ chunk: RawMessageStreamEvent,
66
+ ) -> Usage | MessageDeltaUsage | None:
67
+ if chunk.type == "message_start":
68
+ return chunk.message.usage if chunk.message else None
69
+ elif chunk.type == "message_delta":
70
+ return chunk.usage if hasattr(chunk, "usage") else None
71
+ return None
72
+
73
+
74
def wrap_messages_create_sync(tracer: BaseTracer, client: Anthropic) -> None:
    """Replace ``client.messages.create`` with a tracing dispatcher.

    The original bound method is captured once. On every call the dispatcher
    looks at the ``stream`` keyword argument and routes the call through the
    streaming or the non-streaming traced wrapper.
    """
    unpatched_create = client.messages.create

    def dispatcher(*args: Any, **kwargs: Any) -> Any:
        # Only a keyword ``stream`` argument is inspected.
        make_wrapper = (
            _wrap_streaming_sync
            if kwargs.get("stream", False)
            else _wrap_non_streaming_sync
        )
        return make_wrapper(tracer, unpatched_create)(*args, **kwargs)

    setattr(client.messages, "create", dispatcher)
83
+
84
+
85
def _wrap_non_streaming_sync(
    tracer: BaseTracer, original_func: Callable[..., Message]
) -> Callable[..., Message]:
    """Build a traced version of a blocking, non-streaming ``messages.create``.

    A span named ``ANTHROPIC_API_CALL`` is started before the call, filled
    with the serialized request, response, token usage and model name, marked
    as an error if the call raises, and always ended afterwards.
    """

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        llm_span = tracer.get_tracer().start_span(
            "ANTHROPIC_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        ctx["span"] = llm_span
        llm_span.set_attribute(AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs))

        requested_model = kwargs.get("model", "")
        ctx["model_name"] = requested_model
        llm_span.set_attribute(AttributeKeys.JUDGMENT_LLM_MODEL_NAME, requested_model)

    def post_hook(ctx: Dict[str, Any], result: Message) -> None:
        llm_span = ctx.get("span")
        if not llm_span:
            return

        llm_span.set_attribute(AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result))

        usage = result.usage
        if usage:
            input_count, output_count, cache_read_count, cache_write_count = (
                _extract_anthropic_tokens(usage)
            )
            token_attributes = (
                (AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS, input_count),
                (AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, output_count),
                (AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read_count),
                (
                    AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
                    cache_write_count,
                ),
            )
            for attribute_key, token_count in token_attributes:
                llm_span.set_attribute(attribute_key, token_count)
            llm_span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_METADATA, safe_serialize(usage)
            )

        # The model reported by the response replaces the requested one.
        llm_span.set_attribute(AttributeKeys.JUDGMENT_LLM_MODEL_NAME, result.model)

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        llm_span = ctx.get("span")
        if not llm_span:
            return
        llm_span.record_exception(error)
        llm_span.set_status(Status(StatusCode.ERROR))

    def finally_hook(ctx: Dict[str, Any]) -> None:
        llm_span = ctx.get("span")
        if not llm_span:
            return
        llm_span.end()

    return immutable_wrap_sync(
        original_func,
        pre_hook=pre_hook,
        post_hook=post_hook,
        error_hook=error_hook,
        finally_hook=finally_hook,
    )
148
+
149
+
150
def _wrap_streaming_sync(
    tracer: BaseTracer, original_func: Callable[..., Iterator[RawMessageStreamEvent]]
) -> Callable[..., Iterator[RawMessageStreamEvent]]:
    """Build a traced version of a blocking, streaming ``messages.create``.

    A span named ``ANTHROPIC_API_CALL`` is started before the call. The
    returned event iterator is replaced by a traced one that accumulates text
    deltas and token usage onto the span, and ends the span once iteration
    finishes or fails. If the call itself raises before any stream exists,
    the span is marked as an error and ended immediately.

    Args:
        tracer: Tracer used to create the span.
        original_func: The unwrapped streaming ``create`` callable.

    Returns:
        A callable with the same call signature as ``original_func`` whose
        result is the traced iterator.
    """
    usage_fields = (
        "input_tokens",
        "output_tokens",
        "cache_read_input_tokens",
        "cache_creation_input_tokens",
    )

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        ctx["span"] = tracer.get_tracer().start_span(
            "ANTHROPIC_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        ctx["span"].set_attribute(AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs))
        ctx["model_name"] = kwargs.get("model", "")
        ctx["span"].set_attribute(
            AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
        )
        ctx["accumulated_content"] = ""

    def mutate_hook(
        ctx: Dict[str, Any], result: Iterator[RawMessageStreamEvent]
    ) -> Iterator[RawMessageStreamEvent]:
        def traced_generator() -> Generator[RawMessageStreamEvent, None, None]:
            for chunk in result:
                yield chunk

        # The inner hooks deliberately use the outer ``ctx``: that is where
        # pre_hook stored the span and the accumulated text.
        def yield_hook(inner_ctx: Dict[str, Any], chunk: RawMessageStreamEvent) -> None:
            span = ctx.get("span")
            if not span:
                return

            content = _extract_anthropic_content(chunk)
            if content:
                ctx["accumulated_content"] = (
                    ctx.get("accumulated_content", "") + content
                )

            usage_data = _extract_anthropic_chunk_usage(chunk)
            if usage_data:
                # Keep the latest *reported* value of each counter. A later
                # event whose usage leaves a counter unset (None/absent) --
                # as a message_delta may do for the input-side counters --
                # must not reset a value reported by an earlier event to 0.
                totals = ctx.setdefault("usage_totals", dict.fromkeys(usage_fields, 0))
                for field in usage_fields:
                    reported = getattr(usage_data, field, None)
                    if reported is not None:
                        totals[field] = reported

                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
                    totals["input_tokens"],
                )
                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
                    totals["output_tokens"],
                )
                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
                    totals["cache_read_input_tokens"],
                )
                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
                    totals["cache_creation_input_tokens"],
                )
                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_METADATA, safe_serialize(usage_data)
                )

        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            span = ctx.get("span")
            if span:
                accumulated = ctx.get("accumulated_content", "")
                span.set_attribute(AttributeKeys.GEN_AI_COMPLETION, accumulated)

        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
            span = ctx.get("span")
            if span:
                span.record_exception(error)
                span.set_status(Status(StatusCode.ERROR))

        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            span = ctx.get("span")
            if span:
                span.end()

        wrapped_generator = immutable_wrap_sync_iterator(
            traced_generator,
            yield_hook=yield_hook,
            post_hook=post_hook_inner,
            error_hook=error_hook_inner,
            finally_hook=finally_hook_inner,
        )

        return wrapped_generator()

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        span = ctx.get("span")
        if span:
            span.record_exception(error)
            span.set_status(Status(StatusCode.ERROR))
            # The span is otherwise ended only by the traced iterator's
            # finally hook. When the call fails there is no iterator, so end
            # the span here or it would never be closed.
            # NOTE(review): assumes mutable_wrap_sync invokes error_hook when
            # original_func raises -- confirm against the wrapper utility.
            span.end()

    return mutable_wrap_sync(
        original_func,
        pre_hook=pre_hook,
        mutate_hook=mutate_hook,
        error_hook=error_hook,
    )
243
+
244
+
245
def wrap_messages_create_async(tracer: BaseTracer, client: AsyncAnthropic) -> None:
    """Replace ``client.messages.create`` with an async tracing dispatcher.

    The original bound coroutine function is captured once. On every call the
    dispatcher looks at the ``stream`` keyword argument and awaits the
    streaming or the non-streaming traced wrapper.
    """
    unpatched_create = client.messages.create

    async def dispatcher(*args: Any, **kwargs: Any) -> Any:
        # Only a keyword ``stream`` argument is inspected.
        make_wrapper = (
            _wrap_streaming_async
            if kwargs.get("stream", False)
            else _wrap_non_streaming_async
        )
        return await make_wrapper(tracer, unpatched_create)(*args, **kwargs)

    setattr(client.messages, "create", dispatcher)
254
+
255
+
256
def _wrap_non_streaming_async(
    tracer: BaseTracer, original_func: Callable[..., Awaitable[Message]]
) -> Callable[..., Awaitable[Message]]:
    """Build a traced version of an awaitable, non-streaming ``messages.create``.

    A span named ``ANTHROPIC_API_CALL`` is started before the call, filled
    with the serialized request, response, token usage and model name, marked
    as an error if the call raises, and always ended afterwards.
    """

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        llm_span = tracer.get_tracer().start_span(
            "ANTHROPIC_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        ctx["span"] = llm_span
        llm_span.set_attribute(AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs))

        requested_model = kwargs.get("model", "")
        ctx["model_name"] = requested_model
        llm_span.set_attribute(AttributeKeys.JUDGMENT_LLM_MODEL_NAME, requested_model)

    def post_hook(ctx: Dict[str, Any], result: Message) -> None:
        llm_span = ctx.get("span")
        if not llm_span:
            return

        llm_span.set_attribute(AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result))

        usage = result.usage
        if usage:
            input_count, output_count, cache_read_count, cache_write_count = (
                _extract_anthropic_tokens(usage)
            )
            token_attributes = (
                (AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS, input_count),
                (AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, output_count),
                (AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read_count),
                (
                    AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
                    cache_write_count,
                ),
            )
            for attribute_key, token_count in token_attributes:
                llm_span.set_attribute(attribute_key, token_count)
            llm_span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_METADATA, safe_serialize(usage)
            )

        # The model reported by the response replaces the requested one.
        llm_span.set_attribute(AttributeKeys.JUDGMENT_LLM_MODEL_NAME, result.model)

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        llm_span = ctx.get("span")
        if not llm_span:
            return
        llm_span.record_exception(error)
        llm_span.set_status(Status(StatusCode.ERROR))

    def finally_hook(ctx: Dict[str, Any]) -> None:
        llm_span = ctx.get("span")
        if not llm_span:
            return
        llm_span.end()

    return immutable_wrap_async(
        original_func,
        pre_hook=pre_hook,
        post_hook=post_hook,
        error_hook=error_hook,
        finally_hook=finally_hook,
    )
319
+
320
+
321
def _wrap_streaming_async(
    tracer: BaseTracer,
    original_func: Callable[..., Awaitable[AsyncIterator[RawMessageStreamEvent]]],
) -> Callable[..., Awaitable[AsyncIterator[RawMessageStreamEvent]]]:
    """Build a traced version of an awaitable, streaming ``messages.create``.

    A span named ``ANTHROPIC_API_CALL`` is started before the call. The
    returned async event iterator is replaced by a traced one that
    accumulates text deltas and token usage onto the span, and ends the span
    once iteration finishes or fails. If the call itself raises before any
    stream exists, the span is marked as an error and ended immediately.

    Args:
        tracer: Tracer used to create the span.
        original_func: The unwrapped async streaming ``create`` callable.

    Returns:
        A callable with the same call signature as ``original_func`` whose
        awaited result is the traced async iterator.
    """
    usage_fields = (
        "input_tokens",
        "output_tokens",
        "cache_read_input_tokens",
        "cache_creation_input_tokens",
    )

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        ctx["span"] = tracer.get_tracer().start_span(
            "ANTHROPIC_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        ctx["span"].set_attribute(AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs))
        ctx["model_name"] = kwargs.get("model", "")
        ctx["span"].set_attribute(
            AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
        )
        ctx["accumulated_content"] = ""

    def mutate_hook(
        ctx: Dict[str, Any], result: AsyncIterator[RawMessageStreamEvent]
    ) -> AsyncIterator[RawMessageStreamEvent]:
        async def traced_generator() -> AsyncGenerator[RawMessageStreamEvent, None]:
            async for chunk in result:
                yield chunk

        # The inner hooks deliberately use the outer ``ctx``: that is where
        # pre_hook stored the span and the accumulated text.
        def yield_hook(inner_ctx: Dict[str, Any], chunk: RawMessageStreamEvent) -> None:
            span = ctx.get("span")
            if not span:
                return

            content = _extract_anthropic_content(chunk)
            if content:
                ctx["accumulated_content"] = (
                    ctx.get("accumulated_content", "") + content
                )

            usage_data = _extract_anthropic_chunk_usage(chunk)
            if usage_data:
                # Keep the latest *reported* value of each counter. A later
                # event whose usage leaves a counter unset (None/absent) --
                # as a message_delta may do for the input-side counters --
                # must not reset a value reported by an earlier event to 0.
                totals = ctx.setdefault("usage_totals", dict.fromkeys(usage_fields, 0))
                for field in usage_fields:
                    reported = getattr(usage_data, field, None)
                    if reported is not None:
                        totals[field] = reported

                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
                    totals["input_tokens"],
                )
                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
                    totals["output_tokens"],
                )
                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
                    totals["cache_read_input_tokens"],
                )
                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
                    totals["cache_creation_input_tokens"],
                )
                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_METADATA, safe_serialize(usage_data)
                )

        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            span = ctx.get("span")
            if span:
                accumulated = ctx.get("accumulated_content", "")
                span.set_attribute(AttributeKeys.GEN_AI_COMPLETION, accumulated)

        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
            span = ctx.get("span")
            if span:
                span.record_exception(error)
                span.set_status(Status(StatusCode.ERROR))

        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            span = ctx.get("span")
            if span:
                span.end()

        wrapped_generator = immutable_wrap_async_iterator(
            traced_generator,
            yield_hook=yield_hook,
            post_hook=post_hook_inner,
            error_hook=error_hook_inner,
            finally_hook=finally_hook_inner,
        )

        return wrapped_generator()

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        span = ctx.get("span")
        if span:
            span.record_exception(error)
            span.set_status(Status(StatusCode.ERROR))
            # The span is otherwise ended only by the traced iterator's
            # finally hook. When the call fails there is no iterator, so end
            # the span here or it would never be closed.
            # NOTE(review): assumes mutable_wrap_async invokes error_hook when
            # original_func raises -- confirm against the wrapper utility.
            span.end()

    return mutable_wrap_async(
        original_func,
        pre_hook=pre_hook,
        mutate_hook=mutate_hook,
        error_hook=error_hook,
    )