judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234)
  1. judgeval/__init__.py +173 -10
  2. judgeval/api/__init__.py +523 -0
  3. judgeval/api/api_types.py +413 -0
  4. judgeval/cli.py +112 -0
  5. judgeval/constants.py +7 -30
  6. judgeval/data/__init__.py +1 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +14 -40
  9. judgeval/data/judgment_types.py +396 -146
  10. judgeval/data/result.py +11 -18
  11. judgeval/data/scorer_data.py +3 -26
  12. judgeval/data/scripts/openapi_transform.py +5 -5
  13. judgeval/data/trace.py +115 -194
  14. judgeval/dataset/__init__.py +335 -0
  15. judgeval/env.py +55 -0
  16. judgeval/evaluation/__init__.py +346 -0
  17. judgeval/exceptions.py +28 -0
  18. judgeval/integrations/langgraph/__init__.py +13 -0
  19. judgeval/integrations/openlit/__init__.py +51 -0
  20. judgeval/judges/__init__.py +2 -2
  21. judgeval/judges/litellm_judge.py +77 -16
  22. judgeval/judges/together_judge.py +88 -17
  23. judgeval/judges/utils.py +7 -20
  24. judgeval/judgment_attribute_keys.py +55 -0
  25. judgeval/{common/logger.py → logger.py} +24 -8
  26. judgeval/prompt/__init__.py +330 -0
  27. judgeval/scorers/__init__.py +11 -11
  28. judgeval/scorers/agent_scorer.py +15 -19
  29. judgeval/scorers/api_scorer.py +21 -23
  30. judgeval/scorers/base_scorer.py +54 -36
  31. judgeval/scorers/example_scorer.py +1 -3
  32. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
  36. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
  37. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
  38. judgeval/scorers/score.py +64 -47
  39. judgeval/scorers/utils.py +2 -107
  40. judgeval/tracer/__init__.py +1111 -2
  41. judgeval/tracer/constants.py +1 -0
  42. judgeval/tracer/exporters/__init__.py +40 -0
  43. judgeval/tracer/exporters/s3.py +119 -0
  44. judgeval/tracer/exporters/store.py +59 -0
  45. judgeval/tracer/exporters/utils.py +32 -0
  46. judgeval/tracer/keys.py +63 -0
  47. judgeval/tracer/llm/__init__.py +7 -0
  48. judgeval/tracer/llm/config.py +78 -0
  49. judgeval/tracer/llm/constants.py +9 -0
  50. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  51. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  52. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  53. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  54. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  55. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  56. judgeval/tracer/llm/llm_google/config.py +6 -0
  57. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  58. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  59. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  60. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  61. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  62. judgeval/tracer/llm/llm_openai/config.py +6 -0
  63. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  64. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  65. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  66. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  67. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  68. judgeval/tracer/llm/llm_together/config.py +6 -0
  69. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  70. judgeval/tracer/llm/providers.py +19 -0
  71. judgeval/tracer/managers.py +167 -0
  72. judgeval/tracer/processors/__init__.py +220 -0
  73. judgeval/tracer/utils.py +19 -0
  74. judgeval/trainer/__init__.py +14 -0
  75. judgeval/trainer/base_trainer.py +122 -0
  76. judgeval/trainer/config.py +123 -0
  77. judgeval/trainer/console.py +144 -0
  78. judgeval/trainer/fireworks_trainer.py +392 -0
  79. judgeval/trainer/trainable_model.py +252 -0
  80. judgeval/trainer/trainer.py +70 -0
  81. judgeval/utils/async_utils.py +39 -0
  82. judgeval/utils/decorators/__init__.py +0 -0
  83. judgeval/utils/decorators/dont_throw.py +37 -0
  84. judgeval/utils/decorators/use_once.py +13 -0
  85. judgeval/utils/file_utils.py +74 -28
  86. judgeval/utils/guards.py +36 -0
  87. judgeval/utils/meta.py +27 -0
  88. judgeval/utils/project.py +15 -0
  89. judgeval/utils/serialize.py +253 -0
  90. judgeval/utils/testing.py +70 -0
  91. judgeval/utils/url.py +10 -0
  92. judgeval/{version_check.py → utils/version_check.py} +5 -3
  93. judgeval/utils/wrappers/README.md +3 -0
  94. judgeval/utils/wrappers/__init__.py +15 -0
  95. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  96. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  97. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  98. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  99. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  100. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  101. judgeval/utils/wrappers/py.typed +0 -0
  102. judgeval/utils/wrappers/utils.py +35 -0
  103. judgeval/v1/__init__.py +88 -0
  104. judgeval/v1/data/__init__.py +7 -0
  105. judgeval/v1/data/example.py +44 -0
  106. judgeval/v1/data/scorer_data.py +42 -0
  107. judgeval/v1/data/scoring_result.py +44 -0
  108. judgeval/v1/datasets/__init__.py +6 -0
  109. judgeval/v1/datasets/dataset.py +214 -0
  110. judgeval/v1/datasets/dataset_factory.py +94 -0
  111. judgeval/v1/evaluation/__init__.py +6 -0
  112. judgeval/v1/evaluation/evaluation.py +182 -0
  113. judgeval/v1/evaluation/evaluation_factory.py +17 -0
  114. judgeval/v1/instrumentation/__init__.py +6 -0
  115. judgeval/v1/instrumentation/llm/__init__.py +7 -0
  116. judgeval/v1/instrumentation/llm/config.py +78 -0
  117. judgeval/v1/instrumentation/llm/constants.py +11 -0
  118. judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
  119. judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
  120. judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
  121. judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
  122. judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
  123. judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
  124. judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
  125. judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
  126. judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
  127. judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
  128. judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
  129. judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
  130. judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
  131. judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
  132. judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
  133. judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
  134. judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
  135. judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
  136. judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
  137. judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
  138. judgeval/v1/instrumentation/llm/providers.py +19 -0
  139. judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
  140. judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
  141. judgeval/v1/integrations/langgraph/__init__.py +13 -0
  142. judgeval/v1/integrations/openlit/__init__.py +47 -0
  143. judgeval/v1/internal/api/__init__.py +525 -0
  144. judgeval/v1/internal/api/api_types.py +413 -0
  145. judgeval/v1/prompts/__init__.py +6 -0
  146. judgeval/v1/prompts/prompt.py +29 -0
  147. judgeval/v1/prompts/prompt_factory.py +189 -0
  148. judgeval/v1/py.typed +0 -0
  149. judgeval/v1/scorers/__init__.py +6 -0
  150. judgeval/v1/scorers/api_scorer.py +82 -0
  151. judgeval/v1/scorers/base_scorer.py +17 -0
  152. judgeval/v1/scorers/built_in/__init__.py +17 -0
  153. judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
  154. judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
  155. judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
  156. judgeval/v1/scorers/built_in/faithfulness.py +28 -0
  157. judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
  158. judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
  159. judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
  160. judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
  161. judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
  162. judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
  163. judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
  164. judgeval/v1/scorers/scorers_factory.py +49 -0
  165. judgeval/v1/tracer/__init__.py +7 -0
  166. judgeval/v1/tracer/base_tracer.py +520 -0
  167. judgeval/v1/tracer/exporters/__init__.py +14 -0
  168. judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
  169. judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
  170. judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
  171. judgeval/v1/tracer/exporters/span_store.py +50 -0
  172. judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
  173. judgeval/v1/tracer/processors/__init__.py +6 -0
  174. judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
  175. judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
  176. judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
  177. judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
  178. judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
  179. judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
  180. judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
  181. judgeval/v1/tracer/tracer.py +67 -0
  182. judgeval/v1/tracer/tracer_factory.py +38 -0
  183. judgeval/v1/trainers/__init__.py +5 -0
  184. judgeval/v1/trainers/base_trainer.py +62 -0
  185. judgeval/v1/trainers/config.py +123 -0
  186. judgeval/v1/trainers/console.py +144 -0
  187. judgeval/v1/trainers/fireworks_trainer.py +392 -0
  188. judgeval/v1/trainers/trainable_model.py +252 -0
  189. judgeval/v1/trainers/trainers_factory.py +37 -0
  190. judgeval/v1/utils.py +18 -0
  191. judgeval/version.py +5 -0
  192. judgeval/warnings.py +4 -0
  193. judgeval-0.23.0.dist-info/METADATA +266 -0
  194. judgeval-0.23.0.dist-info/RECORD +201 -0
  195. judgeval-0.23.0.dist-info/entry_points.txt +2 -0
  196. judgeval/clients.py +0 -34
  197. judgeval/common/__init__.py +0 -13
  198. judgeval/common/api/__init__.py +0 -3
  199. judgeval/common/api/api.py +0 -352
  200. judgeval/common/api/constants.py +0 -165
  201. judgeval/common/exceptions.py +0 -27
  202. judgeval/common/storage/__init__.py +0 -6
  203. judgeval/common/storage/s3_storage.py +0 -98
  204. judgeval/common/tracer/__init__.py +0 -31
  205. judgeval/common/tracer/constants.py +0 -22
  206. judgeval/common/tracer/core.py +0 -1916
  207. judgeval/common/tracer/otel_exporter.py +0 -108
  208. judgeval/common/tracer/otel_span_processor.py +0 -234
  209. judgeval/common/tracer/span_processor.py +0 -37
  210. judgeval/common/tracer/span_transformer.py +0 -211
  211. judgeval/common/tracer/trace_manager.py +0 -92
  212. judgeval/common/utils.py +0 -940
  213. judgeval/data/datasets/__init__.py +0 -4
  214. judgeval/data/datasets/dataset.py +0 -341
  215. judgeval/data/datasets/eval_dataset_client.py +0 -214
  216. judgeval/data/tool.py +0 -5
  217. judgeval/data/trace_run.py +0 -37
  218. judgeval/evaluation_run.py +0 -75
  219. judgeval/integrations/langgraph.py +0 -843
  220. judgeval/judges/mixture_of_judges.py +0 -286
  221. judgeval/judgment_client.py +0 -369
  222. judgeval/rules.py +0 -521
  223. judgeval/run_evaluation.py +0 -684
  224. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
  225. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  226. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  227. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
  228. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
  229. judgeval/utils/alerts.py +0 -93
  230. judgeval/utils/requests.py +0 -50
  231. judgeval-0.1.0.dist-info/METADATA +0 -202
  232. judgeval-0.1.0.dist-info/RECORD +0 -73
  233. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
  234. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0

judgeval/tracer/llm/llm_together/chat_completions.py
@@ -0,0 +1,406 @@
+from __future__ import annotations
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Awaitable,
+    Callable,
+    Dict,
+    Iterator,
+    AsyncIterator,
+    Generator,
+    AsyncGenerator,
+)
+
+from judgeval.tracer.keys import AttributeKeys
+from judgeval.tracer.utils import set_span_attribute
+from judgeval.utils.serialize import safe_serialize
+from judgeval.utils.wrappers import (
+    immutable_wrap_async,
+    immutable_wrap_sync,
+    mutable_wrap_sync,
+    mutable_wrap_async,
+    immutable_wrap_sync_iterator,
+    immutable_wrap_async_iterator,
+)
+
+if TYPE_CHECKING:
+    from judgeval.tracer import Tracer
+    from together import Together, AsyncTogether  # type: ignore[import-untyped]
+    from together.types import ChatCompletionResponse, ChatCompletionChunk  # type: ignore[import-untyped]
+    from together.types.common import UsageData  # type: ignore[import-untyped]
+
+
+def _extract_together_tokens(usage: UsageData) -> tuple[int, int, int, int]:
+    prompt_tokens = usage.prompt_tokens if usage.prompt_tokens is not None else 0
+    completion_tokens = (
+        usage.completion_tokens if usage.completion_tokens is not None else 0
+    )
+    cache_read_input_tokens = 0
+    cache_creation_input_tokens = 0
+    return (
+        prompt_tokens,
+        completion_tokens,
+        cache_read_input_tokens,
+        cache_creation_input_tokens,
+    )
+
+
+def wrap_chat_completions_create_sync(tracer: Tracer, client: Together) -> None:
+    original_func = client.chat.completions.create
+
+    def dispatcher(*args: Any, **kwargs: Any) -> Any:
+        if kwargs.get("stream", False):
+            return _wrap_streaming_sync(tracer, original_func)(*args, **kwargs)  # type: ignore[arg-type]
+        return _wrap_non_streaming_sync(tracer, original_func)(*args, **kwargs)  # type: ignore[arg-type]
+
+    setattr(client.chat.completions, "create", dispatcher)
+
+
+def _wrap_non_streaming_sync(
+    tracer: Tracer, original_func: Callable[..., ChatCompletionResponse]
+) -> Callable[..., ChatCompletionResponse]:
+    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
+        ctx["span"] = tracer.get_tracer().start_span(
+            "TOGETHER_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+        )
+        tracer._inject_judgment_context(ctx["span"])
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+        )
+        ctx["model_name"] = kwargs.get("model", "")
+        prefixed_model_name = (
+            f"together_ai/{ctx['model_name']}" if ctx["model_name"] else ""
+        )
+        ctx["model_name"] = prefixed_model_name
+        set_span_attribute(
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
+        )
+
+    def post_hook(ctx: Dict[str, Any], result: ChatCompletionResponse) -> None:
+        span = ctx.get("span")
+        if not span:
+            return
+
+        set_span_attribute(
+            span, AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result)
+        )
+
+        if result.usage:
+            prompt_tokens, completion_tokens, _, _ = _extract_together_tokens(
+                result.usage
+            )
+            set_span_attribute(
+                span,
+                AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+                prompt_tokens,
+            )
+            set_span_attribute(
+                span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
+            )
+            set_span_attribute(
+                span,
+                AttributeKeys.JUDGMENT_USAGE_METADATA,
+                safe_serialize(result.usage),
+            )
+
+        set_span_attribute(
+            span,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
+            ctx["model_name"],
+        )
+
+    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
+        span = ctx.get("span")
+        if span:
+            span.record_exception(error)
+
+    def finally_hook(ctx: Dict[str, Any]) -> None:
+        span = ctx.get("span")
+        if span:
+            span.end()
+
+    return immutable_wrap_sync(
+        original_func,
+        pre_hook=pre_hook,
+        post_hook=post_hook,
+        error_hook=error_hook,
+        finally_hook=finally_hook,
+    )
+
+
+def _wrap_streaming_sync(
+    tracer: Tracer, original_func: Callable[..., Iterator[ChatCompletionChunk]]
+) -> Callable[..., Iterator[ChatCompletionChunk]]:
+    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
+        ctx["span"] = tracer.get_tracer().start_span(
+            "TOGETHER_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+        )
+        tracer._inject_judgment_context(ctx["span"])
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+        )
+        ctx["model_name"] = kwargs.get("model", "")
+        prefixed_model_name = (
+            f"together_ai/{ctx['model_name']}" if ctx["model_name"] else ""
+        )
+        ctx["model_name"] = prefixed_model_name
+        set_span_attribute(
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
+        )
+        ctx["accumulated_content"] = ""
+
+    def mutate_hook(
+        ctx: Dict[str, Any], result: Iterator[ChatCompletionChunk]
+    ) -> Iterator[ChatCompletionChunk]:
+        def traced_generator() -> Generator[ChatCompletionChunk, None, None]:
+            for chunk in result:
+                yield chunk
+
+        def yield_hook(inner_ctx: Dict[str, Any], chunk: ChatCompletionChunk) -> None:
+            span = ctx.get("span")
+            if not span:
+                return
+
+            if chunk.choices and len(chunk.choices) > 0:
+                delta = chunk.choices[0].delta
+                if delta and hasattr(delta, "content") and delta.content:
+                    ctx["accumulated_content"] = (
+                        ctx.get("accumulated_content", "") + delta.content
+                    )
+
+            if chunk.usage:
+                prompt_tokens, completion_tokens, _, _ = _extract_together_tokens(
+                    chunk.usage
+                )
+                set_span_attribute(
+                    span,
+                    AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+                    prompt_tokens,
+                )
+                set_span_attribute(
+                    span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
+                )
+                set_span_attribute(
+                    span,
+                    AttributeKeys.JUDGMENT_USAGE_METADATA,
+                    safe_serialize(chunk.usage),
+                )
+
+        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
+            span = ctx.get("span")
+            if span:
+                accumulated = ctx.get("accumulated_content", "")
+                set_span_attribute(span, AttributeKeys.GEN_AI_COMPLETION, accumulated)
+
+        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
+            span = ctx.get("span")
+            if span:
+                span.record_exception(error)
+
+        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
+            span = ctx.get("span")
+            if span:
+                span.end()
+
+        wrapped_generator = immutable_wrap_sync_iterator(
+            traced_generator,
+            yield_hook=yield_hook,
+            post_hook=post_hook_inner,
+            error_hook=error_hook_inner,
+            finally_hook=finally_hook_inner,
+        )
+
+        return wrapped_generator()
+
+    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
+        span = ctx.get("span")
+        if span:
+            span.record_exception(error)
+
+    return mutable_wrap_sync(
+        original_func,
+        pre_hook=pre_hook,
+        mutate_hook=mutate_hook,
+        error_hook=error_hook,
+    )
+
+
+def wrap_chat_completions_create_async(tracer: Tracer, client: AsyncTogether) -> None:
+    original_func = client.chat.completions.create
+
+    async def dispatcher(*args: Any, **kwargs: Any) -> Any:
+        if kwargs.get("stream", False):
+            return await _wrap_streaming_async(tracer, original_func)(*args, **kwargs)  # type: ignore[arg-type]
+        return await _wrap_non_streaming_async(tracer, original_func)(*args, **kwargs)  # type: ignore[arg-type]
+
+    setattr(client.chat.completions, "create", dispatcher)
+
+
+def _wrap_non_streaming_async(
+    tracer: Tracer, original_func: Callable[..., Awaitable[ChatCompletionResponse]]
+) -> Callable[..., Awaitable[ChatCompletionResponse]]:
+    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
+        ctx["span"] = tracer.get_tracer().start_span(
+            "TOGETHER_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+        )
+        tracer._inject_judgment_context(ctx["span"])
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+        )
+        ctx["model_name"] = kwargs.get("model", "")
+        prefixed_model_name = (
+            f"together_ai/{ctx['model_name']}" if ctx["model_name"] else ""
+        )
+        ctx["model_name"] = prefixed_model_name
+        set_span_attribute(
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
+        )
+
+    def post_hook(ctx: Dict[str, Any], result: ChatCompletionResponse) -> None:
+        span = ctx.get("span")
+        if not span:
+            return
+
+        set_span_attribute(
+            span, AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result)
+        )
+
+        if result.usage:
+            prompt_tokens, completion_tokens, _, _ = _extract_together_tokens(
+                result.usage
+            )
+            set_span_attribute(
+                span,
+                AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+                prompt_tokens,
+            )
+            set_span_attribute(
+                span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
+            )
+            set_span_attribute(
+                span,
+                AttributeKeys.JUDGMENT_USAGE_METADATA,
+                safe_serialize(result.usage),
+            )
+
+        set_span_attribute(
+            span,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
+            ctx["model_name"],
+        )
+
+    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
+        span = ctx.get("span")
+        if span:
+            span.record_exception(error)
+
+    def finally_hook(ctx: Dict[str, Any]) -> None:
+        span = ctx.get("span")
+        if span:
+            span.end()
+
+    return immutable_wrap_async(
+        original_func,
+        pre_hook=pre_hook,
+        post_hook=post_hook,
+        error_hook=error_hook,
+        finally_hook=finally_hook,
+    )
+
+
+def _wrap_streaming_async(
+    tracer: Tracer,
+    original_func: Callable[..., Awaitable[AsyncIterator[ChatCompletionChunk]]],
+) -> Callable[..., Awaitable[AsyncIterator[ChatCompletionChunk]]]:
+    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
+        ctx["span"] = tracer.get_tracer().start_span(
+            "TOGETHER_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+        )
+        tracer._inject_judgment_context(ctx["span"])
+        set_span_attribute(
+            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+        )
+        ctx["model_name"] = kwargs.get("model", "")
+        prefixed_model_name = (
+            f"together_ai/{ctx['model_name']}" if ctx["model_name"] else ""
+        )
+        ctx["model_name"] = prefixed_model_name
+        set_span_attribute(
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
+        )
+        ctx["accumulated_content"] = ""
+
+    def mutate_hook(
+        ctx: Dict[str, Any], result: AsyncIterator[ChatCompletionChunk]
+    ) -> AsyncIterator[ChatCompletionChunk]:
+        async def traced_generator() -> AsyncGenerator[ChatCompletionChunk, None]:
+            async for chunk in result:
+                yield chunk
+
+        def yield_hook(inner_ctx: Dict[str, Any], chunk: ChatCompletionChunk) -> None:
+            span = ctx.get("span")
+            if not span:
+                return
+
+            if chunk.choices and len(chunk.choices) > 0:
+                delta = chunk.choices[0].delta
+                if delta and hasattr(delta, "content") and delta.content:
+                    ctx["accumulated_content"] = (
+                        ctx.get("accumulated_content", "") + delta.content
+                    )
+
+            if chunk.usage:
+                prompt_tokens, completion_tokens, _, _ = _extract_together_tokens(
+                    chunk.usage
+                )
+                set_span_attribute(
+                    span,
+                    AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+                    prompt_tokens,
+                )
+                set_span_attribute(
+                    span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
+                )
+                set_span_attribute(
+                    span,
+                    AttributeKeys.JUDGMENT_USAGE_METADATA,
+                    safe_serialize(chunk.usage),
+                )
+
+        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
+            span = ctx.get("span")
+            if span:
+                accumulated = ctx.get("accumulated_content", "")
+                set_span_attribute(span, AttributeKeys.GEN_AI_COMPLETION, accumulated)
+
+        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
+            span = ctx.get("span")
+            if span:
+                span.record_exception(error)
+
+        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
+            span = ctx.get("span")
+            if span:
+                span.end()
+
+        wrapped_generator = immutable_wrap_async_iterator(
+            traced_generator,
+            yield_hook=yield_hook,
+            post_hook=post_hook_inner,
+            error_hook=error_hook_inner,
+            finally_hook=finally_hook_inner,
+        )
+
+        return wrapped_generator()
+
+    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
+        span = ctx.get("span")
+        if span:
+            span.record_exception(error)
+
+    return mutable_wrap_async(
+        original_func,
+        pre_hook=pre_hook,
+        mutate_hook=mutate_hook,
+        error_hook=error_hook,
+    )

judgeval/tracer/llm/llm_together/config.py
@@ -0,0 +1,6 @@
+from __future__ import annotations
+import importlib.util
+
+HAS_TOGETHER = importlib.util.find_spec("together") is not None
+
+__all__ = ["HAS_TOGETHER"]

judgeval/tracer/llm/llm_together/wrapper.py
@@ -0,0 +1,52 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING, Union
+import typing
+
+from judgeval.tracer.llm.llm_together.chat_completions import (
+    wrap_chat_completions_create_sync,
+    wrap_chat_completions_create_async,
+)
+
+
+if TYPE_CHECKING:
+    from judgeval.tracer import Tracer
+    from together import Together, AsyncTogether  # type: ignore[import-untyped]
+
+    TClient = Union[Together, AsyncTogether]
+
+
+def wrap_together_client_sync(tracer: Tracer, client: Together) -> Together:
+    wrap_chat_completions_create_sync(tracer, client)
+    return client
+
+
+def wrap_together_client_async(tracer: Tracer, client: AsyncTogether) -> AsyncTogether:
+    wrap_chat_completions_create_async(tracer, client)
+    return client
+
+
+@typing.overload
+def wrap_together_client(tracer: Tracer, client: Together) -> Together: ...
+@typing.overload
+def wrap_together_client(tracer: Tracer, client: AsyncTogether) -> AsyncTogether: ...  # type: ignore[overload-cannot-match]
+
+
+def wrap_together_client(tracer: Tracer, client: TClient) -> TClient:
+    from judgeval.tracer.llm.llm_together.config import HAS_TOGETHER
+    from judgeval.logger import judgeval_logger
+
+    if not HAS_TOGETHER:
+        judgeval_logger.error(
+            "Cannot wrap Together client: 'together' library not installed. "
+            "Install it with: pip install together"
+        )
+        return client
+
+    from together import Together, AsyncTogether  # type: ignore[import-untyped]
+
+    if isinstance(client, AsyncTogether):
+        return wrap_together_client_async(tracer, client)
+    elif isinstance(client, Together):
+        return wrap_together_client_sync(tracer, client)
+    else:
+        raise TypeError(f"Invalid client type: {type(client)}")
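
A minimal usage sketch of the wrapper above (not part of the diff), assuming an already-configured judgeval Tracer instance named tracer and the together package installed; the model id is illustrative:

    # Hypothetical usage; `tracer` is assumed to be a configured judgeval Tracer.
    from together import Together
    from judgeval.tracer.llm.llm_together.wrapper import wrap_together_client

    client = wrap_together_client(tracer, Together())  # patches chat.completions.create in place
    # Subsequent calls emit a TOGETHER_API_CALL span carrying prompt, completion, and usage attributes.
    response = client.chat.completions.create(
        model="meta-llama/Llama-3-8b-chat-hf",
        messages=[{"role": "user", "content": "Hello"}],
    )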

judgeval/tracer/llm/providers.py
@@ -0,0 +1,19 @@
+from __future__ import annotations
+from typing import Any, TypeAlias
+
+from judgeval.tracer.llm.llm_openai.config import HAS_OPENAI
+from judgeval.tracer.llm.llm_together.config import HAS_TOGETHER
+from judgeval.tracer.llm.llm_anthropic.config import HAS_ANTHROPIC
+from judgeval.tracer.llm.llm_google.config import HAS_GOOGLE_GENAI
+
+# TODO: if we support dependency groups we can have this better type, but during runtime, we do
+# not know which clients an end user might have installed.
+ApiClient: TypeAlias = Any
+
+__all__ = [
+    "ApiClient",
+    "HAS_OPENAI",
+    "HAS_TOGETHER",
+    "HAS_ANTHROPIC",
+    "HAS_GOOGLE_GENAI",
+]

judgeval/tracer/managers.py
@@ -0,0 +1,167 @@
+from __future__ import annotations
+
+from contextlib import asynccontextmanager, contextmanager
+from typing import TYPE_CHECKING, Dict, Optional, List, Any
+from judgeval.tracer.keys import InternalAttributeKeys
+import uuid
+from judgeval.exceptions import JudgmentRuntimeError
+
+if TYPE_CHECKING:
+    from judgeval.tracer import Tracer
+
+
+@contextmanager
+def sync_span_context(
+    tracer: Tracer,
+    name: str,
+    span_attributes: Optional[Dict[str, str]] = None,
+    disable_partial_emit: bool = False,
+    end_on_exit: bool = False,
+):
+    if span_attributes is None:
+        span_attributes = {}
+
+    with tracer.get_tracer().start_as_current_span(
+        name=name,
+        attributes=span_attributes,
+        end_on_exit=end_on_exit,
+    ) as span:
+        if disable_partial_emit:
+            tracer.judgment_processor.set_internal_attribute(
+                span_context=span.get_span_context(),
+                key=InternalAttributeKeys.DISABLE_PARTIAL_EMIT,
+                value=True,
+            )
+        yield span
+
+
+@asynccontextmanager
+async def async_span_context(
+    tracer: Tracer,
+    name: str,
+    span_attributes: Optional[Dict[str, str]] = None,
+    disable_partial_emit: bool = False,
+    end_on_exit: bool = False,
+):
+    if span_attributes is None:
+        span_attributes = {}
+
+    with tracer.get_tracer().start_as_current_span(
+        name=name,
+        attributes=span_attributes,
+        end_on_exit=end_on_exit,
+    ) as span:
+        if disable_partial_emit:
+            tracer.judgment_processor.set_internal_attribute(
+                span_context=span.get_span_context(),
+                key=InternalAttributeKeys.DISABLE_PARTIAL_EMIT,
+                value=True,
+            )
+        yield span
+
+
+def create_agent_context(
+    tracer: Tracer,
+    args: tuple,
+    class_name: Optional[str] = None,
+    identifier: Optional[str] = None,
+    track_state: bool = False,
+    track_attributes: Optional[List[str]] = None,
+    field_mappings: Optional[Dict[str, str]] = None,
+):
+    """Create agent context and return token for cleanup"""
+    agent_id = str(uuid.uuid4())
+    agent_context: Dict[str, Any] = {"agent_id": agent_id}
+
+    if class_name:
+        agent_context["class_name"] = class_name
+    else:
+        agent_context["class_name"] = None
+
+    agent_context["track_state"] = track_state
+    agent_context["track_attributes"] = track_attributes or []
+    agent_context["field_mappings"] = field_mappings or {}
+
+    instance = args[0] if args else None
+    agent_context["instance"] = instance
+
+    if identifier:
+        if not class_name or not instance or not isinstance(instance, object):
+            raise JudgmentRuntimeError(
+                "'identifier' is set but no class name or instance is available. 'identifier' can only be specified when using the agent() decorator on a class method."
+            )
+        if (
+            instance
+            and hasattr(instance, identifier)
+            and not callable(getattr(instance, identifier))
+        ):
+            instance_name = str(getattr(instance, identifier))
+            agent_context["instance_name"] = instance_name
+        else:
+            raise JudgmentRuntimeError(
+                f"Attribute {identifier} does not exist for {class_name}. Check your agent() decorator."
+            )
+    else:
+        agent_context["instance_name"] = None
+
+    current_agent_context = tracer.get_current_agent_context().get()
+    if current_agent_context and "agent_id" in current_agent_context:
+        agent_context["parent_agent_id"] = current_agent_context["agent_id"]
+    else:
+        agent_context["parent_agent_id"] = None
+
+    agent_context["is_agent_entry_point"] = True
+    token = tracer.get_current_agent_context().set(agent_context)  # type: ignore
+    return token
+
+
+@contextmanager
+def sync_agent_context(
+    tracer: Tracer,
+    args: tuple,
+    class_name: Optional[str] = None,
+    identifier: Optional[str] = None,
+    track_state: bool = False,
+    track_attributes: Optional[List[str]] = None,
+    field_mappings: Optional[Dict[str, str]] = None,
+):
+    """Context manager for synchronous agent context"""
+    token = create_agent_context(
+        tracer=tracer,
+        args=args,
+        class_name=class_name,
+        identifier=identifier,
+        track_state=track_state,
+        track_attributes=track_attributes,
+        field_mappings=field_mappings,
+    )
+    try:
+        yield
+    finally:
+        tracer.get_current_agent_context().reset(token)
+
+
+@asynccontextmanager
+async def async_agent_context(
+    tracer: Tracer,
+    args: tuple,
+    class_name: Optional[str] = None,
+    identifier: Optional[str] = None,
+    track_state: bool = False,
+    track_attributes: Optional[List[str]] = None,
+    field_mappings: Optional[Dict[str, str]] = None,
+):
+    """Context manager for asynchronous agent context"""
+    token = create_agent_context(
+        tracer=tracer,
+        args=args,
+        class_name=class_name,
+        identifier=identifier,
+        track_state=track_state,
+        track_attributes=track_attributes,
+        field_mappings=field_mappings,
+    )
+    try:
+        yield
+    finally:
+        tracer.get_current_agent_context().reset(token)
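
A minimal sketch of how the span context manager above might be used (not part of the diff), assuming a configured judgeval Tracer named tracer; the span name, span-kind value, and helper function are illustrative:

    # Hypothetical usage; wraps a block of application code in a custom span.
    from judgeval.tracer.keys import AttributeKeys
    from judgeval.tracer.managers import sync_span_context

    with sync_span_context(
        tracer,
        "lookup_documents",
        {AttributeKeys.JUDGMENT_SPAN_KIND: "tool"},  # assumed span kind value
        end_on_exit=True,  # end the span when the block exits
    ) as span:
        docs = fetch_documents(query)  # placeholder for application code
        span.set_attribute("num_docs", len(docs))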