judgeval 0.0.11__py3-none-any.whl → 0.22.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of judgeval might be problematic. Click here for more details.

Files changed (171) hide show
  1. judgeval/__init__.py +177 -12
  2. judgeval/api/__init__.py +519 -0
  3. judgeval/api/api_types.py +407 -0
  4. judgeval/cli.py +79 -0
  5. judgeval/constants.py +76 -47
  6. judgeval/data/__init__.py +3 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +15 -56
  9. judgeval/data/judgment_types.py +450 -0
  10. judgeval/data/result.py +29 -73
  11. judgeval/data/scorer_data.py +29 -62
  12. judgeval/data/scripts/fix_default_factory.py +23 -0
  13. judgeval/data/scripts/openapi_transform.py +123 -0
  14. judgeval/data/trace.py +121 -0
  15. judgeval/dataset/__init__.py +264 -0
  16. judgeval/env.py +52 -0
  17. judgeval/evaluation/__init__.py +344 -0
  18. judgeval/exceptions.py +27 -0
  19. judgeval/integrations/langgraph/__init__.py +13 -0
  20. judgeval/integrations/openlit/__init__.py +50 -0
  21. judgeval/judges/__init__.py +2 -3
  22. judgeval/judges/base_judge.py +2 -3
  23. judgeval/judges/litellm_judge.py +100 -20
  24. judgeval/judges/together_judge.py +101 -20
  25. judgeval/judges/utils.py +20 -24
  26. judgeval/logger.py +62 -0
  27. judgeval/prompt/__init__.py +330 -0
  28. judgeval/scorers/__init__.py +18 -25
  29. judgeval/scorers/agent_scorer.py +17 -0
  30. judgeval/scorers/api_scorer.py +45 -41
  31. judgeval/scorers/base_scorer.py +83 -38
  32. judgeval/scorers/example_scorer.py +17 -0
  33. judgeval/scorers/exceptions.py +1 -0
  34. judgeval/scorers/judgeval_scorers/__init__.py +0 -148
  35. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +19 -17
  36. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +13 -19
  37. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +12 -19
  38. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +13 -19
  39. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +15 -0
  40. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +327 -0
  41. judgeval/scorers/score.py +77 -306
  42. judgeval/scorers/utils.py +4 -199
  43. judgeval/tracer/__init__.py +1122 -2
  44. judgeval/tracer/constants.py +1 -0
  45. judgeval/tracer/exporters/__init__.py +40 -0
  46. judgeval/tracer/exporters/s3.py +119 -0
  47. judgeval/tracer/exporters/store.py +59 -0
  48. judgeval/tracer/exporters/utils.py +32 -0
  49. judgeval/tracer/keys.py +63 -0
  50. judgeval/tracer/llm/__init__.py +7 -0
  51. judgeval/tracer/llm/config.py +78 -0
  52. judgeval/tracer/llm/constants.py +9 -0
  53. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  54. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  55. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  56. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  57. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  58. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  59. judgeval/tracer/llm/llm_google/config.py +6 -0
  60. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  61. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  62. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  63. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  64. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  65. judgeval/tracer/llm/llm_openai/config.py +6 -0
  66. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  67. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  68. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  69. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  70. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  71. judgeval/tracer/llm/llm_together/config.py +6 -0
  72. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  73. judgeval/tracer/llm/providers.py +19 -0
  74. judgeval/tracer/managers.py +167 -0
  75. judgeval/tracer/processors/__init__.py +220 -0
  76. judgeval/tracer/utils.py +19 -0
  77. judgeval/trainer/__init__.py +14 -0
  78. judgeval/trainer/base_trainer.py +122 -0
  79. judgeval/trainer/config.py +128 -0
  80. judgeval/trainer/console.py +144 -0
  81. judgeval/trainer/fireworks_trainer.py +396 -0
  82. judgeval/trainer/trainable_model.py +243 -0
  83. judgeval/trainer/trainer.py +70 -0
  84. judgeval/utils/async_utils.py +39 -0
  85. judgeval/utils/decorators/__init__.py +0 -0
  86. judgeval/utils/decorators/dont_throw.py +37 -0
  87. judgeval/utils/decorators/use_once.py +13 -0
  88. judgeval/utils/file_utils.py +97 -0
  89. judgeval/utils/guards.py +36 -0
  90. judgeval/utils/meta.py +27 -0
  91. judgeval/utils/project.py +15 -0
  92. judgeval/utils/serialize.py +253 -0
  93. judgeval/utils/testing.py +70 -0
  94. judgeval/utils/url.py +10 -0
  95. judgeval/utils/version_check.py +28 -0
  96. judgeval/utils/wrappers/README.md +3 -0
  97. judgeval/utils/wrappers/__init__.py +15 -0
  98. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  99. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  100. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  101. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  102. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  103. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  104. judgeval/utils/wrappers/py.typed +0 -0
  105. judgeval/utils/wrappers/utils.py +35 -0
  106. judgeval/version.py +5 -0
  107. judgeval/warnings.py +4 -0
  108. judgeval-0.22.2.dist-info/METADATA +265 -0
  109. judgeval-0.22.2.dist-info/RECORD +112 -0
  110. judgeval-0.22.2.dist-info/entry_points.txt +2 -0
  111. judgeval/clients.py +0 -39
  112. judgeval/common/__init__.py +0 -8
  113. judgeval/common/exceptions.py +0 -28
  114. judgeval/common/logger.py +0 -189
  115. judgeval/common/tracer.py +0 -798
  116. judgeval/common/utils.py +0 -763
  117. judgeval/data/api_example.py +0 -111
  118. judgeval/data/datasets/__init__.py +0 -5
  119. judgeval/data/datasets/dataset.py +0 -286
  120. judgeval/data/datasets/eval_dataset_client.py +0 -193
  121. judgeval/data/datasets/ground_truth.py +0 -54
  122. judgeval/data/datasets/utils.py +0 -74
  123. judgeval/evaluation_run.py +0 -132
  124. judgeval/judges/mixture_of_judges.py +0 -248
  125. judgeval/judgment_client.py +0 -354
  126. judgeval/run_evaluation.py +0 -439
  127. judgeval/scorers/judgeval_scorer.py +0 -140
  128. judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -19
  129. judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -19
  130. judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -22
  131. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -19
  132. judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -32
  133. judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -20
  134. judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py +0 -19
  135. judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -3
  136. judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -3
  137. judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -54
  138. judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -24
  139. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -4
  140. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -277
  141. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -169
  142. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -4
  143. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -298
  144. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -174
  145. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -3
  146. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -264
  147. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -106
  148. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -3
  149. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -254
  150. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -142
  151. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -3
  152. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -245
  153. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -121
  154. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -3
  155. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -325
  156. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -268
  157. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -3
  158. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -263
  159. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -104
  160. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -5
  161. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -134
  162. judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -3
  163. judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -247
  164. judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -550
  165. judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py +0 -3
  166. judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py +0 -157
  167. judgeval/scorers/prompt_scorer.py +0 -439
  168. judgeval-0.0.11.dist-info/METADATA +0 -36
  169. judgeval-0.0.11.dist-info/RECORD +0 -84
  170. {judgeval-0.0.11.dist-info → judgeval-0.22.2.dist-info}/WHEEL +0 -0
  171. {judgeval-0.0.11.dist-info → judgeval-0.22.2.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,406 @@
1
+ from __future__ import annotations
2
+ from typing import (
3
+ TYPE_CHECKING,
4
+ Any,
5
+ Awaitable,
6
+ Callable,
7
+ Dict,
8
+ Iterator,
9
+ AsyncIterator,
10
+ Generator,
11
+ AsyncGenerator,
12
+ )
13
+
14
+ from judgeval.tracer.keys import AttributeKeys
15
+ from judgeval.tracer.utils import set_span_attribute
16
+ from judgeval.utils.serialize import safe_serialize
17
+ from judgeval.utils.wrappers import (
18
+ immutable_wrap_async,
19
+ immutable_wrap_sync,
20
+ mutable_wrap_sync,
21
+ mutable_wrap_async,
22
+ immutable_wrap_sync_iterator,
23
+ immutable_wrap_async_iterator,
24
+ )
25
+
26
+ if TYPE_CHECKING:
27
+ from judgeval.tracer import Tracer
28
+ from together import Together, AsyncTogether # type: ignore[import-untyped]
29
+ from together.types import ChatCompletionResponse, ChatCompletionChunk # type: ignore[import-untyped]
30
+ from together.types.common import UsageData # type: ignore[import-untyped]
31
+
32
+
33
+ def _extract_together_tokens(usage: UsageData) -> tuple[int, int, int, int]:
34
+ prompt_tokens = usage.prompt_tokens if usage.prompt_tokens is not None else 0
35
+ completion_tokens = (
36
+ usage.completion_tokens if usage.completion_tokens is not None else 0
37
+ )
38
+ cache_read_input_tokens = 0
39
+ cache_creation_input_tokens = 0
40
+ return (
41
+ prompt_tokens,
42
+ completion_tokens,
43
+ cache_read_input_tokens,
44
+ cache_creation_input_tokens,
45
+ )
46
+
47
+
48
def wrap_chat_completions_create_sync(tracer: Tracer, client: Together) -> None:
    """Replace ``client.chat.completions.create`` with a tracing dispatcher.

    The dispatcher picks the streaming or non-streaming wrapper based on the
    ``stream`` keyword argument and forwards the call unchanged. Only a
    keyword ``stream`` is inspected.
    """
    original_func = client.chat.completions.create

    def dispatcher(*args: Any, **kwargs: Any) -> Any:
        # A fresh wrapper around the original function is built on every call.
        make_wrapper = (
            _wrap_streaming_sync
            if kwargs.get("stream", False)
            else _wrap_non_streaming_sync
        )
        return make_wrapper(tracer, original_func)(*args, **kwargs)  # type: ignore[arg-type]

    setattr(client.chat.completions, "create", dispatcher)
57
+
58
+
59
def _wrap_non_streaming_sync(
    tracer: Tracer, original_func: Callable[..., ChatCompletionResponse]
) -> Callable[..., ChatCompletionResponse]:
    """Wrap a non-streaming ``create`` call with span bookkeeping hooks.

    The hooks communicate through the ``ctx`` dict they receive: ``pre_hook``
    stores the span and the prefixed model name, and the other hooks read them
    back. The hooks are handed to ``immutable_wrap_sync``, whose return value
    is returned.
    """

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        # Start the LLM span and record the request kwargs as the prompt.
        llm_span = tracer.get_tracer().start_span(
            "TOGETHER_API_CALL",
            attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"},
        )
        ctx["span"] = llm_span
        tracer._inject_judgment_context(llm_span)
        set_span_attribute(
            llm_span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
        )

        # A missing or empty model is recorded as an empty string; otherwise
        # the name gets a "together_ai/" prefix.
        requested_model = kwargs.get("model", "")
        ctx["model_name"] = (
            f"together_ai/{requested_model}" if requested_model else ""
        )
        set_span_attribute(
            llm_span, AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
        )

    def post_hook(ctx: Dict[str, Any], result: ChatCompletionResponse) -> None:
        llm_span = ctx.get("span")
        if llm_span:
            set_span_attribute(
                llm_span, AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result)
            )

            usage = result.usage
            if usage:
                input_tokens, output_tokens, _, _ = _extract_together_tokens(usage)
                set_span_attribute(
                    llm_span,
                    AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
                    input_tokens,
                )
                set_span_attribute(
                    llm_span,
                    AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
                    output_tokens,
                )
                set_span_attribute(
                    llm_span,
                    AttributeKeys.JUDGMENT_USAGE_METADATA,
                    safe_serialize(usage),
                )

            # Re-assert the model name stored by pre_hook.
            set_span_attribute(
                llm_span,
                AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
                ctx["model_name"],
            )

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        llm_span = ctx.get("span")
        if llm_span:
            llm_span.record_exception(error)

    def finally_hook(ctx: Dict[str, Any]) -> None:
        llm_span = ctx.get("span")
        if llm_span:
            llm_span.end()

    return immutable_wrap_sync(
        original_func,
        pre_hook=pre_hook,
        post_hook=post_hook,
        error_hook=error_hook,
        finally_hook=finally_hook,
    )
129
+
130
+
131
def _wrap_streaming_sync(
    tracer: Tracer, original_func: Callable[..., Iterator[ChatCompletionChunk]]
) -> Callable[..., Iterator[ChatCompletionChunk]]:
    """Wrap a streaming ``create`` call so the span stays open while chunks flow.

    ``pre_hook`` starts the span. ``mutate_hook`` replaces the returned
    iterator with a traced one whose hooks accumulate the streamed text,
    record usage, and end the span. If the ``create`` call itself fails, no
    iterator exists, so the outer ``error_hook`` records the exception and
    ends the span.
    """

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        ctx["span"] = tracer.get_tracer().start_span(
            "TOGETHER_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        tracer._inject_judgment_context(ctx["span"])
        set_span_attribute(
            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
        )
        # A missing or empty model is recorded as an empty string; otherwise
        # the name gets a "together_ai/" prefix.
        model_name = kwargs.get("model", "")
        prefixed_model_name = f"together_ai/{model_name}" if model_name else ""
        ctx["model_name"] = prefixed_model_name
        set_span_attribute(
            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
        )
        ctx["accumulated_content"] = ""

    def mutate_hook(
        ctx: Dict[str, Any], result: Iterator[ChatCompletionChunk]
    ) -> Iterator[ChatCompletionChunk]:
        # The inner hooks deliberately use the outer ``ctx`` (captured by
        # closure), not the ``inner_ctx`` they are handed, because the span
        # and accumulated text live in the outer dict.
        def traced_generator() -> Generator[ChatCompletionChunk, None, None]:
            for chunk in result:
                yield chunk

        def yield_hook(inner_ctx: Dict[str, Any], chunk: ChatCompletionChunk) -> None:
            span = ctx.get("span")
            if not span:
                return

            if chunk.choices:
                delta = chunk.choices[0].delta
                content = getattr(delta, "content", None) if delta else None
                if content:
                    ctx["accumulated_content"] = (
                        ctx.get("accumulated_content", "") + content
                    )

            if chunk.usage:
                prompt_tokens, completion_tokens, _, _ = _extract_together_tokens(
                    chunk.usage
                )
                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
                    prompt_tokens,
                )
                set_span_attribute(
                    span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
                )
                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_USAGE_METADATA,
                    safe_serialize(chunk.usage),
                )

        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            span = ctx.get("span")
            if span:
                accumulated = ctx.get("accumulated_content", "")
                set_span_attribute(span, AttributeKeys.GEN_AI_COMPLETION, accumulated)

        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
            span = ctx.get("span")
            if span:
                span.record_exception(error)

        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            span = ctx.get("span")
            if span:
                span.end()

        wrapped_generator = immutable_wrap_sync_iterator(
            traced_generator,
            yield_hook=yield_hook,
            post_hook=post_hook_inner,
            error_hook=error_hook_inner,
            finally_hook=finally_hook_inner,
        )

        return wrapped_generator()

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        span = ctx.get("span")
        if span:
            span.record_exception(error)
            # The traced iterator (whose finally hook normally ends the span)
            # is never produced on this path, so end the span here to avoid
            # leaving it open forever.
            span.end()

    return mutable_wrap_sync(
        original_func,
        pre_hook=pre_hook,
        mutate_hook=mutate_hook,
        error_hook=error_hook,
    )
226
+
227
+
228
def wrap_chat_completions_create_async(tracer: Tracer, client: AsyncTogether) -> None:
    """Replace ``client.chat.completions.create`` with an async tracing dispatcher.

    The dispatcher picks the streaming or non-streaming async wrapper based on
    the ``stream`` keyword argument, awaits it, and returns its result. Only a
    keyword ``stream`` is inspected.
    """
    original_func = client.chat.completions.create

    async def dispatcher(*args: Any, **kwargs: Any) -> Any:
        # A fresh wrapper around the original function is built on every call.
        make_wrapper = (
            _wrap_streaming_async
            if kwargs.get("stream", False)
            else _wrap_non_streaming_async
        )
        return await make_wrapper(tracer, original_func)(*args, **kwargs)  # type: ignore[arg-type]

    setattr(client.chat.completions, "create", dispatcher)
237
+
238
+
239
def _wrap_non_streaming_async(
    tracer: Tracer, original_func: Callable[..., Awaitable[ChatCompletionResponse]]
) -> Callable[..., Awaitable[ChatCompletionResponse]]:
    """Wrap a non-streaming async ``create`` call with span bookkeeping hooks.

    The hooks communicate through the ``ctx`` dict they receive: ``pre_hook``
    stores the span and the prefixed model name, and the other hooks read them
    back. The hooks are handed to ``immutable_wrap_async``, whose return value
    is returned.
    """

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        # Start the LLM span and record the request kwargs as the prompt.
        llm_span = tracer.get_tracer().start_span(
            "TOGETHER_API_CALL",
            attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"},
        )
        ctx["span"] = llm_span
        tracer._inject_judgment_context(llm_span)
        set_span_attribute(
            llm_span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
        )

        # A missing or empty model is recorded as an empty string; otherwise
        # the name gets a "together_ai/" prefix.
        requested_model = kwargs.get("model", "")
        ctx["model_name"] = (
            f"together_ai/{requested_model}" if requested_model else ""
        )
        set_span_attribute(
            llm_span, AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
        )

    def post_hook(ctx: Dict[str, Any], result: ChatCompletionResponse) -> None:
        llm_span = ctx.get("span")
        if llm_span:
            set_span_attribute(
                llm_span, AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result)
            )

            usage = result.usage
            if usage:
                input_tokens, output_tokens, _, _ = _extract_together_tokens(usage)
                set_span_attribute(
                    llm_span,
                    AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
                    input_tokens,
                )
                set_span_attribute(
                    llm_span,
                    AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
                    output_tokens,
                )
                set_span_attribute(
                    llm_span,
                    AttributeKeys.JUDGMENT_USAGE_METADATA,
                    safe_serialize(usage),
                )

            # Re-assert the model name stored by pre_hook.
            set_span_attribute(
                llm_span,
                AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
                ctx["model_name"],
            )

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        llm_span = ctx.get("span")
        if llm_span:
            llm_span.record_exception(error)

    def finally_hook(ctx: Dict[str, Any]) -> None:
        llm_span = ctx.get("span")
        if llm_span:
            llm_span.end()

    return immutable_wrap_async(
        original_func,
        pre_hook=pre_hook,
        post_hook=post_hook,
        error_hook=error_hook,
        finally_hook=finally_hook,
    )
309
+
310
+
311
def _wrap_streaming_async(
    tracer: Tracer,
    original_func: Callable[..., Awaitable[AsyncIterator[ChatCompletionChunk]]],
) -> Callable[..., Awaitable[AsyncIterator[ChatCompletionChunk]]]:
    """Wrap a streaming async ``create`` call so the span stays open while chunks flow.

    ``pre_hook`` starts the span. ``mutate_hook`` replaces the returned async
    iterator with a traced one whose hooks accumulate the streamed text,
    record usage, and end the span. If the ``create`` call itself fails, no
    iterator exists, so the outer ``error_hook`` records the exception and
    ends the span.
    """

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        ctx["span"] = tracer.get_tracer().start_span(
            "TOGETHER_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        tracer._inject_judgment_context(ctx["span"])
        set_span_attribute(
            ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
        )
        # A missing or empty model is recorded as an empty string; otherwise
        # the name gets a "together_ai/" prefix.
        model_name = kwargs.get("model", "")
        prefixed_model_name = f"together_ai/{model_name}" if model_name else ""
        ctx["model_name"] = prefixed_model_name
        set_span_attribute(
            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
        )
        ctx["accumulated_content"] = ""

    def mutate_hook(
        ctx: Dict[str, Any], result: AsyncIterator[ChatCompletionChunk]
    ) -> AsyncIterator[ChatCompletionChunk]:
        # The inner hooks deliberately use the outer ``ctx`` (captured by
        # closure), not the ``inner_ctx`` they are handed, because the span
        # and accumulated text live in the outer dict.
        async def traced_generator() -> AsyncGenerator[ChatCompletionChunk, None]:
            async for chunk in result:
                yield chunk

        def yield_hook(inner_ctx: Dict[str, Any], chunk: ChatCompletionChunk) -> None:
            span = ctx.get("span")
            if not span:
                return

            if chunk.choices:
                delta = chunk.choices[0].delta
                content = getattr(delta, "content", None) if delta else None
                if content:
                    ctx["accumulated_content"] = (
                        ctx.get("accumulated_content", "") + content
                    )

            if chunk.usage:
                prompt_tokens, completion_tokens, _, _ = _extract_together_tokens(
                    chunk.usage
                )
                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
                    prompt_tokens,
                )
                set_span_attribute(
                    span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
                )
                set_span_attribute(
                    span,
                    AttributeKeys.JUDGMENT_USAGE_METADATA,
                    safe_serialize(chunk.usage),
                )

        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            span = ctx.get("span")
            if span:
                accumulated = ctx.get("accumulated_content", "")
                set_span_attribute(span, AttributeKeys.GEN_AI_COMPLETION, accumulated)

        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
            span = ctx.get("span")
            if span:
                span.record_exception(error)

        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            span = ctx.get("span")
            if span:
                span.end()

        wrapped_generator = immutable_wrap_async_iterator(
            traced_generator,
            yield_hook=yield_hook,
            post_hook=post_hook_inner,
            error_hook=error_hook_inner,
            finally_hook=finally_hook_inner,
        )

        return wrapped_generator()

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        span = ctx.get("span")
        if span:
            span.record_exception(error)
            # The traced iterator (whose finally hook normally ends the span)
            # is never produced on this path, so end the span here to avoid
            # leaving it open forever.
            span.end()

    return mutable_wrap_async(
        original_func,
        pre_hook=pre_hook,
        mutate_hook=mutate_hook,
        error_hook=error_hook,
    )
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+ import importlib.util
3
+
4
# True when an import spec for the optional ``together`` package can be found;
# the package itself is not imported here.
HAS_TOGETHER: bool = importlib.util.find_spec("together") is not None

__all__ = ["HAS_TOGETHER"]
@@ -0,0 +1,52 @@
1
+ from __future__ import annotations
2
+ from typing import TYPE_CHECKING, Union
3
+ import typing
4
+
5
+ from judgeval.tracer.llm.llm_together.chat_completions import (
6
+ wrap_chat_completions_create_sync,
7
+ wrap_chat_completions_create_async,
8
+ )
9
+
10
+
11
+ if TYPE_CHECKING:
12
+ from judgeval.tracer import Tracer
13
+ from together import Together, AsyncTogether # type: ignore[import-untyped]
14
+
15
+ TClient = Union[Together, AsyncTogether]
16
+
17
+
18
def wrap_together_client_sync(tracer: Tracer, client: Together) -> Together:
    """Instrument a synchronous Together client in place and return it.

    The client's chat-completions ``create`` is wrapped via
    ``wrap_chat_completions_create_sync``; the same client object is returned.
    """
    wrap_chat_completions_create_sync(tracer, client)
    return client
21
+
22
+
23
def wrap_together_client_async(tracer: Tracer, client: AsyncTogether) -> AsyncTogether:
    """Instrument an asynchronous Together client in place and return it.

    The client's chat-completions ``create`` is wrapped via
    ``wrap_chat_completions_create_async``; the same client object is returned.
    """
    wrap_chat_completions_create_async(tracer, client)
    return client
26
+
27
+
28
@typing.overload
def wrap_together_client(tracer: Tracer, client: Together) -> Together: ...
@typing.overload
def wrap_together_client(tracer: Tracer, client: AsyncTogether) -> AsyncTogether: ...  # type: ignore[overload-cannot-match]


def wrap_together_client(tracer: Tracer, client: TClient) -> TClient:
    """Instrument a Together client (sync or async) for tracing.

    If the ``together`` library is not installed, an error is logged and the
    client is returned untouched. Otherwise the client is dispatched to the
    async or sync wrapper according to its type.

    Raises:
        TypeError: if ``client`` is neither an ``AsyncTogether`` nor a
            ``Together`` instance.
    """
    from judgeval.tracer.llm.llm_together.config import HAS_TOGETHER
    from judgeval.logger import judgeval_logger

    if not HAS_TOGETHER:
        judgeval_logger.error(
            "Cannot wrap Together client: 'together' library not installed. "
            "Install it with: pip install together"
        )
        return client

    # Imported lazily so this module can be loaded without ``together``.
    from together import Together, AsyncTogether  # type: ignore[import-untyped]

    # The async client is checked first, as in the original ordering.
    if isinstance(client, AsyncTogether):
        return wrap_together_client_async(tracer, client)
    if isinstance(client, Together):
        return wrap_together_client_sync(tracer, client)
    raise TypeError(f"Invalid client type: {type(client)}")
@@ -0,0 +1,19 @@
1
+ from __future__ import annotations
2
+ from typing import Any, TypeAlias
3
+
4
+ from judgeval.tracer.llm.llm_openai.config import HAS_OPENAI
5
+ from judgeval.tracer.llm.llm_together.config import HAS_TOGETHER
6
+ from judgeval.tracer.llm.llm_anthropic.config import HAS_ANTHROPIC
7
+ from judgeval.tracer.llm.llm_google.config import HAS_GOOGLE_GENAI
8
+
9
+ # TODO: if we support dependency groups we can give this a more precise type, but at runtime we
10
+ # do not know which clients an end user might have installed.
11
+ ApiClient: TypeAlias = Any
12
+
13
+ __all__ = [
14
+ "ApiClient",
15
+ "HAS_OPENAI",
16
+ "HAS_TOGETHER",
17
+ "HAS_ANTHROPIC",
18
+ "HAS_GOOGLE_GENAI",
19
+ ]
@@ -0,0 +1,167 @@
1
+ from __future__ import annotations
2
+
3
+ from contextlib import asynccontextmanager, contextmanager
4
+ from typing import TYPE_CHECKING, Dict, Optional, List, Any
5
+ from judgeval.tracer.keys import InternalAttributeKeys
6
+ import uuid
7
+ from judgeval.exceptions import JudgmentRuntimeError
8
+
9
+ if TYPE_CHECKING:
10
+ from judgeval.tracer import Tracer
11
+
12
+
13
@contextmanager
def sync_span_context(
    tracer: Tracer,
    name: str,
    span_attributes: Optional[Dict[str, str]] = None,
    disable_partial_emit: bool = False,
    end_on_exit: bool = False,
):
    """Start ``name`` as the current span and yield it.

    Args:
        tracer: Tracer whose ``get_tracer()`` provides the span.
        name: Span name.
        span_attributes: Initial span attributes; ``None`` means an empty dict.
        disable_partial_emit: When true, the ``DISABLE_PARTIAL_EMIT`` internal
            attribute is set to ``True`` for this span on the tracer's
            judgment processor.
        end_on_exit: Forwarded to ``start_as_current_span``.

    Yields:
        The started span.
    """
    attributes = {} if span_attributes is None else span_attributes
    current_span_cm = tracer.get_tracer().start_as_current_span(
        name=name,
        attributes=attributes,
        end_on_exit=end_on_exit,
    )

    with current_span_cm as span:
        if disable_partial_emit:
            tracer.judgment_processor.set_internal_attribute(
                span_context=span.get_span_context(),
                key=InternalAttributeKeys.DISABLE_PARTIAL_EMIT,
                value=True,
            )
        yield span
36
+
37
+
38
@asynccontextmanager
async def async_span_context(
    tracer: Tracer,
    name: str,
    span_attributes: Optional[Dict[str, str]] = None,
    disable_partial_emit: bool = False,
    end_on_exit: bool = False,
):
    """Async counterpart of the span context: start ``name`` as the current span and yield it.

    Args:
        tracer: Tracer whose ``get_tracer()`` provides the span.
        name: Span name.
        span_attributes: Initial span attributes; ``None`` means an empty dict.
        disable_partial_emit: When true, the ``DISABLE_PARTIAL_EMIT`` internal
            attribute is set to ``True`` for this span on the tracer's
            judgment processor.
        end_on_exit: Forwarded to ``start_as_current_span``.

    Yields:
        The started span.
    """
    attributes = {} if span_attributes is None else span_attributes
    current_span_cm = tracer.get_tracer().start_as_current_span(
        name=name,
        attributes=attributes,
        end_on_exit=end_on_exit,
    )

    # The span itself is entered with a regular (synchronous) ``with``.
    with current_span_cm as span:
        if disable_partial_emit:
            tracer.judgment_processor.set_internal_attribute(
                span_context=span.get_span_context(),
                key=InternalAttributeKeys.DISABLE_PARTIAL_EMIT,
                value=True,
            )
        yield span
61
+
62
+
63
def create_agent_context(
    tracer: Tracer,
    args: tuple,
    class_name: Optional[str] = None,
    identifier: Optional[str] = None,
    track_state: bool = False,
    track_attributes: Optional[List[str]] = None,
    field_mappings: Optional[Dict[str, str]] = None,
):
    """Create an agent context, make it current, and return the token for cleanup.

    Args:
        tracer: Object whose ``get_current_agent_context()`` returns the
            context variable holding the active agent context.
        args: Positional arguments of the decorated call; ``args[0]`` (if any)
            is treated as the agent instance.
        class_name: Name of the agent class; an empty value is stored as ``None``.
        identifier: Name of a non-callable attribute on the instance whose
            string value becomes ``instance_name``.
        track_state: Stored as-is in the context.
        track_attributes: Stored in the context; ``None`` becomes ``[]``.
        field_mappings: Stored in the context; ``None`` becomes ``{}``.

    Returns:
        The token returned by the context variable's ``set()``; pass it to
        ``reset()`` to restore the previous context.

    Raises:
        JudgmentRuntimeError: if ``identifier`` is given without a class name
            or instance, or if the named attribute is missing or callable.
    """
    instance = args[0] if args else None

    agent_context: Dict[str, Any] = {
        "agent_id": str(uuid.uuid4()),
        "class_name": class_name if class_name else None,
        "track_state": track_state,
        "track_attributes": track_attributes or [],
        "field_mappings": field_mappings or {},
        "instance": instance,
    }

    if identifier:
        # Compare against None rather than relying on truthiness: an agent
        # instance that defines __len__/__bool__ may be falsy yet valid.
        if not class_name or instance is None:
            raise JudgmentRuntimeError(
                "'identifier' is set but no class name or instance is available. 'identifier' can only be specified when using the agent() decorator on a class method."
            )
        # Fetch the attribute once; the sentinel distinguishes "missing" from
        # an attribute whose value is None.
        missing = object()
        value = getattr(instance, identifier, missing)
        if value is missing or callable(value):
            raise JudgmentRuntimeError(
                f"Attribute {identifier} does not exist for {class_name}. Check your agent() decorator."
            )
        agent_context["instance_name"] = str(value)
    else:
        agent_context["instance_name"] = None

    # Link to the enclosing agent, if one is currently active.
    current_agent_context = tracer.get_current_agent_context().get()
    if current_agent_context and "agent_id" in current_agent_context:
        agent_context["parent_agent_id"] = current_agent_context["agent_id"]
    else:
        agent_context["parent_agent_id"] = None

    agent_context["is_agent_entry_point"] = True
    token = tracer.get_current_agent_context().set(agent_context)  # type: ignore
    return token
116
+
117
+
118
@contextmanager
def sync_agent_context(
    tracer: Tracer,
    args: tuple,
    class_name: Optional[str] = None,
    identifier: Optional[str] = None,
    track_state: bool = False,
    track_attributes: Optional[List[str]] = None,
    field_mappings: Optional[Dict[str, str]] = None,
):
    """Install an agent context for the duration of a ``with`` block.

    All arguments are forwarded to ``create_agent_context``. On exit, whether
    normal or via an exception, the tracer's current agent context is reset
    with the token obtained on entry. Nothing is yielded.
    """
    agent_options = dict(
        tracer=tracer,
        args=args,
        class_name=class_name,
        identifier=identifier,
        track_state=track_state,
        track_attributes=track_attributes,
        field_mappings=field_mappings,
    )
    reset_token = create_agent_context(**agent_options)
    try:
        yield
    finally:
        tracer.get_current_agent_context().reset(reset_token)
142
+
143
+
144
@asynccontextmanager
async def async_agent_context(
    tracer: Tracer,
    args: tuple,
    class_name: Optional[str] = None,
    identifier: Optional[str] = None,
    track_state: bool = False,
    track_attributes: Optional[List[str]] = None,
    field_mappings: Optional[Dict[str, str]] = None,
):
    """Install an agent context for the duration of an ``async with`` block.

    All arguments are forwarded to ``create_agent_context``. On exit, whether
    normal or via an exception, the tracer's current agent context is reset
    with the token obtained on entry. Nothing is yielded.
    """
    agent_options = dict(
        tracer=tracer,
        args=args,
        class_name=class_name,
        identifier=identifier,
        track_state=track_state,
        track_attributes=track_attributes,
        field_mappings=field_mappings,
    )
    reset_token = create_agent_context(**agent_options)
    try:
        yield
    finally:
        tracer.get_current_agent_context().reset(reset_token)