judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234) hide show
  1. judgeval/__init__.py +173 -10
  2. judgeval/api/__init__.py +523 -0
  3. judgeval/api/api_types.py +413 -0
  4. judgeval/cli.py +112 -0
  5. judgeval/constants.py +7 -30
  6. judgeval/data/__init__.py +1 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +14 -40
  9. judgeval/data/judgment_types.py +396 -146
  10. judgeval/data/result.py +11 -18
  11. judgeval/data/scorer_data.py +3 -26
  12. judgeval/data/scripts/openapi_transform.py +5 -5
  13. judgeval/data/trace.py +115 -194
  14. judgeval/dataset/__init__.py +335 -0
  15. judgeval/env.py +55 -0
  16. judgeval/evaluation/__init__.py +346 -0
  17. judgeval/exceptions.py +28 -0
  18. judgeval/integrations/langgraph/__init__.py +13 -0
  19. judgeval/integrations/openlit/__init__.py +51 -0
  20. judgeval/judges/__init__.py +2 -2
  21. judgeval/judges/litellm_judge.py +77 -16
  22. judgeval/judges/together_judge.py +88 -17
  23. judgeval/judges/utils.py +7 -20
  24. judgeval/judgment_attribute_keys.py +55 -0
  25. judgeval/{common/logger.py → logger.py} +24 -8
  26. judgeval/prompt/__init__.py +330 -0
  27. judgeval/scorers/__init__.py +11 -11
  28. judgeval/scorers/agent_scorer.py +15 -19
  29. judgeval/scorers/api_scorer.py +21 -23
  30. judgeval/scorers/base_scorer.py +54 -36
  31. judgeval/scorers/example_scorer.py +1 -3
  32. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
  36. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
  37. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
  38. judgeval/scorers/score.py +64 -47
  39. judgeval/scorers/utils.py +2 -107
  40. judgeval/tracer/__init__.py +1111 -2
  41. judgeval/tracer/constants.py +1 -0
  42. judgeval/tracer/exporters/__init__.py +40 -0
  43. judgeval/tracer/exporters/s3.py +119 -0
  44. judgeval/tracer/exporters/store.py +59 -0
  45. judgeval/tracer/exporters/utils.py +32 -0
  46. judgeval/tracer/keys.py +63 -0
  47. judgeval/tracer/llm/__init__.py +7 -0
  48. judgeval/tracer/llm/config.py +78 -0
  49. judgeval/tracer/llm/constants.py +9 -0
  50. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  51. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  52. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  53. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  54. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  55. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  56. judgeval/tracer/llm/llm_google/config.py +6 -0
  57. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  58. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  59. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  60. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  61. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  62. judgeval/tracer/llm/llm_openai/config.py +6 -0
  63. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  64. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  65. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  66. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  67. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  68. judgeval/tracer/llm/llm_together/config.py +6 -0
  69. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  70. judgeval/tracer/llm/providers.py +19 -0
  71. judgeval/tracer/managers.py +167 -0
  72. judgeval/tracer/processors/__init__.py +220 -0
  73. judgeval/tracer/utils.py +19 -0
  74. judgeval/trainer/__init__.py +14 -0
  75. judgeval/trainer/base_trainer.py +122 -0
  76. judgeval/trainer/config.py +123 -0
  77. judgeval/trainer/console.py +144 -0
  78. judgeval/trainer/fireworks_trainer.py +392 -0
  79. judgeval/trainer/trainable_model.py +252 -0
  80. judgeval/trainer/trainer.py +70 -0
  81. judgeval/utils/async_utils.py +39 -0
  82. judgeval/utils/decorators/__init__.py +0 -0
  83. judgeval/utils/decorators/dont_throw.py +37 -0
  84. judgeval/utils/decorators/use_once.py +13 -0
  85. judgeval/utils/file_utils.py +74 -28
  86. judgeval/utils/guards.py +36 -0
  87. judgeval/utils/meta.py +27 -0
  88. judgeval/utils/project.py +15 -0
  89. judgeval/utils/serialize.py +253 -0
  90. judgeval/utils/testing.py +70 -0
  91. judgeval/utils/url.py +10 -0
  92. judgeval/{version_check.py → utils/version_check.py} +5 -3
  93. judgeval/utils/wrappers/README.md +3 -0
  94. judgeval/utils/wrappers/__init__.py +15 -0
  95. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  96. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  97. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  98. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  99. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  100. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  101. judgeval/utils/wrappers/py.typed +0 -0
  102. judgeval/utils/wrappers/utils.py +35 -0
  103. judgeval/v1/__init__.py +88 -0
  104. judgeval/v1/data/__init__.py +7 -0
  105. judgeval/v1/data/example.py +44 -0
  106. judgeval/v1/data/scorer_data.py +42 -0
  107. judgeval/v1/data/scoring_result.py +44 -0
  108. judgeval/v1/datasets/__init__.py +6 -0
  109. judgeval/v1/datasets/dataset.py +214 -0
  110. judgeval/v1/datasets/dataset_factory.py +94 -0
  111. judgeval/v1/evaluation/__init__.py +6 -0
  112. judgeval/v1/evaluation/evaluation.py +182 -0
  113. judgeval/v1/evaluation/evaluation_factory.py +17 -0
  114. judgeval/v1/instrumentation/__init__.py +6 -0
  115. judgeval/v1/instrumentation/llm/__init__.py +7 -0
  116. judgeval/v1/instrumentation/llm/config.py +78 -0
  117. judgeval/v1/instrumentation/llm/constants.py +11 -0
  118. judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
  119. judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
  120. judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
  121. judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
  122. judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
  123. judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
  124. judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
  125. judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
  126. judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
  127. judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
  128. judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
  129. judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
  130. judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
  131. judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
  132. judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
  133. judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
  134. judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
  135. judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
  136. judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
  137. judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
  138. judgeval/v1/instrumentation/llm/providers.py +19 -0
  139. judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
  140. judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
  141. judgeval/v1/integrations/langgraph/__init__.py +13 -0
  142. judgeval/v1/integrations/openlit/__init__.py +47 -0
  143. judgeval/v1/internal/api/__init__.py +525 -0
  144. judgeval/v1/internal/api/api_types.py +413 -0
  145. judgeval/v1/prompts/__init__.py +6 -0
  146. judgeval/v1/prompts/prompt.py +29 -0
  147. judgeval/v1/prompts/prompt_factory.py +189 -0
  148. judgeval/v1/py.typed +0 -0
  149. judgeval/v1/scorers/__init__.py +6 -0
  150. judgeval/v1/scorers/api_scorer.py +82 -0
  151. judgeval/v1/scorers/base_scorer.py +17 -0
  152. judgeval/v1/scorers/built_in/__init__.py +17 -0
  153. judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
  154. judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
  155. judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
  156. judgeval/v1/scorers/built_in/faithfulness.py +28 -0
  157. judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
  158. judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
  159. judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
  160. judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
  161. judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
  162. judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
  163. judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
  164. judgeval/v1/scorers/scorers_factory.py +49 -0
  165. judgeval/v1/tracer/__init__.py +7 -0
  166. judgeval/v1/tracer/base_tracer.py +520 -0
  167. judgeval/v1/tracer/exporters/__init__.py +14 -0
  168. judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
  169. judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
  170. judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
  171. judgeval/v1/tracer/exporters/span_store.py +50 -0
  172. judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
  173. judgeval/v1/tracer/processors/__init__.py +6 -0
  174. judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
  175. judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
  176. judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
  177. judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
  178. judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
  179. judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
  180. judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
  181. judgeval/v1/tracer/tracer.py +67 -0
  182. judgeval/v1/tracer/tracer_factory.py +38 -0
  183. judgeval/v1/trainers/__init__.py +5 -0
  184. judgeval/v1/trainers/base_trainer.py +62 -0
  185. judgeval/v1/trainers/config.py +123 -0
  186. judgeval/v1/trainers/console.py +144 -0
  187. judgeval/v1/trainers/fireworks_trainer.py +392 -0
  188. judgeval/v1/trainers/trainable_model.py +252 -0
  189. judgeval/v1/trainers/trainers_factory.py +37 -0
  190. judgeval/v1/utils.py +18 -0
  191. judgeval/version.py +5 -0
  192. judgeval/warnings.py +4 -0
  193. judgeval-0.23.0.dist-info/METADATA +266 -0
  194. judgeval-0.23.0.dist-info/RECORD +201 -0
  195. judgeval-0.23.0.dist-info/entry_points.txt +2 -0
  196. judgeval/clients.py +0 -34
  197. judgeval/common/__init__.py +0 -13
  198. judgeval/common/api/__init__.py +0 -3
  199. judgeval/common/api/api.py +0 -352
  200. judgeval/common/api/constants.py +0 -165
  201. judgeval/common/exceptions.py +0 -27
  202. judgeval/common/storage/__init__.py +0 -6
  203. judgeval/common/storage/s3_storage.py +0 -98
  204. judgeval/common/tracer/__init__.py +0 -31
  205. judgeval/common/tracer/constants.py +0 -22
  206. judgeval/common/tracer/core.py +0 -1916
  207. judgeval/common/tracer/otel_exporter.py +0 -108
  208. judgeval/common/tracer/otel_span_processor.py +0 -234
  209. judgeval/common/tracer/span_processor.py +0 -37
  210. judgeval/common/tracer/span_transformer.py +0 -211
  211. judgeval/common/tracer/trace_manager.py +0 -92
  212. judgeval/common/utils.py +0 -940
  213. judgeval/data/datasets/__init__.py +0 -4
  214. judgeval/data/datasets/dataset.py +0 -341
  215. judgeval/data/datasets/eval_dataset_client.py +0 -214
  216. judgeval/data/tool.py +0 -5
  217. judgeval/data/trace_run.py +0 -37
  218. judgeval/evaluation_run.py +0 -75
  219. judgeval/integrations/langgraph.py +0 -843
  220. judgeval/judges/mixture_of_judges.py +0 -286
  221. judgeval/judgment_client.py +0 -369
  222. judgeval/rules.py +0 -521
  223. judgeval/run_evaluation.py +0 -684
  224. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
  225. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  226. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  227. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
  228. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
  229. judgeval/utils/alerts.py +0 -93
  230. judgeval/utils/requests.py +0 -50
  231. judgeval-0.1.0.dist-info/METADATA +0 -202
  232. judgeval-0.1.0.dist-info/RECORD +0 -73
  233. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
  234. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1 @@
1
+ JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME = "judgeval"
@@ -0,0 +1,40 @@
1
+ from opentelemetry.sdk.trace.export import (
2
+ SpanExportResult,
3
+ SpanExporter,
4
+ )
5
+ from opentelemetry.sdk.trace import ReadableSpan
6
+ from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
7
+ from typing import Sequence
8
+
9
+ from judgeval.tracer.exporters.store import ABCSpanStore
10
+ from judgeval.tracer.exporters.s3 import S3Exporter
11
+ from judgeval.tracer.exporters.utils import deduplicate_spans
12
+
13
+
14
class JudgmentSpanExporter(OTLPSpanExporter):
    """OTLP/HTTP span exporter configured with Judgment credentials.

    The API key is sent as a bearer token and the organization and project
    identifiers are sent as custom headers on the underlying OTLP exporter.
    """

    def __init__(
        self, endpoint: str, api_key: str, organization_id: str, project_id: str
    ):
        """Configure the parent OTLP exporter with the endpoint and headers."""
        auth_headers = {
            "Authorization": f"Bearer {api_key}",
            "X-Organization-Id": organization_id,
            "X-Project-Id": project_id,
        }
        super().__init__(endpoint=endpoint, headers=auth_headers)

    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
        """Run the batch through ``deduplicate_spans`` and export the result."""
        unique_spans = deduplicate_spans(spans)
        return super().export(unique_spans)
29
+
30
+
31
class InMemorySpanExporter(SpanExporter):
    """Span exporter that hands every exported span to a span store."""

    def __init__(self, store: ABCSpanStore):
        """Remember the store that will receive exported spans."""
        self.store = store

    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
        """Add all spans of the batch to the store and report success."""
        target_store = self.store
        target_store.add(*spans)
        return SpanExportResult.SUCCESS
38
+
39
+
40
+ __all__ = ("JudgmentSpanExporter", "InMemorySpanExporter", "S3Exporter")
@@ -0,0 +1,119 @@
1
+ from __future__ import annotations
2
+
3
+ from concurrent.futures import ThreadPoolExecutor, as_completed
4
+ from datetime import datetime
5
+ from typing import Literal, Sequence, Optional, TYPE_CHECKING, cast
6
+ import boto3
7
+ from botocore.client import Config
8
+
9
+ from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
10
+ from opentelemetry.sdk.trace import ReadableSpan
11
+ from judgeval.env import (
12
+ JUDGMENT_S3_ACCESS_KEY_ID,
13
+ JUDGMENT_S3_SECRET_ACCESS_KEY,
14
+ JUDGMENT_S3_REGION_NAME,
15
+ JUDGMENT_S3_BUCKET_NAME,
16
+ JUDGMENT_S3_PREFIX,
17
+ JUDGMENT_S3_ENDPOINT_URL,
18
+ JUDGMENT_S3_SIGNATURE_VERSION,
19
+ JUDGMENT_S3_ADDRESSING_STYLE,
20
+ )
21
+ from judgeval.exceptions import JudgmentRuntimeError
22
+ from judgeval.logger import judgeval_logger
23
+
24
+ if TYPE_CHECKING:
25
+ from mypy_boto3_s3.client import S3Client
26
+
27
+
28
class S3Exporter(SpanExporter):
    """Span exporter that writes each span as a JSON object to an S3 bucket.

    Objects are stored under ``{prefix}/{trace_id}/{span_id}/{timestamp}.json``
    where the ids are zero-padded lowercase hex and the timestamp is taken from
    ``datetime.now()`` at upload time. Constructor defaults come from the
    ``JUDGMENT_S3_*`` values in ``judgeval.env``.
    """

    __slots__ = ("bucket_name", "prefix", "s3_client", "batch_size")

    bucket_name: str
    prefix: str
    s3_client: S3Client
    # Upper bound on the number of upload worker threads used per export call.
    batch_size: int

    def __init__(
        self,
        bucket_name: Optional[str] = JUDGMENT_S3_BUCKET_NAME,
        region_name: Optional[str] = JUDGMENT_S3_REGION_NAME,
        prefix: str = JUDGMENT_S3_PREFIX,
        s3_access_key_id: Optional[str] = JUDGMENT_S3_ACCESS_KEY_ID,
        s3_secret_access_key: Optional[str] = JUDGMENT_S3_SECRET_ACCESS_KEY,
        endpoint_url: Optional[str] = JUDGMENT_S3_ENDPOINT_URL,
        # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html
        signature_version: str = JUDGMENT_S3_SIGNATURE_VERSION,
        addressing_style: str = JUDGMENT_S3_ADDRESSING_STYLE,
        batch_size: int = 8,
    ):
        """Validate the configuration and create the boto3 S3 client.

        Raises:
            JudgmentRuntimeError: if ``bucket_name`` or ``region_name`` is
                empty, or ``addressing_style`` is not one of ``"auto"``,
                ``"virtual"`` or ``"path"``.
        """
        if not bucket_name:
            raise JudgmentRuntimeError("JUDGMENT_S3_BUCKET_NAME is not set")

        if not region_name:
            raise JudgmentRuntimeError("JUDGMENT_S3_REGION_NAME is not set")

        if addressing_style not in ["auto", "virtual", "path"]:
            raise JudgmentRuntimeError(f"Invalid addressing style: {addressing_style}")
        addressing_style = cast(Literal["auto", "virtual", "path"], addressing_style)

        self.bucket_name = bucket_name
        # Drop trailing slashes so the key template does not produce "//".
        self.prefix = prefix.rstrip("/")
        self.batch_size = batch_size

        self.s3_client = boto3.client(
            "s3",
            config=Config(
                signature_version=signature_version,
                s3={"addressing_style": addressing_style},
            ),
            aws_access_key_id=s3_access_key_id,
            aws_secret_access_key=s3_secret_access_key,
            endpoint_url=endpoint_url,
            region_name=region_name,
        )

    def _upload_span(self, span: ReadableSpan) -> tuple[bool, str]:
        """Upload a single span to S3. Returns (success, key).

        Any exception is logged and reported as ``(False, "")``; a span
        without a span context is also reported as ``(False, "")``.
        """
        # Bound before the try so the except handler can always read it, even
        # when get_span_context() itself is the call that raised.
        span_context = None
        try:
            span_context = span.get_span_context()
            if not span_context:
                return False, ""

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S%f")
            trace_id = format(span_context.trace_id, "032x")
            span_id = format(span_context.span_id, "016x")
            key = f"{self.prefix}/{trace_id}/{span_id}/{timestamp}.json"

            span_json = span.to_json(indent=0)

            self.s3_client.put_object(
                Bucket=self.bucket_name,
                Key=key,
                Body=span_json,
                ContentType="application/json",
            )
            return True, key
        except Exception as e:
            judgeval_logger.error(
                f"Error uploading span {span_context.span_id if span_context else 'unknown'}: {e}"
            )
            return False, ""

    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
        """Upload all spans concurrently, one S3 object per span.

        Returns ``SUCCESS`` for an empty batch or when every upload succeeded,
        and ``FAILURE`` as soon as one upload reports failure or an unexpected
        exception occurs. At most ``batch_size`` worker threads are used.
        """
        if not spans:
            return SpanExportResult.SUCCESS

        try:
            with ThreadPoolExecutor(
                max_workers=min(len(spans), self.batch_size)
            ) as executor:
                futures = [executor.submit(self._upload_span, span) for span in spans]

                for future in as_completed(futures):
                    success, _key = future.result()
                    if not success:
                        return SpanExportResult.FAILURE
                return SpanExportResult.SUCCESS

        except Exception as e:
            judgeval_logger.error(f"Error exporting spans to S3: {e}")
            return SpanExportResult.FAILURE
@@ -0,0 +1,59 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List, Dict
3
+
4
+ from opentelemetry.sdk.trace import ReadableSpan
5
+
6
+
7
class ABCSpanStore(ABC):
    """Abstract interface for a container of spans addressable by trace id."""

    @abstractmethod
    def add(self, *spans: ReadableSpan):
        """Store the given spans."""

    @abstractmethod
    def get_all(self) -> List[ReadableSpan]:
        """Return every stored span."""

    @abstractmethod
    def get_by_trace_id(self, trace_id: str) -> List[ReadableSpan]:
        """Return the spans stored for ``trace_id``."""

    @abstractmethod
    def clear_trace(self, trace_id: str):
        """Discard the spans stored for ``trace_id``."""
19
+
20
+
21
class SpanStore(ABCSpanStore):
    """Dictionary-backed span store that groups spans by trace id.

    Trace ids are keyed as 32-character zero-padded lowercase hex strings.
    """

    __slots__ = ("_spans_by_trace",)

    _spans_by_trace: Dict[str, List[ReadableSpan]]

    def __init__(self):
        self._spans_by_trace = {}

    def add(self, *spans: ReadableSpan):
        """Append each span to the list of its trace; skip spans without context."""
        for item in spans:
            span_context = item.get_span_context()
            if span_context is None:
                continue
            # Convert trace_id to hex string per OTEL spec
            hex_trace_id = format(span_context.trace_id, "032x")
            self._spans_by_trace.setdefault(hex_trace_id, []).append(item)

    def get_all(self) -> List[ReadableSpan]:
        """Return a new flat list with the spans of every trace."""
        return [span for bucket in self._spans_by_trace.values() for span in bucket]

    def get_by_trace_id(self, trace_id: str) -> List[ReadableSpan]:
        """Get all spans for a specific trace ID (32-char hex string).

        Returns the stored list itself for a known trace and a fresh empty
        list otherwise.
        """
        try:
            return self._spans_by_trace[trace_id]
        except KeyError:
            return []

    def clear_trace(self, trace_id: str):
        """Clear all spans for a specific trace ID (32-char hex string)."""
        self._spans_by_trace.pop(trace_id, None)

    def __repr__(self) -> str:
        trace_count = len(self._spans_by_trace)
        span_count = sum(map(len, self._spans_by_trace.values()))
        return f"SpanStore(traces={trace_count}, total_spans={span_count})"
@@ -0,0 +1,32 @@
1
+ from typing import Sequence
2
+ from opentelemetry.sdk.trace import ReadableSpan
3
+
4
+ from judgeval.tracer.keys import AttributeKeys
5
+
6
+
7
def deduplicate_spans(spans: Sequence[ReadableSpan]) -> Sequence[ReadableSpan]:
    """Keep one span per (trace_id, span_id), preferring the highest update id.

    Spans that have no attributes, no context, or whose
    ``AttributeKeys.JUDGMENT_UPDATE_ID`` attribute is not an ``int`` are left
    out of the result. Among spans sharing a key, a later span replaces the
    kept one only when its update id is strictly greater. The result follows
    the order in which each key was first seen.
    """
    latest_by_key: dict[tuple[int, int], ReadableSpan] = {}

    for candidate in spans:
        attrs = candidate.attributes
        if not attrs:
            continue
        ctx = candidate.context
        if not ctx:
            continue

        incoming_id = attrs.get(AttributeKeys.JUDGMENT_UPDATE_ID)
        if not isinstance(incoming_id, int):
            continue

        ident = (ctx.trace_id, ctx.span_id)
        if ident not in latest_by_key:
            latest_by_key[ident] = candidate
            continue

        kept_attrs = latest_by_key[ident].attributes
        if kept_attrs:
            kept_id = kept_attrs.get(AttributeKeys.JUDGMENT_UPDATE_ID, 0)
        else:
            kept_id = 0

        if isinstance(kept_id, (int, float)) and incoming_id > kept_id:
            latest_by_key[ident] = candidate

    return list(latest_by_key.values())
@@ -0,0 +1,63 @@
1
+ """
2
+ Identifiers used by Judgeval to store specific types of data in the spans.
3
+ """
4
+
5
+ from enum import Enum
6
+
7
+
8
class AttributeKeys(str, Enum):
    """Span attribute names, as string-valued enum members."""

    # General ``judgment.*`` span attributes.
    JUDGMENT_SPAN_KIND = "judgment.span_kind"
    JUDGMENT_INPUT = "judgment.input"
    JUDGMENT_OUTPUT = "judgment.output"
    JUDGMENT_OFFLINE_MODE = "judgment.offline_mode"
    JUDGMENT_UPDATE_ID = "judgment.update_id"

    JUDGMENT_USAGE_METADATA = "judgment.usage.metadata"

    JUDGMENT_CUSTOMER_ID = "judgment.customer_id"

    # Agent and state attributes.
    JUDGMENT_AGENT_ID = "judgment.agent_id"
    JUDGMENT_PARENT_AGENT_ID = "judgment.parent_agent_id"
    JUDGMENT_AGENT_CLASS_NAME = "judgment.agent_class_name"
    JUDGMENT_AGENT_INSTANCE_NAME = "judgment.agent_instance_name"
    JUDGMENT_IS_AGENT_ENTRY_POINT = "judgment.is_agent_entry_point"
    JUDGMENT_STATE_BEFORE = "judgment.state_before"
    JUDGMENT_STATE_AFTER = "judgment.state_after"

    PENDING_TRACE_EVAL = "judgment.pending_trace_eval"

    # ``judgment.llm.*`` and ``judgment.usage.*`` attributes.
    JUDGMENT_LLM_PROVIDER = "judgment.llm.provider"
    JUDGMENT_LLM_MODEL_NAME = "judgment.llm.model"
    JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS = "judgment.usage.non_cached_input_tokens"
    JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS = (
        "judgment.usage.cache_creation_input_tokens"
    )
    JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS = "judgment.usage.cache_read_input_tokens"
    JUDGMENT_USAGE_OUTPUT_TOKENS = "judgment.usage.output_tokens"
    JUDGMENT_USAGE_TOTAL_COST_USD = "judgment.usage.total_cost_usd"

    # ``gen_ai.*`` attributes.
    GEN_AI_PROMPT = "gen_ai.prompt"
    GEN_AI_COMPLETION = "gen_ai.completion"
    GEN_AI_SYSTEM = "gen_ai.system"
    GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
    GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
    GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"
45
+
46
+
47
class InternalAttributeKeys(str, Enum):
    """
    Keys for temporary, internal state kept by span processors.

    They serve span lifecycle management only and are NOT exported.
    """

    DISABLE_PARTIAL_EMIT = "disable_partial_emit"
    CANCELLED = "cancelled"
    IS_CUSTOMER_CONTEXT_OWNER = "is_customer_context_owner"
56
+
57
+
58
class ResourceKeys(str, Enum):
    """Resource attribute names, as string-valued enum members."""

    SERVICE_NAME = "service.name"
    TELEMETRY_SDK_LANGUAGE = "telemetry.sdk.language"
    TELEMETRY_SDK_NAME = "telemetry.sdk.name"
    TELEMETRY_SDK_VERSION = "telemetry.sdk.version"
    JUDGMENT_PROJECT_ID = "judgment.project_id"
@@ -0,0 +1,7 @@
1
+ from __future__ import annotations
2
+
3
+
4
+ from .config import _detect_provider, wrap_provider
5
+
6
+
7
+ __all__ = ["_detect_provider", "wrap_provider"]
@@ -0,0 +1,78 @@
1
+ from __future__ import annotations
2
+ from typing import TYPE_CHECKING
3
+ from judgeval.logger import judgeval_logger
4
+
5
+ from judgeval.tracer.llm.constants import ProviderType
6
+ from judgeval.tracer.llm.providers import (
7
+ HAS_OPENAI,
8
+ HAS_TOGETHER,
9
+ HAS_ANTHROPIC,
10
+ HAS_GOOGLE_GENAI,
11
+ ApiClient,
12
+ )
13
+
14
+ if TYPE_CHECKING:
15
+ from judgeval.tracer import Tracer
16
+
17
+
18
def _detect_provider(client: ApiClient) -> ProviderType:
    """Classify ``client`` by the provider SDK class it is an instance of.

    Each SDK is imported only when its ``HAS_*`` flag is set. The checks run
    in the order OpenAI, Anthropic, Together, Google GenAI. If none matches, a
    warning is logged and ``ProviderType.DEFAULT`` is returned.
    """
    if HAS_OPENAI:
        from openai import OpenAI, AsyncOpenAI

        openai_clients = (OpenAI, AsyncOpenAI)
        if isinstance(client, openai_clients):
            return ProviderType.OPENAI

    if HAS_ANTHROPIC:
        from anthropic import Anthropic, AsyncAnthropic

        anthropic_clients = (Anthropic, AsyncAnthropic)
        if isinstance(client, anthropic_clients):
            return ProviderType.ANTHROPIC

    if HAS_TOGETHER:
        from together import Together, AsyncTogether  # type: ignore[import-untyped]

        together_clients = (Together, AsyncTogether)
        if isinstance(client, together_clients):
            return ProviderType.TOGETHER

    if HAS_GOOGLE_GENAI:
        from google.genai import Client as GoogleClient

        if isinstance(client, GoogleClient):
            return ProviderType.GOOGLE

    # No known SDK matched: warn, then let the caller use the default handling.
    client_type = type(client)
    judgeval_logger.warning(
        f"Unknown client type {client_type}, Trying to wrap as OpenAI-compatible. "
        "If this is a mistake or you think we should support this client, please file an issue at https://github.com/JudgmentLabs/judgeval/issues!"
    )

    return ProviderType.DEFAULT
49
+
50
+
51
def wrap_provider(tracer: Tracer, client: ApiClient) -> ApiClient:
    """
    Add tracing to an API client by dispatching to a provider-specific wrapper.

    Anthropic, Together and Google GenAI clients go to their own wrappers.
    OpenAI clients, and any client whose provider was not recognised, go to
    the OpenAI wrapper. Wrapper modules are imported lazily, only for the
    branch that is taken.
    """
    detected = _detect_provider(client)

    if detected == ProviderType.ANTHROPIC:
        from .llm_anthropic.wrapper import wrap_anthropic_client

        return wrap_anthropic_client(tracer, client)

    if detected == ProviderType.TOGETHER:
        from .llm_together.wrapper import wrap_together_client

        return wrap_together_client(tracer, client)

    if detected == ProviderType.GOOGLE:
        from .llm_google.wrapper import wrap_google_client

        return wrap_google_client(tracer, client)

    # OpenAI itself, plus the OpenAI-compatible default for unknown clients.
    from .llm_openai.wrapper import wrap_openai_client

    return wrap_openai_client(tracer, client)
@@ -0,0 +1,9 @@
1
+ from enum import Enum
2
+
3
+
4
class ProviderType(Enum):
    """Provider tags for the LLM client kinds, plus a ``DEFAULT`` member."""

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    TOGETHER = "together"
    GOOGLE = "google"
    DEFAULT = "default"
@@ -0,0 +1,3 @@
1
+ from .wrapper import wrap_anthropic_client
2
+
3
+ __all__ = ["wrap_anthropic_client"]
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+ import importlib.util
3
+
4
+ HAS_ANTHROPIC = importlib.util.find_spec("anthropic") is not None
5
+
6
+ __all__ = ["HAS_ANTHROPIC"]