judgeval 0.12.0__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. judgeval/__init__.py +2 -2
  2. judgeval/api/api_types.py +81 -12
  3. judgeval/cli.py +2 -1
  4. judgeval/constants.py +0 -6
  5. judgeval/data/evaluation_run.py +2 -5
  6. judgeval/data/judgment_types.py +97 -12
  7. judgeval/data/trace.py +108 -1
  8. judgeval/dataset/__init__.py +72 -23
  9. judgeval/env.py +5 -20
  10. judgeval/integrations/langgraph/__init__.py +9 -785
  11. judgeval/scorers/api_scorer.py +7 -12
  12. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -8
  13. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -8
  14. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -12
  15. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +22 -33
  16. judgeval/scorers/score.py +1 -1
  17. judgeval/scorers/utils.py +1 -4
  18. judgeval/tracer/__init__.py +175 -156
  19. judgeval/tracer/exporters/__init__.py +4 -1
  20. judgeval/tracer/keys.py +15 -25
  21. judgeval/tracer/llm/__init__.py +0 -1
  22. judgeval/tracer/llm/anthropic/__init__.py +20 -0
  23. judgeval/tracer/llm/google/__init__.py +21 -0
  24. judgeval/tracer/llm/groq/__init__.py +20 -0
  25. judgeval/tracer/llm/openai/__init__.py +32 -0
  26. judgeval/tracer/llm/providers.py +28 -79
  27. judgeval/tracer/llm/together/__init__.py +20 -0
  28. judgeval/tracer/managers.py +23 -48
  29. judgeval/tracer/processors/__init__.py +36 -75
  30. judgeval/tracer/utils.py +1 -2
  31. judgeval/utils/file_utils.py +0 -2
  32. judgeval/utils/meta.py +18 -5
  33. judgeval/utils/testing.py +0 -14
  34. judgeval/utils/version_check.py +2 -0
  35. judgeval/version.py +1 -1
  36. {judgeval-0.12.0.dist-info → judgeval-0.13.0.dist-info}/METADATA +1 -7
  37. {judgeval-0.12.0.dist-info → judgeval-0.13.0.dist-info}/RECORD +40 -35
  38. {judgeval-0.12.0.dist-info → judgeval-0.13.0.dist-info}/WHEEL +0 -0
  39. {judgeval-0.12.0.dist-info → judgeval-0.13.0.dist-info}/entry_points.txt +0 -0
  40. {judgeval-0.12.0.dist-info → judgeval-0.13.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -1,5 +1,4 @@
1
1
  from __future__ import annotations
2
- import os
3
2
  from contextvars import ContextVar
4
3
  import atexit
5
4
  import functools
@@ -24,15 +23,17 @@ from typing import (
24
23
  from functools import partial
25
24
  from warnings import warn
26
25
 
27
- from opentelemetry.sdk.trace import SpanProcessor, TracerProvider, Span
26
+ from opentelemetry.sdk.trace import TracerProvider
28
27
  from opentelemetry.sdk.resources import Resource
29
28
  from opentelemetry.trace import (
30
29
  Status,
31
30
  StatusCode,
32
- TracerProvider as ABCTracerProvider,
33
- NoOpTracerProvider,
34
31
  Tracer as ABCTracer,
32
+ Span,
35
33
  get_current_span,
34
+ get_tracer_provider,
35
+ set_tracer_provider,
36
+ INVALID_SPAN_CONTEXT,
36
37
  )
37
38
 
38
39
  from judgeval.data.evaluation_run import ExampleEvaluationRun, TraceEvaluationRun
@@ -41,6 +42,8 @@ from judgeval.env import (
41
42
  JUDGMENT_API_KEY,
42
43
  JUDGMENT_DEFAULT_GPT_MODEL,
43
44
  JUDGMENT_ORG_ID,
45
+ JUDGMENT_ENABLE_MONITORING,
46
+ JUDGMENT_ENABLE_EVALUATIONS,
44
47
  )
45
48
  from judgeval.logger import judgeval_logger
46
49
  from judgeval.scorers.api_scorer import TraceAPIScorerConfig, ExampleAPIScorerConfig
@@ -52,7 +55,10 @@ from judgeval.tracer.managers import (
52
55
  sync_agent_context,
53
56
  async_agent_context,
54
57
  )
58
+ from judgeval.utils.decorators import dont_throw
59
+ from judgeval.utils.guards import expect_api_key, expect_organization_id
55
60
  from judgeval.utils.serialize import safe_serialize
61
+ from judgeval.utils.meta import SingletonMeta
56
62
  from judgeval.version import get_version
57
63
  from judgeval.warnings import JudgmentWarning
58
64
 
@@ -64,7 +70,6 @@ from judgeval.tracer.local_eval_queue import LocalEvaluationQueue
64
70
  from judgeval.tracer.processors import (
65
71
  JudgmentSpanProcessor,
66
72
  NoOpJudgmentSpanProcessor,
67
- NoOpSpanProcessor,
68
73
  )
69
74
  from judgeval.tracer.utils import set_span_attribute, TraceScorerConfig
70
75
 
@@ -85,46 +90,34 @@ class AgentContext(TypedDict):
85
90
  parent_agent_id: str | None
86
91
 
87
92
 
88
- class Tracer:
89
- _active_tracers: List[Tracer] = []
90
-
93
+ class Tracer(metaclass=SingletonMeta):
91
94
  __slots__ = (
92
95
  "api_key",
93
96
  "organization_id",
94
97
  "project_name",
95
- "api_url",
96
- "deep_tracing",
97
98
  "enable_monitoring",
98
99
  "enable_evaluation",
100
+ "resource_attributes",
99
101
  "api_client",
100
102
  "local_eval_queue",
101
- # Otel
102
103
  "judgment_processor",
103
- "processors",
104
- "provider",
105
104
  "tracer",
106
- # Agent
107
105
  "agent_context",
108
- "cost_context",
106
+ "_initialized",
109
107
  )
110
108
 
111
109
  api_key: str
112
110
  organization_id: str
113
111
  project_name: str
114
- api_url: str
115
- deep_tracing: bool
116
112
  enable_monitoring: bool
117
113
  enable_evaluation: bool
114
+ resource_attributes: Optional[Dict[str, Any]]
118
115
  api_client: JudgmentSyncClient
119
116
  local_eval_queue: LocalEvaluationQueue
120
-
121
117
  judgment_processor: JudgmentSpanProcessor
122
- processors: List[SpanProcessor]
123
- provider: ABCTracerProvider
124
118
  tracer: ABCTracer
125
-
126
119
  agent_context: ContextVar[Optional[AgentContext]]
127
- cost_context: ContextVar[Optional[Dict[str, float]]]
120
+ _initialized: bool
128
121
 
129
122
  def __init__(
130
123
  self,
@@ -133,82 +126,125 @@ class Tracer:
133
126
  project_name: str,
134
127
  api_key: Optional[str] = None,
135
128
  organization_id: Optional[str] = None,
136
- deep_tracing: bool = False,
137
- enable_monitoring: bool = os.getenv(
138
- "JUDGMENT_ENABLE_MONITORING", "true"
139
- ).lower()
140
- != "false",
141
- enable_evaluation: bool = os.getenv(
142
- "JUDGMENT_ENABLE_EVALUATIONS", "true"
143
- ).lower()
144
- != "false",
145
- processors: List[SpanProcessor] = [],
129
+ enable_monitoring: bool = JUDGMENT_ENABLE_MONITORING.lower() == "true",
130
+ enable_evaluation: bool = JUDGMENT_ENABLE_EVALUATIONS.lower() == "true",
146
131
  resource_attributes: Optional[Dict[str, Any]] = None,
132
+ initialize: bool = True,
147
133
  ):
148
- _api_key = api_key or JUDGMENT_API_KEY
149
- _organization_id = organization_id or JUDGMENT_ORG_ID
150
-
151
- if _api_key is None:
152
- raise ValueError(
153
- "API Key is not set, please set it in the environment variables or pass it as `api_key`"
134
+ if not hasattr(self, "_initialized"):
135
+ self._initialized = False
136
+ self.agent_context = ContextVar("current_agent_context", default=None)
137
+
138
+ self.project_name = project_name
139
+ self.api_key = expect_api_key(api_key or JUDGMENT_API_KEY)
140
+ self.organization_id = expect_organization_id(
141
+ organization_id or JUDGMENT_ORG_ID
154
142
  )
143
+ self.enable_monitoring = enable_monitoring
144
+ self.enable_evaluation = enable_evaluation
145
+ self.resource_attributes = resource_attributes
155
146
 
156
- if _organization_id is None:
157
- raise ValueError(
158
- "Organization ID is not set, please set it in the environment variables or pass it as `organization_id`"
147
+ self.api_client = JudgmentSyncClient(
148
+ api_key=self.api_key,
149
+ organization_id=self.organization_id,
159
150
  )
151
+ self.local_eval_queue = LocalEvaluationQueue()
160
152
 
161
- self.api_key = _api_key
162
- self.organization_id = _organization_id
163
- self.project_name = project_name
164
- self.api_url = url_for("/otel/v1/traces")
153
+ if initialize:
154
+ self.initialize()
165
155
 
166
- self.deep_tracing = deep_tracing
167
- self.enable_monitoring = enable_monitoring
168
- self.enable_evaluation = enable_evaluation
156
+ def initialize(self) -> Tracer:
157
+ if self._initialized:
158
+ return self
169
159
 
170
160
  self.judgment_processor = NoOpJudgmentSpanProcessor()
171
- self.processors = processors
172
- self.provider = NoOpTracerProvider()
173
-
174
- self.agent_context = ContextVar("current_agent_context", default=None)
175
- self.cost_context = ContextVar("current_cost_context", default=None)
176
-
177
161
  if self.enable_monitoring:
178
- self.judgment_processor = JudgmentSpanProcessor(
179
- self,
180
- self.project_name,
181
- self.api_key,
182
- self.organization_id,
183
- max_queue_size=2**18,
184
- export_timeout_millis=30000,
185
- resource_attributes=resource_attributes,
162
+ project_id = Tracer._resolve_project_id(
163
+ self.project_name, self.api_key, self.organization_id
186
164
  )
187
165
 
188
- resource = Resource.create(self.judgment_processor.resource_attributes)
189
- self.provider = TracerProvider(resource=resource)
166
+ if project_id:
167
+ self.judgment_processor = self.get_processor(
168
+ tracer=self,
169
+ project_name=self.project_name,
170
+ project_id=project_id,
171
+ api_key=self.api_key,
172
+ organization_id=self.organization_id,
173
+ resource_attributes=self.resource_attributes,
174
+ )
190
175
 
191
- self.processors.append(self.judgment_processor)
192
- for processor in self.processors:
193
- self.provider.add_span_processor(processor)
176
+ resource = Resource.create(self.judgment_processor.resource_attributes)
177
+ provider = TracerProvider(resource=resource)
178
+ provider.add_span_processor(self.judgment_processor)
179
+ set_tracer_provider(provider)
180
+ else:
181
+ judgeval_logger.error(
182
+ f"Failed to resolve project {self.project_name}, please create it first at https://app.judgmentlabs.ai/org/{self.organization_id}/projects. Skipping Judgment export."
183
+ )
194
184
 
195
- self.tracer = self.provider.get_tracer(
185
+ self.tracer = get_tracer_provider().get_tracer(
196
186
  JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME,
197
187
  get_version(),
198
188
  )
199
- self.api_client = JudgmentSyncClient(
200
- api_key=self.api_key,
201
- organization_id=self.organization_id,
202
- )
203
- self.local_eval_queue = LocalEvaluationQueue()
204
189
 
205
190
  if self.enable_evaluation and self.enable_monitoring:
206
191
  self.local_eval_queue.start_workers()
207
192
 
208
- Tracer._active_tracers.append(self)
209
-
210
- # Register atexit handler to flush on program exit
193
+ self._initialized = True
211
194
  atexit.register(self._atexit_flush)
195
+ return self
196
+
197
+ @staticmethod
198
+ def get_exporter(
199
+ project_id: str,
200
+ api_key: Optional[str] = None,
201
+ organization_id: Optional[str] = None,
202
+ ):
203
+ from judgeval.tracer.exporters import JudgmentSpanExporter
204
+
205
+ return JudgmentSpanExporter(
206
+ endpoint=url_for("/otel/v1/traces"),
207
+ api_key=api_key or JUDGMENT_API_KEY,
208
+ organization_id=organization_id or JUDGMENT_ORG_ID,
209
+ project_id=project_id,
210
+ )
211
+
212
+ @staticmethod
213
+ def get_processor(
214
+ tracer: Tracer,
215
+ project_name: str,
216
+ project_id: str,
217
+ api_key: Optional[str] = None,
218
+ organization_id: Optional[str] = None,
219
+ max_queue_size: int = 2**18,
220
+ export_timeout_millis: int = 30000,
221
+ resource_attributes: Optional[Dict[str, Any]] = None,
222
+ ) -> JudgmentSpanProcessor:
223
+ """Create a JudgmentSpanProcessor using the correct constructor."""
224
+ return JudgmentSpanProcessor(
225
+ tracer,
226
+ project_name,
227
+ project_id,
228
+ api_key or JUDGMENT_API_KEY,
229
+ organization_id or JUDGMENT_ORG_ID,
230
+ max_queue_size=max_queue_size,
231
+ export_timeout_millis=export_timeout_millis,
232
+ resource_attributes=resource_attributes,
233
+ )
234
+
235
+ @dont_throw
236
+ @functools.lru_cache(maxsize=64)
237
+ @staticmethod
238
+ def _resolve_project_id(
239
+ project_name: str, api_key: str, organization_id: str
240
+ ) -> str | None:
241
+ """Resolve project_id from project_name using the API."""
242
+ client = JudgmentSyncClient(
243
+ api_key=api_key,
244
+ organization_id=organization_id,
245
+ )
246
+ response = client.projects_resolve({"project_name": project_name})
247
+ return response["project_id"]
212
248
 
213
249
  def get_current_span(self):
214
250
  return get_current_span()
@@ -219,40 +255,11 @@ class Tracer:
219
255
  def get_current_agent_context(self):
220
256
  return self.agent_context
221
257
 
222
- def get_current_cost_context(self):
223
- return self.cost_context
224
-
225
- def get_processor(self):
226
- """Get the judgment span processor instance.
227
-
228
- Returns:
229
- The JudgmentSpanProcessor or NoOpJudgmentSpanProcessor instance used by this tracer.
230
- """
231
- return self.judgment_processor
232
-
233
258
  def set_customer_id(self, customer_id: str) -> None:
234
259
  span = self.get_current_span()
235
260
  if span and span.is_recording():
236
261
  set_span_attribute(span, AttributeKeys.JUDGMENT_CUSTOMER_ID, customer_id)
237
262
 
238
- def add_cost_to_current_context(self, cost: Optional[float]) -> None:
239
- """Add cost to the current cost context and update span attribute."""
240
- if cost is None:
241
- return
242
- current_cost_context = self.cost_context.get()
243
- if current_cost_context is not None:
244
- current_cumulative_cost = current_cost_context.get("cumulative_cost", 0.0)
245
- new_cumulative_cost = float(current_cumulative_cost) + cost
246
- current_cost_context["cumulative_cost"] = new_cumulative_cost
247
-
248
- span = self.get_current_span()
249
- if span and span.is_recording():
250
- set_span_attribute(
251
- span,
252
- AttributeKeys.JUDGMENT_CUMULATIVE_LLM_COST,
253
- new_cumulative_cost,
254
- )
255
-
256
263
  def add_agent_attributes_to_span(self, span):
257
264
  """Add agent ID, class name, and instance name to span if they exist in context"""
258
265
  current_agent_context = self.agent_context.get()
@@ -353,6 +360,8 @@ class Tracer:
353
360
  return
354
361
 
355
362
  span_context = span.get_span_context()
363
+ if span_context == INVALID_SPAN_CONTEXT:
364
+ return
356
365
  trace_id = format(span_context.trace_id, "032x")
357
366
  span_id = format(span_context.span_id, "016x")
358
367
  eval_run_name = f"async_trace_evaluate_{span_id}"
@@ -668,6 +677,8 @@ class Tracer:
668
677
  /,
669
678
  *,
670
679
  span_type: str | None = None,
680
+ span_name: str | None = None,
681
+ attributes: Optional[Dict[str, Any]] = None,
671
682
  scorer_config: TraceScorerConfig | None = None,
672
683
  ) -> C: ...
673
684
 
@@ -678,6 +689,8 @@ class Tracer:
678
689
  /,
679
690
  *,
680
691
  span_type: str | None = None,
692
+ span_name: str | None = None,
693
+ attributes: Optional[Dict[str, Any]] = None,
681
694
  scorer_config: TraceScorerConfig | None = None,
682
695
  ) -> Callable[[C], C]: ...
683
696
 
@@ -833,37 +846,33 @@ class Tracer:
833
846
  timeout_millis: Maximum time to wait for flush completion in milliseconds
834
847
 
835
848
  Returns:
836
- True if all processors flushed successfully within timeout, False otherwise
849
+ True if processor flushed successfully within timeout, False otherwise
837
850
  """
838
- success = True
839
- for processor in self.processors:
840
- try:
841
- result = processor.force_flush(timeout_millis)
842
- if not result:
843
- success = False
844
- except Exception as e:
845
- judgeval_logger.warning(f"Error flushing processor {processor}: {e}")
846
- success = False
847
- return success
848
-
849
- def _atexit_flush(self) -> None:
851
+ try:
852
+ return self.judgment_processor.force_flush(timeout_millis)
853
+ except Exception as e:
854
+ judgeval_logger.warning(f"Error flushing processor: {e}")
855
+ return False
856
+
857
+ def _atexit_flush(self, timeout_millis: int = 30000) -> None:
850
858
  """Internal method called on program exit to flush remaining spans.
851
859
 
852
860
  This blocks until all spans are flushed or timeout is reached to ensure
853
861
  proper cleanup before program termination.
854
862
  """
855
863
  try:
856
- self.force_flush(timeout_millis=30000)
864
+ self.force_flush(timeout_millis=timeout_millis)
857
865
  except Exception as e:
858
866
  judgeval_logger.warning(f"Error during atexit flush: {e}")
859
867
 
868
+ @dont_throw
860
869
  def async_evaluate(
861
870
  self,
862
871
  /,
863
872
  *,
864
873
  scorer: Union[ExampleAPIScorerConfig, ExampleScorer],
865
874
  example: Example,
866
- model: str = JUDGMENT_DEFAULT_GPT_MODEL,
875
+ model: Optional[str] = None,
867
876
  sampling_rate: float = 1.0,
868
877
  ):
869
878
  if not self.enable_evaluation or not self.enable_monitoring:
@@ -884,6 +893,12 @@ class Tracer:
884
893
  )
885
894
  return
886
895
 
896
+ if model is None:
897
+ if scorer.model is None:
898
+ model = JUDGMENT_DEFAULT_GPT_MODEL
899
+ else:
900
+ model = scorer.model
901
+
887
902
  if sampling_rate < 0 or sampling_rate > 1:
888
903
  judgeval_logger.error(
889
904
  "Sampling rate must be between 0 and 1, got %s, skipping evaluation."
@@ -899,37 +914,32 @@ class Tracer:
899
914
  return
900
915
 
901
916
  span_context = self.get_current_span().get_span_context()
917
+ if span_context == INVALID_SPAN_CONTEXT:
918
+ judgeval_logger.warning(
919
+ "No span context was found for async_evaluate, skipping evaluation. Please make sure to use the @observe decorator on the function you are evaluating."
920
+ )
921
+ return
922
+
902
923
  trace_id = format(span_context.trace_id, "032x")
903
924
  span_id = format(span_context.span_id, "016x")
904
925
  hosted_scoring = isinstance(scorer, ExampleAPIScorerConfig) or (
905
926
  isinstance(scorer, ExampleScorer) and scorer.server_hosted
906
927
  )
907
- eval_run_name = f"async_evaluate_{span_id}" # note this name doesnt matter because we don't save the experiment only the example and scorer_data
928
+ eval_run = ExampleEvaluationRun(
929
+ project_name=self.project_name,
930
+ # note this name doesnt matter because we don't save the experiment only the example and scorer_data
931
+ eval_name=f"async_evaluate_{span_id}",
932
+ examples=[example],
933
+ scorers=[scorer],
934
+ model=model,
935
+ trace_span_id=span_id,
936
+ trace_id=trace_id,
937
+ )
908
938
  if hosted_scoring:
909
- eval_run = ExampleEvaluationRun(
910
- project_name=self.project_name,
911
- eval_name=eval_run_name,
912
- examples=[example],
913
- scorers=[scorer],
914
- model=model,
915
- trace_span_id=span_id,
916
- trace_id=trace_id,
917
- )
918
939
  self.api_client.add_to_run_eval_queue_examples(
919
- eval_run.model_dump(warnings=False)
920
- ) # type: ignore
921
- else:
922
- # Handle custom scorers using local evaluation queue
923
- eval_run = ExampleEvaluationRun(
924
- project_name=self.project_name,
925
- eval_name=eval_run_name,
926
- examples=[example],
927
- scorers=[scorer],
928
- model=model,
929
- trace_span_id=span_id,
930
- trace_id=trace_id,
940
+ eval_run.model_dump(warnings=False) # type: ignore
931
941
  )
932
-
942
+ else:
933
943
  # Enqueue the evaluation run to the local evaluation queue
934
944
  self.local_eval_queue.enqueue(eval_run)
935
945
 
@@ -971,19 +981,32 @@ class Tracer:
971
981
 
972
982
 
973
983
  def wrap(client: ApiClient) -> ApiClient:
974
- if not Tracer._active_tracers:
984
+ try:
985
+ tracer = Tracer.get_instance()
986
+ if tracer is None or not isinstance(tracer, Tracer):
987
+ warn(
988
+ "No Tracer instance found, client will not be wrapped. "
989
+ "Create a Tracer instance first.",
990
+ JudgmentWarning,
991
+ stacklevel=2,
992
+ )
993
+ return client
994
+ if not tracer._initialized:
995
+ warn(
996
+ "Tracer not initialized, client will not be wrapped. "
997
+ "Call Tracer.initialize() first to setup the tracer.",
998
+ JudgmentWarning,
999
+ stacklevel=2,
1000
+ )
1001
+ return client
1002
+ return tracer.wrap(client)
1003
+ except Exception:
975
1004
  warn(
976
- "No active tracers found, client will not be wrapped. "
977
- "You can use the global `wrap` function after creating a tracer instance. "
978
- "Or you can use the `wrap` method on the tracer instance to directly wrap the client. ",
1005
+ "Error accessing tracer singleton, client will not be wrapped.",
979
1006
  JudgmentWarning,
980
1007
  stacklevel=2,
981
1008
  )
982
-
983
- wrapped_client = client
984
- for tracer in Tracer._active_tracers:
985
- wrapped_client = tracer.wrap(wrapped_client)
986
- return wrapped_client
1009
+ return client
987
1010
 
988
1011
 
989
1012
  def format_inputs(
@@ -1010,11 +1033,7 @@ def format_inputs(
1010
1033
  return {}
1011
1034
 
1012
1035
 
1013
- # Export processor classes for direct access
1014
1036
  __all__ = [
1015
1037
  "Tracer",
1016
1038
  "wrap",
1017
- "JudgmentSpanProcessor",
1018
- "NoOpJudgmentSpanProcessor",
1019
- "NoOpSpanProcessor",
1020
1039
  ]
@@ -12,12 +12,15 @@ from judgeval.tracer.exporters.utils import deduplicate_spans
12
12
 
13
13
 
14
14
  class JudgmentSpanExporter(OTLPSpanExporter):
15
- def __init__(self, endpoint: str, api_key: str, organization_id: str):
15
+ def __init__(
16
+ self, endpoint: str, api_key: str, organization_id: str, project_id: str
17
+ ):
16
18
  super().__init__(
17
19
  endpoint=endpoint,
18
20
  headers={
19
21
  "Authorization": f"Bearer {api_key}",
20
22
  "X-Organization-Id": organization_id,
23
+ "X-Project-Id": project_id,
21
24
  },
22
25
  )
23
26
 
judgeval/tracer/keys.py CHANGED
@@ -2,49 +2,40 @@
2
2
  Identifiers used by Judgeval to store specific types of data in the spans.
3
3
  """
4
4
 
5
- from opentelemetry.semconv.resource import ResourceAttributes
6
- from opentelemetry.semconv._incubating.attributes import gen_ai_attributes
7
5
  from enum import Enum
8
6
 
9
7
 
10
8
  class AttributeKeys(str, Enum):
11
- # General function tracing attributes (custom namespace)
12
9
  JUDGMENT_SPAN_KIND = "judgment.span_kind"
13
10
  JUDGMENT_INPUT = "judgment.input"
14
11
  JUDGMENT_OUTPUT = "judgment.output"
15
12
  JUDGMENT_OFFLINE_MODE = "judgment.offline_mode"
16
13
  JUDGMENT_UPDATE_ID = "judgment.update_id"
17
14
 
18
- # Custom tracking attributes
19
15
  JUDGMENT_CUSTOMER_ID = "judgment.customer_id"
20
16
 
21
- # Agent specific attributes (custom namespace)
22
17
  JUDGMENT_AGENT_ID = "judgment.agent_id"
23
18
  JUDGMENT_PARENT_AGENT_ID = "judgment.parent_agent_id"
24
19
  JUDGMENT_AGENT_CLASS_NAME = "judgment.agent_class_name"
25
20
  JUDGMENT_AGENT_INSTANCE_NAME = "judgment.agent_instance_name"
26
21
  JUDGMENT_IS_AGENT_ENTRY_POINT = "judgment.is_agent_entry_point"
27
- JUDGMENT_CUMULATIVE_LLM_COST = "judgment.cumulative_llm_cost"
28
22
  JUDGMENT_STATE_BEFORE = "judgment.state_before"
29
23
  JUDGMENT_STATE_AFTER = "judgment.state_after"
30
24
 
31
- # Evaluation-specific attributes (custom namespace)
32
25
  PENDING_TRACE_EVAL = "judgment.pending_trace_eval"
33
26
 
34
- # GenAI-specific attributes (semantic conventions)
35
- GEN_AI_PROMPT = gen_ai_attributes.GEN_AI_PROMPT
36
- GEN_AI_COMPLETION = gen_ai_attributes.GEN_AI_COMPLETION
37
- GEN_AI_REQUEST_MODEL = gen_ai_attributes.GEN_AI_REQUEST_MODEL
38
- GEN_AI_RESPONSE_MODEL = gen_ai_attributes.GEN_AI_RESPONSE_MODEL
39
- GEN_AI_SYSTEM = gen_ai_attributes.GEN_AI_SYSTEM
40
- GEN_AI_USAGE_INPUT_TOKENS = gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS
41
- GEN_AI_USAGE_OUTPUT_TOKENS = gen_ai_attributes.GEN_AI_USAGE_OUTPUT_TOKENS
42
- GEN_AI_USAGE_COMPLETION_TOKENS = gen_ai_attributes.GEN_AI_USAGE_COMPLETION_TOKENS
43
- GEN_AI_REQUEST_TEMPERATURE = gen_ai_attributes.GEN_AI_REQUEST_TEMPERATURE
44
- GEN_AI_REQUEST_MAX_TOKENS = gen_ai_attributes.GEN_AI_REQUEST_MAX_TOKENS
45
- GEN_AI_RESPONSE_FINISH_REASONS = gen_ai_attributes.GEN_AI_RESPONSE_FINISH_REASONS
27
+ GEN_AI_PROMPT = "gen_ai.prompt"
28
+ GEN_AI_COMPLETION = "gen_ai.completion"
29
+ GEN_AI_REQUEST_MODEL = "gen_ai.request.model"
30
+ GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"
31
+ GEN_AI_SYSTEM = "gen_ai.system"
32
+ GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
33
+ GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
34
+ GEN_AI_USAGE_COMPLETION_TOKENS = "gen_ai.usage.completion_tokens"
35
+ GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
36
+ GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
37
+ GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"
46
38
 
47
- # GenAI-specific attributes (custom namespace)
48
39
  GEN_AI_USAGE_TOTAL_COST = "gen_ai.usage.total_cost_usd"
49
40
 
50
41
 
@@ -54,14 +45,13 @@ class InternalAttributeKeys(str, Enum):
54
45
  These are NOT exported and are used only for internal span lifecycle management.
55
46
  """
56
47
 
57
- # Span control attributes
58
48
  DISABLE_PARTIAL_EMIT = "disable_partial_emit"
59
49
  CANCELLED = "cancelled"
60
50
 
61
51
 
62
52
  class ResourceKeys(str, Enum):
63
- SERVICE_NAME = ResourceAttributes.SERVICE_NAME
64
- TELEMETRY_SDK_LANGUAGE = ResourceAttributes.TELEMETRY_SDK_LANGUAGE
65
- TELEMETRY_SDK_NAME = ResourceAttributes.TELEMETRY_SDK_NAME
66
- TELEMETRY_SDK_VERSION = ResourceAttributes.TELEMETRY_SDK_VERSION
53
+ SERVICE_NAME = "service.name"
54
+ TELEMETRY_SDK_LANGUAGE = "telemetry.sdk.language"
55
+ TELEMETRY_SDK_NAME = "telemetry.sdk.name"
56
+ TELEMETRY_SDK_VERSION = "telemetry.sdk.version"
67
57
  JUDGMENT_PROJECT_ID = "judgment.project_id"
@@ -873,7 +873,6 @@ def _set_usage_attributes(span, usage: TraceUsage, tracer: Tracer):
873
873
  set_span_attribute(
874
874
  span, AttributeKeys.GEN_AI_USAGE_TOTAL_COST, usage.total_cost_usd
875
875
  )
876
- tracer.add_cost_to_current_context(usage.total_cost_usd)
877
876
 
878
877
 
879
878
  def wrap_provider(tracer: Tracer, client: ApiClient) -> ApiClient:
@@ -0,0 +1,20 @@
1
+ from __future__ import annotations
2
+
3
+ HAS_ANTHROPIC = False
4
+ anthropic_Anthropic = None
5
+ anthropic_AsyncAnthropic = None
6
+
7
+ try:
8
+ from anthropic import Anthropic, AsyncAnthropic # type: ignore[import-untyped]
9
+
10
+ anthropic_Anthropic = Anthropic
11
+ anthropic_AsyncAnthropic = AsyncAnthropic
12
+ HAS_ANTHROPIC = True
13
+ except ImportError:
14
+ pass
15
+
16
+ __all__ = [
17
+ "HAS_ANTHROPIC",
18
+ "anthropic_Anthropic",
19
+ "anthropic_AsyncAnthropic",
20
+ ]
@@ -0,0 +1,21 @@
1
+ from __future__ import annotations
2
+
3
+ HAS_GOOGLE_GENAI = False
4
+ google_genai_Client = None
5
+ google_genai_AsyncClient = None
6
+
7
+ try:
8
+ from google.genai import Client # type: ignore[import-untyped]
9
+ from google.genai.client import AsyncClient # type: ignore[import-untyped]
10
+
11
+ google_genai_Client = Client
12
+ google_genai_AsyncClient = AsyncClient
13
+ HAS_GOOGLE_GENAI = True
14
+ except ImportError:
15
+ pass
16
+
17
+ __all__ = [
18
+ "HAS_GOOGLE_GENAI",
19
+ "google_genai_Client",
20
+ "google_genai_AsyncClient",
21
+ ]