openlit 1.34.30__py3-none-any.whl → 1.34.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +235 -86
- openlit/__init__.py +16 -13
- openlit/_instrumentors.py +2 -1
- openlit/evals/all.py +50 -21
- openlit/evals/bias_detection.py +47 -20
- openlit/evals/hallucination.py +53 -22
- openlit/evals/toxicity.py +50 -21
- openlit/evals/utils.py +54 -30
- openlit/guard/all.py +61 -19
- openlit/guard/prompt_injection.py +34 -14
- openlit/guard/restrict_topic.py +46 -15
- openlit/guard/sensitive_topic.py +34 -14
- openlit/guard/utils.py +58 -22
- openlit/instrumentation/ag2/__init__.py +24 -8
- openlit/instrumentation/ag2/ag2.py +34 -13
- openlit/instrumentation/ag2/async_ag2.py +34 -13
- openlit/instrumentation/ag2/utils.py +133 -30
- openlit/instrumentation/ai21/__init__.py +43 -14
- openlit/instrumentation/ai21/ai21.py +47 -21
- openlit/instrumentation/ai21/async_ai21.py +47 -21
- openlit/instrumentation/ai21/utils.py +299 -78
- openlit/instrumentation/anthropic/__init__.py +21 -4
- openlit/instrumentation/anthropic/anthropic.py +28 -17
- openlit/instrumentation/anthropic/async_anthropic.py +28 -17
- openlit/instrumentation/anthropic/utils.py +145 -35
- openlit/instrumentation/assemblyai/__init__.py +11 -2
- openlit/instrumentation/assemblyai/assemblyai.py +15 -4
- openlit/instrumentation/assemblyai/utils.py +120 -25
- openlit/instrumentation/astra/__init__.py +43 -10
- openlit/instrumentation/astra/astra.py +28 -5
- openlit/instrumentation/astra/async_astra.py +28 -5
- openlit/instrumentation/astra/utils.py +151 -55
- openlit/instrumentation/azure_ai_inference/__init__.py +43 -10
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/utils.py +307 -83
- openlit/instrumentation/bedrock/__init__.py +21 -4
- openlit/instrumentation/bedrock/bedrock.py +63 -25
- openlit/instrumentation/bedrock/utils.py +139 -30
- openlit/instrumentation/chroma/__init__.py +89 -16
- openlit/instrumentation/chroma/chroma.py +28 -6
- openlit/instrumentation/chroma/utils.py +167 -51
- openlit/instrumentation/cohere/__init__.py +63 -18
- openlit/instrumentation/cohere/async_cohere.py +63 -24
- openlit/instrumentation/cohere/cohere.py +63 -24
- openlit/instrumentation/cohere/utils.py +286 -73
- openlit/instrumentation/controlflow/__init__.py +35 -9
- openlit/instrumentation/controlflow/controlflow.py +66 -33
- openlit/instrumentation/crawl4ai/__init__.py +25 -10
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +78 -31
- openlit/instrumentation/crawl4ai/crawl4ai.py +78 -31
- openlit/instrumentation/crewai/__init__.py +40 -15
- openlit/instrumentation/crewai/async_crewai.py +32 -7
- openlit/instrumentation/crewai/crewai.py +32 -7
- openlit/instrumentation/crewai/utils.py +159 -56
- openlit/instrumentation/dynamiq/__init__.py +46 -12
- openlit/instrumentation/dynamiq/dynamiq.py +74 -33
- openlit/instrumentation/elevenlabs/__init__.py +23 -4
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/utils.py +128 -25
- openlit/instrumentation/embedchain/__init__.py +11 -2
- openlit/instrumentation/embedchain/embedchain.py +68 -35
- openlit/instrumentation/firecrawl/__init__.py +24 -7
- openlit/instrumentation/firecrawl/firecrawl.py +46 -20
- openlit/instrumentation/google_ai_studio/__init__.py +45 -10
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/utils.py +180 -67
- openlit/instrumentation/gpt4all/__init__.py +22 -7
- openlit/instrumentation/gpt4all/gpt4all.py +67 -29
- openlit/instrumentation/gpt4all/utils.py +285 -61
- openlit/instrumentation/gpu/__init__.py +128 -47
- openlit/instrumentation/groq/__init__.py +21 -4
- openlit/instrumentation/groq/async_groq.py +33 -21
- openlit/instrumentation/groq/groq.py +33 -21
- openlit/instrumentation/groq/utils.py +192 -55
- openlit/instrumentation/haystack/__init__.py +70 -24
- openlit/instrumentation/haystack/async_haystack.py +28 -6
- openlit/instrumentation/haystack/haystack.py +28 -6
- openlit/instrumentation/haystack/utils.py +196 -74
- openlit/instrumentation/julep/__init__.py +69 -19
- openlit/instrumentation/julep/async_julep.py +53 -27
- openlit/instrumentation/julep/julep.py +53 -28
- openlit/instrumentation/langchain/__init__.py +74 -63
- openlit/instrumentation/langchain/callback_handler.py +1100 -0
- openlit/instrumentation/langchain_community/__init__.py +13 -2
- openlit/instrumentation/langchain_community/async_langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/utils.py +35 -9
- openlit/instrumentation/letta/__init__.py +68 -15
- openlit/instrumentation/letta/letta.py +99 -54
- openlit/instrumentation/litellm/__init__.py +43 -14
- openlit/instrumentation/litellm/async_litellm.py +51 -26
- openlit/instrumentation/litellm/litellm.py +51 -26
- openlit/instrumentation/litellm/utils.py +304 -102
- openlit/instrumentation/llamaindex/__init__.py +267 -90
- openlit/instrumentation/llamaindex/async_llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/utils.py +204 -91
- openlit/instrumentation/mem0/__init__.py +11 -2
- openlit/instrumentation/mem0/mem0.py +50 -29
- openlit/instrumentation/milvus/__init__.py +10 -2
- openlit/instrumentation/milvus/milvus.py +31 -6
- openlit/instrumentation/milvus/utils.py +166 -67
- openlit/instrumentation/mistral/__init__.py +63 -18
- openlit/instrumentation/mistral/async_mistral.py +63 -24
- openlit/instrumentation/mistral/mistral.py +63 -24
- openlit/instrumentation/mistral/utils.py +277 -69
- openlit/instrumentation/multion/__init__.py +69 -19
- openlit/instrumentation/multion/async_multion.py +57 -26
- openlit/instrumentation/multion/multion.py +57 -26
- openlit/instrumentation/ollama/__init__.py +39 -18
- openlit/instrumentation/ollama/async_ollama.py +57 -26
- openlit/instrumentation/ollama/ollama.py +57 -26
- openlit/instrumentation/ollama/utils.py +226 -50
- openlit/instrumentation/openai/__init__.py +156 -32
- openlit/instrumentation/openai/async_openai.py +147 -67
- openlit/instrumentation/openai/openai.py +150 -67
- openlit/instrumentation/openai/utils.py +657 -185
- openlit/instrumentation/openai_agents/__init__.py +5 -1
- openlit/instrumentation/openai_agents/processor.py +110 -90
- openlit/instrumentation/phidata/__init__.py +13 -5
- openlit/instrumentation/phidata/phidata.py +67 -32
- openlit/instrumentation/pinecone/__init__.py +48 -9
- openlit/instrumentation/pinecone/async_pinecone.py +27 -5
- openlit/instrumentation/pinecone/pinecone.py +27 -5
- openlit/instrumentation/pinecone/utils.py +153 -47
- openlit/instrumentation/premai/__init__.py +22 -7
- openlit/instrumentation/premai/premai.py +51 -26
- openlit/instrumentation/premai/utils.py +246 -59
- openlit/instrumentation/pydantic_ai/__init__.py +49 -22
- openlit/instrumentation/pydantic_ai/pydantic_ai.py +69 -16
- openlit/instrumentation/pydantic_ai/utils.py +89 -24
- openlit/instrumentation/qdrant/__init__.py +19 -4
- openlit/instrumentation/qdrant/async_qdrant.py +33 -7
- openlit/instrumentation/qdrant/qdrant.py +33 -7
- openlit/instrumentation/qdrant/utils.py +228 -93
- openlit/instrumentation/reka/__init__.py +23 -10
- openlit/instrumentation/reka/async_reka.py +17 -11
- openlit/instrumentation/reka/reka.py +17 -11
- openlit/instrumentation/reka/utils.py +138 -36
- openlit/instrumentation/together/__init__.py +44 -12
- openlit/instrumentation/together/async_together.py +50 -27
- openlit/instrumentation/together/together.py +50 -27
- openlit/instrumentation/together/utils.py +301 -71
- openlit/instrumentation/transformers/__init__.py +2 -1
- openlit/instrumentation/transformers/transformers.py +13 -3
- openlit/instrumentation/transformers/utils.py +139 -36
- openlit/instrumentation/vertexai/__init__.py +81 -16
- openlit/instrumentation/vertexai/async_vertexai.py +33 -15
- openlit/instrumentation/vertexai/utils.py +123 -27
- openlit/instrumentation/vertexai/vertexai.py +33 -15
- openlit/instrumentation/vllm/__init__.py +12 -5
- openlit/instrumentation/vllm/utils.py +121 -31
- openlit/instrumentation/vllm/vllm.py +16 -10
- openlit/otel/events.py +35 -10
- openlit/otel/metrics.py +32 -24
- openlit/otel/tracing.py +24 -9
- openlit/semcov/__init__.py +72 -6
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/METADATA +2 -1
- openlit-1.34.31.dist-info/RECORD +166 -0
- openlit/instrumentation/langchain/async_langchain.py +0 -102
- openlit/instrumentation/langchain/langchain.py +0 -102
- openlit/instrumentation/langchain/utils.py +0 -252
- openlit-1.34.30.dist-info/RECORD +0 -168
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/LICENSE +0 -0
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/WHEEL +0 -0
openlit/instrumentation/langchain/callback_handler.py
@@ -0,0 +1,1100 @@
+"""
+OpenLIT LangChain Callback Handler for Hierarchical Span Creation
+"""
+
+import time
+import json
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+from uuid import UUID
+
+from langchain_core.callbacks import BaseCallbackHandler
+from langchain_core.messages import BaseMessage
+from langchain_core.outputs import LLMResult, ChatGeneration, Generation
+
+from opentelemetry import context as context_api
+from opentelemetry.trace import SpanKind, set_span_in_context, Status, StatusCode
+from opentelemetry.trace.span import Span
+
+from openlit.__helpers import (
+    common_framework_span_attributes,
+    handle_exception,
+    get_chat_model_cost,
+    general_tokens,
+)
+from openlit.semcov import SemanticConvention
+
+# Enhanced Provider Mapping (inspired by OpenInference)
+LANGCHAIN_PROVIDER_MAP = {
+    "anthropic": "anthropic",
+    "azure": "azure",
+    "azure_ai": "azure",
+    "azure_openai": "azure",
+    "bedrock": "aws",
+    "bedrock_converse": "aws",
+    "cohere": "cohere",
+    "deepseek": "deepseek",
+    "fireworks": "fireworks",
+    "google": "google",
+    "google_anthropic_vertex": "google",
+    "google_genai": "google",
+    "google_vertexai": "google",
+    "groq": "groq",
+    "huggingface": "huggingface",
+    "ibm": "ibm",
+    "mistralai": "mistralai",
+    "nvidia": "nvidia",
+    "ollama": "ollama",
+    "openai": "openai",
+    "perplexity": "perplexity",
+    "together": "together",
+    "vertex": "google",
+    "vertexai": "google",
+    "xai": "xai",
+}
+
+
+class SpanHolder:  # pylint: disable=too-few-public-methods
+    """Holds span information and tracks relationships"""
+
+    def __init__(self, span: Span, start_time: float, context_token=None):
+        self.span = span
+        self.start_time = start_time
+        self.children: List[UUID] = []
+        self.workflow_name = ""
+        self.entity_name = ""
+        self.context_token = context_token  # For context restoration
+
+
+class OpenLITLangChainCallbackHandler(BaseCallbackHandler):
+    """
+    Enhanced OpenLIT callback handler
+    """
+
+    def __init__(
+        self,
+        tracer,
+        version,
+        environment,
+        application_name,
+        pricing_info,
+        capture_message_content,
+        metrics,
+        disable_metrics,
+    ):
+        super().__init__()
+        self.tracer = tracer
+        self.version = version
+        self.environment = environment
+        self.application_name = application_name
+        self.pricing_info = pricing_info
+        self.capture_message_content = capture_message_content
+        self.metrics = metrics
+        self.disable_metrics = disable_metrics
+
+        # Track active spans by run_id
+        self.spans: Dict[UUID, SpanHolder] = {}
+
+        self.session_name = environment  # Map environment to session
+        self.tags_enabled = True  # Enable tagging system
+        self.events_enabled = True  # Enable events tracking
+        self.performance_baselines = {}  # Store performance baselines
+        self.error_classification_enabled = True  # Enable error classification
+
+    # Required BaseCallbackHandler properties
+    @property
+    def raise_error(self) -> bool:
+        """Should the handler raise errors instead of logging them."""
+        return False
+
+    @property
+    def run_inline(self) -> bool:
+        """Should the handler run inline with the main thread."""
+        return True
+
+    # Ignore flags - all set to False so we capture everything
+    @property
+    def ignore_llm(self) -> bool:
+        return False
+
+    @property
+    def ignore_chain(self) -> bool:
+        return False
+
+    @property
+    def ignore_agent(self) -> bool:
+        return False
+
+    @property
+    def ignore_retriever(self) -> bool:
+        return False
+
+    @property
+    def ignore_chat_model(self) -> bool:
+        return False
+
+    def _get_span_name(self, serialized: Dict[str, Any], operation_type: str) -> str:
+        """Generate OpenLIT-style span names following our naming convention"""
+
+        # Handle None serialized (common for RunnableSequence)
+        if not serialized:
+            return f"{operation_type} RunnableSequence"
+
+        # Extract class name for component identification
+        if "id" in serialized and serialized["id"]:
+            component_name = serialized["id"][-1]  # Last part is usually the class name
+        elif "name" in serialized:
+            component_name = serialized["name"]
+        else:
+            component_name = "unknown"
+
+        # Follow OpenLIT naming: {operation_type} {component_name}
+        return f"{operation_type} {component_name}"
+
+    def _create_span(
+        self,
+        run_id: UUID,
+        parent_run_id: Optional[UUID],
+        span_name: str,
+        kind: SpanKind = SpanKind.CLIENT,
+        model_name: Optional[str] = None,
+    ) -> Span:
+        """Create a span with proper parent-child relationship and set as active context"""
+
+        # If we have a parent, create child span in parent context
+        if parent_run_id and parent_run_id in self.spans:
+            parent_span = self.spans[parent_run_id].span
+            span = self.tracer.start_span(
+                span_name, context=set_span_in_context(parent_span), kind=kind
+            )
+            # Track parent-child relationship
+            self.spans[parent_run_id].children.append(run_id)
+        else:
+            # Create root span
+            span = self.tracer.start_span(span_name, kind=kind)
+
+        # Set this span as the active context so downstream instrumentations (like OpenAI)
+        # will create child spans under it
+        span_context = set_span_in_context(span)
+        context_token = context_api.attach(span_context)
+
+        # Store span with start time and context token
+        start_time = time.time()
+        self.spans[run_id] = SpanHolder(span, start_time, context_token)
+
+        # Set common framework span attributes for consistency
+        scope = type("GenericScope", (), {})()
+        scope._span = span
+        scope._start_time = start_time
+        scope._end_time = None
+
+        # Create mock instance with model name for common_framework_span_attributes
+        mock_instance = None
+        if model_name:
+            mock_instance = type("MockInstance", (), {"model_name": model_name})()
+
+        common_framework_span_attributes(
+            scope,
+            SemanticConvention.GEN_AI_SYSTEM_LANGCHAIN,
+            "localhost",  # Default server address for LangChain
+            8080,  # Default port
+            self.environment,
+            self.application_name,
+            self.version,
+            span_name,
+            mock_instance,
+        )
+
+        return span
+
+    def _end_span(self, run_id: UUID) -> None:
+        """End span and all its children, restore context"""
+        if run_id not in self.spans:
+            return
+
+        span_holder = self.spans[run_id]
+        span = span_holder.span
+
+        # End all child spans first
+        for child_id in span_holder.children:
+            if child_id in self.spans:
+                self._end_span(child_id)
+
+        # Restore the previous context before ending span
+        if span_holder.context_token:
+            context_api.detach(span_holder.context_token)
+
+        # Update end time for duration calculation
+        end_time = time.time()
+        duration = end_time - span_holder.start_time
+        span.set_attribute(
+            SemanticConvention.GEN_AI_CLIENT_OPERATION_DURATION, duration
+        )
+
+        # End this span
+        span.set_status(Status(StatusCode.OK))
+        span.end()
+
+        # Clean up
+        del self.spans[run_id]
+
+    def _add_langsmith_events(
+        self, span: Span, event_type: str, data: Optional[Dict] = None
+    ):
+        """Add LangSmith-style events to spans"""
+        if not self.events_enabled:
+            return
+
+        try:
+            event_data = {
+                "timestamp": datetime.now(timezone.utc).isoformat(),
+                "event_type": event_type,
+                "session_name": self.session_name,
+            }
+
+            if data:
+                event_data.update(data)
+
+            span.add_event(f"langchain.{event_type}", event_data)
+        except Exception:
+            pass  # Graceful degradation
+
+    def _classify_error(self, error: Exception) -> str:
+        """Classify errors like LangSmith does"""
+        error_type = type(error).__name__
+
+        # LangSmith-style error classification
+        if "rate" in str(error).lower() or "429" in str(error):
+            return "RATE_LIMIT_ERROR"
+        elif "timeout" in str(error).lower():
+            return "TIMEOUT_ERROR"
+        elif "auth" in str(error).lower() or "401" in str(error):
+            return "AUTH_ERROR"
+        elif "not found" in str(error).lower() or "404" in str(error):
+            return "NOT_FOUND_ERROR"
+        elif "connection" in str(error).lower():
+            return "CONNECTION_ERROR"
+        elif "validation" in str(error).lower():
+            return "VALIDATION_ERROR"
+        else:
+            return f"GENERAL_ERROR_{error_type}"
+
+    def _add_tags_from_context(self, span: Span, run_id: UUID, **kwargs):
+        """Add LangSmith-style tags to spans"""
+        if not self.tags_enabled:
+            return
+
+        try:
+            tags = []
+
+            # Auto-generate tags based on context
+            if "model" in kwargs:
+                tags.append(f"model:{kwargs['model']}")
+            if "temperature" in kwargs:
+                tags.append(f"temperature:{kwargs['temperature']}")
+            if hasattr(kwargs.get("invocation_params", {}), "stream"):
+                tags.append(
+                    f"streaming:{kwargs['invocation_params'].get('stream', False)}"
+                )
+
+            # Add session tag
+            tags.append(f"session:{self.session_name}")
+
+            # Add environment tag
+            tags.append(f"env:{self.environment}")
+
+            if tags:
+                span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_TAGS, tags)
+
+        except Exception:
+            pass  # Graceful degradation
+
+    def _track_performance_baseline(
+        self, span: Span, operation_name: str, duration_ms: float
+    ):
+        """Track performance against baselines like LangSmith"""
+        try:
+            # Store baseline if not exists
+            if operation_name not in self.performance_baselines:
+                self.performance_baselines[operation_name] = {
+                    "avg_duration": duration_ms,
+                    "min_duration": duration_ms,
+                    "max_duration": duration_ms,
+                    "count": 1,
+                }
+            else:
+                baseline = self.performance_baselines[operation_name]
+                baseline["count"] += 1
+                baseline["avg_duration"] = (
+                    baseline["avg_duration"] * (baseline["count"] - 1) + duration_ms
+                ) / baseline["count"]
+                baseline["min_duration"] = min(baseline["min_duration"], duration_ms)
+                baseline["max_duration"] = max(baseline["max_duration"], duration_ms)
+
+            # Add performance comparison attributes
+            baseline = self.performance_baselines[operation_name]
+            span.set_attribute(
+                SemanticConvention.GEN_AI_FRAMEWORK_PERFORMANCE_VS_BASELINE,
+                duration_ms / baseline["avg_duration"],
+            )
+            span.set_attribute(
+                SemanticConvention.GEN_AI_FRAMEWORK_PERFORMANCE_BASELINE_AVG,
+                baseline["avg_duration"],
+            )
+            span.set_attribute(
+                SemanticConvention.GEN_AI_FRAMEWORK_PERFORMANCE_BASELINE_PERCENTILE,
+                _calculate_percentile(duration_ms, baseline),
+            )
+
+        except Exception:
+            pass  # Graceful degradation
+
+    # Enhanced callback methods with new features
+    def on_llm_start(
+        self,
+        serialized: Dict[str, Any],
+        prompts: List[str],
+        *,
+        run_id: UUID,
+        parent_run_id: Optional[UUID] = None,
+        **kwargs: Any,
+    ) -> None:
+        """Called when an LLM starts"""
+
+        try:
+            # Create workflow span for chain operations
+            span_name = self._get_span_name(serialized, "workflow")
+            span = self._create_span(
+                run_id, parent_run_id, span_name, SpanKind.INTERNAL
+            )
+
+            # Set OpenLIT attributes
+            span.set_attribute(
+                SemanticConvention.GEN_AI_SYSTEM,
+                SemanticConvention.GEN_AI_SYSTEM_LANGCHAIN,
+            )
+            span.set_attribute(
+                SemanticConvention.GEN_AI_OPERATION,
+                SemanticConvention.GEN_AI_OPERATION_TYPE_FRAMEWORK,
+            )
+
+            # Set workflow-specific attributes
+            if serialized and "id" in serialized and serialized["id"]:
+                span.set_attribute(
+                    SemanticConvention.GEN_AI_WORKFLOW_TYPE, serialized["id"][-1]
+                )
+            else:
+                span.set_attribute(
+                    SemanticConvention.GEN_AI_WORKFLOW_TYPE, "RunnableSequence"
+                )
+
+            # LangSmith-style serialized function capture
+            self._capture_serialized_info(span, serialized)
+
+            # Capture input if enabled (with safe JSON serialization)
+            if self.capture_message_content:
+                try:
+                    input_str = json.dumps(prompts, default=str)[:1000]
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_WORKFLOW_INPUT, input_str
+                    )
+                except Exception:
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_WORKFLOW_INPUT, str(prompts)[:1000]
+                    )
+
+            # Framework enhancements - use attributes only (not events for non-chat operations)
+
+            self._add_tags_from_context(span, run_id, **kwargs)
+
+            # Additional metadata is captured through semantic conventions above
+
+            # Extract provider information (OpenInference-inspired enhancement)
+            self._extract_provider_info(span, **kwargs)
+
+        except Exception:
+            # Graceful error handling to prevent callback system failure
+            pass
+
+    def on_llm_end(
+        self,
+        response: LLMResult,
+        *,
+        run_id: UUID,
+        parent_run_id: Optional[UUID] = None,
+        **kwargs: Any,
+    ) -> None:
+        """Called when an LLM call ends"""
+
+        try:
+            if run_id not in self.spans:
+                return
+
+            span_holder = self.spans[run_id]
+            span = span_holder.span
+
+            # Process LLM response with OpenLIT's business intelligence
+            self._process_llm_response(span, response, run_id)
+
+            # Duration is set in _end_span method
+
+            # NEW: Add performance baseline tracking
+            if span_holder:
+                duration_ms = (time.time() - span_holder.start_time) * 1000
+                self._track_performance_baseline(span, span.name, duration_ms)
+
+            # Framework completion - use attributes only (not events for non-chat operations)
+
+            self._end_span(run_id)
+
+        except Exception:
+            # Graceful error handling
+            pass
+
+    def on_llm_error(
+        self,
+        error: Exception,
+        *,
+        run_id: UUID,
+        parent_run_id: Optional[UUID] = None,
+        **kwargs: Any,
+    ) -> None:
+        """Called when an LLM call ends with an error"""
+
+        try:
+            if run_id not in self.spans:
+                return
+
+            span_holder = self.spans[run_id]
+            span = span_holder.span
+
+            # NEW: Enhanced error classification and tracking
+            # Framework error classification
+            error_class = self._classify_error(error)
+            span.set_attribute(
+                SemanticConvention.GEN_AI_FRAMEWORK_ERROR_CLASS, error_class
+            )
+            span.set_attribute(
+                SemanticConvention.GEN_AI_FRAMEWORK_ERROR_TYPE, type(error).__name__
+            )
+            span.set_attribute(
+                SemanticConvention.GEN_AI_FRAMEWORK_ERROR_MESSAGE, str(error)
+            )
+
+            # Framework error - use attributes only (not events for non-chat operations)
+
+            # Set error status
+            span.set_status(Status(StatusCode.ERROR, str(error)))
+            span.record_exception(error)
+
+            self._end_span(run_id)
+
+        except Exception:
+            # Graceful error handling
+            pass
+
+    def on_chain_start(
+        self,
+        serialized: Dict[str, Any],
+        inputs: Dict[str, Any],
+        *,
+        run_id: UUID,
+        parent_run_id: Optional[UUID] = None,
+        tags: Optional[List[str]] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> None:
+        """Called when a chain (RunnableSequence, etc.) starts"""
+
+        try:
+            # Create workflow span for chain operations
+            span_name = self._get_span_name(serialized, "workflow")
+            span = self._create_span(
+                run_id, parent_run_id, span_name, SpanKind.INTERNAL
+            )
+
+            # Set OpenLIT attributes
+            span.set_attribute(
+                SemanticConvention.GEN_AI_SYSTEM,
+                SemanticConvention.GEN_AI_SYSTEM_LANGCHAIN,
+            )
+            span.set_attribute(
+                SemanticConvention.GEN_AI_OPERATION,
+                SemanticConvention.GEN_AI_OPERATION_TYPE_FRAMEWORK,
+            )
+
+            # Set workflow-specific attributes
+            if serialized and "id" in serialized and serialized["id"]:
+                span.set_attribute(
+                    SemanticConvention.GEN_AI_WORKFLOW_TYPE, serialized["id"][-1]
+                )
+            else:
+                span.set_attribute(
+                    SemanticConvention.GEN_AI_WORKFLOW_TYPE, "RunnableSequence"
+                )
+
+            # LangSmith-style serialized function capture
+            self._capture_serialized_info(span, serialized)
+
+            # Capture input if enabled (with safe JSON serialization)
+            if self.capture_message_content:
+                try:
+                    input_str = json.dumps(inputs, default=str)[:1000]
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_WORKFLOW_INPUT, input_str
+                    )
+                except Exception:
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_WORKFLOW_INPUT, str(inputs)[:1000]
+                    )
+
+            # Enhanced chain tracking - use attributes only (not events for non-chat operations)
+
+            # Add chain-specific tags
+            chain_tags = [f"chain_type:{serialized.get('id', ['unknown'])[-1]}"]
+            if inputs:
+                chain_tags.append(f"input_count:{len(inputs)}")
+            span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_TAGS, chain_tags)
+
+        except Exception:
+            # Graceful error handling to prevent callback system failure
+            pass
+
+    def on_chain_end(
+        self,
+        outputs: Dict[str, Any],
+        *,
+        run_id: UUID,
+        parent_run_id: Optional[UUID] = None,
+        **kwargs: Any,
+    ) -> None:
+        """Called when a chain ends"""
+
+        try:
+            if run_id not in self.spans:
+                return
+
+            span = self.spans[run_id].span
+
+            # Capture output if enabled (with safe JSON serialization)
+            if self.capture_message_content:
+                try:
+                    output_str = json.dumps(outputs, default=str)[:1000]
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_WORKFLOW_OUTPUT, output_str
+                    )
+                except Exception:
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_WORKFLOW_OUTPUT, str(outputs)[:1000]
+                    )
+
+            # Duration is set in _end_span method
+
+            self._end_span(run_id)
+
+        except Exception:
+            # Graceful error handling
+            pass
+
+    def on_chat_model_start(
+        self,
+        serialized: Dict[str, Any],
+        messages: List[List[BaseMessage]],
+        *,
+        run_id: UUID,
+        parent_run_id: Optional[UUID] = None,
+        tags: Optional[List[str]] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> None:
+        """Called when a chat model (ChatOpenAI, etc.) starts"""
+
+        try:
+            # Extract model name from multiple sources with fallback chain
+            model_name = "unknown"
+
+            # DEBUG: Log serialized data to understand structure
+            # This helps us improve model extraction logic
+            # print(f"DEBUG: serialized={serialized}")
+            # print(f"DEBUG: kwargs={kwargs}")
+
+            # Try extracting from serialized kwargs first (most reliable)
+            if (
+                serialized
+                and "kwargs" in serialized
+                and "model" in serialized["kwargs"]
+            ):
+                model_name = serialized["kwargs"]["model"]
+            elif kwargs.get("model"):
+                model_name = kwargs["model"]
+            elif serialized:
+                # Try extracting from different parts of serialized data
+                # LangChain often stores model info in various locations
+                if "model" in serialized:
+                    model_name = serialized["model"]
+                elif "model_name" in serialized:
+                    model_name = serialized["model_name"]
+                elif (
+                    "name" in serialized and "model" in str(serialized["name"]).lower()
+                ):
+                    model_name = serialized["name"]
+                elif "id" in serialized and serialized["id"]:
+                    # Extract from class identifier - this gives us the framework being used
+                    class_info = serialized["id"]
+                    if isinstance(class_info, list) and len(class_info) > 0:
+                        class_name = class_info[
+                            -1
+                        ]  # Last part is usually the class name
+                        # Infer model based on LangChain provider class
+                        if "chatopenai" in class_name.lower():
+                            model_name = "gpt-3.5-turbo"  # Default OpenAI model
+                        elif "chatanthropic" in class_name.lower():
+                            model_name = "claude-3"
+                        elif (
+                            "chatgooglevertexai" in class_name.lower()
+                            or "chatgoogleai" in class_name.lower()
+                        ):
+                            model_name = "gemini-pro"
+                        elif "llama" in class_name.lower():
+                            model_name = "llama-2"
+                        else:
+                            # Use the class name itself as model identifier
+                            model_name = class_name
+
+            # Create chat span with model name
+            span_name = f"chat {model_name}"
+            span = self._create_span(
+                run_id, parent_run_id, span_name, SpanKind.CLIENT, model_name
+            )
+
+            # Set OpenLIT chat operation attributes
+            span.set_attribute(
+                SemanticConvention.GEN_AI_SYSTEM, "openai"
+            )  # Most common
+            span.set_attribute(
+                SemanticConvention.GEN_AI_OPERATION,
+                SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            )
+            span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, model_name)
+
+            # Streaming detection
+            is_streaming = kwargs.get("stream", False) or kwargs.get("streaming", False)
+            span.set_attribute(
+                SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_streaming
+            )
+
+            # Process messages for content capture and token counting
+            if messages and len(messages) > 0:
+                formatted_messages = self._format_messages(messages[0])
+
+                if self.capture_message_content:
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_CONTENT_PROMPT, formatted_messages
+                    )
+
+                # Calculate input tokens
+                input_tokens = general_tokens(formatted_messages)
+                span.set_attribute(
+                    SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens
+                )
+
+        except Exception:
+            # Graceful error handling
+            pass
+
+    def _format_messages(self, messages: List[BaseMessage]) -> str:
+        """Format LangChain messages for content capture"""
+        formatted = []
+        for message in messages:
+            role = self._get_message_role(message)
+            content = getattr(message, "content", str(message))
+            formatted.append(f"{role}: {content}")
+        return "\n".join(formatted)
+
+    def _get_message_role(self, message: BaseMessage) -> str:
+        """Extract role from LangChain message"""
+        message_type = message.__class__.__name__.lower()
+        if "human" in message_type:
+            return "user"
+        elif "ai" in message_type:
+            return "assistant"
+        elif "system" in message_type:
+            return "system"
+        elif "tool" in message_type:
+            return "tool"
+        else:
+            return "user"
+
+    def _process_llm_response(
+        self, span: Span, response: LLMResult, run_id: UUID
+    ) -> None:
+        """Process LLM response with OpenLIT's comprehensive business intelligence"""
+
+        try:
+            # Extract response content
+            if response.generations and len(response.generations) > 0:
+                generation = response.generations[0][0]
+
+                if isinstance(generation, ChatGeneration):
+                    completion_content = generation.message.content
+                elif isinstance(generation, Generation):
+                    completion_content = generation.text
+                else:
+                    completion_content = str(generation)
+
+                # Set completion content
+                if self.capture_message_content:
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_CONTENT_COMPLETION, completion_content
+                    )
+
+                # Calculate output tokens
+                output_tokens = general_tokens(completion_content)
+                span.set_attribute(
+                    SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens
+                )
+
+            # Use enhanced token extraction method (inspired by OpenInference)
+            self._extract_token_usage(span, response)
+
+            # Extract additional LLM output information
+            if hasattr(response, "llm_output") and response.llm_output:
+                llm_output = response.llm_output
+
+                # Model information
+                if "model_name" in llm_output:
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_RESPONSE_MODEL,
+                        llm_output["model_name"],
+                    )
+
+            # Calculate cost using OpenLIT's cost tracking
+            input_tokens = span.attributes.get(
+                SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, 0
+            )
+            output_tokens = span.attributes.get(
+                SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, 0
+            )
+            model_name = span.attributes.get(
+                SemanticConvention.GEN_AI_REQUEST_MODEL, ""
+            )
+
+            if input_tokens and output_tokens and model_name:
+                cost = get_chat_model_cost(
+                    model_name, self.pricing_info, input_tokens, output_tokens
+                )
+                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+        except Exception as e:
+            handle_exception(span, e)
+
+    def _extract_token_usage(self, span: Span, result: LLMResult) -> None:
+        """Extract comprehensive token usage with OpenInference-style parsing"""
+        try:
+            # Try multiple token usage extraction patterns
+            token_usage = None
+
+            # Pattern 1: Standard non-streaming (OpenAI, most providers)
+            if hasattr(result, "llm_output") and result.llm_output:
+                token_usage = result.llm_output.get(
+                    "token_usage"
+                ) or result.llm_output.get("usage")
+
+            # Pattern 2: Streaming outputs (when stream_usage=True)
+            if not token_usage and result.generations:
+                try:
+                    first_gen = result.generations[0][0]
+                    if hasattr(first_gen, "message") and hasattr(
+                        first_gen.message, "kwargs"
+                    ):
+                        token_usage = first_gen.message.kwargs.get("usage_metadata")
+                except (IndexError, AttributeError):
+                    pass
+
+            # Pattern 3: VertexAI-specific (generation_info.usage_metadata)
+            if not token_usage and result.generations:
+                try:
+                    first_gen = result.generations[0][0]
+                    if (
+                        hasattr(first_gen, "generation_info")
+                        and first_gen.generation_info
+                    ):
+                        token_usage = first_gen.generation_info.get("usage_metadata")
+                except (IndexError, AttributeError):
+                    pass
+
+            if token_usage:
+                # Support multiple token field names from different providers
+                input_tokens = (
+                    token_usage.get("prompt_tokens")
+                    or token_usage.get("input_tokens")  # Anthropic
+                    or token_usage.get("prompt_token_count")
+                )  # Gemini
+
+                output_tokens = (
+                    token_usage.get("completion_tokens")
+                    or token_usage.get("output_tokens")  # Anthropic
+                    or token_usage.get("candidates_token_count")
+                )  # Gemini
+
+                total_tokens = token_usage.get("total_tokens") or token_usage.get(
+                    "total_token_count"
+                )  # Gemini
+
+                if input_tokens:
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens
+                    )
+                if output_tokens:
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens
+                    )
+                if total_tokens:
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS, total_tokens
+                    )
+
+                # Enhanced token details (OpenAI-specific)
+                if details := token_usage.get("completion_tokens_details"):
+                    if audio_tokens := details.get("audio_tokens"):
+                        span.set_attribute(
+                            SemanticConvention.GEN_AI_USAGE_COMPLETION_TOKENS_DETAILS_AUDIO,
+                            audio_tokens,
+                        )
+                    if reasoning_tokens := details.get("reasoning_tokens"):
+                        span.set_attribute(
+                            SemanticConvention.GEN_AI_USAGE_COMPLETION_TOKENS_DETAILS_REASONING,
+                            reasoning_tokens,
+                        )
+
+                if details := token_usage.get("prompt_tokens_details"):
+                    if cached_tokens := details.get("cached_tokens"):
+                        span.set_attribute(
+                            SemanticConvention.GEN_AI_USAGE_PROMPT_TOKENS_DETAILS_CACHE_READ,
+                            cached_tokens,
+                        )
+                    if audio_tokens := details.get("audio_tokens"):
+                        span.set_attribute(
+                            SemanticConvention.GEN_AI_USAGE_PROMPT_TOKENS_DETAILS_CACHE_READ,
+                            audio_tokens,
+                        )
+
+                # Anthropic cache tokens
+                if cache_read := token_usage.get("cache_read_input_tokens"):
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_USAGE_PROMPT_TOKENS_DETAILS_CACHE_READ,
+                        cache_read,
+                    )
+                if cache_write := token_usage.get("cache_creation_input_tokens"):
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_USAGE_PROMPT_TOKENS_DETAILS_CACHE_WRITE,
+                        cache_write,
+                    )
+
+        except Exception as e:
+            handle_exception(span, e)
+
+    def _extract_provider_info(self, span: Span, **kwargs) -> None:
+        """Extract provider information with OpenInference-style mapping"""
+        try:
+            # Extract from metadata if available
+            if "metadata" in kwargs:
+                metadata = kwargs["metadata"]
+                if isinstance(metadata, dict) and "ls_provider" in metadata:
+                    provider = metadata["ls_provider"].lower()
+                    mapped_provider = LANGCHAIN_PROVIDER_MAP.get(provider, provider)
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_SYSTEM, mapped_provider
+                    )
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_REQUEST_PROVIDER, mapped_provider
+                    )
+                    return
+
+            # Extract from invocation parameters
+            if "invocation_params" in kwargs:
+                params = kwargs["invocation_params"]
+                if isinstance(params, dict):
+                    # Look for provider indicators in model names
+                    model = params.get("model") or params.get("model_name", "")
+                    if model:
+                        if "gpt-" in model or "o1-" in model:
+                            span.set_attribute(
+                                SemanticConvention.GEN_AI_SYSTEM, "openai"
+                            )
+                        elif "claude-" in model:
+                            span.set_attribute(
+                                SemanticConvention.GEN_AI_SYSTEM, "anthropic"
+                            )
+                        elif "gemini-" in model or "bison-" in model:
+                            span.set_attribute(
+                                SemanticConvention.GEN_AI_SYSTEM, "google"
+                            )
+                        elif "mistral-" in model:
+                            span.set_attribute(
+                                SemanticConvention.GEN_AI_SYSTEM, "mistralai"
+                            )
+
+        except Exception as e:
+            handle_exception(span, e)
+
+    def _capture_serialized_info(self, span: Span, serialized: Dict[str, Any]) -> None:
+        """Capture LangSmith-style serialized function information"""
+        if not serialized:
+            return
+
+        try:
+            # Capture function name (LangSmith enhancement)
+            if "name" in serialized and serialized["name"]:
+                span.set_attribute(
+                    SemanticConvention.GEN_AI_SERIALIZED_NAME, serialized["name"]
+                )
+
+            # Capture function signature if available
+            if "signature" in serialized and serialized["signature"]:
+                span.set_attribute(
+                    SemanticConvention.GEN_AI_SERIALIZED_SIGNATURE,
+                    str(serialized["signature"])[:500],
+                )
+
+            # Capture docstring if available
+            if "doc" in serialized and serialized["doc"]:
+                span.set_attribute(
+                    SemanticConvention.GEN_AI_SERIALIZED_DOC,
+                    str(serialized["doc"])[:200],
+                )
+
+            # Capture module information
+            if "id" in serialized and isinstance(serialized["id"], list):
+                module_path = (
+                    ".".join(serialized["id"][:-1]) if len(serialized["id"]) > 1 else ""
+                )
+                if module_path:
+                    span.set_attribute(
+                        SemanticConvention.GEN_AI_SERIALIZED_MODULE, module_path
+                    )
+
+        except Exception:
+            # Graceful failure for serialized info capture
+            pass
+
+    def on_tool_start(
+        self,
+        serialized: Dict[str, Any],
+        input_str: str,
+        *,
+        run_id: UUID,
+        parent_run_id: Optional[UUID] = None,
+        **kwargs: Any,
+    ) -> None:
+        """Called when a tool starts"""
+
+        span_name = self._get_span_name(serialized, "tool")
+        span = self._create_span(run_id, parent_run_id, span_name, SpanKind.CLIENT)
+
+        span.set_attribute(
+            SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_LANGCHAIN
+        )
+        span.set_attribute(
+            SemanticConvention.GEN_AI_OPERATION,
+            SemanticConvention.GEN_AI_OPERATION_TYPE_TOOLS,
+        )
+
+        if self.capture_message_content:
+            span.set_attribute(SemanticConvention.GEN_AI_TOOL_INPUT, input_str[:1000])
+
+    def on_tool_end(
+        self,
+        output: str,
+        *,
+        run_id: UUID,
+        parent_run_id: Optional[UUID] = None,
+        **kwargs: Any,
+    ) -> None:
+        """Called when a tool ends"""
+
+        if run_id not in self.spans:
+            return
+
+        span = self.spans[run_id].span
+
+        if self.capture_message_content:
+            span.set_attribute(SemanticConvention.GEN_AI_TOOL_OUTPUT, output[:1000])
+
+        # Duration is set in _end_span method
+
+        self._end_span(run_id)
+
+    def on_retriever_start(
+        self,
+        serialized: Dict[str, Any],
+        query: str,
+        *,
+        run_id: UUID,
+        parent_run_id: Optional[UUID] = None,
+        **kwargs: Any,
+    ) -> None:
+        """Called when a retriever starts"""
+
+        span_name = self._get_span_name(serialized, "retrieval")
+        span = self._create_span(run_id, parent_run_id, span_name, SpanKind.CLIENT)
+
+        span.set_attribute(
+            SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_LANGCHAIN
+        )
+        span.set_attribute(
+            SemanticConvention.GEN_AI_OPERATION,
+            SemanticConvention.GEN_AI_OPERATION_TYPE_RETRIEVE,
+        )
+
+        if self.capture_message_content:
+            span.set_attribute(SemanticConvention.GEN_AI_RETRIEVAL_QUERY, query[:1000])
+
+    def on_retriever_end(
+        self,
+        documents,
+        *,
+        run_id: UUID,
+        parent_run_id: Optional[UUID] = None,
+        **kwargs: Any,
+    ) -> None:
+        """Called when a retriever ends"""
+
+        if run_id not in self.spans:
+            return
+
+        span = self.spans[run_id].span
+
+        # Document count
+        doc_count = len(documents) if documents else 0
+        span.set_attribute(
+            SemanticConvention.GEN_AI_RETRIEVAL_DOCUMENT_COUNT, doc_count
+        )
+
+        # Sample document content
+        if self.capture_message_content and documents:
+            sample_docs = []
+            for doc in documents[:3]:  # First 3 docs
+                if hasattr(doc, "page_content"):
+                    sample_docs.append(doc.page_content[:200])
+                else:
+                    sample_docs.append(str(doc)[:200])
+            span.set_attribute(
+                SemanticConvention.GEN_AI_RETRIEVAL_DOCUMENTS, "; ".join(sample_docs)
+            )
+
+        # Duration is set in _end_span method
+
+        self._end_span(run_id)
+
+
+def _calculate_percentile(value: float, baseline: Dict[str, Any]) -> float:
+    """Calculate rough percentile based on min/max"""
+    try:
+        min_val = baseline["min_duration"]
+        max_val = baseline["max_duration"]
+        if max_val == min_val:
+            return 50.0
+        return ((value - min_val) / (max_val - min_val)) * 100
+    except:
+        return 50.0  # Default to median