openlit 1.33.9__py3-none-any.whl → 1.33.11__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the registry.
- openlit/__helpers.py +78 -0
- openlit/__init__.py +41 -13
- openlit/instrumentation/ag2/__init__.py +9 -10
- openlit/instrumentation/ag2/ag2.py +134 -69
- openlit/instrumentation/ai21/__init__.py +6 -5
- openlit/instrumentation/ai21/ai21.py +71 -534
- openlit/instrumentation/ai21/async_ai21.py +71 -534
- openlit/instrumentation/ai21/utils.py +407 -0
- openlit/instrumentation/anthropic/__init__.py +3 -3
- openlit/instrumentation/anthropic/anthropic.py +5 -5
- openlit/instrumentation/anthropic/async_anthropic.py +5 -5
- openlit/instrumentation/assemblyai/__init__.py +2 -2
- openlit/instrumentation/assemblyai/assemblyai.py +3 -3
- openlit/instrumentation/astra/__init__.py +25 -25
- openlit/instrumentation/astra/astra.py +7 -7
- openlit/instrumentation/astra/async_astra.py +7 -7
- openlit/instrumentation/azure_ai_inference/__init__.py +5 -5
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +11 -11
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +11 -11
- openlit/instrumentation/bedrock/__init__.py +2 -2
- openlit/instrumentation/bedrock/bedrock.py +3 -3
- openlit/instrumentation/chroma/__init__.py +9 -9
- openlit/instrumentation/chroma/chroma.py +7 -7
- openlit/instrumentation/cohere/__init__.py +7 -7
- openlit/instrumentation/cohere/async_cohere.py +10 -10
- openlit/instrumentation/cohere/cohere.py +11 -11
- openlit/instrumentation/controlflow/__init__.py +4 -4
- openlit/instrumentation/controlflow/controlflow.py +5 -5
- openlit/instrumentation/crawl4ai/__init__.py +3 -3
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
- openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
- openlit/instrumentation/crewai/__init__.py +3 -3
- openlit/instrumentation/crewai/crewai.py +6 -4
- openlit/instrumentation/dynamiq/__init__.py +5 -5
- openlit/instrumentation/dynamiq/dynamiq.py +5 -5
- openlit/instrumentation/elevenlabs/__init__.py +5 -5
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +4 -5
- openlit/instrumentation/elevenlabs/elevenlabs.py +4 -5
- openlit/instrumentation/embedchain/__init__.py +2 -2
- openlit/instrumentation/embedchain/embedchain.py +9 -9
- openlit/instrumentation/firecrawl/__init__.py +3 -3
- openlit/instrumentation/firecrawl/firecrawl.py +5 -5
- openlit/instrumentation/google_ai_studio/__init__.py +3 -3
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +3 -3
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +3 -3
- openlit/instrumentation/gpt4all/__init__.py +5 -5
- openlit/instrumentation/gpt4all/gpt4all.py +350 -225
- openlit/instrumentation/gpu/__init__.py +5 -5
- openlit/instrumentation/groq/__init__.py +5 -5
- openlit/instrumentation/groq/async_groq.py +359 -243
- openlit/instrumentation/groq/groq.py +359 -243
- openlit/instrumentation/haystack/__init__.py +2 -2
- openlit/instrumentation/haystack/haystack.py +5 -5
- openlit/instrumentation/julep/__init__.py +7 -7
- openlit/instrumentation/julep/async_julep.py +6 -6
- openlit/instrumentation/julep/julep.py +6 -6
- openlit/instrumentation/langchain/__init__.py +15 -9
- openlit/instrumentation/langchain/async_langchain.py +388 -0
- openlit/instrumentation/langchain/langchain.py +110 -497
- openlit/instrumentation/letta/__init__.py +7 -7
- openlit/instrumentation/letta/letta.py +10 -8
- openlit/instrumentation/litellm/__init__.py +9 -10
- openlit/instrumentation/litellm/async_litellm.py +321 -250
- openlit/instrumentation/litellm/litellm.py +319 -248
- openlit/instrumentation/llamaindex/__init__.py +2 -2
- openlit/instrumentation/llamaindex/llamaindex.py +5 -5
- openlit/instrumentation/mem0/__init__.py +2 -2
- openlit/instrumentation/mem0/mem0.py +5 -5
- openlit/instrumentation/milvus/__init__.py +2 -2
- openlit/instrumentation/milvus/milvus.py +7 -7
- openlit/instrumentation/mistral/__init__.py +13 -13
- openlit/instrumentation/mistral/async_mistral.py +426 -253
- openlit/instrumentation/mistral/mistral.py +424 -250
- openlit/instrumentation/multion/__init__.py +7 -7
- openlit/instrumentation/multion/async_multion.py +9 -7
- openlit/instrumentation/multion/multion.py +9 -7
- openlit/instrumentation/ollama/__init__.py +19 -39
- openlit/instrumentation/ollama/async_ollama.py +137 -563
- openlit/instrumentation/ollama/ollama.py +136 -563
- openlit/instrumentation/ollama/utils.py +333 -0
- openlit/instrumentation/openai/__init__.py +11 -11
- openlit/instrumentation/openai/async_openai.py +25 -27
- openlit/instrumentation/openai/openai.py +25 -27
- openlit/instrumentation/phidata/__init__.py +2 -2
- openlit/instrumentation/phidata/phidata.py +6 -4
- openlit/instrumentation/pinecone/__init__.py +6 -6
- openlit/instrumentation/pinecone/pinecone.py +7 -7
- openlit/instrumentation/premai/__init__.py +5 -5
- openlit/instrumentation/premai/premai.py +268 -219
- openlit/instrumentation/qdrant/__init__.py +2 -2
- openlit/instrumentation/qdrant/async_qdrant.py +7 -7
- openlit/instrumentation/qdrant/qdrant.py +7 -7
- openlit/instrumentation/reka/__init__.py +5 -5
- openlit/instrumentation/reka/async_reka.py +93 -55
- openlit/instrumentation/reka/reka.py +93 -55
- openlit/instrumentation/together/__init__.py +9 -9
- openlit/instrumentation/together/async_together.py +284 -242
- openlit/instrumentation/together/together.py +284 -242
- openlit/instrumentation/transformers/__init__.py +3 -3
- openlit/instrumentation/transformers/transformers.py +79 -48
- openlit/instrumentation/vertexai/__init__.py +19 -69
- openlit/instrumentation/vertexai/async_vertexai.py +333 -990
- openlit/instrumentation/vertexai/vertexai.py +333 -990
- openlit/instrumentation/vllm/__init__.py +3 -3
- openlit/instrumentation/vllm/vllm.py +65 -35
- openlit/otel/events.py +85 -0
- openlit/otel/tracing.py +3 -13
- openlit/semcov/__init__.py +16 -4
- {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/METADATA +2 -2
- openlit-1.33.11.dist-info/RECORD +125 -0
- openlit-1.33.9.dist-info/RECORD +0 -121
- {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/LICENSE +0 -0
- {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/WHEEL +0 -0
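
The dominant change in this release is consolidation: span, metric, and event logic that was duplicated across sync and async modules moves into shared per-provider `utils.py` files (`ai21.py` and `async_ai21.py` each drop 534 lines against a new 407-line `utils.py`; `ollama` follows the same pattern), and a new `openlit/otel/events.py` wires up an OpenTelemetry event provider. The wrappers that remain are thin. A rough sketch of the resulting call shape — the wrapper and the `ctx` bundle below are illustrative stand-ins, not openlit's actual code; only the `process_chat_response` signature is taken from the `utils.py` diff that follows:

```python
# Illustrative sketch only: how a thin sync wrapper can delegate to the
# shared helpers added in openlit/instrumentation/ai21/utils.py (full diff
# below). `ctx` is a hypothetical stand-in for the values the real
# instrumentor closes over (tracer, pricing_info, metrics, ...).
import time
from openlit.instrumentation.ai21.utils import process_chat_response

def chat_wrapper(wrapped, ctx):
    def wrapper(*args, **kwargs):
        request_model = kwargs.get("model", "jamba-1.5-mini")
        with ctx["tracer"].start_as_current_span(f"chat {request_model}") as span:
            start_time = time.time()
            # Assumes a dict-like response (see response_as_dict in __helpers).
            response = wrapped(*args, **kwargs)
            # All attribute/metric/event handling now lives in utils.py.
            return process_chat_response(
                response, request_model, ctx["pricing_info"],
                ctx["server_port"], ctx["server_address"],
                ctx["environment"], ctx["application_name"],
                ctx["metrics"], ctx["event_provider"], start_time, span,
                capture_message_content=True, **kwargs)
    return wrapper
```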
--- /dev/null
+++ openlit/instrumentation/ai21/utils.py
@@ -0,0 +1,407 @@
+"""
+AI21 OpenTelemetry instrumentation utility functions
+"""
+
+import time
+
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    calculate_ttft,
+    response_as_dict,
+    calculate_tbt,
+    general_tokens,
+    extract_and_format_input,
+    get_chat_model_cost,
+    handle_exception,
+    create_metrics_attributes,
+    otel_event,
+    concatenate_all_contents
+)
+from openlit.semcov import SemanticConvetion
+
+def setup_common_span_attributes(span, request_model, kwargs, tokens,
+                                 server_port, server_address, environment,
+                                 application_name, extra_attrs):
+    """
+    Set common span attributes for both chat and RAG operations.
+    """
+
+    # Base attributes from SDK and operation settings.
+    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION, SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM, SemanticConvetion.GEN_AI_SYSTEM_AI21)
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL, request_model)
+    span.set_attribute(SemanticConvetion.SERVER_PORT, server_port)
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED, kwargs.get("seed", ""))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, kwargs.get("frequency_penalty", 0.0))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, kwargs.get("max_tokens", -1))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY, kwargs.get("presence_penalty", 0.0))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, kwargs.get("stop", []))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, kwargs.get("temperature", 0.4))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P, kwargs.get("top_p", 1.0))
+
+    # Add token-related attributes if available.
+    if "finish_reason" in tokens:
+        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON, [tokens["finish_reason"]])
+    if "response_id" in tokens:
+        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID, tokens["response_id"])
+    if "input_tokens" in tokens:
+        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS, tokens["input_tokens"])
+    if "output_tokens" in tokens:
+        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS, tokens["output_tokens"])
+    if "total_tokens" in tokens:
+        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS, tokens["total_tokens"])
+
+    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL, request_model)
+    span.set_attribute(SemanticConvetion.SERVER_ADDRESS, server_address)
+    # Environment and service identifiers.
+    span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
+    span.set_attribute(SERVICE_NAME, application_name)
+    # Set any extra attributes passed in.
+    for key, value in extra_attrs.items():
+        span.set_attribute(key, value)
+
+def record_common_metrics(metrics, application_name, environment, request_model,
+                          server_address, server_port, start_time, end_time,
+                          input_tokens, output_tokens, cost, include_tbt=False, tbt_value=None):
+    """
+    Record common metrics for the operation.
+    """
+
+    attributes = create_metrics_attributes(
+        service_name=application_name,
+        deployment_environment=environment,
+        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+        system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+        request_model=request_model,
+        server_address=server_address,
+        server_port=server_port,
+        response_model=request_model,
+    )
+    metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, attributes)
+    metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
+    if include_tbt and tbt_value is not None:
+        metrics["genai_server_tbt"].record(tbt_value, attributes)
+    metrics["genai_server_ttft"].record(end_time - start_time, attributes)
+    metrics["genai_requests"].add(1, attributes)
+    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+    metrics["genai_cost"].record(cost, attributes)
+
+def emit_common_events(event_provider, choices, finish_reason, llmresponse, formatted_messages,
+                       capture_message_content, n):
+    """
+    Emit events common to both chat and chat rag operations.
+    """
+
+    if n > 1:
+        for choice in choices:
+            choice_event_body = {
+                "finish_reason": finish_reason,
+                "index": choice.get('index', 0),
+                "message": {
+                    **({"content": choice.get('message', {}).get('content', '')} if capture_message_content else {}),
+                    "role": choice.get('message', {}).get('role', 'assistant')
+                }
+            }
+            # If tool calls exist, emit an event for each tool call.
+            tool_calls = choice.get('message', {}).get('tool_calls')
+            if tool_calls:
+                for tool_call in tool_calls:
+                    choice_event_body["message"].update({
+                        "tool_calls": {
+                            "function": {
+                                "name": tool_call.get('function', {}).get('name', ''),
+                                "arguments": tool_call.get('function', {}).get('arguments', '')
+                            },
+                            "id": tool_call.get('id', ''),
+                            "type": tool_call.get('type', 'function')
+                        }
+                    })
+                    event = otel_event(
+                        name=SemanticConvetion.GEN_AI_CHOICE,
+                        attributes={SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_AI21},
+                        body=choice_event_body
+                    )
+                    event_provider.emit(event)
+            else:
+                event = otel_event(
+                    name=SemanticConvetion.GEN_AI_CHOICE,
+                    attributes={SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_AI21},
+                    body=choice_event_body
+                )
+                event_provider.emit(event)
+    else:
+        # Single choice case.
+        choice_event_body = {
+            "finish_reason": finish_reason,
+            "index": 0,
+            "message": {
+                **({"content": llmresponse} if capture_message_content else {}),
+                "role": 'assistant'
+            }
+        }
+        event = otel_event(
+            name=SemanticConvetion.GEN_AI_CHOICE,
+            attributes={SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_AI21},
+            body=choice_event_body
+        )
+        event_provider.emit(event)
+
+    # Emit additional role-based events (if formatted messages are available).
+    for role in ['user', 'system', 'assistant', 'tool']:
+        msg = formatted_messages.get(role, {})
+        if msg.get('content', ''):
+            event_body = {
+                **({"content": msg.get('content', '')} if capture_message_content else {}),
+                "role": msg.get('role', [])
+            }
+            # For assistant messages, attach tool call details if they exist.
+            if role == 'assistant' and choices:
+                tool_calls = choices[0].get('message', {}).get('tool_calls', [])
+                if tool_calls:
+                    event_body["tool_calls"] = {
+                        "function": {
+                            "name": tool_calls[0].get('function', {}).get('name', ''),
+                            "arguments": tool_calls[0].get('function', {}).get('arguments', '')
+                        },
+                        "id": tool_calls[0].get('id', ''),
+                        "type": "function"
+                    }
+            if role == 'tool' and choices:
+                tool_calls = choices[0].get('message', {}).get('tool_calls', [])
+                if tool_calls:
+                    event_body["id"] = tool_calls[0].get('id', '')
+            event = otel_event(
+                name=getattr(SemanticConvetion, f'GEN_AI_{role.upper()}_MESSAGE'),
+                attributes={SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_AI21},
+                body=event_body
+            )
+            event_provider.emit(event)
+
+def process_chunk(self, chunk):
+    """
+    Process a chunk of response data and update state.
+    """
+
+    end_time = time.time()
+    # Record the timestamp for the current chunk.
+    self._timestamps.append(end_time)
+    if len(self._timestamps) == 1:
+        # Calculate time-to-first-chunk (TTFT).
+        self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+    chunked = response_as_dict(chunk)
+    if (len(chunked.get('choices')) > 0 and
+            'delta' in chunked.get('choices')[0] and
+            'content' in chunked.get('choices')[0].get('delta')):
+        content = chunked.get('choices')[0].get('delta').get('content')
+        if content:
+            self._llmresponse += content
+    if chunked.get('usage'):
+        self._input_tokens = chunked.get('usage').get("prompt_tokens")
+        self._output_tokens = chunked.get('usage').get("completion_tokens")
+        self._response_id = chunked.get('id')
+        self._choices += chunked.get('choices')
+        self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+                      event_provider, capture_message_content, disable_metrics, version, is_stream):
+    """
+    Process chat request and generate Telemetry.
+    """
+
+    scope._end_time = time.time()
+    if len(scope._timestamps) > 1:
+        scope._tbt = calculate_tbt(scope._timestamps)
+
+    # Extract and format input messages.
+    formatted_messages = extract_and_format_input(scope._kwargs.get("messages", ""))
+    prompt = concatenate_all_contents(formatted_messages)
+    request_model = scope._kwargs.get("model", "jamba-1.5-mini")
+
+    # Calculate cost based on token usage.
+    cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+    # Prepare tokens dictionary.
+    tokens = {
+        "finish_reason": scope._finish_reason,
+        "response_id": scope._response_id,
+        "input_tokens": scope._input_tokens,
+        "output_tokens": scope._output_tokens,
+        "total_tokens": scope._input_tokens + scope._output_tokens,
+    }
+    extra_attrs = {
+        SemanticConvetion.GEN_AI_REQUEST_IS_STREAM: is_stream,
+        SemanticConvetion.GEN_AI_CLIENT_TOKEN_USAGE: scope._input_tokens + scope._output_tokens,
+        SemanticConvetion.GEN_AI_USAGE_COST: cost,
+        SemanticConvetion.GEN_AI_SERVER_TBT: scope._tbt,
+        SemanticConvetion.GEN_AI_SERVER_TTFT: scope._ttft,
+        SemanticConvetion.GEN_AI_SDK_VERSION: version,
+        SemanticConvetion.GEN_AI_OUTPUT_TYPE: "text" if isinstance(scope._llmresponse, str) else "json"
+    }
+    # Set span attributes.
+    setup_common_span_attributes(scope._span, request_model, scope._kwargs, tokens,
+                                 scope._server_port, scope._server_address, environment,
+                                 application_name, extra_attrs)
+
+    # Optionally add events capturing the prompt and completion.
+    if capture_message_content:
+        scope._span.add_event(
+            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt},
+        )
+        scope._span.add_event(
+            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={SemanticConvetion.GEN_AI_CONTENT_COMPLETION: scope._llmresponse},
+        )
+
+    # Emit events for each choice and message role.
+    n = scope._kwargs.get('n', 1)
+    emit_common_events(event_provider, scope._choices, scope._finish_reason, scope._llmresponse,
+                       formatted_messages, capture_message_content, n)
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    if not disable_metrics:
+        record_common_metrics(metrics, application_name, environment, request_model,
+                              scope._server_address, scope._server_port,
+                              scope._start_time, scope._end_time,
+                              scope._input_tokens, scope._output_tokens, cost,
+                              include_tbt=True, tbt_value=scope._tbt)
+
+def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
+                                    event_provider, capture_message_content=False, disable_metrics=False, version=''):
+    """
+    Process a streaming chat response and generate Telemetry.
+    """
+
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+                      event_provider, capture_message_content, disable_metrics, version, is_stream=True)
+
+def process_chat_response(response, request_model, pricing_info, server_port, server_address,
+                          environment, application_name, metrics, event_provider, start_time,
+                          span, capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
+    """
+    Process a synchronous chat response and generate Telemetry.
+    """
+
+    # Create a generic scope object to hold telemetry data.
+    self = type('GenericScope', (), {})()
+
+    # pylint: disable = no-member
+    self._start_time = start_time
+    self._end_time = time.time()
+
+    self._span = span
+    # Concatenate content from all choices.
+    self._llmresponse = ''.join(
+        (choice.get('message', {}).get('content') or '')
+        for choice in response.get('choices', [])
+    )
+    self._response_role = response.get('message', {}).get('role', 'assistant')
+    self._input_tokens = response.get('usage', {}).get('prompt_tokens', 0)
+    self._output_tokens = response.get('usage', {}).get('completion_tokens', 0)
+    self._response_id = response.get('id', '')
+    self._response_model = request_model
+    self._finish_reason = response.get('choices', [{}])[0].get('finish_reason')
+    self._timestamps = []
+    self._ttft, self._tbt = self._end_time - self._start_time, 0
+    self._server_address, self._server_port = server_address, server_port
+    self._kwargs = kwargs
+    self._choices = response.get('choices')
+
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+                      event_provider, capture_message_content, disable_metrics, version, is_stream=False)
+
+    return response
+
+def process_chat_rag_response(response, request_model, pricing_info, server_port, server_address,
+                              environment, application_name, metrics, event_provider, start_time,
+                              span, capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
+    """
+    Process a chat response and generate Telemetry.
+    """
+    end_time = time.time()
+    try:
+        # Format input messages into a single prompt string.
+        messages_input = kwargs.get("messages", "")
+        formatted_messages = extract_and_format_input(messages_input)
+        prompt = concatenate_all_contents(formatted_messages)
+        input_tokens = general_tokens(prompt)
+
+        # Create tokens dict and RAG-specific extra attributes.
+        tokens = {"response_id": response.get("id"), "input_tokens": input_tokens}
+        extra_attrs = {
+            SemanticConvetion.GEN_AI_REQUEST_IS_STREAM: False,
+            SemanticConvetion.GEN_AI_SERVER_TTFT: end_time - start_time,
+            SemanticConvetion.GEN_AI_SDK_VERSION: version,
+            SemanticConvetion.GEN_AI_RAG_MAX_SEGMENTS: kwargs.get("max_segments", -1),
+            SemanticConvetion.GEN_AI_RAG_STRATEGY: kwargs.get("retrieval_strategy", "segments"),
+            SemanticConvetion.GEN_AI_RAG_SIMILARITY_THRESHOLD: kwargs.get("retrieval_similarity_threshold", -1),
+            SemanticConvetion.GEN_AI_RAG_MAX_NEIGHBORS: kwargs.get("max_neighbors", -1),
+            SemanticConvetion.GEN_AI_RAG_FILE_IDS: str(kwargs.get("file_ids", "")),
+            SemanticConvetion.GEN_AI_RAG_DOCUMENTS_PATH: kwargs.get("path", "")
+        }
+        # Set common span attributes.
+        setup_common_span_attributes(span, request_model, kwargs, tokens,
+                                     server_port, server_address, environment, application_name,
+                                     extra_attrs)
+
+        # Record the prompt event if requested.
+        if capture_message_content:
+            span.add_event(
+                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                attributes={SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt},
+            )
+
+        output_tokens = 0
+        choices = response.get('choices', [])
+        # Instead of adding a separate event per choice, we aggregate all completion content.
+        aggregated_completion = []
+        for i in range(kwargs.get('n', 1)):
+            # Get the response content from each choice and count tokens.
+            content = choices[i].get('content', '')
+            aggregated_completion.append(content)
+            output_tokens += general_tokens(content)
+            if kwargs.get('tools'):
+                span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                   str(choices[i].get('message', {}).get('tool_calls')))
+            # Set output type based on actual content type.
+            if isinstance(content, str):
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE, "text")
+            elif content is not None:
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE, "json")
+
+        # Concatenate completion responses.
+        llmresponse = ''.join(aggregated_completion)
+        tokens["output_tokens"] = output_tokens
+        tokens["total_tokens"] = input_tokens + output_tokens
+
+        cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST, cost)
+        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS, input_tokens + output_tokens)
+
+        span.set_status(Status(StatusCode.OK))
+        # Emit a single aggregated completion event.
+        if capture_message_content:
+            span.add_event(
+                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                attributes={SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse},
+            )
+        # Emit the rest of the events (choice and role-based events) as before.
+        n = kwargs.get('n', 1)
+        emit_common_events(event_provider, choices, choices[0].get('finish_reason', ''),
+                           llmresponse, formatted_messages, capture_message_content, n)
+
+        if not disable_metrics:
+            record_common_metrics(metrics, application_name, environment, request_model,
+                                  server_address, server_port, start_time, end_time,
+                                  input_tokens, output_tokens, cost, include_tbt=False)
+        return response
+
+    except Exception as e:
+        handle_exception(span, e)
+        return response
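
Worth noting in the hunk above: `process_chunk` and `common_chat_logic` are written against a duck-typed scope object rather than a concrete class. Anything carrying the underscore-prefixed state (`_timestamps`, `_llmresponse`, `_ttft`, ...) works, which is why `process_chat_response` can build one on the fly with `type('GenericScope', (), {})()`. A minimal sketch of a stream wrapper satisfying that contract — the class and the no-op event provider are illustrative, not the wrapper openlit ships in `ai21.py`:

```python
# Illustrative sketch: a stream wrapper carrying the state the utils.py
# helpers above expect, accumulating telemetry per chunk and flushing it
# once the stream is exhausted.
import time
from openlit.instrumentation.ai21.utils import (
    process_chunk,
    process_streaming_chat_response,
)

class _NoopEventProvider:
    """Stand-in event provider; the real one comes from openlit/otel/events.py."""
    def emit(self, event):
        pass

class TracedStream:
    def __init__(self, wrapped_stream, span, kwargs, server_address, server_port):
        self.__wrapped__ = wrapped_stream
        self._span = span
        self._kwargs = kwargs
        self._server_address, self._server_port = server_address, server_port
        # State that process_chunk() mutates chunk by chunk.
        self._start_time = time.time()
        self._timestamps = []
        self._llmresponse = ""
        self._choices = []
        self._input_tokens = self._output_tokens = 0
        self._response_id = ""
        self._finish_reason = ""
        self._ttft = self._tbt = 0

    def __iter__(self):
        for chunk in self.__wrapped__:
            process_chunk(self, chunk)  # updates TTFT, content, usage state
            yield chunk
        # One-shot span/metric/event emission after the last chunk.
        process_streaming_chat_response(
            self, pricing_info={}, environment="default",
            application_name="demo", metrics={},
            event_provider=_NoopEventProvider(),
            capture_message_content=False, disable_metrics=True)
```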
--- openlit/instrumentation/anthropic/__init__.py
+++ openlit/instrumentation/anthropic/__init__.py
@@ -25,7 +25,7 @@ class AnthropicInstrumentor(BaseInstrumentor):
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info", {})
-
+        capture_message_content = kwargs.get("capture_message_content", False)
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("anthropic")
 
@@ -34,7 +34,7 @@ class AnthropicInstrumentor(BaseInstrumentor):
             "anthropic.resources.messages",
             "Messages.create",
             messages(version, environment, application_name,
-                     tracer, pricing_info,
+                     tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
         #async
@@ -42,7 +42,7 @@ class AnthropicInstrumentor(BaseInstrumentor):
             "anthropic.resources.messages",
             "AsyncMessages.create",
             async_messages(version, environment, application_name,
-                           tracer, pricing_info,
+                           tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
     def _uninstrument(self, **kwargs):
--- openlit/instrumentation/anthropic/anthropic.py
+++ openlit/instrumentation/anthropic/anthropic.py
@@ -21,7 +21,7 @@ from openlit.semcov import SemanticConvetion
 logger = logging.getLogger(__name__)
 
 def messages(version, environment, application_name, tracer,
-             pricing_info,
+             pricing_info, capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for messages to collect metrics.
 
@@ -31,7 +31,7 @@ def messages(version, environment, application_name, tracer,
         application_name: Name of the application using the Anthropic API.
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of Anthropic usage.
-
+        capture_message_content: Flag indicating whether to trace the actual content.
 
     Returns:
         A function that wraps the chat method to add telemetry.
@@ -40,7 +40,7 @@ def messages(version, environment, application_name, tracer,
     class TracedSyncStream:
         """
         Wrapper for streaming responses to collect metrics and trace data.
-        Wraps the
+        Wraps the response to collect message IDs and aggregated response.
 
         This class implements the '__aiter__' and '__anext__' methods that
         handle asynchronous streaming responses.
@@ -211,7 +211,7 @@ def messages(version, environment, application_name, tracer,
                                              self._ttft)
                 self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
                                          version)
-                if
+                if capture_message_content:
                     self._span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                         attributes={
@@ -386,7 +386,7 @@ def messages(version, environment, application_name, tracer,
             span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
                                version)
 
-            if
+            if capture_message_content:
                 span.add_event(
                     name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                     attributes={
--- openlit/instrumentation/anthropic/async_anthropic.py
+++ openlit/instrumentation/anthropic/async_anthropic.py
@@ -21,7 +21,7 @@ from openlit.semcov import SemanticConvetion
 logger = logging.getLogger(__name__)
 
 def async_messages(version, environment, application_name, tracer,
-                   pricing_info,
+                   pricing_info, capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for messages to collect metrics.
 
@@ -31,7 +31,7 @@ def async_messages(version, environment, application_name, tracer,
         application_name: Name of the application using the Anthropic API.
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of Anthropic usage.
-
+        capture_message_content: Flag indicating whether to trace the actual content.
 
     Returns:
         A function that wraps the chat method to add telemetry.
@@ -40,7 +40,7 @@ def async_messages(version, environment, application_name, tracer,
     class TracedASyncStream:
         """
         Wrapper for streaming responses to collect metrics and trace data.
-        Wraps the
+        Wraps the response to collect message IDs and aggregated response.
 
         This class implements the '__aiter__' and '__anext__' methods that
         handle asynchronous streaming responses.
@@ -211,7 +211,7 @@ def async_messages(version, environment, application_name, tracer,
                                              self._ttft)
                 self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
                                          version)
-                if
+                if capture_message_content:
                     self._span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                         attributes={
@@ -386,7 +386,7 @@ def async_messages(version, environment, application_name, tracer,
             span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
                                version)
 
-            if
+            if capture_message_content:
                 span.add_event(
                     name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                     attributes={
--- openlit/instrumentation/assemblyai/__init__.py
+++ openlit/instrumentation/assemblyai/__init__.py
@@ -26,7 +26,7 @@ class AssemblyAIInstrumentor(BaseInstrumentor):
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info", {})
-
+        capture_message_content = kwargs.get("capture_message_content", False)
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("assemblyai")
 
@@ -35,7 +35,7 @@ class AssemblyAIInstrumentor(BaseInstrumentor):
             "assemblyai.transcriber",
             "Transcriber.transcribe",
             transcribe(version, environment, application_name,
-                       tracer, pricing_info,
+                       tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
     def _uninstrument(self, **kwargs):
--- openlit/instrumentation/assemblyai/assemblyai.py
+++ openlit/instrumentation/assemblyai/assemblyai.py
@@ -18,7 +18,7 @@ from openlit.semcov import SemanticConvetion
 logger = logging.getLogger(__name__)
 
 def transcribe(version, environment, application_name,
-               tracer, pricing_info,
+               tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for creating speech audio to collect metrics.
 
@@ -29,7 +29,7 @@ def transcribe(version, environment, application_name,
         application_name: Name of the application using the Assembly AI API.
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of generating speech audio.
-
+        capture_message_content: Flag indicating whether to trace the input text and generated audio.
 
     Returns:
         A function that wraps the speech audio creation method to add telemetry.
@@ -96,7 +96,7 @@ def transcribe(version, environment, application_name,
         span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
                            version)
 
-        if
+        if capture_message_content:
             span.add_event(
                 name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                 attributes={