openlit 1.33.10__py3-none-any.whl → 1.33.12__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- openlit/__helpers.py +125 -88
- openlit/__init__.py +38 -11
- openlit/instrumentation/ag2/__init__.py +19 -20
- openlit/instrumentation/ag2/ag2.py +134 -69
- openlit/instrumentation/ai21/__init__.py +22 -21
- openlit/instrumentation/ai21/ai21.py +82 -546
- openlit/instrumentation/ai21/async_ai21.py +82 -546
- openlit/instrumentation/ai21/utils.py +409 -0
- openlit/instrumentation/anthropic/__init__.py +16 -16
- openlit/instrumentation/anthropic/anthropic.py +61 -353
- openlit/instrumentation/anthropic/async_anthropic.py +62 -354
- openlit/instrumentation/anthropic/utils.py +251 -0
- openlit/instrumentation/assemblyai/__init__.py +2 -2
- openlit/instrumentation/assemblyai/assemblyai.py +3 -3
- openlit/instrumentation/astra/__init__.py +25 -25
- openlit/instrumentation/astra/astra.py +2 -2
- openlit/instrumentation/astra/async_astra.py +2 -2
- openlit/instrumentation/azure_ai_inference/__init__.py +5 -5
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +8 -8
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +8 -8
- openlit/instrumentation/bedrock/__init__.py +2 -2
- openlit/instrumentation/bedrock/bedrock.py +3 -3
- openlit/instrumentation/chroma/__init__.py +9 -9
- openlit/instrumentation/chroma/chroma.py +2 -2
- openlit/instrumentation/cohere/__init__.py +7 -7
- openlit/instrumentation/cohere/async_cohere.py +9 -9
- openlit/instrumentation/cohere/cohere.py +9 -9
- openlit/instrumentation/controlflow/__init__.py +4 -4
- openlit/instrumentation/controlflow/controlflow.py +2 -2
- openlit/instrumentation/crawl4ai/__init__.py +3 -3
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
- openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
- openlit/instrumentation/crewai/__init__.py +3 -3
- openlit/instrumentation/crewai/crewai.py +2 -2
- openlit/instrumentation/dynamiq/__init__.py +5 -5
- openlit/instrumentation/dynamiq/dynamiq.py +2 -2
- openlit/instrumentation/elevenlabs/__init__.py +5 -5
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +3 -3
- openlit/instrumentation/elevenlabs/elevenlabs.py +3 -3
- openlit/instrumentation/embedchain/__init__.py +2 -2
- openlit/instrumentation/embedchain/embedchain.py +4 -4
- openlit/instrumentation/firecrawl/__init__.py +3 -3
- openlit/instrumentation/firecrawl/firecrawl.py +2 -2
- openlit/instrumentation/google_ai_studio/__init__.py +3 -3
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +3 -3
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +3 -3
- openlit/instrumentation/gpt4all/__init__.py +3 -3
- openlit/instrumentation/gpt4all/gpt4all.py +7 -7
- openlit/instrumentation/groq/__init__.py +3 -3
- openlit/instrumentation/groq/async_groq.py +5 -5
- openlit/instrumentation/groq/groq.py +5 -5
- openlit/instrumentation/haystack/__init__.py +2 -2
- openlit/instrumentation/haystack/haystack.py +2 -2
- openlit/instrumentation/julep/__init__.py +7 -7
- openlit/instrumentation/julep/async_julep.py +3 -3
- openlit/instrumentation/julep/julep.py +3 -3
- openlit/instrumentation/langchain/__init__.py +2 -2
- openlit/instrumentation/langchain/async_langchain.py +13 -9
- openlit/instrumentation/langchain/langchain.py +13 -8
- openlit/instrumentation/letta/__init__.py +7 -7
- openlit/instrumentation/letta/letta.py +5 -5
- openlit/instrumentation/litellm/__init__.py +5 -5
- openlit/instrumentation/litellm/async_litellm.py +8 -8
- openlit/instrumentation/litellm/litellm.py +8 -8
- openlit/instrumentation/llamaindex/__init__.py +2 -2
- openlit/instrumentation/llamaindex/llamaindex.py +2 -2
- openlit/instrumentation/mem0/__init__.py +2 -2
- openlit/instrumentation/mem0/mem0.py +2 -2
- openlit/instrumentation/milvus/__init__.py +2 -2
- openlit/instrumentation/milvus/milvus.py +2 -2
- openlit/instrumentation/mistral/__init__.py +7 -7
- openlit/instrumentation/mistral/async_mistral.py +10 -10
- openlit/instrumentation/mistral/mistral.py +10 -10
- openlit/instrumentation/multion/__init__.py +7 -7
- openlit/instrumentation/multion/async_multion.py +5 -5
- openlit/instrumentation/multion/multion.py +5 -5
- openlit/instrumentation/ollama/__init__.py +11 -9
- openlit/instrumentation/ollama/async_ollama.py +71 -465
- openlit/instrumentation/ollama/ollama.py +71 -465
- openlit/instrumentation/ollama/utils.py +332 -0
- openlit/instrumentation/openai/__init__.py +11 -11
- openlit/instrumentation/openai/async_openai.py +18 -18
- openlit/instrumentation/openai/openai.py +18 -18
- openlit/instrumentation/phidata/__init__.py +2 -2
- openlit/instrumentation/phidata/phidata.py +2 -2
- openlit/instrumentation/pinecone/__init__.py +6 -6
- openlit/instrumentation/pinecone/pinecone.py +2 -2
- openlit/instrumentation/premai/__init__.py +3 -3
- openlit/instrumentation/premai/premai.py +7 -7
- openlit/instrumentation/qdrant/__init__.py +2 -2
- openlit/instrumentation/qdrant/async_qdrant.py +2 -2
- openlit/instrumentation/qdrant/qdrant.py +2 -2
- openlit/instrumentation/reka/__init__.py +3 -3
- openlit/instrumentation/reka/async_reka.py +3 -3
- openlit/instrumentation/reka/reka.py +3 -3
- openlit/instrumentation/together/__init__.py +5 -5
- openlit/instrumentation/together/async_together.py +8 -8
- openlit/instrumentation/together/together.py +8 -8
- openlit/instrumentation/transformers/__init__.py +2 -2
- openlit/instrumentation/transformers/transformers.py +4 -4
- openlit/instrumentation/vertexai/__init__.py +9 -9
- openlit/instrumentation/vertexai/async_vertexai.py +4 -4
- openlit/instrumentation/vertexai/vertexai.py +4 -4
- openlit/instrumentation/vllm/__init__.py +2 -2
- openlit/instrumentation/vllm/vllm.py +3 -3
- openlit/otel/events.py +85 -0
- openlit/otel/tracing.py +3 -13
- openlit/semcov/__init__.py +13 -1
- {openlit-1.33.10.dist-info → openlit-1.33.12.dist-info}/METADATA +2 -2
- openlit-1.33.12.dist-info/RECORD +126 -0
- openlit-1.33.10.dist-info/RECORD +0 -122
- {openlit-1.33.10.dist-info → openlit-1.33.12.dist-info}/LICENSE +0 -0
- {openlit-1.33.10.dist-info → openlit-1.33.12.dist-info}/WHEEL +0 -0
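Taken together, the per-file counts show the shape of this release: the largest provider modules shed hundreds of lines while shared telemetry logic moves into new per-provider utility modules (`ai21/ai21.py` and `async_ai21.py` each lose 546 lines while a new `ai21/utils.py` gains 409; `anthropic` and `ollama` follow the same pattern with new `utils.py` modules of 251 and 332 lines). Alongside the refactor, a new `openlit/otel/events.py` (+85) adds OpenTelemetry event support, and an `event_provider` handle plus a `capture_message_content` flag are threaded through the instrumentors. Two representative diffs follow: the new AI21 utility module in full, then the updated Anthropic instrumentor entry point.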
--- /dev/null
+++ b/openlit/instrumentation/ai21/utils.py
@@ -0,0 +1,409 @@
+"""
+AI21 OpenTelemetry instrumentation utility functions
+"""
+
+import time
+
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    calculate_ttft,
+    response_as_dict,
+    calculate_tbt,
+    general_tokens,
+    extract_and_format_input,
+    get_chat_model_cost,
+    handle_exception,
+    create_metrics_attributes,
+    otel_event,
+    concatenate_all_contents
+)
+from openlit.semcov import SemanticConvetion
+
+def setup_common_span_attributes(span, request_model, kwargs, tokens,
+                                 server_port, server_address, environment,
+                                 application_name, extra_attrs):
+    """
+    Set common span attributes for both chat and RAG operations.
+    """
+
+    # Base attributes from SDK and operation settings.
+    span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
+    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION, SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM, SemanticConvetion.GEN_AI_SYSTEM_AI21)
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL, request_model)
+    span.set_attribute(SemanticConvetion.SERVER_PORT, server_port)
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED, kwargs.get('seed', ''))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, kwargs.get('frequency_penalty', 0.0))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, kwargs.get('max_tokens', -1))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY, kwargs.get('presence_penalty', 0.0))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, kwargs.get('stop', []))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, kwargs.get('temperature', 0.4))
+    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P, kwargs.get('top_p', 1.0))
+
+    # Add token-related attributes if available.
+    if 'finish_reason' in tokens:
+        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON, [tokens['finish_reason']])
+    if 'response_id' in tokens:
+        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID, tokens['response_id'])
+    if 'input_tokens' in tokens:
+        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS, tokens['input_tokens'])
+    if 'output_tokens' in tokens:
+        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS, tokens['output_tokens'])
+    if 'total_tokens' in tokens:
+        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS, tokens['total_tokens'])
+
+    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL, request_model)
+    span.set_attribute(SemanticConvetion.SERVER_ADDRESS, server_address)
+    # Environment and service identifiers.
+    span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
+    span.set_attribute(SERVICE_NAME, application_name)
+    # Set any extra attributes passed in.
+    for key, value in extra_attrs.items():
+        span.set_attribute(key, value)
+
+def record_common_metrics(metrics, application_name, environment, request_model,
+                          server_address, server_port, start_time, end_time,
+                          input_tokens, output_tokens, cost, include_tbt=False, tbt_value=None):
+    """
+    Record common metrics for the operation.
+    """
+
+    attributes = create_metrics_attributes(
+        service_name=application_name,
+        deployment_environment=environment,
+        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+        system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+        request_model=request_model,
+        server_address=server_address,
+        server_port=server_port,
+        response_model=request_model,
+    )
+    metrics['genai_client_usage_tokens'].record(input_tokens + output_tokens, attributes)
+    metrics['genai_client_operation_duration'].record(end_time - start_time, attributes)
+    if include_tbt and tbt_value is not None:
+        metrics['genai_server_tbt'].record(tbt_value, attributes)
+    metrics['genai_server_ttft'].record(end_time - start_time, attributes)
+    metrics['genai_requests'].add(1, attributes)
+    metrics['genai_completion_tokens'].add(output_tokens, attributes)
+    metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+    metrics['genai_cost'].record(cost, attributes)
+
+def emit_common_events(event_provider, choices, finish_reason, llmresponse, formatted_messages,
+                       capture_message_content, n):
+    """
+    Emit events common to both chat and chat rag operations.
+    """
+
+    if n > 1:
+        for choice in choices:
+            choice_event_body = {
+                'finish_reason': finish_reason,
+                'index': choice.get('index', 0),
+                'message': {
+                    **({'content': choice.get('message', {}).get('content', '')} if capture_message_content else {}),
+                    'role': choice.get('message', {}).get('role', 'assistant')
+                }
+            }
+            # If tool calls exist, emit an event for each tool call.
+            tool_calls = choice.get('message', {}).get('tool_calls')
+            if tool_calls:
+                for tool_call in tool_calls:
+                    choice_event_body['message'].update({
+                        'tool_calls': {
+                            'function': {
+                                'name': tool_call.get('function', {}).get('name', ''),
+                                'arguments': tool_call.get('function', {}).get('arguments', '')
+                            },
+                            'id': tool_call.get('id', ''),
+                            'type': tool_call.get('type', 'function')
+                        }
+                    })
+                    event = otel_event(
+                        name=SemanticConvetion.GEN_AI_CHOICE,
+                        attributes={SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_AI21},
+                        body=choice_event_body
+                    )
+                    event_provider.emit(event)
+            else:
+                event = otel_event(
+                    name=SemanticConvetion.GEN_AI_CHOICE,
+                    attributes={SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_AI21},
+                    body=choice_event_body
+                )
+                event_provider.emit(event)
+    else:
+        # Single choice case.
+        choice_event_body = {
+            'finish_reason': finish_reason,
+            'index': 0,
+            'message': {
+                **({'content': llmresponse} if capture_message_content else {}),
+                'role': 'assistant'
+            }
+        }
+        event = otel_event(
+            name=SemanticConvetion.GEN_AI_CHOICE,
+            attributes={SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_AI21},
+            body=choice_event_body
+        )
+        event_provider.emit(event)
+
+    # Emit additional role-based events (if formatted messages are available).
+    for role in ['user', 'system', 'assistant', 'tool']:
+        msg = formatted_messages.get(role, {})
+        if msg.get('content', ''):
+            event_body = {
+                **({'content': msg.get('content', '')} if capture_message_content else {}),
+                'role': msg.get('role', [])
+            }
+            # For assistant messages, attach tool call details if they exist.
+            if role == 'assistant' and choices:
+                tool_calls = choices[0].get('message', {}).get('tool_calls', [])
+                if tool_calls:
+                    event_body['tool_calls'] = {
+                        'function': {
+                            'name': tool_calls[0].get('function', {}).get('name', ''),
+                            'arguments': tool_calls[0].get('function', {}).get('arguments', '')
+                        },
+                        'id': tool_calls[0].get('id', ''),
+                        'type': 'function'
+                    }
+            if role == 'tool' and choices:
+                tool_calls = choices[0].get('message', {}).get('tool_calls', [])
+                if tool_calls:
+                    event_body['id'] = tool_calls[0].get('id', '')
+            event = otel_event(
+                name=getattr(SemanticConvetion, f'GEN_AI_{role.upper()}_MESSAGE'),
+                attributes={SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_AI21},
+                body=event_body
+            )
+            event_provider.emit(event)
+
+def process_chunk(self, chunk):
+    """
+    Process a chunk of response data and update state.
+    """
+
+    end_time = time.time()
+    # Record the timestamp for the current chunk.
+    self._timestamps.append(end_time)
+    if len(self._timestamps) == 1:
+        # Calculate time-to-first-chunk (TTFT).
+        self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+    chunked = response_as_dict(chunk)
+    if (len(chunked.get('choices')) > 0 and
+            'delta' in chunked.get('choices')[0] and
+            'content' in chunked.get('choices')[0].get('delta')):
+        content = chunked.get('choices')[0].get('delta').get('content')
+        if content:
+            self._llmresponse += content
+    if chunked.get('usage'):
+        self._input_tokens = chunked.get('usage').get('prompt_tokens')
+        self._output_tokens = chunked.get('usage').get('completion_tokens')
+        self._response_id = chunked.get('id')
+        self._choices += chunked.get('choices')
+        self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+                      event_provider, capture_message_content, disable_metrics, version, is_stream):
+    """
+    Process chat request and generate Telemetry.
+    """
+
+    scope._end_time = time.time()
+    if len(scope._timestamps) > 1:
+        scope._tbt = calculate_tbt(scope._timestamps)
+
+    # Extract and format input messages.
+    formatted_messages = extract_and_format_input(scope._kwargs.get('messages', ''))
+    prompt = concatenate_all_contents(formatted_messages)
+    request_model = scope._kwargs.get('model', 'jamba-1.5-mini')
+
+    # Calculate cost based on token usage.
+    cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+    # Prepare tokens dictionary.
+    tokens = {
+        'finish_reason': scope._finish_reason,
+        'response_id': scope._response_id,
+        'input_tokens': scope._input_tokens,
+        'output_tokens': scope._output_tokens,
+        'total_tokens': scope._input_tokens + scope._output_tokens,
+    }
+    extra_attrs = {
+        SemanticConvetion.GEN_AI_REQUEST_IS_STREAM: is_stream,
+        SemanticConvetion.GEN_AI_CLIENT_TOKEN_USAGE: scope._input_tokens + scope._output_tokens,
+        SemanticConvetion.GEN_AI_USAGE_COST: cost,
+        SemanticConvetion.GEN_AI_SERVER_TBT: scope._tbt,
+        SemanticConvetion.GEN_AI_SERVER_TTFT: scope._ttft,
+        SemanticConvetion.GEN_AI_SDK_VERSION: version,
+        SemanticConvetion.GEN_AI_OUTPUT_TYPE: 'text' if isinstance(scope._llmresponse, str) else 'json'
+    }
+    # Set span attributes.
+    setup_common_span_attributes(scope._span, request_model, scope._kwargs, tokens,
+                                 scope._server_port, scope._server_address, environment,
+                                 application_name, extra_attrs)
+
+    # Optionally add events capturing the prompt and completion.
+    if capture_message_content:
+        scope._span.add_event(
+            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt},
+        )
+        scope._span.add_event(
+            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={SemanticConvetion.GEN_AI_CONTENT_COMPLETION: scope._llmresponse},
+        )
+
+    # Emit events for each choice and message role.
+    n = scope._kwargs.get('n', 1)
+    emit_common_events(event_provider, scope._choices, scope._finish_reason, scope._llmresponse,
+                       formatted_messages, capture_message_content, n)
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    if not disable_metrics:
+        record_common_metrics(metrics, application_name, environment, request_model,
+                              scope._server_address, scope._server_port,
+                              scope._start_time, scope._end_time,
+                              scope._input_tokens, scope._output_tokens, cost,
+                              include_tbt=True, tbt_value=scope._tbt)
+
+def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
+                                    event_provider, capture_message_content=False, disable_metrics=False, version=''):
+    """
+    Process a streaming chat response and generate Telemetry.
+    """
+
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+                      event_provider, capture_message_content, disable_metrics, version, is_stream=True)
+
+def process_chat_response(response, request_model, pricing_info, server_port, server_address,
+                          environment, application_name, metrics, event_provider, start_time,
+                          span, capture_message_content=False, disable_metrics=False, version='1.0.0', **kwargs):
+    """
+    Process a synchronous chat response and generate Telemetry.
+    """
+
+    # Create a generic scope object to hold telemetry data.
+    self = type('GenericScope', (), {})()
+    response_dict = response_as_dict(response)
+
+    # pylint: disable = no-member
+    self._start_time = start_time
+    self._end_time = time.time()
+
+    self._span = span
+    # Concatenate content from all choices.
+    self._llmresponse = ''.join(
+        (choice.get('message', {}).get('content') or '')
+        for choice in response_dict.get('choices', [])
+    )
+    self._response_role = response_dict.get('message', {}).get('role', 'assistant')
+    self._input_tokens = response_dict.get('usage', {}).get('prompt_tokens', 0)
+    self._output_tokens = response_dict.get('usage', {}).get('completion_tokens', 0)
+    self._response_id = response_dict.get('id', '')
+    self._response_model = request_model
+    self._finish_reason = response_dict.get('choices', [{}])[0].get('finish_reason')
+    self._timestamps = []
+    self._ttft, self._tbt = self._end_time - self._start_time, 0
+    self._server_address, self._server_port = server_address, server_port
+    self._kwargs = kwargs
+    self._choices = response_dict.get('choices')
+
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+                      event_provider, capture_message_content, disable_metrics, version, is_stream=False)
+
+    return response
+
+def process_chat_rag_response(response, request_model, pricing_info, server_port, server_address,
+                              environment, application_name, metrics, event_provider, start_time,
+                              span, capture_message_content=False, disable_metrics=False, version='1.0.0', **kwargs):
+    """
+    Process a chat response and generate Telemetry.
+    """
+    end_time = time.time()
+    response_dict = response_as_dict(response)
+    try:
+        # Format input messages into a single prompt string.
+        messages_input = kwargs.get('messages', '')
+        formatted_messages = extract_and_format_input(messages_input)
+        prompt = concatenate_all_contents(formatted_messages)
+        input_tokens = general_tokens(prompt)
+
+        # Create tokens dict and RAG-specific extra attributes.
+        tokens = {'response_id': response_dict.get('id'), 'input_tokens': input_tokens}
+        extra_attrs = {
+            SemanticConvetion.GEN_AI_REQUEST_IS_STREAM: False,
+            SemanticConvetion.GEN_AI_SERVER_TTFT: end_time - start_time,
+            SemanticConvetion.GEN_AI_SDK_VERSION: version,
+            SemanticConvetion.GEN_AI_RAG_MAX_SEGMENTS: kwargs.get('max_segments', -1),
+            SemanticConvetion.GEN_AI_RAG_STRATEGY: kwargs.get('retrieval_strategy', 'segments'),
+            SemanticConvetion.GEN_AI_RAG_SIMILARITY_THRESHOLD: kwargs.get('retrieval_similarity_threshold', -1),
+            SemanticConvetion.GEN_AI_RAG_MAX_NEIGHBORS: kwargs.get('max_neighbors', -1),
+            SemanticConvetion.GEN_AI_RAG_FILE_IDS: str(kwargs.get('file_ids', '')),
+            SemanticConvetion.GEN_AI_RAG_DOCUMENTS_PATH: kwargs.get('path', '')
+        }
+        # Set common span attributes.
+        setup_common_span_attributes(span, request_model, kwargs, tokens,
+                                     server_port, server_address, environment, application_name,
+                                     extra_attrs)
+
+        # Record the prompt event if requested.
+        if capture_message_content:
+            span.add_event(
+                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                attributes={SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt},
+            )
+
+        output_tokens = 0
+        choices = response_dict.get('choices', [])
+        # Instead of adding a separate event per choice, we aggregate all completion content.
+        aggregated_completion = []
+        for i in range(kwargs.get('n', 1)):
+            # Get the response content from each choice and count tokens.
+            content = choices[i].get('content', '')
+            aggregated_completion.append(content)
+            output_tokens += general_tokens(content)
+            if kwargs.get('tools'):
+                span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                   str(choices[i].get('message', {}).get('tool_calls')))
+            # Set output type based on actual content type.
+            if isinstance(content, str):
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE, 'text')
+            elif content is not None:
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE, 'json')
+
+        # Concatenate completion responses.
+        llmresponse = ''.join(aggregated_completion)
+        tokens['output_tokens'] = output_tokens
+        tokens['total_tokens'] = input_tokens + output_tokens
+
+        cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST, cost)
+        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS, input_tokens + output_tokens)
+
+        span.set_status(Status(StatusCode.OK))
+        # Emit a single aggregated completion event.
+        if capture_message_content:
+            span.add_event(
+                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                attributes={SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse},
+            )
+        # Emit the rest of the events (choice and role-based events) as before.
+        n = kwargs.get('n', 1)
+        emit_common_events(event_provider, choices, choices[0].get('finish_reason', ''),
+                           llmresponse, formatted_messages, capture_message_content, n)

+        if not disable_metrics:
+            record_common_metrics(metrics, application_name, environment, request_model,
+                                  server_address, server_port, start_time, end_time,
+                                  input_tokens, output_tokens, cost, include_tbt=False)
+        return response
+
+    except Exception as e:
+        handle_exception(span, e)
+        return response
--- a/openlit/instrumentation/anthropic/__init__.py
+++ b/openlit/instrumentation/anthropic/__init__.py
@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of Anthropic Functions"""
 
 from typing import Collection
@@ -9,7 +8,7 @@ from wrapt import wrap_function_wrapper
 from openlit.instrumentation.anthropic.anthropic import messages
 from openlit.instrumentation.anthropic.async_anthropic import async_messages
 
-_instruments = (
+_instruments = ('anthropic >= 0.21.0',)
 
 class AnthropicInstrumentor(BaseInstrumentor):
     """
@@ -20,29 +19,30 @@ class AnthropicInstrumentor(BaseInstrumentor):
         return _instruments
 
     def _instrument(self, **kwargs):
-        application_name = kwargs.get(
-        environment = kwargs.get(
-        tracer = kwargs.get(
-
-
-
-
-
+        application_name = kwargs.get('application_name', 'default')
+        environment = kwargs.get('environment', 'default')
+        tracer = kwargs.get('tracer')
+        event_provider = kwargs.get('event_provider')
+        metrics = kwargs.get('metrics_dict')
+        pricing_info = kwargs.get('pricing_info', {})
+        capture_message_content = kwargs.get('capture_message_content', False)
+        disable_metrics = kwargs.get('disable_metrics')
+        version = importlib.metadata.version('anthropic')
 
         #sync
         wrap_function_wrapper(
-
-
+            'anthropic.resources.messages',
+            'Messages.create',
             messages(version, environment, application_name,
-                     tracer, pricing_info,
+                     tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
         #async
         wrap_function_wrapper(
-
-
+            'anthropic.resources.messages',
+            'AsyncMessages.create',
             async_messages(version, environment, application_name,
-                           tracer, pricing_info,
+                           tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
     def _uninstrument(self, **kwargs):