openlit 1.33.17__py3-none-any.whl → 1.33.18__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- openlit/instrumentation/azure_ai_inference/__init__.py +5 -22
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +48 -489
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +48 -489
- openlit/instrumentation/azure_ai_inference/utils.py +225 -0
- {openlit-1.33.17.dist-info → openlit-1.33.18.dist-info}/METADATA +1 -1
- {openlit-1.33.17.dist-info → openlit-1.33.18.dist-info}/RECORD +8 -7
- {openlit-1.33.17.dist-info → openlit-1.33.18.dist-info}/LICENSE +0 -0
- {openlit-1.33.17.dist-info → openlit-1.33.18.dist-info}/WHEEL +0 -0
openlit/instrumentation/azure_ai_inference/utils.py
@@ -0,0 +1,225 @@
+"""
+Azure AI Inference OpenTelemetry instrumentation utility functions
+"""
+import time
+
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    calculate_ttft,
+    response_as_dict,
+    calculate_tbt,
+    extract_and_format_input,
+    get_chat_model_cost,
+    create_metrics_attributes,
+    otel_event,
+    concatenate_all_contents
+)
+from openlit.semcov import SemanticConvetion
+
+def process_chunk(self, chunk):
+    """
+    Process a chunk of response data and update state.
+    """
+
+    end_time = time.time()
+    # Record the timestamp for the current chunk
+    self._timestamps.append(end_time)
+
+    if len(self._timestamps) == 1:
+        # Calculate time to first chunk
+        self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+    chunked = response_as_dict(chunk)
+
+    # Collect message IDs and aggregated response from events
+    if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
+            'content' in chunked.get('choices')[0].get('delta'))):
+
+        if content := chunked.get('choices')[0].get('delta').get('content'):
+            self._llmresponse += content
+
+    if chunked.get('choices')[0].get('finish_reason') is not None:
+        self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+
+    if chunked.get('usage') is not None:
+        self._input_tokens = chunked.get('usage').get('prompt_tokens')
+        self._response_id = chunked.get('id')
+        self._response_model = chunked.get('model')
+        self._output_tokens = chunked.get('usage').get('completion_tokens')
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+                      event_provider, capture_message_content, disable_metrics, version, is_stream):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    scope._end_time = time.time()
+    if len(scope._timestamps) > 1:
+        scope._tbt = calculate_tbt(scope._timestamps)
+
+    formatted_messages = extract_and_format_input(scope._kwargs.get('messages', ''))
+    request_model = scope._kwargs.get('model', 'claude-3-opus-20240229')
+
+    cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+
+    # Set Span attributes (OTel Semconv)
+    scope._span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION, SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM, SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL, request_model)
+    scope._span.set_attribute(SemanticConvetion.SERVER_PORT, scope._server_port)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get('max_tokens', -1))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get('stop', []))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get('temperature', 1.0))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K, scope._kwargs.get('top_k', 1.0))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P, scope._kwargs.get('top_p', 1.0))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                              scope._kwargs.get("frequency_penalty", 0.0))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                              scope._kwargs.get("presence_penalty", 0.0))
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID, scope._response_id)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL, scope._response_model)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
+    scope._span.set_attribute(SemanticConvetion.SERVER_ADDRESS, scope._server_address)
+
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                              'text' if isinstance(scope._llmresponse, str) else 'json')
+
+    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
+    scope._span.set_attribute(SERVICE_NAME, application_name)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM, is_stream)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST, cost)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT, scope._tbt)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT, scope._ttft)
+    scope._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION, version)
+
+    # To be removed one the change to log events (from span events) is complete
+    prompt = concatenate_all_contents(formatted_messages)
+    if capture_message_content:
+        scope._span.add_event(
+            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+            },
+        )
+
+    choice_event_body = {
+        'finish_reason': scope._finish_reason,
+        'index': 0,
+        'message': {
+            **({'content': scope._llmresponse} if capture_message_content else {}),
+            'role': 'assistant'
+        }
+    }
+
+    # Emit events
+    for role in ['user', 'system', 'assistant', 'tool']:
+        if formatted_messages.get(role, {}).get('content', ''):
+            event = otel_event(
+                name=getattr(SemanticConvetion, f'GEN_AI_{role.upper()}_MESSAGE'),
+                attributes={
+                    SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE
+                },
+                body = {
+                    # pylint: disable=line-too-long
+                    **({'content': formatted_messages.get(role, {}).get('content', '')} if capture_message_content else {}),
+                    'role': formatted_messages.get(role, {}).get('role', []),
+                    **({
+                        'tool_calls': {
+                            'function': {
+                                # pylint: disable=line-too-long
+                                'name': (scope._tool_calls[0].get('function', {}).get('name', '') if scope._tool_calls else ''),
+                                'arguments': (scope._tool_calls[0].get('function', {}).get('arguments', '') if scope._tool_calls else '')
+                            },
+                            'id': (scope._tool_calls[0].get('id', '') if scope._tool_calls else ''),
+                            'type': 'function'
+                        }
+                    } if role == 'assistant' else {}),
+                    **({
+                        'id': (scope._tool_calls[0].get('id', '') if scope._tool_calls else '')
+                    } if role == 'tool' else {})
+                }
+            )
+            event_provider.emit(event)
+
+    choice_event = otel_event(
+        name=SemanticConvetion.GEN_AI_CHOICE,
+        attributes={
+            SemanticConvetion.GEN_AI_SYSTEM: SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE
+        },
+        body=choice_event_body
+    )
+    event_provider.emit(choice_event)
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    if not disable_metrics:
+        metrics_attributes = create_metrics_attributes(
+            service_name=application_name,
+            deployment_environment=environment,
+            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+            system=SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+            request_model=request_model,
+            server_address=scope._server_address,
+            server_port=scope._server_port,
+            response_model=scope._response_model,
+        )
+
+        metrics['genai_client_usage_tokens'].record(scope._input_tokens + scope._output_tokens, metrics_attributes)
+        metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
+        metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
+        metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
+        metrics['genai_requests'].add(1, metrics_attributes)
+        metrics['genai_completion_tokens'].add(scope._output_tokens, metrics_attributes)
+        metrics['genai_prompt_tokens'].add(scope._input_tokens, metrics_attributes)
+        metrics['genai_cost'].record(cost, metrics_attributes)
+
+def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
+                                    event_provider, capture_message_content=False, disable_metrics=False, version=''):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+                      event_provider, capture_message_content, disable_metrics, version, is_stream=True)
+
+def process_chat_response(response, request_model, pricing_info, server_port, server_address,
+                          environment, application_name, metrics, event_provider, start_time,
+                          span, capture_message_content=False, disable_metrics=False, version='1.0.0', **kwargs):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    self = type('GenericScope', (), {})()
+    response_dict = response_as_dict(response)
+
+    # pylint: disable = no-member
+    self._start_time = start_time
+    self._end_time = time.time()
+    self._span = span
+    self._llmresponse = response_dict.get('choices', {})[0].get('message', '').get('content', '')
+    self._input_tokens = response_dict.get('usage').get('prompt_tokens')
+    self._output_tokens = response_dict.get('usage').get('completion_tokens')
+    self._response_model = response_dict.get('model', '')
+    self._finish_reason = response_dict.get('choices', {})[0].get('finish_reason', '')
+    self._response_id = response_dict.get('id', '')
+    self._timestamps = []
+    self._ttft, self._tbt = self._end_time - self._start_time, 0
+    self._server_address, self._server_port = server_address, server_port
+    self._kwargs = kwargs
+
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+                      event_provider, capture_message_content, disable_metrics, version, is_stream=False)
+
+    return response
{openlit-1.33.17.dist-info → openlit-1.33.18.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.33.17
+Version: 1.33.18
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
{openlit-1.33.17.dist-info → openlit-1.33.18.dist-info}/RECORD
@@ -28,9 +28,10 @@ openlit/instrumentation/astra/__init__.py,sha256=-JG3_YHQQaOQUr4XtFzqfaYiQKqviAA
 openlit/instrumentation/astra/astra.py,sha256=JH2-7RJBbk6nM9kBEVgbxCXXnzgTuGT0KoBhVGBGeIs,1607
 openlit/instrumentation/astra/async_astra.py,sha256=mMG22exgduREIe-7s2TdqLM1Ub8wP_ttcIS8wJH5P1Y,1625
 openlit/instrumentation/astra/utils.py,sha256=-Af5R_g8-x9XeQiepLBW3Qa3Beji4EMxppDtiE_nmzM,4933
-openlit/instrumentation/azure_ai_inference/__init__.py,sha256=
-openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py,sha256=
-openlit/instrumentation/azure_ai_inference/azure_ai_inference.py,sha256=
+openlit/instrumentation/azure_ai_inference/__init__.py,sha256=ZoMAX_MUNCNMJqLZgl0A_kQ_lsgoz3VddkHiDT3pVF8,2032
+openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py,sha256=o5KbVT9UbBZzhdGpw7p1P_7c-IKmy9XVAySZNd1hqAg,5056
+openlit/instrumentation/azure_ai_inference/azure_ai_inference.py,sha256=WwoAQO5UCeX7bbK8GbtukqqY_s0w0U9Nab_zrU-3T8I,4970
+openlit/instrumentation/azure_ai_inference/utils.py,sha256=YVzrfumTK29A-igAPxsG_ERggAnLlRnWI5MGRHAXxgI,10997
 openlit/instrumentation/bedrock/__init__.py,sha256=ZdCBjgwK92cclnbFfF90NC6AsRreom3nT3CklbM7EmM,1555
 openlit/instrumentation/bedrock/bedrock.py,sha256=jBDzm005Y9rbcTLxo-yShOtRb65NCDFyUYtggi0XRF0,12264
 openlit/instrumentation/chroma/__init__.py,sha256=4ZeHY1OInRKQbb4qg8BVvGJtWN1XdzW6mosqi7-6ruE,3353
@@ -123,7 +124,7 @@ openlit/otel/events.py,sha256=VrMjTpvnLtYRBHCiFwJojTQqqNpRCxoD4yJYeQrtPsk,3560
 openlit/otel/metrics.py,sha256=Iwx6baEiCZPNqsFf92K5mDWU8are8DOF0uQAuNZsCKg,6826
 openlit/otel/tracing.py,sha256=tjV2bEbEDPUB1Z46gE-UsJsb04sRdFrfbhIDkxViZc0,3103
 openlit/semcov/__init__.py,sha256=lM0Y3wMYYmCvfcNGD3k0xSn1XZUiGw-bKgCuwcGsOp8,13302
-openlit-1.33.
-openlit-1.33.
-openlit-1.33.
-openlit-1.33.
+openlit-1.33.18.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+openlit-1.33.18.dist-info/METADATA,sha256=ILFGRCT5sslnrJoaoeKDGqnbKhjs-nYI4BRM9S3-7-A,23471
+openlit-1.33.18.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+openlit-1.33.18.dist-info/RECORD,,
{openlit-1.33.17.dist-info → openlit-1.33.18.dist-info}/LICENSE: file without changes
{openlit-1.33.17.dist-info → openlit-1.33.18.dist-info}/WHEEL: file without changes