openlit 1.33.9__py3-none-any.whl → 1.33.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +5 -0
- openlit/__init__.py +3 -2
- openlit/instrumentation/ag2/ag2.py +3 -3
- openlit/instrumentation/ai21/ai21.py +1 -1
- openlit/instrumentation/ai21/async_ai21.py +1 -1
- openlit/instrumentation/anthropic/anthropic.py +1 -1
- openlit/instrumentation/anthropic/async_anthropic.py +1 -1
- openlit/instrumentation/astra/astra.py +5 -5
- openlit/instrumentation/astra/async_astra.py +5 -5
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +3 -3
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +3 -3
- openlit/instrumentation/chroma/chroma.py +5 -5
- openlit/instrumentation/cohere/async_cohere.py +1 -1
- openlit/instrumentation/cohere/cohere.py +2 -2
- openlit/instrumentation/controlflow/controlflow.py +3 -3
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +3 -3
- openlit/instrumentation/crawl4ai/crawl4ai.py +3 -3
- openlit/instrumentation/crewai/crewai.py +4 -2
- openlit/instrumentation/dynamiq/dynamiq.py +3 -3
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +1 -2
- openlit/instrumentation/elevenlabs/elevenlabs.py +1 -2
- openlit/instrumentation/embedchain/embedchain.py +5 -5
- openlit/instrumentation/firecrawl/firecrawl.py +3 -3
- openlit/instrumentation/gpt4all/__init__.py +2 -2
- openlit/instrumentation/gpt4all/gpt4all.py +345 -220
- openlit/instrumentation/gpu/__init__.py +5 -5
- openlit/instrumentation/groq/__init__.py +2 -2
- openlit/instrumentation/groq/async_groq.py +356 -240
- openlit/instrumentation/groq/groq.py +356 -240
- openlit/instrumentation/haystack/haystack.py +3 -3
- openlit/instrumentation/julep/async_julep.py +3 -3
- openlit/instrumentation/julep/julep.py +3 -3
- openlit/instrumentation/langchain/__init__.py +13 -7
- openlit/instrumentation/langchain/async_langchain.py +384 -0
- openlit/instrumentation/langchain/langchain.py +98 -490
- openlit/instrumentation/letta/letta.py +5 -3
- openlit/instrumentation/litellm/__init__.py +4 -5
- openlit/instrumentation/litellm/async_litellm.py +316 -245
- openlit/instrumentation/litellm/litellm.py +312 -241
- openlit/instrumentation/llamaindex/llamaindex.py +3 -3
- openlit/instrumentation/mem0/mem0.py +3 -3
- openlit/instrumentation/milvus/milvus.py +5 -5
- openlit/instrumentation/mistral/__init__.py +6 -6
- openlit/instrumentation/mistral/async_mistral.py +421 -248
- openlit/instrumentation/mistral/mistral.py +418 -244
- openlit/instrumentation/multion/async_multion.py +4 -2
- openlit/instrumentation/multion/multion.py +4 -2
- openlit/instrumentation/ollama/__init__.py +8 -30
- openlit/instrumentation/ollama/async_ollama.py +385 -417
- openlit/instrumentation/ollama/ollama.py +384 -417
- openlit/instrumentation/openai/async_openai.py +7 -9
- openlit/instrumentation/openai/openai.py +7 -9
- openlit/instrumentation/phidata/phidata.py +4 -2
- openlit/instrumentation/pinecone/pinecone.py +5 -5
- openlit/instrumentation/premai/__init__.py +2 -2
- openlit/instrumentation/premai/premai.py +262 -213
- openlit/instrumentation/qdrant/async_qdrant.py +5 -5
- openlit/instrumentation/qdrant/qdrant.py +5 -5
- openlit/instrumentation/reka/__init__.py +2 -2
- openlit/instrumentation/reka/async_reka.py +90 -52
- openlit/instrumentation/reka/reka.py +90 -52
- openlit/instrumentation/together/__init__.py +4 -4
- openlit/instrumentation/together/async_together.py +278 -236
- openlit/instrumentation/together/together.py +278 -236
- openlit/instrumentation/transformers/__init__.py +1 -1
- openlit/instrumentation/transformers/transformers.py +75 -44
- openlit/instrumentation/vertexai/__init__.py +14 -64
- openlit/instrumentation/vertexai/async_vertexai.py +329 -986
- openlit/instrumentation/vertexai/vertexai.py +329 -986
- openlit/instrumentation/vllm/__init__.py +1 -1
- openlit/instrumentation/vllm/vllm.py +62 -32
- openlit/semcov/__init__.py +3 -3
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
- openlit-1.33.10.dist-info/RECORD +122 -0
- openlit-1.33.9.dist-info/RECORD +0 -121
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/WHEEL +0 -0
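The recurring change across these modules is visible in the premai.py diff below: the `gen_ai_endpoint` wrapper argument is gone, spans are now named `{operation} {model}`, resource attributes move to the standard `SERVICE_NAME`/`DEPLOYMENT_ENVIRONMENT` constants, and streaming wrappers record per-chunk timestamps to report time-to-first-token (TTFT) and time-between-tokens (TBT). The diff only shows the call sites of the new `calculate_ttft`/`calculate_tbt` helpers; here is a minimal sketch of what they plausibly compute, assuming the real implementations in `openlit/__helpers.py` follow the usual definitions:

```python
# Sketch only -- inferred from the call sites in the diff below:
# calculate_ttft(timestamps, start_time) and calculate_tbt(timestamps).
# The shipped implementations live in openlit/__helpers.py.

def calculate_ttft(timestamps: list, start_time: float) -> float:
    # Time to first token: delay between sending the request
    # and receiving the first streamed chunk.
    if timestamps:
        return timestamps[0] - start_time
    return 0.0

def calculate_tbt(timestamps: list) -> float:
    # Time between tokens: average gap between consecutive chunks.
    if len(timestamps) > 1:
        gaps = [t2 - t1 for t1, t2 in zip(timestamps, timestamps[1:])]
        return sum(gaps) / len(gaps)
    return 0.0
```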
openlit/instrumentation/premai/premai.py

```diff
@@ -1,30 +1,32 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment, too-many-branches
 """
 Module for monitoring Prem AI API calls.
 """

 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
-    handle_exception,
-    general_tokens,
     get_chat_model_cost,
     get_embed_model_cost,
-    response_as_dict,
+    general_tokens,
+    handle_exception,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion

 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)

-def chat(gen_ai_endpoint, version, environment, application_name,
-         tracer, pricing_info, trace_content, metrics, disable_metrics):
+def chat(version, environment, application_name,
+         tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the PremAI API.
@@ -42,13 +44,22 @@ def chat(gen_ai_endpoint, version, environment, application_name,
         Wraps the response to collect message IDs and aggregated response.
         """

-        def __init__(self, wrapped, span, kwargs, **args):
+        def __init__(self, wrapped, span, kwargs, server_address, server_port,**args):
             self.__wrapped__ = wrapped
             self._span = span
             self._llmresponse = ""
             self._response_id = ""
             self._args = args
             self._kwargs = kwargs
+            self._server_address = server_address
+            self._server_port = server_port
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._response_model = ''
+            self._finish_reason = ''

         def __enter__(self):
             # Using context management protocols (if needed)
@@ -64,6 +75,14 @@ def chat(gen_ai_endpoint, version, environment, application_name,

         def __iter__(self):
             try:
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 for chunk in self.__wrapped__:
                     # Assuming `chunk` has similar structure as 'ChatCompletionResponseStream'
                     if chunk.choices:
@@ -72,7 +91,11 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                         if first_choice.delta.get('content'):
                             self._llmresponse += first_choice.delta.get('content')

-                    self._response_id = chunk.id
+                    if chunk.choices[0].finish_reason:
+                        self._finish_reason = chunk.choices[0].finish_reason
+                        self._response_id = chunk.id
+                        self._response_model = chunk.model
+
                     if not chunk:
                         # pylint: disable= stop-iteration-return
                         raise StopIteration
@@ -81,6 +104,10 @@ def chat(gen_ai_endpoint, version, environment, application_name,
             finally:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
@@ -90,7 +117,6 @@ def chat(gen_ai_endpoint, version, environment, application_name,

                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -100,55 +126,79 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                         formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)

+                    request_model = self._kwargs.get("model", "gpt-4o-mini")
+
                     # Calculate tokens using input prompt and aggregated response
-                    prompt_tokens = general_tokens(prompt)
-                    completion_tokens = general_tokens(self._llmresponse)
+                    input_tokens = general_tokens(prompt)
+                    output_tokens = general_tokens(self._llmresponse)

                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._kwargs.get("model", "gpt-4o-mini"),
-                                                pricing_info, prompt_tokens,
-                                                completion_tokens)
-
-                    # Set Span attributes
+                    cost = get_chat_model_cost(request_model,
+                                                pricing_info, input_tokens,
+                                                output_tokens)
+
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                                SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                                 SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                                gen_ai_endpoint)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                                SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                                request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                                self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                                self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                                self._kwargs.get("frequency_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                                self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                                self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                                self._kwargs.get("stop", []))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                                self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                                self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                                [self._finish_reason])
                     self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                                 self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                                self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                                input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                                output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                                self._server_address)
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                                 environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    self._span.set_attribute(SERVICE_NAME,
                                                 application_name)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                                self._kwargs.get("model", "gpt-4o-mini"))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                                 self._kwargs.get("user", ""))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                                self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                                self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                                self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                                self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                                self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                                self._kwargs.get("seed", ""))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                                 True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                                prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                                completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                                prompt_tokens + completion_tokens)
+                                                input_tokens + output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                                 cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                                self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                                self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                                version)
                     if trace_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -162,31 +212,35 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))

                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )

-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            prompt_tokens + completion_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
+                        )
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
                         )
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)

                 except Exception as e:
@@ -214,22 +268,25 @@ def chat(gen_ai_endpoint, version, environment, application_name,

         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "app.premai.io", 443)
+        request_model = kwargs.get("model", "gpt-4o-mini")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = wrapped(*args, **kwargs)
-            span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)

-            return TracedSyncStream(awaited_wrapped, span, kwargs)
+            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)

         # Handling for non-streaming responses
         else:
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = wrapped(*args, **kwargs)
-
-                response_dict = response_as_dict(response)
+                end_time = time.time()

                 try:
                     # Format 'messages' into a single string
@@ -241,7 +298,6 @@ def chat(gen_ai_endpoint, version, environment, application_name,

                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -251,38 +307,62 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                         formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)

-                    # Set base span attribues
+                    input_tokens = response.usage.prompt_tokens
+                    output_tokens = response.usage.completion_tokens
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                                                pricing_info, input_tokens,
+                                                output_tokens)
+
+                    # Set base span attribues (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                        SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
                     span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                         SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                        gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                        response_dict.additional_properties["id"])
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                        application_name)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                        SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                        kwargs.get("model", "gpt-4o-mini"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                        kwargs.get("top_p", 1.0))
+                                        request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                        server_port)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                        kwargs.get("frequency_penalty", 0.0))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                                         kwargs.get("max_tokens", -1))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                        kwargs.get("user", ""))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                        kwargs.get("temperature", 1.0))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
                                         kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                        kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                        kwargs.get("seed", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                        kwargs.get("stop", []))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                        kwargs.get("temperature", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                        kwargs.get("top_p", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                        response.additional_properties.get('id'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                        response.model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                        input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                        output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                        server_address)
+
+                    # Set base span attribues (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                        environment)
+                    span.set_attribute(SERVICE_NAME,
+                                        application_name)
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                         False)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                        input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                        cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                        end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                        version)
                     if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -290,94 +370,50 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
                             },
                         )
-
-                    # Set span attributes when tools is not passed to the function call
-                    if "tools" not in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-4o-mini"),
-                                                    pricing_info, response_dict.usage.prompt_tokens,
-                                                    response_dict.usage.completion_tokens)
-
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                            response_dict.usage.prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                            response_dict.usage.completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                            response_dict.usage.total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                            [response_dict.choices[0].finish_reason])
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                            cost)
-
-                        # Set span attributes for when n = 1 (default)
-                        if "n" not in kwargs or kwargs["n"] == 1:
-                            if trace_content:
-                                span.add_event(
-                                    name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.choices[0].message.content,
-                                    },
-                                )
-
-                        # Set span attributes for when n > 0
-                        else:
-                            i = 0
-                            while i < kwargs["n"] and trace_content is True:
-                                attribute_name = f"gen_ai.content.completion.{i}"
-                                span.add_event(
-                                    name=attribute_name,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.choices[i].message.content,
-                                    },
-                                )
-                                i += 1
-
-                        # Return original response
-                        return response
-
-                    # Set span attributes when tools is passed to the function call
-                    elif "tools" in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                    pricing_info, response_dict.usage.prompt_tokens,
-                                                    response_dict.usage.completion_tokens)
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                             attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response.choices[0].message.content),
                             },
                         )
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                            response_dict.usage.prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                            response_dict.usage.completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                            response_dict.usage.total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                            cost)
+
+                    if kwargs.get('tools'):
+                        span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                        str(response.choices[0].message.tool_calls))
+
+                    if kwargs.get('response_format', '') != '':
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                        "json")
+                    else:
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                        "text")

                     span.set_status(Status(StatusCode.OK))

                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=response.model,
+                        )

+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(response_dict.usage.total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(response_dict.usage.completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(response_dict.usage.prompt_tokens, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)

                 # Return original response
```
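Both chat paths above (and the embedding path below) replace the hand-built metrics attribute dict with the shared `create_metrics_attributes` helper. Its return shape is not shown in this diff; here is a sketch of what it plausibly produces, with plain-string keys standing in for the `SemanticConvetion` and OTel resource constants:

```python
# Illustrative sketch -- the real helper ships in openlit/__helpers.py and
# uses SemanticConvetion/OTel constants rather than these literal keys.
def create_metrics_attributes(service_name, deployment_environment, operation,
                              system, request_model, server_address,
                              server_port, response_model):
    return {
        "telemetry.sdk.name": "openlit",
        "service.name": service_name,
        "deployment.environment": deployment_environment,
        "gen_ai.operation.name": operation,
        "gen_ai.system": system,
        "gen_ai.request.model": request_model,
        "server.address": server_address,
        "server.port": server_port,
        "gen_ai.response.model": response_model,
    }
```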
```diff
@@ -392,18 +428,17 @@ def chat(gen_ai_endpoint, version, environment, application_name,

     return wrapper

-def embedding(gen_ai_endpoint, version, environment, application_name,
+def embedding(version, environment, application_name,
               tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the
+        application_name: Name of the application using the PremAI API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of
+        pricing_info: Information used for calculating the cost of PremAI usage.
         trace_content: Flag indicating whether to trace the actual content.

     Returns:
@@ -427,71 +462,85 @@ def embedding(gen_ai_endpoint, version, environment, application_name,
            The response from the original 'embeddings' method.
        """

-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "app.premai.io", 443)
+        request_model = kwargs.get("model", "text-embedding-ada-002")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
-            response_dict = response_as_dict(response)
+            end_time = time.time()
+
             try:
+                input_tokens = response.usage.prompt_tokens
+
                 # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
-                                            pricing_info, response_dict.usage.prompt_tokens)
+                cost = get_embed_model_cost(request_model,
+                                            pricing_info, input_tokens)

-                # Set Span attributes
+                # Set Span attributes (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                    SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
                 span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                     SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                    application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                    SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                    kwargs.get("model", "text-embedding-ada-002"))
+                                    request_model)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
-                                    kwargs.get("encoding_format", "float"))
-
-
+                                    [kwargs.get('encoding_format', 'float')])
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                    response.model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                    server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                    server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                    input_tokens)
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                    environment)
+                span.set_attribute(SERVICE_NAME,
+                                    application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                     kwargs.get("user", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                    response_dict.usage.prompt_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                    response_dict.usage.total_tokens)
+                                    input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                    version)
+
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                         attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("input", ""),
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: str(kwargs.get("input", "")),
                         },
                     )

                 span.set_status(Status(StatusCode.OK))

                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_OPERATION:
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "text-embedding-ada-002")
-                    }
-
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response.model,
+                    )
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        response_dict.usage.total_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(
-                        response_dict.usageprompt_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)

                 # Return original response
```