openlit 1.34.5__py3-none-any.whl → 1.34.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/instrumentation/openai/async_openai.py +1 -1
- openlit/instrumentation/premai/__init__.py +0 -1
- openlit/instrumentation/premai/premai.py +84 -454
- openlit/instrumentation/premai/utils.py +325 -0
- openlit/instrumentation/reka/__init__.py +5 -7
- openlit/instrumentation/reka/async_reka.py +25 -163
- openlit/instrumentation/reka/reka.py +24 -162
- openlit/instrumentation/reka/utils.py +193 -0
- openlit/instrumentation/together/utils.py +3 -3
- {openlit-1.34.5.dist-info → openlit-1.34.7.dist-info}/METADATA +1 -1
- {openlit-1.34.5.dist-info → openlit-1.34.7.dist-info}/RECORD +13 -11
- {openlit-1.34.5.dist-info → openlit-1.34.7.dist-info}/LICENSE +0 -0
- {openlit-1.34.5.dist-info → openlit-1.34.7.dist-info}/WHEEL +0 -0
openlit/instrumentation/openai/async_openai.py:

@@ -733,7 +733,7 @@ def async_chat_completions(version, environment, application_name,
             formatted_messages = []
             for message in message_prompt:
                 role = message["role"]
-                content = message
+                content = message.get("content", "")
 
                 if isinstance(content, list):
                     content_str = ", ".join(
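The one-line fix above is easy to misread: the old code bound the entire message dict to `content`, so prompt capture stringified the whole object and the multimodal branch below could never see a plain list. The `.get("content", "")` form also tolerates messages that legitimately omit `content`, such as assistant tool-call messages. A minimal sketch of the corrected flattening loop, with assumed sample message shapes (sample data only, not openlit's helper):

```python
# Sketch of the corrected message-flattening logic (assumed message shapes).
messages = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "tool_calls": [{"id": "call_1"}]},  # no "content" key
    {"role": "user", "content": [
        {"type": "text", "text": "Describe this image"},
        {"type": "image_url", "image_url": "https://example.com/cat.png"},
    ]},
]

formatted_messages = []
for message in messages:
    role = message["role"]
    # Old: content = message  -> stringified the whole dict into the prompt.
    # New: .get() extracts the field and avoids KeyError when it is absent.
    content = message.get("content", "")
    if isinstance(content, list):
        # Multimodal content: join the parts into one readable string.
        content_str = ", ".join(
            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
            for item in content
        )
        formatted_messages.append(f"{role}: {content_str}")
    else:
        formatted_messages.append(f"{role}: {content}")

print("\n".join(formatted_messages))
```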
openlit/instrumentation/premai/premai.py (deleted lines whose content the diff view did not capture are shown as bare `-`):

@@ -1,72 +1,66 @@
 """
-Module for monitoring
+Module for monitoring PremAI API calls.
 """
 
-import logging
 import time
-from opentelemetry.trace import SpanKind
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
-    get_chat_model_cost,
-    get_embed_model_cost,
-    general_tokens,
     handle_exception,
-    calculate_ttft,
-    calculate_tbt,
-    create_metrics_attributes,
     set_server_address_and_port
 )
+from openlit.instrumentation.premai.utils import (
+    process_chat_response,
+    process_chunk,
+    process_streaming_chat_response,
+    process_embedding_response
+)
 from openlit.semcov import SemanticConvention
 
-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
-
 def chat(version, environment, application_name,
-
+    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the PremAI API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of PremAI usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat completions method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     class TracedSyncStream:
         """
-        Wrapper for streaming responses to collect
-        Wraps the response to collect message IDs and aggregated response.
+        Wrapper for streaming responses to collect telemetry.
         """
 
-        def __init__(
+        def __init__(
+            self,
+            wrapped,
+            span,
+            span_name,
+            kwargs,
+            server_address,
+            server_port,
+            **args,
+        ):
             self.__wrapped__ = wrapped
             self._span = span
+            self._span_name = span_name
             self._llmresponse = ""
             self._response_id = ""
+            self._response_model = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
+            self._finish_reason = ""
+            self._tools = None
             self._args = args
             self._kwargs = kwargs
-            self._server_address = server_address
-            self._server_port = server_port
             self._start_time = time.time()
             self._end_time = None
             self._timestamps = []
             self._ttft = 0
             self._tbt = 0
-            self.
-            self.
+            self._server_address = server_address
+            self._server_port = server_port
 
         def __enter__(self):
-            # Using context management protocols (if needed)
             return self
 
         def __exit__(self, exc_type, exc_value, traceback):
-            # Add any resource cleanup or finalization if required.
             pass
 
         def __getattr__(self, name):
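The constructor growth above is the crux of the premai.py rewrite: `TracedSyncStream` now carries all per-stream telemetry state (`_input_tokens`, `_finish_reason`, `_tools`, and the saved `span_name`) so that the new `process_chunk` and `process_streaming_chat_response` helpers in premai/utils.py can do the bookkeeping instead of inline code. The underlying shape is a delegating iterator; a toy version of the pattern (not openlit's actual class) looks like this:

```python
# Stripped-down illustration of the delegating-iterator pattern used by
# TracedSyncStream: forward iteration to the wrapped stream while
# accumulating per-chunk state for later telemetry.
import time

class TracedStream:
    def __init__(self, wrapped):
        self.__wrapped__ = wrapped
        self._llmresponse = ""
        self._chunk_count = 0
        self._start_time = time.time()

    def __iter__(self):
        return self

    def __next__(self):
        chunk = self.__wrapped__.__next__()  # StopIteration propagates when done
        self._llmresponse += chunk           # analogous to process_chunk(self, chunk)
        self._chunk_count += 1
        return chunk

stream = TracedStream(iter(["Hel", "lo", "!"]))
print("".join(stream), stream._chunk_count)  # -> Hello! 3
```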
@@ -75,391 +69,86 @@ def chat(version, environment, application_name,
 
         def __iter__(self):
             try:
-
-
-
-
-                if len(self._timestamps) == 1:
-                    # Calculate time to first chunk
-                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
-
-                for chunk in self.__wrapped__:
-                    # Assuming `chunk` has similar structure as 'ChatCompletionResponseStream'
-                    if chunk.choices:
-                        first_choice = chunk.choices[0]
-
-                        if first_choice.delta.get('content'):
-                            self._llmresponse += first_choice.delta.get('content')
-
-                    if chunk.choices[0].finish_reason:
-                        self._finish_reason = chunk.choices[0].finish_reason
-                        self._response_id = chunk.id
-                        self._response_model = chunk.model
-
-                if not chunk:
-                    # pylint: disable= stop-iteration-return
-                    raise StopIteration
-                yield chunk
+                chunk = self.__wrapped__.__next__()
+                process_chunk(self, chunk)
+                return chunk
 
             finally:
-                # Handling exception ensure observability without disrupting operation
                 try:
-                    self.
-
-
-
-
-
-
-
-
-
-
-                    if isinstance(content, list):
-                        content_str = ", ".join(
-                            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                            if "type" in item else f'text: {item["text"]}'
-                            for item in content
-                        )
-                        formatted_messages.append(f"{role}: {content_str}")
-                    else:
-                        formatted_messages.append(f"{role}: {content}")
-                    prompt = "\n".join(formatted_messages)
-
-                    request_model = self._kwargs.get("model", "gpt-4o-mini")
-
-                    # Calculate tokens using input prompt and aggregated response
-                    input_tokens = general_tokens(prompt)
-                    output_tokens = general_tokens(self._llmresponse)
-
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(request_model,
-                        pricing_info, input_tokens,
-                        output_tokens)
-
-                    # Set Span attributes (OTel Semconv)
-                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                        SemanticConvention.GEN_AI_SYSTEM_PREMAI)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                        request_model)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
-                        self._kwargs.get("seed", ""))
-                    self._span.set_attribute(SemanticConvention.SERVER_PORT,
-                        self._server_port)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                        self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                        self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                        self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
-                        self._kwargs.get("stop", []))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                        self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                        self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
-                        [self._finish_reason])
-                    self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
-                        self._response_id)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                        self._response_model)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                        input_tokens)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        output_tokens)
-                    self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                        self._server_address)
-                    if isinstance(self._llmresponse, str):
-                        self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                            "text")
-                    else:
-                        self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                            "json")
-
-                    # Set Span attributes (Extra)
-                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                        environment)
-                    self._span.set_attribute(SERVICE_NAME,
-                        application_name)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
-                        self._kwargs.get("user", ""))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                        True)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                        input_tokens + output_tokens)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                        cost)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
-                        self._tbt)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                        self._ttft)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                        version)
-                    if capture_message_content:
-                        self._span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        self._span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
-                            },
-                        )
-                    self._span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvention.GEN_AI_SYSTEM_PREMAI,
-                            request_model=request_model,
-                            server_address=self._server_address,
-                            server_port=self._server_port,
-                            response_model=self._response_model,
-                        )
-
-                        metrics["genai_client_usage_tokens"].record(
-                            input_tokens + output_tokens, attributes
-                        )
-                        metrics["genai_client_operation_duration"].record(
-                            self._end_time - self._start_time, attributes
-                        )
-                        metrics["genai_server_tbt"].record(
-                            self._tbt, attributes
-                        )
-                        metrics["genai_server_ttft"].record(
-                            self._ttft, attributes
+                    with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
+                        process_streaming_chat_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
                         )
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
 
                 except Exception as e:
                     handle_exception(self._span, e)
-                    logger.error("Error in trace creation: %s", e)
-                finally:
-                    self._span.end()
 
     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'chat.completions' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat.completions' method.
-            kwargs: Keyword arguments for the 'chat.completions' method.
-
-        Returns:
-            The response from the original 'chat.completions' method.
+        Wraps the GenAI function call.
         """
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+
         server_address, server_port = set_server_address_and_port(instance, "app.premai.io", 443)
         request_model = kwargs.get("model", "gpt-4o-mini")
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-        # pylint: disable=no-else-return
        if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = wrapped(*args, **kwargs)
             span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
-
-            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
+            return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
 
         # Handling for non-streaming responses
         else:
-            with tracer.start_as_current_span(span_name, kind=
+            with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
                 start_time = time.time()
                 response = wrapped(*args, **kwargs)
-                end_time = time.time()
 
                 try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    prompt = "\n".join(formatted_messages)
-
-                    input_tokens = response.usage.prompt_tokens
-                    output_tokens = response.usage.completion_tokens
-
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(request_model,
-                        pricing_info, input_tokens,
-                        output_tokens)
-
-                    # Set base span attribues (OTel Semconv)
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                        SemanticConvention.GEN_AI_SYSTEM_PREMAI)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                        request_model)
-                    span.set_attribute(SemanticConvention.SERVER_PORT,
-                        server_port)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                        kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                        kwargs.get("max_tokens", -1))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                        kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
-                        kwargs.get("stop", []))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                        kwargs.get("temperature", 1.0))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                        kwargs.get("top_p", 1.0))
-                    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
-                        response.additional_properties.get('id'))
-                    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                        response.model)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                        input_tokens)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        output_tokens)
-                    span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                        server_address)
-
-                    # Set base span attribues (Extras)
-                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                        environment)
-                    span.set_attribute(SERVICE_NAME,
-                        application_name)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                        False)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                        input_tokens + output_tokens)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                        cost)
-                    span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                        end_time - start_time)
-                    span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                        version)
-                    if capture_message_content:
-                        span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: str(response.choices[0].message.content),
-                            },
-                        )
-
-                    if kwargs.get('tools'):
-                        span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
-                            str(response.choices[0].message.tool_calls))
-
-                    if kwargs.get('response_format', '') != '':
-                        span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                            "json")
-                    else:
-                        span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                            "text")
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvention.GEN_AI_SYSTEM_PREMAI,
-                            request_model=request_model,
-                            server_address=server_address,
-                            server_port=server_port,
-                            response_model=response.model,
-                        )
-
-                        metrics["genai_client_usage_tokens"].record(
-                            input_tokens + output_tokens, attributes
-                        )
-                        metrics["genai_client_operation_duration"].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics["genai_server_ttft"].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                    # Return original response
-                    return response
+                    response = process_chat_response(
+                        response=response,
+                        request_model=request_model,
+                        pricing_info=pricing_info,
+                        server_port=server_port,
+                        server_address=server_address,
+                        environment=environment,
+                        application_name=application_name,
+                        metrics=metrics,
+                        start_time=start_time,
+                        span=span,
+                        capture_message_content=capture_message_content,
+                        disable_metrics=disable_metrics,
+                        version=version,
+                        **kwargs
+                    )
 
                 except Exception as e:
                     handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
 
-
-                return response
+                return response
 
     return wrapper
 
 def embedding(version, environment, application_name,
-
+    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the PremAI API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of PremAI usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the embeddings method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'embeddings' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'embeddings' method.
-            kwargs: Keyword arguments for the 'embeddings' method.
-
-        Returns:
-            The response from the original 'embeddings' method.
+        Wraps the GenAI function call.
         """
 
         server_address, server_port = set_server_address_and_port(instance, "app.premai.io", 443)
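After this hunk, the chat wrapper's non-streaming branch reduces to: open a span, time the call, and hand the response to `process_chat_response` from the new premai/utils.py; the embedding wrapper below gets the identical treatment with `process_embedding_response`. The shared skeleton can be sketched with a hypothetical factory (`make_wrapper`, `process_response`, and `telemetry` are illustrative names, not openlit APIs):

```python
import time

def make_wrapper(tracer, span_name, process_response, handle_exception, **telemetry):
    """Hypothetical factory showing the shape now shared by chat() and embedding()."""
    def wrapper(wrapped, instance, args, kwargs):
        with tracer.start_as_current_span(span_name) as span:
            start_time = time.time()
            response = wrapped(*args, **kwargs)
            try:
                # The helper sets span attributes, records metrics, and returns
                # the response; duration is derived from start_time inside the
                # helper, which is why the local end_time variable was dropped.
                response = process_response(
                    response=response, span=span, start_time=start_time,
                    **telemetry, **kwargs,
                )
            except Exception as e:
                handle_exception(span, e)
            return response
    return wrapper
```

Centralizing the attribute and metric logic this way is what turns premai.py's +84/-454 and mirrors the parallel reka refactor listed at the top (reka.py and async_reka.py shrink while reka/utils.py gains 193 lines).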
@@ -467,90 +156,31 @@ def embedding(version, environment, application_name,
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
 
-        with tracer.start_as_current_span(span_name, kind=
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
             start_time = time.time()
             response = wrapped(*args, **kwargs)
-            end_time = time.time()
 
             try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                    response.model)
-                span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                    server_address)
-                span.set_attribute(SemanticConvention.SERVER_PORT,
-                    server_port)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                    input_tokens)
-
-                # Set Span attributes (Extras)
-                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                    environment)
-                span.set_attribute(SERVICE_NAME,
-                    application_name)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
-                    kwargs.get("user", ""))
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                    input_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                    cost)
-                span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                    version)
-
-                if capture_message_content:
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvention.GEN_AI_CONTENT_PROMPT: str(kwargs.get("input", "")),
-                        },
-                    )
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = create_metrics_attributes(
-                        service_name=application_name,
-                        deployment_environment=environment,
-                        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                        system=SemanticConvention.GEN_AI_SYSTEM_PREMAI,
-                        request_model=request_model,
-                        server_address=server_address,
-                        server_port=server_port,
-                        response_model=response.model,
-                    )
-                    metrics["genai_client_usage_tokens"].record(
-                        input_tokens, attributes
-                    )
-                    metrics["genai_client_operation_duration"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
-
-                # Return original response
-                return response
+                response = process_embedding_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )
 
             except Exception as e:
                 handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
 
-
-                return response
+            return response
 
     return wrapper
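A behavioral note that applies to every hunk above: the `logger.error("Error in trace creation: %s", e)` calls are gone, `handle_exception` is now the sole error path, and spans are closed by the `start_as_current_span` context manager instead of an explicit `finally: self._span.end()`. With the standard OpenTelemetry Python API, a helper in that role typically reduces to the following (a sketch only; openlit's actual `handle_exception` lives in `openlit/__helpers.py`):

```python
from opentelemetry.trace import Status, StatusCode

def record_error(span, error):
    """Sketch of handle_exception-style error recording with the OTel API."""
    span.record_exception(error)                            # attach exception event
    span.set_status(Status(StatusCode.ERROR, str(error)))   # mark the span failed
```

Recording the exception on the span keeps the failure visible in the trace backend, so a separate application-level log line is redundant for instrumentation code.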