openlit 1.33.16__py3-none-any.whl → 1.33.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/openlit/instrumentation/azure_ai_inference/__init__.py
+++ b/openlit/instrumentation/azure_ai_inference/__init__.py
@@ -4,13 +4,11 @@ from typing import Collection
 import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper
-
 from openlit.instrumentation.azure_ai_inference.azure_ai_inference import (
-    complete, embedding
+    complete
 )
-
 from openlit.instrumentation.azure_ai_inference.async_azure_ai_inference import (
-    async_complete, async_embedding
+    async_complete
 )
 
 _instruments = ('azure-ai-inference >= 1.0.0b4',)
@@ -27,6 +25,7 @@ class AzureAIInferenceInstrumentor(BaseInstrumentor):
         application_name = kwargs.get('application_name', 'default')
         environment = kwargs.get('environment', 'default')
         tracer = kwargs.get('tracer')
+        event_provider = kwargs.get('event_provider')
         metrics = kwargs.get('metrics_dict')
         pricing_info = kwargs.get('pricing_info', {})
         capture_message_content = kwargs.get('capture_message_content', False)
@@ -38,15 +37,7 @@ class AzureAIInferenceInstrumentor(BaseInstrumentor):
             'azure.ai.inference',
             'ChatCompletionsClient.complete',
             complete(version, environment, application_name,
-                     tracer, pricing_info, capture_message_content, metrics, disable_metrics),
-        )
-
-        # sync embedding
-        wrap_function_wrapper(
-            'azure.ai.inference',
-            'EmbeddingsClient.embed',
-            embedding(version, environment, application_name,
-                      tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                     tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
         # async generate
@@ -54,15 +45,7 @@ class AzureAIInferenceInstrumentor(BaseInstrumentor):
             'azure.ai.inference.aio',
             'ChatCompletionsClient.complete',
             async_complete(version, environment, application_name,
-                           tracer, pricing_info, capture_message_content, metrics, disable_metrics),
-        )
-
-        # async embedding
-        wrap_function_wrapper(
-            'azure.ai.inference.aio',
-            'EmbeddingsClient.embed',
-            async_embedding(version, environment, application_name,
-                            tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                           tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
     def _uninstrument(self, **kwargs):
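
Both hunks above use wrapt's wrapper-factory idiom: `complete(...)` and `async_complete(...)` return a wrapper, and `wrap_function_wrapper` installs it over the client method in place. A minimal sketch of that idiom follows; `make_wrapper` and the tracer setup are illustrative stand-ins, not openlit's actual factories, and only `wrap_function_wrapper` is the real wrapt API.

```python
# Minimal sketch of the wrapt wrapper-factory idiom used above.
# 'make_wrapper' is an illustrative stand-in for openlit's complete()/
# async_complete() factories.
from opentelemetry import trace
from wrapt import wrap_function_wrapper

def make_wrapper(tracer):
    def wrapper(wrapped, instance, args, kwargs):
        # wrapt passes the original method, its bound instance, and the
        # caller's arguments; telemetry brackets the real call.
        with tracer.start_as_current_span("chat"):
            return wrapped(*args, **kwargs)
    return wrapper

# Patches ChatCompletionsClient.complete in place (requires the azure
# package to be importable, as in the instrumentor above).
wrap_function_wrapper(
    'azure.ai.inference',
    'ChatCompletionsClient.complete',
    make_wrapper(trace.get_tracer(__name__)),
)
```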
--- a/openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py
+++ b/openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py
@@ -4,18 +4,15 @@ Module for monitoring Azure AI Inference API calls.
 
 import logging
 import time
-from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
-    get_chat_model_cost,
-    get_embed_model_cost,
     handle_exception,
-    response_as_dict,
-    calculate_ttft,
-    calculate_tbt,
-    create_metrics_attributes,
     set_server_address_and_port,
-    general_tokens
+)
+from openlit.instrumentation.azure_ai_inference.utils import (
+    process_chunk,
+    process_chat_response,
+    process_streaming_chat_response,
 )
 from openlit.semcov import SemanticConvetion
 
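A page of inline span logic moves behind these three helpers. Their implementations are not part of this diff; the stubs below only restate the signatures that can be inferred from the call sites later in the file (keyword names are taken from those calls, parameter order is a guess), with bodies elided.

```python
# Inferred surface of openlit.instrumentation.azure_ai_inference.utils,
# reconstructed from call sites in this diff; bodies intentionally elided.
def process_chunk(scope, chunk):
    """Fold one streaming chunk into the TracedAsyncStream state."""

def process_streaming_chat_response(scope, pricing_info, environment,
                                    application_name, metrics, event_provider,
                                    capture_message_content, disable_metrics,
                                    version):
    """Emit span attributes, events, and metrics once a stream is drained."""

def process_chat_response(response, request_model, pricing_info, server_port,
                          server_address, environment, application_name,
                          metrics, event_provider, start_time, span,
                          capture_message_content, disable_metrics, version,
                          **kwargs):
    """Record telemetry for a non-streaming response and return it."""
```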
@@ -23,37 +20,21 @@ from openlit.semcov import SemanticConvetion
 logger = logging.getLogger(__name__)
 
 def async_complete(version, environment, application_name,
-                   tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+                   tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for chat to collect metrics.
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Azure AI Inference API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Azure AI Inference usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     class TracedAsyncStream:
         """
-        Wrapper for streaming responses to collect metrics and trace data.
-        Wraps the response to collect message IDs and aggregated response.
-
-        This class implements the '__aiter__' and '__anext__' methods that
-        handle asynchronous streaming responses.
-
-        This class also implements '__aenter__' and '__aexit__' methods that
-        handle asynchronous context management protocol.
+        Wrapper for streaming responses to collect telemetry.
         """
+
         def __init__(
             self,
             wrapped,
             span,
+            span_name,
             kwargs,
             server_address,
             server_port,
@@ -61,12 +42,13 @@ def async_complete(version, environment, application_name,
         ):
             self.__wrapped__ = wrapped
             self._span = span
-            # Placeholder for aggregating streaming response
+            self._span_name = span_name
             self._llmresponse = ""
             self._response_id = ""
             self._response_model = ""
             self._finish_reason = ""
-            self._system_fingerprint = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
 
             self._args = args
             self._kwargs = kwargs
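
The wrapper keeps delegating the async iterator and context-manager protocols to the wrapped stream (the behavior the deleted docstring described); only its bookkeeping changes, trading `_system_fingerprint` for token counters that `process_chunk` fills as chunks arrive. A stripped-down sketch of that delegation pattern, independent of openlit:

```python
# Sketch of the delegation pattern TracedAsyncStream relies on: forward
# __aenter__/__aexit__/__aiter__/__anext__ to the wrapped stream so the
# instrumentation stays invisible to callers. Illustrative, not openlit code.
class DelegatingAsyncStream:
    def __init__(self, wrapped):
        self.__wrapped__ = wrapped

    async def __aenter__(self):
        await self.__wrapped__.__aenter__()
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        return await self.__wrapped__.__aexit__(exc_type, exc_value, traceback)

    def __aiter__(self):
        return self

    async def __anext__(self):
        # A real wrapper would inspect the chunk here before yielding it.
        return await self.__wrapped__.__anext__()
```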
@@ -95,197 +77,33 @@ def async_complete(version, environment, application_name,
         async def __anext__(self):
             try:
                 chunk = await self.__wrapped__.__anext__()
-                end_time = time.time()
-                # Record the timestamp for the current chunk
-                self._timestamps.append(end_time)
-
-                if len(self._timestamps) == 1:
-                    # Calculate time to first chunk
-                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
-
-                chunked = response_as_dict(chunk)
-                # Collect message IDs and aggregated response from events
-                if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
-                    'content' in chunked.get('choices')[0].get('delta'))):
-
-                    content = chunked.get('choices')[0].get('delta').get('content')
-                    if content:
-                        self._llmresponse += content
-                self._response_id = chunked.get('id')
-                self._response_model = chunked.get('model')
-                self._finish_reason = chunked.get('choices')[0].get('finish_reason')
-                self._system_fingerprint = chunked.get('system_fingerprint')
+                process_chunk(self, chunk)
                 return chunk
             except StopAsyncIteration:
-                # Handling exception ensure observability without disrupting operation
                 try:
-                    self._end_time = time.time()
-                    if len(self._timestamps) > 1:
-                        self._tbt = calculate_tbt(self._timestamps)
-
-                    # Format 'messages' into a single string
-                    message_prompt = self._kwargs.get("messages", "")
-                    formatted_messages = []
-                    for message in message_prompt:
-                        role = message["role"]
-                        content = message["content"]
-
-                        if isinstance(content, list):
-                            content_str_list = []
-                            for item in content:
-                                if item["type"] == "text":
-                                    content_str_list.append(f'text: {item["text"]}')
-                                elif (item["type"] == "image_url" and
-                                      not item["image_url"]["url"].startswith("data:")):
-                                    content_str_list.append(f'image_url: {item["image_url"]["url"]}')
-                            content_str = ", ".join(content_str_list)
-                            formatted_messages.append(f"{role}: {content_str}")
-                        else:
-                            formatted_messages.append(f"{role}: {content}")
-                    prompt = "\n".join(formatted_messages)
-
-                    request_model = self._kwargs.get("model", "gpt-4o")
-
-                    # Calculate tokens using input prompt and aggregated response
-                    input_tokens = general_tokens(prompt)
-                    output_tokens = general_tokens(self._llmresponse)
-
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(request_model,
-                                               pricing_info, input_tokens,
-                                               output_tokens)
-
-                    # Set Span attributes (OTel Semconv)
-                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                             SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                             SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                             request_model)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                             self._kwargs.get("seed", ""))
-                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
-                                             self._server_port)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                             self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                             self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                             self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
-                                             self._kwargs.get("stop", []))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                             self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                             self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                             [self._finish_reason])
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                             self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
-                                             self._response_model)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                             input_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                             output_tokens)
-                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
-                                             self._server_address)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
-                                             self._system_fingerprint)
-                    if isinstance(self._llmresponse, str):
-                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                                 "text")
-                    else:
-                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                                 "json")
-
-                    # Set Span attributes (Extra)
-                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                             environment)
-                    self._span.set_attribute(SERVICE_NAME,
-                                             application_name)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                             True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                             input_tokens + output_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                             cost)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
-                                             self._tbt)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
-                                             self._ttft)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
-                                             version)
-                    if capture_message_content:
-                        self._span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        self._span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
-                            },
+                    with tracer.start_as_current_span(self._span_name, kind= SpanKind.CLIENT) as self._span:
+                        process_streaming_chat_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            event_provider=event_provider,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
                         )
-                    self._span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
-                            request_model=request_model,
-                            server_address=self._server_address,
-                            server_port=self._server_port,
-                            response_model=self._response_model,
-                        )
-
-                        metrics["genai_client_usage_tokens"].record(
-                            input_tokens + output_tokens, attributes
-                        )
-                        metrics["genai_client_operation_duration"].record(
-                            self._end_time - self._start_time, attributes
-                        )
-                        metrics["genai_server_tbt"].record(
-                            self._tbt, attributes
-                        )
-                        metrics["genai_server_ttft"].record(
-                            self._ttft, attributes
-                        )
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
 
                 except Exception as e:
                     handle_exception(self._span, e)
                     logger.error("Error in trace creation: %s", e)
-                finally:
-                    self._span.end()
                 raise
 
     async def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'chat.completions' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'chat.completions' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat.completions' method.
-            kwargs: Keyword arguments for the 'chat.completions' method.
-
-        Returns:
-            The response from the original 'chat.completions' method.
+        Wraps the GenAI function call.
         """
 
-        # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
         server_address, server_port = set_server_address_and_port(instance, "models.github.ai", 443)
         request_model = kwargs.get("model", "gpt-4o")
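
Everything deleted above collapses into a `process_chunk(self, chunk)` call per chunk plus one `process_streaming_chat_response(...)` call when the stream is drained; note the span is now started fresh at end of stream rather than ended in a `finally:`. A hedged sketch of what `process_chunk` plausibly does, inferred from the inline logic it replaces and from the new `_input_tokens`/`_output_tokens` fields; it is not the verbatim 1.33.18 implementation.

```python
# Hedged reconstruction of utils.process_chunk from the deleted inline logic;
# response_as_dict is the helper the old code imported from openlit.__helpers.
from openlit.__helpers import response_as_dict

def process_chunk(scope, chunk):
    chunked = response_as_dict(chunk)
    choices = chunked.get('choices') or []

    # Aggregate streamed delta content, as the removed inline code did.
    if choices and 'content' in (choices[0].get('delta') or {}):
        content = choices[0]['delta']['content']
        if content:
            scope._llmresponse += content

    scope._response_id = chunked.get('id')
    scope._response_model = chunked.get('model')
    if choices:
        scope._finish_reason = choices[0].get('finish_reason')

    # Assumption: usage reported by the API now feeds the new counters,
    # replacing the general_tokens() re-tokenization that was deleted.
    usage = chunked.get('usage') or {}
    scope._input_tokens = usage.get('prompt_tokens', scope._input_tokens)
    scope._output_tokens = usage.get('completion_tokens', scope._output_tokens)
```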
@@ -294,292 +112,33 @@ def async_complete(version, environment, application_name,
 
         # pylint: disable=no-else-return
         if streaming:
-            # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = await wrapped(*args, **kwargs)
             span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-            return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
+            return TracedAsyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
 
-        # Handling for non-streaming responses
         else:
-            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
                 start_time = time.time()
                 response = await wrapped(*args, **kwargs)
-                end_time = time.time()
-
-                response_dict = response_as_dict(response)
-
-                try:
-                    # Format 'messages' into a single string
-                    message_prompt = kwargs.get("messages", "")
-                    formatted_messages = []
-                    for message in message_prompt:
-                        role = message["role"]
-                        content = message["content"]
-
-                        if isinstance(content, list):
-                            content_str = ", ".join(
-                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
-                            formatted_messages.append(f"{role}: {content_str}")
-                        else:
-                            formatted_messages.append(f"{role}: {content}")
-                    prompt = "\n".join(formatted_messages)
-
-                    input_tokens = response_dict.get('usage').get('prompt_tokens')
-                    output_tokens = response_dict.get('usage').get('completion_tokens')
-
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(request_model,
-                                               pricing_info, input_tokens,
-                                               output_tokens)
-
-                    # Set base span attribues (OTel Semconv)
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                       SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       request_model)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                       kwargs.get("seed", ""))
-                    span.set_attribute(SemanticConvetion.SERVER_PORT,
-                                       server_port)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                       kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                       kwargs.get("max_tokens", -1))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                       kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
-                                       kwargs.get("stop", []))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                       kwargs.get("temperature", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                       kwargs.get("top_p", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                       response_dict.get("id"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
-                                       response_dict.get('model'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                       input_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                       output_tokens)
-                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
-                                       server_address)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
-                                       response_dict.get('system_fingerprint'))
-
-                    # Set base span attribues (Extras)
-                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                       environment)
-                    span.set_attribute(SERVICE_NAME,
-                                       application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                       False)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       input_tokens + output_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                       cost)
-                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
-                                       end_time - start_time)
-                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
-                                       version)
-                    if capture_message_content:
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-
-                    for i in range(kwargs.get('n',1)):
-                        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                           [response_dict.get('choices')[i].get('finish_reason')])
-                        if capture_message_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    # pylint: disable=line-too-long
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
-                                },
-                            )
-                        if kwargs.get('tools'):
-                            span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
-                                               str(response_dict.get('choices')[i].get('message').get('tool_calls')))
-
-                        if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
-                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                               "text")
-                        elif response_dict.get('choices')[i].get('message').get('content') is not None:
-                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                               "json")
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
-                            request_model=request_model,
-                            server_address=server_address,
-                            server_port=server_port,
-                            response_model=response_dict.get('model'),
-                        )
-
-                        metrics["genai_client_usage_tokens"].record(
-                            input_tokens + output_tokens, attributes
-                        )
-                        metrics["genai_client_operation_duration"].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics["genai_server_ttft"].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                    # Return original response
-                    return response
-
-                except Exception as e:
-                    handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
-
-                    # Return original response
-                    return response
-
-    return wrapper
-
-def async_embedding(version, environment, application_name,
-                    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
-    """
-    Generates a telemetry wrapper for embeddings to collect metrics.
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Azure Inference API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Azure Inference usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the embeddings method to add telemetry.
-    """
-
-    async def wrapper(wrapped, instance, args, kwargs):
-        """
-        Wraps the 'embeddings' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'embeddings' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'embeddings' method.
-            kwargs: Keyword arguments for the 'embeddings' method.
-
-        Returns:
-            The response from the original 'embeddings' method.
-        """
-
-        server_address, server_port = set_server_address_and_port(instance, "models.github.ai", 443)
-        request_model = kwargs.get("model", "text-embedding-ada-002")
-
-        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
-
-        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
-            start_time = time.time()
-            response = await wrapped(*args, **kwargs)
-            end_time = time.time()
-
-            response_dict = response_as_dict(response)
-            try:
-                input_tokens = response_dict.get('usage').get('prompt_tokens')
-
-                # Calculate cost of the operation
-                cost = get_embed_model_cost(request_model,
-                                            pricing_info, input_tokens)
-
-                # Set Span attributes (OTel Semconv)
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                   SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   request_model)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
-                                   [kwargs.get('encoding_format', 'float')])
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
-                                   request_model)
-                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
-                                   server_address)
-                span.set_attribute(SemanticConvetion.SERVER_PORT,
-                                   server_port)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                   input_tokens)
-
-                # Set Span attributes (Extras)
-                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                   environment)
-                span.set_attribute(SERVICE_NAME,
-                                   application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   input_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                   cost)
-                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
-                                   version)
-
-                if capture_message_content:
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: str(kwargs.get("input", "")),
-                        },
-                    )
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = create_metrics_attributes(
-                        service_name=application_name,
-                        deployment_environment=environment,
-                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                        system=SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
-                        request_model=request_model,
-                        server_address=server_address,
-                        server_port=server_port,
-                        response_model=request_model,
-                    )
-                    metrics["genai_client_usage_tokens"].record(
-                        input_tokens, attributes
-                    )
-                    metrics["genai_client_operation_duration"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
-
-                # Return original response
-                return response
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-                # Return original response
-                return response
+                response = process_chat_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    event_provider=event_provider,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )
+
+                return response
 
     return wrapper
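
Net effect of the release: chat instrumentation (sync and async) gains an `event_provider` and delegates span, event, and metric assembly to shared helpers, while embedding instrumentation (`EmbeddingsClient.embed` and the entire `async_embedding` wrapper) is removed. From an application's point of view, usage is unchanged; a hedged sketch using openlit's documented `init()` entry point (parameter names mirror the kwargs read in `_instrument`; verify against the 1.33.18 docs):

```python
import openlit
from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential

# Applies the instrumentors, including AzureAIInferenceInstrumentor above.
openlit.init(application_name="demo-app", environment="staging")

client = ChatCompletionsClient(
    endpoint="https://example.services.ai.azure.com/models",  # placeholder
    credential=AzureKeyCredential("<api-key>"),               # placeholder
)

# complete() is traced (and, in 1.33.18, can emit OTel events through the
# new event_provider); EmbeddingsClient.embed is no longer instrumented.
response = client.complete(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
)
```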