openlit 1.21.0__py3-none-any.whl → 1.22.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openlit/__init__.py CHANGED
@@ -34,6 +34,7 @@ from openlit.instrumentation.gpt4all import GPT4AllInstrumentor
  from openlit.instrumentation.elevenlabs import ElevenLabsInstrumentor
  from openlit.instrumentation.vllm import VLLMInstrumentor
  from openlit.instrumentation.google_ai_studio import GoogleAIStudioInstrumentor
+ from openlit.instrumentation.azure_ai_inference import AzureAIInferenceInstrumentor
  from openlit.instrumentation.langchain import LangChainInstrumentor
  from openlit.instrumentation.llamaindex import LlamaIndexInstrumentor
  from openlit.instrumentation.haystack import HaystackInstrumentor
@@ -139,12 +140,17 @@ def instrument_if_available(
  ):
      """Instruments the specified instrumentor if its library is available."""
      if instrumentor_name in disabled_instrumentors:
+         logger.info("Instrumentor %s is disabled", instrumentor_name)
          return
  
      module_name = module_name_map.get(instrumentor_name)
  
-     if not module_name or find_spec(module_name) is not None:
-         try:
+     if not module_name:
+         logger.error("No module mapping for %s", instrumentor_name)
+         return
+ 
+     try:
+         if find_spec(module_name) is not None:
              instrumentor_instance.instrument(
                  environment=config.environment,
                  application_name=config.application_name,
@@ -154,10 +160,12 @@ def instrument_if_available(
                  metrics_dict=config.metrics_dict,
                  disable_metrics=config.disable_metrics,
              )
- 
-         # pylint: disable=broad-exception-caught
-         except Exception as e:
-             logger.error("Failed to instrument %s: %s", instrumentor_name, e)
+             logger.info("Instrumented %s", instrumentor_name)
+         else:
+             # pylint: disable=line-too-long
+             logger.info("Library for %s (%s) not found. Skipping instrumentation", instrumentor_name, module_name)
+     except Exception as e:
+         logger.error("Failed to instrument %s: %s", instrumentor_name, e)
  
  def init(environment="default", application_name="default", tracer=None, otlp_endpoint=None,
           otlp_headers=None, disable_batch=False, trace_content=True, disabled_instrumentors=None,
@@ -183,7 +191,7 @@ def init(environment="default", application_name="default", tracer=None, otlp_en
          collect_gpu_stats (bool): Flag to enable or disable GPU metrics collection.
      """
      disabled_instrumentors = disabled_instrumentors if disabled_instrumentors else []
-     # Check for invalid instrumentor names
+     logger.info("Starting openLIT initialization...")
  
      module_name_map = {
          "openai": "openai",
@@ -198,6 +206,7 @@ def init(environment="default", application_name="default", tracer=None, otlp_en
          "elevenlabs": "elevenlabs",
          "vllm": "vllm",
          "google-ai-studio": "google.generativeai",
+         "azure-ai-inference": "azure.ai.inference",
          "langchain": "langchain",
          "llama_index": "llama_index",
          "haystack": "haystack",
@@ -213,9 +222,7 @@ def init(environment="default", application_name="default", tracer=None, otlp_en
          name for name in disabled_instrumentors if name not in module_name_map
      ]
      for invalid_name in invalid_instrumentors:
-         logger.warning(
-             "Invalid instrumentor name detected and ignored: '%s'", invalid_name
-         )
+         logger.warning("Invalid instrumentor name detected and ignored: '%s'", invalid_name)
  
      try:
          # Retrieve or create the single configuration instance.
@@ -235,7 +242,7 @@ def init(environment="default", application_name="default", tracer=None, otlp_en
              logger.error("openLIT tracing setup failed. Tracing will not be available.")
              return
  
-         # Setup meter and receive metrics_dict instead of meter
+         # Setup meter and receive metrics_dict instead of meter.
          metrics_dict = setup_meter(
              application_name=application_name,
              environment=environment,
@@ -276,6 +283,7 @@ def init(environment="default", application_name="default", tracer=None, otlp_en
              "elevenlabs": ElevenLabsInstrumentor(),
              "vllm": VLLMInstrumentor(),
              "google-ai-studio": GoogleAIStudioInstrumentor(),
+             "azure-ai-inference": AzureAIInferenceInstrumentor(),
              "langchain": LangChainInstrumentor(),
              "llama_index": LlamaIndexInstrumentor(),
              "haystack": HaystackInstrumentor(),
@@ -290,14 +298,13 @@ def init(environment="default", application_name="default", tracer=None, otlp_en
          # Initialize and instrument only the enabled instrumentors
          for name, instrumentor in instrumentor_instances.items():
              instrument_if_available(name, instrumentor, config,
-                             disabled_instrumentors, module_name_map)
+                                     disabled_instrumentors, module_name_map)
  
-         if (disable_metrics is False) and (collect_gpu_stats is True):
+         if not disable_metrics and collect_gpu_stats:
              NvidiaGPUInstrumentor().instrument(
                  environment=config.environment,
                  application_name=config.application_name,
              )
- 
      except Exception as e:
          logger.error("Error during openLIT initialization: %s", e)
  
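Taken together, the `__init__.py` changes log every instrumentation decision (disabled, missing module mapping, missing library, success, failure) instead of failing silently, and register the new `azure-ai-inference` instrumentor. A minimal usage sketch of the new behavior; the application name and environment values are illustrative only:

```python
# Sketch only: exercises the new init() code paths from this diff.
import openlit

# Instruments azure-ai-inference automatically when the `azure.ai.inference`
# module is importable; otherwise init() now logs
# "Library for azure-ai-inference (azure.ai.inference) not found. Skipping instrumentation".
openlit.init(application_name="demo-app", environment="staging")

# Opting out now logs "Instrumentor azure-ai-inference is disabled".
openlit.init(disabled_instrumentors=["azure-ai-inference"])
```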
openlit/instrumentation/azure_ai_inference/__init__.py ADDED
@@ -0,0 +1,71 @@
+ # pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
+ """Initializer of Auto Instrumentation of Azure AI Inference Functions"""
+ 
+ from typing import Collection
+ import importlib.metadata
+ from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+ from wrapt import wrap_function_wrapper
+ 
+ from openlit.instrumentation.azure_ai_inference.azure_ai_inference import (
+     complete, embedding
+ )
+ 
+ from openlit.instrumentation.azure_ai_inference.async_azure_ai_inference import (
+     async_complete, async_embedding
+ )
+ 
+ _instruments = ("azure-ai-inference >= 1.0.0b4",)
+ 
+ class AzureAIInferenceInstrumentor(BaseInstrumentor):
+     """
+     An instrumentor for azure-ai-inference's client library.
+     """
+ 
+     def instrumentation_dependencies(self) -> Collection[str]:
+         return _instruments
+ 
+     def _instrument(self, **kwargs):
+         application_name = kwargs.get("application_name", "default_application")
+         environment = kwargs.get("environment", "default_environment")
+         tracer = kwargs.get("tracer")
+         metrics = kwargs.get("metrics_dict")
+         pricing_info = kwargs.get("pricing_info", {})
+         trace_content = kwargs.get("trace_content", False)
+         disable_metrics = kwargs.get("disable_metrics")
+         version = importlib.metadata.version("azure-ai-inference")
+ 
+         # sync generate
+         wrap_function_wrapper(
+             "azure.ai.inference",
+             "ChatCompletionsClient.complete",
+             complete("azure_ai.complete", version, environment, application_name,
+                      tracer, pricing_info, trace_content, metrics, disable_metrics),
+         )
+ 
+         # sync embedding
+         wrap_function_wrapper(
+             "azure.ai.inference",
+             "EmbeddingsClient.embed",
+             embedding("azure_ai.embed", version, environment, application_name,
+                       tracer, pricing_info, trace_content, metrics, disable_metrics),
+         )
+ 
+         # async generate
+         wrap_function_wrapper(
+             "azure.ai.inference.aio",
+             "ChatCompletionsClient.complete",
+             async_complete("azure_ai.complete", version, environment, application_name,
+                            tracer, pricing_info, trace_content, metrics, disable_metrics),
+         )
+ 
+         # async embedding
+         wrap_function_wrapper(
+             "azure.ai.inference.aio",
+             "EmbeddingsClient.embed",
+             async_embedding("azure_ai.embed", version, environment, application_name,
+                             tracer, pricing_info, trace_content, metrics, disable_metrics),
+         )
+ 
+     def _uninstrument(self, **kwargs):
+         # Proper uninstrumentation logic to revert patched methods
+         pass
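`BaseInstrumentor.instrument()` forwards its keyword arguments to `_instrument()` above, so the instrumentor can also be wired up by hand. A sketch under the assumption that you supply your own tracer and keep metrics off; openlit's `init()` normally provides `tracer` and `metrics_dict` itself:

```python
# Hypothetical manual wiring; openlit.init() normally does this for you.
from opentelemetry import trace
from openlit.instrumentation.azure_ai_inference import AzureAIInferenceInstrumentor

AzureAIInferenceInstrumentor().instrument(
    application_name="demo-app",
    environment="staging",
    tracer=trace.get_tracer(__name__),
    metrics_dict=None,        # unused because metrics are disabled below
    pricing_info={},
    trace_content=True,
    disable_metrics=True,     # keep metrics off since metrics_dict is None
)
```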
openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py ADDED
@@ -0,0 +1,432 @@
+ # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment, protected-access
+ """
+ Module for monitoring Azure AI Inference API calls.
+ """
+ 
+ import logging
+ from opentelemetry.trace import SpanKind, Status, StatusCode
+ from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+ from openlit.__helpers import (
+     handle_exception,
+     get_chat_model_cost,
+     get_embed_model_cost,
+     general_tokens
+ )
+ from openlit.semcov import SemanticConvetion
+ 
+ # Initialize logger for logging potential issues and operations
+ logger = logging.getLogger(__name__)
+ 
+ def async_complete(gen_ai_endpoint, version, environment, application_name,
+                    tracer, pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for chat to collect metrics.
+ 
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the Azure AI Inference API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of Azure AI Inference usage.
+         trace_content: Flag indicating whether to trace the actual content.
+ 
+     Returns:
+         A function that wraps the chat method to add telemetry.
+     """
+ 
+     async def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'chat' API call to add telemetry.
+ 
+         This collects metrics such as execution time, cost, and token usage, and handles errors
+         gracefully, adding details to the trace for observability.
+ 
+         Args:
+             wrapped: The original 'chat' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'chat' method.
+             kwargs: Keyword arguments for the 'chat' method.
+ 
+         Returns:
+             The response from the original 'chat' method.
+         """
+         # pylint: disable=no-else-return
+         if kwargs.get("stream", False) is True:
+             # Special handling for streaming response to accommodate the nature of data flow
+             async def stream_generator():
+                 with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+                     # Placeholder for aggregating streaming response
+                     llmresponse = ""
+ 
+                     # Loop through streaming events capturing relevant details
+                     async for chunk in await wrapped(*args, **kwargs):
+                         if chunk.choices:
+                             # Collect message IDs and aggregated response from events
+                             content = chunk.choices[0].delta.content
+                             if content:
+                                 llmresponse += content
+ 
+                         yield chunk
+                         response_id = chunk.id
+ 
+                     # Handle exceptions to ensure observability without disrupting operation
+                     try:
+                         # Format 'messages' into a single string
+                         message_prompt = kwargs.get("messages", "")
+                         formatted_messages = []
+                         for message in message_prompt:
+                             role = message["role"]
+                             content = message["content"]
+ 
+                             if isinstance(content, list):
+                                 content_str = ", ".join(
+                                     # pylint: disable=line-too-long
+                                     f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                                     if "type" in item else f'text: {item["text"]}'
+                                     for item in content
+                                 )
+                                 formatted_messages.append(f"{role}: {content_str}")
+                             else:
+                                 formatted_messages.append(f"{role}: {content}")
+                         prompt = "\n".join(formatted_messages)
+ 
+                         model = kwargs.get("model", "phi3-mini-4k")
+ 
+                         # Calculate tokens using input prompt and aggregated response
+                         input_tokens = general_tokens(prompt)
+                         output_tokens = general_tokens(llmresponse)
+ 
+                         total_tokens = input_tokens + output_tokens
+                         # Calculate cost of the operation
+                         cost = get_chat_model_cost(model,
+                                                    pricing_info, input_tokens,
+                                                    output_tokens)
+ 
+                         # Set base span attributes
+                         span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                         span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                            SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
+                         span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                            SemanticConvetion.GEN_AI_TYPE_CHAT)
+                         span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                            gen_ai_endpoint)
+                         span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                            environment)
+                         span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                            application_name)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                            model)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                            True)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                            kwargs.get("user", ""))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                            kwargs.get("top_p", 1.0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                            kwargs.get("max_tokens", -1))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                            kwargs.get("temperature", 1.0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                            kwargs.get("presence_penalty", 0.0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                            kwargs.get("frequency_penalty", 0.0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                            kwargs.get("seed", ""))
+                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                            response_id)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                            input_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                            output_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                            total_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                            cost)
+ 
+ 
+                         if trace_content:
+                             span.add_event(
+                                 name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                                 attributes={
+                                     SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                                 },
+                             )
+                             span.add_event(
+                                 name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                 attributes={
+                                     SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                                 },
+                             )
+ 
+                         span.set_status(Status(StatusCode.OK))
+ 
+                         if disable_metrics is False:
+                             attributes = {
+                                 TELEMETRY_SDK_NAME:
+                                     "openlit",
+                                 SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                     application_name,
+                                 SemanticConvetion.GEN_AI_SYSTEM:
+                                     SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+                                 SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                     environment,
+                                 SemanticConvetion.GEN_AI_TYPE:
+                                     SemanticConvetion.GEN_AI_TYPE_CHAT,
+                                 SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                     model
+                             }
+ 
+                             metrics["genai_requests"].add(1, attributes)
+                             metrics["genai_total_tokens"].add(
+                                 total_tokens, attributes
+                             )
+                             metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                             metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                             metrics["genai_cost"].record(cost, attributes)
+ 
+                     except Exception as e:
+                         handle_exception(span, e)
+                         logger.error("Error in trace creation: %s", e)
+ 
+             return stream_generator()
+         else:
+             # pylint: disable=line-too-long
+             with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+                 response = await wrapped(*args, **kwargs)
+ 
+                 # print(instance._system_instruction.__dict__["_pb"].parts[0].text)
+                 try:
+                     # Format 'messages' into a single string
+                     message_prompt = kwargs.get("messages", "")
+                     formatted_messages = []
+                     for message in message_prompt:
+                         role = message["role"]
+                         content = message["content"]
+ 
+                         if isinstance(content, list):
+                             content_str = ", ".join(
+                                 # pylint: disable=line-too-long
+                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                                 if "type" in item else f'text: {item["text"]}'
+                                 for item in content
+                             )
+                             formatted_messages.append(f"{role}: {content_str}")
+                         else:
+                             formatted_messages.append(f"{role}: {content}")
+                     prompt = "\n".join(formatted_messages)
+ 
+                     model = kwargs.get("model", "phi3-mini-4k")
+ 
+                     # Set base span attributes
+                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                        SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
+                     span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                        SemanticConvetion.GEN_AI_TYPE_CHAT)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                        gen_ai_endpoint)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                        environment)
+                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                        application_name)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                        model)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                        False)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                        kwargs.get("user", ""))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                        kwargs.get("top_p", 1.0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                        kwargs.get("max_tokens", -1))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                        kwargs.get("temperature", 1.0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                        kwargs.get("presence_penalty", 0.0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                        kwargs.get("frequency_penalty", 0.0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                        kwargs.get("seed", ""))
+                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                        response.id)
+                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                        [response.choices[0]["finish_reason"]])
+ 
+                     if trace_content:
+                         span.add_event(
+                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                             attributes={
+                                 SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                             },
+                         )
+                         span.add_event(
+                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                             attributes={
+                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[0].message.content,
+                             },
+                         )
+ 
+                     input_tokens = response.usage.prompt_tokens
+                     output_tokens = response.usage.completion_tokens
+                     total_tokens = response.usage.total_tokens
+                     # Calculate cost of the operation
+                     cost = get_chat_model_cost(model,
+                                                pricing_info, input_tokens, output_tokens)
+ 
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                        input_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                        output_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                        total_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                        cost)
+ 
+                     span.set_status(Status(StatusCode.OK))
+ 
+                     if disable_metrics is False:
+                         attributes = {
+                             TELEMETRY_SDK_NAME:
+                                 "openlit",
+                             SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                 application_name,
+                             SemanticConvetion.GEN_AI_SYSTEM:
+                                 SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+                             SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                 environment,
+                             SemanticConvetion.GEN_AI_TYPE:
+                                 SemanticConvetion.GEN_AI_TYPE_CHAT,
+                             SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                 model
+                         }
+ 
+                         metrics["genai_requests"].add(1, attributes)
+                         metrics["genai_total_tokens"].add(total_tokens, attributes)
+                         metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                         metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                         metrics["genai_cost"].record(cost, attributes)
+ 
+                     # Return original response
+                     return response
+ 
+                 except Exception as e:
+                     handle_exception(span, e)
+                     logger.error("Error in trace creation: %s", e)
+ 
+                     # Return original response
+                     return response
+ 
+     return wrapper
+ 
+ def async_embedding(gen_ai_endpoint, version, environment, application_name,
+                     tracer, pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for embeddings to collect metrics.
+ 
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the Azure AI Inference API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of Azure AI Inference usage.
+         trace_content: Flag indicating whether to trace the actual content.
+ 
+     Returns:
+         A function that wraps the embeddings method to add telemetry.
+     """
+ 
+     async def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'embeddings' API call to add telemetry.
+ 
+         This collects metrics such as execution time, cost, and token usage, and handles errors
+         gracefully, adding details to the trace for observability.
+ 
+         Args:
+             wrapped: The original 'embeddings' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'embeddings' method.
+             kwargs: Keyword arguments for the 'embeddings' method.
+ 
+         Returns:
+             The response from the original 'embeddings' method.
+         """
+ 
+         with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+             response = await wrapped(*args, **kwargs)
+ 
+             try:
+                 # Calculate cost of the operation
+                 cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
+                                             pricing_info, response.usage.prompt_tokens)
+ 
+                 # Set Span attributes
+                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                    SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
+                 span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                    SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
+                 span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                    gen_ai_endpoint)
+                 span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                    environment)
+                 span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                    application_name)
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                    kwargs.get("model", "text-embedding-ada-002"))
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
+                                    kwargs.get("encoding_format", "float"))
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
+                                    kwargs.get("dimensions", ""))
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                    kwargs.get("user", ""))
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                    response.usage.prompt_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                    response.usage.total_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                    cost)
+                 if trace_content:
+                     span.add_event(
+                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                         attributes={
+                             SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("input", ""),
+                         },
+                     )
+ 
+                 span.set_status(Status(StatusCode.OK))
+ 
+                 if disable_metrics is False:
+                     attributes = {
+                         TELEMETRY_SDK_NAME:
+                             "openlit",
+                         SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                             application_name,
+                         SemanticConvetion.GEN_AI_SYSTEM:
+                             SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+                         SemanticConvetion.GEN_AI_ENVIRONMENT:
+                             environment,
+                         SemanticConvetion.GEN_AI_TYPE:
+                             SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
+                         SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                             kwargs.get("model", "text-embedding-ada-002")
+                     }
+ 
+                     metrics["genai_requests"].add(1, attributes)
+                     metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
+                     metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
+                     metrics["genai_cost"].record(cost, attributes)
+ 
+                 # Return original response
+                 return response
+ 
+             except Exception as e:
+                 handle_exception(span, e)
+                 logger.error("Error in trace creation: %s", e)
+ 
+                 # Return original response
+                 return response
+ 
+     return wrapper
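Once these async wrappers are applied, a plain `azure.ai.inference.aio` client is traced transparently; for streaming calls the span stays open until the returned generator is exhausted. A sketch with placeholder endpoint and key:

```python
# Placeholder endpoint/key; messages are plain dicts because the wrapper
# above reads message["role"] and message["content"].
import asyncio
from azure.ai.inference.aio import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential

async def main():
    client = ChatCompletionsClient(
        endpoint="https://<resource>.inference.ai.azure.com",
        credential=AzureKeyCredential("<api-key>"),
    )
    # stream=True: awaiting complete() yields stream_generator() from above.
    stream = await client.complete(
        model="phi3-mini-4k",
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
    )
    async for chunk in stream:  # the span closes once the stream is drained
        pass
    await client.close()

asyncio.run(main())
```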
openlit/instrumentation/azure_ai_inference/azure_ai_inference.py ADDED
@@ -0,0 +1,432 @@
+ # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment, protected-access
+ """
+ Module for monitoring Azure AI Inference API calls.
+ """
+ 
+ import logging
+ from opentelemetry.trace import SpanKind, Status, StatusCode
+ from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+ from openlit.__helpers import (
+     handle_exception,
+     get_chat_model_cost,
+     get_embed_model_cost,
+     general_tokens
+ )
+ from openlit.semcov import SemanticConvetion
+ 
+ # Initialize logger for logging potential issues and operations
+ logger = logging.getLogger(__name__)
+ 
+ def complete(gen_ai_endpoint, version, environment, application_name,
+              tracer, pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for chat to collect metrics.
+ 
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the Azure AI Inference API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of Azure AI Inference usage.
+         trace_content: Flag indicating whether to trace the actual content.
+ 
+     Returns:
+         A function that wraps the chat method to add telemetry.
+     """
+ 
+     def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'chat' API call to add telemetry.
+ 
+         This collects metrics such as execution time, cost, and token usage, and handles errors
+         gracefully, adding details to the trace for observability.
+ 
+         Args:
+             wrapped: The original 'chat' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'chat' method.
+             kwargs: Keyword arguments for the 'chat' method.
+ 
+         Returns:
+             The response from the original 'chat' method.
+         """
+         # pylint: disable=no-else-return
+         if kwargs.get("stream", False) is True:
+             # Special handling for streaming response to accommodate the nature of data flow
+             def stream_generator():
+                 with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+                     # Placeholder for aggregating streaming response
+                     llmresponse = ""
+ 
+                     # Loop through streaming events capturing relevant details
+                     for chunk in wrapped(*args, **kwargs):
+                         if chunk.choices:
+                             # Collect message IDs and aggregated response from events
+                             content = chunk.choices[0].delta.content
+                             if content:
+                                 llmresponse += content
+ 
+                         yield chunk
+                         response_id = chunk.id
+ 
+                     # Handle exceptions to ensure observability without disrupting operation
+                     try:
+                         # Format 'messages' into a single string
+                         message_prompt = kwargs.get("messages", "")
+                         formatted_messages = []
+                         for message in message_prompt:
+                             role = message["role"]
+                             content = message["content"]
+ 
+                             if isinstance(content, list):
+                                 content_str = ", ".join(
+                                     # pylint: disable=line-too-long
+                                     f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                                     if "type" in item else f'text: {item["text"]}'
+                                     for item in content
+                                 )
+                                 formatted_messages.append(f"{role}: {content_str}")
+                             else:
+                                 formatted_messages.append(f"{role}: {content}")
+                         prompt = "\n".join(formatted_messages)
+ 
+                         model = kwargs.get("model", "phi3-mini-4k")
+ 
+                         # Calculate tokens using input prompt and aggregated response
+                         input_tokens = general_tokens(prompt)
+                         output_tokens = general_tokens(llmresponse)
+ 
+                         total_tokens = input_tokens + output_tokens
+                         # Calculate cost of the operation
+                         cost = get_chat_model_cost(model,
+                                                    pricing_info, input_tokens,
+                                                    output_tokens)
+ 
+                         # Set base span attributes
+                         span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                         span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                            SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
+                         span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                            SemanticConvetion.GEN_AI_TYPE_CHAT)
+                         span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                            gen_ai_endpoint)
+                         span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                            environment)
+                         span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                            application_name)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                            model)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                            True)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                            kwargs.get("user", ""))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                            kwargs.get("top_p", 1.0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                            kwargs.get("max_tokens", -1))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                            kwargs.get("temperature", 1.0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                            kwargs.get("presence_penalty", 0.0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                            kwargs.get("frequency_penalty", 0.0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                            kwargs.get("seed", ""))
+                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                            response_id)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                            input_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                            output_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                            total_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                            cost)
+ 
+ 
+                         if trace_content:
+                             span.add_event(
+                                 name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                                 attributes={
+                                     SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                                 },
+                             )
+                             span.add_event(
+                                 name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                 attributes={
+                                     SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                                 },
+                             )
+ 
+                         span.set_status(Status(StatusCode.OK))
+ 
+                         if disable_metrics is False:
+                             attributes = {
+                                 TELEMETRY_SDK_NAME:
+                                     "openlit",
+                                 SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                     application_name,
+                                 SemanticConvetion.GEN_AI_SYSTEM:
+                                     SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+                                 SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                     environment,
+                                 SemanticConvetion.GEN_AI_TYPE:
+                                     SemanticConvetion.GEN_AI_TYPE_CHAT,
+                                 SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                     model
+                             }
+ 
+                             metrics["genai_requests"].add(1, attributes)
+                             metrics["genai_total_tokens"].add(
+                                 total_tokens, attributes
+                             )
+                             metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                             metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                             metrics["genai_cost"].record(cost, attributes)
+ 
+                     except Exception as e:
+                         handle_exception(span, e)
+                         logger.error("Error in trace creation: %s", e)
+ 
+             return stream_generator()
+         else:
+             # pylint: disable=line-too-long
+             with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+                 response = wrapped(*args, **kwargs)
+ 
+                 # print(instance._system_instruction.__dict__["_pb"].parts[0].text)
+                 try:
+                     # Format 'messages' into a single string
+                     message_prompt = kwargs.get("messages", "")
+                     formatted_messages = []
+                     for message in message_prompt:
+                         role = message["role"]
+                         content = message["content"]
+ 
+                         if isinstance(content, list):
+                             content_str = ", ".join(
+                                 # pylint: disable=line-too-long
+                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                                 if "type" in item else f'text: {item["text"]}'
+                                 for item in content
+                             )
+                             formatted_messages.append(f"{role}: {content_str}")
+                         else:
+                             formatted_messages.append(f"{role}: {content}")
+                     prompt = "\n".join(formatted_messages)
+ 
+                     model = kwargs.get("model", "phi3-mini-4k")
+ 
+                     # Set base span attributes
+                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                        SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
+                     span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                        SemanticConvetion.GEN_AI_TYPE_CHAT)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                        gen_ai_endpoint)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                        environment)
+                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                        application_name)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                        model)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                        False)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                        kwargs.get("user", ""))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                        kwargs.get("top_p", 1.0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                        kwargs.get("max_tokens", -1))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                        kwargs.get("temperature", 1.0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                        kwargs.get("presence_penalty", 0.0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                        kwargs.get("frequency_penalty", 0.0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                        kwargs.get("seed", ""))
+                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                        response.id)
+                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                        [response.choices[0]["finish_reason"]])
+ 
+                     if trace_content:
+                         span.add_event(
+                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                             attributes={
+                                 SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                             },
+                         )
+                         span.add_event(
+                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                             attributes={
+                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[0].message.content,
+                             },
+                         )
+ 
+                     input_tokens = response.usage.prompt_tokens
+                     output_tokens = response.usage.completion_tokens
+                     total_tokens = response.usage.total_tokens
+                     # Calculate cost of the operation
+                     cost = get_chat_model_cost(model,
+                                                pricing_info, input_tokens, output_tokens)
+ 
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                        input_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                        output_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                        total_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                        cost)
+ 
+                     span.set_status(Status(StatusCode.OK))
+ 
+                     if disable_metrics is False:
+                         attributes = {
+                             TELEMETRY_SDK_NAME:
+                                 "openlit",
+                             SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                 application_name,
+                             SemanticConvetion.GEN_AI_SYSTEM:
+                                 SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+                             SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                 environment,
+                             SemanticConvetion.GEN_AI_TYPE:
+                                 SemanticConvetion.GEN_AI_TYPE_CHAT,
+                             SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                 model
+                         }
+ 
+                         metrics["genai_requests"].add(1, attributes)
+                         metrics["genai_total_tokens"].add(total_tokens, attributes)
+                         metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                         metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                         metrics["genai_cost"].record(cost, attributes)
+ 
+                     # Return original response
+                     return response
+ 
+                 except Exception as e:
+                     handle_exception(span, e)
+                     logger.error("Error in trace creation: %s", e)
+ 
+                     # Return original response
+                     return response
+ 
+     return wrapper
+ 
+ def embedding(gen_ai_endpoint, version, environment, application_name,
+               tracer, pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for embeddings to collect metrics.
+ 
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the Azure AI Inference API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of Azure AI Inference usage.
+         trace_content: Flag indicating whether to trace the actual content.
+ 
+     Returns:
+         A function that wraps the embeddings method to add telemetry.
+     """
+ 
+     def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'embeddings' API call to add telemetry.
+ 
+         This collects metrics such as execution time, cost, and token usage, and handles errors
+         gracefully, adding details to the trace for observability.
+ 
+         Args:
+             wrapped: The original 'embeddings' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'embeddings' method.
+             kwargs: Keyword arguments for the 'embeddings' method.
+ 
+         Returns:
+             The response from the original 'embeddings' method.
+         """
+ 
+         with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+             response = wrapped(*args, **kwargs)
+ 
+             try:
+                 # Calculate cost of the operation
+                 cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
+                                             pricing_info, response.usage.prompt_tokens)
+ 
+                 # Set Span attributes
+                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                    SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
+                 span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                    SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
+                 span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                    gen_ai_endpoint)
+                 span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                    environment)
+                 span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                    application_name)
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                    kwargs.get("model", "text-embedding-ada-002"))
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
+                                    kwargs.get("encoding_format", "float"))
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
+                                    kwargs.get("dimensions", ""))
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                    kwargs.get("user", ""))
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                    response.usage.prompt_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                    response.usage.total_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                    cost)
+                 if trace_content:
+                     span.add_event(
+                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                         attributes={
+                             SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("input", ""),
+                         },
+                     )
+ 
+                 span.set_status(Status(StatusCode.OK))
+ 
+                 if disable_metrics is False:
+                     attributes = {
+                         TELEMETRY_SDK_NAME:
+                             "openlit",
+                         SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                             application_name,
+                         SemanticConvetion.GEN_AI_SYSTEM:
+                             SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+                         SemanticConvetion.GEN_AI_ENVIRONMENT:
+                             environment,
+                         SemanticConvetion.GEN_AI_TYPE:
+                             SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
+                         SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                             kwargs.get("model", "text-embedding-ada-002")
+                     }
+ 
+                     metrics["genai_requests"].add(1, attributes)
+                     metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
+                     metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
+                     metrics["genai_cost"].record(cost, attributes)
+ 
+                 # Return original response
+                 return response
+ 
+             except Exception as e:
+                 handle_exception(span, e)
+                 logger.error("Error in trace creation: %s", e)
+ 
+                 # Return original response
+                 return response
+ 
+     return wrapper
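The sync wrappers behave the same way through `azure.ai.inference`. A non-streaming sketch with placeholder endpoint and key; here the span is recorded before `complete()` returns:

```python
from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential

client = ChatCompletionsClient(
    endpoint="https://<resource>.inference.ai.azure.com",  # placeholder
    credential=AzureKeyCredential("<api-key>"),            # placeholder
)

# Traced transparently: token usage, cost, and content events are attached
# to the "azure_ai.complete" span registered by the instrumentor above.
response = client.complete(
    model="phi3-mini-4k",
    messages=[{"role": "user", "content": "Say hello"}],
)
print(response.choices[0].message.content)
```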
openlit/instrumentation/google_ai_studio/__init__.py CHANGED
@@ -32,7 +32,7 @@ class GoogleAIStudioInstrumentor(BaseInstrumentor):
          pricing_info = kwargs.get("pricing_info", {})
          trace_content = kwargs.get("trace_content", False)
          disable_metrics = kwargs.get("disable_metrics")
-         version = importlib.metadata.version("ollama")
+         version = importlib.metadata.version("google-generativeai")
  
          # sync generate
          wrap_function_wrapper(
openlit/instrumentation/langchain/langchain.py CHANGED
@@ -481,8 +481,8 @@ def chat(gen_ai_endpoint, version, environment, application_name,
              response = wrapped(*args, **kwargs)
  
              try:
-                 input_tokens = response.response_metadata["prompt_eval_count"] or 0
-                 output_tokens = response.response_metadata["eval_count"] or 0
+                 input_tokens = response.response_metadata.get("prompt_eval_count", 0)
+                 output_tokens = response.response_metadata.get("eval_count", 0)
  
                  # Calculate cost of the operation
                  cost = get_chat_model_cost(
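The switch to `dict.get()` matters because `response_metadata` is not guaranteed to carry token counts; subscripting raised `KeyError` before the `or 0` fallback could ever apply. A minimal illustration:

```python
# Minimal illustration of the fix; not openlit code.
response_metadata = {}  # e.g., a backend that reports no token counts

# Old form: raises KeyError, so the `or 0` never runs.
# input_tokens = response_metadata["prompt_eval_count"] or 0

# New form: falls back to 0 without raising.
input_tokens = response_metadata.get("prompt_eval_count", 0)
assert input_tokens == 0
```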
openlit/semcov/__init__.py CHANGED
@@ -103,6 +103,7 @@ class SemanticConvetion:
      GEN_AI_SYSTEM_ELEVENLABS = "elevenlabs"
      GEN_AI_SYSTEM_VLLM = "vLLM"
      GEN_AI_SYSTEM_GOOGLE_AI_STUDIO = "google-ai-studio"
+     GEN_AI_SYSTEM_AZURE_AI_INFERENCE = "azure-ai-inference"
      GEN_AI_SYSTEM_LANGCHAIN = "langchain"
      GEN_AI_SYSTEM_LLAMAINDEX = "llama_index"
      GEN_AI_SYSTEM_HAYSTACK = "haystack"
openlit-1.21.0.dist-info/METADATA → openlit-1.22.2.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: openlit
- Version: 1.21.0
+ Version: 1.22.2
  Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
  Home-page: https://github.com/openlit/openlit/tree/main/openlit/python
  Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
@@ -54,23 +54,26 @@ This project adheres to the [Semantic Conventions](https://github.com/open-telem
  
  ## Auto Instrumentation Capabilities
  
- | LLMs | Vector DBs | Frameworks | GPUs |
- |-----------------------------------------------------------------|----------------------------------------------|----------------------------------------------|---------------|
- | [✅ OpenAI](https://docs.openlit.io/latest/integrations/openai) | [✅ ChromaDB](https://docs.openlit.io/latest/integrations/chromadb) | [✅ Langchain](https://docs.openlit.io/latest/integrations/langchain) | [✅ NVIDIA GPUs](https://docs.openlit.io/latest/integrations/nvidia-gpu) |
- | [✅ Ollama](https://docs.openlit.io/latest/integrations/ollama) | [✅ Pinecone](https://docs.openlit.io/latest/integrations/pinecone) | [✅ LiteLLM](https://docs.openlit.io/latest/integrations/litellm) | |
- | [✅ Anthropic](https://docs.openlit.io/latest/integrations/anthropic) | [✅ Qdrant](https://docs.openlit.io/latest/integrations/qdrant) | [✅ LlamaIndex](https://docs.openlit.io/latest/integrations/llama-index) | |
- | [✅ GPT4All](https://docs.openlit.io/latest/integrations/gpt4all) | [✅ Milvus](https://docs.openlit.io/latest/integrations/milvus) | [✅ Haystack](https://docs.openlit.io/latest/integrations/haystack) | |
- | [✅ Cohere](https://docs.openlit.io/latest/integrations/cohere) | | [✅ EmbedChain](https://docs.openlit.io/latest/integrations/embedchain) | |
- | [✅ Mistral](https://docs.openlit.io/latest/integrations/mistral) | | [✅ Guardrails](https://docs.openlit.io/latest/integrations/guardrails) | |
- | [✅ Azure OpenAI](https://docs.openlit.io/latest/integrations/azure-openai) | | | |
- | [✅ HuggingFace Transformers](https://docs.openlit.io/latest/integrations/huggingface) | | | |
- | [✅ Amazon Bedrock](https://docs.openlit.io/latest/integrations/bedrock) | | | |
- | [✅ Vertex AI](https://docs.openlit.io/latest/integrations/vertexai) | | | |
- | [✅ Groq](https://docs.openlit.io/latest/integrations/groq) | | | |
- | [✅ ElevenLabs](https://docs.openlit.io/latest/integrations/elevenlabs) | | | |
- | [✅ vLLM](https://docs.openlit.io/latest/integrations/vllm) | | | |
- | [✅ OLA Krutrim](https://docs.openlit.io/latest/integrations/krutrim) | | | |
- | [✅ Google AI Studio](https://docs.openlit.io/latest/integrations/google-ai-studio) | | | |
+ | LLMs | Vector DBs | Frameworks | GPUs |
+ |--------------------------------------------------------------------------|----------------------------------------------|----------------------------------------------|---------------|
+ | [✅ OpenAI](https://docs.openlit.io/latest/integrations/openai) | [✅ ChromaDB](https://docs.openlit.io/latest/integrations/chromadb) | [✅ Langchain](https://docs.openlit.io/latest/integrations/langchain) | [✅ NVIDIA GPUs](https://docs.openlit.io/latest/integrations/nvidia-gpu) |
+ | [✅ Ollama](https://docs.openlit.io/latest/integrations/ollama) | [✅ Pinecone](https://docs.openlit.io/latest/integrations/pinecone) | [✅ LiteLLM](https://docs.openlit.io/latest/integrations/litellm) | |
+ | [✅ Anthropic](https://docs.openlit.io/latest/integrations/anthropic) | [✅ Qdrant](https://docs.openlit.io/latest/integrations/qdrant) | [✅ LlamaIndex](https://docs.openlit.io/latest/integrations/llama-index) | |
+ | [✅ GPT4All](https://docs.openlit.io/latest/integrations/gpt4all) | [✅ Milvus](https://docs.openlit.io/latest/integrations/milvus) | [✅ Haystack](https://docs.openlit.io/latest/integrations/haystack) | |
+ | [✅ Cohere](https://docs.openlit.io/latest/integrations/cohere) | | [✅ EmbedChain](https://docs.openlit.io/latest/integrations/embedchain) | |
+ | [✅ Mistral](https://docs.openlit.io/latest/integrations/mistral) | | [✅ Guardrails](https://docs.openlit.io/latest/integrations/guardrails) | |
+ | [✅ Azure OpenAI](https://docs.openlit.io/latest/integrations/azure-openai) | | | |
+ | [✅ Azure AI Inference](https://docs.openlit.io/latest/integrations/azure-ai-inference) | | | |
+ | [✅ GitHub AI Models](https://docs.openlit.io/latest/integrations/github-models) | | | |
+ | [✅ HuggingFace Transformers](https://docs.openlit.io/latest/integrations/huggingface) | | | |
+ | [✅ Amazon Bedrock](https://docs.openlit.io/latest/integrations/bedrock) | | | |
+ | [✅ Vertex AI](https://docs.openlit.io/latest/integrations/vertexai) | | | |
+ | [✅ Groq](https://docs.openlit.io/latest/integrations/groq) | | | |
+ | [✅ ElevenLabs](https://docs.openlit.io/latest/integrations/elevenlabs) | | | |
+ | [✅ vLLM](https://docs.openlit.io/latest/integrations/vllm) | | | |
+ | [✅ OLA Krutrim](https://docs.openlit.io/latest/integrations/krutrim) | | | |
+ | [✅ Google AI Studio](https://docs.openlit.io/latest/integrations/google-ai-studio) | | | |
+ | [✅ NVIDIA NIM](https://docs.openlit.io/latest/integrations/nvidia-nim) | | | |
  
  ## Supported Destinations
  - [✅ OpenTelemetry Collector](https://docs.openlit.io/latest/connections/otelcol)
openlit-1.21.0.dist-info/RECORD → openlit-1.22.2.dist-info/RECORD RENAMED
@@ -1,8 +1,11 @@
  openlit/__helpers.py,sha256=lrn4PBs9owDudiCY2NBoVbAi7AU_HtUpyOj0oqPBsPY,5545
- openlit/__init__.py,sha256=fqeMFg76VYEX4VdxTn76Fh-0kbrN5_Sk8ahlm7Xlp9k,15296
+ openlit/__init__.py,sha256=NvlRqmf_mHcUNTHDI-Tv211x4BlDGLnU2vv8KTaGOxc,15782
  openlit/instrumentation/anthropic/__init__.py,sha256=oaU53BOPyfUKbEzYvLr1DPymDluurSnwo4Hernf2XdU,1955
  openlit/instrumentation/anthropic/anthropic.py,sha256=y7CEGhKOGHWt8G_5Phr4qPJTfPGRJIAr9Yk6nM3CcvM,16775
  openlit/instrumentation/anthropic/async_anthropic.py,sha256=Zz1KRKIG9wGn0quOoLvjORC-49IvHQpJ6GBdB-4PfCQ,16816
+ openlit/instrumentation/azure_ai_inference/__init__.py,sha256=Xl_4hjQeXcA-NgkqwTbs1ejPKRRnQXsDErXfFIz0z7U,2699
+ openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py,sha256=uZLOW1iD5pKZeQ4Mg-Mvmt9aWc2WrLiWlZn2dKbX0X4,22832
+ openlit/instrumentation/azure_ai_inference/azure_ai_inference.py,sha256=Hz-WCzlzM27hT4Rw4uNiMDBit2YB9vAyq6m0gkNwo4A,22778
  openlit/instrumentation/bedrock/__init__.py,sha256=DLLYio4S4gUzRElqNRT8WMKzM79HZwOBVjXfJI4BfaA,1545
  openlit/instrumentation/bedrock/bedrock.py,sha256=HqRZeiAFeNdlhlnt4DSLda8qkMP3nPKq_zhdxDssXmY,9498
  openlit/instrumentation/chroma/__init__.py,sha256=61lFpHlUEQUobsUJZHXdvOViKwsOH8AOvSfc4VgCmiM,3253
@@ -14,7 +17,7 @@ openlit/instrumentation/elevenlabs/async_elevenlabs.py,sha256=yMYACh95SFr5EYklKn
  openlit/instrumentation/elevenlabs/elevenlabs.py,sha256=mFnD7sgT47OxaXJz0Vc1nrNjXEpcGQDj5run3gA48Lw,6089
  openlit/instrumentation/embedchain/__init__.py,sha256=8TYk1OEbz46yF19dr-gB_x80VZMagU3kJ8-QihPXTeA,1929
  openlit/instrumentation/embedchain/embedchain.py,sha256=SLlr7qieT3kp4M6OYSRy8FaVCXQ2t3oPyIiE99ioNE4,7892
- openlit/instrumentation/google_ai_studio/__init__.py,sha256=vG4WzaavOiiwI3r5stDMotM5TSBxdcxQhC3W4XjJIG8,2146
+ openlit/instrumentation/google_ai_studio/__init__.py,sha256=rhHbEJbDQ-nH8y3AXzzyqNxcunR0ZEqR2RIstM55-Ms,2159
  openlit/instrumentation/google_ai_studio/async_google_ai_studio.py,sha256=20MHsp-tAONxOtmCFg5WDvktTdRce5CyH3_9w0b_AqI,13587
  openlit/instrumentation/google_ai_studio/google_ai_studio.py,sha256=vIJjzl5Fkgsf3vfaqmxhtSFvOpXK-wGC-JFhEXGP50M,13636
  openlit/instrumentation/gpt4all/__init__.py,sha256=-59CP2B3-HGZJ_vC-fI9Dt-0BuQXRhSCWCjnaGeU15Q,1802
@@ -26,7 +29,7 @@ openlit/instrumentation/groq/groq.py,sha256=m4gFPbYzjUUIgjXZ0Alu2Zy1HcO5takCFA2X
  openlit/instrumentation/haystack/__init__.py,sha256=QK6XxxZUHX8vMv2Crk7rNBOc64iOOBLhJGL_lPlAZ8s,1758
  openlit/instrumentation/haystack/haystack.py,sha256=oQIZiDhdp3gnJnhYQ1OouJMc9YT0pQ-_31cmNuopa68,3891
  openlit/instrumentation/langchain/__init__.py,sha256=0AI2Dnqw81IcJw3jM--gGkv_HRh2GtosOGJjvOpw7Zk,3431
- openlit/instrumentation/langchain/langchain.py,sha256=7K-m35sS3yTu9IklRo6n9LCcymeg6OyIYKrMzMG_uDQ,35730
+ openlit/instrumentation/langchain/langchain.py,sha256=s8vZZxhHfDANeisqIyNvsd5BPcHbc-LNFKDXS5hiEfA,35734
  openlit/instrumentation/llamaindex/__init__.py,sha256=vPtK65G6b-TwJERowVRUVl7f_nBSlFdwPBtpg8dOGos,1977
  openlit/instrumentation/llamaindex/llamaindex.py,sha256=uiIigbwhonSbJWA7LpgOVI1R4kxxPODS1K5wyHIQ4hM,4048
  openlit/instrumentation/milvus/__init__.py,sha256=qi1yfmMrvkDtnrN_6toW8qC9BRL78bq7ayWpObJ8Bq4,2961
@@ -55,8 +58,8 @@ openlit/instrumentation/vllm/__init__.py,sha256=OVWalQ1dXvip1DUsjUGaHX4J-2FrSp-T
  openlit/instrumentation/vllm/vllm.py,sha256=lDzM7F5pgxvh8nKL0dcKB4TD0Mc9wXOWeXOsOGN7Wd8,6527
  openlit/otel/metrics.py,sha256=O7NoaDz0bY19mqpE4-0PcKwEe-B-iJFRgOCaanAuZAc,4291
  openlit/otel/tracing.py,sha256=vL1ifMbARPBpqK--yXYsCM6y5dSu5LFIKqkhZXtYmUc,3712
- openlit/semcov/__init__.py,sha256=56daxsJtWFqp87TTT3R5OxSeMH7eOZqb4k-7AELirTI,7554
- openlit-1.21.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- openlit-1.21.0.dist-info/METADATA,sha256=1zrrmnjPv-UMLhm5DARcS9FylVEbOSUjvJpj1flSDAc,14934
- openlit-1.21.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- openlit-1.21.0.dist-info/RECORD,,
+ openlit/semcov/__init__.py,sha256=wpAarrnkndbgvP8VSudi8IRInYtD02hkewqjyiC0dMk,7614
+ openlit-1.22.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ openlit-1.22.2.dist-info/METADATA,sha256=yD1-PRHWYtdY6KsSX_ulQvL83tqbpBhZVkgQqm8J2vk,15710
+ openlit-1.22.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ openlit-1.22.2.dist-info/RECORD,,