openlit 1.20.0__py3-none-any.whl → 1.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openlit/__init__.py CHANGED
@@ -34,6 +34,7 @@ from openlit.instrumentation.gpt4all import GPT4AllInstrumentor
  from openlit.instrumentation.elevenlabs import ElevenLabsInstrumentor
  from openlit.instrumentation.vllm import VLLMInstrumentor
  from openlit.instrumentation.google_ai_studio import GoogleAIStudioInstrumentor
+ from openlit.instrumentation.azure_ai_inference import AzureAIInferenceInstrumentor
  from openlit.instrumentation.langchain import LangChainInstrumentor
  from openlit.instrumentation.llamaindex import LlamaIndexInstrumentor
  from openlit.instrumentation.haystack import HaystackInstrumentor
@@ -198,6 +199,7 @@ def init(environment="default", application_name="default", tracer=None, otlp_en
      "elevenlabs": "elevenlabs",
      "vllm": "vllm",
      "google-ai-studio": "google.generativeai",
+     "azure-ai-inference": "azure.ai.inference",
      "langchain": "langchain",
      "llama_index": "llama_index",
      "haystack": "haystack",
@@ -276,6 +278,7 @@ def init(environment="default", application_name="default", tracer=None, otlp_en
      "elevenlabs": ElevenLabsInstrumentor(),
      "vllm": VLLMInstrumentor(),
      "google-ai-studio": GoogleAIStudioInstrumentor(),
+     "azure-ai-inference": AzureAIInferenceInstrumentor(),
      "langchain": LangChainInstrumentor(),
      "llama_index": LlamaIndexInstrumentor(),
      "haystack": HaystackInstrumentor(),
openlit/instrumentation/azure_ai_inference/__init__.py ADDED
@@ -0,0 +1,71 @@
+ # pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
+ """Initializer of Auto Instrumentation of Azure AI Inference Functions"""
+
+ from typing import Collection
+ import importlib.metadata
+ from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+ from wrapt import wrap_function_wrapper
+
+ from openlit.instrumentation.azure_ai_inference.azure_ai_inference import (
+     complete, embedding
+ )
+
+ from openlit.instrumentation.azure_ai_inference.async_azure_ai_inference import (
+     async_complete, async_embedding
+ )
+
+ _instruments = ("azure-ai-inference >= 1.0.0b4",)
+
+ class AzureAIInferenceInstrumentor(BaseInstrumentor):
+     """
+     An instrumentor for azure-ai-inference's client library.
+     """
+
+     def instrumentation_dependencies(self) -> Collection[str]:
+         return _instruments
+
+     def _instrument(self, **kwargs):
+         application_name = kwargs.get("application_name", "default_application")
+         environment = kwargs.get("environment", "default_environment")
+         tracer = kwargs.get("tracer")
+         metrics = kwargs.get("metrics_dict")
+         pricing_info = kwargs.get("pricing_info", {})
+         trace_content = kwargs.get("trace_content", False)
+         disable_metrics = kwargs.get("disable_metrics")
+         version = importlib.metadata.version("azure-ai-inference")
+
+         # sync generate
+         wrap_function_wrapper(
+             "azure.ai.inference",
+             "ChatCompletionsClient.complete",
+             complete("azure_ai.complete", version, environment, application_name,
+                      tracer, pricing_info, trace_content, metrics, disable_metrics),
+         )
+
+         # sync embedding
+         wrap_function_wrapper(
+             "azure.ai.inference",
+             "EmbeddingsClient.embed",
+             embedding("azure_ai.embed", version, environment, application_name,
+                       tracer, pricing_info, trace_content, metrics, disable_metrics),
+         )
+
+         # async generate
+         wrap_function_wrapper(
+             "azure.ai.inference.aio",
+             "ChatCompletionsClient.complete",
+             async_complete("azure_ai.complete", version, environment, application_name,
+                            tracer, pricing_info, trace_content, metrics, disable_metrics),
+         )
+
+         # async embedding
+         wrap_function_wrapper(
+             "azure.ai.inference.aio",
+             "EmbeddingsClient.embed",
+             async_embedding("azure_ai.embed", version, environment, application_name,
+                             tracer, pricing_info, trace_content, metrics, disable_metrics),
+         )
+
+     def _uninstrument(self, **kwargs):
+         # Proper uninstrumentation logic to revert patched methods
+         pass
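
For readers unfamiliar with `wrapt`, here is a self-contained sketch of the monkey-patching pattern `_instrument` relies on; the `demo` module and `Client` class are hypothetical stand-ins, not part of openlit or azure-ai-inference:

```python
import types
from wrapt import wrap_function_wrapper

# Hypothetical stand-in for a third-party module such as azure.ai.inference.
demo = types.ModuleType("demo")

class Client:
    def complete(self, **kwargs):
        return "response"

demo.Client = Client

def telemetry_wrapper(wrapped, instance, args, kwargs):
    # A real wrapper (e.g. complete() above) would open a span here, call
    # through, then record token counts, cost, and other span attributes.
    result = wrapped(*args, **kwargs)
    return result

# Same call shape as in _instrument: target module, "Class.method", wrapper.
wrap_function_wrapper(demo, "Client.complete", telemetry_wrapper)

assert demo.Client().complete() == "response"
```

Note that `complete`, `embedding`, and their async twins are factories: `_instrument` calls each once with the configuration and hands the returned closure to `wrap_function_wrapper`, which then invokes it on every client call.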
openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py ADDED
@@ -0,0 +1,432 @@
+ # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment, protected-access
+ """
+ Module for monitoring Azure AI Inference API calls.
+ """
+
+ import logging
+ from opentelemetry.trace import SpanKind, Status, StatusCode
+ from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+ from openlit.__helpers import (
+     handle_exception,
+     get_chat_model_cost,
+     get_embed_model_cost,
+     general_tokens
+ )
+ from openlit.semcov import SemanticConvetion
+
+ # Initialize logger for logging potential issues and operations
+ logger = logging.getLogger(__name__)
+
+ def async_complete(gen_ai_endpoint, version, environment, application_name,
+                    tracer, pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for chat to collect metrics.
+
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the Azure AI Inference API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of Azure AI Inference usage.
+         trace_content: Flag indicating whether to trace the actual content.
+
+     Returns:
+         A function that wraps the chat method to add telemetry.
+     """
+
+     async def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'chat' API call to add telemetry.
+
+         This collects metrics such as execution time, cost, and token usage, and handles errors
+         gracefully, adding details to the trace for observability.
+
+         Args:
+             wrapped: The original 'chat' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'chat' method.
+             kwargs: Keyword arguments for the 'chat' method.
+
+         Returns:
+             The response from the original 'chat' method.
+         """
+         # pylint: disable=no-else-return
+         if kwargs.get("stream", False) is True:
+             # Special handling for streaming response to accommodate the nature of data flow
+             async def stream_generator():
+                 with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+                     # Placeholder for aggregating streaming response
+                     llmresponse = ""
+
+                     # Loop through streaming events capturing relevant details
+                     async for chunk in await wrapped(*args, **kwargs):
+                         if chunk.choices:
+                             # Collect message IDs and aggregated response from events
+                             content = chunk.choices[0].delta.content
+                             if content:
+                                 llmresponse += content
+
+                         yield chunk
+                         response_id = chunk.id
+
+                     # Handling exception ensure observability without disrupting operation
+                     try:
+                         # Format 'messages' into a single string
+                         message_prompt = kwargs.get("messages", "")
+                         formatted_messages = []
+                         for message in message_prompt:
+                             role = message["role"]
+                             content = message["content"]
+
+                             if isinstance(content, list):
+                                 content_str = ", ".join(
+                                     # pylint: disable=line-too-long
+                                     f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                                     if "type" in item else f'text: {item["text"]}'
+                                     for item in content
+                                 )
+                                 formatted_messages.append(f"{role}: {content_str}")
+                             else:
+                                 formatted_messages.append(f"{role}: {content}")
+                         prompt = "\n".join(formatted_messages)
+
+                         model = kwargs.get("model", "phi3-mini-4k")
+
+                         # Calculate tokens using input prompt and aggregated response
+                         input_tokens = general_tokens(prompt)
+                         output_tokens = general_tokens(llmresponse)
+
+                         total_tokens = input_tokens + output_tokens
+                         # Calculate cost of the operation
+                         cost = get_chat_model_cost(model,
+                                                    pricing_info, input_tokens,
+                                                    output_tokens)
+
+                         # Set base span attribues
+                         span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                         span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                            SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
+                         span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                            SemanticConvetion.GEN_AI_TYPE_CHAT)
+                         span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                            gen_ai_endpoint)
+                         span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                            environment)
+                         span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                            application_name)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                            model)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                            True)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                            kwargs.get("user", ""))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                            kwargs.get("top_p", 1.0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                            kwargs.get("max_tokens", -1))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                            kwargs.get("temperature", 1.0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                            kwargs.get("presence_penalty", 0.0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                            kwargs.get("frequency_penalty", 0.0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                            kwargs.get("seed", ""))
+                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                            response_id)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                            input_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                            output_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                            total_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                            cost)
+
+
+                         if trace_content:
+                             span.add_event(
+                                 name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                                 attributes={
+                                     SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                                 },
+                             )
+                             span.add_event(
+                                 name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                 attributes={
+                                     SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                                 },
+                             )
+
+                         span.set_status(Status(StatusCode.OK))
+
+                         if disable_metrics is False:
+                             attributes = {
+                                 TELEMETRY_SDK_NAME:
+                                     "openlit",
+                                 SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                     application_name,
+                                 SemanticConvetion.GEN_AI_SYSTEM:
+                                     SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+                                 SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                     environment,
+                                 SemanticConvetion.GEN_AI_TYPE:
+                                     SemanticConvetion.GEN_AI_TYPE_CHAT,
+                                 SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                     model
+                             }
+
+                             metrics["genai_requests"].add(1, attributes)
+                             metrics["genai_total_tokens"].add(
+                                 total_tokens, attributes
+                             )
+                             metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                             metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                             metrics["genai_cost"].record(cost, attributes)
+
+                     except Exception as e:
+                         handle_exception(span, e)
+                         logger.error("Error in trace creation: %s", e)
+
+             return stream_generator()
+         else:
+             # pylint: disable=line-too-long
+             with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+                 response = await wrapped(*args, **kwargs)
+
+                 # print(instance._system_instruction.__dict__["_pb"].parts[0].text)
+                 try:
+                     # Format 'messages' into a single string
+                     message_prompt = kwargs.get("messages", "")
+                     formatted_messages = []
+                     for message in message_prompt:
+                         role = message["role"]
+                         content = message["content"]
+
+                         if isinstance(content, list):
+                             content_str = ", ".join(
+                                 # pylint: disable=line-too-long
+                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                                 if "type" in item else f'text: {item["text"]}'
+                                 for item in content
+                             )
+                             formatted_messages.append(f"{role}: {content_str}")
+                         else:
+                             formatted_messages.append(f"{role}: {content}")
+                     prompt = "\n".join(formatted_messages)
+
+                     model = kwargs.get("model", "phi3-mini-4k")
+
+                     # Set base span attribues
+                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                        SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
+                     span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                        SemanticConvetion.GEN_AI_TYPE_CHAT)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                        gen_ai_endpoint)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                        environment)
+                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                        application_name)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                        model)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                        False)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                        kwargs.get("user", ""))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                        kwargs.get("top_p", 1.0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                        kwargs.get("max_tokens", -1))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                        kwargs.get("temperature", 1.0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                        kwargs.get("presence_penalty", 0.0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                        kwargs.get("frequency_penalty", 0.0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                        kwargs.get("seed", ""))
+                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                        response.id)
+                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                        [response.choices[0]["finish_reason"]])
+
+                     if trace_content:
+                         span.add_event(
+                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                             attributes={
+                                 SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                             },
+                         )
+                         span.add_event(
+                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                             attributes={
+                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[0].message.content,
+                             },
+                         )
+
+                     input_tokens = response.usage.prompt_tokens
+                     output_tokens = response.usage.completion_tokens
+                     total_tokens = response.usage.total_tokens
+                     # Calculate cost of the operation
+                     cost = get_chat_model_cost(model,
+                                                pricing_info, input_tokens, output_tokens)
+
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                        input_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                        output_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                        total_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                        cost)
+
+                     span.set_status(Status(StatusCode.OK))
+
+                     if disable_metrics is False:
+                         attributes = {
+                             TELEMETRY_SDK_NAME:
+                                 "openlit",
+                             SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                 application_name,
+                             SemanticConvetion.GEN_AI_SYSTEM:
+                                 SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+                             SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                 environment,
+                             SemanticConvetion.GEN_AI_TYPE:
+                                 SemanticConvetion.GEN_AI_TYPE_CHAT,
+                             SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                 model
+                         }
+
+                         metrics["genai_requests"].add(1, attributes)
+                         metrics["genai_total_tokens"].add(total_tokens, attributes)
+                         metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                         metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                         metrics["genai_cost"].record(cost, attributes)
+
+                     # Return original response
+                     return response
+
+                 except Exception as e:
+                     handle_exception(span, e)
+                     logger.error("Error in trace creation: %s", e)
+
+                     # Return original response
+                     return response
+
+     return wrapper
+
+ def async_embedding(gen_ai_endpoint, version, environment, application_name,
+                     tracer, pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for embeddings to collect metrics.
+
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the Azure AI Inference API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of Azure AI Inference usage.
+         trace_content: Flag indicating whether to trace the actual content.
+
+     Returns:
+         A function that wraps the embeddings method to add telemetry.
+     """
+
+     async def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'embeddings' API call to add telemetry.
+
+         This collects metrics such as execution time, cost, and token usage, and handles errors
+         gracefully, adding details to the trace for observability.
+
+         Args:
+             wrapped: The original 'embeddings' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'embeddings' method.
+             kwargs: Keyword arguments for the 'embeddings' method.
+
+         Returns:
+             The response from the original 'embeddings' method.
+         """
+
+         with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+             response = await wrapped(*args, **kwargs)
+
+             try:
+                 # Calculate cost of the operation
+                 cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
+                                             pricing_info, response.usage.prompt_tokens)
+
+                 # Set Span attributes
+                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                    SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
+                 span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                    SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
+                 span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                    gen_ai_endpoint)
+                 span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                    environment)
+                 span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                    application_name)
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                    kwargs.get("model", "text-embedding-ada-002"))
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
+                                    kwargs.get("encoding_format", "float"))
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
+                                    kwargs.get("dimensions", ""))
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                    kwargs.get("user", ""))
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                    response.usage.prompt_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                    response.usage.total_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                    cost)
+                 if trace_content:
+                     span.add_event(
+                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                         attributes={
+                             SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("input", ""),
+                         },
+                     )
+
+                 span.set_status(Status(StatusCode.OK))
+
+                 if disable_metrics is False:
+                     attributes = {
+                         TELEMETRY_SDK_NAME:
+                             "openlit",
+                         SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                             application_name,
+                         SemanticConvetion.GEN_AI_SYSTEM:
+                             SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+                         SemanticConvetion.GEN_AI_ENVIRONMENT:
+                             environment,
+                         SemanticConvetion.GEN_AI_TYPE:
+                             SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
+                         SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                             kwargs.get("model", "text-embedding-ada-002")
+                     }
+
+                     metrics["genai_requests"].add(1, attributes)
+                     metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
+                     metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
+                     metrics["genai_cost"].record(cost, attributes)
+
+                 # Return original response
+                 return response
+
+             except Exception as e:
+                 handle_exception(span, e)
+                 logger.error("Error in trace creation: %s", e)
+
+                 # Return original response
+                 return response
+
+     return wrapper
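
Two details worth noting in the module above: streaming chunks carry no usage block, so the streaming path estimates token counts from the raw text with `general_tokens`, while the non-streaming path reads `response.usage` directly. A hedged sketch of the async streaming call this module wraps; endpoint and key are placeholders, and the client construction follows the azure-ai-inference docs rather than anything in this diff:

```python
import asyncio
from azure.ai.inference.aio import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential

async def main():
    client = ChatCompletionsClient(
        endpoint="https://<resource>.inference.ai.azure.com",  # placeholder
        credential=AzureKeyCredential("<api-key>"),            # placeholder
    )
    # async_complete's wrapper intercepts this call; with stream=True it
    # returns the instrumented stream_generator() defined above.
    stream = await client.complete(
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
    )
    async for chunk in stream:
        if chunk.choices:
            print(chunk.choices[0].delta.content or "", end="")
    await client.close()

asyncio.run(main())
```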
openlit/instrumentation/azure_ai_inference/azure_ai_inference.py ADDED
@@ -0,0 +1,432 @@
+ # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment, protected-access
+ """
+ Module for monitoring Azure AI Inference API calls.
+ """
+
+ import logging
+ from opentelemetry.trace import SpanKind, Status, StatusCode
+ from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+ from openlit.__helpers import (
+     handle_exception,
+     get_chat_model_cost,
+     get_embed_model_cost,
+     general_tokens
+ )
+ from openlit.semcov import SemanticConvetion
+
+ # Initialize logger for logging potential issues and operations
+ logger = logging.getLogger(__name__)
+
+ def complete(gen_ai_endpoint, version, environment, application_name,
+              tracer, pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for chat to collect metrics.
+
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the Azure AI Inference API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of Azure AI Inference usage.
+         trace_content: Flag indicating whether to trace the actual content.
+
+     Returns:
+         A function that wraps the chat method to add telemetry.
+     """
+
+     def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'chat' API call to add telemetry.
+
+         This collects metrics such as execution time, cost, and token usage, and handles errors
+         gracefully, adding details to the trace for observability.
+
+         Args:
+             wrapped: The original 'chat' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'chat' method.
+             kwargs: Keyword arguments for the 'chat' method.
+
+         Returns:
+             The response from the original 'chat' method.
+         """
+         # pylint: disable=no-else-return
+         if kwargs.get("stream", False) is True:
+             # Special handling for streaming response to accommodate the nature of data flow
+             def stream_generator():
+                 with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+                     # Placeholder for aggregating streaming response
+                     llmresponse = ""
+
+                     # Loop through streaming events capturing relevant details
+                     for chunk in wrapped(*args, **kwargs):
+                         if chunk.choices:
+                             # Collect message IDs and aggregated response from events
+                             content = chunk.choices[0].delta.content
+                             if content:
+                                 llmresponse += content
+
+                         yield chunk
+                         response_id = chunk.id
+
+                     # Handling exception ensure observability without disrupting operation
+                     try:
+                         # Format 'messages' into a single string
+                         message_prompt = kwargs.get("messages", "")
+                         formatted_messages = []
+                         for message in message_prompt:
+                             role = message["role"]
+                             content = message["content"]
+
+                             if isinstance(content, list):
+                                 content_str = ", ".join(
+                                     # pylint: disable=line-too-long
+                                     f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                                     if "type" in item else f'text: {item["text"]}'
+                                     for item in content
+                                 )
+                                 formatted_messages.append(f"{role}: {content_str}")
+                             else:
+                                 formatted_messages.append(f"{role}: {content}")
+                         prompt = "\n".join(formatted_messages)
+
+                         model = kwargs.get("model", "phi3-mini-4k")
+
+                         # Calculate tokens using input prompt and aggregated response
+                         input_tokens = general_tokens(prompt)
+                         output_tokens = general_tokens(llmresponse)
+
+                         total_tokens = input_tokens + output_tokens
+                         # Calculate cost of the operation
+                         cost = get_chat_model_cost(model,
+                                                    pricing_info, input_tokens,
+                                                    output_tokens)
+
+                         # Set base span attribues
+                         span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                         span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                            SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
+                         span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                            SemanticConvetion.GEN_AI_TYPE_CHAT)
+                         span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                            gen_ai_endpoint)
+                         span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                            environment)
+                         span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                            application_name)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                            model)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                            True)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                            kwargs.get("user", ""))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                            kwargs.get("top_p", 1.0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                            kwargs.get("max_tokens", -1))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                            kwargs.get("temperature", 1.0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                            kwargs.get("presence_penalty", 0.0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                            kwargs.get("frequency_penalty", 0.0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                            kwargs.get("seed", ""))
+                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                            response_id)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                            input_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                            output_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                            total_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                            cost)
+
+
+                         if trace_content:
+                             span.add_event(
+                                 name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                                 attributes={
+                                     SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                                 },
+                             )
+                             span.add_event(
+                                 name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                 attributes={
+                                     SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                                 },
+                             )
+
+                         span.set_status(Status(StatusCode.OK))
+
+                         if disable_metrics is False:
+                             attributes = {
+                                 TELEMETRY_SDK_NAME:
+                                     "openlit",
+                                 SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                     application_name,
+                                 SemanticConvetion.GEN_AI_SYSTEM:
+                                     SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+                                 SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                     environment,
+                                 SemanticConvetion.GEN_AI_TYPE:
+                                     SemanticConvetion.GEN_AI_TYPE_CHAT,
+                                 SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                     model
+                             }
+
+                             metrics["genai_requests"].add(1, attributes)
+                             metrics["genai_total_tokens"].add(
+                                 total_tokens, attributes
+                             )
+                             metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                             metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                             metrics["genai_cost"].record(cost, attributes)
+
+                     except Exception as e:
+                         handle_exception(span, e)
+                         logger.error("Error in trace creation: %s", e)
+
+             return stream_generator()
+         else:
+             # pylint: disable=line-too-long
+             with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+                 response = wrapped(*args, **kwargs)
+
+                 # print(instance._system_instruction.__dict__["_pb"].parts[0].text)
+                 try:
+                     # Format 'messages' into a single string
+                     message_prompt = kwargs.get("messages", "")
+                     formatted_messages = []
+                     for message in message_prompt:
+                         role = message["role"]
+                         content = message["content"]
+
+                         if isinstance(content, list):
+                             content_str = ", ".join(
+                                 # pylint: disable=line-too-long
+                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                                 if "type" in item else f'text: {item["text"]}'
+                                 for item in content
+                             )
+                             formatted_messages.append(f"{role}: {content_str}")
+                         else:
+                             formatted_messages.append(f"{role}: {content}")
+                     prompt = "\n".join(formatted_messages)
+
+                     model = kwargs.get("model", "phi3-mini-4k")
+
+                     # Set base span attribues
+                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                        SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
+                     span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                        SemanticConvetion.GEN_AI_TYPE_CHAT)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                        gen_ai_endpoint)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                        environment)
+                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                        application_name)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                        model)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                        False)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                        kwargs.get("user", ""))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                        kwargs.get("top_p", 1.0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                        kwargs.get("max_tokens", -1))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                        kwargs.get("temperature", 1.0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                        kwargs.get("presence_penalty", 0.0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                        kwargs.get("frequency_penalty", 0.0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                        kwargs.get("seed", ""))
+                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                        response.id)
+                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                        [response.choices[0]["finish_reason"]])
+
+                     if trace_content:
+                         span.add_event(
+                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                             attributes={
+                                 SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                             },
+                         )
+                         span.add_event(
+                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                             attributes={
+                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[0].message.content,
+                             },
+                         )
+
+                     input_tokens = response.usage.prompt_tokens
+                     output_tokens = response.usage.completion_tokens
+                     total_tokens = response.usage.total_tokens
+                     # Calculate cost of the operation
+                     cost = get_chat_model_cost(model,
+                                                pricing_info, input_tokens, output_tokens)
+
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                        input_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                        output_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                        total_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                        cost)
+
+                     span.set_status(Status(StatusCode.OK))
+
+                     if disable_metrics is False:
+                         attributes = {
+                             TELEMETRY_SDK_NAME:
+                                 "openlit",
+                             SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                 application_name,
+                             SemanticConvetion.GEN_AI_SYSTEM:
+                                 SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+                             SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                 environment,
+                             SemanticConvetion.GEN_AI_TYPE:
+                                 SemanticConvetion.GEN_AI_TYPE_CHAT,
+                             SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                 model
+                         }
+
+                         metrics["genai_requests"].add(1, attributes)
+                         metrics["genai_total_tokens"].add(total_tokens, attributes)
+                         metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                         metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                         metrics["genai_cost"].record(cost, attributes)
+
+                     # Return original response
+                     return response
+
+                 except Exception as e:
+                     handle_exception(span, e)
+                     logger.error("Error in trace creation: %s", e)
+
+                     # Return original response
+                     return response
+
+     return wrapper
+
+ def embedding(gen_ai_endpoint, version, environment, application_name,
+               tracer, pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for embeddings to collect metrics.
+
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the Azure AI Inference API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of Azure AI Inference usage.
+         trace_content: Flag indicating whether to trace the actual content.
+
+     Returns:
+         A function that wraps the embeddings method to add telemetry.
+     """
+
+     def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'embeddings' API call to add telemetry.
+
+         This collects metrics such as execution time, cost, and token usage, and handles errors
+         gracefully, adding details to the trace for observability.
+
+         Args:
+             wrapped: The original 'embeddings' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'embeddings' method.
+             kwargs: Keyword arguments for the 'embeddings' method.
+
+         Returns:
+             The response from the original 'embeddings' method.
+         """
+
+         with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+             response = wrapped(*args, **kwargs)
+
+             try:
+                 # Calculate cost of the operation
+                 cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
+                                             pricing_info, response.usage.prompt_tokens)
+
+                 # Set Span attributes
+                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                    SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
+                 span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                    SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
+                 span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                    gen_ai_endpoint)
+                 span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                    environment)
+                 span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                    application_name)
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                    kwargs.get("model", "text-embedding-ada-002"))
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
+                                    kwargs.get("encoding_format", "float"))
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
+                                    kwargs.get("dimensions", ""))
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                    kwargs.get("user", ""))
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                    response.usage.prompt_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                    response.usage.total_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                    cost)
+                 if trace_content:
+                     span.add_event(
+                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                         attributes={
+                             SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("input", ""),
+                         },
+                     )
+
+                 span.set_status(Status(StatusCode.OK))
+
+                 if disable_metrics is False:
+                     attributes = {
+                         TELEMETRY_SDK_NAME:
+                             "openlit",
+                         SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                             application_name,
+                         SemanticConvetion.GEN_AI_SYSTEM:
+                             SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+                         SemanticConvetion.GEN_AI_ENVIRONMENT:
+                             environment,
+                         SemanticConvetion.GEN_AI_TYPE:
+                             SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
+                         SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                             kwargs.get("model", "text-embedding-ada-002")
+                     }
+
+                     metrics["genai_requests"].add(1, attributes)
+                     metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
+                     metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
+                     metrics["genai_cost"].record(cost, attributes)
+
+                 # Return original response
+                 return response
+
+             except Exception as e:
+                 handle_exception(span, e)
+                 logger.error("Error in trace creation: %s", e)
+
+                 # Return original response
+                 return response
+
+     return wrapper
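
The sync module mirrors the async one line for line. For completeness, a sketch of the plain (non-streaming) call it wraps; endpoint and key are again placeholders, and note that if the caller omits `model`, the span falls back to recording "phi3-mini-4k":

```python
from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential

client = ChatCompletionsClient(
    endpoint="https://<resource>.inference.ai.azure.com",  # placeholder
    credential=AzureKeyCredential("<api-key>"),            # placeholder
)

# complete()'s wrapper opens the "azure_ai.complete" span, calls through,
# then reads response.usage and response.choices for span attributes.
response = client.complete(messages=[{"role": "user", "content": "Hello"}])
print(response.choices[0].message.content)
```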
openlit/instrumentation/google_ai_studio/__init__.py CHANGED
@@ -32,7 +32,7 @@ class GoogleAIStudioInstrumentor(BaseInstrumentor):
          pricing_info = kwargs.get("pricing_info", {})
          trace_content = kwargs.get("trace_content", False)
          disable_metrics = kwargs.get("disable_metrics")
-         version = importlib.metadata.version("ollama")
+         version = importlib.metadata.version("google-generativeai")

          # sync generate
          wrap_function_wrapper(
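
This one-line fix matters because `importlib.metadata.version` reads the installed distribution's metadata: querying "ollama" here (a copy-paste leftover from the Ollama instrumentor) would report the wrong package's version, or raise `PackageNotFoundError` when Ollama is not installed. For example:

```python
import importlib.metadata

# Version of the installed google-generativeai distribution, e.g. "0.7.2"
# (the exact value depends on the environment).
print(importlib.metadata.version("google-generativeai"))
```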
openlit/instrumentation/google_ai_studio/async_google_ai_studio.py CHANGED
@@ -1,6 +1,6 @@
  # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment, protected-access
  """
- Module for monitoring Ollama API calls.
+ Module for monitoring Google AI Studio API calls.
  """

  import logging
@@ -24,9 +24,9 @@ def async_generate(gen_ai_endpoint, version, environment, application_name,
          gen_ai_endpoint: Endpoint identifier for logging and tracing.
          version: Version of the monitoring package.
          environment: Deployment environment (e.g., production, staging).
-         application_name: Name of the application using the Ollama API.
+         application_name: Name of the application using the Google AI Studio API.
          tracer: OpenTelemetry tracer for creating spans.
-         pricing_info: Information used for calculating the cost of Ollama usage.
+         pricing_info: Information used for calculating the cost of Google AI Studio usage.
          trace_content: Flag indicating whether to trace the actual content.

      Returns:
@@ -81,10 +81,12 @@ def async_generate(gen_ai_endpoint, version, environment, application_name,
                          model = instance._model_id
                          if hasattr(instance, "_model_name"):
                              model = instance._model_name.replace("publishers/google/models/", "")
+                         if model.startswith("models/"):
+                             model = model[len("models/"):]

                          total_tokens = input_tokens + output_tokens
                          # Calculate cost of the operation
-                         cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
+                         cost = get_chat_model_cost(model,
                                                     pricing_info, input_tokens,
                                                     output_tokens)

@@ -174,6 +176,8 @@ def async_generate(gen_ai_endpoint, version, environment, application_name,
                      model = instance._model_id
                      if hasattr(instance, "_model_name"):
                          model = instance._model_name.replace("publishers/google/models/", "")
+                     if model.startswith("models/"):
+                         model = model[len("models/"):]

                      # Set base span attribues
                      span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
@@ -210,7 +214,7 @@ def async_generate(gen_ai_endpoint, version, environment, application_name,
                      completion_tokens = response.usage_metadata.candidates_token_count
                      total_tokens = response.usage_metadata.total_token_count
                      # Calculate cost of the operation
-                     cost = get_chat_model_cost(kwargs.get("model", "llama3"),
+                     cost = get_chat_model_cost(model,
                                                 pricing_info, prompt_tokens, completion_tokens)

                      span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
@@ -231,13 +235,13 @@ def async_generate(gen_ai_endpoint, version, environment, application_name,
                          SemanticConvetion.GEN_AI_APPLICATION_NAME:
                              application_name,
                          SemanticConvetion.GEN_AI_SYSTEM:
-                             SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
+                             SemanticConvetion.GEN_AI_SYSTEM_GOOGLE_AI_STUDIO,
                          SemanticConvetion.GEN_AI_ENVIRONMENT:
                              environment,
                          SemanticConvetion.GEN_AI_TYPE:
                              SemanticConvetion.GEN_AI_TYPE_CHAT,
                          SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                             kwargs.get("model", "llama3")
+                             model
                      }

                      metrics["genai_requests"].add(1, attributes)
openlit/instrumentation/google_ai_studio/google_ai_studio.py CHANGED
@@ -1,6 +1,6 @@
  # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment, protected-access
  """
- Module for monitoring Ollama API calls.
+ Module for monitoring Google AI Studio API calls.
  """

  import logging
@@ -24,9 +24,9 @@ def generate(gen_ai_endpoint, version, environment, application_name,
          gen_ai_endpoint: Endpoint identifier for logging and tracing.
          version: Version of the monitoring package.
          environment: Deployment environment (e.g., production, staging).
-         application_name: Name of the application using the Ollama API.
+         application_name: Name of the application using the Google AI Studio API.
          tracer: OpenTelemetry tracer for creating spans.
-         pricing_info: Information used for calculating the cost of Ollama usage.
+         pricing_info: Information used for calculating the cost of Google AI Studio usage.
          trace_content: Flag indicating whether to trace the actual content.

      Returns:
@@ -81,10 +81,12 @@ def generate(gen_ai_endpoint, version, environment, application_name,
                          model = instance._model_id
                          if hasattr(instance, "_model_name"):
                              model = instance._model_name.replace("publishers/google/models/", "")
+                         if model.startswith("models/"):
+                             model = model[len("models/"):]

                          total_tokens = input_tokens + output_tokens
                          # Calculate cost of the operation
-                         cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
+                         cost = get_chat_model_cost(model,
                                                     pricing_info, input_tokens,
                                                     output_tokens)

@@ -176,6 +178,8 @@ def generate(gen_ai_endpoint, version, environment, application_name,
                      model = instance._model_id
                      if hasattr(instance, "_model_name"):
                          model = instance._model_name.replace("publishers/google/models/", "")
+                     if model.startswith("models/"):
+                         model = model[len("models/"):]

                      # Set base span attribues
                      span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
@@ -212,7 +216,7 @@ def generate(gen_ai_endpoint, version, environment, application_name,
                      completion_tokens = response.usage_metadata.candidates_token_count
                      total_tokens = response.usage_metadata.total_token_count
                      # Calculate cost of the operation
-                     cost = get_chat_model_cost(kwargs.get("model", "llama3"),
+                     cost = get_chat_model_cost(model,
                                                 pricing_info, prompt_tokens, completion_tokens)

                      span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
@@ -233,13 +237,13 @@ def generate(gen_ai_endpoint, version, environment, application_name,
                          SemanticConvetion.GEN_AI_APPLICATION_NAME:
                              application_name,
                          SemanticConvetion.GEN_AI_SYSTEM:
-                             SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
+                             SemanticConvetion.GEN_AI_SYSTEM_GOOGLE_AI_STUDIO,
                          SemanticConvetion.GEN_AI_ENVIRONMENT:
                              environment,
                          SemanticConvetion.GEN_AI_TYPE:
                              SemanticConvetion.GEN_AI_TYPE_CHAT,
                          SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                             kwargs.get("model", "llama3")
+                             model
                      }

                      metrics["genai_requests"].add(1, attributes)
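
The net effect of these hunks: both Google AI Studio modules now price and label spans by the resolved model instead of an unrelated `kwargs` default ("gpt-3.5-turbo" / "llama3" were leftovers from other instrumentors), and the new normalization strips the `models/` prefix from resolved names. A quick illustration (the model name is just an example):

```python
model = "models/gemini-1.5-flash"
if model.startswith("models/"):
    model = model[len("models/"):]
assert model == "gemini-1.5-flash"
```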
openlit/semcov/__init__.py CHANGED
@@ -103,6 +103,7 @@ class SemanticConvetion:
      GEN_AI_SYSTEM_ELEVENLABS = "elevenlabs"
      GEN_AI_SYSTEM_VLLM = "vLLM"
      GEN_AI_SYSTEM_GOOGLE_AI_STUDIO = "google-ai-studio"
+     GEN_AI_SYSTEM_AZURE_AI_INFERENCE = "azure-ai-inference"
      GEN_AI_SYSTEM_LANGCHAIN = "langchain"
      GEN_AI_SYSTEM_LLAMAINDEX = "llama_index"
      GEN_AI_SYSTEM_HAYSTACK = "haystack"
openlit-1.20.0.dist-info/METADATA → openlit-1.22.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: openlit
- Version: 1.20.0
+ Version: 1.22.0
  Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
  Home-page: https://github.com/openlit/openlit/tree/main/openlit/python
  Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
@@ -54,23 +54,25 @@ This project adheres to the [Semantic Conventions](https://github.com/open-telem

  ## Auto Instrumentation Capabilities

- | LLMs | Vector DBs | Frameworks | GPUs |
- |-----------------------------------------------------------------|----------------------------------------------|----------------------------------------------|---------------|
- | [✅ OpenAI](https://docs.openlit.io/latest/integrations/openai) | [✅ ChromaDB](https://docs.openlit.io/latest/integrations/chromadb) | [✅ Langchain](https://docs.openlit.io/latest/integrations/langchain) | [✅ NVIDIA GPUs](https://docs.openlit.io/latest/integrations/nvidia-gpu) |
- | [✅ Ollama](https://docs.openlit.io/latest/integrations/ollama) | [✅ Pinecone](https://docs.openlit.io/latest/integrations/pinecone) | [✅ LiteLLM](https://docs.openlit.io/latest/integrations/litellm) | |
- | [✅ Anthropic](https://docs.openlit.io/latest/integrations/anthropic) | [✅ Qdrant](https://docs.openlit.io/latest/integrations/qdrant) | [✅ LlamaIndex](https://docs.openlit.io/latest/integrations/llama-index) | |
- | [✅ GPT4All](https://docs.openlit.io/latest/integrations/gpt4all) | [✅ Milvus](https://docs.openlit.io/latest/integrations/milvus) | [✅ Haystack](https://docs.openlit.io/latest/integrations/haystack) | |
- | [✅ Cohere](https://docs.openlit.io/latest/integrations/cohere) | | [✅ EmbedChain](https://docs.openlit.io/latest/integrations/embedchain) | |
- | [✅ Mistral](https://docs.openlit.io/latest/integrations/mistral) | | [✅ Guardrails](https://docs.openlit.io/latest/integrations/guardrails) | |
- | [✅ Azure OpenAI](https://docs.openlit.io/latest/integrations/azure-openai) | | | |
- | [✅ HuggingFace Transformers](https://docs.openlit.io/latest/integrations/huggingface) | | | |
- | [✅ Amazon Bedrock](https://docs.openlit.io/latest/integrations/bedrock) | | | |
- | [✅ Vertex AI](https://docs.openlit.io/latest/integrations/vertexai) | | | |
- | [✅ Groq](https://docs.openlit.io/latest/integrations/groq) | | | |
- | [✅ ElevenLabs](https://docs.openlit.io/latest/integrations/elevenlabs) | | | |
- | [✅ vLLM](https://docs.openlit.io/latest/integrations/vllm) | | | |
- | [✅ OLA Krutrim](https://docs.openlit.io/latest/integrations/krutrim) | | | |
- | [✅ Google AI Studio](https://docs.openlit.io/latest/integrations/google-ai-studio) | | | |
+ | LLMs | Vector DBs | Frameworks | GPUs |
+ |--------------------------------------------------------------------------|----------------------------------------------|----------------------------------------------|---------------|
+ | [✅ OpenAI](https://docs.openlit.io/latest/integrations/openai) | [✅ ChromaDB](https://docs.openlit.io/latest/integrations/chromadb) | [✅ Langchain](https://docs.openlit.io/latest/integrations/langchain) | [✅ NVIDIA GPUs](https://docs.openlit.io/latest/integrations/nvidia-gpu) |
+ | [✅ Ollama](https://docs.openlit.io/latest/integrations/ollama) | [✅ Pinecone](https://docs.openlit.io/latest/integrations/pinecone) | [✅ LiteLLM](https://docs.openlit.io/latest/integrations/litellm) | |
+ | [✅ Anthropic](https://docs.openlit.io/latest/integrations/anthropic) | [✅ Qdrant](https://docs.openlit.io/latest/integrations/qdrant) | [✅ LlamaIndex](https://docs.openlit.io/latest/integrations/llama-index) | |
+ | [✅ GPT4All](https://docs.openlit.io/latest/integrations/gpt4all) | [✅ Milvus](https://docs.openlit.io/latest/integrations/milvus) | [✅ Haystack](https://docs.openlit.io/latest/integrations/haystack) | |
+ | [✅ Cohere](https://docs.openlit.io/latest/integrations/cohere) | | [✅ EmbedChain](https://docs.openlit.io/latest/integrations/embedchain) | |
+ | [✅ Mistral](https://docs.openlit.io/latest/integrations/mistral) | | [✅ Guardrails](https://docs.openlit.io/latest/integrations/guardrails) | |
+ | [✅ Azure OpenAI](https://docs.openlit.io/latest/integrations/azure-openai) | | | |
+ | [✅ Azure AI Inference](https://docs.openlit.io/latest/integrations/azure-ai-inference) | | | |
+ | [✅ GitHub AI Models](https://docs.openlit.io/latest/integrations/github-models) | | | |
+ | [✅ HuggingFace Transformers](https://docs.openlit.io/latest/integrations/huggingface) | | | |
+ | [✅ Amazon Bedrock](https://docs.openlit.io/latest/integrations/bedrock) | | | |
+ | [✅ Vertex AI](https://docs.openlit.io/latest/integrations/vertexai) | | | |
+ | [✅ Groq](https://docs.openlit.io/latest/integrations/groq) | | | |
+ | [✅ ElevenLabs](https://docs.openlit.io/latest/integrations/elevenlabs) | | | |
+ | [✅ vLLM](https://docs.openlit.io/latest/integrations/vllm) | | | |
+ | [✅ OLA Krutrim](https://docs.openlit.io/latest/integrations/krutrim) | | | |
+ | [✅ Google AI Studio](https://docs.openlit.io/latest/integrations/google-ai-studio) | | | |

  ## Supported Destinations
  - [✅ OpenTelemetry Collector](https://docs.openlit.io/latest/connections/otelcol)
openlit-1.20.0.dist-info/RECORD → openlit-1.22.0.dist-info/RECORD RENAMED
@@ -1,8 +1,11 @@
  openlit/__helpers.py,sha256=lrn4PBs9owDudiCY2NBoVbAi7AU_HtUpyOj0oqPBsPY,5545
- openlit/__init__.py,sha256=fqeMFg76VYEX4VdxTn76Fh-0kbrN5_Sk8ahlm7Xlp9k,15296
+ openlit/__init__.py,sha256=w3xpFptltMR5TgYWz3ADfvWGpIH32CdBBbcIFFGc8vc,15498
  openlit/instrumentation/anthropic/__init__.py,sha256=oaU53BOPyfUKbEzYvLr1DPymDluurSnwo4Hernf2XdU,1955
  openlit/instrumentation/anthropic/anthropic.py,sha256=y7CEGhKOGHWt8G_5Phr4qPJTfPGRJIAr9Yk6nM3CcvM,16775
  openlit/instrumentation/anthropic/async_anthropic.py,sha256=Zz1KRKIG9wGn0quOoLvjORC-49IvHQpJ6GBdB-4PfCQ,16816
+ openlit/instrumentation/azure_ai_inference/__init__.py,sha256=Xl_4hjQeXcA-NgkqwTbs1ejPKRRnQXsDErXfFIz0z7U,2699
+ openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py,sha256=uZLOW1iD5pKZeQ4Mg-Mvmt9aWc2WrLiWlZn2dKbX0X4,22832
+ openlit/instrumentation/azure_ai_inference/azure_ai_inference.py,sha256=Hz-WCzlzM27hT4Rw4uNiMDBit2YB9vAyq6m0gkNwo4A,22778
  openlit/instrumentation/bedrock/__init__.py,sha256=DLLYio4S4gUzRElqNRT8WMKzM79HZwOBVjXfJI4BfaA,1545
  openlit/instrumentation/bedrock/bedrock.py,sha256=HqRZeiAFeNdlhlnt4DSLda8qkMP3nPKq_zhdxDssXmY,9498
  openlit/instrumentation/chroma/__init__.py,sha256=61lFpHlUEQUobsUJZHXdvOViKwsOH8AOvSfc4VgCmiM,3253
@@ -14,9 +17,9 @@ openlit/instrumentation/elevenlabs/async_elevenlabs.py,sha256=yMYACh95SFr5EYklKn
  openlit/instrumentation/elevenlabs/elevenlabs.py,sha256=mFnD7sgT47OxaXJz0Vc1nrNjXEpcGQDj5run3gA48Lw,6089
  openlit/instrumentation/embedchain/__init__.py,sha256=8TYk1OEbz46yF19dr-gB_x80VZMagU3kJ8-QihPXTeA,1929
  openlit/instrumentation/embedchain/embedchain.py,sha256=SLlr7qieT3kp4M6OYSRy8FaVCXQ2t3oPyIiE99ioNE4,7892
- openlit/instrumentation/google_ai_studio/__init__.py,sha256=vG4WzaavOiiwI3r5stDMotM5TSBxdcxQhC3W4XjJIG8,2146
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py,sha256=cEy5FNu92gvdmsKKVDrUcY5LpY4hHzWwJl9yezxN-Zo,13404
- openlit/instrumentation/google_ai_studio/google_ai_studio.py,sha256=mLABvD756GEgVfAjiYsfLYWATT1AxPuWDuElwpX4voo,13453
+ openlit/instrumentation/google_ai_studio/__init__.py,sha256=rhHbEJbDQ-nH8y3AXzzyqNxcunR0ZEqR2RIstM55-Ms,2159
+ openlit/instrumentation/google_ai_studio/async_google_ai_studio.py,sha256=20MHsp-tAONxOtmCFg5WDvktTdRce5CyH3_9w0b_AqI,13587
+ openlit/instrumentation/google_ai_studio/google_ai_studio.py,sha256=vIJjzl5Fkgsf3vfaqmxhtSFvOpXK-wGC-JFhEXGP50M,13636
  openlit/instrumentation/gpt4all/__init__.py,sha256=-59CP2B3-HGZJ_vC-fI9Dt-0BuQXRhSCWCjnaGeU15Q,1802
  openlit/instrumentation/gpt4all/gpt4all.py,sha256=dbxqZeuTrv_y6wyDOIEmC8-Dc4iCGgLpj3l5JiodLMI,18787
  openlit/instrumentation/gpu/__init__.py,sha256=Dj2MLar0DB20-t6W3pfR-3jfR_mwg4SYwhzIrH_n9sU,5596
@@ -55,8 +58,8 @@ openlit/instrumentation/vllm/__init__.py,sha256=OVWalQ1dXvip1DUsjUGaHX4J-2FrSp-T
  openlit/instrumentation/vllm/vllm.py,sha256=lDzM7F5pgxvh8nKL0dcKB4TD0Mc9wXOWeXOsOGN7Wd8,6527
  openlit/otel/metrics.py,sha256=O7NoaDz0bY19mqpE4-0PcKwEe-B-iJFRgOCaanAuZAc,4291
  openlit/otel/tracing.py,sha256=vL1ifMbARPBpqK--yXYsCM6y5dSu5LFIKqkhZXtYmUc,3712
- openlit/semcov/__init__.py,sha256=56daxsJtWFqp87TTT3R5OxSeMH7eOZqb4k-7AELirTI,7554
- openlit-1.20.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- openlit-1.20.0.dist-info/METADATA,sha256=DvK9Dt8ZvBBvdPc_Fdbi7HxkCu9Kp-Z4IkzCOuynkg4,14934
- openlit-1.20.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- openlit-1.20.0.dist-info/RECORD,,
+ openlit/semcov/__init__.py,sha256=wpAarrnkndbgvP8VSudi8IRInYtD02hkewqjyiC0dMk,7614
+ openlit-1.22.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ openlit-1.22.0.dist-info/METADATA,sha256=rSCNSQss7NahZ5Vnq028vzvKxmhsXuOxGvQt5n5Ock0,15489
+ openlit-1.22.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ openlit-1.22.0.dist-info/RECORD,,