openlit 1.33.7__py3-none-any.whl → 1.33.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. openlit/__helpers.py +83 -0
  2. openlit/__init__.py +1 -1
  3. openlit/instrumentation/ag2/ag2.py +2 -2
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +4 -4
  13. openlit/instrumentation/astra/async_astra.py +4 -4
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +4 -4
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +2 -2
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
  26. openlit/instrumentation/crewai/crewai.py +2 -2
  27. openlit/instrumentation/dynamiq/dynamiq.py +2 -2
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +73 -47
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +73 -52
  30. openlit/instrumentation/embedchain/embedchain.py +4 -4
  31. openlit/instrumentation/firecrawl/firecrawl.py +2 -2
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/gpt4all.py +17 -17
  36. openlit/instrumentation/groq/async_groq.py +14 -14
  37. openlit/instrumentation/groq/groq.py +14 -14
  38. openlit/instrumentation/haystack/haystack.py +2 -2
  39. openlit/instrumentation/julep/async_julep.py +2 -2
  40. openlit/instrumentation/julep/julep.py +2 -2
  41. openlit/instrumentation/langchain/langchain.py +36 -31
  42. openlit/instrumentation/letta/letta.py +6 -6
  43. openlit/instrumentation/litellm/async_litellm.py +20 -20
  44. openlit/instrumentation/litellm/litellm.py +20 -20
  45. openlit/instrumentation/llamaindex/llamaindex.py +2 -2
  46. openlit/instrumentation/mem0/mem0.py +2 -2
  47. openlit/instrumentation/milvus/milvus.py +4 -4
  48. openlit/instrumentation/mistral/async_mistral.py +18 -18
  49. openlit/instrumentation/mistral/mistral.py +18 -18
  50. openlit/instrumentation/multion/async_multion.py +2 -2
  51. openlit/instrumentation/multion/multion.py +2 -2
  52. openlit/instrumentation/ollama/async_ollama.py +29 -29
  53. openlit/instrumentation/ollama/ollama.py +29 -29
  54. openlit/instrumentation/openai/__init__.py +11 -230
  55. openlit/instrumentation/openai/async_openai.py +434 -409
  56. openlit/instrumentation/openai/openai.py +415 -393
  57. openlit/instrumentation/phidata/phidata.py +2 -2
  58. openlit/instrumentation/pinecone/pinecone.py +4 -4
  59. openlit/instrumentation/premai/premai.py +20 -20
  60. openlit/instrumentation/qdrant/async_qdrant.py +4 -4
  61. openlit/instrumentation/qdrant/qdrant.py +4 -4
  62. openlit/instrumentation/reka/async_reka.py +6 -6
  63. openlit/instrumentation/reka/reka.py +6 -6
  64. openlit/instrumentation/together/async_together.py +18 -18
  65. openlit/instrumentation/together/together.py +18 -18
  66. openlit/instrumentation/transformers/transformers.py +6 -6
  67. openlit/instrumentation/vertexai/async_vertexai.py +53 -53
  68. openlit/instrumentation/vertexai/vertexai.py +53 -53
  69. openlit/instrumentation/vllm/vllm.py +6 -6
  70. openlit/otel/metrics.py +98 -7
  71. openlit/semcov/__init__.py +113 -80
  72. {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/METADATA +2 -1
  73. openlit-1.33.9.dist-info/RECORD +121 -0
  74. {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/WHEEL +1 -1
  75. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  76. openlit/instrumentation/openai/azure_openai.py +0 -898
  77. openlit-1.33.7.dist-info/RECORD +0 -122
  78. {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/LICENSE +0 -0
@@ -1,900 +0,0 @@
1
- # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches
2
- """
3
- Module for monitoring Azure OpenAI API calls.
4
- """
5
-
6
- import logging
7
- from opentelemetry.trace import SpanKind, Status, StatusCode
8
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
9
- from openlit.__helpers import get_chat_model_cost, get_embed_model_cost
10
- from openlit.__helpers import get_image_model_cost, openai_tokens, handle_exception
11
- from openlit.semcov import SemanticConvetion
12
-
13
- # Initialize logger for logging potential issues and operations
14
- logger = logging.getLogger(__name__)
15
-
16
- def azure_async_chat_completions(gen_ai_endpoint, version, environment, application_name,
17
- tracer, pricing_info, trace_content, metrics, disable_metrics):
18
- """
19
- Generates a telemetry wrapper for chat completions to collect metrics.
20
-
21
- Args:
22
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
23
- version: Version of the monitoring package.
24
- environment: Deployment environment (e.g., production, staging).
25
- application_name: Name of the application using the OpenAI API.
26
- tracer: OpenTelemetry tracer for creating spans.
27
- pricing_info: Information used for calculating the cost of OpenAI usage.
28
- trace_content: Flag indicating whether to trace the actual content.
29
-
30
- Returns:
31
- A function that wraps the chat completions method to add telemetry.
32
- """
33
-
34
- async def wrapper(wrapped, instance, args, kwargs):
35
- """
36
- Wraps the 'chat.completions' API call to add telemetry.
37
-
38
- This collects metrics such as execution time, cost, and token usage, and handles errors
39
- gracefully, adding details to the trace for observability.
40
-
41
- Args:
42
- wrapped: The original 'chat.completions' method to be wrapped.
43
- instance: The instance of the class where the original method is defined.
44
- args: Positional arguments for the 'chat.completions' method.
45
- kwargs: Keyword arguments for the 'chat.completions' method.
46
-
47
- Returns:
48
- The response from the original 'chat.completions' method.
49
- """
50
-
51
- # Check if streaming is enabled for the API call
52
- streaming = kwargs.get("stream", False)
53
-
54
- # pylint: disable=no-else-return
55
- if streaming:
56
- # Special handling for streaming response to accommodate the nature of data flow
57
- async def stream_generator():
58
- # pylint: disable=line-too-long
59
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
60
- # Placeholder for aggregating streaming response
61
- llmresponse = ""
62
-
63
- # Loop through streaming events capturing relevant details
64
- async for chunk in await wrapped(*args, **kwargs):
65
- # Collect message IDs and aggregated response from events
66
- if len(chunk.choices) > 0:
67
- # pylint: disable=line-too-long
68
- if hasattr(chunk.choices[0], "delta") and hasattr(chunk.choices[0].delta, "content"):
69
- content = chunk.choices[0].delta.content
70
- if content:
71
- llmresponse += content
72
- yield chunk
73
- response_id = chunk.id
74
- model = "azure_" + chunk.model
75
-
76
- # Handling exception ensure observability without disrupting operation
77
- try:
78
- # Format 'messages' into a single string
79
- message_prompt = kwargs.get("messages", "")
80
- formatted_messages = []
81
- for message in message_prompt:
82
- role = message["role"]
83
- content = message["content"]
84
-
85
- if isinstance(content, list):
86
- content_str = ", ".join(
87
- # pylint: disable=line-too-long
88
- f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
89
- if "type" in item else f'text: {item["text"]}'
90
- for item in content
91
- )
92
- formatted_messages.append(f"{role}: {content_str}")
93
- else:
94
- formatted_messages.append(f"{role}: {content}")
95
- prompt = "\n".join(formatted_messages)
96
-
97
- # Calculate tokens using input prompt and aggregated response
98
- prompt_tokens = openai_tokens(prompt,
99
- kwargs.get("model", "gpt-3.5-turbo"))
100
- completion_tokens = openai_tokens(llmresponse,
101
- kwargs.get("model", "gpt-3.5-turbo"))
102
-
103
- # Calculate cost of the operation
104
- cost = get_chat_model_cost(model, pricing_info,
105
- prompt_tokens, completion_tokens)
106
-
107
- # Set Span attributes
108
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
109
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
110
- SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI)
111
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
112
- SemanticConvetion.GEN_AI_TYPE_CHAT)
113
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
114
- gen_ai_endpoint)
115
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
116
- response_id)
117
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
118
- environment)
119
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
120
- application_name)
121
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
122
- model)
123
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
124
- kwargs.get("user", ""))
125
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
126
- kwargs.get("tool_choice", ""))
127
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
128
- kwargs.get("temperature", 1.0))
129
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
130
- kwargs.get("presence_penalty", 0.0))
131
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
132
- kwargs.get("frequency_penalty", 0.0))
133
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
134
- kwargs.get("seed", ""))
135
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
136
- True)
137
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
138
- prompt_tokens)
139
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
140
- completion_tokens)
141
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
142
- prompt_tokens + completion_tokens)
143
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
144
- cost)
145
- if trace_content:
146
- span.add_event(
147
- name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
148
- attributes={
149
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
150
- },
151
- )
152
- span.add_event(
153
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
154
- attributes={
155
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
156
- },
157
- )
158
-
159
- span.set_status(Status(StatusCode.OK))
160
-
161
- if disable_metrics is False:
162
- attributes = {
163
- TELEMETRY_SDK_NAME:
164
- "openlit",
165
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
166
- application_name,
167
- SemanticConvetion.GEN_AI_SYSTEM:
168
- SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
169
- SemanticConvetion.GEN_AI_ENVIRONMENT:
170
- environment,
171
- SemanticConvetion.GEN_AI_TYPE:
172
- SemanticConvetion.GEN_AI_TYPE_CHAT,
173
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
174
- model
175
- }
176
-
177
- metrics["genai_requests"].add(1, attributes)
178
- metrics["genai_total_tokens"].add(prompt_tokens + completion_tokens, attributes)
179
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
180
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
181
- metrics["genai_cost"].record(cost, attributes)
182
-
183
- except Exception as e:
184
- handle_exception(span, e)
185
- logger.error("Error in trace creation: %s", e)
186
-
187
- return stream_generator()
188
-
189
- # Handling for non-streaming responses
190
- else:
191
- # pylint: disable=line-too-long
192
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
193
- response = await wrapped(*args, **kwargs)
194
-
195
- try:
196
- # Find base model from response
197
- model = "azure_" + response.model
198
-
199
- # Format 'messages' into a single string
200
- message_prompt = kwargs.get("messages", "")
201
- formatted_messages = []
202
- for message in message_prompt:
203
- role = message["role"]
204
- content = message["content"]
205
-
206
- if isinstance(content, list):
207
- content_str = ", ".join(
208
- # pylint: disable=line-too-long
209
- f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
210
- if "type" in item else f'text: {item["text"]}'
211
- for item in content
212
- )
213
- formatted_messages.append(f"{role}: {content_str}")
214
- else:
215
- formatted_messages.append(f"{role}: {content}")
216
- prompt = "\n".join(formatted_messages)
217
-
218
- # Set base span attribues
219
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
220
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
221
- SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI)
222
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
223
- SemanticConvetion.GEN_AI_TYPE_CHAT)
224
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
225
- gen_ai_endpoint)
226
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
227
- response.id)
228
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
229
- environment)
230
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
231
- application_name)
232
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
233
- model)
234
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
235
- kwargs.get("user", ""))
236
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
237
- kwargs.get("tool_choice", ""))
238
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
239
- kwargs.get("temperature", 1.0))
240
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
241
- kwargs.get("presence_penalty", 0.0))
242
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
243
- kwargs.get("frequency_penalty", 0.0))
244
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
245
- kwargs.get("seed", ""))
246
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
247
- False)
248
- if trace_content:
249
- span.add_event(
250
- name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
251
- attributes={
252
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
253
- },
254
- )
255
-
256
- # Set span attributes when tools is not passed to the function call
257
- if "tools" not in kwargs:
258
- # Calculate cost of the operation
259
- cost = get_chat_model_cost(model, pricing_info,
260
- response.usage.prompt_tokens,
261
- response.usage.completion_tokens)
262
-
263
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
264
- response.usage.prompt_tokens)
265
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
266
- response.usage.completion_tokens)
267
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
268
- response.usage.total_tokens)
269
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
270
- [response.choices[0].finish_reason])
271
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
272
- cost)
273
-
274
- # Set span attributes for when n = 1 (default)
275
- if "n" not in kwargs or kwargs["n"] == 1:
276
- if trace_content:
277
- span.add_event(
278
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
279
- attributes={
280
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[0].message.content,
281
- },
282
- )
283
-
284
- # Set span attributes for when n > 0
285
- else:
286
- i = 0
287
- while i < kwargs["n"] and trace_content is True:
288
- attribute_name = f"gen_ai.content.completion.{i}"
289
- span.add_event(
290
- name=attribute_name,
291
- attributes={
292
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[i].message.content,
293
- },
294
- )
295
- i += 1
296
-
297
- # Return original response
298
- return response
299
-
300
- # Set span attributes when tools is passed to the function call
301
- elif "tools" in kwargs:
302
- # Calculate cost of the operation
303
- cost = get_chat_model_cost(model, pricing_info,
304
- response.usage.prompt_tokens,
305
- response.usage.completion_tokens)
306
-
307
- span.add_event(
308
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
309
- attributes={
310
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
311
- },
312
- )
313
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
314
- response.usage.prompt_tokens)
315
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
316
- response.usage.completion_tokens)
317
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
318
- response.usage.total_tokens)
319
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
320
- cost)
321
-
322
- span.set_status(Status(StatusCode.OK))
323
-
324
- if disable_metrics is False:
325
- attributes = {
326
- TELEMETRY_SDK_NAME:
327
- "openlit",
328
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
329
- application_name,
330
- SemanticConvetion.GEN_AI_SYSTEM:
331
- SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
332
- SemanticConvetion.GEN_AI_ENVIRONMENT:
333
- environment,
334
- SemanticConvetion.GEN_AI_TYPE:
335
- SemanticConvetion.GEN_AI_TYPE_CHAT,
336
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
337
- model
338
- }
339
-
340
- metrics["genai_requests"].add(1, attributes)
341
- metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
342
- metrics["genai_completion_tokens"].add(response.usage.completion_tokens, attributes)
343
- metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
344
- metrics["genai_cost"].record(cost, attributes)
345
-
346
- # Return original response
347
- return response
348
-
349
- except Exception as e:
350
- handle_exception(span, e)
351
- logger.error("Error in trace creation: %s", e)
352
-
353
- # Return original response
354
- return response
355
-
356
- return wrapper
357
-
358
- def azure_async_completions(gen_ai_endpoint, version, environment, application_name,
359
- tracer, pricing_info, trace_content, metrics, disable_metrics):
360
- """
361
- Generates a telemetry wrapper for completions to collect metrics.
362
-
363
- Args:
364
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
365
- version: Version of the monitoring package.
366
- environment: Deployment environment (e.g., production, staging).
367
- application_name: Name of the application using the OpenAI API.
368
- tracer: OpenTelemetry tracer for creating spans.
369
- pricing_info: Information used for calculating the cost of OpenAI usage.
370
- trace_content: Flag indicating whether to trace the actual content.
371
-
372
- Returns:
373
- A function that wraps the chat completions method to add telemetry.
374
- """
375
-
376
- async def wrapper(wrapped, instance, args, kwargs):
377
- """
378
- Wraps the 'completions' API call to add telemetry.
379
-
380
- This collects metrics such as execution time, cost, and token usage, and handles errors
381
- gracefully, adding details to the trace for observability.
382
-
383
- Args:
384
- wrapped: The original 'completions' method to be wrapped.
385
- instance: The instance of the class where the original method is defined.
386
- args: Positional arguments for the 'completions' method.
387
- kwargs: Keyword arguments for the 'completions' method.
388
-
389
- Returns:
390
- The response from the original 'chat.completions' method.
391
- """
392
-
393
- # Check if streaming is enabled for the API call
394
- streaming = kwargs.get("stream", False)
395
-
396
- # pylint: disable=no-else-return
397
- if streaming:
398
- # Special handling for streaming response to accommodate the nature of data flow
399
- async def stream_generator():
400
- # pylint: disable=line-too-long
401
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
402
- # Placeholder for aggregating streaming response
403
- llmresponse = ""
404
-
405
- # Loop through streaming events capturing relevant details
406
- async for chunk in await wrapped(*args, **kwargs):
407
- # Collect message IDs and aggregated response from events
408
- if len(chunk.choices) > 0:
409
- if hasattr(chunk.choices[0], "text"):
410
- content = chunk.choices[0].text
411
- if content:
412
- llmresponse += content
413
- yield chunk
414
- response_id = chunk.id
415
- model = "azure_" + chunk.model
416
-
417
- # Handling exception ensure observability without disrupting operation
418
- try:
419
- prompt = kwargs.get("prompt", "")
420
-
421
- # Calculate tokens using input prompt and aggregated response
422
- prompt_tokens = openai_tokens(prompt,
423
- "gpt-3.5-turbo")
424
- completion_tokens = openai_tokens(llmresponse,
425
- "gpt-3.5-turbo")
426
-
427
- # Calculate cost of the operation
428
- cost = get_chat_model_cost(model, pricing_info,
429
- prompt_tokens, completion_tokens)
430
-
431
- # Set Span attributes
432
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
433
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
434
- SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI)
435
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
436
- SemanticConvetion.GEN_AI_TYPE_CHAT)
437
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
438
- gen_ai_endpoint)
439
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
440
- response_id)
441
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
442
- environment)
443
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
444
- application_name)
445
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
446
- model)
447
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
448
- kwargs.get("user", ""))
449
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
450
- kwargs.get("tool_choice", ""))
451
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
452
- kwargs.get("temperature", 1.0))
453
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
454
- kwargs.get("presence_penalty", 0.0))
455
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
456
- kwargs.get("frequency_penalty", 0.0))
457
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
458
- kwargs.get("seed", ""))
459
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
460
- True)
461
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
462
- prompt_tokens)
463
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
464
- completion_tokens)
465
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
466
- prompt_tokens + completion_tokens)
467
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
468
- cost)
469
- if trace_content:
470
- span.add_event(
471
- name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
472
- attributes={
473
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
474
- },
475
- )
476
- span.add_event(
477
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
478
- attributes={
479
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
480
- },
481
- )
482
-
483
- span.set_status(Status(StatusCode.OK))
484
-
485
- if disable_metrics is False:
486
- attributes = {
487
- TELEMETRY_SDK_NAME:
488
- "openlit",
489
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
490
- application_name,
491
- SemanticConvetion.GEN_AI_SYSTEM:
492
- SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
493
- SemanticConvetion.GEN_AI_ENVIRONMENT:
494
- environment,
495
- SemanticConvetion.GEN_AI_TYPE:
496
- SemanticConvetion.GEN_AI_TYPE_CHAT,
497
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
498
- model
499
- }
500
-
501
- metrics["genai_requests"].add(1, attributes)
502
- metrics["genai_total_tokens"].add(prompt_tokens + completion_tokens, attributes)
503
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
504
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
505
- metrics["genai_cost"].record(cost, attributes)
506
-
507
- except Exception as e:
508
- handle_exception(span, e)
509
- logger.error("Error in trace creation: %s", e)
510
-
511
- return stream_generator()
512
-
513
- # Handling for non-streaming responses
514
- else:
515
- # pylint: disable=line-too-long
516
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
517
- response = wrapped(*args, **kwargs)
518
-
519
- try:
520
- # Find base model from response
521
- model = "azure_" + response.model
522
-
523
- # Set base span attribues
524
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
525
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
526
- SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI)
527
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
528
- SemanticConvetion.GEN_AI_TYPE_CHAT)
529
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
530
- gen_ai_endpoint)
531
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
532
- response.id)
533
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
534
- environment)
535
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
536
- application_name)
537
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
538
- model)
539
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
540
- kwargs.get("user", ""))
541
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
542
- kwargs.get("tool_choice", ""))
543
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
544
- kwargs.get("temperature", 1.0))
545
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
546
- kwargs.get("presence_penalty", 0.0))
547
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
548
- kwargs.get("frequency_penalty", 0.0))
549
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
550
- kwargs.get("seed", ""))
551
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
552
- False)
553
- if trace_content:
554
- span.add_event(
555
- name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
556
- attributes={
557
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
558
- },
559
- )
560
-
561
- # Set span attributes when tools is not passed to the function call
562
- if "tools" not in kwargs:
563
- # Calculate cost of the operation
564
- cost = get_chat_model_cost(model, pricing_info,
565
- response.usage.prompt_tokens,
566
- response.usage.completion_tokens)
567
-
568
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
569
- response.usage.prompt_tokens)
570
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
571
- response.usage.completion_tokens)
572
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
573
- response.usage.total_tokens)
574
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
575
- [response.choices[0].finish_reason])
576
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
577
- cost)
578
-
579
- # Set span attributes for when n = 1 (default)
580
- if "n" not in kwargs or kwargs["n"] == 1:
581
- if trace_content:
582
- span.add_event(
583
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
584
- attributes={
585
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[0].text,
586
- },
587
- )
588
-
589
- # Set span attributes for when n > 0
590
- else:
591
- i = 0
592
- while i < kwargs["n"] and trace_content is True:
593
- attribute_name = f"gen_ai.content.completion.{i}"
594
- span.add_event(
595
- name=attribute_name,
596
- attributes={
597
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[i].text,
598
- },
599
- )
600
- i += 1
601
- return response
602
-
603
- # Set span attributes when tools is passed to the function call
604
- elif "tools" in kwargs:
605
- # Calculate cost of the operation
606
- cost = get_chat_model_cost(model, pricing_info,
607
- response.usage.prompt_tokens,
608
- response.usage.completion_tokens)
609
-
610
- span.add_event(
611
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
612
- attributes={
613
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
614
- },
615
- )
616
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
617
- response.usage.prompt_tokens)
618
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
619
- response.usage.completion_tokens)
620
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
621
- response.usage.total_tokens)
622
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
623
- cost)
624
-
625
- span.set_status(Status(StatusCode.OK))
626
-
627
- if disable_metrics is False:
628
- attributes = {
629
- TELEMETRY_SDK_NAME:
630
- "openlit",
631
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
632
- application_name,
633
- SemanticConvetion.GEN_AI_SYSTEM:
634
- SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
635
- SemanticConvetion.GEN_AI_ENVIRONMENT:
636
- environment,
637
- SemanticConvetion.GEN_AI_TYPE:
638
- SemanticConvetion.GEN_AI_TYPE_CHAT,
639
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
640
- model
641
- }
642
-
643
- metrics["genai_requests"].add(1, attributes)
644
- metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
645
- metrics["genai_completion_tokens"].add(response.usage.completion_tokens, attributes)
646
- metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
647
- metrics["genai_cost"].record(cost, attributes)
648
-
649
- # Return original response
650
- return response
651
-
652
- except Exception as e:
653
- handle_exception(span, e)
654
- logger.error("Error in trace creation: %s", e)
655
-
656
- # Return original response
657
- return response
658
-
659
- return wrapper
660
-
661
def azure_async_embedding(gen_ai_endpoint, version, environment, application_name,
                          tracer, pricing_info, trace_content, metrics, disable_metrics):
    """
    Builds a telemetry wrapper around the asynchronous embeddings call.

    Args:
        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the OpenAI API.
        tracer: OpenTelemetry tracer for creating spans.
        pricing_info: Information used for calculating the cost of OpenAI usage.
        trace_content: Flag indicating whether to trace the actual content.
        metrics: Dictionary of OpenTelemetry metric instruments.
        disable_metrics: When True, metric recording is skipped entirely.

    Returns:
        A coroutine that wraps the embeddings method to add telemetry.
    """

    async def wrapper(wrapped, instance, args, kwargs):
        """
        Instruments a single 'embeddings' invocation.

        Records span attributes (model, token usage, cost) and, unless
        disabled, emits the matching metrics. Any error raised while
        collecting telemetry is logged and never propagated to the caller.

        Args:
            wrapped: The original 'embeddings' method to be wrapped.
            instance: The instance of the class where the original method is defined.
            args: Positional arguments for the 'embeddings' method.
            kwargs: Keyword arguments for the 'embeddings' method.

        Returns:
            The response from the original 'embeddings' method.
        """

        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
            response = await wrapped(*args, **kwargs)

            try:
                model = "azure_" + response.model
                prompt_tokens = response.usage.prompt_tokens
                total_tokens = response.usage.total_tokens

                # Price the call from the pricing table and the prompt tokens used.
                cost = get_embed_model_cost(model, pricing_info, prompt_tokens)

                # Attach request/response details to the span, one attribute per pair.
                span_attrs = [
                    (TELEMETRY_SDK_NAME, "openlit"),
                    (SemanticConvetion.GEN_AI_SYSTEM,
                     SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI),
                    (SemanticConvetion.GEN_AI_TYPE,
                     SemanticConvetion.GEN_AI_TYPE_EMBEDDING),
                    (SemanticConvetion.GEN_AI_ENDPOINT, gen_ai_endpoint),
                    (SemanticConvetion.GEN_AI_ENVIRONMENT, environment),
                    (SemanticConvetion.GEN_AI_APPLICATION_NAME, application_name),
                    (SemanticConvetion.GEN_AI_REQUEST_MODEL, model),
                    (SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
                     kwargs.get("encoding_format", "float")),
                    (SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
                     kwargs.get("dimensions", "")),
                    (SemanticConvetion.GEN_AI_REQUEST_USER, kwargs.get("user", "")),
                    (SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS, prompt_tokens),
                    (SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS, total_tokens),
                    (SemanticConvetion.GEN_AI_USAGE_COST, cost),
                ]
                for attr_key, attr_value in span_attrs:
                    span.set_attribute(attr_key, attr_value)

                if trace_content:
                    span.add_event(
                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                        attributes={
                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("input", ""),
                        },
                    )

                span.set_status(Status(StatusCode.OK))

                if disable_metrics is False:
                    metric_attrs = {
                        TELEMETRY_SDK_NAME: "openlit",
                        SemanticConvetion.GEN_AI_APPLICATION_NAME: application_name,
                        SemanticConvetion.GEN_AI_SYSTEM:
                            SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
                        SemanticConvetion.GEN_AI_ENVIRONMENT: environment,
                        SemanticConvetion.GEN_AI_TYPE:
                            SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
                        SemanticConvetion.GEN_AI_REQUEST_MODEL: model,
                    }

                    metrics["genai_requests"].add(1, metric_attrs)
                    metrics["genai_total_tokens"].add(total_tokens, metric_attrs)
                    metrics["genai_prompt_tokens"].add(prompt_tokens, metric_attrs)
                    metrics["genai_cost"].record(cost, metric_attrs)

                # Hand the unmodified provider response back to the caller.
                return response

            except Exception as e:
                handle_exception(span, e)
                logger.error("Error in trace creation: %s", e)

                # Telemetry failure must never break the instrumented call.
                return response

    return wrapper
771
-
772
def azure_async_image_generate(gen_ai_endpoint, version, environment, application_name,
                               tracer, pricing_info, trace_content, metrics, disable_metrics):
    """
    Generates a telemetry wrapper for image generation to collect metrics.

    Args:
        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the OpenAI API.
        tracer: OpenTelemetry tracer for creating spans.
        pricing_info: Information used for calculating the cost of OpenAI image generation.
        trace_content: Flag indicating whether to trace the input prompt and generated images.
        metrics: Dictionary of OpenTelemetry metric instruments.
        disable_metrics: When True, metric recording is skipped entirely.

    Returns:
        A function that wraps the image generation method to add telemetry.
    """

    async def wrapper(wrapped, instance, args, kwargs):
        """
        Wraps the 'images.generate' API call to add telemetry.

        This collects metrics such as execution time, cost, and handles errors
        gracefully, adding details to the trace for observability.

        Args:
            wrapped: The original 'images.generate' method to be wrapped.
            instance: The instance of the class where the original method is defined.
            args: Positional arguments for the 'images.generate' method.
            kwargs: Keyword arguments for the 'images.generate' method.

        Returns:
            The response from the original 'images.generate' method.
        """

        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
            response = await wrapped(*args, **kwargs)
            images_count = 0

            try:
                # Determine how the generated image payload is returned.
                if "response_format" in kwargs and kwargs["response_format"] == "b64_json":
                    image = "b64_json"
                else:
                    image = "url"

                # Per-image cost; total cost is this multiplied by the image count.
                cost = get_image_model_cost("azure_" + kwargs.get("model", "dall-e-3"),
                                            pricing_info, kwargs.get("size", "1024x1024"),
                                            kwargs.get("quality", "standard"))

                for items in response.data:
                    # Set Span attributes
                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                       SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI)
                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
                                       SemanticConvetion.GEN_AI_TYPE_IMAGE)
                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                       gen_ai_endpoint)
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                       response.created)
                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
                                       environment)
                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
                                       application_name)
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                                       "azure_" + kwargs.get("model", "dall-e-3"))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                                       kwargs.get("size", "1024x1024"))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
                                       kwargs.get("quality", "standard"))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_STYLE,
                                       kwargs.get("style", "vivid"))
                    # BUGFIX: revised_prompt lives on each image item, not on the
                    # response object. The previous guard `response.revised_prompt`
                    # raised AttributeError and aborted telemetry into the except.
                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_REVISED_PROMPT,
                                       items.revised_prompt if items.revised_prompt else "")
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                       kwargs.get("user", ""))
                    if trace_content:
                        span.add_event(
                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                            attributes={
                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
                            },
                        )
                        # One event per generated image, indexed by position.
                        attribute_name = f"gen_ai.response.image.{images_count}"
                        span.add_event(
                            name=attribute_name,
                            attributes={
                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: getattr(items, image),
                            },
                        )

                    images_count += 1

                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                   len(response.data) * cost)
                span.set_status(Status(StatusCode.OK))

                if disable_metrics is False:
                    attributes = {
                        TELEMETRY_SDK_NAME:
                            "openlit",
                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
                            application_name,
                        SemanticConvetion.GEN_AI_SYSTEM:
                            SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
                        SemanticConvetion.GEN_AI_ENVIRONMENT:
                            environment,
                        SemanticConvetion.GEN_AI_TYPE:
                            SemanticConvetion.GEN_AI_TYPE_IMAGE,
                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
                            "azure_" + kwargs.get("model", "dall-e-3")
                    }

                    metrics["genai_requests"].add(1, attributes)
                    # BUGFIX: record the total cost for all generated images so the
                    # metric agrees with the GEN_AI_USAGE_COST span attribute above
                    # (previously only the single-image cost was recorded).
                    metrics["genai_cost"].record(len(response.data) * cost, attributes)

                # Return original response
                return response

            except Exception as e:
                handle_exception(span, e)
                logger.error("Error in trace creation: %s", e)

                # Return original response
                return response

    return wrapper