openlit 1.33.8__py3-none-any.whl → 1.33.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. openlit/__helpers.py +83 -0
  2. openlit/__init__.py +1 -1
  3. openlit/instrumentation/ag2/ag2.py +2 -2
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +4 -4
  13. openlit/instrumentation/astra/async_astra.py +4 -4
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +4 -4
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +2 -2
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
  26. openlit/instrumentation/crewai/crewai.py +2 -2
  27. openlit/instrumentation/dynamiq/dynamiq.py +2 -2
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +73 -47
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +73 -52
  30. openlit/instrumentation/embedchain/embedchain.py +4 -4
  31. openlit/instrumentation/firecrawl/firecrawl.py +2 -2
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/gpt4all.py +17 -17
  36. openlit/instrumentation/groq/async_groq.py +14 -14
  37. openlit/instrumentation/groq/groq.py +14 -14
  38. openlit/instrumentation/haystack/haystack.py +2 -2
  39. openlit/instrumentation/julep/async_julep.py +2 -2
  40. openlit/instrumentation/julep/julep.py +2 -2
  41. openlit/instrumentation/langchain/langchain.py +36 -31
  42. openlit/instrumentation/letta/letta.py +6 -6
  43. openlit/instrumentation/litellm/async_litellm.py +20 -20
  44. openlit/instrumentation/litellm/litellm.py +20 -20
  45. openlit/instrumentation/llamaindex/llamaindex.py +2 -2
  46. openlit/instrumentation/mem0/mem0.py +2 -2
  47. openlit/instrumentation/milvus/milvus.py +4 -4
  48. openlit/instrumentation/mistral/async_mistral.py +18 -18
  49. openlit/instrumentation/mistral/mistral.py +18 -18
  50. openlit/instrumentation/multion/async_multion.py +2 -2
  51. openlit/instrumentation/multion/multion.py +2 -2
  52. openlit/instrumentation/ollama/async_ollama.py +29 -29
  53. openlit/instrumentation/ollama/ollama.py +29 -29
  54. openlit/instrumentation/openai/__init__.py +11 -230
  55. openlit/instrumentation/openai/async_openai.py +434 -409
  56. openlit/instrumentation/openai/openai.py +415 -393
  57. openlit/instrumentation/phidata/phidata.py +2 -2
  58. openlit/instrumentation/pinecone/pinecone.py +4 -4
  59. openlit/instrumentation/premai/premai.py +20 -20
  60. openlit/instrumentation/qdrant/async_qdrant.py +4 -4
  61. openlit/instrumentation/qdrant/qdrant.py +4 -4
  62. openlit/instrumentation/reka/async_reka.py +6 -6
  63. openlit/instrumentation/reka/reka.py +6 -6
  64. openlit/instrumentation/together/async_together.py +18 -18
  65. openlit/instrumentation/together/together.py +18 -18
  66. openlit/instrumentation/transformers/transformers.py +6 -6
  67. openlit/instrumentation/vertexai/async_vertexai.py +53 -53
  68. openlit/instrumentation/vertexai/vertexai.py +53 -53
  69. openlit/instrumentation/vllm/vllm.py +6 -6
  70. openlit/otel/metrics.py +98 -7
  71. openlit/semcov/__init__.py +113 -80
  72. {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/METADATA +1 -1
  73. openlit-1.33.9.dist-info/RECORD +121 -0
  74. {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/WHEEL +1 -1
  75. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  76. openlit/instrumentation/openai/azure_openai.py +0 -898
  77. openlit-1.33.8.dist-info/RECORD +0 -122
  78. {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/LICENSE +0 -0
@@ -1,898 +0,0 @@
1
- # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches
2
- """
3
- Module for monitoring Azure OpenAI API calls.
4
- """
5
-
6
- import logging
7
- from opentelemetry.trace import SpanKind, Status, StatusCode
8
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
9
- from openlit.__helpers import get_chat_model_cost, get_embed_model_cost
10
- from openlit.__helpers import get_image_model_cost, openai_tokens, handle_exception
11
- from openlit.semcov import SemanticConvetion
12
-
13
- # Initialize logger for logging potential issues and operations
14
- logger = logging.getLogger(__name__)
15
-
16
- def azure_chat_completions(gen_ai_endpoint, version, environment, application_name,
17
- tracer, pricing_info, trace_content, metrics, disable_metrics):
18
- """
19
- Generates a telemetry wrapper for chat completions to collect metrics.
20
-
21
- Args:
22
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
23
- version: Version of the monitoring package.
24
- environment: Deployment environment (e.g., production, staging).
25
- application_name: Name of the application using the OpenAI API.
26
- tracer: OpenTelemetry tracer for creating spans.
27
- pricing_info: Information used for calculating the cost of OpenAI usage.
28
- trace_content: Flag indicating whether to trace the actual content.
29
-
30
- Returns:
31
- A function that wraps the chat completions method to add telemetry.
32
- """
33
-
34
- def wrapper(wrapped, instance, args, kwargs):
35
- """
36
- Wraps the 'chat.completions' API call to add telemetry.
37
-
38
- This collects metrics such as execution time, cost, and token usage, and handles errors
39
- gracefully, adding details to the trace for observability.
40
-
41
- Args:
42
- wrapped: The original 'chat.completions' method to be wrapped.
43
- instance: The instance of the class where the original method is defined.
44
- args: Positional arguments for the 'chat.completions' method.
45
- kwargs: Keyword arguments for the 'chat.completions' method.
46
-
47
- Returns:
48
- The response from the original 'chat.completions' method.
49
- """
50
-
51
- # Check if streaming is enabled for the API call
52
- streaming = kwargs.get("stream", False)
53
-
54
- # pylint: disable=no-else-return
55
- if streaming:
56
- # Special handling for streaming response to accommodate the nature of data flow
57
- def stream_generator():
58
- # pylint: disable=line-too-long
59
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
60
- # Placeholder for aggregating streaming response
61
- llmresponse = ""
62
-
63
- # Loop through streaming events capturing relevant details
64
- for chunk in wrapped(*args, **kwargs):
65
- # Collect message IDs and aggregated response from events
66
- if len(chunk.choices) > 0:
67
- # pylint: disable=line-too-long
68
- if hasattr(chunk.choices[0], "delta") and hasattr(chunk.choices[0].delta, "content"):
69
- content = chunk.choices[0].delta.content
70
- if content:
71
- llmresponse += content
72
- yield chunk
73
- response_id = chunk.id
74
- model = "azure_" + chunk.model
75
-
76
- # Handling exception ensure observability without disrupting operation
77
- try:
78
- # Format 'messages' into a single string
79
- message_prompt = kwargs.get("messages", "")
80
- formatted_messages = []
81
- for message in message_prompt:
82
- role = message["role"]
83
- content = message["content"]
84
-
85
- if isinstance(content, list):
86
- content_str = ", ".join(
87
- # pylint: disable=line-too-long
88
- f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
89
- if "type" in item else f'text: {item["text"]}'
90
- for item in content
91
- )
92
- formatted_messages.append(f"{role}: {content_str}")
93
- else:
94
- formatted_messages.append(f"{role}: {content}")
95
- prompt = "\n".join(formatted_messages)
96
-
97
- # Calculate tokens using input prompt and aggregated response
98
- prompt_tokens = openai_tokens(prompt,
99
- kwargs.get("model", "gpt-3.5-turbo"))
100
- completion_tokens = openai_tokens(llmresponse,
101
- kwargs.get("model", "gpt-3.5-turbo"))
102
-
103
- # Calculate cost of the operation
104
- cost = get_chat_model_cost(model, pricing_info,
105
- prompt_tokens, completion_tokens)
106
-
107
- # Set Span attributes
108
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
109
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
110
- SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI)
111
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
112
- SemanticConvetion.GEN_AI_TYPE_CHAT)
113
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
114
- gen_ai_endpoint)
115
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
116
- response_id)
117
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
118
- environment)
119
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
120
- application_name)
121
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
122
- model)
123
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
124
- kwargs.get("user", ""))
125
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
126
- kwargs.get("tool_choice", ""))
127
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
128
- kwargs.get("temperature", 1.0))
129
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
130
- kwargs.get("presence_penalty", 0.0))
131
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
132
- kwargs.get("frequency_penalty", 0.0))
133
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
134
- kwargs.get("seed", ""))
135
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
136
- True)
137
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
138
- prompt_tokens)
139
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
140
- completion_tokens)
141
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
142
- prompt_tokens + completion_tokens)
143
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
144
- cost)
145
- if trace_content:
146
- span.add_event(
147
- name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
148
- attributes={
149
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
150
- },
151
- )
152
- span.add_event(
153
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
154
- attributes={
155
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
156
- },
157
- )
158
-
159
- span.set_status(Status(StatusCode.OK))
160
-
161
- if disable_metrics is False:
162
- attributes = {
163
- TELEMETRY_SDK_NAME:
164
- "openlit",
165
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
166
- application_name,
167
- SemanticConvetion.GEN_AI_SYSTEM:
168
- SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
169
- SemanticConvetion.GEN_AI_ENVIRONMENT:
170
- environment,
171
- SemanticConvetion.GEN_AI_TYPE:
172
- SemanticConvetion.GEN_AI_TYPE_CHAT,
173
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
174
- model
175
- }
176
-
177
- metrics["genai_requests"].add(1, attributes)
178
- metrics["genai_total_tokens"].add(prompt_tokens + completion_tokens, attributes)
179
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
180
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
181
- metrics["genai_cost"].record(cost, attributes)
182
-
183
- except Exception as e:
184
- handle_exception(span, e)
185
- logger.error("Error in trace creation: %s", e)
186
-
187
- return stream_generator()
188
-
189
- # Handling for non-streaming responses
190
- else:
191
- # pylint: disable=line-too-long
192
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
193
- response = wrapped(*args, **kwargs)
194
-
195
- try:
196
- # Find base model from response
197
- model = "azure_" + response.model
198
-
199
- # Format 'messages' into a single string
200
- message_prompt = kwargs.get("messages", "")
201
- formatted_messages = []
202
- for message in message_prompt:
203
- role = message["role"]
204
- content = message["content"]
205
-
206
- if isinstance(content, list):
207
- content_str = ", ".join(
208
- # pylint: disable=line-too-long
209
- f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
210
- if "type" in item else f'text: {item["text"]}'
211
- for item in content
212
- )
213
- formatted_messages.append(f"{role}: {content_str}")
214
- else:
215
- formatted_messages.append(f"{role}: {content}")
216
- prompt = "\n".join(formatted_messages)
217
-
218
- # Set base span attribues
219
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
220
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
221
- SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI)
222
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
223
- SemanticConvetion.GEN_AI_TYPE_CHAT)
224
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
225
- gen_ai_endpoint)
226
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
227
- response.id)
228
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
229
- environment)
230
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
231
- application_name)
232
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
233
- model)
234
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
235
- kwargs.get("user", ""))
236
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
237
- kwargs.get("tool_choice", ""))
238
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
239
- kwargs.get("temperature", 1.0))
240
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
241
- kwargs.get("presence_penalty", 0.0))
242
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
243
- kwargs.get("frequency_penalty", 0.0))
244
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
245
- kwargs.get("seed", ""))
246
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
247
- False)
248
- if trace_content:
249
- span.add_event(
250
- name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
251
- attributes={
252
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
253
- },
254
- )
255
- if trace_content:
256
- span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
257
- prompt)
258
-
259
- # Set span attributes when tools is not passed to the function call
260
- if "tools" not in kwargs:
261
- # Calculate cost of the operation
262
- cost = get_chat_model_cost(model, pricing_info,
263
- response.usage.prompt_tokens,
264
- response.usage.completion_tokens)
265
-
266
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
267
- response.usage.prompt_tokens)
268
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
269
- response.usage.completion_tokens)
270
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
271
- response.usage.total_tokens)
272
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
273
- [response.choices[0].finish_reason])
274
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
275
- cost)
276
-
277
- # Set span attributes for when n = 1 (default)
278
- if "n" not in kwargs or kwargs["n"] == 1:
279
- if trace_content:
280
- span.add_event(
281
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
282
- attributes={
283
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[0].message.content,
284
- },
285
- )
286
-
287
- # Set span attributes for when n > 0
288
- else:
289
- i = 0
290
- while i < kwargs["n"] and trace_content is True:
291
- attribute_name = f"gen_ai.content.completion.{i}"
292
- span.add_event(
293
- name=attribute_name,
294
- attributes={
295
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[i].message.content,
296
- },
297
- )
298
- i += 1
299
-
300
- # Return original response
301
- return response
302
-
303
- # Set span attributes when tools is passed to the function call
304
- elif "tools" in kwargs:
305
- # Calculate cost of the operation
306
- cost = get_chat_model_cost(model, pricing_info,
307
- response.usage.prompt_tokens,
308
- response.usage.completion_tokens)
309
-
310
- span.add_event(
311
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
312
- attributes={
313
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
314
- },
315
- )
316
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
317
- response.usage.prompt_tokens)
318
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
319
- response.usage.completion_tokens)
320
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
321
- response.usage.total_tokens)
322
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
323
- cost)
324
-
325
- span.set_status(Status(StatusCode.OK))
326
-
327
- if disable_metrics is False:
328
- attributes = {
329
- TELEMETRY_SDK_NAME:
330
- "openlit",
331
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
332
- application_name,
333
- SemanticConvetion.GEN_AI_SYSTEM:
334
- SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
335
- SemanticConvetion.GEN_AI_ENVIRONMENT:
336
- environment,
337
- SemanticConvetion.GEN_AI_TYPE:
338
- SemanticConvetion.GEN_AI_TYPE_CHAT,
339
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
340
- model
341
- }
342
-
343
- metrics["genai_requests"].add(1, attributes)
344
- metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
345
- metrics["genai_completion_tokens"].add(response.usage.completion_tokens, attributes)
346
- metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
347
- metrics["genai_cost"].record(cost, attributes)
348
-
349
- # Return original response
350
- return response
351
-
352
- except Exception as e:
353
- handle_exception(span, e)
354
- logger.error("Error in trace creation: %s", e)
355
-
356
- # Return original response
357
- return response
358
-
359
- return wrapper
360
-
361
- def azure_completions(gen_ai_endpoint, version, environment, application_name,
362
- tracer, pricing_info, trace_content, metrics, disable_metrics):
363
- """
364
- Generates a telemetry wrapper for completions to collect metrics.
365
-
366
- Args:
367
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
368
- version: Version of the monitoring package.
369
- environment: Deployment environment (e.g., production, staging).
370
- application_name: Name of the application using the OpenAI API.
371
- tracer: OpenTelemetry tracer for creating spans.
372
- pricing_info: Information used for calculating the cost of OpenAI usage.
373
- trace_content: Flag indicating whether to trace the actual content.
374
-
375
- Returns:
376
- A function that wraps the chat completions method to add telemetry.
377
- """
378
-
379
- def wrapper(wrapped, instance, args, kwargs):
380
- """
381
- Wraps the 'completions' API call to add telemetry.
382
-
383
- This collects metrics such as execution time, cost, and token usage, and handles errors
384
- gracefully, adding details to the trace for observability.
385
-
386
- Args:
387
- wrapped: The original 'completions' method to be wrapped.
388
- instance: The instance of the class where the original method is defined.
389
- args: Positional arguments for the 'completions' method.
390
- kwargs: Keyword arguments for the 'completions' method.
391
-
392
- Returns:
393
- The response from the original 'chat.completions' method.
394
- """
395
-
396
- # Check if streaming is enabled for the API call
397
- streaming = kwargs.get("stream", False)
398
-
399
- # pylint: disable=no-else-return
400
- if streaming:
401
- # Special handling for streaming response to accommodate the nature of data flow
402
- def stream_generator():
403
- # pylint: disable=line-too-long
404
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
405
- # Placeholder for aggregating streaming response
406
- llmresponse = ""
407
-
408
- # Loop through streaming events capturing relevant details
409
- for chunk in wrapped(*args, **kwargs):
410
- # Collect message IDs and aggregated response from events
411
- if len(chunk.choices) > 0:
412
- if hasattr(chunk.choices[0], "text"):
413
- content = chunk.choices[0].text
414
- if content:
415
- llmresponse += content
416
- yield chunk
417
- response_id = chunk.id
418
- model = "azure_" + chunk.model
419
-
420
- # Handling exception ensure observability without disrupting operation
421
- try:
422
- prompt = kwargs.get("prompt", "")
423
-
424
- # Calculate tokens using input prompt and aggregated response
425
- prompt_tokens = openai_tokens(prompt, "gpt-3.5-turbo")
426
- completion_tokens = openai_tokens(llmresponse, "gpt-3.5-turbo")
427
-
428
- # Calculate cost of the operation
429
- cost = get_chat_model_cost(model, pricing_info,
430
- prompt_tokens, completion_tokens)
431
-
432
- # Set Span attributes
433
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
434
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
435
- SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI)
436
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
437
- SemanticConvetion.GEN_AI_TYPE_CHAT)
438
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
439
- gen_ai_endpoint)
440
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
441
- response_id)
442
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
443
- environment)
444
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
445
- application_name)
446
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
447
- model)
448
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
449
- kwargs.get("user", ""))
450
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
451
- kwargs.get("tool_choice", ""))
452
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
453
- kwargs.get("temperature", 1.0))
454
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
455
- kwargs.get("presence_penalty", 0.0))
456
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
457
- kwargs.get("frequency_penalty", 0.0))
458
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
459
- kwargs.get("seed", ""))
460
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
461
- True)
462
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
463
- prompt_tokens)
464
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
465
- completion_tokens)
466
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
467
- prompt_tokens + completion_tokens)
468
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
469
- cost)
470
- if trace_content:
471
- span.add_event(
472
- name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
473
- attributes={
474
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
475
- },
476
- )
477
- span.add_event(
478
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
479
- attributes={
480
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
481
- },
482
- )
483
-
484
- span.set_status(Status(StatusCode.OK))
485
-
486
- if disable_metrics is False:
487
- attributes = {
488
- TELEMETRY_SDK_NAME:
489
- "openlit",
490
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
491
- application_name,
492
- SemanticConvetion.GEN_AI_SYSTEM:
493
- SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
494
- SemanticConvetion.GEN_AI_ENVIRONMENT:
495
- environment,
496
- SemanticConvetion.GEN_AI_TYPE:
497
- SemanticConvetion.GEN_AI_TYPE_CHAT,
498
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
499
- model
500
- }
501
-
502
- metrics["genai_requests"].add(1, attributes)
503
- metrics["genai_total_tokens"].add(prompt_tokens + completion_tokens, attributes)
504
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
505
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
506
- metrics["genai_cost"].record(cost, attributes)
507
-
508
- except Exception as e:
509
- handle_exception(span, e)
510
- logger.error("Error in trace creation: %s", e)
511
-
512
- return stream_generator()
513
-
514
- # Handling for non-streaming responses
515
- else:
516
- # pylint: disable=line-too-long
517
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
518
- response = wrapped(*args, **kwargs)
519
-
520
- try:
521
- # Find base model from response
522
- model = "azure_" + response.model
523
-
524
- # Set base span attribues
525
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
526
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
527
- SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI)
528
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
529
- SemanticConvetion.GEN_AI_TYPE_CHAT)
530
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
531
- gen_ai_endpoint)
532
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
533
- response.id)
534
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
535
- environment)
536
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
537
- application_name)
538
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
539
- model)
540
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
541
- kwargs.get("user", ""))
542
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
543
- kwargs.get("tool_choice", ""))
544
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
545
- kwargs.get("temperature", 1.0))
546
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
547
- kwargs.get("presence_penalty", 0.0))
548
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
549
- kwargs.get("frequency_penalty", 0.0))
550
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
551
- kwargs.get("seed", ""))
552
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
553
- False)
554
- if trace_content:
555
- span.add_event(
556
- name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
557
- attributes={
558
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
559
- },
560
- )
561
-
562
- # Set span attributes when tools is not passed to the function call
563
- if "tools" not in kwargs:
564
- # Calculate cost of the operation
565
- cost = get_chat_model_cost(model, pricing_info,
566
- response.usage.prompt_tokens,
567
- response.usage.completion_tokens)
568
-
569
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
570
- response.usage.prompt_tokens)
571
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
572
- response.usage.completion_tokens)
573
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
574
- response.usage.total_tokens)
575
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
576
- [response.choices[0].finish_reason])
577
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
578
- cost)
579
-
580
- # Set span attributes for when n = 1 (default)
581
- if "n" not in kwargs or kwargs["n"] == 1:
582
- if trace_content:
583
- span.add_event(
584
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
585
- attributes={
586
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[0].text,
587
- },
588
- )
589
-
590
- # Set span attributes for when n > 0
591
- else:
592
- i = 0
593
- while i < kwargs["n"] and trace_content is True:
594
- attribute_name = f"gen_ai.content.completion.{i}"
595
- span.add_event(
596
- name=attribute_name,
597
- attributes={
598
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[i].text,
599
- },
600
- )
601
- i += 1
602
- return response
603
-
604
- # Set span attributes when tools is passed to the function call
605
- elif "tools" in kwargs:
606
- # Calculate cost of the operation
607
- cost = get_chat_model_cost(model, pricing_info,
608
- response.usage.prompt_tokens,
609
- response.usage.completion_tokens)
610
-
611
- span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
612
- "Function called with tools")
613
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
614
- response.usage.prompt_tokens)
615
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
616
- response.usage.completion_tokens)
617
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
618
- response.usage.total_tokens)
619
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
620
- cost)
621
-
622
- span.set_status(Status(StatusCode.OK))
623
-
624
- if disable_metrics is False:
625
- attributes = {
626
- TELEMETRY_SDK_NAME:
627
- "openlit",
628
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
629
- application_name,
630
- SemanticConvetion.GEN_AI_SYSTEM:
631
- SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
632
- SemanticConvetion.GEN_AI_ENVIRONMENT:
633
- environment,
634
- SemanticConvetion.GEN_AI_TYPE:
635
- SemanticConvetion.GEN_AI_TYPE_CHAT,
636
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
637
- model
638
- }
639
-
640
- metrics["genai_requests"].add(1, attributes)
641
- metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
642
- metrics["genai_completion_tokens"].add(response.usage.completion_tokens, attributes)
643
- metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
644
- metrics["genai_cost"].record(cost, attributes)
645
-
646
- # Return original response
647
- return response
648
-
649
- except Exception as e:
650
- handle_exception(span, e)
651
- logger.error("Error in trace creation: %s", e)
652
-
653
- # Return original response
654
- return response
655
-
656
- return wrapper
657
-
658
def azure_embedding(gen_ai_endpoint, version, environment, application_name,
                    tracer, pricing_info, trace_content, metrics, disable_metrics):
    """
    Builds a telemetry wrapper around the Azure OpenAI 'embeddings' call.

    Args:
        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the OpenAI API.
        tracer: OpenTelemetry tracer for creating spans.
        pricing_info: Information used for calculating the cost of OpenAI usage.
        trace_content: Flag indicating whether to trace the actual content.
        metrics: Dict of OTel metric instruments to record into.
        disable_metrics: When True, skip metric recording entirely.

    Returns:
        A function that wraps the embeddings method to add telemetry.
    """

    def wrapper(wrapped, instance, args, kwargs):
        """
        Runs the wrapped 'embeddings' call inside a CLIENT span, then enriches
        the span (and, unless disabled, the metric instruments) with token
        usage, cost and request details. Any telemetry failure is logged and
        swallowed so the caller always receives the original response.
        """

        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
            response = wrapped(*args, **kwargs)

            try:
                model_name = "azure_" + response.model
                # Embedding cost depends only on prompt tokens.
                cost = get_embed_model_cost(model_name,
                                            pricing_info, response.usage.prompt_tokens)

                # Span attributes, applied in a single ordered pass.
                for attr_key, attr_value in (
                    (TELEMETRY_SDK_NAME, "openlit"),
                    (SemanticConvetion.GEN_AI_SYSTEM,
                     SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI),
                    (SemanticConvetion.GEN_AI_TYPE,
                     SemanticConvetion.GEN_AI_TYPE_EMBEDDING),
                    (SemanticConvetion.GEN_AI_ENDPOINT, gen_ai_endpoint),
                    (SemanticConvetion.GEN_AI_ENVIRONMENT, environment),
                    (SemanticConvetion.GEN_AI_APPLICATION_NAME, application_name),
                    (SemanticConvetion.GEN_AI_REQUEST_MODEL, model_name),
                    (SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
                     kwargs.get("encoding_format", "float")),
                    (SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
                     kwargs.get("dimensions", "")),
                    (SemanticConvetion.GEN_AI_REQUEST_USER,
                     kwargs.get("user", "")),
                    (SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
                     response.usage.prompt_tokens),
                    (SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
                     response.usage.total_tokens),
                    (SemanticConvetion.GEN_AI_USAGE_COST, cost),
                ):
                    span.set_attribute(attr_key, attr_value)

                if trace_content:
                    # Raw input is recorded as an event, not an attribute,
                    # so it can be dropped by processors independently.
                    span.add_event(
                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                        attributes={
                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("input", ""),
                        },
                    )

                span.set_status(Status(StatusCode.OK))

                if disable_metrics is False:
                    attributes = {
                        TELEMETRY_SDK_NAME:
                            "openlit",
                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
                            application_name,
                        SemanticConvetion.GEN_AI_SYSTEM:
                            SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
                        SemanticConvetion.GEN_AI_ENVIRONMENT:
                            environment,
                        SemanticConvetion.GEN_AI_TYPE:
                            SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
                            model_name,
                    }

                    metrics["genai_requests"].add(1, attributes)
                    metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
                    metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
                    metrics["genai_cost"].record(cost, attributes)

                # Return original response
                return response

            except Exception as e:
                handle_exception(span, e)
                logger.error("Error in trace creation: %s", e)

                # Return original response
                return response

    return wrapper

def azure_image_generate(gen_ai_endpoint, version, environment, application_name,
                         tracer, pricing_info, trace_content, metrics, disable_metrics):
    """
    Generates a telemetry wrapper for image generation to collect metrics.

    Args:
        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the OpenAI API.
        tracer: OpenTelemetry tracer for creating spans.
        pricing_info: Information used for calculating the cost of OpenAI image generation.
        trace_content: Flag indicating whether to trace the input prompt and generated images.
        metrics: Dict of OTel metric instruments to record into.
        disable_metrics: When True, skip metric recording entirely.

    Returns:
        A function that wraps the image generation method to add telemetry.
    """

    def wrapper(wrapped, instance, args, kwargs):
        """
        Wraps the 'images.generate' API call to add telemetry.

        This collects metrics such as execution time, cost, and handles errors
        gracefully, adding details to the trace for observability.

        Args:
            wrapped: The original 'images.generate' method to be wrapped.
            instance: The instance of the class where the original method is defined.
            args: Positional arguments for the 'images.generate' method.
            kwargs: Keyword arguments for the 'images.generate' method.

        Returns:
            The response from the original 'images.generate' method.
        """

        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
            response = wrapped(*args, **kwargs)
            images_count = 0

            try:
                # Find Image format: images come back either inline (b64) or as URLs.
                if "response_format" in kwargs and kwargs["response_format"] == "b64_json":
                    image = "b64_json"
                else:
                    image = "url"

                # Per-image cost; multiplied by the image count when recorded.
                cost = get_image_model_cost("azure_" + kwargs.get("model", "dall-e-3"),
                                            pricing_info, kwargs.get("size", "1024x1024"),
                                            kwargs.get("quality", "standard"))

                for items in response.data:
                    # Set Span attributes (re-applied per image; last write wins
                    # for scalar attributes, events accumulate).
                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                       SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI)
                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
                                       SemanticConvetion.GEN_AI_TYPE_IMAGE)
                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                       gen_ai_endpoint)
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                       response.created)
                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
                                       environment)
                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
                                       application_name)
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                                       "azure_" + kwargs.get("model", "dall-e-3"))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                                       kwargs.get("size", "1024x1024"))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
                                       kwargs.get("quality", "standard"))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_STYLE,
                                       kwargs.get("style", "vivid"))
                    # Bug fix: revised_prompt lives on each image item, not on the
                    # top-level response; the old guard (response.revised_prompt)
                    # raised AttributeError and diverted into the except branch.
                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_REVISED_PROMPT,
                                       getattr(items, "revised_prompt", "") or "")
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                       kwargs.get("user", ""))
                    if trace_content:
                        span.add_event(
                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                            attributes={
                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
                            },
                        )
                        attribute_name = f"gen_ai.response.image.{images_count}"
                        span.add_event(
                            name=attribute_name,
                            attributes={
                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: getattr(items, image),
                            },
                        )

                    images_count += 1

                # Total cost of the request (per-image cost times image count).
                total_cost = len(response.data) * cost
                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                   total_cost)
                span.set_status(Status(StatusCode.OK))

                if disable_metrics is False:
                    attributes = {
                        TELEMETRY_SDK_NAME:
                            "openlit",
                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
                            application_name,
                        SemanticConvetion.GEN_AI_SYSTEM:
                            SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
                        SemanticConvetion.GEN_AI_ENVIRONMENT:
                            environment,
                        SemanticConvetion.GEN_AI_TYPE:
                            SemanticConvetion.GEN_AI_TYPE_IMAGE,
                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
                            "azure_" + kwargs.get("model", "dall-e-3")
                    }

                    metrics["genai_requests"].add(1, attributes)
                    # Consistency fix: record the same total cost the span carries
                    # (previously only the per-image cost was recorded, undercounting
                    # multi-image requests).
                    metrics["genai_cost"].record(total_cost, attributes)

                # Return original response
                return response

            except Exception as e:
                handle_exception(span, e)
                logger.error("Error in trace creation: %s", e)

                # Return original response
                return response

    return wrapper