openlit 1.33.9__py3-none-any.whl → 1.33.11__py3-none-any.whl

This diff shows the changes between two package versions as published to a supported public registry. It is provided for informational purposes only.
Files changed (113)
  1. openlit/__helpers.py +78 -0
  2. openlit/__init__.py +41 -13
  3. openlit/instrumentation/ag2/__init__.py +9 -10
  4. openlit/instrumentation/ag2/ag2.py +134 -69
  5. openlit/instrumentation/ai21/__init__.py +6 -5
  6. openlit/instrumentation/ai21/ai21.py +71 -534
  7. openlit/instrumentation/ai21/async_ai21.py +71 -534
  8. openlit/instrumentation/ai21/utils.py +407 -0
  9. openlit/instrumentation/anthropic/__init__.py +3 -3
  10. openlit/instrumentation/anthropic/anthropic.py +5 -5
  11. openlit/instrumentation/anthropic/async_anthropic.py +5 -5
  12. openlit/instrumentation/assemblyai/__init__.py +2 -2
  13. openlit/instrumentation/assemblyai/assemblyai.py +3 -3
  14. openlit/instrumentation/astra/__init__.py +25 -25
  15. openlit/instrumentation/astra/astra.py +7 -7
  16. openlit/instrumentation/astra/async_astra.py +7 -7
  17. openlit/instrumentation/azure_ai_inference/__init__.py +5 -5
  18. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +11 -11
  19. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +11 -11
  20. openlit/instrumentation/bedrock/__init__.py +2 -2
  21. openlit/instrumentation/bedrock/bedrock.py +3 -3
  22. openlit/instrumentation/chroma/__init__.py +9 -9
  23. openlit/instrumentation/chroma/chroma.py +7 -7
  24. openlit/instrumentation/cohere/__init__.py +7 -7
  25. openlit/instrumentation/cohere/async_cohere.py +10 -10
  26. openlit/instrumentation/cohere/cohere.py +11 -11
  27. openlit/instrumentation/controlflow/__init__.py +4 -4
  28. openlit/instrumentation/controlflow/controlflow.py +5 -5
  29. openlit/instrumentation/crawl4ai/__init__.py +3 -3
  30. openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
  31. openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
  32. openlit/instrumentation/crewai/__init__.py +3 -3
  33. openlit/instrumentation/crewai/crewai.py +6 -4
  34. openlit/instrumentation/dynamiq/__init__.py +5 -5
  35. openlit/instrumentation/dynamiq/dynamiq.py +5 -5
  36. openlit/instrumentation/elevenlabs/__init__.py +5 -5
  37. openlit/instrumentation/elevenlabs/async_elevenlabs.py +4 -5
  38. openlit/instrumentation/elevenlabs/elevenlabs.py +4 -5
  39. openlit/instrumentation/embedchain/__init__.py +2 -2
  40. openlit/instrumentation/embedchain/embedchain.py +9 -9
  41. openlit/instrumentation/firecrawl/__init__.py +3 -3
  42. openlit/instrumentation/firecrawl/firecrawl.py +5 -5
  43. openlit/instrumentation/google_ai_studio/__init__.py +3 -3
  44. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +3 -3
  45. openlit/instrumentation/google_ai_studio/google_ai_studio.py +3 -3
  46. openlit/instrumentation/gpt4all/__init__.py +5 -5
  47. openlit/instrumentation/gpt4all/gpt4all.py +350 -225
  48. openlit/instrumentation/gpu/__init__.py +5 -5
  49. openlit/instrumentation/groq/__init__.py +5 -5
  50. openlit/instrumentation/groq/async_groq.py +359 -243
  51. openlit/instrumentation/groq/groq.py +359 -243
  52. openlit/instrumentation/haystack/__init__.py +2 -2
  53. openlit/instrumentation/haystack/haystack.py +5 -5
  54. openlit/instrumentation/julep/__init__.py +7 -7
  55. openlit/instrumentation/julep/async_julep.py +6 -6
  56. openlit/instrumentation/julep/julep.py +6 -6
  57. openlit/instrumentation/langchain/__init__.py +15 -9
  58. openlit/instrumentation/langchain/async_langchain.py +388 -0
  59. openlit/instrumentation/langchain/langchain.py +110 -497
  60. openlit/instrumentation/letta/__init__.py +7 -7
  61. openlit/instrumentation/letta/letta.py +10 -8
  62. openlit/instrumentation/litellm/__init__.py +9 -10
  63. openlit/instrumentation/litellm/async_litellm.py +321 -250
  64. openlit/instrumentation/litellm/litellm.py +319 -248
  65. openlit/instrumentation/llamaindex/__init__.py +2 -2
  66. openlit/instrumentation/llamaindex/llamaindex.py +5 -5
  67. openlit/instrumentation/mem0/__init__.py +2 -2
  68. openlit/instrumentation/mem0/mem0.py +5 -5
  69. openlit/instrumentation/milvus/__init__.py +2 -2
  70. openlit/instrumentation/milvus/milvus.py +7 -7
  71. openlit/instrumentation/mistral/__init__.py +13 -13
  72. openlit/instrumentation/mistral/async_mistral.py +426 -253
  73. openlit/instrumentation/mistral/mistral.py +424 -250
  74. openlit/instrumentation/multion/__init__.py +7 -7
  75. openlit/instrumentation/multion/async_multion.py +9 -7
  76. openlit/instrumentation/multion/multion.py +9 -7
  77. openlit/instrumentation/ollama/__init__.py +19 -39
  78. openlit/instrumentation/ollama/async_ollama.py +137 -563
  79. openlit/instrumentation/ollama/ollama.py +136 -563
  80. openlit/instrumentation/ollama/utils.py +333 -0
  81. openlit/instrumentation/openai/__init__.py +11 -11
  82. openlit/instrumentation/openai/async_openai.py +25 -27
  83. openlit/instrumentation/openai/openai.py +25 -27
  84. openlit/instrumentation/phidata/__init__.py +2 -2
  85. openlit/instrumentation/phidata/phidata.py +6 -4
  86. openlit/instrumentation/pinecone/__init__.py +6 -6
  87. openlit/instrumentation/pinecone/pinecone.py +7 -7
  88. openlit/instrumentation/premai/__init__.py +5 -5
  89. openlit/instrumentation/premai/premai.py +268 -219
  90. openlit/instrumentation/qdrant/__init__.py +2 -2
  91. openlit/instrumentation/qdrant/async_qdrant.py +7 -7
  92. openlit/instrumentation/qdrant/qdrant.py +7 -7
  93. openlit/instrumentation/reka/__init__.py +5 -5
  94. openlit/instrumentation/reka/async_reka.py +93 -55
  95. openlit/instrumentation/reka/reka.py +93 -55
  96. openlit/instrumentation/together/__init__.py +9 -9
  97. openlit/instrumentation/together/async_together.py +284 -242
  98. openlit/instrumentation/together/together.py +284 -242
  99. openlit/instrumentation/transformers/__init__.py +3 -3
  100. openlit/instrumentation/transformers/transformers.py +79 -48
  101. openlit/instrumentation/vertexai/__init__.py +19 -69
  102. openlit/instrumentation/vertexai/async_vertexai.py +333 -990
  103. openlit/instrumentation/vertexai/vertexai.py +333 -990
  104. openlit/instrumentation/vllm/__init__.py +3 -3
  105. openlit/instrumentation/vllm/vllm.py +65 -35
  106. openlit/otel/events.py +85 -0
  107. openlit/otel/tracing.py +3 -13
  108. openlit/semcov/__init__.py +16 -4
  109. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/METADATA +2 -2
  110. openlit-1.33.11.dist-info/RECORD +125 -0
  111. openlit-1.33.9.dist-info/RECORD +0 -121
  112. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/LICENSE +0 -0
  113. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/WHEEL +0 -0
openlit/instrumentation/ollama/async_ollama.py
@@ -1,610 +1,184 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
 """
 Module for monitoring Ollama API calls.
 """
 
 import logging
-from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+import time
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
     handle_exception,
-    general_tokens,
-    get_chat_model_cost,
-    get_embed_model_cost)
+    set_server_address_and_port
+)
+from openlit.instrumentation.ollama.utils import (
+    process_chunk,
+    process_chat_response,
+    process_streaming_chat_response,
+    process_embedding_response
+)
 from openlit.semcov import SemanticConvetion
 
-# Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def async_chat(gen_ai_endpoint, version, environment, application_name,
-               tracer, pricing_info, trace_content, metrics, disable_metrics):
+def async_chat(version, environment, application_name,
+               tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for chat to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Ollama API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Ollama usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
-    async def wrapper(wrapped, instance, args, kwargs):
+    class TracedAsyncStream:
         """
-        Wraps the 'chat' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'chat' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat' method.
-            kwargs: Keyword arguments for the 'chat' method.
-
-        Returns:
-            The response from the original 'chat' method.
+        Wrapper for streaming responses to collect telemetry.
         """
 
-        # Check if streaming is enabled for the API call
-        streaming = kwargs.get("stream", False)
-
-        # pylint: disable=no-else-return
-        if streaming:
-            # Special handling for streaming response to accommodate the nature of data flow
-            async def stream_generator():
-                with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                    # Placeholder for aggregating streaming response
-                    llmresponse = ""
-
-                    # Loop through streaming events capturing relevant details
-                    async for chunk in await wrapped(*args, **kwargs):
-                        # Collect aggregated response from events
-                        content = chunk['message']['content']
-                        llmresponse += content
-
-                        if chunk['done'] is True:
-                            completion_tokens = chunk["eval_count"]
-
-                        yield chunk
-
-                    # Handling exception ensure observability without disrupting operation
-                    try:
-                        # Format 'messages' into a single string
-                        message_prompt = kwargs.get("messages", "")
-                        formatted_messages = []
-                        for message in message_prompt:
-                            role = message["role"]
-                            content = message["content"]
-
-                            if isinstance(content, list):
-                                content_str = ", ".join(
-                                    # pylint: disable=line-too-long
-                                    f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                    if "type" in item else f'text: {item["text"]}'
-                                    for item in content
-                                )
-                                formatted_messages.append(f"{role}: {content_str}")
-                            else:
-                                formatted_messages.append(f"{role}: {content}")
-                        prompt = "\n".join(formatted_messages)
-
-                        prompt_tokens = general_tokens(prompt)
-                        total_tokens = prompt_tokens + completion_tokens
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                                    pricing_info, prompt_tokens, completion_tokens)
-
-                        # Set Span attributes
-                        span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                           SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                        span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                           SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                           gen_ai_endpoint)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                           environment)
-                        span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                           application_name)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                           kwargs.get("model", "llama3"))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                           True)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                           prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                           completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                                },
-                            )
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                                },
-                            )
-
-                        span.set_status(Status(StatusCode.OK))
-
-                        if disable_metrics is False:
-                            attributes = {
-                                TELEMETRY_SDK_NAME:
-                                    "openlit",
-                                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                    application_name,
-                                SemanticConvetion.GEN_AI_SYSTEM:
-                                    SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                    environment,
-                                SemanticConvetion.GEN_AI_OPERATION:
-                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                    kwargs.get("model", "llama3")
-                            }
-
-                            metrics["genai_requests"].add(1, attributes)
-                            metrics["genai_total_tokens"].add(total_tokens, attributes)
-                            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                            metrics["genai_cost"].record(cost, attributes)
-
-                    except Exception as e:
-                        handle_exception(span, e)
-                        logger.error("Error in trace creation: %s", e)
-
-            return stream_generator()
-
-        # Handling for non-streaming responses
-        else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                response = await wrapped(*args, **kwargs)
-
+        def __init__(
+                self,
+                wrapped,
+                span,
+                span_name,
+                kwargs,
+                server_address,
+                server_port,
+                **args,
+            ):
+            self.__wrapped__ = wrapped
+            self._span = span
+            self._llmresponse = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._tool_calls = []
+            self._input_tokens = 0
+            self._output_tokens = 0
+            self._response_role = ''
+            self._span_name = span_name
+            self._args = args
+            self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
+
+        async def __aenter__(self):
+            await self.__wrapped__.__aenter__()
+            return self
+
+        async def __aexit__(self, exc_type, exc_value, traceback):
+            await self.__wrapped__.__aexit__(exc_type, exc_value, traceback)
+
+        def __aiter__(self):
+            return self
+
+        async def __getattr__(self, name):
+            """Delegate attribute access to the wrapped object."""
+            return getattr(await self.__wrapped__, name)
+
+        async def __anext__(self):
+            try:
+                chunk = await self.__wrapped__.__anext__()
+                process_chunk(self, chunk)
+                return chunk
+            except StopAsyncIteration:
                 try:
-                    # Format 'messages' into a single string
-                    message_prompt = kwargs.get("messages", "")
-                    formatted_messages = []
-                    for message in message_prompt:
-                        role = message["role"]
-                        content = message["content"]
-
-                        if isinstance(content, list):
-                            content_str = ", ".join(
-                                # pylint: disable=line-too-long
-                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
-                            formatted_messages.append(f"{role}: {content_str}")
-                        else:
-                            formatted_messages.append(f"{role}: {content}")
-                    prompt = "\n".join(formatted_messages)
-
-                    # Set base span attribues
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                       SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                       environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                       application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "llama3"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                       False)
-                    if trace_content:
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response['message']['content'],
-                            },
+                    with tracer.start_as_current_span(self._span_name, kind= SpanKind.CLIENT) as self._span:
+                        process_streaming_chat_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            event_provider=event_provider,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
                         )
-
-                    prompt_tokens = general_tokens(prompt)
-                    completion_tokens = response["eval_count"]
-                    total_tokens = prompt_tokens + completion_tokens
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                                pricing_info, prompt_tokens, completion_tokens)
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                       prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                       completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       total_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                       [response["done_reason"]])
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                       cost)
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "llama3")
-                        }
-
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                    # Return original response
-                    return response
-
                 except Exception as e:
-                    handle_exception(span, e)
+                    handle_exception(self._span, e)
                     logger.error("Error in trace creation: %s", e)
-
-                    # Return original response
-                    return response
-
-    return wrapper
-
-def async_generate(gen_ai_endpoint, version, environment, application_name,
-                   tracer, pricing_info, trace_content, metrics, disable_metrics):
-    """
-    Generates a telemetry wrapper for generate to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Ollama API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Ollama usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the generate method to add telemetry.
-    """
+                raise
 
     async def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'generate' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'generate' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'generate' method.
-            kwargs: Keyword arguments for the 'generate' method.
-
-        Returns:
-            The response from the original 'generate' method.
+        Wraps the GenAI function call.
         """
 
-        # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
 
-        # pylint: disable=no-else-return
-        if streaming:
-            # Special handling for streaming response to accommodate the nature of data flow
-            async def stream_generator():
-                with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                    # Placeholder for aggregating streaming response
-                    llmresponse = ""
-
-                    # Loop through streaming events capturing relevant details
-                    async for chunk in await wrapped(*args, **kwargs):
-                        # Collect aggregated response from events
-                        content = chunk['response']
-                        llmresponse += content
-
-                        if chunk['done'] is True:
-                            completion_tokens = chunk["eval_count"]
-
-                        yield chunk
-
-                    # Handling exception ensure observability without disrupting operation
-                    try:
-                        prompt_tokens = general_tokens(kwargs.get("prompt", ""))
-                        total_tokens = prompt_tokens + completion_tokens
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                                    pricing_info, prompt_tokens, completion_tokens)
-
-                        # Set Span attributes
-                        span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                           SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                        span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                           SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                           gen_ai_endpoint)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                           environment)
-                        span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                           application_name)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                           kwargs.get("model", "llama3"))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                           True)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                           prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                           completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                                attributes={
-                                    # pylint: disable=line-too-long
-                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
-                                },
-                            )
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                                },
-                            )
+        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
+        request_model = kwargs.get("model", "gpt-4o")
 
-                        span.set_status(Status(StatusCode.OK))
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-                        if disable_metrics is False:
-                            attributes = {
-                                TELEMETRY_SDK_NAME:
-                                    "openlit",
-                                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                    application_name,
-                                SemanticConvetion.GEN_AI_SYSTEM:
-                                    SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                    environment,
-                                SemanticConvetion.GEN_AI_OPERATION:
-                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                    kwargs.get("model", "llama3")
-                            }
-
-                            metrics["genai_requests"].add(1, attributes)
-                            metrics["genai_total_tokens"].add(total_tokens, attributes)
-                            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                            metrics["genai_cost"].record(cost, attributes)
-
-                    except Exception as e:
-                        handle_exception(span, e)
-                        logger.error("Error in trace creation: %s", e)
-
-            return stream_generator()
+        # pylint: disable=no-else-return
+        if streaming:
+            awaited_wrapped = await wrapped(*args, **kwargs)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
+            return TracedAsyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
 
-        # Handling for non-streaming responses
         else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = await wrapped(*args, **kwargs)
-
-                try:
-                    # Set base span attribues
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                       SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                       environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                       application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "llama3"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                       False)
-                    if trace_content:
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response['response'],
-                            },
-                        )
-
-                    prompt_tokens = response["prompt_eval_count"]
-                    completion_tokens = response["eval_count"]
-                    total_tokens = prompt_tokens + completion_tokens
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                                pricing_info, prompt_tokens, completion_tokens)
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                       prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                       completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       total_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                       [response["done_reason"]])
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                       cost)
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "llama3")
-                        }
-
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                    # Return original response
-                    return response
-
-                except Exception as e:
-                    handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
-
-                    # Return original response
-                    return response
+                response = process_chat_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    event_provider=event_provider,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )
+
+                return response
 
     return wrapper
 
-def async_embeddings(gen_ai_endpoint, version, environment, application_name,
-                     tracer, pricing_info, trace_content, metrics, disable_metrics):
+def async_embeddings(version, environment, application_name,
+                     tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for embeddings to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Ollama API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Ollama usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the embeddings method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     async def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'embeddings' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'embeddings' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'embeddings' method.
-            kwargs: Keyword arguments for the 'embeddings' method.
-
-        Returns:
-            The response from the original 'embeddings' method.
+        Wraps the GenAI function call.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-            response = await wrapped(*args, **kwargs)
-
-            try:
-                prompt_tokens = general_tokens(kwargs.get('prompt', ""))
-                # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get('model', "mistral-embed"),
-                            pricing_info, prompt_tokens)
-                # Set Span attributes
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                   SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                   environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                   application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get('model', "llama3"))
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                   prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                   cost)
-                if trace_content:
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
-                        },
-                    )
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_OPERATION:
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get('model', "llama3")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(prompt_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
+        server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
+        request_model = kwargs.get('model', 'all-minilm')
 
-                # Return original response
-                return response
+        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
 
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-                # Return original response
-                return response
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
+            response = await wrapped(*args, **kwargs)
+            response = process_embedding_response(
+                response=response,
+                request_model=request_model,
+                pricing_info=pricing_info,
+                server_port=server_port,
+                server_address=server_address,
+                environment=environment,
+                application_name=application_name,
+                metrics=metrics,
+                event_provider=event_provider,
+                start_time=start_time,
+                span=span,
+                capture_message_content=capture_message_content,
+                disable_metrics=disable_metrics,
+                version=version,
+                **kwargs
+            )
+
+            return response
 
     return wrapper
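For readers tracing how these factories are consumed: `async_chat` and `async_embeddings` return coroutines with the `(wrapped, instance, args, kwargs)` signature used by wrapt-based instrumentation. The sketch below illustrates, under the assumption that the instrumentor wires things up with `wrapt.wrap_function_wrapper` as openlit's other instrumentations do, how such a factory might be attached. The `"ollama"` / `"AsyncClient.chat"` target names and the helper function are illustrative assumptions, not an excerpt from this release.

# Illustrative sketch only: attaching the async_chat wrapper with wrapt.
# The wrapped module/method target and this helper's name are assumptions for the example.
from wrapt import wrap_function_wrapper

from openlit.instrumentation.ollama.async_ollama import async_chat


def wire_async_chat(version, environment, application_name, tracer, event_provider,
                    pricing_info, capture_message_content, metrics, disable_metrics):
    # Replace ollama.AsyncClient.chat with the telemetry-collecting coroutine
    # produced by the async_chat factory shown in the diff above.
    wrap_function_wrapper(
        "ollama",
        "AsyncClient.chat",
        async_chat(version, environment, application_name, tracer, event_provider,
                   pricing_info, capture_message_content, metrics, disable_metrics),
    )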