openlit 1.33.9__py3-none-any.whl → 1.33.11__py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their public registries. It is provided for informational purposes only and reflects the changes between the two versions.
Files changed (113)
  1. openlit/__helpers.py +78 -0
  2. openlit/__init__.py +41 -13
  3. openlit/instrumentation/ag2/__init__.py +9 -10
  4. openlit/instrumentation/ag2/ag2.py +134 -69
  5. openlit/instrumentation/ai21/__init__.py +6 -5
  6. openlit/instrumentation/ai21/ai21.py +71 -534
  7. openlit/instrumentation/ai21/async_ai21.py +71 -534
  8. openlit/instrumentation/ai21/utils.py +407 -0
  9. openlit/instrumentation/anthropic/__init__.py +3 -3
  10. openlit/instrumentation/anthropic/anthropic.py +5 -5
  11. openlit/instrumentation/anthropic/async_anthropic.py +5 -5
  12. openlit/instrumentation/assemblyai/__init__.py +2 -2
  13. openlit/instrumentation/assemblyai/assemblyai.py +3 -3
  14. openlit/instrumentation/astra/__init__.py +25 -25
  15. openlit/instrumentation/astra/astra.py +7 -7
  16. openlit/instrumentation/astra/async_astra.py +7 -7
  17. openlit/instrumentation/azure_ai_inference/__init__.py +5 -5
  18. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +11 -11
  19. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +11 -11
  20. openlit/instrumentation/bedrock/__init__.py +2 -2
  21. openlit/instrumentation/bedrock/bedrock.py +3 -3
  22. openlit/instrumentation/chroma/__init__.py +9 -9
  23. openlit/instrumentation/chroma/chroma.py +7 -7
  24. openlit/instrumentation/cohere/__init__.py +7 -7
  25. openlit/instrumentation/cohere/async_cohere.py +10 -10
  26. openlit/instrumentation/cohere/cohere.py +11 -11
  27. openlit/instrumentation/controlflow/__init__.py +4 -4
  28. openlit/instrumentation/controlflow/controlflow.py +5 -5
  29. openlit/instrumentation/crawl4ai/__init__.py +3 -3
  30. openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
  31. openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
  32. openlit/instrumentation/crewai/__init__.py +3 -3
  33. openlit/instrumentation/crewai/crewai.py +6 -4
  34. openlit/instrumentation/dynamiq/__init__.py +5 -5
  35. openlit/instrumentation/dynamiq/dynamiq.py +5 -5
  36. openlit/instrumentation/elevenlabs/__init__.py +5 -5
  37. openlit/instrumentation/elevenlabs/async_elevenlabs.py +4 -5
  38. openlit/instrumentation/elevenlabs/elevenlabs.py +4 -5
  39. openlit/instrumentation/embedchain/__init__.py +2 -2
  40. openlit/instrumentation/embedchain/embedchain.py +9 -9
  41. openlit/instrumentation/firecrawl/__init__.py +3 -3
  42. openlit/instrumentation/firecrawl/firecrawl.py +5 -5
  43. openlit/instrumentation/google_ai_studio/__init__.py +3 -3
  44. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +3 -3
  45. openlit/instrumentation/google_ai_studio/google_ai_studio.py +3 -3
  46. openlit/instrumentation/gpt4all/__init__.py +5 -5
  47. openlit/instrumentation/gpt4all/gpt4all.py +350 -225
  48. openlit/instrumentation/gpu/__init__.py +5 -5
  49. openlit/instrumentation/groq/__init__.py +5 -5
  50. openlit/instrumentation/groq/async_groq.py +359 -243
  51. openlit/instrumentation/groq/groq.py +359 -243
  52. openlit/instrumentation/haystack/__init__.py +2 -2
  53. openlit/instrumentation/haystack/haystack.py +5 -5
  54. openlit/instrumentation/julep/__init__.py +7 -7
  55. openlit/instrumentation/julep/async_julep.py +6 -6
  56. openlit/instrumentation/julep/julep.py +6 -6
  57. openlit/instrumentation/langchain/__init__.py +15 -9
  58. openlit/instrumentation/langchain/async_langchain.py +388 -0
  59. openlit/instrumentation/langchain/langchain.py +110 -497
  60. openlit/instrumentation/letta/__init__.py +7 -7
  61. openlit/instrumentation/letta/letta.py +10 -8
  62. openlit/instrumentation/litellm/__init__.py +9 -10
  63. openlit/instrumentation/litellm/async_litellm.py +321 -250
  64. openlit/instrumentation/litellm/litellm.py +319 -248
  65. openlit/instrumentation/llamaindex/__init__.py +2 -2
  66. openlit/instrumentation/llamaindex/llamaindex.py +5 -5
  67. openlit/instrumentation/mem0/__init__.py +2 -2
  68. openlit/instrumentation/mem0/mem0.py +5 -5
  69. openlit/instrumentation/milvus/__init__.py +2 -2
  70. openlit/instrumentation/milvus/milvus.py +7 -7
  71. openlit/instrumentation/mistral/__init__.py +13 -13
  72. openlit/instrumentation/mistral/async_mistral.py +426 -253
  73. openlit/instrumentation/mistral/mistral.py +424 -250
  74. openlit/instrumentation/multion/__init__.py +7 -7
  75. openlit/instrumentation/multion/async_multion.py +9 -7
  76. openlit/instrumentation/multion/multion.py +9 -7
  77. openlit/instrumentation/ollama/__init__.py +19 -39
  78. openlit/instrumentation/ollama/async_ollama.py +137 -563
  79. openlit/instrumentation/ollama/ollama.py +136 -563
  80. openlit/instrumentation/ollama/utils.py +333 -0
  81. openlit/instrumentation/openai/__init__.py +11 -11
  82. openlit/instrumentation/openai/async_openai.py +25 -27
  83. openlit/instrumentation/openai/openai.py +25 -27
  84. openlit/instrumentation/phidata/__init__.py +2 -2
  85. openlit/instrumentation/phidata/phidata.py +6 -4
  86. openlit/instrumentation/pinecone/__init__.py +6 -6
  87. openlit/instrumentation/pinecone/pinecone.py +7 -7
  88. openlit/instrumentation/premai/__init__.py +5 -5
  89. openlit/instrumentation/premai/premai.py +268 -219
  90. openlit/instrumentation/qdrant/__init__.py +2 -2
  91. openlit/instrumentation/qdrant/async_qdrant.py +7 -7
  92. openlit/instrumentation/qdrant/qdrant.py +7 -7
  93. openlit/instrumentation/reka/__init__.py +5 -5
  94. openlit/instrumentation/reka/async_reka.py +93 -55
  95. openlit/instrumentation/reka/reka.py +93 -55
  96. openlit/instrumentation/together/__init__.py +9 -9
  97. openlit/instrumentation/together/async_together.py +284 -242
  98. openlit/instrumentation/together/together.py +284 -242
  99. openlit/instrumentation/transformers/__init__.py +3 -3
  100. openlit/instrumentation/transformers/transformers.py +79 -48
  101. openlit/instrumentation/vertexai/__init__.py +19 -69
  102. openlit/instrumentation/vertexai/async_vertexai.py +333 -990
  103. openlit/instrumentation/vertexai/vertexai.py +333 -990
  104. openlit/instrumentation/vllm/__init__.py +3 -3
  105. openlit/instrumentation/vllm/vllm.py +65 -35
  106. openlit/otel/events.py +85 -0
  107. openlit/otel/tracing.py +3 -13
  108. openlit/semcov/__init__.py +16 -4
  109. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/METADATA +2 -2
  110. openlit-1.33.11.dist-info/RECORD +125 -0
  111. openlit-1.33.9.dist-info/RECORD +0 -121
  112. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/LICENSE +0 -0
  113. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/WHEEL +0 -0
--- a/openlit/instrumentation/ollama/ollama.py
+++ b/openlit/instrumentation/ollama/ollama.py
@@ -1,611 +1,184 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
 """
 Module for monitoring Ollama API calls.
 """

 import logging
-from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+import time
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
     handle_exception,
-    general_tokens,
-    get_chat_model_cost,
-    get_embed_model_cost
+    set_server_address_and_port
+)
+from openlit.instrumentation.ollama.utils import (
+    process_chunk,
+    process_chat_response,
+    process_streaming_chat_response,
+    process_embedding_response
 )
 from openlit.semcov import SemanticConvetion

-# Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)

-def chat(gen_ai_endpoint, version, environment, application_name,
-         tracer, pricing_info, trace_content, metrics, disable_metrics):
+def chat(version, environment, application_name,
+         tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for chat to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Ollama API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Ollama usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """

-    def wrapper(wrapped, instance, args, kwargs):
+    class TracedSyncStream:
         """
-        Wraps the 'chat' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'chat' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat' method.
-            kwargs: Keyword arguments for the 'chat' method.
-
-        Returns:
-            The response from the original 'chat' method.
+        Wrapper for streaming responses to collect telemetry.
         """

-        # Check if streaming is enabled for the API call
-        streaming = kwargs.get("stream", False)
-
-        # pylint: disable=no-else-return
-        if streaming:
-            # Special handling for streaming response to accommodate the nature of data flow
-            def stream_generator():
-                with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                    # Placeholder for aggregating streaming response
-                    llmresponse = ""
-
-                    # Loop through streaming events capturing relevant details
-                    for chunk in wrapped(*args, **kwargs):
-                        # Collect aggregated response from events
-                        content = chunk['message']['content']
-                        llmresponse += content
-
-                        if chunk['done'] is True:
-                            completion_tokens = chunk["eval_count"]
-
-                        yield chunk
-
-                    # Handling exception ensure observability without disrupting operation
-                    try:
-                        # Format 'messages' into a single string
-                        message_prompt = kwargs.get("messages", "")
-                        formatted_messages = []
-                        for message in message_prompt:
-                            role = message["role"]
-                            content = message["content"]
-
-                            if isinstance(content, list):
-                                content_str = ", ".join(
-                                    # pylint: disable=line-too-long
-                                    f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                    if "type" in item else f'text: {item["text"]}'
-                                    for item in content
-                                )
-                                formatted_messages.append(f"{role}: {content_str}")
-                            else:
-                                formatted_messages.append(f"{role}: {content}")
-                        prompt = "\n".join(formatted_messages)
-
-                        prompt_tokens = general_tokens(prompt)
-                        total_tokens = prompt_tokens + completion_tokens
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                            pricing_info, prompt_tokens, completion_tokens)
-
-                        # Set Span attributes
-                        span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                            SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                        span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                            gen_ai_endpoint)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                            environment)
-                        span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                            application_name)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                            kwargs.get("model", "llama3"))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                            True)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                            prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                            completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                            total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                            cost)
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                                },
-                            )
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                                },
-                            )
-
-                        span.set_status(Status(StatusCode.OK))
-
-                        if disable_metrics is False:
-                            attributes = {
-                                TELEMETRY_SDK_NAME:
-                                    "openlit",
-                                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                    application_name,
-                                SemanticConvetion.GEN_AI_SYSTEM:
-                                    SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                    environment,
-                                SemanticConvetion.GEN_AI_OPERATION:
-                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                    kwargs.get("model", "llama3")
-                            }
-
-                            metrics["genai_requests"].add(1, attributes)
-                            metrics["genai_total_tokens"].add(total_tokens, attributes)
-                            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                            metrics["genai_cost"].record(cost, attributes)
-
-                    except Exception as e:
-                        handle_exception(span, e)
-                        logger.error("Error in trace creation: %s", e)
-
-            return stream_generator()
-
-        # Handling for non-streaming responses
-        else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                response = wrapped(*args, **kwargs)
-
+        def __init__(
+                self,
+                wrapped,
+                span,
+                span_name,
+                kwargs,
+                server_address,
+                server_port,
+                **args,
+            ):
+            self.__wrapped__ = wrapped
+            self._span = span
+            self._llmresponse = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._tool_calls = []
+            self._input_tokens = 0
+            self._output_tokens = 0
+            self._response_role = ''
+            self._span_name = span_name
+            self._args = args
+            self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
+
+        def __enter__(self):
+            self.__wrapped__.__enter__()
+            return self
+
+        def __exit__(self, exc_type, exc_value, traceback):
+            self.__wrapped__.__exit__(exc_type, exc_value, traceback)
+
+        def __iter__(self):
+            return self
+
+        def __getattr__(self, name):
+            """Delegate attribute access to the wrapped object."""
+            return getattr(self.__wrapped__, name)
+
+        def __next__(self):
+            try:
+                chunk = self.__wrapped__.__next__()
+                process_chunk(self, chunk)
+                return chunk
+            except StopIteration:
                 try:
-                    # Format 'messages' into a single string
-                    message_prompt = kwargs.get("messages", "")
-                    formatted_messages = []
-                    for message in message_prompt:
-                        role = message["role"]
-                        content = message["content"]
-
-                        if isinstance(content, list):
-                            content_str = ", ".join(
-                                # pylint: disable=line-too-long
-                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
-                            formatted_messages.append(f"{role}: {content_str}")
-                        else:
-                            formatted_messages.append(f"{role}: {content}")
-                    prompt = "\n".join(formatted_messages)
-
-                    # Set base span attribues
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                        SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                        gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                        kwargs.get("model", "llama3"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                        False)
-                    if trace_content:
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response['message']['content'],
-                            },
+                    with tracer.start_as_current_span(self._span_name, kind= SpanKind.CLIENT) as self._span:
+                        process_streaming_chat_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            event_provider=event_provider,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
                         )
-
-                    prompt_tokens = general_tokens(prompt)
-                    completion_tokens = response["eval_count"]
-                    total_tokens = prompt_tokens + completion_tokens
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                        pricing_info, prompt_tokens, completion_tokens)
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                        prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        total_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                        [response["done_reason"]])
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                        cost)
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "llama3")
-                        }
-
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                    # Return original response
-                    return response
-
                 except Exception as e:
-                    handle_exception(span, e)
+                    handle_exception(self._span, e)
                     logger.error("Error in trace creation: %s", e)
-
-                    # Return original response
-                    return response
-
-    return wrapper
-
-def generate(gen_ai_endpoint, version, environment, application_name,
-             tracer, pricing_info, trace_content, metrics, disable_metrics):
-    """
-    Generates a telemetry wrapper for generate to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Ollama API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Ollama usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the generate method to add telemetry.
-    """
+                raise

     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'generate' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'generate' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'generate' method.
-            kwargs: Keyword arguments for the 'generate' method.
-
-        Returns:
-            The response from the original 'generate' method.
+        Wraps the GenAI function call.
         """

-        # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)

-        # pylint: disable=no-else-return
-        if streaming:
-            # Special handling for streaming response to accommodate the nature of data flow
-            def stream_generator():
-                with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                    # Placeholder for aggregating streaming response
-                    llmresponse = ""
-
-                    # Loop through streaming events capturing relevant details
-                    for chunk in wrapped(*args, **kwargs):
-                        # Collect aggregated response from events
-                        content = chunk['response']
-                        llmresponse += content
-
-                        if chunk['done'] is True:
-                            completion_tokens = chunk["eval_count"]
-
-                        yield chunk
-
-                    # Handling exception ensure observability without disrupting operation
-                    try:
-                        prompt_tokens = general_tokens(kwargs.get("prompt", ""))
-                        total_tokens = prompt_tokens + completion_tokens
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                            pricing_info, prompt_tokens, completion_tokens)
+        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
+        request_model = kwargs.get("model", "gpt-4o")

-                        # Set Span attributes
-                        span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                            SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                        span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                            gen_ai_endpoint)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                            environment)
-                        span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                            application_name)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                            kwargs.get("model", "llama3"))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                            True)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                            prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                            completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                            total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                            cost)
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                                attributes={
-                                    # pylint: disable=line-too-long
-                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
-                                },
-                            )
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                                },
-                            )
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

-                        span.set_status(Status(StatusCode.OK))
-
-                        if disable_metrics is False:
-                            attributes = {
-                                TELEMETRY_SDK_NAME:
-                                    "openlit",
-                                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                    application_name,
-                                SemanticConvetion.GEN_AI_SYSTEM:
-                                    SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                    environment,
-                                SemanticConvetion.GEN_AI_OPERATION:
-                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                    kwargs.get("model", "llama3")
-                            }
-
-                            metrics["genai_requests"].add(1, attributes)
-                            metrics["genai_total_tokens"].add(total_tokens, attributes)
-                            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                            metrics["genai_cost"].record(cost, attributes)
-
-                    except Exception as e:
-                        handle_exception(span, e)
-                        logger.error("Error in trace creation: %s", e)
-
-            return stream_generator()
+        # pylint: disable=no-else-return
+        if streaming:
+            awaited_wrapped = wrapped(*args, **kwargs)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
+            return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)

-        # Handling for non-streaming responses
         else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = wrapped(*args, **kwargs)
-
-                try:
-                    # Set base span attribues
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                        SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                        gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                        kwargs.get("model", "llama3"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                        False)
-                    if trace_content:
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response['response'],
-                            },
-                        )
-
-                    prompt_tokens = response["prompt_eval_count"]
-                    completion_tokens = response["eval_count"]
-                    total_tokens = prompt_tokens + completion_tokens
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                        pricing_info, prompt_tokens, completion_tokens)
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                        prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        total_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                        [response["done_reason"]])
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                        cost)
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "llama3")
-                        }
-
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                    # Return original response
-                    return response
-
-                except Exception as e:
-                    handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
-
-                    # Return original response
-                    return response
+                response = process_chat_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    event_provider=event_provider,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )
+
+                return response

     return wrapper

-def embeddings(gen_ai_endpoint, version, environment, application_name,
-               tracer, pricing_info, trace_content, metrics, disable_metrics):
+def embeddings(version, environment, application_name,
+               tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for embeddings to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Ollama API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Ollama usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the embeddings method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """

     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'embeddings' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'embeddings' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'embeddings' method.
-            kwargs: Keyword arguments for the 'embeddings' method.
-
-        Returns:
-            The response from the original 'embeddings' method.
+        Wraps the GenAI function call.
         """

-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-            response = wrapped(*args, **kwargs)
-
-            try:
-                prompt_tokens = general_tokens(kwargs.get('prompt', ""))
-                # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get('model', "mistral-embed"),
-                    pricing_info, prompt_tokens)
-                # Set Span attributes
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                    SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                    SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                    kwargs.get('model', "llama3"))
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                    prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                    prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                    cost)
-                if trace_content:
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
-                        },
-                    )
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_OPERATION:
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get('model', "llama3")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(prompt_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
+        server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
+        request_model = kwargs.get('model', 'all-minilm')

-                # Return original response
-                return response
+        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'

-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-                # Return original response
-                return response
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
+            response = wrapped(*args, **kwargs)
+            response = process_embedding_response(
+                response=response,
+                request_model=request_model,
+                pricing_info=pricing_info,
+                server_port=server_port,
+                server_address=server_address,
+                environment=environment,
+                application_name=application_name,
+                metrics=metrics,
+                event_provider=event_provider,
+                start_time=start_time,
+                span=span,
+                capture_message_content=capture_message_content,
+                disable_metrics=disable_metrics,
+                version=version,
+                **kwargs
+            )
+
+            return response

     return wrapper
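
For context on the refactor shown above: the rewritten module no longer assembles spans, events, and metrics inline. The chat and embeddings wrappers delegate to process_chunk, process_chat_response, process_streaming_chat_response, and process_embedding_response in the new openlit/instrumentation/ollama/utils.py, name spans after the operation type and request model, and gate content capture on capture_message_content (formerly trace_content). Streaming calls now return the TracedSyncStream wrapper, so telemetry is only finalized once the caller actually drains the iterator. Below is a minimal, hypothetical usage sketch (not part of this diff) of application code that would exercise these instrumented paths; the ollama calls and openlit.init() arguments are assumptions about the surrounding environment (a local Ollama server on 127.0.0.1:11434 and an OTLP endpoint), and exact init parameters may differ by openlit version.

    # Hypothetical sketch: drives the instrumented Ollama code paths above.
    import openlit
    import ollama

    # openlit.init() wires up the tracer, meter, and event provider that the
    # chat/embeddings wrappers receive when the instrumentor is applied.
    openlit.init(otlp_endpoint="http://127.0.0.1:4318",
                 application_name="ollama-demo", environment="dev")

    # Non-streaming chat: handled by the wrapper's else-branch and
    # process_chat_response(), producing a "<operation> <model>" span.
    reply = ollama.chat(model="llama3",
                        messages=[{"role": "user", "content": "Say hello"}])
    print(reply["message"]["content"])

    # Streaming chat: the returned iterator is wrapped in TracedSyncStream;
    # process_chunk() runs per chunk and the span is finalized on StopIteration.
    for chunk in ollama.chat(model="llama3",
                             messages=[{"role": "user", "content": "Count to three"}],
                             stream=True):
        print(chunk["message"]["content"], end="", flush=True)

    # Embeddings: handled by the embeddings wrapper via process_embedding_response().
    ollama.embeddings(model="all-minilm", prompt="hello world")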