openlit 1.33.9__py3-none-any.whl → 1.33.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. openlit/__helpers.py +5 -0
  2. openlit/__init__.py +3 -2
  3. openlit/instrumentation/ag2/ag2.py +3 -3
  4. openlit/instrumentation/ai21/ai21.py +1 -1
  5. openlit/instrumentation/ai21/async_ai21.py +1 -1
  6. openlit/instrumentation/anthropic/anthropic.py +1 -1
  7. openlit/instrumentation/anthropic/async_anthropic.py +1 -1
  8. openlit/instrumentation/astra/astra.py +5 -5
  9. openlit/instrumentation/astra/async_astra.py +5 -5
  10. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +3 -3
  11. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +3 -3
  12. openlit/instrumentation/chroma/chroma.py +5 -5
  13. openlit/instrumentation/cohere/async_cohere.py +1 -1
  14. openlit/instrumentation/cohere/cohere.py +2 -2
  15. openlit/instrumentation/controlflow/controlflow.py +3 -3
  16. openlit/instrumentation/crawl4ai/async_crawl4ai.py +3 -3
  17. openlit/instrumentation/crawl4ai/crawl4ai.py +3 -3
  18. openlit/instrumentation/crewai/crewai.py +4 -2
  19. openlit/instrumentation/dynamiq/dynamiq.py +3 -3
  20. openlit/instrumentation/elevenlabs/async_elevenlabs.py +1 -2
  21. openlit/instrumentation/elevenlabs/elevenlabs.py +1 -2
  22. openlit/instrumentation/embedchain/embedchain.py +5 -5
  23. openlit/instrumentation/firecrawl/firecrawl.py +3 -3
  24. openlit/instrumentation/gpt4all/__init__.py +2 -2
  25. openlit/instrumentation/gpt4all/gpt4all.py +345 -220
  26. openlit/instrumentation/gpu/__init__.py +5 -5
  27. openlit/instrumentation/groq/__init__.py +2 -2
  28. openlit/instrumentation/groq/async_groq.py +356 -240
  29. openlit/instrumentation/groq/groq.py +356 -240
  30. openlit/instrumentation/haystack/haystack.py +3 -3
  31. openlit/instrumentation/julep/async_julep.py +3 -3
  32. openlit/instrumentation/julep/julep.py +3 -3
  33. openlit/instrumentation/langchain/__init__.py +13 -7
  34. openlit/instrumentation/langchain/async_langchain.py +384 -0
  35. openlit/instrumentation/langchain/langchain.py +98 -490
  36. openlit/instrumentation/letta/letta.py +5 -3
  37. openlit/instrumentation/litellm/__init__.py +4 -5
  38. openlit/instrumentation/litellm/async_litellm.py +316 -245
  39. openlit/instrumentation/litellm/litellm.py +312 -241
  40. openlit/instrumentation/llamaindex/llamaindex.py +3 -3
  41. openlit/instrumentation/mem0/mem0.py +3 -3
  42. openlit/instrumentation/milvus/milvus.py +5 -5
  43. openlit/instrumentation/mistral/__init__.py +6 -6
  44. openlit/instrumentation/mistral/async_mistral.py +421 -248
  45. openlit/instrumentation/mistral/mistral.py +418 -244
  46. openlit/instrumentation/multion/async_multion.py +4 -2
  47. openlit/instrumentation/multion/multion.py +4 -2
  48. openlit/instrumentation/ollama/__init__.py +8 -30
  49. openlit/instrumentation/ollama/async_ollama.py +385 -417
  50. openlit/instrumentation/ollama/ollama.py +384 -417
  51. openlit/instrumentation/openai/async_openai.py +7 -9
  52. openlit/instrumentation/openai/openai.py +7 -9
  53. openlit/instrumentation/phidata/phidata.py +4 -2
  54. openlit/instrumentation/pinecone/pinecone.py +5 -5
  55. openlit/instrumentation/premai/__init__.py +2 -2
  56. openlit/instrumentation/premai/premai.py +262 -213
  57. openlit/instrumentation/qdrant/async_qdrant.py +5 -5
  58. openlit/instrumentation/qdrant/qdrant.py +5 -5
  59. openlit/instrumentation/reka/__init__.py +2 -2
  60. openlit/instrumentation/reka/async_reka.py +90 -52
  61. openlit/instrumentation/reka/reka.py +90 -52
  62. openlit/instrumentation/together/__init__.py +4 -4
  63. openlit/instrumentation/together/async_together.py +278 -236
  64. openlit/instrumentation/together/together.py +278 -236
  65. openlit/instrumentation/transformers/__init__.py +1 -1
  66. openlit/instrumentation/transformers/transformers.py +75 -44
  67. openlit/instrumentation/vertexai/__init__.py +14 -64
  68. openlit/instrumentation/vertexai/async_vertexai.py +329 -986
  69. openlit/instrumentation/vertexai/vertexai.py +329 -986
  70. openlit/instrumentation/vllm/__init__.py +1 -1
  71. openlit/instrumentation/vllm/vllm.py +62 -32
  72. openlit/semcov/__init__.py +3 -3
  73. {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
  74. openlit-1.33.10.dist-info/RECORD +122 -0
  75. openlit-1.33.9.dist-info/RECORD +0 -121
  76. {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
  77. {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/WHEEL +0 -0
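
The largest rewrite in this release is shown below: the diff of openlit/instrumentation/ollama/ollama.py (item 50 in the list above). It is representative of the pattern applied across the instrumentation modules: the gen_ai_endpoint parameter is dropped from the wrapper factories, spans are named "{operation} {model}", streaming is handled by a stream-wrapper class instead of a generator, and new TTFT/TBT server metrics are recorded. As a minimal sketch of how a factory with the new signature is attached to the client (hypothetical wiring in the style of openlit's wrapt-based __init__.py modules; instrument_ollama_chat is an illustrative name, not part of this diff):

    from wrapt import wrap_function_wrapper
    from openlit.instrumentation.ollama.ollama import chat

    def instrument_ollama_chat(version, environment, application_name, tracer,
                               pricing_info, trace_content, metrics, disable_metrics):
        # chat(...) returns the wrapper(wrapped, instance, args, kwargs)
        # closure that wrapt installs in place of ollama.Client.chat.
        wrap_function_wrapper(
            'ollama',
            'Client.chat',
            chat(version, environment, application_name, tracer,
                 pricing_info, trace_content, metrics, disable_metrics),
        )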
@@ -1,29 +1,33 @@
- # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
  """
  Module for monitoring Ollama API calls.
  """

  import logging
+ import time
  from opentelemetry.trace import SpanKind, Status, StatusCode
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
  from openlit.__helpers import (
+ get_chat_model_cost,
+ get_embed_model_cost,
  handle_exception,
+ response_as_dict,
  general_tokens,
- get_chat_model_cost,
- get_embed_model_cost
+ calculate_ttft,
+ calculate_tbt,
+ create_metrics_attributes,
+ set_server_address_and_port
  )
  from openlit.semcov import SemanticConvetion

  # Initialize logger for logging potential issues and operations
  logger = logging.getLogger(__name__)

- def chat(gen_ai_endpoint, version, environment, application_name,
+ def chat(version, environment, application_name,
  tracer, pricing_info, trace_content, metrics, disable_metrics):
  """
- Generates a telemetry wrapper for chat to collect metrics.
+ Generates a telemetry wrapper for chat completions to collect metrics.

  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
  application_name: Name of the application using the Ollama API.
@@ -32,464 +36,410 @@ def chat(gen_ai_endpoint, version, environment, application_name,
  trace_content: Flag indicating whether to trace the actual content.

  Returns:
- A function that wraps the chat method to add telemetry.
+ A function that wraps the chat completions method to add telemetry.
  """

- def wrapper(wrapped, instance, args, kwargs):
+ class TracedSyncStream:
  """
- Wraps the 'chat' API call to add telemetry.
-
- This collects metrics such as execution time, cost, and token usage, and handles errors
- gracefully, adding details to the trace for observability.
+ Wrapper for streaming responses to collect metrics and trace data.
+ Wraps the response to collect message IDs and aggregated response.

- Args:
- wrapped: The original 'chat' method to be wrapped.
- instance: The instance of the class where the original method is defined.
- args: Positional arguments for the 'chat' method.
- kwargs: Keyword arguments for the 'chat' method.
+ This class implements the '__aiter__' and '__anext__' methods that
+ handle asynchronous streaming responses.

- Returns:
- The response from the original 'chat' method.
+ This class also implements '__aenter__' and '__aexit__' methods that
+ handle asynchronous context management protocol.
  """
-
- # Check if streaming is enabled for the API call
- streaming = kwargs.get("stream", False)
-
- # pylint: disable=no-else-return
- if streaming:
- # Special handling for streaming response to accommodate the nature of data flow
- def stream_generator():
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
- # Placeholder for aggregating streaming response
- llmresponse = ""
-
- # Loop through streaming events capturing relevant details
- for chunk in wrapped(*args, **kwargs):
- # Collect aggregated response from events
- content = chunk['message']['content']
- llmresponse += content
-
- if chunk['done'] is True:
- completion_tokens = chunk["eval_count"]
-
- yield chunk
-
- # Handling exception ensure observability without disrupting operation
- try:
- # Format 'messages' into a single string
- message_prompt = kwargs.get("messages", "")
- formatted_messages = []
- for message in message_prompt:
- role = message["role"]
- content = message["content"]
-
- if isinstance(content, list):
- content_str = ", ".join(
- # pylint: disable=line-too-long
- f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
- if "type" in item else f'text: {item["text"]}'
- for item in content
- )
- formatted_messages.append(f"{role}: {content_str}")
- else:
- formatted_messages.append(f"{role}: {content}")
- prompt = "\n".join(formatted_messages)
-
- prompt_tokens = general_tokens(prompt)
- total_tokens = prompt_tokens + completion_tokens
- # Calculate cost of the operation
- cost = get_chat_model_cost(kwargs.get("model", "llama3"),
- pricing_info, prompt_tokens, completion_tokens)
-
- # Set Span attributes
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
- SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
- span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
- SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
- environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
- application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "llama3"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
- True)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
- prompt_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
- completion_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- total_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
- cost)
- if trace_content:
- span.add_event(
- name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
- attributes={
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
- },
- )
- span.add_event(
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
- attributes={
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
- },
- )
-
- span.set_status(Status(StatusCode.OK))
-
- if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_OPERATION:
- SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "llama3")
- }
-
- metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(total_tokens, attributes)
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
- metrics["genai_cost"].record(cost, attributes)
-
- except Exception as e:
- handle_exception(span, e)
- logger.error("Error in trace creation: %s", e)
-
- return stream_generator()
-
- # Handling for non-streaming responses
- else:
- # pylint: disable=line-too-long
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
- response = wrapped(*args, **kwargs)
-
+ def __init__(
+ self,
+ wrapped,
+ span,
+ kwargs,
+ server_address,
+ server_port,
+ **args,
+ ):
+ self.__wrapped__ = wrapped
+ self._span = span
+ # Placeholder for aggregating streaming response
+ self._llmresponse = ""
+ self._response_model = ""
+ self._finish_reason = ""
+ self._input_tokens = 0
+ self._output_tokens = 0
+
+ self._args = args
+ self._kwargs = kwargs
+ self._start_time = time.time()
+ self._end_time = None
+ self._timestamps = []
+ self._ttft = 0
+ self._tbt = 0
+ self._server_address = server_address
+ self._server_port = server_port
+
+ def __enter__(self):
+ self.__wrapped__.__enter__()
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.__wrapped__.__exit__(exc_type, exc_value, traceback)
+
+ def __iter__(self):
+ return self
+
+ def __getattr__(self, name):
+ """Delegate attribute access to the wrapped object."""
+ return getattr(self.__wrapped__, name)
+
+ def __next__(self):
+ try:
+ chunk = self.__wrapped__.__next__()
+ end_time = time.time()
+ # Record the timestamp for the current chunk
+ self._timestamps.append(end_time)
+
+ if len(self._timestamps) == 1:
+ # Calculate time to first chunk
+ self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+ chunked = response_as_dict(chunk)
+ self._llmresponse += chunked.get('message').get('content')
+
+ if chunked.get('eval_count'):
+ self._input_tokens = chunked.get('prompt_eval_count')
+ self._output_tokens = chunked.get('eval_count')
+ self._response_model = chunked.get('model')
+ self._finish_reason = chunked.get('done_reason')
+ return chunk
+ except StopIteration:
+ # Handling exception ensure observability without disrupting operation
  try:
+ self._end_time = time.time()
+ if len(self._timestamps) > 1:
+ self._tbt = calculate_tbt(self._timestamps)
+
  # Format 'messages' into a single string
- message_prompt = kwargs.get("messages", "")
+ message_prompt = self._kwargs.get("messages", "")
  formatted_messages = []
  for message in message_prompt:
  role = message["role"]
  content = message["content"]

  if isinstance(content, list):
- content_str = ", ".join(
- # pylint: disable=line-too-long
- f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
- if "type" in item else f'text: {item["text"]}'
- for item in content
- )
+ content_str_list = []
+ for item in content:
+ if item["type"] == "text":
+ content_str_list.append(f'text: {item["text"]}')
+ elif (item["type"] == "image_url" and
+ not item["image_url"]["url"].startswith("data:")):
+ content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+ content_str = ", ".join(content_str_list)
  formatted_messages.append(f"{role}: {content_str}")
  else:
  formatted_messages.append(f"{role}: {content}")
  prompt = "\n".join(formatted_messages)

- # Set base span attribues
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
- SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
- span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ request_model = self._kwargs.get("model", "gpt-4o")
+
+ # Calculate cost of the operation
+ cost = get_chat_model_cost(request_model,
+ pricing_info, self._input_tokens,
+ self._output_tokens)
+
+ # Set Span attributes (OTel Semconv)
+ self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
  SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+ SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+ self._server_port)
+
+ # List of attributes and their config keys
+ attributes = [
+ (SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'repeat_penalty'),
+ (SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
+ (SemanticConvetion.GEN_AI_REQUEST_SEED, 'seed'),
+ (SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop'),
+ (SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
+ (SemanticConvetion.GEN_AI_REQUEST_TOP_P, 'top_p'),
+ (SemanticConvetion.GEN_AI_REQUEST_TOP_K, 'top_k'),
+ ]
+
+ # Safely get the options dictionary from kwargs
+ options = self._kwargs.get('options', {})
+
+ # Set each attribute if the corresponding value exists and is not None
+ for attribute, key in attributes:
+ # Use dictionary `get` to retrieve values from the options dictionary
+ value = options.get(key)
+ if value is not None:
+ self._span.set_attribute(attribute, value)
+
+ self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+ [self._finish_reason])
+ self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ self._response_model)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+ self._input_tokens)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+ self._output_tokens)
+ self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ self._server_address)
+ if isinstance(self._llmresponse, str):
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ "text")
+ else:
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ "json")
+
+ # Set Span attributes (Extra)
+ self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
  environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+ self._span.set_attribute(SERVICE_NAME,
  application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "llama3"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
- False)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+ True)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+ self._input_tokens + self._output_tokens)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+ cost)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+ self._tbt)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+ self._ttft)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
  if trace_content:
- span.add_event(
+ self._span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
  attributes={
  SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
  },
  )
- span.add_event(
+ self._span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
  attributes={
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response['message']['content'],
+ SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
  },
  )
-
- prompt_tokens = general_tokens(prompt)
- completion_tokens = response["eval_count"]
- total_tokens = prompt_tokens + completion_tokens
- # Calculate cost of the operation
- cost = get_chat_model_cost(kwargs.get("model", "llama3"),
- pricing_info, prompt_tokens, completion_tokens)
-
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
- prompt_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
- completion_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- total_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
- [response["done_reason"]])
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
- cost)
-
- span.set_status(Status(StatusCode.OK))
+ self._span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_OPERATION:
- SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "llama3")
- }
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+ system=SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
+ request_model=request_model,
+ server_address=self._server_address,
+ server_port=self._server_port,
+ response_model=self._response_model,
+ )

+ metrics["genai_client_usage_tokens"].record(
+ self._input_tokens + self._output_tokens, attributes
+ )
+ metrics["genai_client_operation_duration"].record(
+ self._end_time - self._start_time, attributes
+ )
+ metrics["genai_server_tbt"].record(
+ self._tbt, attributes
+ )
+ metrics["genai_server_ttft"].record(
+ self._ttft, attributes
+ )
  metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(total_tokens, attributes)
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+ metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
+ metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
  metrics["genai_cost"].record(cost, attributes)

- # Return original response
- return response
-
  except Exception as e:
- handle_exception(span, e)
+ handle_exception(self._span, e)
  logger.error("Error in trace creation: %s", e)
-
- # Return original response
- return response
-
- return wrapper
-
- def generate(gen_ai_endpoint, version, environment, application_name,
- tracer, pricing_info, trace_content, metrics, disable_metrics):
- """
- Generates a telemetry wrapper for generate to collect metrics.
-
- Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
- version: Version of the monitoring package.
- environment: Deployment environment (e.g., production, staging).
- application_name: Name of the application using the Ollama API.
- tracer: OpenTelemetry tracer for creating spans.
- pricing_info: Information used for calculating the cost of Ollama usage.
- trace_content: Flag indicating whether to trace the actual content.
-
- Returns:
- A function that wraps the generate method to add telemetry.
- """
+ finally:
+ self._span.end()
+ raise

  def wrapper(wrapped, instance, args, kwargs):
  """
- Wraps the 'generate' API call to add telemetry.
-
+ Wraps the 'chat.completions' API call to add telemetry.
+
  This collects metrics such as execution time, cost, and token usage, and handles errors
  gracefully, adding details to the trace for observability.

  Args:
- wrapped: The original 'generate' method to be wrapped.
+ wrapped: The original 'chat.completions' method to be wrapped.
  instance: The instance of the class where the original method is defined.
- args: Positional arguments for the 'generate' method.
- kwargs: Keyword arguments for the 'generate' method.
+ args: Positional arguments for the 'chat.completions' method.
+ kwargs: Keyword arguments for the 'chat.completions' method.

  Returns:
- The response from the original 'generate' method.
+ The response from the original 'chat.completions' method.
  """

  # Check if streaming is enabled for the API call
  streaming = kwargs.get("stream", False)
+ server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
+ request_model = kwargs.get("model", "gpt-4o")
+
+ span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

  # pylint: disable=no-else-return
  if streaming:
  # Special handling for streaming response to accommodate the nature of data flow
- def stream_generator():
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
- # Placeholder for aggregating streaming response
- llmresponse = ""
-
- # Loop through streaming events capturing relevant details
- for chunk in wrapped(*args, **kwargs):
- # Collect aggregated response from events
- content = chunk['response']
- llmresponse += content
-
- if chunk['done'] is True:
- completion_tokens = chunk["eval_count"]
-
- yield chunk
-
- # Handling exception ensure observability without disrupting operation
- try:
- prompt_tokens = general_tokens(kwargs.get("prompt", ""))
- total_tokens = prompt_tokens + completion_tokens
- # Calculate cost of the operation
- cost = get_chat_model_cost(kwargs.get("model", "llama3"),
- pricing_info, prompt_tokens, completion_tokens)
-
- # Set Span attributes
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
- SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
- span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
- SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
- environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
- application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "llama3"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
- True)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
- prompt_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
- completion_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- total_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
- cost)
- if trace_content:
- span.add_event(
- name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
- attributes={
- # pylint: disable=line-too-long
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
- },
- )
- span.add_event(
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
- attributes={
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
- },
- )
+ awaited_wrapped = wrapped(*args, **kwargs)
+ span = tracer.start_span(span_name, kind=SpanKind.CLIENT)

- span.set_status(Status(StatusCode.OK))
-
- if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_OPERATION:
- SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "llama3")
- }
-
- metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(total_tokens, attributes)
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
- metrics["genai_cost"].record(cost, attributes)
-
- except Exception as e:
- handle_exception(span, e)
- logger.error("Error in trace creation: %s", e)
-
- return stream_generator()
+ return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)

  # Handling for non-streaming responses
  else:
- # pylint: disable=line-too-long
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+ with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+ start_time = time.time()
  response = wrapped(*args, **kwargs)
+ end_time = time.time()
+
+ response_dict = response_as_dict(response)

  try:
- # Set base span attribues
+ # Format 'messages' into a single string
+ message_prompt = kwargs.get("messages", "")
+ formatted_messages = []
+ for message in message_prompt:
+ role = message["role"]
+ content = message["content"]
+
+ if isinstance(content, list):
+ content_str = ", ".join(
+ f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+ if "type" in item else f'text: {item["text"]}'
+ for item in content
+ )
+ formatted_messages.append(f"{role}: {content_str}")
+ else:
+ formatted_messages.append(f"{role}: {content}")
+ prompt = "\n".join(formatted_messages)
+
+ input_tokens = response_dict.get('prompt_eval_count')
+ output_tokens = response_dict.get('eval_count')
+
+ # Calculate cost of the operation
+ cost = get_chat_model_cost(request_model,
+ pricing_info, input_tokens,
+ output_tokens)
+
+ # Set base span attribues (OTel Semconv)
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
- SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
  span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
  SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+ span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+ SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
+ server_port)
+
+ # List of attributes and their config keys
+ attributes = [
+ (SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'repeat_penalty'),
+ (SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
+ (SemanticConvetion.GEN_AI_REQUEST_SEED, 'seed'),
+ (SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop'),
+ (SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
+ (SemanticConvetion.GEN_AI_REQUEST_TOP_P, 'top_p'),
+ (SemanticConvetion.GEN_AI_REQUEST_TOP_K, 'top_k'),
+ ]
+
+ # Safely get the options dictionary from kwargs
+ options = kwargs.get('options', {})
+
+ # Set each attribute if the corresponding value exists and is not None
+ for attribute, key in attributes:
+ # Use dictionary `get` to retrieve values from the options dictionary
+ value = options.get(key)
+ if value is not None:
+ span.set_attribute(attribute, value)
+
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ response_dict.get('model'))
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+ input_tokens)
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+ output_tokens)
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ server_address)
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+ [response_dict.get('done_reason')])
+ if kwargs.get('format'):
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ 'json')
+ else:
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ 'text')
+
+ # Set base span attribues (Extras)
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
  environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+ span.set_attribute(SERVICE_NAME,
  application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "llama3"))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
  False)
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+ input_tokens + output_tokens)
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+ cost)
+ span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+ end_time - start_time)
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
  if trace_content:
  span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
  attributes={
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
+ SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
  },
  )
  span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
  attributes={
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response['response'],
+ # pylint: disable=line-too-long
+ SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('message').get('content')),
  },
  )
-
- prompt_tokens = response["prompt_eval_count"]
- completion_tokens = response["eval_count"]
- total_tokens = prompt_tokens + completion_tokens
- # Calculate cost of the operation
- cost = get_chat_model_cost(kwargs.get("model", "llama3"),
- pricing_info, prompt_tokens, completion_tokens)
-
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
- prompt_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
- completion_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- total_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
- [response["done_reason"]])
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
- cost)
+ if kwargs.get('tools'):
+ span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+ str(response_dict.get('message').get('tool_calls')))

  span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_OPERATION:
- SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "llama3")
- }
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+ system=SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
+ request_model=request_model,
+ server_address=server_address,
+ server_port=server_port,
+ response_model=response_dict.get('model'),
+ )

+ metrics["genai_client_usage_tokens"].record(
+ input_tokens + output_tokens, attributes
+ )
+ metrics["genai_client_operation_duration"].record(
+ end_time - start_time, attributes
+ )
+ metrics["genai_server_ttft"].record(
+ end_time - start_time, attributes
+ )
  metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(total_tokens, attributes)
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+ metrics["genai_completion_tokens"].add(output_tokens, attributes)
+ metrics["genai_prompt_tokens"].add(input_tokens, attributes)
  metrics["genai_cost"].record(cost, attributes)

  # Return original response
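
Note the shape of the streaming branch above: instead of wrapping a generator in a span context manager, the wrapper now returns a TracedSyncStream, so the span stays open until the caller finishes iterating. A usage sketch (assuming the ollama client has been patched; the model name and prompt are illustrative):

    import ollama

    stream = ollama.chat(
        model='llama3',
        messages=[{'role': 'user', 'content': 'Hello'}],
        stream=True,
    )
    for chunk in stream:
        # TracedSyncStream.__next__ records a timestamp per chunk; the
        # first timestamp yields TTFT, later gaps feed the TBT metric.
        print(chunk['message']['content'], end='')
    # On exhaustion, __next__ catches StopIteration, sets the span
    # attributes (tokens, cost, TTFT, TBT), ends the span, and re-raises.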
@@ -504,13 +454,12 @@ def generate(gen_ai_endpoint, version, environment, application_name,

  return wrapper

- def embeddings(gen_ai_endpoint, version, environment, application_name,
- tracer, pricing_info, trace_content, metrics, disable_metrics):
+ def embeddings(version, environment, application_name,
+ tracer, pricing_info, trace_content, metrics, disable_metrics):
  """
  Generates a telemetry wrapper for embeddings to collect metrics.

  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
  application_name: Name of the application using the Ollama API.
@@ -539,71 +488,89 @@ def embeddings(gen_ai_endpoint, version, environment, application_name,
  The response from the original 'embeddings' method.
  """

- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+ server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
+ request_model = kwargs.get('model', 'all-minilm')
+
+ span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
+
+ with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+ start_time = time.time()
  response = wrapped(*args, **kwargs)
+ end_time = time.time()

  try:
- prompt_tokens = general_tokens(kwargs.get('prompt', ""))
+ input_tokens = general_tokens(str(kwargs.get('prompt')))
+
  # Calculate cost of the operation
- cost = get_embed_model_cost(kwargs.get('model', "mistral-embed"),
- pricing_info, prompt_tokens)
- # Set Span attributes
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
- SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
+ cost = get_embed_model_cost(request_model,
+ pricing_info, input_tokens)
+
+ # Set Span attributes (OTel Semconv)
+ span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
  span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
  SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
- environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
- application_name)
+ span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+ SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get('model', "llama3"))
+ request_model)
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ server_address)
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
+ server_port)
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
- prompt_tokens)
+ input_tokens)
+
+ # Set Span attributes (Extras)
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+ environment)
+ span.set_attribute(SERVICE_NAME,
+ application_name)
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- prompt_tokens)
+ input_tokens)
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
  cost)
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
+
  if trace_content:
  span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
  attributes={
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
+ SemanticConvetion.GEN_AI_CONTENT_PROMPT: str(kwargs.get('prompt', '')),
  },
  )

  span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_OPERATION:
- SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get('model', "llama3")
- }
-
- metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(prompt_tokens, attributes)
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
- metrics["genai_cost"].record(cost, attributes)
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+ system=SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
+ request_model=request_model,
+ server_address=server_address,
+ server_port=server_port,
+ response_model=request_model,
+ )
+ metrics['genai_client_usage_tokens'].record(
+ input_tokens, attributes
+ )
+ metrics['genai_client_operation_duration'].record(
+ end_time - start_time, attributes
+ )
+ metrics['genai_requests'].add(1, attributes)
+ metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+ metrics['genai_cost'].record(cost, attributes)

  # Return original response
  return response

  except Exception as e:
  handle_exception(span, e)
- logger.error("Error in trace creation: %s", e)
+ logger.error('Error in trace creation: %s', e)

  # Return original response
  return response
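
The calculate_ttft and calculate_tbt helpers used throughout this file come from openlit.__helpers and are not part of this diff. A plausible reading of their semantics, inferred only from how they are called above (timestamps being the list of per-chunk arrival times and start_time the request start):

    def calculate_ttft(timestamps, start_time):
        # Time to first token: delay between issuing the request
        # and the arrival of the first streamed chunk.
        return timestamps[0] - start_time

    def calculate_tbt(timestamps):
        # Time between tokens: average gap between consecutive chunks.
        gaps = [b - a for a, b in zip(timestamps, timestamps[1:])]
        return sum(gaps) / len(gaps) if gaps else 0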