openlit 1.33.8__py3-none-any.whl → 1.33.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. openlit/__helpers.py +88 -0
  2. openlit/__init__.py +4 -3
  3. openlit/instrumentation/ag2/ag2.py +5 -5
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +9 -9
  13. openlit/instrumentation/astra/async_astra.py +9 -9
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +9 -9
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +5 -5
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
  26. openlit/instrumentation/crewai/crewai.py +6 -4
  27. openlit/instrumentation/dynamiq/dynamiq.py +5 -5
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +71 -46
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +71 -51
  30. openlit/instrumentation/embedchain/embedchain.py +9 -9
  31. openlit/instrumentation/firecrawl/firecrawl.py +5 -5
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/__init__.py +2 -2
  36. openlit/instrumentation/gpt4all/gpt4all.py +345 -220
  37. openlit/instrumentation/gpu/__init__.py +5 -5
  38. openlit/instrumentation/groq/__init__.py +2 -2
  39. openlit/instrumentation/groq/async_groq.py +356 -240
  40. openlit/instrumentation/groq/groq.py +356 -240
  41. openlit/instrumentation/haystack/haystack.py +5 -5
  42. openlit/instrumentation/julep/async_julep.py +5 -5
  43. openlit/instrumentation/julep/julep.py +5 -5
  44. openlit/instrumentation/langchain/__init__.py +13 -7
  45. openlit/instrumentation/langchain/async_langchain.py +384 -0
  46. openlit/instrumentation/langchain/langchain.py +105 -492
  47. openlit/instrumentation/letta/letta.py +11 -9
  48. openlit/instrumentation/litellm/__init__.py +4 -5
  49. openlit/instrumentation/litellm/async_litellm.py +318 -247
  50. openlit/instrumentation/litellm/litellm.py +314 -243
  51. openlit/instrumentation/llamaindex/llamaindex.py +5 -5
  52. openlit/instrumentation/mem0/mem0.py +5 -5
  53. openlit/instrumentation/milvus/milvus.py +9 -9
  54. openlit/instrumentation/mistral/__init__.py +6 -6
  55. openlit/instrumentation/mistral/async_mistral.py +423 -250
  56. openlit/instrumentation/mistral/mistral.py +420 -246
  57. openlit/instrumentation/multion/async_multion.py +6 -4
  58. openlit/instrumentation/multion/multion.py +6 -4
  59. openlit/instrumentation/ollama/__init__.py +8 -30
  60. openlit/instrumentation/ollama/async_ollama.py +385 -417
  61. openlit/instrumentation/ollama/ollama.py +384 -417
  62. openlit/instrumentation/openai/__init__.py +11 -230
  63. openlit/instrumentation/openai/async_openai.py +433 -410
  64. openlit/instrumentation/openai/openai.py +414 -394
  65. openlit/instrumentation/phidata/phidata.py +6 -4
  66. openlit/instrumentation/pinecone/pinecone.py +9 -9
  67. openlit/instrumentation/premai/__init__.py +2 -2
  68. openlit/instrumentation/premai/premai.py +262 -213
  69. openlit/instrumentation/qdrant/async_qdrant.py +9 -9
  70. openlit/instrumentation/qdrant/qdrant.py +9 -9
  71. openlit/instrumentation/reka/__init__.py +2 -2
  72. openlit/instrumentation/reka/async_reka.py +90 -52
  73. openlit/instrumentation/reka/reka.py +90 -52
  74. openlit/instrumentation/together/__init__.py +4 -4
  75. openlit/instrumentation/together/async_together.py +278 -236
  76. openlit/instrumentation/together/together.py +278 -236
  77. openlit/instrumentation/transformers/__init__.py +1 -1
  78. openlit/instrumentation/transformers/transformers.py +76 -45
  79. openlit/instrumentation/vertexai/__init__.py +14 -64
  80. openlit/instrumentation/vertexai/async_vertexai.py +330 -987
  81. openlit/instrumentation/vertexai/vertexai.py +330 -987
  82. openlit/instrumentation/vllm/__init__.py +1 -1
  83. openlit/instrumentation/vllm/vllm.py +66 -36
  84. openlit/otel/metrics.py +98 -7
  85. openlit/semcov/__init__.py +113 -80
  86. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
  87. openlit-1.33.10.dist-info/RECORD +122 -0
  88. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/WHEEL +1 -1
  89. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  90. openlit/instrumentation/openai/azure_openai.py +0 -898
  91. openlit-1.33.8.dist-info/RECORD +0 -122
  92. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
openlit/instrumentation/ollama/async_ollama.py
@@ -1,28 +1,33 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
 """
 Module for monitoring Ollama API calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
+    get_chat_model_cost,
+    get_embed_model_cost,
     handle_exception,
+    response_as_dict,
     general_tokens,
-    get_chat_model_cost,
-    get_embed_model_cost)
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
+)
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def async_chat(gen_ai_endpoint, version, environment, application_name,
+def async_chat(version, environment, application_name,
                tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for chat to collect metrics.
+    Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the Ollama API.
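The import block above pulls in two new streaming-latency helpers, `calculate_ttft` and `calculate_tbt`, from `openlit/__helpers.py` (extended by +88 lines in this release). Their real implementations live in that file; judging only from the call sites later in this diff, a minimal hedged sketch of what they compute over a list of per-chunk arrival timestamps:

    import time

    # Hedged sketch only; the actual implementations are in openlit/__helpers.py.
    def calculate_ttft(timestamps, start_time):
        # Time to first token: delay from issuing the request to the first chunk.
        return timestamps[0] - start_time if timestamps else 0

    def calculate_tbt(timestamps):
        # Mean gap between consecutive streamed chunks (time between tokens).
        if len(timestamps) < 2:
            return 0
        gaps = [later - earlier for earlier, later in zip(timestamps, timestamps[1:])]
        return sum(gaps) / len(gaps)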
@@ -31,464 +36,410 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
         trace_content: Flag indicating whether to trace the actual content.
 
     Returns:
-        A function that wraps the chat method to add telemetry.
+        A function that wraps the chat completions method to add telemetry.
     """
 
-    async def wrapper(wrapped, instance, args, kwargs):
+    class TracedAsyncStream:
         """
-        Wraps the 'chat' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
+        Wrapper for streaming responses to collect metrics and trace data.
+        Wraps the response to collect message IDs and aggregated response.
 
-        Args:
-            wrapped: The original 'chat' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat' method.
-            kwargs: Keyword arguments for the 'chat' method.
+        This class implements the '__aiter__' and '__anext__' methods that
+        handle asynchronous streaming responses.
 
-        Returns:
-            The response from the original 'chat' method.
+        This class also implements '__aenter__' and '__aexit__' methods that
+        handle asynchronous context management protocol.
         """
-
-        # Check if streaming is enabled for the API call
-        streaming = kwargs.get("stream", False)
-
-        # pylint: disable=no-else-return
-        if streaming:
-            # Special handling for streaming response to accommodate the nature of data flow
-            async def stream_generator():
-                with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                    # Placeholder for aggregating streaming response
-                    llmresponse = ""
-
-                    # Loop through streaming events capturing relevant details
-                    async for chunk in await wrapped(*args, **kwargs):
-                        # Collect aggregated response from events
-                        content = chunk['message']['content']
-                        llmresponse += content
-
-                        if chunk['done'] is True:
-                            completion_tokens = chunk["eval_count"]
-
-                        yield chunk
-
-                    # Handling exception ensure observability without disrupting operation
-                    try:
-                        # Format 'messages' into a single string
-                        message_prompt = kwargs.get("messages", "")
-                        formatted_messages = []
-                        for message in message_prompt:
-                            role = message["role"]
-                            content = message["content"]
-
-                            if isinstance(content, list):
-                                content_str = ", ".join(
-                                    # pylint: disable=line-too-long
-                                    f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                    if "type" in item else f'text: {item["text"]}'
-                                    for item in content
-                                )
-                                formatted_messages.append(f"{role}: {content_str}")
-                            else:
-                                formatted_messages.append(f"{role}: {content}")
-                        prompt = "\n".join(formatted_messages)
-
-                        prompt_tokens = general_tokens(prompt)
-                        total_tokens = prompt_tokens + completion_tokens
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                                pricing_info, prompt_tokens, completion_tokens)
-
-                        # Set Span attributes
-                        span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                        span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                SemanticConvetion.GEN_AI_TYPE_CHAT)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                gen_ai_endpoint)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                environment)
-                        span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                application_name)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                kwargs.get("model", "llama3"))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                True)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                cost)
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                                },
-                            )
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                                },
-                            )
-
-                        span.set_status(Status(StatusCode.OK))
-
-                        if disable_metrics is False:
-                            attributes = {
-                                TELEMETRY_SDK_NAME:
-                                    "openlit",
-                                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                    application_name,
-                                SemanticConvetion.GEN_AI_SYSTEM:
-                                    SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                    environment,
-                                SemanticConvetion.GEN_AI_TYPE:
-                                    SemanticConvetion.GEN_AI_TYPE_CHAT,
-                                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                    kwargs.get("model", "llama3")
-                            }
-
-                            metrics["genai_requests"].add(1, attributes)
-                            metrics["genai_total_tokens"].add(total_tokens, attributes)
-                            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                            metrics["genai_cost"].record(cost, attributes)
-
-                    except Exception as e:
-                        handle_exception(span, e)
-                        logger.error("Error in trace creation: %s", e)
-
-            return stream_generator()
-
-        # Handling for non-streaming responses
-        else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                response = await wrapped(*args, **kwargs)
-
+        def __init__(
+                self,
+                wrapped,
+                span,
+                kwargs,
+                server_address,
+                server_port,
+                **args,
+            ):
+            self.__wrapped__ = wrapped
+            self._span = span
+            # Placeholder for aggregating streaming response
+            self._llmresponse = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
+
+            self._args = args
+            self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
+
+        async def __aenter__(self):
+            await self.__wrapped__.__aenter__()
+            return self
+
+        async def __aexit__(self, exc_type, exc_value, traceback):
+            await self.__wrapped__.__aexit__(exc_type, exc_value, traceback)
+
+        def __aiter__(self):
+            return self
+
+        async def __getattr__(self, name):
+            """Delegate attribute access to the wrapped object."""
+            return getattr(await self.__wrapped__, name)
+
+        async def __anext__(self):
+            try:
+                chunk = await self.__wrapped__.__anext__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+                chunked = response_as_dict(chunk)
+                self._llmresponse += chunked.get('message').get('content')
+
+                if chunked.get('eval_count'):
+                    self._input_tokens = chunked.get('prompt_eval_count')
+                    self._output_tokens = chunked.get('eval_count')
+                    self._response_model = chunked.get('model')
+                    self._finish_reason = chunked.get('done_reason')
+                return chunk
+            except StopAsyncIteration:
+                # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
-                    message_prompt = kwargs.get("messages", "")
+                    message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
                     for message in message_prompt:
                         role = message["role"]
                         content = message["content"]
 
                         if isinstance(content, list):
-                            content_str = ", ".join(
-                                # pylint: disable=line-too-long
-                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
+                            content_str_list = []
+                            for item in content:
+                                if item["type"] == "text":
+                                    content_str_list.append(f'text: {item["text"]}')
+                                elif (item["type"] == "image_url" and
+                                      not item["image_url"]["url"].startswith("data:")):
+                                    content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+                            content_str = ", ".join(content_str_list)
                             formatted_messages.append(f"{role}: {content_str}")
                         else:
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
-                    # Set base span attribues
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                    request_model = self._kwargs.get("model", "gpt-4o")
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                            pricing_info, self._input_tokens,
+                            self._output_tokens)
+
+                    # Set Span attributes (OTel Semconv)
+                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                            SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                             SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                            SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                            gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                            request_model)
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                            self._server_port)
+
+                    # List of attributes and their config keys
+                    attributes = [
+                        (SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'repeat_penalty'),
+                        (SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
+                        (SemanticConvetion.GEN_AI_REQUEST_SEED, 'seed'),
+                        (SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop'),
+                        (SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
+                        (SemanticConvetion.GEN_AI_REQUEST_TOP_P, 'top_p'),
+                        (SemanticConvetion.GEN_AI_REQUEST_TOP_K, 'top_k'),
+                    ]
+
+                    # Safely get the options dictionary from kwargs
+                    options = self._kwargs.get('options', {})
+
+                    # Set each attribute if the corresponding value exists and is not None
+                    for attribute, key in attributes:
+                        # Use dictionary `get` to retrieve values from the options dictionary
+                        value = options.get(key)
+                        if value is not None:
+                            self._span.set_attribute(attribute, value)
+
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                            [self._finish_reason])
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                            self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                            self._input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                            self._output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                            self._server_address)
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                             environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    self._span.set_attribute(SERVICE_NAME,
                             application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                            kwargs.get("model", "llama3"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                            False)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                            True)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                            self._input_tokens + self._output_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                            cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                            self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                            self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                            version)
                     if trace_content:
-                        span.add_event(
+                        self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                             attributes={
                                 SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
                             },
                         )
-                        span.add_event(
+                        self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                             attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response['message']['content'],
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
-                    prompt_tokens = general_tokens(prompt)
-                    completion_tokens = response["eval_count"]
-                    total_tokens = prompt_tokens + completion_tokens
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                            pricing_info, prompt_tokens, completion_tokens)
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                            prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                            completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                            total_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                            [response["done_reason"]])
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                            cost)
-
-                    span.set_status(Status(StatusCode.OK))
+                    self._span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "llama3")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )
 
+                        metrics["genai_client_usage_tokens"].record(
+                            self._input_tokens + self._output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
+                        )
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
-                    # Return original response
-                    return response
-
                 except Exception as e:
-                    handle_exception(span, e)
+                    handle_exception(self._span, e)
                     logger.error("Error in trace creation: %s", e)
-
-                    # Return original response
-                    return response
-
-    return wrapper
-
-def async_generate(gen_ai_endpoint, version, environment, application_name,
-                   tracer, pricing_info, trace_content, metrics, disable_metrics):
-    """
-    Generates a telemetry wrapper for generate to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Ollama API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Ollama usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the generate method to add telemetry.
-    """
+                finally:
+                    self._span.end()
+                raise
 
     async def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'generate' API call to add telemetry.
-
+        Wraps the 'chat.completions' API call to add telemetry.
+
         This collects metrics such as execution time, cost, and token usage, and handles errors
         gracefully, adding details to the trace for observability.
 
         Args:
-            wrapped: The original 'generate' method to be wrapped.
+            wrapped: The original 'chat.completions' method to be wrapped.
             instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'generate' method.
-            kwargs: Keyword arguments for the 'generate' method.
+            args: Positional arguments for the 'chat.completions' method.
+            kwargs: Keyword arguments for the 'chat.completions' method.
 
         Returns:
-            The response from the original 'generate' method.
+            The response from the original 'chat.completions' method.
         """
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
-            async def stream_generator():
-                with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                    # Placeholder for aggregating streaming response
-                    llmresponse = ""
-
-                    # Loop through streaming events capturing relevant details
-                    async for chunk in await wrapped(*args, **kwargs):
-                        # Collect aggregated response from events
-                        content = chunk['response']
-                        llmresponse += content
-
-                        if chunk['done'] is True:
-                            completion_tokens = chunk["eval_count"]
-
-                        yield chunk
-
-                    # Handling exception ensure observability without disrupting operation
-                    try:
-                        prompt_tokens = general_tokens(kwargs.get("prompt", ""))
-                        total_tokens = prompt_tokens + completion_tokens
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                                pricing_info, prompt_tokens, completion_tokens)
-
-                        # Set Span attributes
-                        span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                        span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                SemanticConvetion.GEN_AI_TYPE_CHAT)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                gen_ai_endpoint)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                environment)
-                        span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                application_name)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                kwargs.get("model", "llama3"))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                True)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                cost)
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                                attributes={
-                                    # pylint: disable=line-too-long
-                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
-                                },
-                            )
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                                },
-                            )
+            awaited_wrapped = await wrapped(*args, **kwargs)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-                        span.set_status(Status(StatusCode.OK))
-
-                        if disable_metrics is False:
-                            attributes = {
-                                TELEMETRY_SDK_NAME:
-                                    "openlit",
-                                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                    application_name,
-                                SemanticConvetion.GEN_AI_SYSTEM:
-                                    SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                    environment,
-                                SemanticConvetion.GEN_AI_TYPE:
-                                    SemanticConvetion.GEN_AI_TYPE_CHAT,
-                                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                    kwargs.get("model", "llama3")
-                            }
-
-                            metrics["genai_requests"].add(1, attributes)
-                            metrics["genai_total_tokens"].add(total_tokens, attributes)
-                            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                            metrics["genai_cost"].record(cost, attributes)
-
-                    except Exception as e:
-                        handle_exception(span, e)
-                        logger.error("Error in trace creation: %s", e)
-
-            return stream_generator()
+            return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
         # Handling for non-streaming responses
         else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = await wrapped(*args, **kwargs)
+                end_time = time.time()
+
+                response_dict = response_as_dict(response)
 
                 try:
-                    # Set base span attribues
+                    # Format 'messages' into a single string
+                    message_prompt = kwargs.get("messages", "")
+                    formatted_messages = []
+                    for message in message_prompt:
+                        role = message["role"]
+                        content = message["content"]
+
+                        if isinstance(content, list):
+                            content_str = ", ".join(
+                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                                if "type" in item else f'text: {item["text"]}'
+                                for item in content
+                            )
+                            formatted_messages.append(f"{role}: {content_str}")
+                        else:
+                            formatted_messages.append(f"{role}: {content}")
+                    prompt = "\n".join(formatted_messages)
+
+                    input_tokens = response_dict.get('prompt_eval_count')
+                    output_tokens = response_dict.get('eval_count')
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                            pricing_info, input_tokens,
+                            output_tokens)
+
+                    # Set base span attribues (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                            SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                             SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                            SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                            gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                            request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                            server_port)
+
+                    # List of attributes and their config keys
+                    attributes = [
+                        (SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'repeat_penalty'),
+                        (SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
+                        (SemanticConvetion.GEN_AI_REQUEST_SEED, 'seed'),
+                        (SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop'),
+                        (SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
+                        (SemanticConvetion.GEN_AI_REQUEST_TOP_P, 'top_p'),
+                        (SemanticConvetion.GEN_AI_REQUEST_TOP_K, 'top_k'),
+                    ]
+
+                    # Safely get the options dictionary from kwargs
+                    options = kwargs.get('options', {})
+
+                    # Set each attribute if the corresponding value exists and is not None
+                    for attribute, key in attributes:
+                        # Use dictionary `get` to retrieve values from the options dictionary
+                        value = options.get(key)
+                        if value is not None:
+                            span.set_attribute(attribute, value)
+
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                            response_dict.get('model'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                            input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                            output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                            server_address)
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                            [response_dict.get('done_reason')])
+                    if kwargs.get('format'):
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                'json')
+                    else:
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                'text')
+
+                    # Set base span attribues (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                             environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                             application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                            kwargs.get("model", "llama3"))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                             False)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                            input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                            cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                            end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                            version)
                     if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                             attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
+                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
                             },
                         )
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                             attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response['response'],
+                                # pylint: disable=line-too-long
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('message').get('content')),
                             },
                         )
-
-                    prompt_tokens = response["prompt_eval_count"]
-                    completion_tokens = response["eval_count"]
-                    total_tokens = prompt_tokens + completion_tokens
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "llama3"),
-                            pricing_info, prompt_tokens, completion_tokens)
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                            prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                            completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                            total_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                            [response["done_reason"]])
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                            cost)
+                    if kwargs.get('tools'):
+                        span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                str(response_dict.get('message').get('tool_calls')))
 
                     span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "llama3")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=response_dict.get('model'),
+                        )
 
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                     # Return original response
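Both the streaming and non-streaming paths above now label their metrics through the new `create_metrics_attributes` helper instead of an inline dict. The actual helper and the exact `SemanticConvetion` keys are defined in `openlit/__helpers.py` and `openlit/semcov/__init__.py`; a hedged sketch of the attribute set its call sites imply, with illustrative OTel-style key names:

    # Hedged sketch; real key constants come from openlit.semcov.SemanticConvetion.
    def create_metrics_attributes(service_name, deployment_environment, operation,
                                  system, request_model, server_address,
                                  server_port, response_model):
        # One flat label set shared by every counter/histogram recorded above.
        return {
            "telemetry.sdk.name": "openlit",
            "service.name": service_name,
            "deployment.environment": deployment_environment,
            "gen_ai.operation.name": operation,
            "gen_ai.system": system,
            "gen_ai.request.model": request_model,
            "server.address": server_address,
            "server.port": server_port,
            "gen_ai.response.model": response_model,
        }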
@@ -503,13 +454,12 @@ def async_generate(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def async_embeddings(gen_ai_endpoint, version, environment, application_name,
-                     tracer, pricing_info, trace_content, metrics, disable_metrics):
+def async_embeddings(version, environment, application_name,
+                     tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the Ollama API.
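Like `async_chat`, the reworked `async_embeddings` wrapper below resolves `server.address`/`server.port` through the new `set_server_address_and_port` helper, falling back to Ollama's default local endpoint (127.0.0.1:11434). A rough sketch of the idea, assuming the client instance exposes an httpx-style `base_url` (that attribute lookup is an assumption; the real helper lives in `openlit/__helpers.py`):

    from urllib.parse import urlparse

    # Hedged sketch; how the instance is introspected is assumed, not confirmed.
    def set_server_address_and_port(instance, default_address, default_port):
        # Try to read the underlying HTTP client's base URL off the instance.
        base_url = getattr(getattr(instance, "_client", None), "base_url", None)
        if base_url:
            parsed = urlparse(str(base_url))
            return parsed.hostname or default_address, parsed.port or default_port
        return default_address, default_port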
@@ -538,71 +488,89 @@ def async_embeddings(gen_ai_endpoint, version, environment, application_name,
             The response from the original 'embeddings' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
+        request_model = kwargs.get('model', 'all-minilm')
+
+        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = await wrapped(*args, **kwargs)
+            end_time = time.time()
 
             try:
-                prompt_tokens = general_tokens(kwargs.get('prompt', ""))
+                input_tokens = general_tokens(str(kwargs.get('prompt')))
+
                 # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get('model', "mistral-embed"),
-                        pricing_info, prompt_tokens)
-                # Set Span attributes
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                cost = get_embed_model_cost(request_model,
+                        pricing_info, input_tokens)
+
+                # Set Span attributes (OTel Semconv)
+                span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                        SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                         SemanticConvetion.GEN_AI_SYSTEM_OLLAMA)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                        SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                        gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                        request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                        request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                        server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                        server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                        input_tokens)
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                         environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                         application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                        kwargs.get('model', "llama3"))
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                        prompt_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        prompt_tokens)
+                        input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                         cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                        version)
+
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                         attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: str(kwargs.get('prompt', '')),
                         },
                     )
 
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get('model', "llama3")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(prompt_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OLLAMA,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
+                    metrics['genai_client_usage_tokens'].record(
+                        input_tokens, attributes
+                    )
+                    metrics['genai_client_operation_duration'].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics['genai_requests'].add(1, attributes)
+                    metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+                    metrics['genai_cost'].record(cost, attributes)
 
                 # Return original response
                 return response
 
             except Exception as e:
                 handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
+                logger.error('Error in trace creation: %s', e)
 
                 # Return original response
                 return response
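Taken together, the rewrite drops the fixed `gen_ai_endpoint` span names in favor of `"{operation} {model}"` names, emits OTel GenAI semantic-convention attributes, and records TTFT/TBT latency for streams. A hedged end-to-end sketch of exercising the instrumented async client (assumes a local Ollama server on the 127.0.0.1:11434 default used above; model name and app/environment labels are illustrative):

    import asyncio

    import openlit
    from ollama import AsyncClient

    # Wires up the tracer and meter that the wrappers in this diff rely on.
    openlit.init(application_name="demo-app", environment="dev")

    async def main():
        # With stream=True the instrumented call returns the TracedAsyncStream
        # wrapper, so iterating it transparently records per-chunk timestamps,
        # token usage, and cost on the span.
        stream = await AsyncClient().chat(
            model="llama3",
            messages=[{"role": "user", "content": "Hello"}],
            stream=True,
        )
        async for chunk in stream:
            print(chunk["message"]["content"], end="", flush=True)

    asyncio.run(main())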