openlit 1.34.8__py3-none-any.whl → 1.34.11__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only and reflects the package contents as they appear in that registry.
@@ -3,7 +3,6 @@ Ollama OpenTelemetry instrumentation utility functions
 """
 import time
 
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from opentelemetry.trace import Status, StatusCode
 
 from openlit.__helpers import (
@@ -11,22 +10,42 @@ from openlit.__helpers import (
     response_as_dict,
     calculate_tbt,
     general_tokens,
-    extract_and_format_input,
     get_chat_model_cost,
     get_embed_model_cost,
-    handle_exception,
     create_metrics_attributes,
-    otel_event,
-    concatenate_all_contents
+    common_span_attributes,
+    record_completion_metrics,
 )
 from openlit.semcov import SemanticConvention
 
+def format_content(messages):
+    """
+    Process a list of messages to extract content.
+    """
+
+    formatted_messages = []
+    for message in messages:
+        role = message["role"]
+        content = message["content"]
+
+        if isinstance(content, list):
+            content_str = ", ".join(
+                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                if "type" in item else f'text: {item["text"]}'
+                for item in content
+            )
+            formatted_messages.append(f"{role}: {content_str}")
+        else:
+            formatted_messages.append(f"{role}: {content}")
+
+    return "\n".join(formatted_messages)
+
 def process_chunk(self, chunk):
     """
     Process a chunk of response data and update state.
     """
 
-    end_time = time.time()
+    end_time = time.monotonic()
     # Record the timestamp for the current chunk
     self._timestamps.append(end_time)
 
@@ -35,79 +54,101 @@ def process_chunk(self, chunk):
         self._ttft = calculate_ttft(self._timestamps, self._start_time)
 
     chunked = response_as_dict(chunk)
-    self._llmresponse += chunked.get('message', {}).get('content', '')
+    self._llmresponse += chunked.get("message", {}).get("content", "")
+
+    if chunked.get("message", {}).get("tool_calls"):
+        self._tools = chunked["message"]["tool_calls"]
 
-    if chunked.get('message', {}).get('tool_calls'):
-        self._tool_calls = chunked['message']['tool_calls']
+    if chunked.get("eval_count"):
+        self._response_role = chunked.get("message", {}).get("role", "")
+        self._input_tokens = chunked.get("prompt_eval_count", 0)
+        self._output_tokens = chunked.get("eval_count", 0)
+        self._response_model = chunked.get("model", "")
+        self._finish_reason = chunked.get("done_reason", "")
 
-    if chunked.get('eval_count'):
-        self._response_role = chunked.get('message', {}).get('role', '')
-        self._input_tokens = chunked.get('prompt_eval_count', 0)
-        self._output_tokens = chunked.get('eval_count', 0)
-        self._response_model = chunked.get('model', '')
-        self._finish_reason = chunked.get('done_reason', '')
+def record_embedding_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
+    request_model, response_model, environment, application_name, start_time, end_time, cost, input_tokens):
+    """
+    Record embedding metrics for the operation.
+    """
 
-def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-    event_provider, capture_message_content, disable_metrics, version, is_stream):
+    attributes = create_metrics_attributes(
+        operation=gen_ai_operation,
+        system=gen_ai_system,
+        server_address=server_address,
+        server_port=server_port,
+        request_model=request_model,
+        response_model=response_model,
+        service_name=application_name,
+        deployment_environment=environment,
+    )
+    metrics["genai_client_usage_tokens"].record(input_tokens, attributes)
+    metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
+    metrics["genai_requests"].add(1, attributes)
+    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+    metrics["genai_cost"].record(cost, attributes)
+
+def common_chat_logic(scope, gen_ai_endpoint, pricing_info, environment, application_name,
+    metrics, capture_message_content, disable_metrics, version):
     """
     Process chat request and generate Telemetry
     """
 
-    scope._end_time = time.time()
+    scope._end_time = time.monotonic()
     if len(scope._timestamps) > 1:
         scope._tbt = calculate_tbt(scope._timestamps)
-
     json_body = scope._kwargs.get("json", {}) or {}
-    request_model = json_body.get("model") or scope._kwargs.get("model")
     messages = json_body.get("messages", scope._kwargs.get("messages", ""))
-    formatted_messages = extract_and_format_input(messages)
+    prompt = format_content(messages)
+    request_model = json_body.get("model") or scope._kwargs.get("model", "llama3.2")
+    is_stream = scope._kwargs.get("stream", False)
 
     cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
 
-    # Set Span attributes (OTel Semconv)
-    scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_OLLAMA)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
-    scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
 
-    options = scope._kwargs.get('options', {})
+    # Span Attributes for Request parameters
+    options = json_body.get("options", scope._kwargs.get("options", {}))
     attributes = [
-        (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'repeat_penalty'),
-        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
-        (SemanticConvention.GEN_AI_REQUEST_SEED, 'seed'),
-        (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop'),
-        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
-        (SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
-        (SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
+        (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, "repeat_penalty"),
+        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, "max_tokens"),
+        (SemanticConvention.GEN_AI_REQUEST_SEED, "seed"),
+        (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, "stop"),
+        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, "temperature"),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_P, "top_p"),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_K, "top_k"),
     ]
-
     for attribute, key in attributes:
         value = options.get(key)
         if value is not None:
             scope._span.set_attribute(attribute, value)
 
+    # Span Attributes for Response parameters
     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._response_model)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,"text" if isinstance(scope._llmresponse, str) else "json")
+
+    # Span Attributes for Cost and Tokens
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
-    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
-
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-        "text" if isinstance(scope._llmresponse, str) else "json")
-
-    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
-    scope._span.set_attribute(SERVICE_NAME, application_name)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
     scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
 
-    # To be removed one the change to log events (from span events) is complete
-    prompt = concatenate_all_contents(formatted_messages)
+    # Span Attributes for Tools
+    if scope._tools is not None:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, scope._tools.get("function","")).get("name","")
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, str(scope._tools.get("id","")))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, str(scope._tools.get("function","").get("arguments","")))
+
+    # Span Attributes for Content
     if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+        # To be removed once the change to span_attributes (from span events) is complete
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
             attributes={
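For orientation: the `common_chat_logic` rewrite above now derives the captured prompt with the `format_content` helper introduced earlier in this file, instead of `extract_and_format_input`. A minimal, self-contained sketch of what that flattening produces; the sample messages below are invented for illustration:

```python
# Standalone copy of the new format_content helper, exercised with made-up messages.
def format_content(messages):
    formatted_messages = []
    for message in messages:
        role = message["role"]
        content = message["content"]
        if isinstance(content, list):
            # Multi-part content (text and image_url parts) is joined into one line
            content_str = ", ".join(
                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                if "type" in item else f'text: {item["text"]}'
                for item in content
            )
            formatted_messages.append(f"{role}: {content_str}")
        else:
            formatted_messages.append(f"{role}: {content}")
    return "\n".join(formatted_messages)

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": [
        {"type": "text", "text": "Describe this image"},
        {"type": "image_url", "image_url": "https://example.com/cat.png"},
    ]},
]
print(format_content(messages))
# system: You are a helpful assistant.
# user: text: Describe this image, image_url: https://example.com/cat.png
```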
@@ -121,216 +162,120 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
             },
         )
 
-    choice_event_body = {
-        "finish_reason": scope._finish_reason,
-        "index": 0,
-        "message": {
-            **({"content": scope._llmresponse} if capture_message_content else {}),
-            "role": scope._response_role
-        }
-    }
-
-    if scope._tool_calls:
-        function_call = scope._tool_calls[0]
-        choice_event_body["message"].update({
-            "tool_calls": {
-                "function": {
-                    "name": function_call.get('function', {}).get('name', ''),
-                    "arguments": function_call.get('function', {}).get('arguments', '')
-                },
-                "id": function_call.get('id', ''),
-                "type": "function"
-            }
-        })
-
-    # Emit events
-    for role in ['user', 'system', 'assistant', 'tool']:
-        if formatted_messages.get(role, {}).get('content', ''):
-            event = otel_event(
-                name=getattr(SemanticConvention, f'GEN_AI_{role.upper()}_MESSAGE'),
-                attributes={
-                    SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_OLLAMA
-                },
-                body = {
-                    # pylint: disable=line-too-long
-                    **({"content": formatted_messages.get(role, {}).get('content', '')} if capture_message_content else {}),
-                    "role": formatted_messages.get(role, {}).get('role', []),
-                    **({
-                        "tool_calls": {
-                            "function": {
-                                # pylint: disable=line-too-long
-                                "name": (scope._tool_calls[0].get('function', {}).get('name', '') if scope._tool_calls else ''),
-                                "arguments": (scope._tool_calls[0].get('function', {}).get('arguments', '') if scope._tool_calls else '')
-                            },
-                            "id": (scope._tool_calls[0].get('id', '') if scope._tool_calls else ''),
-                            "type": "function"
-                        }
-                    } if role == 'assistant' else {}),
-                    **({
-                        "id": (scope._tool_calls[0].get('id', '') if scope._tool_calls else '')
-                    } if role == 'tool' else {})
-                }
-            )
-            event_provider.emit(event)
-
-    choice_event = otel_event(
-        name=SemanticConvention.GEN_AI_CHOICE,
-        attributes={
-            SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_OLLAMA
-        },
-        body=choice_event_body
-    )
-    event_provider.emit(choice_event)
-
     scope._span.set_status(Status(StatusCode.OK))
 
+    # Metrics
     if not disable_metrics:
-        metrics_attributes = create_metrics_attributes(
-            service_name=application_name,
-            deployment_environment=environment,
-            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-            system=SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
-            request_model=request_model,
-            server_address=scope._server_address,
-            server_port=scope._server_port,
-            response_model=scope._response_model,
-        )
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            SemanticConvention.GEN_AI_SYSTEM_OLLAMA, scope._server_address, scope._server_port,
+            request_model, scope._response_model, environment, application_name, scope._start_time,
+            scope._end_time, cost, scope._input_tokens, scope._output_tokens, scope._tbt, scope._ttft)
+
+def common_embedding_logic(scope, gen_ai_endpoint, pricing_info, environment, application_name,
+    metrics, capture_message_content, disable_metrics, version):
+    """
+    Process embedding request and generate Telemetry
+    """
+
+    json_body = scope._kwargs.get("json", {}) or {}
+    request_model = json_body.get("model") or scope._kwargs.get("model", "llama3.2")
+    prompt_val = json_body.get("prompt", scope._kwargs.get("prompt", ""))
+    input_tokens = general_tokens(str(prompt_val))
+    is_stream = False # Ollama embeddings are not streaming
+
+    cost = get_embed_model_cost(request_model, pricing_info, input_tokens)
 
-        metrics["genai_client_usage_tokens"].record(scope._input_tokens + scope._output_tokens, metrics_attributes)
-        metrics["genai_client_operation_duration"].record(scope._end_time - scope._start_time, metrics_attributes)
-        metrics["genai_server_tbt"].record(scope._tbt, metrics_attributes)
-        metrics["genai_server_ttft"].record(scope._ttft, metrics_attributes)
-        metrics["genai_requests"].add(1, metrics_attributes)
-        metrics["genai_completion_tokens"].add(scope._output_tokens, metrics_attributes)
-        metrics["genai_prompt_tokens"].add(scope._input_tokens, metrics_attributes)
-        metrics["genai_cost"].record(cost, metrics_attributes)
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Embedding-specific parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens)
+
+    # Span Attributes for Cost
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Span Attributes for Content
+    if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt_val)
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    # Metrics
+    if not disable_metrics:
+        record_embedding_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
+            SemanticConvention.GEN_AI_SYSTEM_OLLAMA, scope._server_address, scope._server_port,
+            request_model, request_model, environment, application_name, scope._start_time,
+            scope._end_time, cost, input_tokens)
 
 def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
-    event_provider, capture_message_content=False, disable_metrics=False, version=''):
+    capture_message_content=False, disable_metrics=False, version=""):
     """
-    Process chat request and generate Telemetry
+    Process streaming chat request and generate Telemetry
     """
 
-    common_chat_logic(self, pricing_info, environment, application_name, metrics,
-        event_provider, capture_message_content, disable_metrics, version, is_stream=True)
+    common_chat_logic(self, "ollama.chat", pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version)
 
-def process_chat_response(response, request_model, pricing_info, server_port, server_address,
-    environment, application_name, metrics, event_provider, start_time,
-    span, capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
+def process_chat_response(response, gen_ai_endpoint, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time, span, capture_message_content=False,
+    disable_metrics=False, version="1.0.0", **kwargs):
     """
     Process chat request and generate Telemetry
     """
 
-    self = type('GenericScope', (), {})()
-
-    # pylint: disable = no-member
-    self._start_time = start_time
-    self._end_time = time.time()
-    self._span = span
-    self._llmresponse = response.get('message', {}).get('content', '')
-    self._response_role = response.get('message', {}).get('role', 'assistant')
-    self._input_tokens = response.get('prompt_eval_count')
-    self._output_tokens = response.get('eval_count')
-    self._response_model = response.get('model', '')
-    self._finish_reason = response.get('done_reason', '')
-    self._timestamps = []
-    self._ttft, self._tbt = self._end_time - self._start_time, 0
-    self._server_address, self._server_port = server_address, server_port
-    self._kwargs = kwargs
-    self._tool_calls = response.get('message', {}).get('tool_calls', [])
-
-    common_chat_logic(self, pricing_info, environment, application_name, metrics,
-        event_provider, capture_message_content, disable_metrics, version, is_stream=False)
+    scope = type("GenericScope", (), {})()
+    response_dict = response_as_dict(response)
+
+    scope._start_time = start_time
+    scope._end_time = time.monotonic()
+    scope._span = span
+    scope._llmresponse = response_dict.get("message", {}).get("content", "")
+    scope._response_role = response_dict.get("message", {}).get("role", "assistant")
+    scope._input_tokens = response_dict.get("prompt_eval_count", 0)
+    scope._output_tokens = response_dict.get("eval_count", 0)
+    scope._response_model = response_dict.get("model", "llama3.2")
+    scope._finish_reason = response_dict.get("done_reason", "")
+    scope._timestamps = []
+    scope._ttft = scope._end_time - scope._start_time
+    scope._tbt = 0
+    scope._server_address, scope._server_port = server_address, server_port
+    scope._kwargs = kwargs
+
+    if scope._kwargs.get("tools"):
+        scope._tools = response_dict.get("choices")[0].get("message").get("tool_calls")
+    else:
+        scope._tools = None
+
+    common_chat_logic(scope, gen_ai_endpoint, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version)
 
     return response
 
-def process_embedding_response(response, request_model, pricing_info, server_port, server_address,
-    environment, application_name, metrics, event_provider,
-    start_time, span, capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
+def process_embedding_response(response, gen_ai_endpoint, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time, span, capture_message_content=False,
+    disable_metrics=False, version="1.0.0", **kwargs):
     """
     Process embedding request and generate Telemetry
     """
 
-    end_time = time.time()
-
-    try:
-        json_body = kwargs.get("json", {}) or {}
-        prompt_val = json_body.get('prompt', kwargs.get('prompt', ''))
-        input_tokens = general_tokens(str(prompt_val))
-
-        # Calculate cost of the operation
-        cost = get_embed_model_cost(request_model,
-            pricing_info, input_tokens)
-
-        # Set Span attributes (OTel Semconv)
-        span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
-        span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-            SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING)
-        span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-            SemanticConvention.GEN_AI_SYSTEM_OLLAMA)
-        span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-            request_model)
-        span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-            request_model)
-        span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-            server_address)
-        span.set_attribute(SemanticConvention.SERVER_PORT,
-            server_port)
-        span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-            input_tokens)
-
-        # Set Span attributes (Extras)
-        span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-            environment)
-        span.set_attribute(SERVICE_NAME,
-            application_name)
-        span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
-            input_tokens)
-        span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-            cost)
-        span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-            version)
-
-        prompt_event = otel_event(
-            name=SemanticConvention.GEN_AI_USER_MESSAGE,
-            attributes={
-                SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_OLLAMA
-            },
-            body={
-                **({"content": prompt_val} if capture_message_content else {}),
-                "role": 'user'
-            }
-        )
-        event_provider.emit(prompt_event)
-
-        span.set_status(Status(StatusCode.OK))
-
-        if disable_metrics is False:
-            attributes = create_metrics_attributes(
-                service_name=application_name,
-                deployment_environment=environment,
-                operation=SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                system=SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
-                request_model=request_model,
-                server_address=server_address,
-                server_port=server_port,
-                response_model=request_model,
-            )
-            metrics['genai_client_usage_tokens'].record(
-                input_tokens, attributes
-            )
-            metrics['genai_client_operation_duration'].record(
-                end_time - start_time, attributes
-            )
-            metrics['genai_requests'].add(1, attributes)
-            metrics['genai_prompt_tokens'].add(input_tokens, attributes)
-            metrics['genai_cost'].record(cost, attributes)
+    scope = type("GenericScope", (), {})()
 
-        # Return original response
-        return response
+    scope._start_time = start_time
+    scope._end_time = time.monotonic()
+    scope._span = span
+    scope._server_address, scope._server_port = server_address, server_port
+    scope._kwargs = kwargs
 
-    except Exception as e:
-        handle_exception(span, e)
+    # Initialize streaming and timing values for Ollama embeddings
+    scope._response_model = kwargs.get("model", "llama3.2")
+    scope._tbt = 0.0
+    scope._ttft = scope._end_time - scope._start_time
 
-    # Return original response
-    return response
+    common_embedding_logic(scope, gen_ai_endpoint, pricing_info, environment, application_name,
+        metrics, capture_message_content, disable_metrics, version)
+
+    return response
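A side note on the `GenericScope` pattern used by both `process_chat_response` and `process_embedding_response` above: `type()` with an empty bases tuple and namespace creates an anonymous class whose instances accept arbitrary attribute assignment. A tiny sketch, with placeholder values:

```python
# type("GenericScope", (), {}) builds an anonymous class on the fly; calling it gives
# a bare object that behaves much like types.SimpleNamespace. Values are placeholders.
scope = type("GenericScope", (), {})()
scope._start_time = 0.0
scope._kwargs = {"model": "llama3.2", "stream": False}
print(scope._kwargs["model"])  # -> llama3.2
```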
@@ -3,7 +3,6 @@ PremAI OpenTelemetry instrumentation utility functions
 """
 import time
 
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from opentelemetry.trace import Status, StatusCode
 
 from openlit.__helpers import (
@@ -13,7 +12,9 @@ from openlit.__helpers import (
     get_chat_model_cost,
     get_embed_model_cost,
     general_tokens,
-    create_metrics_attributes,
+    common_span_attributes,
+    record_completion_metrics,
+    record_embedding_metrics,
 )
 from openlit.semcov import SemanticConvention
 
@@ -66,77 +67,6 @@ def process_chunk(scope, chunk):
         scope._response_model = chunked.get("model")
         scope._end_time = time.time()
 
-def common_span_attributes(scope, gen_ai_operation, gen_ai_system, server_address, server_port,
-    request_model, response_model, environment, application_name, is_stream, tbt, ttft, version):
-    """
-    Set common span attributes for both chat and RAG operations.
-    """
-
-    scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, gen_ai_operation)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, gen_ai_system)
-    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
-    scope._span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, response_model)
-    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
-    scope._span.set_attribute(SERVICE_NAME, application_name)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, tbt)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, ttft)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
-
-def record_completion_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
-    request_model, response_model, environment, application_name, start_time, end_time,
-    input_tokens, output_tokens, cost, tbt=None, ttft=None):
-    """
-    Record completion-specific metrics for the operation.
-    """
-
-    attributes = create_metrics_attributes(
-        operation=gen_ai_operation,
-        system=gen_ai_system,
-        server_address=server_address,
-        server_port=server_port,
-        request_model=request_model,
-        response_model=response_model,
-        service_name=application_name,
-        deployment_environment=environment,
-    )
-    metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
-    metrics["genai_requests"].add(1, attributes)
-    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-    metrics["genai_completion_tokens"].add(output_tokens, attributes)
-    metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, attributes)
-    metrics["genai_cost"].record(cost, attributes)
-    if tbt is not None:
-        metrics["genai_server_tbt"].record(tbt, attributes)
-    if ttft is not None:
-        metrics["genai_server_ttft"].record(ttft, attributes)
-
-def record_embedding_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
-    request_model, response_model, environment, application_name, start_time, end_time,
-    input_tokens, cost):
-    """
-    Record embedding-specific metrics for the operation.
-    """
-
-    attributes = create_metrics_attributes(
-        operation=gen_ai_operation,
-        system=gen_ai_system,
-        server_address=server_address,
-        server_port=server_port,
-        request_model=request_model,
-        response_model=response_model,
-        service_name=application_name,
-        deployment_environment=environment,
-    )
-    metrics["genai_client_usage_tokens"].record(input_tokens, attributes)
-    metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
-    metrics["genai_requests"].add(1, attributes)
-    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-    metrics["genai_cost"].record(cost, attributes)
-
 def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
     capture_message_content, disable_metrics, version, is_stream):
     """