openlit 1.34.7__py3-none-any.whl → 1.34.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,6 @@ Ollama OpenTelemetry instrumentation utility functions
  """
  import time

- from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
  from opentelemetry.trace import Status, StatusCode

  from openlit.__helpers import (
@@ -11,22 +10,42 @@ from openlit.__helpers import (
      response_as_dict,
      calculate_tbt,
      general_tokens,
-     extract_and_format_input,
      get_chat_model_cost,
      get_embed_model_cost,
-     handle_exception,
      create_metrics_attributes,
-     otel_event,
-     concatenate_all_contents
+     common_span_attributes,
+     record_completion_metrics,
  )
  from openlit.semcov import SemanticConvention

+ def format_content(messages):
+     """
+     Process a list of messages to extract content.
+     """
+
+     formatted_messages = []
+     for message in messages:
+         role = message["role"]
+         content = message["content"]
+
+         if isinstance(content, list):
+             content_str = ", ".join(
+                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                 if "type" in item else f'text: {item["text"]}'
+                 for item in content
+             )
+             formatted_messages.append(f"{role}: {content_str}")
+         else:
+             formatted_messages.append(f"{role}: {content}")
+
+     return "\n".join(formatted_messages)
+
  def process_chunk(self, chunk):
      """
      Process a chunk of response data and update state.
      """

-     end_time = time.time()
+     end_time = time.monotonic()
      # Record the timestamp for the current chunk
      self._timestamps.append(end_time)

@@ -35,79 +54,101 @@ def process_chunk(self, chunk):
          self._ttft = calculate_ttft(self._timestamps, self._start_time)

      chunked = response_as_dict(chunk)
-     self._llmresponse += chunked.get('message', {}).get('content', '')
+     self._llmresponse += chunked.get("message", {}).get("content", "")
+
+     if chunked.get("message", {}).get("tool_calls"):
+         self._tool_calls = chunked["message"]["tool_calls"]

-     if chunked.get('message', {}).get('tool_calls'):
-         self._tool_calls = chunked['message']['tool_calls']
+     if chunked.get("eval_count"):
+         self._response_role = chunked.get("message", {}).get("role", "")
+         self._input_tokens = chunked.get("prompt_eval_count", 0)
+         self._output_tokens = chunked.get("eval_count", 0)
+         self._response_model = chunked.get("model", "")
+         self._finish_reason = chunked.get("done_reason", "")

-     if chunked.get('eval_count'):
-         self._response_role = chunked.get('message', {}).get('role', '')
-         self._input_tokens = chunked.get('prompt_eval_count', 0)
-         self._output_tokens = chunked.get('eval_count', 0)
-         self._response_model = chunked.get('model', '')
-         self._finish_reason = chunked.get('done_reason', '')
+ def record_embedding_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
+     request_model, response_model, environment, application_name, start_time, end_time, cost, input_tokens):
+     """
+     Record embedding metrics for the operation.
+     """

- def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-     event_provider, capture_message_content, disable_metrics, version, is_stream):
+     attributes = create_metrics_attributes(
+         operation=gen_ai_operation,
+         system=gen_ai_system,
+         server_address=server_address,
+         server_port=server_port,
+         request_model=request_model,
+         response_model=response_model,
+         service_name=application_name,
+         deployment_environment=environment,
+     )
+     metrics["genai_client_usage_tokens"].record(input_tokens, attributes)
+     metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
+     metrics["genai_requests"].add(1, attributes)
+     metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+     metrics["genai_cost"].record(cost, attributes)
+
+ def common_chat_logic(scope, gen_ai_endpoint, pricing_info, environment, application_name,
+     metrics, capture_message_content, disable_metrics, version):
      """
      Process chat request and generate Telemetry
      """

-     scope._end_time = time.time()
+     scope._end_time = time.monotonic()
      if len(scope._timestamps) > 1:
          scope._tbt = calculate_tbt(scope._timestamps)
-
      json_body = scope._kwargs.get("json", {}) or {}
-     request_model = json_body.get("model") or scope._kwargs.get("model")
      messages = json_body.get("messages", scope._kwargs.get("messages", ""))
-     formatted_messages = extract_and_format_input(messages)
+     prompt = format_content(messages)
+     request_model = json_body.get("model") or scope._kwargs.get("model", "llama3.2")
+     is_stream = scope._kwargs.get("stream", False)

      cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)

-     # Set Span attributes (OTel Semconv)
-     scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-     scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-     scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_OLLAMA)
-     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
-     scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
+     # Common Span Attributes
+     common_span_attributes(scope,
+         SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
+         scope._server_address, scope._server_port, request_model, request_model,
+         environment, application_name, is_stream, scope._tbt, scope._ttft, version)

-     options = scope._kwargs.get('options', {})
+     # Span Attributes for Request parameters
+     options = json_body.get("options", scope._kwargs.get("options", {}))
      attributes = [
-         (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'repeat_penalty'),
-         (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
-         (SemanticConvention.GEN_AI_REQUEST_SEED, 'seed'),
-         (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop'),
-         (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
-         (SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
-         (SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
+         (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, "repeat_penalty"),
+         (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, "max_tokens"),
+         (SemanticConvention.GEN_AI_REQUEST_SEED, "seed"),
+         (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, "stop"),
+         (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, "temperature"),
+         (SemanticConvention.GEN_AI_REQUEST_TOP_P, "top_p"),
+         (SemanticConvention.GEN_AI_REQUEST_TOP_K, "top_k"),
      ]
-
      for attribute, key in attributes:
          value = options.get(key)
          if value is not None:
              scope._span.set_attribute(attribute, value)

+     # Span Attributes for Response parameters
      scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
-     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._response_model)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,"text" if isinstance(scope._llmresponse, str) else "json")
+
+     # Span Attributes for Cost and Tokens
      scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
      scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
-     scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
-
-     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-         "text" if isinstance(scope._llmresponse, str) else "json")
-
-     scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
-     scope._span.set_attribute(SERVICE_NAME, application_name)
-     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
      scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
      scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
-     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
-     scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)

-     # To be removed one the change to log events (from span events) is complete
-     prompt = concatenate_all_contents(formatted_messages)
+     # Span Attributes for Tools
+     if hasattr(scope, "_tool_calls") and scope._tool_calls:
+         tool_call = scope._tool_calls[0]
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, tool_call.get("function", {}).get("name", ""))
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, str(tool_call.get("function", {}).get("arguments", "")))
+
+     # Span Attributes for Content
      if capture_message_content:
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+         # To be removed once the change to span_attributes (from span events) is complete
          scope._span.add_event(
              name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
              attributes={
@@ -121,216 +162,116 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
              },
          )

-     choice_event_body = {
-         "finish_reason": scope._finish_reason,
-         "index": 0,
-         "message": {
-             **({"content": scope._llmresponse} if capture_message_content else {}),
-             "role": scope._response_role
-         }
-     }
-
-     if scope._tool_calls:
-         function_call = scope._tool_calls[0]
-         choice_event_body["message"].update({
-             "tool_calls": {
-                 "function": {
-                     "name": function_call.get('function', {}).get('name', ''),
-                     "arguments": function_call.get('function', {}).get('arguments', '')
-                 },
-                 "id": function_call.get('id', ''),
-                 "type": "function"
-             }
-         })
-
-     # Emit events
-     for role in ['user', 'system', 'assistant', 'tool']:
-         if formatted_messages.get(role, {}).get('content', ''):
-             event = otel_event(
-                 name=getattr(SemanticConvention, f'GEN_AI_{role.upper()}_MESSAGE'),
-                 attributes={
-                     SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_OLLAMA
-                 },
-                 body = {
-                     # pylint: disable=line-too-long
-                     **({"content": formatted_messages.get(role, {}).get('content', '')} if capture_message_content else {}),
-                     "role": formatted_messages.get(role, {}).get('role', []),
-                     **({
-                         "tool_calls": {
-                             "function": {
-                                 # pylint: disable=line-too-long
-                                 "name": (scope._tool_calls[0].get('function', {}).get('name', '') if scope._tool_calls else ''),
-                                 "arguments": (scope._tool_calls[0].get('function', {}).get('arguments', '') if scope._tool_calls else '')
-                             },
-                             "id": (scope._tool_calls[0].get('id', '') if scope._tool_calls else ''),
-                             "type": "function"
-                         }
-                     } if role == 'assistant' else {}),
-                     **({
-                         "id": (scope._tool_calls[0].get('id', '') if scope._tool_calls else '')
-                     } if role == 'tool' else {})
-                 }
-             )
-             event_provider.emit(event)
-
-     choice_event = otel_event(
-         name=SemanticConvention.GEN_AI_CHOICE,
-         attributes={
-             SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_OLLAMA
-         },
-         body=choice_event_body
-     )
-     event_provider.emit(choice_event)
-
      scope._span.set_status(Status(StatusCode.OK))

+     # Metrics
      if not disable_metrics:
-         metrics_attributes = create_metrics_attributes(
-             service_name=application_name,
-             deployment_environment=environment,
-             operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-             system=SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
-             request_model=request_model,
-             server_address=scope._server_address,
-             server_port=scope._server_port,
-             response_model=scope._response_model,
-         )
+         record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+             SemanticConvention.GEN_AI_SYSTEM_OLLAMA, scope._server_address, scope._server_port,
+             request_model, scope._response_model, environment, application_name, scope._start_time,
+             scope._end_time, cost, scope._input_tokens, scope._output_tokens, scope._tbt, scope._ttft)
+
+ def common_embedding_logic(scope, gen_ai_endpoint, pricing_info, environment, application_name,
+     metrics, capture_message_content, disable_metrics, version):
+     """
+     Process embedding request and generate Telemetry
+     """
+
+     json_body = scope._kwargs.get("json", {}) or {}
+     request_model = json_body.get("model") or scope._kwargs.get("model", "llama3.2")
+     prompt_val = json_body.get("prompt", scope._kwargs.get("prompt", ""))
+     input_tokens = general_tokens(str(prompt_val))
+     is_stream = False # Ollama embeddings are not streaming
+
+     cost = get_embed_model_cost(request_model, pricing_info, input_tokens)

-         metrics["genai_client_usage_tokens"].record(scope._input_tokens + scope._output_tokens, metrics_attributes)
-         metrics["genai_client_operation_duration"].record(scope._end_time - scope._start_time, metrics_attributes)
-         metrics["genai_server_tbt"].record(scope._tbt, metrics_attributes)
-         metrics["genai_server_ttft"].record(scope._ttft, metrics_attributes)
-         metrics["genai_requests"].add(1, metrics_attributes)
-         metrics["genai_completion_tokens"].add(scope._output_tokens, metrics_attributes)
-         metrics["genai_prompt_tokens"].add(scope._input_tokens, metrics_attributes)
-         metrics["genai_cost"].record(cost, metrics_attributes)
+     # Common Span Attributes
+     common_span_attributes(scope,
+         SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
+         scope._server_address, scope._server_port, request_model, request_model,
+         environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+     # Span Attributes for Embedding-specific parameters
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens)
+
+     # Span Attributes for Cost
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+     # Span Attributes for Content
+     if capture_message_content:
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt_val)
+
+     scope._span.set_status(Status(StatusCode.OK))
+
+     # Metrics
+     if not disable_metrics:
+         record_embedding_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
+             SemanticConvention.GEN_AI_SYSTEM_OLLAMA, scope._server_address, scope._server_port,
+             request_model, request_model, environment, application_name, scope._start_time,
+             scope._end_time, cost, input_tokens)

  def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
-     event_provider, capture_message_content=False, disable_metrics=False, version=''):
+     capture_message_content=False, disable_metrics=False, version=""):
      """
-     Process chat request and generate Telemetry
+     Process streaming chat request and generate Telemetry
      """

-     common_chat_logic(self, pricing_info, environment, application_name, metrics,
-         event_provider, capture_message_content, disable_metrics, version, is_stream=True)
+     common_chat_logic(self, "ollama.chat", pricing_info, environment, application_name, metrics,
+         capture_message_content, disable_metrics, version)

- def process_chat_response(response, request_model, pricing_info, server_port, server_address,
-     environment, application_name, metrics, event_provider, start_time,
-     span, capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
+ def process_chat_response(response, gen_ai_endpoint, pricing_info, server_port, server_address,
+     environment, application_name, metrics, start_time, span, capture_message_content=False,
+     disable_metrics=False, version="1.0.0", **kwargs):
      """
      Process chat request and generate Telemetry
      """

-     self = type('GenericScope', (), {})()
-
-     # pylint: disable = no-member
-     self._start_time = start_time
-     self._end_time = time.time()
-     self._span = span
-     self._llmresponse = response.get('message', {}).get('content', '')
-     self._response_role = response.get('message', {}).get('role', 'assistant')
-     self._input_tokens = response.get('prompt_eval_count')
-     self._output_tokens = response.get('eval_count')
-     self._response_model = response.get('model', '')
-     self._finish_reason = response.get('done_reason', '')
-     self._timestamps = []
-     self._ttft, self._tbt = self._end_time - self._start_time, 0
-     self._server_address, self._server_port = server_address, server_port
-     self._kwargs = kwargs
-     self._tool_calls = response.get('message', {}).get('tool_calls', [])
-
-     common_chat_logic(self, pricing_info, environment, application_name, metrics,
-         event_provider, capture_message_content, disable_metrics, version, is_stream=False)
+     scope = type("GenericScope", (), {})()
+     response_dict = response_as_dict(response)
+
+     scope._start_time = start_time
+     scope._end_time = time.monotonic()
+     scope._span = span
+     scope._llmresponse = response_dict.get("message", {}).get("content", "")
+     scope._response_role = response_dict.get("message", {}).get("role", "assistant")
+     scope._input_tokens = response_dict.get("prompt_eval_count", 0)
+     scope._output_tokens = response_dict.get("eval_count", 0)
+     scope._response_model = response_dict.get("model", "llama3.2")
+     scope._finish_reason = response_dict.get("done_reason", "")
+     scope._timestamps = []
+     scope._ttft = scope._end_time - scope._start_time
+     scope._tbt = 0
+     scope._server_address, scope._server_port = server_address, server_port
+     scope._kwargs = kwargs
+     scope._tool_calls = response_dict.get("message", {}).get("tool_calls", [])
+
+     common_chat_logic(scope, gen_ai_endpoint, pricing_info, environment, application_name, metrics,
+         capture_message_content, disable_metrics, version)

      return response

- def process_embedding_response(response, request_model, pricing_info, server_port, server_address,
-     environment, application_name, metrics, event_provider,
-     start_time, span, capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
+ def process_embedding_response(response, gen_ai_endpoint, pricing_info, server_port, server_address,
+     environment, application_name, metrics, start_time, span, capture_message_content=False,
+     disable_metrics=False, version="1.0.0", **kwargs):
      """
      Process embedding request and generate Telemetry
      """

-     end_time = time.time()
-
-     try:
-         json_body = kwargs.get("json", {}) or {}
-         prompt_val = json_body.get('prompt', kwargs.get('prompt', ''))
-         input_tokens = general_tokens(str(prompt_val))
-
-         # Calculate cost of the operation
-         cost = get_embed_model_cost(request_model,
-             pricing_info, input_tokens)
-
-         # Set Span attributes (OTel Semconv)
-         span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
-         span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-             SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING)
-         span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-             SemanticConvention.GEN_AI_SYSTEM_OLLAMA)
-         span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-             request_model)
-         span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-             request_model)
-         span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-             server_address)
-         span.set_attribute(SemanticConvention.SERVER_PORT,
-             server_port)
-         span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-             input_tokens)
-
-         # Set Span attributes (Extras)
-         span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-             environment)
-         span.set_attribute(SERVICE_NAME,
-             application_name)
-         span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
-             input_tokens)
-         span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-             cost)
-         span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-             version)
-
-         prompt_event = otel_event(
-             name=SemanticConvention.GEN_AI_USER_MESSAGE,
-             attributes={
-                 SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_OLLAMA
-             },
-             body={
-                 **({"content": prompt_val} if capture_message_content else {}),
-                 "role": 'user'
-             }
-         )
-         event_provider.emit(prompt_event)
-
-         span.set_status(Status(StatusCode.OK))
-
-         if disable_metrics is False:
-             attributes = create_metrics_attributes(
-                 service_name=application_name,
-                 deployment_environment=environment,
-                 operation=SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                 system=SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
-                 request_model=request_model,
-                 server_address=server_address,
-                 server_port=server_port,
-                 response_model=request_model,
-             )
-             metrics['genai_client_usage_tokens'].record(
-                 input_tokens, attributes
-             )
-             metrics['genai_client_operation_duration'].record(
-                 end_time - start_time, attributes
-             )
-             metrics['genai_requests'].add(1, attributes)
-             metrics['genai_prompt_tokens'].add(input_tokens, attributes)
-             metrics['genai_cost'].record(cost, attributes)
+     scope = type("GenericScope", (), {})()

-         # Return original response
-         return response
+     scope._start_time = start_time
+     scope._end_time = time.monotonic()
+     scope._span = span
+     scope._server_address, scope._server_port = server_address, server_port
+     scope._kwargs = kwargs

-     except Exception as e:
-         handle_exception(span, e)
+     # Initialize streaming and timing values for Ollama embeddings
+     scope._response_model = kwargs.get("model", "llama3.2")
+     scope._tbt = 0.0
+     scope._ttft = scope._end_time - scope._start_time

-         # Return original response
-         return response
+     common_embedding_logic(scope, gen_ai_endpoint, pricing_info, environment, application_name,
+         metrics, capture_message_content, disable_metrics, version)
+
+     return response
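
As an illustrative aside (not part of the diff itself): the new format_content helper added in this file flattens an Ollama messages list into one "role: content" line per message, joining multi-part content as "type: value" pairs. A minimal sketch of its expected behaviour, using made-up message data:

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": [
            {"type": "text", "text": "Describe this image"},
            {"type": "image_url", "image_url": "https://example.com/cat.png"},
        ]},
    ]

    # String content passes through as "role: content"; list content becomes "type: value" pairs.
    print(format_content(messages))
    # system: You are a helpful assistant.
    # user: text: Describe this image, image_url: https://example.com/cat.png
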
@@ -20,8 +20,8 @@ class PremAIInstrumentor(BaseInstrumentor):
          return _instruments

      def _instrument(self, **kwargs):
-         application_name = kwargs.get("application_name", "default_application")
-         environment = kwargs.get("environment", "default_environment")
+         application_name = kwargs.get("application_name", "default")
+         environment = kwargs.get("environment", "default")
          tracer = kwargs.get("tracer")
          metrics = kwargs.get("metrics_dict")
          pricing_info = kwargs.get("pricing_info", {})
@@ -64,6 +64,7 @@ def process_chunk(scope, chunk):
          scope._finish_reason = chunked.get("choices")[0].get("finish_reason")
          scope._response_id = chunked.get("id")
          scope._response_model = chunked.get("model")
+         scope._end_time = time.time()

  def common_span_attributes(scope, gen_ai_operation, gen_ai_system, server_address, server_port,
      request_model, response_model, environment, application_name, is_stream, tbt, ttft, version):
@@ -77,12 +78,12 @@ def common_span_attributes(scope, gen_ai_operation, gen_ai_system, server_addres
      scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
      scope._span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
      scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
-     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._response_model)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, response_model)
      scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
      scope._span.set_attribute(SERVICE_NAME, application_name)
      scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
-     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
-     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, tbt)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, ttft)
      scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)

  def record_completion_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
@@ -48,12 +48,12 @@ def common_span_attributes(scope, gen_ai_operation, gen_ai_system, server_addres
      scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
      scope._span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
      scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
-     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._response_model)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, response_model)
      scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
      scope._span.set_attribute(SERVICE_NAME, application_name)
      scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
-     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
-     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, tbt)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, ttft)
      scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)

  def record_common_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
@@ -80,12 +80,12 @@ def common_span_attributes(scope, gen_ai_operation, gen_ai_system, server_addres
      scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
      scope._span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
      scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
-     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._response_model)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, response_model)
      scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
      scope._span.set_attribute(SERVICE_NAME, application_name)
      scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
-     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
-     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, tbt)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, ttft)
      scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)

  def record_common_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
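
The three hunks above apply the same fix to duplicated copies of common_span_attributes: the GEN_AI_RESPONSE_MODEL, GEN_AI_SERVER_TBT and GEN_AI_SERVER_TTFT span attributes are now taken from the response_model, tbt and ttft parameters instead of being read back off the scope object. A minimal sketch of the difference, with placeholder values and a throwaway scope (assumed setup for illustration, not code from the package):

    from opentelemetry import trace
    from openlit.semcov import SemanticConvention
    # The Ollama hunk above imports this shared helper from openlit.__helpers;
    # the same signature is used by the per-provider copies patched here.
    from openlit.__helpers import common_span_attributes

    scope = type("GenericScope", (), {})()
    scope._span = trace.get_tracer("demo").start_span("demo")
    # Deliberately stale values on the scope, to contrast old and new behaviour.
    scope._response_model, scope._tbt, scope._ttft = "stale-model", 9.9, 9.9

    # All values below are illustrative placeholders.
    common_span_attributes(scope,
        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_OLLAMA,
        "127.0.0.1", 8080, "example-model", "fresh-model",
        "production", "demo-app", False, 0.12, 0.03, "1.34.10")
    # With the pre-fix code, the span recorded "stale-model" and 9.9/9.9 from the scope;
    # with the fix it records the "fresh-model", 0.12 and 0.03 arguments that were passed in.
    scope._span.end()
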
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: openlit
- Version: 1.34.7
+ Version: 1.34.10
  Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
  License: Apache-2.0
  Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu