openlit 1.34.3__py3-none-any.whl → 1.34.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,9 +14,7 @@ from openlit.__helpers import (
14
14
  general_tokens,
15
15
  extract_and_format_input,
16
16
  get_chat_model_cost,
17
- handle_exception,
18
17
  create_metrics_attributes,
19
- otel_event,
20
18
  concatenate_all_contents
21
19
  )
22
20
  from openlit.semcov import SemanticConvention
@@ -29,36 +27,38 @@ def setup_common_span_attributes(span, request_model, kwargs, tokens,
29
27
  """
30
28
 
31
29
  # Base attributes from SDK and operation settings.
32
- span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
30
+ span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
33
31
  span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
34
32
  span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_AI21)
35
33
  span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
36
34
  span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
37
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, kwargs.get('seed', ''))
38
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, kwargs.get('frequency_penalty', 0.0))
39
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, kwargs.get('max_tokens', -1))
40
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, kwargs.get('presence_penalty', 0.0))
41
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, kwargs.get('stop', []))
42
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, kwargs.get('temperature', 0.4))
43
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, kwargs.get('top_p', 1.0))
35
+ span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, kwargs.get("seed", ""))
36
+ span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, kwargs.get("frequency_penalty", 0.0))
37
+ span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, kwargs.get("max_tokens", -1))
38
+ span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, kwargs.get("presence_penalty", 0.0))
39
+ span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, kwargs.get("stop", []))
40
+ span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, kwargs.get("temperature", 0.4))
41
+ span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, kwargs.get("top_p", 1.0))
44
42
 
45
43
  # Add token-related attributes if available.
46
- if 'finish_reason' in tokens:
47
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [tokens['finish_reason']])
48
- if 'response_id' in tokens:
49
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, tokens['response_id'])
50
- if 'input_tokens' in tokens:
51
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, tokens['input_tokens'])
52
- if 'output_tokens' in tokens:
53
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, tokens['output_tokens'])
54
- if 'total_tokens' in tokens:
55
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS, tokens['total_tokens'])
44
+ if "finish_reason" in tokens:
45
+ span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [tokens["finish_reason"]])
46
+ if "response_id" in tokens:
47
+ span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, tokens["response_id"])
48
+ if "input_tokens" in tokens:
49
+ span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, tokens["input_tokens"])
50
+ if "output_tokens" in tokens:
51
+ span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, tokens["output_tokens"])
52
+ if "total_tokens" in tokens:
53
+ span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, tokens["total_tokens"])
56
54
 
57
55
  span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, request_model)
58
56
  span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
57
+
59
58
  # Environment and service identifiers.
60
59
  span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
61
60
  span.set_attribute(SERVICE_NAME, application_name)
61
+
62
62
  # Set any extra attributes passed in.
63
63
  for key, value in extra_attrs.items():
64
64
  span.set_attribute(key, value)
@@ -80,106 +80,15 @@ def record_common_metrics(metrics, application_name, environment, request_model,
80
80
  server_port=server_port,
81
81
  response_model=request_model,
82
82
  )
83
- metrics['genai_client_usage_tokens'].record(input_tokens + output_tokens, attributes)
84
- metrics['genai_client_operation_duration'].record(end_time - start_time, attributes)
83
+ metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, attributes)
84
+ metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
85
85
  if include_tbt and tbt_value is not None:
86
- metrics['genai_server_tbt'].record(tbt_value, attributes)
87
- metrics['genai_server_ttft'].record(end_time - start_time, attributes)
88
- metrics['genai_requests'].add(1, attributes)
89
- metrics['genai_completion_tokens'].add(output_tokens, attributes)
90
- metrics['genai_prompt_tokens'].add(input_tokens, attributes)
91
- metrics['genai_cost'].record(cost, attributes)
92
-
93
- def emit_common_events(event_provider, choices, finish_reason, llmresponse, formatted_messages,
94
- capture_message_content, n):
95
- """
96
- Emit events common to both chat and chat rag operations.
97
- """
98
-
99
- if n > 1:
100
- for choice in choices:
101
- choice_event_body = {
102
- 'finish_reason': finish_reason,
103
- 'index': choice.get('index', 0),
104
- 'message': {
105
- **({'content': choice.get('message', {}).get('content', '')} if capture_message_content else {}),
106
- 'role': choice.get('message', {}).get('role', 'assistant')
107
- }
108
- }
109
- # If tool calls exist, emit an event for each tool call.
110
- tool_calls = choice.get('message', {}).get('tool_calls')
111
- if tool_calls:
112
- for tool_call in tool_calls:
113
- choice_event_body['message'].update({
114
- 'tool_calls': {
115
- 'function': {
116
- 'name': tool_call.get('function', {}).get('name', ''),
117
- 'arguments': tool_call.get('function', {}).get('arguments', '')
118
- },
119
- 'id': tool_call.get('id', ''),
120
- 'type': tool_call.get('type', 'function')
121
- }
122
- })
123
- event = otel_event(
124
- name=SemanticConvention.GEN_AI_CHOICE,
125
- attributes={SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_AI21},
126
- body=choice_event_body
127
- )
128
- event_provider.emit(event)
129
- else:
130
- event = otel_event(
131
- name=SemanticConvention.GEN_AI_CHOICE,
132
- attributes={SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_AI21},
133
- body=choice_event_body
134
- )
135
- event_provider.emit(event)
136
- else:
137
- # Single choice case.
138
- choice_event_body = {
139
- 'finish_reason': finish_reason,
140
- 'index': 0,
141
- 'message': {
142
- **({'content': llmresponse} if capture_message_content else {}),
143
- 'role': 'assistant'
144
- }
145
- }
146
- event = otel_event(
147
- name=SemanticConvention.GEN_AI_CHOICE,
148
- attributes={SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_AI21},
149
- body=choice_event_body
150
- )
151
- event_provider.emit(event)
152
-
153
- # Emit additional role-based events (if formatted messages are available).
154
- for role in ['user', 'system', 'assistant', 'tool']:
155
- msg = formatted_messages.get(role, {})
156
- if msg.get('content', ''):
157
- event_body = {
158
- **({'content': msg.get('content', '')} if capture_message_content else {}),
159
- 'role': msg.get('role', [])
160
- }
161
- # For assistant messages, attach tool call details if they exist.
162
- if role == 'assistant' and choices:
163
- tool_calls = choices[0].get('message', {}).get('tool_calls', [])
164
- if tool_calls:
165
- event_body['tool_calls'] = {
166
- 'function': {
167
- 'name': tool_calls[0].get('function', {}).get('name', ''),
168
- 'arguments': tool_calls[0].get('function', {}).get('arguments', '')
169
- },
170
- 'id': tool_calls[0].get('id', ''),
171
- 'type': 'function'
172
- }
173
- if role == 'tool' and choices:
174
- tool_calls = choices[0].get('message', {}).get('tool_calls', [])
175
- if tool_calls:
176
- event_body['id'] = tool_calls[0].get('id', '')
177
- event = otel_event(
178
- name=getattr(SemanticConvention, f'GEN_AI_{role.upper()}_MESSAGE'),
179
- attributes={SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_AI21},
180
- body=event_body
181
- )
182
- event_provider.emit(event)
86
+ metrics["genai_server_tbt"].record(tbt_value, attributes)
87
+ metrics["genai_server_ttft"].record(end_time - start_time, attributes)
88
+ metrics["genai_requests"].add(1, attributes)
89
+ metrics["genai_completion_tokens"].add(output_tokens, attributes)
90
+ metrics["genai_prompt_tokens"].add(input_tokens, attributes)
91
+ metrics["genai_cost"].record(cost, attributes)
183
92
 
184
93
  def process_chunk(self, chunk):
185
94
  """
@@ -194,21 +103,20 @@ def process_chunk(self, chunk):
194
103
  self._ttft = calculate_ttft(self._timestamps, self._start_time)
195
104
 
196
105
  chunked = response_as_dict(chunk)
197
- if (len(chunked.get('choices')) > 0 and
198
- 'delta' in chunked.get('choices')[0] and
199
- 'content' in chunked.get('choices')[0].get('delta')):
200
- content = chunked.get('choices')[0].get('delta').get('content')
201
- if content:
106
+ if (len(chunked.get("choices")) > 0 and
107
+ "delta" in chunked.get("choices")[0] and
108
+ "content" in chunked.get("choices")[0].get("delta")):
109
+ if content := chunked.get("choices")[0].get("delta").get("content"):
202
110
  self._llmresponse += content
203
- if chunked.get('usage'):
204
- self._input_tokens = chunked.get('usage').get('prompt_tokens')
205
- self._output_tokens = chunked.get('usage').get('completion_tokens')
206
- self._response_id = chunked.get('id')
207
- self._choices += chunked.get('choices')
208
- self._finish_reason = chunked.get('choices')[0].get('finish_reason')
111
+ if chunked.get("usage"):
112
+ self._input_tokens = chunked.get("usage").get("prompt_tokens")
113
+ self._output_tokens = chunked.get("usage").get("completion_tokens")
114
+ self._response_id = chunked.get("id")
115
+ self._choices += chunked.get("choices")
116
+ self._finish_reason = chunked.get("choices")[0].get("finish_reason")
209
117
 
210
118
  def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
211
- event_provider, capture_message_content, disable_metrics, version, is_stream):
119
+ capture_message_content, disable_metrics, version, is_stream):
212
120
  """
213
121
  Process chat request and generate Telemetry.
214
122
  """
@@ -218,19 +126,19 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
218
126
  scope._tbt = calculate_tbt(scope._timestamps)
219
127
 
220
128
  # Extract and format input messages.
221
- formatted_messages = extract_and_format_input(scope._kwargs.get('messages', ''))
129
+ formatted_messages = extract_and_format_input(scope._kwargs.get("messages", ""))
222
130
  prompt = concatenate_all_contents(formatted_messages)
223
- request_model = scope._kwargs.get('model', 'jamba-1.5-mini')
131
+ request_model = scope._kwargs.get("model", "jamba-1.5-mini")
224
132
 
225
133
  # Calculate cost based on token usage.
226
134
  cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
227
135
  # Prepare tokens dictionary.
228
136
  tokens = {
229
- 'finish_reason': scope._finish_reason,
230
- 'response_id': scope._response_id,
231
- 'input_tokens': scope._input_tokens,
232
- 'output_tokens': scope._output_tokens,
233
- 'total_tokens': scope._input_tokens + scope._output_tokens,
137
+ "finish_reason": scope._finish_reason,
138
+ "response_id": scope._response_id,
139
+ "input_tokens": scope._input_tokens,
140
+ "output_tokens": scope._output_tokens,
141
+ "total_tokens": scope._input_tokens + scope._output_tokens,
234
142
  }
235
143
  extra_attrs = {
236
144
  SemanticConvention.GEN_AI_REQUEST_IS_STREAM: is_stream,
@@ -239,14 +147,13 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
239
147
  SemanticConvention.GEN_AI_SERVER_TBT: scope._tbt,
240
148
  SemanticConvention.GEN_AI_SERVER_TTFT: scope._ttft,
241
149
  SemanticConvention.GEN_AI_SDK_VERSION: version,
242
- SemanticConvention.GEN_AI_OUTPUT_TYPE: 'text' if isinstance(scope._llmresponse, str) else 'json'
150
+ SemanticConvention.GEN_AI_OUTPUT_TYPE: "text" if isinstance(scope._llmresponse, str) else "json"
243
151
  }
244
152
  # Set span attributes.
245
153
  setup_common_span_attributes(scope._span, request_model, scope._kwargs, tokens,
246
154
  scope._server_port, scope._server_address, environment,
247
155
  application_name, extra_attrs)
248
156
 
249
- # Optionally add events capturing the prompt and completion.
250
157
  if capture_message_content:
251
158
  scope._span.add_event(
252
159
  name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -257,11 +164,6 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
257
164
  attributes={SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse},
258
165
  )
259
166
 
260
- # Emit events for each choice and message role.
261
- n = scope._kwargs.get('n', 1)
262
- emit_common_events(event_provider, scope._choices, scope._finish_reason, scope._llmresponse,
263
- formatted_messages, capture_message_content, n)
264
-
265
167
  scope._span.set_status(Status(StatusCode.OK))
266
168
 
267
169
  if not disable_metrics:
@@ -272,23 +174,23 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
272
174
  include_tbt=True, tbt_value=scope._tbt)
273
175
 
274
176
  def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
275
- event_provider, capture_message_content=False, disable_metrics=False, version=''):
177
+ capture_message_content=False, disable_metrics=False, version=""):
276
178
  """
277
179
  Process a streaming chat response and generate Telemetry.
278
180
  """
279
181
 
280
182
  common_chat_logic(self, pricing_info, environment, application_name, metrics,
281
- event_provider, capture_message_content, disable_metrics, version, is_stream=True)
183
+ capture_message_content, disable_metrics, version, is_stream=True)
282
184
 
283
185
  def process_chat_response(response, request_model, pricing_info, server_port, server_address,
284
- environment, application_name, metrics, event_provider, start_time,
285
- span, capture_message_content=False, disable_metrics=False, version='1.0.0', **kwargs):
186
+ environment, application_name, metrics, start_time,
187
+ span, capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
286
188
  """
287
189
  Process a synchronous chat response and generate Telemetry.
288
190
  """
289
191
 
290
192
  # Create a generic scope object to hold telemetry data.
291
- self = type('GenericScope', (), {})()
193
+ self = type("GenericScope", (), {})()
292
194
  response_dict = response_as_dict(response)
293
195
 
294
196
  # pylint: disable = no-member
@@ -297,113 +199,102 @@ def process_chat_response(response, request_model, pricing_info, server_port, se
297
199
 
298
200
  self._span = span
299
201
  # Concatenate content from all choices.
300
- self._llmresponse = ''.join(
301
- (choice.get('message', {}).get('content') or '')
302
- for choice in response_dict.get('choices', [])
202
+ self._llmresponse = "".join(
203
+ (choice.get("message", {}).get("content") or "")
204
+ for choice in response_dict.get("choices", [])
303
205
  )
304
- self._response_role = response_dict.get('message', {}).get('role', 'assistant')
305
- self._input_tokens = response_dict.get('usage', {}).get('prompt_tokens', 0)
306
- self._output_tokens = response_dict.get('usage', {}).get('completion_tokens', 0)
307
- self._response_id = response_dict.get('id', '')
206
+ self._response_role = response_dict.get("message", {}).get("role", "assistant")
207
+ self._input_tokens = response_dict.get("usage", {}).get("prompt_tokens", 0)
208
+ self._output_tokens = response_dict.get("usage", {}).get("completion_tokens", 0)
209
+ self._response_id = response_dict.get("id", "")
308
210
  self._response_model = request_model
309
- self._finish_reason = response_dict.get('choices', [{}])[0].get('finish_reason')
211
+ self._finish_reason = response_dict.get("choices", [{}])[0].get("finish_reason")
310
212
  self._timestamps = []
311
213
  self._ttft, self._tbt = self._end_time - self._start_time, 0
312
214
  self._server_address, self._server_port = server_address, server_port
313
215
  self._kwargs = kwargs
314
- self._choices = response_dict.get('choices')
216
+ self._choices = response_dict.get("choices")
315
217
 
316
218
  common_chat_logic(self, pricing_info, environment, application_name, metrics,
317
- event_provider, capture_message_content, disable_metrics, version, is_stream=False)
219
+ capture_message_content, disable_metrics, version, is_stream=False)
318
220
 
319
221
  return response
320
222
 
321
223
  def process_chat_rag_response(response, request_model, pricing_info, server_port, server_address,
322
- environment, application_name, metrics, event_provider, start_time,
323
- span, capture_message_content=False, disable_metrics=False, version='1.0.0', **kwargs):
224
+ environment, application_name, metrics, start_time,
225
+ span, capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
324
226
  """
325
227
  Process a chat response and generate Telemetry.
326
228
  """
327
229
  end_time = time.time()
328
230
  response_dict = response_as_dict(response)
329
- try:
330
- # Format input messages into a single prompt string.
331
- messages_input = kwargs.get('messages', '')
332
- formatted_messages = extract_and_format_input(messages_input)
333
- prompt = concatenate_all_contents(formatted_messages)
334
- input_tokens = general_tokens(prompt)
335
-
336
- # Create tokens dict and RAG-specific extra attributes.
337
- tokens = {'response_id': response_dict.get('id'), 'input_tokens': input_tokens}
338
- extra_attrs = {
339
- SemanticConvention.GEN_AI_REQUEST_IS_STREAM: False,
340
- SemanticConvention.GEN_AI_SERVER_TTFT: end_time - start_time,
341
- SemanticConvention.GEN_AI_SDK_VERSION: version,
342
- SemanticConvention.GEN_AI_RAG_MAX_SEGMENTS: kwargs.get('max_segments', -1),
343
- SemanticConvention.GEN_AI_RAG_STRATEGY: kwargs.get('retrieval_strategy', 'segments'),
344
- SemanticConvention.GEN_AI_RAG_SIMILARITY_THRESHOLD: kwargs.get('retrieval_similarity_threshold', -1),
345
- SemanticConvention.GEN_AI_RAG_MAX_NEIGHBORS: kwargs.get('max_neighbors', -1),
346
- SemanticConvention.GEN_AI_RAG_FILE_IDS: str(kwargs.get('file_ids', '')),
347
- SemanticConvention.GEN_AI_RAG_DOCUMENTS_PATH: kwargs.get('path', '')
348
- }
349
- # Set common span attributes.
350
- setup_common_span_attributes(span, request_model, kwargs, tokens,
351
- server_port, server_address, environment, application_name,
352
- extra_attrs)
353
-
354
- # Record the prompt event if requested.
355
- if capture_message_content:
356
- span.add_event(
357
- name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
358
- attributes={SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt},
359
- )
360
-
361
- output_tokens = 0
362
- choices = response_dict.get('choices', [])
363
- # Instead of adding a separate event per choice, we aggregate all completion content.
364
- aggregated_completion = []
365
- for i in range(kwargs.get('n', 1)):
366
- # Get the response content from each choice and count tokens.
367
- content = choices[i].get('content', '')
368
- aggregated_completion.append(content)
369
- output_tokens += general_tokens(content)
370
- if kwargs.get('tools'):
371
- span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
372
- str(choices[i].get('message', {}).get('tool_calls')))
373
- # Set output type based on actual content type.
374
- if isinstance(content, str):
375
- span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, 'text')
376
- elif content is not None:
377
- span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, 'json')
378
-
379
- # Concatenate completion responses.
380
- llmresponse = ''.join(aggregated_completion)
381
- tokens['output_tokens'] = output_tokens
382
- tokens['total_tokens'] = input_tokens + output_tokens
383
-
384
- cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
385
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
386
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
387
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS, input_tokens + output_tokens)
388
-
389
- span.set_status(Status(StatusCode.OK))
390
- # Emit a single aggregated completion event.
391
- if capture_message_content:
392
- span.add_event(
393
- name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
394
- attributes={SemanticConvention.GEN_AI_CONTENT_COMPLETION: llmresponse},
395
- )
396
- # Emit the rest of the events (choice and role-based events) as before.
397
- n = kwargs.get('n', 1)
398
- emit_common_events(event_provider, choices, choices[0].get('finish_reason', ''),
399
- llmresponse, formatted_messages, capture_message_content, n)
400
-
401
- if not disable_metrics:
402
- record_common_metrics(metrics, application_name, environment, request_model,
403
- server_address, server_port, start_time, end_time,
404
- input_tokens, output_tokens, cost, include_tbt=False)
405
- return response
406
-
407
- except Exception as e:
408
- handle_exception(span, e)
409
- return response
231
+ # Format input messages into a single prompt string.
232
+ messages_input = kwargs.get("messages", "")
233
+ formatted_messages = extract_and_format_input(messages_input)
234
+ prompt = concatenate_all_contents(formatted_messages)
235
+ input_tokens = general_tokens(prompt)
236
+
237
+ # Create tokens dict and RAG-specific extra attributes.
238
+ tokens = {"response_id": response_dict.get("id"), "input_tokens": input_tokens}
239
+ extra_attrs = {
240
+ SemanticConvention.GEN_AI_REQUEST_IS_STREAM: False,
241
+ SemanticConvention.GEN_AI_SERVER_TTFT: end_time - start_time,
242
+ SemanticConvention.GEN_AI_SDK_VERSION: version,
243
+ SemanticConvention.GEN_AI_RAG_MAX_SEGMENTS: kwargs.get("max_segments", -1),
244
+ SemanticConvention.GEN_AI_RAG_STRATEGY: kwargs.get("retrieval_strategy", "segments"),
245
+ SemanticConvention.GEN_AI_RAG_SIMILARITY_THRESHOLD: kwargs.get("retrieval_similarity_threshold", -1),
246
+ SemanticConvention.GEN_AI_RAG_MAX_NEIGHBORS: kwargs.get("max_neighbors", -1),
247
+ SemanticConvention.GEN_AI_RAG_FILE_IDS: str(kwargs.get("file_ids", "")),
248
+ SemanticConvention.GEN_AI_RAG_DOCUMENTS_PATH: kwargs.get("path", "")
249
+ }
250
+ # Set common span attributes.
251
+ setup_common_span_attributes(span, request_model, kwargs, tokens,
252
+ server_port, server_address, environment, application_name,
253
+ extra_attrs)
254
+
255
+ if capture_message_content:
256
+ span.add_event(
257
+ name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
258
+ attributes={SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt},
259
+ )
260
+
261
+ output_tokens = 0
262
+ choices = response_dict.get("choices", [])
263
+ aggregated_completion = []
264
+ for i in range(kwargs.get("n", 1)):
265
+ # Get the response content from each choice and count tokens.
266
+ content = choices[i].get("content", "")
267
+ aggregated_completion.append(content)
268
+ output_tokens += general_tokens(content)
269
+ if kwargs.get("tools"):
270
+ span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
271
+ str(choices[i].get("message", {}).get("tool_calls")))
272
+ # Set output type based on actual content type.
273
+ if isinstance(content, str):
274
+ span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
275
+ elif content is not None:
276
+ span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "json")
277
+
278
+ # Concatenate completion responses.
279
+ llmresponse = "".join(aggregated_completion)
280
+ tokens["output_tokens"] = output_tokens
281
+ tokens["total_tokens"] = input_tokens + output_tokens
282
+
283
+ cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
284
+ span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
285
+ span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
286
+ span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
287
+
288
+ span.set_status(Status(StatusCode.OK))
289
+
290
+ if capture_message_content:
291
+ span.add_event(
292
+ name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
293
+ attributes={SemanticConvention.GEN_AI_CONTENT_COMPLETION: llmresponse},
294
+ )
295
+
296
+ if not disable_metrics:
297
+ record_common_metrics(metrics, application_name, environment, request_model,
298
+ server_address, server_port, start_time, end_time,
299
+ input_tokens, output_tokens, cost, include_tbt=False)
300
+ return response
@@ -1,4 +1,3 @@
1
- # pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
2
1
  """Initializer of Auto Instrumentation of Google AI Studio Functions"""
3
2
 
4
3
  from typing import Collection
@@ -25,8 +24,8 @@ class GoogleAIStudioInstrumentor(BaseInstrumentor):
25
24
  return _instruments
26
25
 
27
26
  def _instrument(self, **kwargs):
28
- application_name = kwargs.get("application_name", "default_application")
29
- environment = kwargs.get("environment", "default_environment")
27
+ application_name = kwargs.get("application_name", "default")
28
+ environment = kwargs.get("environment", "default")
30
29
  tracer = kwargs.get("tracer")
31
30
  metrics = kwargs.get("metrics_dict")
32
31
  pricing_info = kwargs.get("pricing_info", {})
@@ -67,5 +66,4 @@ class GoogleAIStudioInstrumentor(BaseInstrumentor):
67
66
  )
68
67
 
69
68
  def _uninstrument(self, **kwargs):
70
- # Proper uninstrumentation logic to revert patched methods
71
69
  pass
@@ -2,7 +2,6 @@
2
2
  Module for monitoring Google AI Studio API calls.
3
3
  """
4
4
 
5
- import logging
6
5
  import time
7
6
  from opentelemetry.trace import SpanKind
8
7
  from openlit.__helpers import (
@@ -16,9 +15,6 @@ from openlit.instrumentation.google_ai_studio.utils import (
16
15
  )
17
16
  from openlit.semcov import SemanticConvention
18
17
 
19
- # Initialize logger for logging potential issues and operations
20
- logger = logging.getLogger(__name__)
21
-
22
18
  def async_generate(version, environment, application_name,
23
19
  tracer, pricing_info, capture_message_content, metrics, disable_metrics):
24
20
  """
@@ -61,7 +57,6 @@ def async_generate(version, environment, application_name,
61
57
 
62
58
  except Exception as e:
63
59
  handle_exception(span, e)
64
- logger.error("Error in trace creation: %s", e)
65
60
 
66
61
  # Return original response
67
62
  return response
@@ -144,7 +139,6 @@ def async_generate_stream(version, environment, application_name,
144
139
 
145
140
  except Exception as e:
146
141
  handle_exception(self._span, e)
147
- logger.error("Error in trace creation: %s", e)
148
142
  raise
149
143
 
150
144
  async def wrapper(wrapped, instance, args, kwargs):
@@ -2,7 +2,6 @@
2
2
  Module for monitoring Google AI Studio API calls.
3
3
  """
4
4
 
5
- import logging
6
5
  import time
7
6
  from opentelemetry.trace import SpanKind
8
7
  from openlit.__helpers import (
@@ -16,9 +15,6 @@ from openlit.instrumentation.google_ai_studio.utils import (
16
15
  )
17
16
  from openlit.semcov import SemanticConvention
18
17
 
19
- # Initialize logger for logging potential issues and operations
20
- logger = logging.getLogger(__name__)
21
-
22
18
  def generate(version, environment, application_name,
23
19
  tracer, pricing_info, capture_message_content, metrics, disable_metrics):
24
20
  """
@@ -61,7 +57,6 @@ def generate(version, environment, application_name,
61
57
 
62
58
  except Exception as e:
63
59
  handle_exception(span, e)
64
- logger.error("Error in trace creation: %s", e)
65
60
 
66
61
  # Return original response
67
62
  return response
@@ -144,7 +139,6 @@ def generate_stream(version, environment, application_name,
144
139
 
145
140
  except Exception as e:
146
141
  handle_exception(self._span, e)
147
- logger.error("Error in trace creation: %s", e)
148
142
  raise
149
143
 
150
144
  def wrapper(wrapped, instance, args, kwargs):
@@ -15,8 +15,7 @@ from openlit.semcov import SemanticConvention
15
15
 
16
16
  def format_content(messages):
17
17
  """
18
- Process a list of messages to extract content, categorize them by role,
19
- and concatenate all 'content' fields into a single string with role: content format.
18
+ Process a list of messages to extract content.
20
19
  """
21
20
 
22
21
  formatted_messages = []
@@ -1,4 +1,3 @@
1
- # pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
2
1
  """Initializer of Auto Instrumentation of Together AI Functions"""
3
2
 
4
3
  from typing import Collection
@@ -17,15 +16,15 @@ _instruments = ("together >= 1.3.5",)
17
16
 
18
17
  class TogetherInstrumentor(BaseInstrumentor):
19
18
  """
20
- An instrumentor for Together's client library.
19
+ An instrumentor for Together client library.
21
20
  """
22
21
 
23
22
  def instrumentation_dependencies(self) -> Collection[str]:
24
23
  return _instruments
25
24
 
26
25
  def _instrument(self, **kwargs):
27
- application_name = kwargs.get("application_name", "default_application")
28
- environment = kwargs.get("environment", "default_environment")
26
+ application_name = kwargs.get("application_name", "default")
27
+ environment = kwargs.get("environment", "default")
29
28
  tracer = kwargs.get("tracer")
30
29
  metrics = kwargs.get("metrics_dict")
31
30
  pricing_info = kwargs.get("pricing_info", {})
@@ -66,5 +65,4 @@ class TogetherInstrumentor(BaseInstrumentor):
66
65
  )
67
66
 
68
67
  def _uninstrument(self, **kwargs):
69
- # Proper uninstrumentation logic to revert patched methods
70
68
  pass