openlit 1.34.10__py3-none-any.whl → 1.34.12__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
@@ -4,7 +4,6 @@ AI21 OpenTelemetry instrumentation utility functions
 
 import time
 
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from opentelemetry.trace import Status, StatusCode
 
 from openlit.__helpers import (
@@ -12,289 +11,307 @@ from openlit.__helpers import (
     response_as_dict,
     calculate_tbt,
     general_tokens,
-    extract_and_format_input,
     get_chat_model_cost,
-    create_metrics_attributes,
-    concatenate_all_contents
+    common_span_attributes,
+    record_completion_metrics,
 )
 from openlit.semcov import SemanticConvention
 
-def setup_common_span_attributes(span, request_model, kwargs, tokens,
-                                 server_port, server_address, environment,
-                                 application_name, extra_attrs):
+def format_content(messages):
     """
-    Set common span attributes for both chat and RAG operations.
+    Process a list of messages to extract content.
     """
 
-    # Base attributes from SDK and operation settings.
-    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-    span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-    span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_AI21)
-    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
-    span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
-    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, kwargs.get("seed", ""))
-    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, kwargs.get("frequency_penalty", 0.0))
-    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, kwargs.get("max_tokens", -1))
-    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, kwargs.get("presence_penalty", 0.0))
-    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, kwargs.get("stop", []))
-    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, kwargs.get("temperature", 0.4))
-    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, kwargs.get("top_p", 1.0))
-
-    # Add token-related attributes if available.
-    if "finish_reason" in tokens:
-        span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [tokens["finish_reason"]])
-    if "response_id" in tokens:
-        span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, tokens["response_id"])
-    if "input_tokens" in tokens:
-        span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, tokens["input_tokens"])
-    if "output_tokens" in tokens:
-        span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, tokens["output_tokens"])
-    if "total_tokens" in tokens:
-        span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, tokens["total_tokens"])
-
-    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, request_model)
-    span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
-
-    # Environment and service identifiers.
-    span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
-    span.set_attribute(SERVICE_NAME, application_name)
-
-    # Set any extra attributes passed in.
-    for key, value in extra_attrs.items():
-        span.set_attribute(key, value)
-
-def record_common_metrics(metrics, application_name, environment, request_model,
-                          server_address, server_port, start_time, end_time,
-                          input_tokens, output_tokens, cost, include_tbt=False, tbt_value=None):
-    """
-    Record common metrics for the operation.
-    """
-
-    attributes = create_metrics_attributes(
-        service_name=application_name,
-        deployment_environment=environment,
-        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-        system=SemanticConvention.GEN_AI_SYSTEM_AI21,
-        request_model=request_model,
-        server_address=server_address,
-        server_port=server_port,
-        response_model=request_model,
-    )
-    metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, attributes)
-    metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
-    if include_tbt and tbt_value is not None:
-        metrics["genai_server_tbt"].record(tbt_value, attributes)
-    metrics["genai_server_ttft"].record(end_time - start_time, attributes)
-    metrics["genai_requests"].add(1, attributes)
-    metrics["genai_completion_tokens"].add(output_tokens, attributes)
-    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-    metrics["genai_cost"].record(cost, attributes)
-
-def process_chunk(self, chunk):
+    formatted_messages = []
+    for message in messages:
+        # Handle different message formats
+        if hasattr(message, "role") and (hasattr(message, "content") or hasattr(message, "text")):
+            # ChatMessage object (AI21 format)
+            role = str(message.role) if hasattr(message.role, 'value') else str(message.role)
+            content = getattr(message, "content", None) or getattr(message, "text", "")
+        elif isinstance(message, dict):
+            # Dictionary format
+            role = message["role"]
+            content = message["content"]
+        else:
+            # Fallback - try to extract as string
+            role = str(getattr(message, "role", "unknown"))
+            content = str(getattr(message, "content", "") or getattr(message, "text", ""))
+
+        if isinstance(content, list):
+            content_str = ", ".join(
+                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                if "type" in item else f'text: {item["text"]}'
+                for item in content
+            )
+            formatted_messages.append(f"{role}: {content_str}")
+        else:
+            formatted_messages.append(f"{role}: {content}")
+
+    return "\n".join(formatted_messages)
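For orientation, `format_content` flattens a chat message list into a single role-prefixed prompt string. A minimal sketch of its behaviour; the import path is an assumption (the diff does not show the file path, but the hunk context suggests openlit/instrumentation/ai21/utils.py), and the message values are invented:

```python
# Illustrative only; import path assumed, message values invented.
from openlit.instrumentation.ai21.utils import format_content

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": [
        {"type": "text", "text": "Describe this image"},
        {"type": "image_url", "image_url": "https://example.com/cat.png"},
    ]},
]

print(format_content(messages))
# system: You are a helpful assistant.
# user: text: Describe this image, image_url: https://example.com/cat.png
```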
+
+def process_chunk(scope, chunk):
     """
     Process a chunk of response data and update state.
     """
 
     end_time = time.time()
-    # Record the timestamp for the current chunk.
-    self._timestamps.append(end_time)
-    if len(self._timestamps) == 1:
-        # Calculate time-to-first-chunk (TTFT).
-        self._ttft = calculate_ttft(self._timestamps, self._start_time)
+    # Record the timestamp for the current chunk
+    scope._timestamps.append(end_time)
+
+    if len(scope._timestamps) == 1:
+        # Calculate time to first chunk
+        scope._ttft = calculate_ttft(scope._timestamps, scope._start_time)
 
     chunked = response_as_dict(chunk)
-    if (len(chunked.get("choices")) > 0 and
-        "delta" in chunked.get("choices")[0] and
-        "content" in chunked.get("choices")[0].get("delta")):
-        if content := chunked.get("choices")[0].get("delta").get("content"):
-            self._llmresponse += content
-    if chunked.get("usage"):
-        self._input_tokens = chunked.get("usage").get("prompt_tokens")
-        self._output_tokens = chunked.get("usage").get("completion_tokens")
-        self._response_id = chunked.get("id")
-        self._choices += chunked.get("choices")
-        self._finish_reason = chunked.get("choices")[0].get("finish_reason")
+
+    # Collect message IDs and aggregated response from events
+    if (len(chunked.get("choices", [])) > 0 and
+        "delta" in chunked.get("choices")[0] and
+        "content" in chunked.get("choices")[0].get("delta", {})):
+
+        content = chunked.get("choices")[0].get("delta").get("content")
+        if content:
+            scope._llmresponse += content
+
+    if chunked.get("usage"):
+        scope._input_tokens = chunked.get("usage").get("prompt_tokens")
+        scope._output_tokens = chunked.get("usage").get("completion_tokens")
+        scope._response_id = chunked.get("id")
+        scope._finish_reason = chunked.get("choices", [{}])[0].get("finish_reason")
+        scope._end_time = time.time()
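`process_chunk` accumulates streaming state on a plain attribute-bag `scope`. A hedged sketch of driving it by hand, assuming `response_as_dict` passes plain dicts through unchanged and the same module path as above; all chunk values are invented:

```python
# Hypothetical driver for process_chunk; values are invented.
import time
from types import SimpleNamespace

from openlit.instrumentation.ai21.utils import process_chunk  # path assumed

scope = SimpleNamespace(
    _start_time=time.time(), _timestamps=[], _llmresponse="",
    _ttft=0, _end_time=None, _input_tokens=None, _output_tokens=None,
    _response_id=None, _finish_reason=None,
)

# Shaped like the chunk dicts the code above reads (a final chunk with usage).
chunk = {
    "id": "chat-abc123",
    "choices": [{"delta": {"content": "Hello"}, "finish_reason": "stop"}],
    "usage": {"prompt_tokens": 12, "completion_tokens": 1},
}

process_chunk(scope, chunk)
print(scope._llmresponse, scope._input_tokens, scope._finish_reason)
# Hello 12 stop
```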
 
 def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
                       capture_message_content, disable_metrics, version, is_stream):
     """
-    Process chat request and generate Telemetry.
+    Process chat request and generate Telemetry
     """
 
-    scope._end_time = time.time()
     if len(scope._timestamps) > 1:
         scope._tbt = calculate_tbt(scope._timestamps)
 
-    # Extract and format input messages.
-    formatted_messages = extract_and_format_input(scope._kwargs.get("messages", ""))
-    prompt = concatenate_all_contents(formatted_messages)
+    prompt = format_content(scope._kwargs.get("messages", []))
     request_model = scope._kwargs.get("model", "jamba-1.5-mini")
 
-    # Calculate cost based on token usage.
     cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
-    # Prepare tokens dictionary.
-    tokens = {
-        "finish_reason": scope._finish_reason,
-        "response_id": scope._response_id,
-        "input_tokens": scope._input_tokens,
-        "output_tokens": scope._output_tokens,
-        "total_tokens": scope._input_tokens + scope._output_tokens,
-    }
-    extra_attrs = {
-        SemanticConvention.GEN_AI_REQUEST_IS_STREAM: is_stream,
-        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE: scope._input_tokens + scope._output_tokens,
-        SemanticConvention.GEN_AI_USAGE_COST: cost,
-        SemanticConvention.GEN_AI_SERVER_TBT: scope._tbt,
-        SemanticConvention.GEN_AI_SERVER_TTFT: scope._ttft,
-        SemanticConvention.GEN_AI_SDK_VERSION: version,
-        SemanticConvention.GEN_AI_OUTPUT_TYPE: "text" if isinstance(scope._llmresponse, str) else "json"
-    }
-    # Set span attributes.
-    setup_common_span_attributes(scope._span, request_model, scope._kwargs, tokens,
-                                 scope._server_port, scope._server_address, environment,
-                                 application_name, extra_attrs)
 
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_AI21,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, scope._kwargs.get("seed", ""))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, scope._kwargs.get("frequency_penalty", 0.0))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get("max_tokens", -1))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, scope._kwargs.get("presence_penalty", 0.0))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get("stop", []))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get("temperature", 0.4))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get("top_p", 1.0))
+
+    # Span Attributes for Response parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+
+    # Span Attributes for Cost and Tokens
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Span Attributes for Tools
+    if scope._tools:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, scope._tools.get("function", {}).get("name", ""))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, str(scope._tools.get("id", "")))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, str(scope._tools.get("function", {}).get("arguments", "")))
+
+    # Span Attributes for Content
     if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+        # To be removed once the change to span_attributes (from span events) is complete
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-            attributes={SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt},
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+            },
         )
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-            attributes={SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse},
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+            },
         )
 
     scope._span.set_status(Status(StatusCode.OK))
 
+    # Metrics
     if not disable_metrics:
-        record_common_metrics(metrics, application_name, environment, request_model,
-                              scope._server_address, scope._server_port,
-                              scope._start_time, scope._end_time,
-                              scope._input_tokens, scope._output_tokens, cost,
-                              include_tbt=True, tbt_value=scope._tbt)
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_AI21,
+            scope._server_address, scope._server_port, request_model, request_model, environment,
+            application_name, scope._start_time, scope._end_time, scope._input_tokens, scope._output_tokens,
+            cost, scope._tbt, scope._ttft)
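The `scope._tbt` and `scope._ttft` values recorded here derive from the chunk timestamps collected in `process_chunk`. A sketch of the arithmetic under the helpers' conventional definitions, which are assumed here since `calculate_ttft`/`calculate_tbt` are not shown in this diff:

```python
# Assumed semantics of the timing helpers, for orientation only; the
# actual openlit implementations are not shown in this diff.
def calculate_ttft(timestamps, start_time):
    # Time to first token: delay from request start to the first chunk.
    return timestamps[0] - start_time

def calculate_tbt(timestamps):
    # Time between tokens: mean gap between consecutive chunks.
    gaps = [b - a for a, b in zip(timestamps, timestamps[1:])]
    return sum(gaps) / len(gaps) if gaps else 0

start = 100.0
stamps = [100.4, 100.5, 100.7]        # invented chunk arrival times
print(calculate_ttft(stamps, start))  # ≈ 0.4
print(calculate_tbt(stamps))          # ≈ 0.15
```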
 
-def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
+def process_streaming_chat_response(scope, pricing_info, environment, application_name, metrics,
                                     capture_message_content=False, disable_metrics=False, version=""):
     """
-    Process a streaming chat response and generate Telemetry.
+    Process streaming chat request and generate Telemetry
     """
 
-    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
                       capture_message_content, disable_metrics, version, is_stream=True)
 
 def process_chat_response(response, request_model, pricing_info, server_port, server_address,
-                          environment, application_name, metrics, start_time,
-                          span, capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
+    environment, application_name, metrics, start_time, span, capture_message_content=False,
+    disable_metrics=False, version="1.0.0", **kwargs):
     """
-    Process a synchronous chat response and generate Telemetry.
+    Process chat request and generate Telemetry
     """
 
-    # Create a generic scope object to hold telemetry data.
-    self = type("GenericScope", (), {})()
+    # Create scope object
+    scope = type("GenericScope", (), {})()
     response_dict = response_as_dict(response)
 
-    # pylint: disable = no-member
-    self._start_time = start_time
-    self._end_time = time.time()
-
-    self._span = span
-    # Concatenate content from all choices.
-    self._llmresponse = "".join(
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._llmresponse = " ".join(
         (choice.get("message", {}).get("content") or "")
         for choice in response_dict.get("choices", [])
     )
-    self._response_role = response_dict.get("message", {}).get("role", "assistant")
-    self._input_tokens = response_dict.get("usage", {}).get("prompt_tokens", 0)
-    self._output_tokens = response_dict.get("usage", {}).get("completion_tokens", 0)
-    self._response_id = response_dict.get("id", "")
-    self._response_model = request_model
-    self._finish_reason = response_dict.get("choices", [{}])[0].get("finish_reason")
-    self._timestamps = []
-    self._ttft, self._tbt = self._end_time - self._start_time, 0
-    self._server_address, self._server_port = server_address, server_port
-    self._kwargs = kwargs
-    self._choices = response_dict.get("choices")
-
-    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+    scope._response_id = response_dict.get("id")
+    scope._input_tokens = response_dict.get("usage", {}).get("prompt_tokens", 0)
+    scope._output_tokens = response_dict.get("usage", {}).get("completion_tokens", 0)
+    scope._timestamps = []
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address, scope._server_port = server_address, server_port
+    scope._kwargs = kwargs
+    scope._finish_reason = str(response_dict.get("choices", [])[0].get("finish_reason", ""))
+
+    # Handle tool calls
+    if scope._kwargs.get("tools"):
+        scope._tools = response_dict.get("choices", [{}])[0].get("message", {}).get("tool_calls")
+    else:
+        scope._tools = None
+
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
                       capture_message_content, disable_metrics, version, is_stream=False)
 
     return response
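`type("GenericScope", (), {})()` creates an anonymous class and instantiates it once, yielding an empty object that accepts arbitrary attributes — the per-request state bag these functions share. It is roughly equivalent to the standard-library idiom:

```python
from types import SimpleNamespace

# The pattern used above: an anonymous class, instantiated once.
scope = type("GenericScope", (), {})()
scope._span = "anything"            # arbitrary attributes attach freely
print(type(scope).__name__)         # GenericScope

# Roughly the same thing with SimpleNamespace:
bag = SimpleNamespace(_span="anything")
```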
 
-def process_chat_rag_response(response, request_model, pricing_info, server_port, server_address,
-                              environment, application_name, metrics, start_time,
-                              span, capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
+def common_chat_rag_logic(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content, disable_metrics, version):
     """
-    Process a chat response and generate Telemetry.
+    Process RAG chat request and generate Telemetry
     """
-    end_time = time.time()
-    response_dict = response_as_dict(response)
-    # Format input messages into a single prompt string.
-    messages_input = kwargs.get("messages", "")
-    formatted_messages = extract_and_format_input(messages_input)
-    prompt = concatenate_all_contents(formatted_messages)
-    input_tokens = general_tokens(prompt)
 
-    # Create tokens dict and RAG-specific extra attributes.
-    tokens = {"response_id": response_dict.get("id"), "input_tokens": input_tokens}
-    extra_attrs = {
-        SemanticConvention.GEN_AI_REQUEST_IS_STREAM: False,
-        SemanticConvention.GEN_AI_SERVER_TTFT: end_time - start_time,
-        SemanticConvention.GEN_AI_SDK_VERSION: version,
-        SemanticConvention.GEN_AI_RAG_MAX_SEGMENTS: kwargs.get("max_segments", -1),
-        SemanticConvention.GEN_AI_RAG_STRATEGY: kwargs.get("retrieval_strategy", "segments"),
-        SemanticConvention.GEN_AI_RAG_SIMILARITY_THRESHOLD: kwargs.get("retrieval_similarity_threshold", -1),
-        SemanticConvention.GEN_AI_RAG_MAX_NEIGHBORS: kwargs.get("max_neighbors", -1),
-        SemanticConvention.GEN_AI_RAG_FILE_IDS: str(kwargs.get("file_ids", "")),
-        SemanticConvention.GEN_AI_RAG_DOCUMENTS_PATH: kwargs.get("path", "")
-    }
-    # Set common span attributes.
-    setup_common_span_attributes(span, request_model, kwargs, tokens,
-                                 server_port, server_address, environment, application_name,
-                                 extra_attrs)
+    prompt = format_content(scope._kwargs.get("messages", []))
+    request_model = scope._kwargs.get("model", "jamba-1.5-mini")
 
+    cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_AI21,
+        scope._server_address, scope._server_port, request_model, scope._response_model,
+        environment, application_name, False, scope._tbt, scope._ttft, version)
+
+    # RAG-specific span attributes
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RAG_MAX_SEGMENTS, scope._kwargs.get("max_segments", -1))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RAG_STRATEGY, scope._kwargs.get("retrieval_strategy", "segments"))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RAG_MAX_NEIGHBORS, scope._kwargs.get("max_neighbors", -1))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RAG_FILE_IDS, str(scope._kwargs.get("file_ids", "")))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RAG_DOCUMENTS_PATH, scope._kwargs.get("path", ""))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RAG_SIMILARITY_THRESHOLD,
+        scope._kwargs.get("retrieval_similarity_threshold", -1))
+
+    # Standard span attributes
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Handle tool calls
+    if scope._kwargs.get("tools"):
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
+            str(scope._choices[0].get("message", {}).get("tool_calls", "")))
+
+    # Content attributes
     if capture_message_content:
-        span.add_event(
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+        # To be removed once the change to span_attributes (from span events) is complete
+        scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-            attributes={SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt},
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+            },
         )
 
-    output_tokens = 0
+    scope._span.set_status(Status(StatusCode.OK))
+
+    # Metrics
+    if not disable_metrics:
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_AI21,
+            scope._server_address, scope._server_port, request_model, scope._response_model, environment,
+            application_name, scope._start_time, scope._end_time, scope._input_tokens, scope._output_tokens,
+            cost, scope._tbt, scope._ttft)
+
+def process_chat_rag_response(response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time, span, capture_message_content=False,
+    disable_metrics=False, version="1.0.0", **kwargs):
+    """
+    Process RAG chat request and generate Telemetry
+    """
+
+    # Create scope object
+    scope = type("GenericScope", (), {})()
+    response_dict = response_as_dict(response)
+
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+
+    # Format input messages and calculate input tokens
+    prompt = format_content(kwargs.get("messages", []))
+    input_tokens = general_tokens(prompt)
+
+    # Process response choices
     choices = response_dict.get("choices", [])
     aggregated_completion = []
+    output_tokens = 0
+
     for i in range(kwargs.get("n", 1)):
-        # Get the response content from each choice and count tokens.
         content = choices[i].get("content", "")
         aggregated_completion.append(content)
         output_tokens += general_tokens(content)
-        if kwargs.get("tools"):
-            span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
-                               str(choices[i].get("message", {}).get("tool_calls")))
-        # Set output type based on actual content type.
-        if isinstance(content, str):
-            span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
-        elif content is not None:
-            span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "json")
-
-    # Concatenate completion responses.
-    llmresponse = "".join(aggregated_completion)
-    tokens["output_tokens"] = output_tokens
-    tokens["total_tokens"] = input_tokens + output_tokens
-
-    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
-    span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-    span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
-    span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
-
-    span.set_status(Status(StatusCode.OK))
 
-    if capture_message_content:
-        span.add_event(
-            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-            attributes={SemanticConvention.GEN_AI_CONTENT_COMPLETION: llmresponse},
-        )
+    scope._llmresponse = "".join(aggregated_completion)
+    scope._response_id = response_dict.get("id", "")
+    scope._response_model = request_model
+    scope._input_tokens = input_tokens
+    scope._output_tokens = output_tokens
+    scope._timestamps = []
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address, scope._server_port = server_address, server_port
+    scope._kwargs = kwargs
+    scope._finish_reason = ""
+    scope._tools = None
+    scope._choices = choices
+
+    common_chat_rag_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version)
 
-    if not disable_metrics:
-        record_common_metrics(metrics, application_name, environment, request_model,
-                              server_address, server_port, start_time, end_time,
-                              input_tokens, output_tokens, cost, include_tbt=False)
     return response
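Because the RAG response here carries no usage block, `process_chat_rag_response` derives token counts by running `general_tokens` over the prompt and each returned choice. A sketch of that aggregation with a placeholder counter (openlit's `general_tokens` is tokenizer-based, not a whitespace split; the choice contents are invented):

```python
# Placeholder counter: openlit's general_tokens uses a real tokenizer.
def general_tokens(text):
    return len(text.split())

# Invented RAG choices; the real ones come from response_as_dict(response).
choices = [
    {"content": "Paris is the capital of France."},
    {"content": "It sits on the Seine."},
]

aggregated_completion = []
output_tokens = 0
for choice in choices:
    content = choice.get("content", "")
    aggregated_completion.append(content)
    output_tokens += general_tokens(content)

print(output_tokens)                   # 11 with the placeholder counter
print("".join(aggregated_completion))
```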
The remaining hunks are from the Groq instrumentor initializer (openlit/instrumentation/groq/__init__.py, inferred from its module docstring):

@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of Groq Functions"""
 
 from typing import Collection
@@ -13,15 +12,15 @@ _instruments = ("groq >= 0.5.0",)
 
 class GroqInstrumentor(BaseInstrumentor):
     """
-    An instrumentor for Groq's client library.
+    An instrumentor for Groq client library.
     """
 
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments
 
     def _instrument(self, **kwargs):
-        application_name = kwargs.get("application_name", "default_application")
-        environment = kwargs.get("environment", "default_environment")
+        application_name = kwargs.get("application_name", "default")
+        environment = kwargs.get("environment", "default")
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info", {})
@@ -29,22 +28,21 @@ class GroqInstrumentor(BaseInstrumentor):
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("groq")
 
-        #sync
+        # Chat completions
         wrap_function_wrapper(
             "groq.resources.chat.completions",
             "Completions.create",
             chat(version, environment, application_name,
-                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
-        #async
+        # Chat completions
         wrap_function_wrapper(
             "groq.resources.chat.completions",
             "AsyncCompletions.create",
             async_chat(version, environment, application_name,
-                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
         pass
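`wrap_function_wrapper` is wrapt's monkey-patching entry point: it resolves a module path and a dotted attribute path, then installs a wrapper with wrapt's `(wrapped, instance, args, kwargs)` signature; `chat(...)` and `async_chat(...)` above are factories returning such wrappers for the sync and async `create` methods. A minimal sketch of the same pattern against a stdlib function (the target and names are chosen only for illustration):

```python
# Minimal wrapt-based patching sketch mirroring _instrument above;
# json.dumps is a stand-in target, not anything openlit actually wraps.
import json
from wrapt import wrap_function_wrapper

def make_wrapper(application_name):
    # Factory returning a wrapper, like openlit's chat()/async_chat().
    def wrapper(wrapped, instance, args, kwargs):
        # A real instrumentor would open an OpenTelemetry span here.
        print(f"[{application_name}] calling {wrapped.__name__}")
        return wrapped(*args, **kwargs)
    return wrapper

# Same call shape: module path, dotted attribute path, wrapper callable.
wrap_function_wrapper("json", "dumps", make_wrapper("default"))

json.dumps({"hello": "world"})  # prints "[default] calling dumps" first
```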