openlit 1.34.4__py3-none-any.whl → 1.34.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,325 @@
+ """
+ PremAI OpenTelemetry instrumentation utility functions
+ """
+ import time
+
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+ from opentelemetry.trace import Status, StatusCode
+
+ from openlit.__helpers import (
+     response_as_dict,
+     calculate_ttft,
+     calculate_tbt,
+     get_chat_model_cost,
+     get_embed_model_cost,
+     general_tokens,
+     create_metrics_attributes,
+ )
+ from openlit.semcov import SemanticConvention
+
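+ # format_content flattens a chat message list into newline-joined "role: content"
+ # strings, e.g. [{"role": "user", "content": "Hi"}] -> "user: Hi".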
+ def format_content(messages):
+     """
+     Process a list of messages to extract content.
+     """
+
+     formatted_messages = []
+     for message in messages:
+         role = message["role"]
+         content = message["content"]
+
+         if isinstance(content, list):
+             content_str = ", ".join(
+                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                 if "type" in item else f'text: {item["text"]}'
+                 for item in content
+             )
+             formatted_messages.append(f"{role}: {content_str}")
+         else:
+             formatted_messages.append(f"{role}: {content}")
+
+     return "\n".join(formatted_messages)
+
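+ # process_chunk expects the caller's scope to carry streaming state:
+ # _timestamps, _start_time, _ttft, _llmresponse, _finish_reason,
+ # _response_id and _response_model.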
+ def process_chunk(scope, chunk):
+     """
+     Process a chunk of response data and update state.
+     """
+
+     end_time = time.time()
+     # Record the timestamp for the current chunk
+     scope._timestamps.append(end_time)
+
+     if len(scope._timestamps) == 1:
+         # Calculate time to first chunk
+         scope._ttft = calculate_ttft(scope._timestamps, scope._start_time)
+
+     chunked = response_as_dict(chunk)
+     # Collect message IDs and aggregated response from events
+     if chunked.get("choices"):
+         first_choice = chunked.get("choices")[0]
+
+         if first_choice.get("delta").get("content"):
+             scope._llmresponse += first_choice.get("delta").get("content")
+
+         if first_choice.get("finish_reason"):
+             scope._finish_reason = first_choice.get("finish_reason")
+             scope._response_id = chunked.get("id")
+             scope._response_model = chunked.get("model")
+
+ def common_span_attributes(scope, gen_ai_operation, gen_ai_system, server_address, server_port,
+     request_model, response_model, environment, application_name, is_stream, tbt, ttft, version):
+     """
+     Set common span attributes for both chat and RAG operations.
+     """
+
+     scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+     scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, gen_ai_operation)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, gen_ai_system)
+     scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
+     scope._span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._response_model)
+     scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
+     scope._span.set_attribute(SERVICE_NAME, application_name)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
+
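+ # tbt and ttft default to None so that non-streaming calls skip the
+ # streaming-latency histograms below.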
+ def record_completion_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
+     request_model, response_model, environment, application_name, start_time, end_time,
+     input_tokens, output_tokens, cost, tbt=None, ttft=None):
+     """
+     Record completion-specific metrics for the operation.
+     """
+
+     attributes = create_metrics_attributes(
+         operation=gen_ai_operation,
+         system=gen_ai_system,
+         server_address=server_address,
+         server_port=server_port,
+         request_model=request_model,
+         response_model=response_model,
+         service_name=application_name,
+         deployment_environment=environment,
+     )
+     metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
+     metrics["genai_requests"].add(1, attributes)
+     metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+     metrics["genai_completion_tokens"].add(output_tokens, attributes)
+     metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, attributes)
+     metrics["genai_cost"].record(cost, attributes)
+     if tbt is not None:
+         metrics["genai_server_tbt"].record(tbt, attributes)
+     if ttft is not None:
+         metrics["genai_server_ttft"].record(ttft, attributes)
+
+ def record_embedding_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
+     request_model, response_model, environment, application_name, start_time, end_time,
+     input_tokens, cost):
+     """
+     Record embedding-specific metrics for the operation.
+     """
+
+     attributes = create_metrics_attributes(
+         operation=gen_ai_operation,
+         system=gen_ai_system,
+         server_address=server_address,
+         server_port=server_port,
+         request_model=request_model,
+         response_model=response_model,
+         service_name=application_name,
+         deployment_environment=environment,
+     )
+     metrics["genai_client_usage_tokens"].record(input_tokens, attributes)
+     metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
+     metrics["genai_requests"].add(1, attributes)
+     metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+     metrics["genai_cost"].record(cost, attributes)
+
+ def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+     capture_message_content, disable_metrics, version, is_stream):
+     """
+     Process chat request and generate Telemetry
+     """
+
+     if len(scope._timestamps) > 1:
+         scope._tbt = calculate_tbt(scope._timestamps)
+
+     prompt = format_content(scope._kwargs.get("messages", ""))
+     request_model = scope._kwargs.get("model", "llama3.2-3b")
+
+     # Calculate tokens using input prompt and aggregated response
+     if is_stream:
+         input_tokens = general_tokens(prompt)
+         output_tokens = general_tokens(scope._llmresponse)
+     else:
+         input_tokens = scope._input_tokens
+         output_tokens = scope._output_tokens
+
+     cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+     # Common Span Attributes
+     common_span_attributes(scope,
+         SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_PREMAI,
+         scope._server_address, scope._server_port, request_model, scope._response_model,
+         environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+     # Span Attributes for Response parameters
+     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, scope._kwargs.get("seed", ""))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, scope._kwargs.get("frequency_penalty", 0.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get("max_tokens", -1))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, scope._kwargs.get("presence_penalty", 0.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get("stop", []))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get("temperature", 1.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get("top_p", 1.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER, scope._kwargs.get("user", ""))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+
+     # Span Attributes for Cost and Tokens
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+     # Span Attributes for Tools
+     if scope._tools:
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, scope._tools.get("function", {}).get("name", ""))
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, str(scope._tools.get("id", "")))
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, str(scope._tools.get("function", {}).get("arguments", "")))
+
+     # Span Attributes for Content
+     if capture_message_content:
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+         # To be removed once the change to span_attributes (from span events) is complete
+         scope._span.add_event(
+             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+             attributes={
+                 SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+             },
+         )
+         scope._span.add_event(
+             name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+             attributes={
+                 SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+             },
+         )
+
+     scope._span.set_status(Status(StatusCode.OK))
+
+     # Metrics
+     if not disable_metrics:
+         record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_PREMAI,
+             scope._server_address, scope._server_port, request_model, scope._response_model, environment,
+             application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
+             cost, scope._tbt, scope._ttft)
+
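+ # Embedding requests are not streamed, so common_span_attributes is passed
+ # is_stream=False and no TBT is computed here.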
+ def common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
+     capture_message_content, disable_metrics, version):
+     """
+     Process embedding request and generate Telemetry
+     """
+
+     request_model = scope._kwargs.get("model", "text-embedding-ada-002")
+
+     cost = get_embed_model_cost(request_model, pricing_info, scope._input_tokens)
+
+     # Common Span Attributes
+     common_span_attributes(scope,
+         SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_PREMAI,
+         scope._server_address, scope._server_port, request_model, scope._response_model,
+         environment, application_name, False, scope._tbt, scope._ttft, version)
+
+     # Embedding-specific span attributes
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_ENCODING_FORMATS, [scope._kwargs.get("encoding_format", "float")])
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER, scope._kwargs.get("user", ""))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+     # Span Attributes for Content
+     if capture_message_content:
+         scope._span.add_event(
+             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+             attributes={
+                 SemanticConvention.GEN_AI_CONTENT_PROMPT: str(scope._kwargs.get("input", "")),
+             },
+         )
+
+     scope._span.set_status(Status(StatusCode.OK))
+
+     # Metrics
+     if not disable_metrics:
+         record_embedding_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_PREMAI,
+             scope._server_address, scope._server_port, request_model, scope._response_model, environment,
+             application_name, scope._start_time, scope._end_time, scope._input_tokens, cost)
+
+ def process_streaming_chat_response(scope, pricing_info, environment, application_name, metrics,
+     capture_message_content=False, disable_metrics=False, version=""):
+     """
+     Process streaming chat request and generate Telemetry
+     """
+     common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+         capture_message_content, disable_metrics, version, is_stream=True)
+
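+ # A throwaway "GenericScope" object carries per-request state so the
+ # streaming and non-streaming paths can share common_chat_logic.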
+ def process_chat_response(response, request_model, pricing_info, server_port, server_address,
+     environment, application_name, metrics, start_time, span, capture_message_content=False,
+     disable_metrics=False, version="1.0.0", **kwargs):
+     """
+     Process chat request and generate Telemetry
+     """
+
+     scope = type("GenericScope", (), {})()
+     response_dict = response_as_dict(response)
+
+     scope._start_time = start_time
+     scope._end_time = time.time()
+     scope._span = span
+     scope._llmresponse = str(response_dict.get("choices")[0].get("message").get("content"))
+     scope._response_id = response_dict.get("additional_properties", {}).get("id")
+     scope._response_model = response_dict.get("model")
+     scope._input_tokens = response_dict.get("usage").get("prompt_tokens")
+     scope._output_tokens = response_dict.get("usage").get("completion_tokens")
+     scope._timestamps = []
+     scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+     scope._server_address, scope._server_port = server_address, server_port
+     scope._kwargs = kwargs
+     scope._finish_reason = str(response_dict.get("choices")[0].get("finish_reason"))
+
+     if scope._kwargs.get("tools"):
+         scope._tools = response_dict.get("choices")[0].get("message").get("tool_calls")
+     else:
+         scope._tools = None
+
+     common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+         capture_message_content, disable_metrics, version, is_stream=False)
+
+     return response
+
+ def process_embedding_response(response, request_model, pricing_info, server_port, server_address,
+     environment, application_name, metrics, start_time, span, capture_message_content=False,
+     disable_metrics=False, version="1.0.0", **kwargs):
+     """
+     Process embedding request and generate Telemetry
+     """
+
+     scope = type("GenericScope", (), {})()
+     response_dict = response_as_dict(response)
+
+     scope._start_time = start_time
+     scope._end_time = time.time()
+     scope._span = span
+     scope._response_model = response_dict.get("model")
+     scope._input_tokens = response_dict.get("usage").get("prompt_tokens")
+     scope._timestamps = []
+     scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+     scope._server_address, scope._server_port = server_address, server_port
+     scope._kwargs = kwargs
+
+     common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
+         capture_message_content, disable_metrics, version)
+
+     return response
@@ -1,4 +1,3 @@
- # pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
  """Initializer of Auto Instrumentation of Reka Functions"""

  from typing import Collection
@@ -17,15 +16,15 @@ _instruments = ("reka-api >= 3.2.0",)

  class RekaInstrumentor(BaseInstrumentor):
      """
-     An instrumentor for Reka's client library.
+     An instrumentor for Reka client library.
      """

      def instrumentation_dependencies(self) -> Collection[str]:
          return _instruments

      def _instrument(self, **kwargs):
-         application_name = kwargs.get("application_name", "default_application")
-         environment = kwargs.get("environment", "default_environment")
+         application_name = kwargs.get("application_name", "default")
+         environment = kwargs.get("environment", "default")
          tracer = kwargs.get("tracer")
          metrics = kwargs.get("metrics_dict")
          pricing_info = kwargs.get("pricing_info", {})
@@ -33,7 +32,7 @@ class RekaInstrumentor(BaseInstrumentor):
          disable_metrics = kwargs.get("disable_metrics")
          version = importlib.metadata.version("reka-api")

-         # sync chat
+         # Chat completions
          wrap_function_wrapper(
              "reka.chat.client",
              "ChatClient.create",
@@ -41,7 +40,7 @@ class RekaInstrumentor(BaseInstrumentor):
                  tracer, pricing_info, capture_message_content, metrics, disable_metrics),
          )

-         # async chat
+         # Chat completions
          wrap_function_wrapper(
              "reka.chat.client",
              "AsyncChatClient.create",
@@ -50,5 +49,4 @@ class RekaInstrumentor(BaseInstrumentor):
          )

      def _uninstrument(self, **kwargs):
-         # Proper uninstrumentation logic to revert patched methods
          pass
@@ -2,53 +2,26 @@
  Module for monitoring Reka API calls.
  """

- import logging
  import time
- from opentelemetry.trace import SpanKind, Status, StatusCode
- from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+ from opentelemetry.trace import SpanKind
  from openlit.__helpers import (
-     get_chat_model_cost,
      handle_exception,
-     create_metrics_attributes,
      set_server_address_and_port
  )
+ from openlit.instrumentation.reka.utils import (
+     process_chat_response
+ )
  from openlit.semcov import SemanticConvention

- # Initialize logger for logging potential issues and operations
- logger = logging.getLogger(__name__)
-
  def async_chat(version, environment, application_name,
-                tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+     tracer, pricing_info, capture_message_content, metrics, disable_metrics):
      """
-     Generates a telemetry wrapper for chat to collect metrics.
-
-     Args:
-         version: Version of the monitoring package.
-         environment: Deployment environment (e.g., production, staging).
-         application_name: Name of the application using the Reka API.
-         tracer: OpenTelemetry tracer for creating spans.
-         pricing_info: Information used for calculating the cost of Reka usage.
-         capture_message_content: Flag indicating whether to trace the actual content.
-
-     Returns:
-         A function that wraps the chat method to add telemetry.
+     Generates a telemetry wrapper for GenAI function call
      """

      async def wrapper(wrapped, instance, args, kwargs):
          """
-         Wraps the 'chat' API call to add telemetry.
-
-         This collects metrics such as execution time, cost, and token usage, and handles errors
-         gracefully, adding details to the trace for observability.
-
-         Args:
-             wrapped: The original 'chat' method to be wrapped.
-             instance: The instance of the class where the original method is defined.
-             args: Positional arguments for the 'chat' method.
-             kwargs: Keyword arguments for the 'chat' method.
-
-         Returns:
-             The response from the original 'chat' method.
+         Wraps the GenAI function call.
          """

          server_address, server_port = set_server_address_and_port(instance, "api.reka.ai", 443)
@@ -56,142 +29,31 @@ def async_chat(version, environment, application_name,

          span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

-         with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+         with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
              start_time = time.time()
              response = await wrapped(*args, **kwargs)
-             end_time = time.time()

              try:
-                 # Format 'messages' into a single string
-                 message_prompt = kwargs.get("messages", "")
-                 formatted_messages = []
-                 for message in message_prompt:
-                     role = message["role"]
-                     content = message["content"]
-
-                     if isinstance(content, list):
-                         content_str = ", ".join(
-                             f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                             if "type" in item else f'text: {item["text"]}'
-                             for item in content
-                         )
-                         formatted_messages.append(f"{role}: {content_str}")
-                     else:
-                         formatted_messages.append(f"{role}: {content}")
-                 prompt = "\n".join(formatted_messages)
-
-                 input_tokens = response.usage.input_tokens
-                 output_tokens = response.usage.output_tokens
-
-                 # Calculate cost of the operation
-                 cost = get_chat_model_cost(request_model,
-                     pricing_info, input_tokens, output_tokens)
-
-                 # Set Span attributes (OTel Semconv)
-                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                 span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                     SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                 span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                     SemanticConvention.GEN_AI_SYSTEM_REKAAI)
-                 span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                     request_model)
-                 span.set_attribute(SemanticConvention.SERVER_PORT,
-                     server_port)
-                 span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
-                     kwargs.get("seed", ""))
-                 span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                     kwargs.get("max_tokens", -1))
-                 span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
-                     kwargs.get("stop", []))
-                 span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                     kwargs.get("presence_penalty", 0.0))
-                 span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                     kwargs.get("temperature", 0.4))
-                 span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K,
-                     kwargs.get("top_k", 1.0))
-                 span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                     kwargs.get("top_p", 1.0))
-                 span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
-                     [response.responses[0].finish_reason])
-                 span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
-                     response.id)
-                 span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                     response.model)
-                 span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                     input_tokens)
-                 span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                     output_tokens)
-                 span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                     server_address)
-                 span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                     'text')
-
-                 # Set Span attributes (Extra)
-                 span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                     environment)
-                 span.set_attribute(SERVICE_NAME,
-                     application_name)
-                 span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                     False)
-                 span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                     input_tokens + output_tokens)
-                 span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                     cost)
-                 span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                     end_time - start_time)
-                 span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                     version)
-
-                 if capture_message_content:
-                     span.add_event(
-                         name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                         attributes={
-                             SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                         },
-                     )
-                     span.add_event(
-                         name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                         attributes={
-                             SemanticConvention.GEN_AI_CONTENT_COMPLETION: response.responses[0].message.content,
-                         },
-                     )
-
-                 span.set_status(Status(StatusCode.OK))
-
-                 if disable_metrics is False:
-                     attributes = create_metrics_attributes(
-                         service_name=application_name,
-                         deployment_environment=environment,
-                         operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                         system=SemanticConvention.GEN_AI_SYSTEM_REKAAI,
-                         request_model=request_model,
-                         server_address=server_address,
-                         server_port=server_port,
-                         response_model=response.model,
-                     )
-
-                     metrics["genai_client_usage_tokens"].record(
-                         input_tokens + output_tokens, attributes
-                     )
-                     metrics["genai_client_operation_duration"].record(
-                         end_time - start_time, attributes
-                     )
-                     metrics["genai_server_ttft"].record(
-                         end_time - start_time, attributes
-                     )
-                     metrics["genai_requests"].add(1, attributes)
-                     metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                     metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                     metrics["genai_cost"].record(cost, attributes)
-
-                 # Return original response
-                 return response
+                 response = process_chat_response(
+                     response=response,
+                     request_model=request_model,
+                     pricing_info=pricing_info,
+                     server_port=server_port,
+                     server_address=server_address,
+                     environment=environment,
+                     application_name=application_name,
+                     metrics=metrics,
+                     start_time=start_time,
+                     span=span,
+                     capture_message_content=capture_message_content,
+                     disable_metrics=disable_metrics,
+                     version=version,
+                     **kwargs
+                 )
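+                 # Span attributes, events and metrics are recorded inside
+                 # process_chat_response; the original client response is returned.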

              except Exception as e:
                  handle_exception(span, e)
-                 logger.error("Error in trace creation: %s", e)

-             # Return original response
-             return response
+             return response

      return wrapper