openlit 1.33.7__py3-none-any.whl → 1.33.9__py3-none-any.whl
This diff shows the published contents of the two package versions as they appear in their public registries and is provided for informational purposes only.
- openlit/__helpers.py +83 -0
- openlit/__init__.py +1 -1
- openlit/instrumentation/ag2/ag2.py +2 -2
- openlit/instrumentation/ai21/__init__.py +4 -4
- openlit/instrumentation/ai21/ai21.py +370 -319
- openlit/instrumentation/ai21/async_ai21.py +371 -319
- openlit/instrumentation/anthropic/__init__.py +4 -4
- openlit/instrumentation/anthropic/anthropic.py +321 -189
- openlit/instrumentation/anthropic/async_anthropic.py +323 -190
- openlit/instrumentation/assemblyai/__init__.py +1 -1
- openlit/instrumentation/assemblyai/assemblyai.py +59 -43
- openlit/instrumentation/astra/astra.py +4 -4
- openlit/instrumentation/astra/async_astra.py +4 -4
- openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
- openlit/instrumentation/bedrock/__init__.py +1 -1
- openlit/instrumentation/bedrock/bedrock.py +115 -58
- openlit/instrumentation/chroma/chroma.py +4 -4
- openlit/instrumentation/cohere/__init__.py +33 -10
- openlit/instrumentation/cohere/async_cohere.py +610 -0
- openlit/instrumentation/cohere/cohere.py +410 -219
- openlit/instrumentation/controlflow/controlflow.py +2 -2
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
- openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
- openlit/instrumentation/crewai/crewai.py +2 -2
- openlit/instrumentation/dynamiq/dynamiq.py +2 -2
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +73 -47
- openlit/instrumentation/elevenlabs/elevenlabs.py +73 -52
- openlit/instrumentation/embedchain/embedchain.py +4 -4
- openlit/instrumentation/firecrawl/firecrawl.py +2 -2
- openlit/instrumentation/google_ai_studio/__init__.py +9 -9
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
- openlit/instrumentation/gpt4all/gpt4all.py +17 -17
- openlit/instrumentation/groq/async_groq.py +14 -14
- openlit/instrumentation/groq/groq.py +14 -14
- openlit/instrumentation/haystack/haystack.py +2 -2
- openlit/instrumentation/julep/async_julep.py +2 -2
- openlit/instrumentation/julep/julep.py +2 -2
- openlit/instrumentation/langchain/langchain.py +36 -31
- openlit/instrumentation/letta/letta.py +6 -6
- openlit/instrumentation/litellm/async_litellm.py +20 -20
- openlit/instrumentation/litellm/litellm.py +20 -20
- openlit/instrumentation/llamaindex/llamaindex.py +2 -2
- openlit/instrumentation/mem0/mem0.py +2 -2
- openlit/instrumentation/milvus/milvus.py +4 -4
- openlit/instrumentation/mistral/async_mistral.py +18 -18
- openlit/instrumentation/mistral/mistral.py +18 -18
- openlit/instrumentation/multion/async_multion.py +2 -2
- openlit/instrumentation/multion/multion.py +2 -2
- openlit/instrumentation/ollama/async_ollama.py +29 -29
- openlit/instrumentation/ollama/ollama.py +29 -29
- openlit/instrumentation/openai/__init__.py +11 -230
- openlit/instrumentation/openai/async_openai.py +434 -409
- openlit/instrumentation/openai/openai.py +415 -393
- openlit/instrumentation/phidata/phidata.py +2 -2
- openlit/instrumentation/pinecone/pinecone.py +4 -4
- openlit/instrumentation/premai/premai.py +20 -20
- openlit/instrumentation/qdrant/async_qdrant.py +4 -4
- openlit/instrumentation/qdrant/qdrant.py +4 -4
- openlit/instrumentation/reka/async_reka.py +6 -6
- openlit/instrumentation/reka/reka.py +6 -6
- openlit/instrumentation/together/async_together.py +18 -18
- openlit/instrumentation/together/together.py +18 -18
- openlit/instrumentation/transformers/transformers.py +6 -6
- openlit/instrumentation/vertexai/async_vertexai.py +53 -53
- openlit/instrumentation/vertexai/vertexai.py +53 -53
- openlit/instrumentation/vllm/vllm.py +6 -6
- openlit/otel/metrics.py +98 -7
- openlit/semcov/__init__.py +113 -80
- {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/METADATA +2 -1
- openlit-1.33.9.dist-info/RECORD +121 -0
- {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/WHEEL +1 -1
- openlit/instrumentation/openai/async_azure_openai.py +0 -900
- openlit/instrumentation/openai/azure_openai.py +0 -898
- openlit-1.33.7.dist-info/RECORD +0 -122
- {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/LICENSE +0 -0
--- a/openlit/instrumentation/ai21/ai21.py
+++ b/openlit/instrumentation/ai21/ai21.py
@@ -1,15 +1,19 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches, too-many-instance-attributes, inconsistent-return-statements
 """
 Module for monitoring AI21 calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
     get_chat_model_cost,
     handle_exception,
     response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port,
     general_tokens
 )
 from openlit.semcov import SemanticConvetion
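The four helpers added to this import block come from openlit/__helpers.py, which grows by 83 lines in this release (see the file list above). Their bodies are not part of this diff; a minimal sketch of the two timing helpers, assuming TTFT is measured from request start to the first streamed chunk and TBT is the mean gap between consecutive chunks:

from typing import List

def calculate_ttft(timestamps: List[float], start_time: float) -> float:
    # Time to first token: delay between issuing the request and the
    # arrival of the first streamed chunk.
    if timestamps:
        return timestamps[0] - start_time
    return 0.0

def calculate_tbt(timestamps: List[float]) -> float:
    # Time between tokens: average gap between consecutive chunk arrivals.
    if len(timestamps) < 2:
        return 0.0
    gaps = [later - earlier for earlier, later in zip(timestamps, timestamps[1:])]
    return sum(gaps) / len(gaps)

This matches how the streaming wrapper below uses them: calculate_ttft is called exactly once, when the first timestamp is recorded, and calculate_tbt only runs when more than one chunk arrived.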
@@ -17,13 +21,12 @@ from openlit.semcov import SemanticConvetion
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def chat(gen_ai_endpoint, version, environment, application_name,
+def chat(version, environment, application_name,
          tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the AI21 SDK.
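Dropping gen_ai_endpoint changes the signature of both wrapper factories in this module, which is why openlit/instrumentation/ai21/__init__.py shows a matching +4 -4 in the file list: each registration now passes one argument fewer, and span names are instead derived from the operation type and model (see the span_name lines further down). A sketch of what such a registration plausibly looks like; the module and method paths below are assumptions, not taken from this diff:

from wrapt import wrap_function_wrapper
from openlit.instrumentation.ai21.ai21 import chat

def instrument(version, environment, application_name, tracer,
               pricing_info, trace_content, metrics, disable_metrics):
    # Assumed attribute path; the real one lives in ai21/__init__.py.
    wrap_function_wrapper(
        "ai21.clients.studio.resources.chat.chat_completions",
        "ChatCompletions.create",
        chat(version, environment, application_name, tracer,
             pricing_info, trace_content, metrics, disable_metrics),
    )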
@@ -38,6 +41,7 @@ def chat(gen_ai_endpoint, version, environment, application_name,
     class TracedSyncStream:
         """
         Wrapper for streaming responses to collect metrics and trace data.
+        Wraps the 'ai21.AsyncStream' response to collect message IDs and aggregated response.
 
         This class implements the '__aiter__' and '__anext__' methods that
         handle asynchronous streaming responses.
@@ -50,6 +54,8 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                 wrapped,
                 span,
                 kwargs,
+                server_address,
+                server_port,
                 **args,
             ):
             self.__wrapped__ = wrapped
@@ -57,11 +63,19 @@ def chat(gen_ai_endpoint, version, environment, application_name,
             # Placeholder for aggregating streaming response
             self._llmresponse = ""
             self._response_id = ""
-            self.
-            self.
+            self._finish_reason = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
 
             self._args = args
             self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
 
         def __enter__(self):
             self.__wrapped__.__enter__()
@@ -80,8 +94,15 @@ def chat(gen_ai_endpoint, version, environment, application_name,
         def __next__(self):
             try:
                 chunk = self.__wrapped__.__next__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 chunked = response_as_dict(chunk)
-                # Collect message IDs and aggregated response from events
                 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
                     'content' in chunked.get('choices')[0].get('delta'))):
 
@@ -90,14 +111,19 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                     self._llmresponse += content
 
                 if chunked.get('usage'):
-                    self.
-                    self.
+                    self._input_tokens = chunked.get('usage').get("prompt_tokens")
+                    self._output_tokens = chunked.get('usage').get("completion_tokens")
 
                 self._response_id = chunked.get('id')
+                self._finish_reason = chunked.get('choices')[0].get('finish_reason')
                 return chunk
             except StopIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
@@ -107,7 +133,6 @@ def chat(gen_ai_endpoint, version, environment, application_name,
 
                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -117,43 +142,74 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
+                    request_model = self._kwargs.get("model", "jamba-1.5-mini")
+
                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(
-                        pricing_info, self.
-                        self.
+                    cost = get_chat_model_cost(request_model,
+                        pricing_info, self._input_tokens,
+                        self._output_tokens)
 
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                         SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                        SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                        gen_ai_endpoint)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                        self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                        environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                        application_name)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-
-                    self._span.set_attribute(SemanticConvetion.
-                        self._kwargs.get("
+                        request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                        self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                        self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                        self._kwargs.get("frequency_penalty", 0.0))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                         self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                        self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                        self._kwargs.get("stop", []))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                        self._kwargs.get("temperature",
+                        self._kwargs.get("temperature", 0.4))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                        self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                        [self._finish_reason])
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                        self._response_id)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                        request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                        self._input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                        self._output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                        self._server_address)
+
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                        environment)
+                    self._span.set_attribute(SERVICE_NAME,
+                        application_name)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                         True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                        self._prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                        self._completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        self.
+                        self._input_tokens + self._output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                         cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                        self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                        self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                        version)
                     if trace_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -167,31 +223,35 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes =
-
-
-                            SemanticConvetion.
-
-
-
-
-
-
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model", "jamba-1.5-mini")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=request_model,
+                        )
 
-                        metrics["
-
-
+                        metrics["genai_client_usage_tokens"].record(
+                            self._input_tokens + self._output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
                         )
-                        metrics["
-
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                 except Exception as e:
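The hand-rolled attributes dict deleted above (largely lost in this rendering) is replaced throughout the module by create_metrics_attributes from openlit/__helpers.py. Its body is not shown in this diff; a plausible sketch, assuming it simply maps its keyword arguments onto the standard OTel GenAI attribute keys:

from opentelemetry.sdk.resources import SERVICE_NAME, DEPLOYMENT_ENVIRONMENT

def create_metrics_attributes(service_name, deployment_environment, operation,
                              system, request_model, server_address, server_port,
                              response_model):
    # Shared attribute set recorded on every GenAI metric data point.
    return {
        "telemetry.sdk.name": "openlit",
        SERVICE_NAME: service_name,
        DEPLOYMENT_ENVIRONMENT: deployment_environment,
        "gen_ai.operation.name": operation,
        "gen_ai.system": system,
        "gen_ai.request.model": request_model,
        "server.address": server_address,
        "server.port": server_port,
        "gen_ai.response.model": response_model,
    }

Recording one shared attribute set on genai_client_usage_tokens, genai_client_operation_duration, genai_server_tbt and genai_server_ttft is what lets all of these instruments be sliced by model, system and deployment in a single query.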
@@ -220,20 +280,25 @@ def chat(gen_ai_endpoint, version, environment, application_name,
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
+        request_model = kwargs.get("model", "jamba-1.5-mini")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = wrapped(*args, **kwargs)
-            span = tracer.start_span(
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-            return TracedSyncStream(awaited_wrapped, span, kwargs)
+            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
         # Handling for non-streaming responses
         else:
-
-
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = wrapped(*args, **kwargs)
+                end_time = time.time()
 
                 response_dict = response_as_dict(response)
 
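set_server_address_and_port, used above to stamp server.address and server.port onto spans and metrics, is another new helper in openlit/__helpers.py whose body this diff does not include. A minimal sketch, under the assumption that it reads the SDK client's base URL when one is configured and otherwise falls back to the given defaults (api.ai21.com:443 here):

from urllib.parse import urlparse

def set_server_address_and_port(instance, default_address, default_port):
    # Derive the upstream host/port from the wrapped client, if exposed.
    base_url = getattr(getattr(instance, "_client", instance), "base_url", None)
    if base_url:
        parsed = urlparse(str(base_url))
        return parsed.hostname or default_address, parsed.port or default_port
    return default_address, default_port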
@@ -247,7 +312,6 @@ def chat(gen_ai_endpoint, version, environment, application_name,
 
                 if isinstance(content, list):
                     content_str = ", ".join(
-                        # pylint: disable=line-too-long
                         f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                         if "type" in item else f'text: {item["text"]}'
                         for item in content
@@ -257,30 +321,64 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                         formatted_messages.append(f"{role}: {content}")
                 prompt = "\n".join(formatted_messages)
 
-
+                input_tokens = response_dict.get('usage').get('prompt_tokens')
+                output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(request_model,
+                    pricing_info, input_tokens,
+                    output_tokens)
+
+                # Set base span attribues (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                     SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                    SemanticConvetion.GEN_AI_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                    response_dict.get("id"))
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                    application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-
-                span.set_attribute(SemanticConvetion.
-                    kwargs.get("
+                    request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                    kwargs.get("seed", ""))
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                    server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                    kwargs.get("frequency_penalty", 0.0))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                     kwargs.get("max_tokens", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                    kwargs.get("presence_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                    kwargs.get("stop", []))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                    kwargs.get("temperature",
+                    kwargs.get("temperature", 0.4))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                    kwargs.get("top_p", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                    response_dict.get("id"))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                    request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                    input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                    output_tokens)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                    server_address)
+
+                # Set base span attribues (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                    environment)
+                span.set_attribute(SERVICE_NAME,
+                    application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                     False)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                    input_tokens + output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                    cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                    end_time - start_time)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                    version)
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -289,93 +387,54 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                         },
                     )
 
-
-                if "tools" not in kwargs:
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                        pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
-                        response_dict.get('usage', {}).get('completion_tokens', None))
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                        response_dict.get('usage', {}).get('prompt_tokens', None))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                        response_dict.get('usage', {}).get('completion_tokens', None))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        response_dict.get('usage', {}).get('total_tokens', None))
+                for i in range(kwargs.get('n',1)):
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                        [response_dict.get('choices'
-[18 deleted lines not captured in this rendering]
-                    span.add_event(
-                        name=attribute_name,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                        },
-                    )
-                    i += 1
-
-                    # Return original response
-                    return response
-
-                # Set span attributes when tools is passed to the function call
-                elif "tools" in kwargs:
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                        pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                        response_dict.get('usage').get('completion_tokens'))
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
-                        },
-                    )
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                        response_dict.get('usage').get('prompt_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                        response_dict.get('usage').get('completion_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        response_dict.get('usage').get('total_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                        cost)
+                        [response_dict.get('choices')[i].get('finish_reason')])
+                    if trace_content:
+                        span.add_event(
+                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                            attributes={
+                                # pylint: disable=line-too-long
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                            },
+                        )
+                    if kwargs.get('tools'):
+                        span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                            str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                    if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            "text")
+                    elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            "json")
 
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes =
-
-
-                        SemanticConvetion.
-
-
-
-
-
-
-                            SemanticConvetion.GEN_AI_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "jamba-1.5-mini")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
 
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_server_ttft"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
-                    metrics["
-                    metrics["
-                    metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
                 # Return original response
@@ -390,13 +449,12 @@ def chat(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def chat_rag(gen_ai_endpoint, version, environment, application_name,
+def chat_rag(version, environment, application_name,
              tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the AI21 SDK.
@@ -425,180 +483,173 @@ def chat_rag(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'chat.completions' method.
         """
 
-
-
-
-        # pylint: disable=no-else-return
-        if streaming:
-            # # Special handling for streaming response to accommodate the nature of data flow
-            # awaited_wrapped = wrapped(*args, **kwargs)
-            # span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
-
-            # return TracedSyncStream(awaited_wrapped, span, kwargs)
-
-            return
-
-        # Handling for non-streaming responses
-        else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                response = wrapped(*args, **kwargs)
-
-                response_dict = response_as_dict(response)
+        server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
+        request_model = kwargs.get("model", "jamba-1.5-mini")
 
-
-                # Format 'messages' into a single string
-                message_prompt = kwargs.get("messages", "")
-                formatted_messages = []
-                for message in message_prompt:
-                    role = message.role
-                    content = message.content
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-
-
-
-
-                            if "type" in item else f'text: {item["text"]}'
-                            for item in content
-                        )
-                        formatted_messages.append(f"{role}: {content_str}")
-                    else:
-                        formatted_messages.append(f"{role}: {content}")
-                prompt = "\n".join(formatted_messages)
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
+            response = wrapped(*args, **kwargs)
+            end_time = time.time()
 
-
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                    SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                    SemanticConvetion.GEN_AI_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                    response_dict.get("id"))
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                    kwargs.get("model", "jamba-1.5-mini"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                    False)
-                span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_SEGMENTS,
-                    kwargs.get("max_segments", -1))
-                span.set_attribute(SemanticConvetion.GEN_AI_RAG_STRATEGY,
-                    kwargs.get("retrieval_strategy", "segments"))
-                span.set_attribute(SemanticConvetion.GEN_AI_RAG_SIMILARITY_THRESHOLD,
-                    kwargs.get("retrieval_similarity_threshold", -1))
-                span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_NEIGHBORS,
-                    kwargs.get("max_neighbors", -1))
-                span.set_attribute(SemanticConvetion.GEN_AI_RAG_FILE_IDS,
-                    str(kwargs.get("file_ids", "")))
-                span.set_attribute(SemanticConvetion.GEN_AI_RAG_DOCUMENTS_PATH,
-                    kwargs.get("path", ""))
+            response_dict = response_as_dict(response)
 
-[6 deleted lines not captured in this rendering]
+            try:
+                # Format 'messages' into a single string
+                message_prompt = kwargs.get("messages", "")
+                formatted_messages = []
+                for message in message_prompt:
+                    role = message.role
+                    content = message.content
+
+                    if isinstance(content, list):
+                        content_str = ", ".join(
+                            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                            if "type" in item else f'text: {item["text"]}'
+                            for item in content
                         )
-[17 deleted lines not captured in this rendering]
+                        formatted_messages.append(f"{role}: {content_str}")
+                    else:
+                        formatted_messages.append(f"{role}: {content}")
+                prompt = "\n".join(formatted_messages)
+
+                input_tokens = general_tokens(prompt)
+
+                # Set base span attribues (OTel Semconv)
+                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                    SemanticConvetion.GEN_AI_SYSTEM_AI21)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                    request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                    kwargs.get("seed", ""))
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                    server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                    kwargs.get("frequency_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                    kwargs.get("max_tokens", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                    kwargs.get("presence_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                    kwargs.get("stop", []))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                    kwargs.get("temperature", 0.4))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                    kwargs.get("top_p", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                    response_dict.get("id"))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                    request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                    input_tokens)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                    server_address)
+
+                # Set base span attribues (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                    environment)
+                span.set_attribute(SERVICE_NAME,
+                    application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                    False)
+                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                    end_time - start_time)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                    version)
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_SEGMENTS,
+                    kwargs.get("max_segments", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_STRATEGY,
+                    kwargs.get("retrieval_strategy", "segments"))
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_SIMILARITY_THRESHOLD,
+                    kwargs.get("retrieval_similarity_threshold", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_NEIGHBORS,
+                    kwargs.get("max_neighbors", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_FILE_IDS,
+                    str(kwargs.get("file_ids", "")))
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_DOCUMENTS_PATH,
+                    kwargs.get("path", ""))
+                if trace_content:
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+
+                output_tokens = 0
+                for i in range(kwargs.get('n',1)):
+                    output_tokens += general_tokens(response_dict.get('choices')[i].get('content'))
 
-
-                else:
-                    i = 0
-                    completion_tokens = 0
-                    while i < kwargs["n"] and trace_content is True:
-                        completion_tokens += general_tokens(response_dict.get('choices')[i].get("message").get("content"))
-                        attribute_name = f"gen_ai.content.completion.{i}"
-                        span.add_event(
-                            name=attribute_name,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                            },
-                        )
-                        i += 1
-
-                    # Return original response
-                    return response
-
-                # Set span attributes when tools is passed to the function call
-                elif "tools" in kwargs:
-                    completion_tokens = -1
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                        pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                        response_dict.get('usage').get('completion_tokens'))
+                    if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                             attributes={
-
+                                # pylint: disable=line-too-long
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('content')),
                             },
                         )
-[47 deleted lines not captured in this rendering]
+                    if kwargs.get('tools'):
+                        span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                            str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                    if isinstance(response_dict.get('choices')[i].get('content'), str):
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            "text")
+                    elif response_dict.get('choices')[i].get('content') is not None:
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            "json")
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(request_model,
+                    pricing_info, input_tokens,
+                    output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                    cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                    output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                    input_tokens + output_tokens)
+
+                span.set_status(Status(StatusCode.OK))
+
+                if disable_metrics is False:
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
+
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_server_ttft"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_requests"].add(1, attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                    metrics["genai_cost"].record(cost, attributes)
+
+                # Return original response
+                return response
+
+            except Exception as e:
+                handle_exception(span, e)
+                logger.error("Error in trace creation: %s", e)
+
+                # Return original response
+                return response
 
     return wrapper