openlit 1.33.8__py3-none-any.whl → 1.33.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +83 -0
- openlit/__init__.py +1 -1
- openlit/instrumentation/ag2/ag2.py +2 -2
- openlit/instrumentation/ai21/__init__.py +4 -4
- openlit/instrumentation/ai21/ai21.py +370 -319
- openlit/instrumentation/ai21/async_ai21.py +371 -319
- openlit/instrumentation/anthropic/__init__.py +4 -4
- openlit/instrumentation/anthropic/anthropic.py +321 -189
- openlit/instrumentation/anthropic/async_anthropic.py +323 -190
- openlit/instrumentation/assemblyai/__init__.py +1 -1
- openlit/instrumentation/assemblyai/assemblyai.py +59 -43
- openlit/instrumentation/astra/astra.py +4 -4
- openlit/instrumentation/astra/async_astra.py +4 -4
- openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
- openlit/instrumentation/bedrock/__init__.py +1 -1
- openlit/instrumentation/bedrock/bedrock.py +115 -58
- openlit/instrumentation/chroma/chroma.py +4 -4
- openlit/instrumentation/cohere/__init__.py +33 -10
- openlit/instrumentation/cohere/async_cohere.py +610 -0
- openlit/instrumentation/cohere/cohere.py +410 -219
- openlit/instrumentation/controlflow/controlflow.py +2 -2
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
- openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
- openlit/instrumentation/crewai/crewai.py +2 -2
- openlit/instrumentation/dynamiq/dynamiq.py +2 -2
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +73 -47
- openlit/instrumentation/elevenlabs/elevenlabs.py +73 -52
- openlit/instrumentation/embedchain/embedchain.py +4 -4
- openlit/instrumentation/firecrawl/firecrawl.py +2 -2
- openlit/instrumentation/google_ai_studio/__init__.py +9 -9
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
- openlit/instrumentation/gpt4all/gpt4all.py +17 -17
- openlit/instrumentation/groq/async_groq.py +14 -14
- openlit/instrumentation/groq/groq.py +14 -14
- openlit/instrumentation/haystack/haystack.py +2 -2
- openlit/instrumentation/julep/async_julep.py +2 -2
- openlit/instrumentation/julep/julep.py +2 -2
- openlit/instrumentation/langchain/langchain.py +36 -31
- openlit/instrumentation/letta/letta.py +6 -6
- openlit/instrumentation/litellm/async_litellm.py +20 -20
- openlit/instrumentation/litellm/litellm.py +20 -20
- openlit/instrumentation/llamaindex/llamaindex.py +2 -2
- openlit/instrumentation/mem0/mem0.py +2 -2
- openlit/instrumentation/milvus/milvus.py +4 -4
- openlit/instrumentation/mistral/async_mistral.py +18 -18
- openlit/instrumentation/mistral/mistral.py +18 -18
- openlit/instrumentation/multion/async_multion.py +2 -2
- openlit/instrumentation/multion/multion.py +2 -2
- openlit/instrumentation/ollama/async_ollama.py +29 -29
- openlit/instrumentation/ollama/ollama.py +29 -29
- openlit/instrumentation/openai/__init__.py +11 -230
- openlit/instrumentation/openai/async_openai.py +434 -409
- openlit/instrumentation/openai/openai.py +415 -393
- openlit/instrumentation/phidata/phidata.py +2 -2
- openlit/instrumentation/pinecone/pinecone.py +4 -4
- openlit/instrumentation/premai/premai.py +20 -20
- openlit/instrumentation/qdrant/async_qdrant.py +4 -4
- openlit/instrumentation/qdrant/qdrant.py +4 -4
- openlit/instrumentation/reka/async_reka.py +6 -6
- openlit/instrumentation/reka/reka.py +6 -6
- openlit/instrumentation/together/async_together.py +18 -18
- openlit/instrumentation/together/together.py +18 -18
- openlit/instrumentation/transformers/transformers.py +6 -6
- openlit/instrumentation/vertexai/async_vertexai.py +53 -53
- openlit/instrumentation/vertexai/vertexai.py +53 -53
- openlit/instrumentation/vllm/vllm.py +6 -6
- openlit/otel/metrics.py +98 -7
- openlit/semcov/__init__.py +113 -80
- {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/METADATA +1 -1
- openlit-1.33.9.dist-info/RECORD +121 -0
- {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/WHEEL +1 -1
- openlit/instrumentation/openai/async_azure_openai.py +0 -900
- openlit/instrumentation/openai/azure_openai.py +0 -898
- openlit-1.33.8.dist-info/RECORD +0 -122
- {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/LICENSE +0 -0
@@ -1,15 +1,19 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches, too-many-instance-attributes, inconsistent-return-statements
 """
 Module for monitoring AI21 calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
     get_chat_model_cost,
     handle_exception,
     response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port,
     general_tokens
 )
 from openlit.semcov import SemanticConvetion
@@ -17,13 +21,12 @@ from openlit.semcov import SemanticConvetion
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def async_chat(gen_ai_endpoint, version, environment, application_name,
+def async_chat(version, environment, application_name,
                tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the AI21 SDK.
@@ -38,6 +41,7 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
     class TracedAsyncStream:
         """
         Wrapper for streaming responses to collect metrics and trace data.
+        Wraps the 'ai21.AsyncStream' response to collect message IDs and aggregated response.
 
         This class implements the '__aiter__' and '__anext__' methods that
         handle asynchronous streaming responses.
@@ -50,6 +54,8 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
             wrapped,
             span,
             kwargs,
+            server_address,
+            server_port,
             **args,
         ):
             self.__wrapped__ = wrapped
@@ -57,11 +63,19 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
             # Placeholder for aggregating streaming response
             self._llmresponse = ""
             self._response_id = ""
-            self._prompt_tokens = 0
-            self._completion_tokens = 0
+            self._finish_reason = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
 
             self._args = args
             self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
 
         async def __aenter__(self):
             await self.__wrapped__.__aenter__()
@@ -80,8 +94,15 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
         async def __anext__(self):
             try:
                 chunk = await self.__wrapped__.__anext__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 chunked = response_as_dict(chunk)
-                # Collect message IDs and aggregated response from events
                 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
                     'content' in chunked.get('choices')[0].get('delta'))):
 
@@ -90,14 +111,19 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                     self._llmresponse += content
 
                 if chunked.get('usage'):
-                    self._prompt_tokens = chunked.get('usage').get("prompt_tokens")
-                    self._completion_tokens = chunked.get('usage').get("completion_tokens")
+                    self._input_tokens = chunked.get('usage').get("prompt_tokens")
+                    self._output_tokens = chunked.get('usage').get("completion_tokens")
 
                 self._response_id = chunked.get('id')
+                self._finish_reason = chunked.get('choices')[0].get('finish_reason')
                 return chunk
             except StopAsyncIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
@@ -107,7 +133,6 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
 
                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -117,43 +142,74 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                         formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
+                    request_model = self._kwargs.get("model", "jamba-1.5-mini")
+
                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._kwargs.get("model", "jamba-1.5-mini"),
-                                                pricing_info, self._prompt_tokens,
-                                                self._completion_tokens)
+                    cost = get_chat_model_cost(request_model,
+                                                pricing_info, self._input_tokens,
+                                                self._output_tokens)
 
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                             SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                              SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                             SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                             gen_ai_endpoint)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                             self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                             environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                             application_name)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                             self._kwargs.get("model", "jamba-1.5-mini"))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                             self._kwargs.get("top_p", 1.0))
+                                             request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                             self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                             self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                             self._kwargs.get("frequency_penalty", 0.0))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                                              self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                             self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                             self._kwargs.get("stop", []))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                             self._kwargs.get("temperature",
+                                             self._kwargs.get("temperature", 0.4))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                             self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                             [self._finish_reason])
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                             self._response_id)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                             request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                             self._input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                             self._output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                             self._server_address)
+
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                             environment)
+                    self._span.set_attribute(SERVICE_NAME,
+                                             application_name)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                              True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                             self._prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                             self._completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                             self._prompt_tokens + self._completion_tokens)
+                                             self._input_tokens + self._output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                              cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                             self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                             self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                             version)
                     if trace_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -167,31 +223,35 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_AI21,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model", "jamba-1.5-mini")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=request_model,
+                        )
 
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            self._prompt_tokens + self._completion_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            self._input_tokens + self._output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
                        )
-                        metrics["genai_completion_tokens"].add(self._completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(self._prompt_tokens, attributes)
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                 except Exception as e:
@@ -220,19 +280,25 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
+        request_model = kwargs.get("model", "jamba-1.5-mini")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
        # pylint: disable=no-else-return
        if streaming:
            # Special handling for streaming response to accommodate the nature of data flow
            awaited_wrapped = await wrapped(*args, **kwargs)
-           span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+           span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-           return TracedAsyncStream(awaited_wrapped, span, kwargs)
+           return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
        # Handling for non-streaming responses
        else:
-           with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-
+           with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+               start_time = time.time()
                response = await wrapped(*args, **kwargs)
+               end_time = time.time()
 
                response_dict = response_as_dict(response)
 
@@ -246,7 +312,6 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
 
                        if isinstance(content, list):
                            content_str = ", ".join(
-                               # pylint: disable=line-too-long
                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                if "type" in item else f'text: {item["text"]}'
                                for item in content
@@ -256,30 +321,64 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                        formatted_messages.append(f"{role}: {content}")
                    prompt = "\n".join(formatted_messages)
 
-
+                   input_tokens = response_dict.get('usage').get('prompt_tokens')
+                   output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                   # Calculate cost of the operation
+                   cost = get_chat_model_cost(request_model,
+                                              pricing_info, input_tokens,
+                                              output_tokens)
+
+                   # Set base span attribues (OTel Semconv)
                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                   span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                      SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                       SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                   span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                      SemanticConvetion.GEN_AI_TYPE_CHAT)
-                   span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                      gen_ai_endpoint)
-                   span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                      response_dict.get("id"))
-                   span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                      environment)
-                   span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                      application_name)
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                      kwargs.get("model", "jamba-1.5-mini"))
-                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                      kwargs.get("top_p", 1.0))
+                                      request_model)
+                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                      kwargs.get("seed", ""))
+                   span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                      server_port)
+                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                      kwargs.get("frequency_penalty", 0.0))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                                       kwargs.get("max_tokens", -1))
+                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                      kwargs.get("presence_penalty", 0.0))
+                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                      kwargs.get("stop", []))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                      kwargs.get("temperature",
+                                      kwargs.get("temperature", 0.4))
+                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                      kwargs.get("top_p", 1.0))
+                   span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                      response_dict.get("id"))
+                   span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                      request_model)
+                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                      input_tokens)
+                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                      output_tokens)
+                   span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                      server_address)
+
+                   # Set base span attribues (Extras)
+                   span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                      environment)
+                   span.set_attribute(SERVICE_NAME,
+                                      application_name)
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                       False)
+                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                      input_tokens + output_tokens)
+                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                      cost)
+                   span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                      end_time - start_time)
+                   span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                      version)
                    if trace_content:
                        span.add_event(
                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -288,93 +387,54 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                            },
                        )
 
-
-               if "tools" not in kwargs:
-                   # Calculate cost of the operation
-                   cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                               pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
-                               response_dict.get('usage', {}).get('completion_tokens', None))
-
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                      response_dict.get('usage', {}).get('prompt_tokens', None))
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                      response_dict.get('usage', {}).get('completion_tokens', None))
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                      response_dict.get('usage', {}).get('total_tokens', None))
+               for i in range(kwargs.get('n',1)):
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                      [response_dict.get('choices')[0].get('finish_reason')])
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                       span.add_event(
-                           name=attribute_name,
-                           attributes={
-                               SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                           },
-                       )
-                       i += 1
-
-                   # Return original response
-                   return response
-
-               # Set span attributes when tools is passed to the function call
-               elif "tools" in kwargs:
-                   # Calculate cost of the operation
-                   cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                                      pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                                      response_dict.get('usage').get('completion_tokens'))
-                   span.add_event(
-                       name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                       attributes={
-                           SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
-                       },
-                   )
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                      response_dict.get('usage').get('prompt_tokens'))
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                      response_dict.get('usage').get('completion_tokens'))
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                      response_dict.get('usage').get('total_tokens'))
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                      cost)
+                                      [response_dict.get('choices')[i].get('finish_reason')])
+                   if trace_content:
+                       span.add_event(
+                           name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                           attributes={
+                               # pylint: disable=line-too-long
+                               SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                           },
+                       )
+                   if kwargs.get('tools'):
+                       span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                          str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                   if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                       span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                          "text")
+                   elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                       span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                          "json")
 
                span.set_status(Status(StatusCode.OK))
 
                if disable_metrics is False:
-                   attributes = {
-                       TELEMETRY_SDK_NAME:
-                           "openlit",
-                       SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                           application_name,
-                       SemanticConvetion.GEN_AI_SYSTEM:
-                           SemanticConvetion.GEN_AI_SYSTEM_AI21,
-                       SemanticConvetion.GEN_AI_ENVIRONMENT:
-                           environment,
-                       SemanticConvetion.GEN_AI_TYPE:
-                           SemanticConvetion.GEN_AI_TYPE_CHAT,
-                       SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                           kwargs.get("model", "jamba-1.5-mini")
-                   }
+                   attributes = create_metrics_attributes(
+                       service_name=application_name,
+                       deployment_environment=environment,
+                       operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                       system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+                       request_model=request_model,
+                       server_address=server_address,
+                       server_port=server_port,
+                       response_model=request_model,
+                   )
 
+                   metrics["genai_client_usage_tokens"].record(
+                       input_tokens + output_tokens, attributes
+                   )
+                   metrics["genai_client_operation_duration"].record(
+                       end_time - start_time, attributes
+                   )
+                   metrics["genai_server_ttft"].record(
+                       end_time - start_time, attributes
+                   )
                    metrics["genai_requests"].add(1, attributes)
-                   metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
-                   metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
-                   metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
+                   metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                   metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                    metrics["genai_cost"].record(cost, attributes)
 
                # Return original response
@@ -389,13 +449,12 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
 
    return wrapper
 
-def async_chat_rag(gen_ai_endpoint, version, environment, application_name,
+def async_chat_rag(version, environment, application_name,
                   tracer, pricing_info, trace_content, metrics, disable_metrics):
    """
    Generates a telemetry wrapper for chat completions to collect metrics.
 
    Args:
-       gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the AI21 SDK.
@@ -424,180 +483,173 @@ def async_chat_rag(gen_ai_endpoint, version, environment, application_name,
        The response from the original 'chat.completions' method.
        """
 
-
-
-
-       # pylint: disable=no-else-return
-       if streaming:
-           # # Special handling for streaming response to accommodate the nature of data flow
-           # awaited_wrapped = wrapped(*args, **kwargs)
-           # span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
-
-           # return TracedSyncStream(awaited_wrapped, span, kwargs)
+       server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
+       request_model = kwargs.get("model", "jamba-1.5-mini")
 
-
+       span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-
-
-
-
-           response = await wrapped(*args, **kwargs)
+       with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+           start_time = time.time()
+           response = await wrapped(*args, **kwargs)
+           end_time = time.time()
 
-
+           response_dict = response_as_dict(response)
 
-           try:
-               # Format 'messages' into a single string
-               message_prompt = kwargs.get("messages", "")
-               formatted_messages = []
-               for message in message_prompt:
-                   role = message.role
-                   content = message.content
-
-                   if isinstance(content, list):
-                       content_str = ", ".join(
-                           # pylint: disable=line-too-long
-                           f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                           if "type" in item else f'text: {item["text"]}'
-                           for item in content
-                       )
-                       formatted_messages.append(f"{role}: {content_str}")
-                   else:
-                       formatted_messages.append(f"{role}: {content}")
-               prompt = "\n".join(formatted_messages)
-
-               # Set base span attribues
-               span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-               span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                  SemanticConvetion.GEN_AI_SYSTEM_AI21)
-               span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                  SemanticConvetion.GEN_AI_TYPE_CHAT)
-               span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                  gen_ai_endpoint)
-               span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                  response_dict.get("id"))
-               span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                  environment)
-               span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                  application_name)
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                  kwargs.get("model", "jamba-1.5-mini"))
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                  False)
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_SEGMENTS,
-                                  kwargs.get("max_segments", -1))
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_STRATEGY,
-                                  kwargs.get("retrieval_strategy", "segments"))
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_SIMILARITY_THRESHOLD,
-                                  kwargs.get("retrieval_similarity_threshold", -1))
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_NEIGHBORS,
-                                  kwargs.get("max_neighbors", -1))
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_FILE_IDS,
-                                  str(kwargs.get("file_ids", "")))
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_DOCUMENTS_PATH,
-                                  kwargs.get("path", ""))
-
-               if trace_content:
-                   span.add_event(
-                       name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                       attributes={
-                           SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                       },
+           try:
+               # Format 'messages' into a single string
+               message_prompt = kwargs.get("messages", "")
+               formatted_messages = []
+               for message in message_prompt:
+                   role = message.role
+                   content = message.content
+
+                   if isinstance(content, list):
+                       content_str = ", ".join(
+                           f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                           if "type" in item else f'text: {item["text"]}'
+                           for item in content
                    )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                       formatted_messages.append(f"{role}: {content_str}")
+                   else:
+                       formatted_messages.append(f"{role}: {content}")
+               prompt = "\n".join(formatted_messages)
+
+               input_tokens = general_tokens(prompt)
+
+               # Set base span attribues (OTel Semconv)
+               span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+               span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                  SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+               span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                  SemanticConvetion.GEN_AI_SYSTEM_AI21)
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                  request_model)
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                  kwargs.get("seed", ""))
+               span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                  server_port)
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                  kwargs.get("frequency_penalty", 0.0))
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                  kwargs.get("max_tokens", -1))
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                  kwargs.get("presence_penalty", 0.0))
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                  kwargs.get("stop", []))
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                  kwargs.get("temperature", 0.4))
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                  kwargs.get("top_p", 1.0))
+               span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                  response_dict.get("id"))
+               span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                  request_model)
+               span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                  input_tokens)
+               span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                  server_address)
+
+               # Set base span attribues (Extras)
+               span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                  environment)
+               span.set_attribute(SERVICE_NAME,
+                                  application_name)
+               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                  False)
+               span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                  end_time - start_time)
+               span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                  version)
+               span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_SEGMENTS,
+                                  kwargs.get("max_segments", -1))
+               span.set_attribute(SemanticConvetion.GEN_AI_RAG_STRATEGY,
+                                  kwargs.get("retrieval_strategy", "segments"))
+               span.set_attribute(SemanticConvetion.GEN_AI_RAG_SIMILARITY_THRESHOLD,
+                                  kwargs.get("retrieval_similarity_threshold", -1))
+               span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_NEIGHBORS,
+                                  kwargs.get("max_neighbors", -1))
+               span.set_attribute(SemanticConvetion.GEN_AI_RAG_FILE_IDS,
+                                  str(kwargs.get("file_ids", "")))
+               span.set_attribute(SemanticConvetion.GEN_AI_RAG_DOCUMENTS_PATH,
+                                  kwargs.get("path", ""))
+               if trace_content:
+                   span.add_event(
+                       name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                       attributes={
+                           SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                       },
+                   )
+
+               output_tokens = 0
+               for i in range(kwargs.get('n',1)):
+                   output_tokens += general_tokens(response_dict.get('choices')[i].get('content'))
 
-
-               else:
-                   i = 0
-                   completion_tokens = 0
-                   while i < kwargs["n"] and trace_content is True:
-                       completion_tokens += general_tokens(response_dict.get('choices')[i].get("message").get("content"))
-                       attribute_name = f"gen_ai.content.completion.{i}"
-                       span.add_event(
-                           name=attribute_name,
-                           attributes={
-                               SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                           },
-                       )
-                       i += 1
-
-               # Return original response
-               return response
-
-               # Set span attributes when tools is passed to the function call
-               elif "tools" in kwargs:
-                   completion_tokens = -1
-                   # Calculate cost of the operation
-                   cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                                      pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                                      response_dict.get('usage').get('completion_tokens'))
+               if trace_content:
                    span.add_event(
                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                        attributes={
-
+                           # pylint: disable=line-too-long
+                           SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('content')),
                        },
                    )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+               if kwargs.get('tools'):
+                   span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                      str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+               if isinstance(response_dict.get('choices')[i].get('content'), str):
+                   span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                      "text")
+               elif response_dict.get('choices')[i].get('content') is not None:
+                   span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                      "json")
+
+               # Calculate cost of the operation
+               cost = get_chat_model_cost(request_model,
+                                          pricing_info, input_tokens,
+                                          output_tokens)
+               span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                  cost)
+               span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                  output_tokens)
+               span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                  input_tokens + output_tokens)
+
+               span.set_status(Status(StatusCode.OK))
+
+               if disable_metrics is False:
+                   attributes = create_metrics_attributes(
+                       service_name=application_name,
+                       deployment_environment=environment,
+                       operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                       system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+                       request_model=request_model,
+                       server_address=server_address,
+                       server_port=server_port,
+                       response_model=request_model,
+                   )
+
+                   metrics["genai_client_usage_tokens"].record(
+                       input_tokens + output_tokens, attributes
+                   )
+                   metrics["genai_client_operation_duration"].record(
+                       end_time - start_time, attributes
+                   )
+                   metrics["genai_server_ttft"].record(
+                       end_time - start_time, attributes
+                   )
+                   metrics["genai_requests"].add(1, attributes)
+                   metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                   metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                   metrics["genai_cost"].record(cost, attributes)
+
+               # Return original response
+               return response
+
+           except Exception as e:
+               handle_exception(span, e)
+               logger.error("Error in trace creation: %s", e)
+
+               # Return original response
+               return response
 
    return wrapper