openlit 1.34.4__py3-none-any.whl → 1.34.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/instrumentation/google_ai_studio/__init__.py +2 -4
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +0 -6
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +0 -6
- openlit/instrumentation/google_ai_studio/utils.py +1 -2
- openlit/instrumentation/openai/async_openai.py +1 -1
- openlit/instrumentation/premai/__init__.py +0 -1
- openlit/instrumentation/premai/premai.py +84 -454
- openlit/instrumentation/premai/utils.py +325 -0
- openlit/instrumentation/reka/__init__.py +5 -7
- openlit/instrumentation/reka/async_reka.py +25 -163
- openlit/instrumentation/reka/reka.py +24 -162
- openlit/instrumentation/reka/utils.py +193 -0
- openlit/instrumentation/together/__init__.py +3 -5
- openlit/instrumentation/together/async_together.py +70 -476
- openlit/instrumentation/together/together.py +69 -475
- openlit/instrumentation/together/utils.py +320 -0
- {openlit-1.34.4.dist-info → openlit-1.34.7.dist-info}/METADATA +1 -1
- {openlit-1.34.4.dist-info → openlit-1.34.7.dist-info}/RECORD +20 -17
- {openlit-1.34.4.dist-info → openlit-1.34.7.dist-info}/LICENSE +0 -0
- {openlit-1.34.4.dist-info → openlit-1.34.7.dist-info}/WHEEL +0 -0
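The bulk of the change is one refactor applied uniformly to the premai, reka, and together instrumentations: the span-attribute, cost, and metric boilerplate that was previously duplicated inside each sync and async wrapper moves into a new per-provider utils.py (process_chunk, process_chat_response, process_streaming_chat_response, process_image_response), and the wrappers shrink to span management plus a single delegation call. The diff below, for openlit/instrumentation/together/async_together.py, shows the pattern in full. As a rough illustration of the streaming side, here is a minimal, hypothetical sketch; the names follow the diff, but the helper body and the dict-based chunk format are invented for the example, and the real helpers take many more arguments:

```python
# Hypothetical, minimal sketch of the refactored streaming pattern; this is
# not the openlit implementation, just the shape of the delegation.
import time


def process_chunk(state, chunk):
    # Fold one streamed chunk into the wrapper's accumulated state.
    # (The real helper reads OpenAI-style 'choices'/'usage' fields.)
    state._llmresponse += chunk.get("content", "")
    usage = chunk.get("usage")
    if usage:
        state._input_tokens = usage.get("prompt_tokens", 0)
        state._output_tokens = usage.get("completion_tokens", 0)


class TracedStream:
    """Wraps a streaming iterator and records telemetry state per chunk."""

    def __init__(self, wrapped):
        self.__wrapped__ = wrapped
        self._llmresponse = ""
        self._input_tokens = 0
        self._output_tokens = 0
        self._start_time = time.time()

    def __iter__(self):
        for chunk in self.__wrapped__:
            process_chunk(self, chunk)  # one-line delegation per chunk
            yield chunk
        # On exhaustion, the real wrapper opens a span and hands all of
        # this accumulated state to process_streaming_chat_response(self, ...).


# Usage with a fake chunk stream:
chunks = [
    {"content": "Hello, "},
    {"content": "world."},
    {"content": "", "usage": {"prompt_tokens": 5, "completion_tokens": 3}},
]
stream = TracedStream(iter(chunks))
print("".join(c["content"] for c in stream))        # Hello, world.
print(stream._input_tokens, stream._output_tokens)  # 5 3
```

The payoff is visible in the line counts above: each provider's wrapper modules lose several hundred lines, while the new utils.py modules add back a few hundred shared ones.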
openlit/instrumentation/together/async_together.py

@@ -1,58 +1,37 @@
 """
-Module for monitoring Together calls.
+Module for monitoring Together API calls.
 """
 
-import logging
 import time
-from opentelemetry.trace import SpanKind
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
-    get_chat_model_cost,
-    get_image_model_cost,
     handle_exception,
-    response_as_dict,
-    calculate_ttft,
-    calculate_tbt,
-    create_metrics_attributes,
     set_server_address_and_port
 )
+from openlit.instrumentation.together.utils import (
+    process_chat_response,
+    process_chunk,
+    process_streaming_chat_response,
+    process_image_response
+)
 from openlit.semcov import SemanticConvention
 
-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
-
 def async_completion(version, environment, application_name,
-                     tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for chat completions to collect metrics.
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Together AI API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Together AI usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat completions method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     class TracedAsyncStream:
         """
-        Wrapper for streaming responses to collect metrics and telemetry.
-        Wraps the response to collect message IDs and aggregated response.
-
-        This class implements the '__aiter__' and '__anext__' methods that
-        handle asynchronous streaming responses.
-
-        This class also implements '__aenter__' and '__aexit__' methods that
-        handle asynchronous context management protocol.
+        Wrapper for streaming responses to collect telemetry.
         """
+
         def __init__(
             self,
             wrapped,
             span,
+            span_name,
             kwargs,
             server_address,
             server_port,
@@ -60,14 +39,14 @@ def async_completion(version, environment, application_name,
         ):
             self.__wrapped__ = wrapped
             self._span = span
-
+            self._span_name = span_name
             self._llmresponse = ""
             self._response_id = ""
             self._response_model = ""
-            self._finish_reason = ""
             self._input_tokens = 0
             self._output_tokens = 0
-
+            self._finish_reason = ""
+            self._tools = None
             self._args = args
             self._kwargs = kwargs
            self._start_time = time.time()
@@ -95,401 +74,86 @@ def async_completion(version, environment, application_name,
         async def __anext__(self):
             try:
                 chunk = await self.__wrapped__.__anext__()
-
-                # Record the timestamp for the current chunk
-                self._timestamps.append(end_time)
-
-                if len(self._timestamps) == 1:
-                    # Calculate time to first chunk
-                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
-
-                chunked = response_as_dict(chunk)
-                # Collect message IDs and aggregated response from events
-                if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
-                    'content' in chunked.get('choices')[0].get('delta'))):
-
-                    content = chunked.get('choices')[0].get('delta').get('content')
-                    if content:
-                        self._llmresponse += content
-
-                if chunked.get('usage'):
-                    self._response_id = chunked.get('id')
-                    self._response_model = chunked.get('model')
-                    self._finish_reason = str(chunked.get('choices')[0].get('finish_reason'))
-                    self._input_tokens = chunked.get('usage').get('prompt_tokens')
-                    self._output_tokens = chunked.get('usage').get('completion_tokens')
-
+                process_chunk(self, chunk)
                 return chunk
-            except StopAsyncIteration:
-                # Handling exception ensure observability without disrupting operation
+            except StopIteration:
                 try:
-                    self._end_time = time.time()
-                    if len(self._timestamps) > 1:
-                        self._tbt = calculate_tbt(self._timestamps)
-
-                    # Format 'messages' into a single string
-                    message_prompt = self._kwargs.get("messages", "")
-                    formatted_messages = []
-                    for message in message_prompt:
-                        role = message["role"]
-                        content = message["content"]
-
-                        if isinstance(content, list):
-                            content_str_list = []
-                            for item in content:
-                                if item["type"] == "text":
-                                    content_str_list.append(f'text: {item["text"]}')
-                                elif (item["type"] == "image_url" and
-                                      not item["image_url"]["url"].startswith("data:")):
-                                    content_str_list.append(f'image_url: {item["image_url"]["url"]}')
-                            content_str = ", ".join(content_str_list)
-                            formatted_messages.append(f"{role}: {content_str}")
-                        else:
-                            formatted_messages.append(f"{role}: {content}")
-                    prompt = "\n".join(formatted_messages)
-
-                    request_model = self._kwargs.get("model", "gpt-4o")
-
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(request_model,
-                                               pricing_info, self._input_tokens,
-                                               self._output_tokens)
-
-                    # Set Span attributes (OTel Semconv)
-                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                                             SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                                             SemanticConvention.GEN_AI_SYSTEM_TOGETHER)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                                             request_model)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
-                                             self._kwargs.get("seed", ""))
-                    self._span.set_attribute(SemanticConvention.SERVER_PORT,
-                                             self._server_port)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                             self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                                             self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                             self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
-                                             self._kwargs.get("stop", []))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                                             self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                                             self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
-                                             [self._finish_reason])
-                    self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
-                                             self._response_id)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                                             self._response_model)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                                             self._input_tokens)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                             self._output_tokens)
-                    self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                                             self._server_address)
-
-                    if isinstance(self._llmresponse, str):
-                        self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                                 "text")
-                    else:
-                        self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                                 "json")
-
-                    # Set Span attributes (Extra)
-                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                             environment)
-                    self._span.set_attribute(SERVICE_NAME,
-                                             application_name)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
-                                             self._kwargs.get("user", ""))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                                             True)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                                             self._input_tokens + self._output_tokens)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                                             cost)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
-                                             self._tbt)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                                             self._ttft)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                                             version)
-                    if capture_message_content:
-                        self._span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        self._span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
-                            },
-                        )
-                    self._span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvention.GEN_AI_SYSTEM_TOGETHER,
-                            request_model=request_model,
-                            server_address=self._server_address,
-                            server_port=self._server_port,
-                            response_model=self._response_model,
-                        )
-
-                        metrics["genai_client_usage_tokens"].record(
-                            self._input_tokens + self._output_tokens, attributes
-                        )
-                        metrics["genai_client_operation_duration"].record(
-                            self._end_time - self._start_time, attributes
-                        )
-                        metrics["genai_server_tbt"].record(
-                            self._tbt, attributes
-                        )
-                        metrics["genai_server_ttft"].record(
-                            self._ttft, attributes
+                    with tracer.start_as_current_span(self._span_name, kind= SpanKind.CLIENT) as self._span:
+                        process_streaming_chat_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
                         )
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
 
                 except Exception as e:
                     handle_exception(self._span, e)
-
-                finally:
-                    self._span.end()
+
                 raise
+
     async def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'chat.completions' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'chat.completions' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat.completions' method.
-            kwargs: Keyword arguments for the 'chat.completions' method.
-
-        Returns:
-            The response from the original 'chat.completions' method.
+        Wraps the GenAI function call.
         """
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+
         server_address, server_port = set_server_address_and_port(instance, "api.together.xyz", 443)
         request_model = kwargs.get("model", "gpt-4o")
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-        # pylint: disable=no-else-return
        if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = await wrapped(*args, **kwargs)
             span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
-
-            return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
+            return TracedAsyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
 
         # Handling for non-streaming responses
         else:
             with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
                 start_time = time.time()
                 response = await wrapped(*args, **kwargs)
-                end_time = time.time()
-
-                response_dict = response_as_dict(response)
 
                 try:
-                    # Format 'messages' into a single string
-                    message_prompt = kwargs.get("messages", "")
-                    formatted_messages = []
-                    for message in message_prompt:
-                        role = message["role"]
-                        content = message["content"]
-
-                        if isinstance(content, list):
-                            content_str = ", ".join(
-                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
-                            formatted_messages.append(f"{role}: {content_str}")
-                        else:
-                            formatted_messages.append(f"{role}: {content}")
-                    prompt = "\n".join(formatted_messages)
-
-                    input_tokens = response_dict.get('usage').get('prompt_tokens')
-                    output_tokens = response_dict.get('usage').get('completion_tokens')
-
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(request_model,
-                                               pricing_info, input_tokens,
-                                               output_tokens)
-
-                    # Set base span attribues (OTel Semconv)
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                                       SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                                       SemanticConvention.GEN_AI_SYSTEM_TOGETHER)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                                       request_model)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
-                                       kwargs.get("seed", ""))
-                    span.set_attribute(SemanticConvention.SERVER_PORT,
-                                       server_port)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                       kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                                       kwargs.get("max_tokens", -1))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                       kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
-                                       kwargs.get("stop", []))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                                       kwargs.get("temperature", 1.0))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                                       kwargs.get("top_p", 1.0))
-                    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
-                                       response_dict.get("id"))
-                    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                                       response_dict.get('model'))
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                                       input_tokens)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                       output_tokens)
-                    span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                                       server_address)
-
-                    # Set base span attribues (Extras)
-                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                       environment)
-                    span.set_attribute(SERVICE_NAME,
-                                       application_name)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
-                                       kwargs.get("user", ""))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                                       False)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       input_tokens + output_tokens)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                                       cost)
-                    span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                                       end_time - start_time)
-                    span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                                       version)
-                    if capture_message_content:
-                        span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-
-                    for i in range(kwargs.get('n',1)):
-                        span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
-                                           [str(response_dict.get('choices')[i].get('finish_reason'))])
-                        if capture_message_content:
-                            span.add_event(
-                                name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    # pylint: disable=line-too-long
-                                    SemanticConvention.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
-                                },
-                            )
-                        if kwargs.get('tools'):
-                            span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
-                                               str(response_dict.get('choices')[i].get('message').get('tool_calls')))
-
-                        if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
-                            span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                               "text")
-                        elif response_dict.get('choices')[i].get('message').get('content') is not None:
-                            span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                               "json")
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvention.GEN_AI_SYSTEM_TOGETHER,
-                            request_model=request_model,
-                            server_address=server_address,
-                            server_port=server_port,
-                            response_model=response_dict.get('model'),
-                        )
-
-                        metrics["genai_client_usage_tokens"].record(
-                            input_tokens + output_tokens, attributes
-                        )
-                        metrics["genai_client_operation_duration"].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics["genai_server_ttft"].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                    # Return original response
-                    return response
+                    response = process_chat_response(
+                        response=response,
+                        request_model=request_model,
+                        pricing_info=pricing_info,
+                        server_port=server_port,
+                        server_address=server_address,
+                        environment=environment,
+                        application_name=application_name,
+                        metrics=metrics,
+                        start_time=start_time,
+                        span=span,
+                        capture_message_content=capture_message_content,
+                        disable_metrics=disable_metrics,
+                        version=version,
+                        **kwargs
+                    )
 
                 except Exception as e:
                     handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
 
-
-                    return response
+                return response
 
     return wrapper
 
 def async_image_generate(version, environment, application_name,
-                         tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for image generation to collect metrics.
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Together AI API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Together AI image generation.
-        capture_message_content: Flag indicating whether to trace the input prompt and generated images.
-
-    Returns:
-        A function that wraps the image generation method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     async def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'images.generate' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'images.generate' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'images.generate' method.
-            kwargs: Keyword arguments for the 'images.generate' method.
-
-        Returns:
-            The response from the original 'images.generate' method.
+        Wraps the GenAI function call.
         """
 
         server_address, server_port = set_server_address_and_port(instance, "api.together.xyz", 443)
@@ -502,99 +166,29 @@ def async_image_generate(version, environment, application_name,
            response = await wrapped(*args, **kwargs)
            end_time = time.time()
 
-            images_count = 0
-
            try:
-
-                # Find Image format
-                if "response_format" in kwargs and kwargs["response_format"] == "b64_json":
-                    image = "b64_json"
-                else:
-                    image = "url"
-
-                # Calculate cost of the operation
-                cost = get_image_model_cost(request_model,
-                                            pricing_info, image_size,
-                                            kwargs.get("quality", "standard"))
-
-                for items in response.data:
-
-                    # Set Span attributes (OTel Semconv)
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                                       SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE)
-                    span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                                       SemanticConvention.GEN_AI_SYSTEM_TOGETHER)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                                       request_model)
-                    span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                                       server_address)
-                    span.set_attribute(SemanticConvention.SERVER_PORT,
-                                       server_port)
-                    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
-                                       response.id)
-                    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                                       response.model)
-                    span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                       "image")
-
-                    # Set Span attributes (Extras)
-                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                       environment)
-                    span.set_attribute(SERVICE_NAME,
-                                       application_name)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IMAGE_SIZE,
-                                       image_size)
-                    span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                                       version)
-
-                    if capture_message_content:
-                        span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
-                            },
-                        )
-                        attribute_name = f"{SemanticConvention.GEN_AI_RESPONSE_IMAGE}.{images_count}"
-                        span.add_event(
-                            name=attribute_name,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: getattr(items, image),
-                            },
-                        )
+                response = process_image_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_address=server_address,
+                    server_port=server_port,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    start_time=start_time,
+                    end_time=end_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )
 
-                    images_count+=1
-
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                                   len(response.data) * cost)
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = create_metrics_attributes(
-                        service_name=application_name,
-                        deployment_environment=environment,
-                        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE,
-                        system=SemanticConvention.GEN_AI_SYSTEM_TOGETHER,
-                        request_model=request_model,
-                        server_address=server_address,
-                        server_port=server_port,
-                        response_model=response.model,
-                    )
-
-                    metrics["genai_client_operation_duration"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
-
-                # Return original response
                return response
 
            except Exception as e:
                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-                # Return original response
                return response
 
    return wrapper
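The non-streaming paths reduce the same way: the try block becomes a single process_chat_response or process_image_response call that returns the original response, so a telemetry failure is caught by handle_exception while the caller still receives the API result. A hypothetical reduction of that shape (the dict stands in for an OpenTelemetry span, and the helper body is invented for illustration):

```python
# Hypothetical sketch of the non-streaming delegation; process_chat_response
# here is a stand-in with an invented body, and the dict replaces a real
# OpenTelemetry span. The real helper also records cost, tokens, and metrics.
import time


def process_chat_response(response, request_model, start_time, span, **kwargs):
    # Annotate the stand-in span, then hand the untouched response back.
    span["gen_ai.request.model"] = request_model
    span["duration_s"] = time.time() - start_time
    return response


def wrapper(request_model, **kwargs):
    span = {}
    start_time = time.time()
    response = {"model": request_model, "choices": []}  # fake API response
    try:
        response = process_chat_response(
            response=response,
            request_model=request_model,
            start_time=start_time,
            span=span,
            **kwargs,
        )
    except Exception:
        pass  # telemetry failures must not break the application call
    return response


print(wrapper("meta-llama/Llama-3-8b-chat-hf"))
```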