openlit 1.34.3__py3-none-any.whl → 1.34.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/instrumentation/ai21/__init__.py +22 -23
- openlit/instrumentation/ai21/ai21.py +58 -51
- openlit/instrumentation/ai21/async_ai21.py +58 -51
- openlit/instrumentation/ai21/utils.py +134 -243
- openlit/instrumentation/google_ai_studio/__init__.py +2 -4
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +0 -6
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +0 -6
- openlit/instrumentation/google_ai_studio/utils.py +1 -2
- openlit/instrumentation/together/__init__.py +3 -5
- openlit/instrumentation/together/async_together.py +70 -476
- openlit/instrumentation/together/together.py +69 -475
- openlit/instrumentation/together/utils.py +320 -0
- {openlit-1.34.3.dist-info → openlit-1.34.5.dist-info}/METADATA +1 -1
- {openlit-1.34.3.dist-info → openlit-1.34.5.dist-info}/RECORD +16 -15
- {openlit-1.34.3.dist-info → openlit-1.34.5.dist-info}/LICENSE +0 -0
- {openlit-1.34.3.dist-info → openlit-1.34.5.dist-info}/WHEEL +0 -0
@@ -14,9 +14,7 @@ from openlit.__helpers import (
|
|
14
14
|
general_tokens,
|
15
15
|
extract_and_format_input,
|
16
16
|
get_chat_model_cost,
|
17
|
-
handle_exception,
|
18
17
|
create_metrics_attributes,
|
19
|
-
otel_event,
|
20
18
|
concatenate_all_contents
|
21
19
|
)
|
22
20
|
from openlit.semcov import SemanticConvention
|
@@ -29,36 +27,38 @@ def setup_common_span_attributes(span, request_model, kwargs, tokens,
|
|
29
27
|
"""
|
30
28
|
|
31
29
|
# Base attributes from SDK and operation settings.
|
32
|
-
span.set_attribute(TELEMETRY_SDK_NAME,
|
30
|
+
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
33
31
|
span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
|
34
32
|
span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_AI21)
|
35
33
|
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
|
36
34
|
span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
|
37
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, kwargs.get(
|
38
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, kwargs.get(
|
39
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, kwargs.get(
|
40
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, kwargs.get(
|
41
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, kwargs.get(
|
42
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, kwargs.get(
|
43
|
-
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, kwargs.get(
|
35
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, kwargs.get("seed", ""))
|
36
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, kwargs.get("frequency_penalty", 0.0))
|
37
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, kwargs.get("max_tokens", -1))
|
38
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, kwargs.get("presence_penalty", 0.0))
|
39
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, kwargs.get("stop", []))
|
40
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, kwargs.get("temperature", 0.4))
|
41
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, kwargs.get("top_p", 1.0))
|
44
42
|
|
45
43
|
# Add token-related attributes if available.
|
46
|
-
if
|
47
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [tokens[
|
48
|
-
if
|
49
|
-
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, tokens[
|
50
|
-
if
|
51
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, tokens[
|
52
|
-
if
|
53
|
-
span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, tokens[
|
54
|
-
if
|
55
|
-
span.set_attribute(SemanticConvention.
|
44
|
+
if "finish_reason" in tokens:
|
45
|
+
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [tokens["finish_reason"]])
|
46
|
+
if "response_id" in tokens:
|
47
|
+
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, tokens["response_id"])
|
48
|
+
if "input_tokens" in tokens:
|
49
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, tokens["input_tokens"])
|
50
|
+
if "output_tokens" in tokens:
|
51
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, tokens["output_tokens"])
|
52
|
+
if "total_tokens" in tokens:
|
53
|
+
span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, tokens["total_tokens"])
|
56
54
|
|
57
55
|
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, request_model)
|
58
56
|
span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
|
57
|
+
|
59
58
|
# Environment and service identifiers.
|
60
59
|
span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
|
61
60
|
span.set_attribute(SERVICE_NAME, application_name)
|
61
|
+
|
62
62
|
# Set any extra attributes passed in.
|
63
63
|
for key, value in extra_attrs.items():
|
64
64
|
span.set_attribute(key, value)
|
@@ -80,106 +80,15 @@ def record_common_metrics(metrics, application_name, environment, request_model,
|
|
80
80
|
server_port=server_port,
|
81
81
|
response_model=request_model,
|
82
82
|
)
|
83
|
-
metrics[
|
84
|
-
metrics[
|
83
|
+
metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, attributes)
|
84
|
+
metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
|
85
85
|
if include_tbt and tbt_value is not None:
|
86
|
-
metrics[
|
87
|
-
metrics[
|
88
|
-
metrics[
|
89
|
-
metrics[
|
90
|
-
metrics[
|
91
|
-
metrics[
|
92
|
-
|
93
|
-
def emit_common_events(event_provider, choices, finish_reason, llmresponse, formatted_messages,
|
94
|
-
capture_message_content, n):
|
95
|
-
"""
|
96
|
-
Emit events common to both chat and chat rag operations.
|
97
|
-
"""
|
98
|
-
|
99
|
-
if n > 1:
|
100
|
-
for choice in choices:
|
101
|
-
choice_event_body = {
|
102
|
-
'finish_reason': finish_reason,
|
103
|
-
'index': choice.get('index', 0),
|
104
|
-
'message': {
|
105
|
-
**({'content': choice.get('message', {}).get('content', '')} if capture_message_content else {}),
|
106
|
-
'role': choice.get('message', {}).get('role', 'assistant')
|
107
|
-
}
|
108
|
-
}
|
109
|
-
# If tool calls exist, emit an event for each tool call.
|
110
|
-
tool_calls = choice.get('message', {}).get('tool_calls')
|
111
|
-
if tool_calls:
|
112
|
-
for tool_call in tool_calls:
|
113
|
-
choice_event_body['message'].update({
|
114
|
-
'tool_calls': {
|
115
|
-
'function': {
|
116
|
-
'name': tool_call.get('function', {}).get('name', ''),
|
117
|
-
'arguments': tool_call.get('function', {}).get('arguments', '')
|
118
|
-
},
|
119
|
-
'id': tool_call.get('id', ''),
|
120
|
-
'type': tool_call.get('type', 'function')
|
121
|
-
}
|
122
|
-
})
|
123
|
-
event = otel_event(
|
124
|
-
name=SemanticConvention.GEN_AI_CHOICE,
|
125
|
-
attributes={SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_AI21},
|
126
|
-
body=choice_event_body
|
127
|
-
)
|
128
|
-
event_provider.emit(event)
|
129
|
-
else:
|
130
|
-
event = otel_event(
|
131
|
-
name=SemanticConvention.GEN_AI_CHOICE,
|
132
|
-
attributes={SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_AI21},
|
133
|
-
body=choice_event_body
|
134
|
-
)
|
135
|
-
event_provider.emit(event)
|
136
|
-
else:
|
137
|
-
# Single choice case.
|
138
|
-
choice_event_body = {
|
139
|
-
'finish_reason': finish_reason,
|
140
|
-
'index': 0,
|
141
|
-
'message': {
|
142
|
-
**({'content': llmresponse} if capture_message_content else {}),
|
143
|
-
'role': 'assistant'
|
144
|
-
}
|
145
|
-
}
|
146
|
-
event = otel_event(
|
147
|
-
name=SemanticConvention.GEN_AI_CHOICE,
|
148
|
-
attributes={SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_AI21},
|
149
|
-
body=choice_event_body
|
150
|
-
)
|
151
|
-
event_provider.emit(event)
|
152
|
-
|
153
|
-
# Emit additional role-based events (if formatted messages are available).
|
154
|
-
for role in ['user', 'system', 'assistant', 'tool']:
|
155
|
-
msg = formatted_messages.get(role, {})
|
156
|
-
if msg.get('content', ''):
|
157
|
-
event_body = {
|
158
|
-
**({'content': msg.get('content', '')} if capture_message_content else {}),
|
159
|
-
'role': msg.get('role', [])
|
160
|
-
}
|
161
|
-
# For assistant messages, attach tool call details if they exist.
|
162
|
-
if role == 'assistant' and choices:
|
163
|
-
tool_calls = choices[0].get('message', {}).get('tool_calls', [])
|
164
|
-
if tool_calls:
|
165
|
-
event_body['tool_calls'] = {
|
166
|
-
'function': {
|
167
|
-
'name': tool_calls[0].get('function', {}).get('name', ''),
|
168
|
-
'arguments': tool_calls[0].get('function', {}).get('arguments', '')
|
169
|
-
},
|
170
|
-
'id': tool_calls[0].get('id', ''),
|
171
|
-
'type': 'function'
|
172
|
-
}
|
173
|
-
if role == 'tool' and choices:
|
174
|
-
tool_calls = choices[0].get('message', {}).get('tool_calls', [])
|
175
|
-
if tool_calls:
|
176
|
-
event_body['id'] = tool_calls[0].get('id', '')
|
177
|
-
event = otel_event(
|
178
|
-
name=getattr(SemanticConvention, f'GEN_AI_{role.upper()}_MESSAGE'),
|
179
|
-
attributes={SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_AI21},
|
180
|
-
body=event_body
|
181
|
-
)
|
182
|
-
event_provider.emit(event)
|
86
|
+
metrics["genai_server_tbt"].record(tbt_value, attributes)
|
87
|
+
metrics["genai_server_ttft"].record(end_time - start_time, attributes)
|
88
|
+
metrics["genai_requests"].add(1, attributes)
|
89
|
+
metrics["genai_completion_tokens"].add(output_tokens, attributes)
|
90
|
+
metrics["genai_prompt_tokens"].add(input_tokens, attributes)
|
91
|
+
metrics["genai_cost"].record(cost, attributes)
|
183
92
|
|
184
93
|
def process_chunk(self, chunk):
|
185
94
|
"""
|
@@ -194,21 +103,20 @@ def process_chunk(self, chunk):
|
|
194
103
|
self._ttft = calculate_ttft(self._timestamps, self._start_time)
|
195
104
|
|
196
105
|
chunked = response_as_dict(chunk)
|
197
|
-
if (len(chunked.get(
|
198
|
-
|
199
|
-
|
200
|
-
content
|
201
|
-
if content:
|
106
|
+
if (len(chunked.get("choices")) > 0 and
|
107
|
+
"delta" in chunked.get("choices")[0] and
|
108
|
+
"content" in chunked.get("choices")[0].get("delta")):
|
109
|
+
if content := chunked.get("choices")[0].get("delta").get("content"):
|
202
110
|
self._llmresponse += content
|
203
|
-
if chunked.get(
|
204
|
-
self._input_tokens = chunked.get(
|
205
|
-
self._output_tokens = chunked.get(
|
206
|
-
self._response_id = chunked.get(
|
207
|
-
self._choices += chunked.get(
|
208
|
-
self._finish_reason = chunked.get(
|
111
|
+
if chunked.get("usage"):
|
112
|
+
self._input_tokens = chunked.get("usage").get("prompt_tokens")
|
113
|
+
self._output_tokens = chunked.get("usage").get("completion_tokens")
|
114
|
+
self._response_id = chunked.get("id")
|
115
|
+
self._choices += chunked.get("choices")
|
116
|
+
self._finish_reason = chunked.get("choices")[0].get("finish_reason")
|
209
117
|
|
210
118
|
def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
|
211
|
-
|
119
|
+
capture_message_content, disable_metrics, version, is_stream):
|
212
120
|
"""
|
213
121
|
Process chat request and generate Telemetry.
|
214
122
|
"""
|
@@ -218,19 +126,19 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
|
|
218
126
|
scope._tbt = calculate_tbt(scope._timestamps)
|
219
127
|
|
220
128
|
# Extract and format input messages.
|
221
|
-
formatted_messages = extract_and_format_input(scope._kwargs.get(
|
129
|
+
formatted_messages = extract_and_format_input(scope._kwargs.get("messages", ""))
|
222
130
|
prompt = concatenate_all_contents(formatted_messages)
|
223
|
-
request_model = scope._kwargs.get(
|
131
|
+
request_model = scope._kwargs.get("model", "jamba-1.5-mini")
|
224
132
|
|
225
133
|
# Calculate cost based on token usage.
|
226
134
|
cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
|
227
135
|
# Prepare tokens dictionary.
|
228
136
|
tokens = {
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
137
|
+
"finish_reason": scope._finish_reason,
|
138
|
+
"response_id": scope._response_id,
|
139
|
+
"input_tokens": scope._input_tokens,
|
140
|
+
"output_tokens": scope._output_tokens,
|
141
|
+
"total_tokens": scope._input_tokens + scope._output_tokens,
|
234
142
|
}
|
235
143
|
extra_attrs = {
|
236
144
|
SemanticConvention.GEN_AI_REQUEST_IS_STREAM: is_stream,
|
@@ -239,14 +147,13 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
|
|
239
147
|
SemanticConvention.GEN_AI_SERVER_TBT: scope._tbt,
|
240
148
|
SemanticConvention.GEN_AI_SERVER_TTFT: scope._ttft,
|
241
149
|
SemanticConvention.GEN_AI_SDK_VERSION: version,
|
242
|
-
SemanticConvention.GEN_AI_OUTPUT_TYPE:
|
150
|
+
SemanticConvention.GEN_AI_OUTPUT_TYPE: "text" if isinstance(scope._llmresponse, str) else "json"
|
243
151
|
}
|
244
152
|
# Set span attributes.
|
245
153
|
setup_common_span_attributes(scope._span, request_model, scope._kwargs, tokens,
|
246
154
|
scope._server_port, scope._server_address, environment,
|
247
155
|
application_name, extra_attrs)
|
248
156
|
|
249
|
-
# Optionally add events capturing the prompt and completion.
|
250
157
|
if capture_message_content:
|
251
158
|
scope._span.add_event(
|
252
159
|
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
@@ -257,11 +164,6 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
|
|
257
164
|
attributes={SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse},
|
258
165
|
)
|
259
166
|
|
260
|
-
# Emit events for each choice and message role.
|
261
|
-
n = scope._kwargs.get('n', 1)
|
262
|
-
emit_common_events(event_provider, scope._choices, scope._finish_reason, scope._llmresponse,
|
263
|
-
formatted_messages, capture_message_content, n)
|
264
|
-
|
265
167
|
scope._span.set_status(Status(StatusCode.OK))
|
266
168
|
|
267
169
|
if not disable_metrics:
|
@@ -272,23 +174,23 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
|
|
272
174
|
include_tbt=True, tbt_value=scope._tbt)
|
273
175
|
|
274
176
|
def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
|
275
|
-
|
177
|
+
capture_message_content=False, disable_metrics=False, version=""):
|
276
178
|
"""
|
277
179
|
Process a streaming chat response and generate Telemetry.
|
278
180
|
"""
|
279
181
|
|
280
182
|
common_chat_logic(self, pricing_info, environment, application_name, metrics,
|
281
|
-
|
183
|
+
capture_message_content, disable_metrics, version, is_stream=True)
|
282
184
|
|
283
185
|
def process_chat_response(response, request_model, pricing_info, server_port, server_address,
|
284
|
-
environment, application_name, metrics,
|
285
|
-
span, capture_message_content=False, disable_metrics=False, version=
|
186
|
+
environment, application_name, metrics, start_time,
|
187
|
+
span, capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
|
286
188
|
"""
|
287
189
|
Process a synchronous chat response and generate Telemetry.
|
288
190
|
"""
|
289
191
|
|
290
192
|
# Create a generic scope object to hold telemetry data.
|
291
|
-
self = type(
|
193
|
+
self = type("GenericScope", (), {})()
|
292
194
|
response_dict = response_as_dict(response)
|
293
195
|
|
294
196
|
# pylint: disable = no-member
|
@@ -297,113 +199,102 @@ def process_chat_response(response, request_model, pricing_info, server_port, se
|
|
297
199
|
|
298
200
|
self._span = span
|
299
201
|
# Concatenate content from all choices.
|
300
|
-
self._llmresponse =
|
301
|
-
(choice.get(
|
302
|
-
for choice in response_dict.get(
|
202
|
+
self._llmresponse = "".join(
|
203
|
+
(choice.get("message", {}).get("content") or "")
|
204
|
+
for choice in response_dict.get("choices", [])
|
303
205
|
)
|
304
|
-
self._response_role = response_dict.get(
|
305
|
-
self._input_tokens = response_dict.get(
|
306
|
-
self._output_tokens = response_dict.get(
|
307
|
-
self._response_id = response_dict.get(
|
206
|
+
self._response_role = response_dict.get("message", {}).get("role", "assistant")
|
207
|
+
self._input_tokens = response_dict.get("usage", {}).get("prompt_tokens", 0)
|
208
|
+
self._output_tokens = response_dict.get("usage", {}).get("completion_tokens", 0)
|
209
|
+
self._response_id = response_dict.get("id", "")
|
308
210
|
self._response_model = request_model
|
309
|
-
self._finish_reason = response_dict.get(
|
211
|
+
self._finish_reason = response_dict.get("choices", [{}])[0].get("finish_reason")
|
310
212
|
self._timestamps = []
|
311
213
|
self._ttft, self._tbt = self._end_time - self._start_time, 0
|
312
214
|
self._server_address, self._server_port = server_address, server_port
|
313
215
|
self._kwargs = kwargs
|
314
|
-
self._choices = response_dict.get(
|
216
|
+
self._choices = response_dict.get("choices")
|
315
217
|
|
316
218
|
common_chat_logic(self, pricing_info, environment, application_name, metrics,
|
317
|
-
|
219
|
+
capture_message_content, disable_metrics, version, is_stream=False)
|
318
220
|
|
319
221
|
return response
|
320
222
|
|
321
223
|
def process_chat_rag_response(response, request_model, pricing_info, server_port, server_address,
|
322
|
-
environment, application_name, metrics,
|
323
|
-
span, capture_message_content=False, disable_metrics=False, version=
|
224
|
+
environment, application_name, metrics, start_time,
|
225
|
+
span, capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
|
324
226
|
"""
|
325
227
|
Process a chat response and generate Telemetry.
|
326
228
|
"""
|
327
229
|
end_time = time.time()
|
328
230
|
response_dict = response_as_dict(response)
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
#
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
span.
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
llmresponse, formatted_messages, capture_message_content, n)
|
400
|
-
|
401
|
-
if not disable_metrics:
|
402
|
-
record_common_metrics(metrics, application_name, environment, request_model,
|
403
|
-
server_address, server_port, start_time, end_time,
|
404
|
-
input_tokens, output_tokens, cost, include_tbt=False)
|
405
|
-
return response
|
406
|
-
|
407
|
-
except Exception as e:
|
408
|
-
handle_exception(span, e)
|
409
|
-
return response
|
231
|
+
# Format input messages into a single prompt string.
|
232
|
+
messages_input = kwargs.get("messages", "")
|
233
|
+
formatted_messages = extract_and_format_input(messages_input)
|
234
|
+
prompt = concatenate_all_contents(formatted_messages)
|
235
|
+
input_tokens = general_tokens(prompt)
|
236
|
+
|
237
|
+
# Create tokens dict and RAG-specific extra attributes.
|
238
|
+
tokens = {"response_id": response_dict.get("id"), "input_tokens": input_tokens}
|
239
|
+
extra_attrs = {
|
240
|
+
SemanticConvention.GEN_AI_REQUEST_IS_STREAM: False,
|
241
|
+
SemanticConvention.GEN_AI_SERVER_TTFT: end_time - start_time,
|
242
|
+
SemanticConvention.GEN_AI_SDK_VERSION: version,
|
243
|
+
SemanticConvention.GEN_AI_RAG_MAX_SEGMENTS: kwargs.get("max_segments", -1),
|
244
|
+
SemanticConvention.GEN_AI_RAG_STRATEGY: kwargs.get("retrieval_strategy", "segments"),
|
245
|
+
SemanticConvention.GEN_AI_RAG_SIMILARITY_THRESHOLD: kwargs.get("retrieval_similarity_threshold", -1),
|
246
|
+
SemanticConvention.GEN_AI_RAG_MAX_NEIGHBORS: kwargs.get("max_neighbors", -1),
|
247
|
+
SemanticConvention.GEN_AI_RAG_FILE_IDS: str(kwargs.get("file_ids", "")),
|
248
|
+
SemanticConvention.GEN_AI_RAG_DOCUMENTS_PATH: kwargs.get("path", "")
|
249
|
+
}
|
250
|
+
# Set common span attributes.
|
251
|
+
setup_common_span_attributes(span, request_model, kwargs, tokens,
|
252
|
+
server_port, server_address, environment, application_name,
|
253
|
+
extra_attrs)
|
254
|
+
|
255
|
+
if capture_message_content:
|
256
|
+
span.add_event(
|
257
|
+
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
258
|
+
attributes={SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt},
|
259
|
+
)
|
260
|
+
|
261
|
+
output_tokens = 0
|
262
|
+
choices = response_dict.get("choices", [])
|
263
|
+
aggregated_completion = []
|
264
|
+
for i in range(kwargs.get("n", 1)):
|
265
|
+
# Get the response content from each choice and count tokens.
|
266
|
+
content = choices[i].get("content", "")
|
267
|
+
aggregated_completion.append(content)
|
268
|
+
output_tokens += general_tokens(content)
|
269
|
+
if kwargs.get("tools"):
|
270
|
+
span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
|
271
|
+
str(choices[i].get("message", {}).get("tool_calls")))
|
272
|
+
# Set output type based on actual content type.
|
273
|
+
if isinstance(content, str):
|
274
|
+
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
|
275
|
+
elif content is not None:
|
276
|
+
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "json")
|
277
|
+
|
278
|
+
# Concatenate completion responses.
|
279
|
+
llmresponse = "".join(aggregated_completion)
|
280
|
+
tokens["output_tokens"] = output_tokens
|
281
|
+
tokens["total_tokens"] = input_tokens + output_tokens
|
282
|
+
|
283
|
+
cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
|
284
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
|
285
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
|
286
|
+
span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
|
287
|
+
|
288
|
+
span.set_status(Status(StatusCode.OK))
|
289
|
+
|
290
|
+
if capture_message_content:
|
291
|
+
span.add_event(
|
292
|
+
name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
|
293
|
+
attributes={SemanticConvention.GEN_AI_CONTENT_COMPLETION: llmresponse},
|
294
|
+
)
|
295
|
+
|
296
|
+
if not disable_metrics:
|
297
|
+
record_common_metrics(metrics, application_name, environment, request_model,
|
298
|
+
server_address, server_port, start_time, end_time,
|
299
|
+
input_tokens, output_tokens, cost, include_tbt=False)
|
300
|
+
return response
|
@@ -1,4 +1,3 @@
|
|
1
|
-
# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
|
2
1
|
"""Initializer of Auto Instrumentation of Google AI Studio Functions"""
|
3
2
|
|
4
3
|
from typing import Collection
|
@@ -25,8 +24,8 @@ class GoogleAIStudioInstrumentor(BaseInstrumentor):
|
|
25
24
|
return _instruments
|
26
25
|
|
27
26
|
def _instrument(self, **kwargs):
|
28
|
-
application_name = kwargs.get("application_name", "
|
29
|
-
environment = kwargs.get("environment", "
|
27
|
+
application_name = kwargs.get("application_name", "default")
|
28
|
+
environment = kwargs.get("environment", "default")
|
30
29
|
tracer = kwargs.get("tracer")
|
31
30
|
metrics = kwargs.get("metrics_dict")
|
32
31
|
pricing_info = kwargs.get("pricing_info", {})
|
@@ -67,5 +66,4 @@ class GoogleAIStudioInstrumentor(BaseInstrumentor):
|
|
67
66
|
)
|
68
67
|
|
69
68
|
def _uninstrument(self, **kwargs):
|
70
|
-
# Proper uninstrumentation logic to revert patched methods
|
71
69
|
pass
|
@@ -2,7 +2,6 @@
|
|
2
2
|
Module for monitoring Google AI Studio API calls.
|
3
3
|
"""
|
4
4
|
|
5
|
-
import logging
|
6
5
|
import time
|
7
6
|
from opentelemetry.trace import SpanKind
|
8
7
|
from openlit.__helpers import (
|
@@ -16,9 +15,6 @@ from openlit.instrumentation.google_ai_studio.utils import (
|
|
16
15
|
)
|
17
16
|
from openlit.semcov import SemanticConvention
|
18
17
|
|
19
|
-
# Initialize logger for logging potential issues and operations
|
20
|
-
logger = logging.getLogger(__name__)
|
21
|
-
|
22
18
|
def async_generate(version, environment, application_name,
|
23
19
|
tracer, pricing_info, capture_message_content, metrics, disable_metrics):
|
24
20
|
"""
|
@@ -61,7 +57,6 @@ def async_generate(version, environment, application_name,
|
|
61
57
|
|
62
58
|
except Exception as e:
|
63
59
|
handle_exception(span, e)
|
64
|
-
logger.error("Error in trace creation: %s", e)
|
65
60
|
|
66
61
|
# Return original response
|
67
62
|
return response
|
@@ -144,7 +139,6 @@ def async_generate_stream(version, environment, application_name,
|
|
144
139
|
|
145
140
|
except Exception as e:
|
146
141
|
handle_exception(self._span, e)
|
147
|
-
logger.error("Error in trace creation: %s", e)
|
148
142
|
raise
|
149
143
|
|
150
144
|
async def wrapper(wrapped, instance, args, kwargs):
|
@@ -2,7 +2,6 @@
|
|
2
2
|
Module for monitoring Google AI Studio API calls.
|
3
3
|
"""
|
4
4
|
|
5
|
-
import logging
|
6
5
|
import time
|
7
6
|
from opentelemetry.trace import SpanKind
|
8
7
|
from openlit.__helpers import (
|
@@ -16,9 +15,6 @@ from openlit.instrumentation.google_ai_studio.utils import (
|
|
16
15
|
)
|
17
16
|
from openlit.semcov import SemanticConvention
|
18
17
|
|
19
|
-
# Initialize logger for logging potential issues and operations
|
20
|
-
logger = logging.getLogger(__name__)
|
21
|
-
|
22
18
|
def generate(version, environment, application_name,
|
23
19
|
tracer, pricing_info, capture_message_content, metrics, disable_metrics):
|
24
20
|
"""
|
@@ -61,7 +57,6 @@ def generate(version, environment, application_name,
|
|
61
57
|
|
62
58
|
except Exception as e:
|
63
59
|
handle_exception(span, e)
|
64
|
-
logger.error("Error in trace creation: %s", e)
|
65
60
|
|
66
61
|
# Return original response
|
67
62
|
return response
|
@@ -144,7 +139,6 @@ def generate_stream(version, environment, application_name,
|
|
144
139
|
|
145
140
|
except Exception as e:
|
146
141
|
handle_exception(self._span, e)
|
147
|
-
logger.error("Error in trace creation: %s", e)
|
148
142
|
raise
|
149
143
|
|
150
144
|
def wrapper(wrapped, instance, args, kwargs):
|
@@ -15,8 +15,7 @@ from openlit.semcov import SemanticConvention
|
|
15
15
|
|
16
16
|
def format_content(messages):
|
17
17
|
"""
|
18
|
-
Process a list of messages to extract content
|
19
|
-
and concatenate all 'content' fields into a single string with role: content format.
|
18
|
+
Process a list of messages to extract content.
|
20
19
|
"""
|
21
20
|
|
22
21
|
formatted_messages = []
|
@@ -1,4 +1,3 @@
|
|
1
|
-
# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
|
2
1
|
"""Initializer of Auto Instrumentation of Together AI Functions"""
|
3
2
|
|
4
3
|
from typing import Collection
|
@@ -17,15 +16,15 @@ _instruments = ("together >= 1.3.5",)
|
|
17
16
|
|
18
17
|
class TogetherInstrumentor(BaseInstrumentor):
|
19
18
|
"""
|
20
|
-
An instrumentor for Together
|
19
|
+
An instrumentor for Together client library.
|
21
20
|
"""
|
22
21
|
|
23
22
|
def instrumentation_dependencies(self) -> Collection[str]:
|
24
23
|
return _instruments
|
25
24
|
|
26
25
|
def _instrument(self, **kwargs):
|
27
|
-
application_name = kwargs.get("application_name", "
|
28
|
-
environment = kwargs.get("environment", "
|
26
|
+
application_name = kwargs.get("application_name", "default")
|
27
|
+
environment = kwargs.get("environment", "default")
|
29
28
|
tracer = kwargs.get("tracer")
|
30
29
|
metrics = kwargs.get("metrics_dict")
|
31
30
|
pricing_info = kwargs.get("pricing_info", {})
|
@@ -66,5 +65,4 @@ class TogetherInstrumentor(BaseInstrumentor):
|
|
66
65
|
)
|
67
66
|
|
68
67
|
def _uninstrument(self, **kwargs):
|
69
|
-
# Proper uninstrumentation logic to revert patched methods
|
70
68
|
pass
|