openlit 1.33.19__py3-none-any.whl → 1.33.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +7 -7
- openlit/__init__.py +3 -3
- openlit/evals/utils.py +7 -7
- openlit/guard/utils.py +7 -7
- openlit/instrumentation/ag2/ag2.py +24 -24
- openlit/instrumentation/ai21/ai21.py +3 -3
- openlit/instrumentation/ai21/async_ai21.py +3 -3
- openlit/instrumentation/ai21/utils.py +59 -59
- openlit/instrumentation/anthropic/anthropic.py +2 -2
- openlit/instrumentation/anthropic/async_anthropic.py +2 -2
- openlit/instrumentation/anthropic/utils.py +34 -34
- openlit/instrumentation/assemblyai/assemblyai.py +24 -24
- openlit/instrumentation/astra/astra.py +3 -3
- openlit/instrumentation/astra/async_astra.py +3 -3
- openlit/instrumentation/astra/utils.py +39 -39
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +2 -2
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +2 -2
- openlit/instrumentation/azure_ai_inference/utils.py +36 -36
- openlit/instrumentation/bedrock/bedrock.py +2 -2
- openlit/instrumentation/bedrock/utils.py +35 -35
- openlit/instrumentation/chroma/chroma.py +57 -57
- openlit/instrumentation/cohere/async_cohere.py +88 -88
- openlit/instrumentation/cohere/cohere.py +88 -88
- openlit/instrumentation/controlflow/controlflow.py +15 -15
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +14 -14
- openlit/instrumentation/crawl4ai/crawl4ai.py +14 -14
- openlit/instrumentation/crewai/crewai.py +22 -22
- openlit/instrumentation/dynamiq/dynamiq.py +19 -19
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +24 -25
- openlit/instrumentation/elevenlabs/elevenlabs.py +23 -25
- openlit/instrumentation/embedchain/embedchain.py +15 -15
- openlit/instrumentation/firecrawl/firecrawl.py +10 -10
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +33 -33
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +33 -33
- openlit/instrumentation/gpt4all/gpt4all.py +78 -78
- openlit/instrumentation/gpu/__init__.py +8 -8
- openlit/instrumentation/groq/async_groq.py +74 -74
- openlit/instrumentation/groq/groq.py +74 -74
- openlit/instrumentation/haystack/haystack.py +6 -6
- openlit/instrumentation/julep/async_julep.py +14 -14
- openlit/instrumentation/julep/julep.py +14 -14
- openlit/instrumentation/langchain/async_langchain.py +39 -39
- openlit/instrumentation/langchain/langchain.py +39 -39
- openlit/instrumentation/letta/letta.py +26 -26
- openlit/instrumentation/litellm/async_litellm.py +94 -94
- openlit/instrumentation/litellm/litellm.py +94 -94
- openlit/instrumentation/llamaindex/llamaindex.py +7 -7
- openlit/instrumentation/mem0/mem0.py +13 -13
- openlit/instrumentation/milvus/milvus.py +47 -47
- openlit/instrumentation/mistral/async_mistral.py +88 -88
- openlit/instrumentation/mistral/mistral.py +88 -88
- openlit/instrumentation/multion/async_multion.py +21 -21
- openlit/instrumentation/multion/multion.py +21 -21
- openlit/instrumentation/ollama/async_ollama.py +3 -3
- openlit/instrumentation/ollama/ollama.py +3 -3
- openlit/instrumentation/ollama/utils.py +50 -50
- openlit/instrumentation/openai/async_openai.py +225 -225
- openlit/instrumentation/openai/openai.py +225 -225
- openlit/instrumentation/openai_agents/openai_agents.py +11 -11
- openlit/instrumentation/phidata/phidata.py +15 -15
- openlit/instrumentation/pinecone/pinecone.py +43 -43
- openlit/instrumentation/premai/premai.py +86 -86
- openlit/instrumentation/qdrant/async_qdrant.py +95 -95
- openlit/instrumentation/qdrant/qdrant.py +99 -99
- openlit/instrumentation/reka/async_reka.py +33 -33
- openlit/instrumentation/reka/reka.py +33 -33
- openlit/instrumentation/together/async_together.py +90 -90
- openlit/instrumentation/together/together.py +90 -90
- openlit/instrumentation/transformers/transformers.py +26 -26
- openlit/instrumentation/vertexai/async_vertexai.py +64 -64
- openlit/instrumentation/vertexai/vertexai.py +64 -64
- openlit/instrumentation/vllm/vllm.py +24 -24
- openlit/otel/metrics.py +11 -11
- openlit/semcov/__init__.py +3 -3
- {openlit-1.33.19.dist-info → openlit-1.33.20.dist-info}/METADATA +8 -8
- openlit-1.33.20.dist-info/RECORD +131 -0
- {openlit-1.33.19.dist-info → openlit-1.33.20.dist-info}/WHEEL +1 -1
- openlit-1.33.19.dist-info/RECORD +0 -131
- {openlit-1.33.19.dist-info → openlit-1.33.20.dist-info}/LICENSE +0 -0
@@ -13,7 +13,7 @@ from openlit.__helpers import (
|
|
13
13
|
calculate_tbt,
|
14
14
|
create_metrics_attributes,
|
15
15
|
)
|
16
|
-
from openlit.semcov import
|
16
|
+
from openlit.semcov import SemanticConvention
|
17
17
|
|
18
18
|
# Initialize logger for logging potential issues and operations
|
19
19
|
logger = logging.getLogger(__name__)
|
@@ -139,26 +139,26 @@ def async_send_message(version, environment, application_name, tracer,
|
|
139
139
|
|
140
140
|
# Set Span attributes (OTel Semconv)
|
141
141
|
self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
142
|
-
self._span.set_attribute(
|
143
|
-
|
144
|
-
self._span.set_attribute(
|
145
|
-
|
146
|
-
self._span.set_attribute(
|
142
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
143
|
+
SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
|
144
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
145
|
+
SemanticConvention.GEN_AI_SYSTEM_VERTEXAI)
|
146
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
147
147
|
self._request_model)
|
148
|
-
self._span.set_attribute(
|
148
|
+
self._span.set_attribute(SemanticConvention.SERVER_PORT,
|
149
149
|
self._server_port)
|
150
150
|
|
151
151
|
inference_config = self._kwargs.get('generation_config', {})
|
152
152
|
|
153
153
|
# List of attributes and their config keys
|
154
154
|
attributes = [
|
155
|
-
(
|
156
|
-
(
|
157
|
-
(
|
158
|
-
(
|
159
|
-
(
|
160
|
-
(
|
161
|
-
(
|
155
|
+
(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
|
156
|
+
(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_output_tokens'),
|
157
|
+
(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
|
158
|
+
(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
|
159
|
+
(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
|
160
|
+
(SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
|
161
|
+
(SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
|
162
162
|
]
|
163
163
|
|
164
164
|
# Set each attribute if the corresponding value exists and is not None
|
@@ -168,19 +168,19 @@ def async_send_message(version, environment, application_name, tracer,
|
|
168
168
|
if value is not None:
|
169
169
|
self._span.set_attribute(attribute, value)
|
170
170
|
|
171
|
-
self._span.set_attribute(
|
171
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
172
172
|
self._request_model)
|
173
|
-
self._span.set_attribute(
|
173
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
|
174
174
|
self._input_tokens)
|
175
|
-
self._span.set_attribute(
|
175
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
|
176
176
|
self._output_tokens)
|
177
|
-
self._span.set_attribute(
|
177
|
+
self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
178
178
|
self._server_address)
|
179
179
|
if isinstance(self._llmresponse, str):
|
180
|
-
self._span.set_attribute(
|
180
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
181
181
|
"text")
|
182
182
|
else:
|
183
|
-
self._span.set_attribute(
|
183
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
184
184
|
"json")
|
185
185
|
|
186
186
|
# Set Span attributes (Extra)
|
@@ -188,29 +188,29 @@ def async_send_message(version, environment, application_name, tracer,
|
|
188
188
|
environment)
|
189
189
|
self._span.set_attribute(SERVICE_NAME,
|
190
190
|
application_name)
|
191
|
-
self._span.set_attribute(
|
191
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
|
192
192
|
True)
|
193
|
-
self._span.set_attribute(
|
193
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
|
194
194
|
self._input_tokens + self._output_tokens)
|
195
|
-
self._span.set_attribute(
|
195
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
196
196
|
cost)
|
197
|
-
self._span.set_attribute(
|
197
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
|
198
198
|
self._tbt)
|
199
|
-
self._span.set_attribute(
|
199
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
|
200
200
|
self._ttft)
|
201
|
-
self._span.set_attribute(
|
201
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
202
202
|
version)
|
203
203
|
if capture_message_content:
|
204
204
|
self._span.add_event(
|
205
|
-
name=
|
205
|
+
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
206
206
|
attributes={
|
207
|
-
|
207
|
+
SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
|
208
208
|
},
|
209
209
|
)
|
210
210
|
self._span.add_event(
|
211
|
-
name=
|
211
|
+
name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
|
212
212
|
attributes={
|
213
|
-
|
213
|
+
SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
|
214
214
|
},
|
215
215
|
)
|
216
216
|
self._span.set_status(Status(StatusCode.OK))
|
@@ -219,8 +219,8 @@ def async_send_message(version, environment, application_name, tracer,
|
|
219
219
|
attributes = create_metrics_attributes(
|
220
220
|
service_name=application_name,
|
221
221
|
deployment_environment=environment,
|
222
|
-
operation=
|
223
|
-
system=
|
222
|
+
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
|
223
|
+
system=SemanticConvention.GEN_AI_SYSTEM_VERTEXAI,
|
224
224
|
request_model=self._request_model,
|
225
225
|
server_address=self._server_address,
|
226
226
|
server_port=self._server_port,
|
@@ -280,7 +280,7 @@ def async_send_message(version, environment, application_name, tracer,
|
|
280
280
|
|
281
281
|
server_address, server_port = location + '-aiplatform.googleapis.com', 443
|
282
282
|
|
283
|
-
span_name = f"{
|
283
|
+
span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
|
284
284
|
|
285
285
|
# pylint: disable=no-else-return
|
286
286
|
if streaming:
|
@@ -342,26 +342,26 @@ def async_send_message(version, environment, application_name, tracer,
|
|
342
342
|
|
343
343
|
# Set base span attribues (OTel Semconv)
|
344
344
|
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
345
|
-
span.set_attribute(
|
346
|
-
|
347
|
-
span.set_attribute(
|
348
|
-
|
349
|
-
span.set_attribute(
|
345
|
+
span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
346
|
+
SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
|
347
|
+
span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
348
|
+
SemanticConvention.GEN_AI_SYSTEM_VERTEXAI)
|
349
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
350
350
|
request_model)
|
351
|
-
span.set_attribute(
|
351
|
+
span.set_attribute(SemanticConvention.SERVER_PORT,
|
352
352
|
server_port)
|
353
353
|
|
354
354
|
inference_config = kwargs.get('generation_config', {})
|
355
355
|
|
356
356
|
# List of attributes and their config keys
|
357
357
|
attributes = [
|
358
|
-
(
|
359
|
-
(
|
360
|
-
(
|
361
|
-
(
|
362
|
-
(
|
363
|
-
(
|
364
|
-
(
|
358
|
+
(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
|
359
|
+
(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_output_tokens'),
|
360
|
+
(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
|
361
|
+
(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
|
362
|
+
(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
|
363
|
+
(SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
|
364
|
+
(SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
|
365
365
|
]
|
366
366
|
|
367
367
|
# Set each attribute if the corresponding value exists and is not None
|
@@ -371,15 +371,15 @@ def async_send_message(version, environment, application_name, tracer,
|
|
371
371
|
if value is not None:
|
372
372
|
span.set_attribute(attribute, value)
|
373
373
|
|
374
|
-
span.set_attribute(
|
374
|
+
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
375
375
|
request_model)
|
376
|
-
span.set_attribute(
|
376
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
|
377
377
|
input_tokens)
|
378
|
-
span.set_attribute(
|
378
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
|
379
379
|
output_tokens)
|
380
|
-
span.set_attribute(
|
380
|
+
span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
381
381
|
server_address)
|
382
|
-
# span.set_attribute(
|
382
|
+
# span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
|
383
383
|
# [str(response.candidates[0].finish_reason)])
|
384
384
|
|
385
385
|
# Set base span attribues (Extras)
|
@@ -387,35 +387,35 @@ def async_send_message(version, environment, application_name, tracer,
|
|
387
387
|
environment)
|
388
388
|
span.set_attribute(SERVICE_NAME,
|
389
389
|
application_name)
|
390
|
-
span.set_attribute(
|
390
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
|
391
391
|
False)
|
392
|
-
span.set_attribute(
|
392
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
|
393
393
|
input_tokens + output_tokens)
|
394
|
-
span.set_attribute(
|
394
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
395
395
|
cost)
|
396
|
-
span.set_attribute(
|
396
|
+
span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
|
397
397
|
end_time - start_time)
|
398
|
-
span.set_attribute(
|
398
|
+
span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
399
399
|
version)
|
400
400
|
if capture_message_content:
|
401
401
|
span.add_event(
|
402
|
-
name=
|
402
|
+
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
403
403
|
attributes={
|
404
|
-
|
404
|
+
SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
|
405
405
|
},
|
406
406
|
)
|
407
407
|
span.add_event(
|
408
|
-
name=
|
408
|
+
name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
|
409
409
|
attributes={
|
410
|
-
|
410
|
+
SemanticConvention.GEN_AI_CONTENT_COMPLETION: response.text,
|
411
411
|
},
|
412
412
|
)
|
413
413
|
|
414
414
|
if isinstance(response.text, str):
|
415
|
-
span.set_attribute(
|
415
|
+
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
416
416
|
"text")
|
417
417
|
elif response.text is not None:
|
418
|
-
span.set_attribute(
|
418
|
+
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
419
419
|
"json")
|
420
420
|
|
421
421
|
span.set_status(Status(StatusCode.OK))
|
@@ -424,8 +424,8 @@ def async_send_message(version, environment, application_name, tracer,
|
|
424
424
|
attributes = create_metrics_attributes(
|
425
425
|
service_name=application_name,
|
426
426
|
deployment_environment=environment,
|
427
|
-
operation=
|
428
|
-
system=
|
427
|
+
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
|
428
|
+
system=SemanticConvention.GEN_AI_SYSTEM_VERTEXAI,
|
429
429
|
request_model=request_model,
|
430
430
|
server_address=server_address,
|
431
431
|
server_port=server_port,
|
@@ -13,7 +13,7 @@ from openlit.__helpers import (
|
|
13
13
|
calculate_tbt,
|
14
14
|
create_metrics_attributes,
|
15
15
|
)
|
16
|
-
from openlit.semcov import
|
16
|
+
from openlit.semcov import SemanticConvention
|
17
17
|
|
18
18
|
# Initialize logger for logging potential issues and operations
|
19
19
|
logger = logging.getLogger(__name__)
|
@@ -139,26 +139,26 @@ def send_message(version, environment, application_name, tracer,
|
|
139
139
|
|
140
140
|
# Set Span attributes (OTel Semconv)
|
141
141
|
self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
142
|
-
self._span.set_attribute(
|
143
|
-
|
144
|
-
self._span.set_attribute(
|
145
|
-
|
146
|
-
self._span.set_attribute(
|
142
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
143
|
+
SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
|
144
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
145
|
+
SemanticConvention.GEN_AI_SYSTEM_VERTEXAI)
|
146
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
147
147
|
self._request_model)
|
148
|
-
self._span.set_attribute(
|
148
|
+
self._span.set_attribute(SemanticConvention.SERVER_PORT,
|
149
149
|
self._server_port)
|
150
150
|
|
151
151
|
inference_config = self._kwargs.get('generation_config', {})
|
152
152
|
|
153
153
|
# List of attributes and their config keys
|
154
154
|
attributes = [
|
155
|
-
(
|
156
|
-
(
|
157
|
-
(
|
158
|
-
(
|
159
|
-
(
|
160
|
-
(
|
161
|
-
(
|
155
|
+
(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
|
156
|
+
(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_output_tokens'),
|
157
|
+
(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
|
158
|
+
(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
|
159
|
+
(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
|
160
|
+
(SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
|
161
|
+
(SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
|
162
162
|
]
|
163
163
|
|
164
164
|
# Set each attribute if the corresponding value exists and is not None
|
@@ -168,19 +168,19 @@ def send_message(version, environment, application_name, tracer,
|
|
168
168
|
if value is not None:
|
169
169
|
self._span.set_attribute(attribute, value)
|
170
170
|
|
171
|
-
self._span.set_attribute(
|
171
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
172
172
|
self._request_model)
|
173
|
-
self._span.set_attribute(
|
173
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
|
174
174
|
self._input_tokens)
|
175
|
-
self._span.set_attribute(
|
175
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
|
176
176
|
self._output_tokens)
|
177
|
-
self._span.set_attribute(
|
177
|
+
self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
178
178
|
self._server_address)
|
179
179
|
if isinstance(self._llmresponse, str):
|
180
|
-
self._span.set_attribute(
|
180
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
181
181
|
"text")
|
182
182
|
else:
|
183
|
-
self._span.set_attribute(
|
183
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
184
184
|
"json")
|
185
185
|
|
186
186
|
# Set Span attributes (Extra)
|
@@ -188,29 +188,29 @@ def send_message(version, environment, application_name, tracer,
|
|
188
188
|
environment)
|
189
189
|
self._span.set_attribute(SERVICE_NAME,
|
190
190
|
application_name)
|
191
|
-
self._span.set_attribute(
|
191
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
|
192
192
|
True)
|
193
|
-
self._span.set_attribute(
|
193
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
|
194
194
|
self._input_tokens + self._output_tokens)
|
195
|
-
self._span.set_attribute(
|
195
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
196
196
|
cost)
|
197
|
-
self._span.set_attribute(
|
197
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
|
198
198
|
self._tbt)
|
199
|
-
self._span.set_attribute(
|
199
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
|
200
200
|
self._ttft)
|
201
|
-
self._span.set_attribute(
|
201
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
202
202
|
version)
|
203
203
|
if capture_message_content:
|
204
204
|
self._span.add_event(
|
205
|
-
name=
|
205
|
+
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
206
206
|
attributes={
|
207
|
-
|
207
|
+
SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
|
208
208
|
},
|
209
209
|
)
|
210
210
|
self._span.add_event(
|
211
|
-
name=
|
211
|
+
name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
|
212
212
|
attributes={
|
213
|
-
|
213
|
+
SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
|
214
214
|
},
|
215
215
|
)
|
216
216
|
self._span.set_status(Status(StatusCode.OK))
|
@@ -219,8 +219,8 @@ def send_message(version, environment, application_name, tracer,
|
|
219
219
|
attributes = create_metrics_attributes(
|
220
220
|
service_name=application_name,
|
221
221
|
deployment_environment=environment,
|
222
|
-
operation=
|
223
|
-
system=
|
222
|
+
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
|
223
|
+
system=SemanticConvention.GEN_AI_SYSTEM_VERTEXAI,
|
224
224
|
request_model=self._request_model,
|
225
225
|
server_address=self._server_address,
|
226
226
|
server_port=self._server_port,
|
@@ -280,7 +280,7 @@ def send_message(version, environment, application_name, tracer,
|
|
280
280
|
|
281
281
|
server_address, server_port = location + '-aiplatform.googleapis.com', 443
|
282
282
|
|
283
|
-
span_name = f"{
|
283
|
+
span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
|
284
284
|
|
285
285
|
# pylint: disable=no-else-return
|
286
286
|
if streaming:
|
@@ -342,26 +342,26 @@ def send_message(version, environment, application_name, tracer,
|
|
342
342
|
|
343
343
|
# Set base span attribues (OTel Semconv)
|
344
344
|
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
345
|
-
span.set_attribute(
|
346
|
-
|
347
|
-
span.set_attribute(
|
348
|
-
|
349
|
-
span.set_attribute(
|
345
|
+
span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
346
|
+
SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
|
347
|
+
span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
348
|
+
SemanticConvention.GEN_AI_SYSTEM_VERTEXAI)
|
349
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
350
350
|
request_model)
|
351
|
-
span.set_attribute(
|
351
|
+
span.set_attribute(SemanticConvention.SERVER_PORT,
|
352
352
|
server_port)
|
353
353
|
|
354
354
|
inference_config = kwargs.get('generation_config', {})
|
355
355
|
|
356
356
|
# List of attributes and their config keys
|
357
357
|
attributes = [
|
358
|
-
(
|
359
|
-
(
|
360
|
-
(
|
361
|
-
(
|
362
|
-
(
|
363
|
-
(
|
364
|
-
(
|
358
|
+
(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
|
359
|
+
(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_output_tokens'),
|
360
|
+
(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
|
361
|
+
(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
|
362
|
+
(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
|
363
|
+
(SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
|
364
|
+
(SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
|
365
365
|
]
|
366
366
|
|
367
367
|
# Set each attribute if the corresponding value exists and is not None
|
@@ -371,15 +371,15 @@ def send_message(version, environment, application_name, tracer,
|
|
371
371
|
if value is not None:
|
372
372
|
span.set_attribute(attribute, value)
|
373
373
|
|
374
|
-
span.set_attribute(
|
374
|
+
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
375
375
|
request_model)
|
376
|
-
span.set_attribute(
|
376
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
|
377
377
|
input_tokens)
|
378
|
-
span.set_attribute(
|
378
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
|
379
379
|
output_tokens)
|
380
|
-
span.set_attribute(
|
380
|
+
span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
381
381
|
server_address)
|
382
|
-
# span.set_attribute(
|
382
|
+
# span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
|
383
383
|
# [str(response.candidates[0].finish_reason)])
|
384
384
|
|
385
385
|
# Set base span attribues (Extras)
|
@@ -387,35 +387,35 @@ def send_message(version, environment, application_name, tracer,
|
|
387
387
|
environment)
|
388
388
|
span.set_attribute(SERVICE_NAME,
|
389
389
|
application_name)
|
390
|
-
span.set_attribute(
|
390
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
|
391
391
|
False)
|
392
|
-
span.set_attribute(
|
392
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
|
393
393
|
input_tokens + output_tokens)
|
394
|
-
span.set_attribute(
|
394
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
395
395
|
cost)
|
396
|
-
span.set_attribute(
|
396
|
+
span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
|
397
397
|
end_time - start_time)
|
398
|
-
span.set_attribute(
|
398
|
+
span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
399
399
|
version)
|
400
400
|
if capture_message_content:
|
401
401
|
span.add_event(
|
402
|
-
name=
|
402
|
+
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
403
403
|
attributes={
|
404
|
-
|
404
|
+
SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
|
405
405
|
},
|
406
406
|
)
|
407
407
|
span.add_event(
|
408
|
-
name=
|
408
|
+
name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
|
409
409
|
attributes={
|
410
|
-
|
410
|
+
SemanticConvention.GEN_AI_CONTENT_COMPLETION: response.text,
|
411
411
|
},
|
412
412
|
)
|
413
413
|
|
414
414
|
if isinstance(response.text, str):
|
415
|
-
span.set_attribute(
|
415
|
+
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
416
416
|
"text")
|
417
417
|
elif response.text is not None:
|
418
|
-
span.set_attribute(
|
418
|
+
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
419
419
|
"json")
|
420
420
|
|
421
421
|
span.set_status(Status(StatusCode.OK))
|
@@ -424,8 +424,8 @@ def send_message(version, environment, application_name, tracer,
|
|
424
424
|
attributes = create_metrics_attributes(
|
425
425
|
service_name=application_name,
|
426
426
|
deployment_environment=environment,
|
427
|
-
operation=
|
428
|
-
system=
|
427
|
+
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
|
428
|
+
system=SemanticConvention.GEN_AI_SYSTEM_VERTEXAI,
|
429
429
|
request_model=request_model,
|
430
430
|
server_address=server_address,
|
431
431
|
server_port=server_port,
|
@@ -13,7 +13,7 @@ from openlit.__helpers import (
|
|
13
13
|
create_metrics_attributes,
|
14
14
|
set_server_address_and_port
|
15
15
|
)
|
16
|
-
from openlit.semcov import
|
16
|
+
from openlit.semcov import SemanticConvention
|
17
17
|
|
18
18
|
# Initialize logger for logging potential issues and operations
|
19
19
|
logger = logging.getLogger(__name__)
|
@@ -55,7 +55,7 @@ def generate(version, environment, application_name,
|
|
55
55
|
server_address, server_port = set_server_address_and_port(instance, "api.cohere.com", 443)
|
56
56
|
request_model = instance.llm_engine.model_config.model or "facebook/opt-125m"
|
57
57
|
|
58
|
-
span_name = f"{
|
58
|
+
span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
|
59
59
|
|
60
60
|
# pylint: disable=line-too-long
|
61
61
|
with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
|
@@ -66,19 +66,19 @@ def generate(version, environment, application_name,
|
|
66
66
|
try:
|
67
67
|
# Set base span attribues
|
68
68
|
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
69
|
-
span.set_attribute(
|
70
|
-
|
71
|
-
span.set_attribute(
|
72
|
-
|
73
|
-
span.set_attribute(
|
69
|
+
span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
70
|
+
SemanticConvention.GEN_AI_SYSTEM_VLLM)
|
71
|
+
span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
72
|
+
SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
|
73
|
+
span.set_attribute(SemanticConvention.SERVER_PORT,
|
74
74
|
server_port)
|
75
|
-
span.set_attribute(
|
75
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
76
76
|
request_model)
|
77
|
-
span.set_attribute(
|
77
|
+
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
78
78
|
request_model)
|
79
|
-
span.set_attribute(
|
79
|
+
span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
80
80
|
server_address)
|
81
|
-
span.set_attribute(
|
81
|
+
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
82
82
|
"text")
|
83
83
|
|
84
84
|
# Set base span attribues (Extras)
|
@@ -86,11 +86,11 @@ def generate(version, environment, application_name,
|
|
86
86
|
environment)
|
87
87
|
span.set_attribute(SERVICE_NAME,
|
88
88
|
application_name)
|
89
|
-
span.set_attribute(
|
89
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
|
90
90
|
False)
|
91
|
-
span.set_attribute(
|
91
|
+
span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
|
92
92
|
end_time - start_time)
|
93
|
-
span.set_attribute(
|
93
|
+
span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
94
94
|
version)
|
95
95
|
|
96
96
|
input_tokens = 0
|
@@ -102,34 +102,34 @@ def generate(version, environment, application_name,
|
|
102
102
|
completion_attributes = {}
|
103
103
|
|
104
104
|
for i, output in enumerate(response):
|
105
|
-
prompt_attributes[f"{
|
106
|
-
completion_attributes[f"{
|
105
|
+
prompt_attributes[f"{SemanticConvention.GEN_AI_CONTENT_PROMPT}.{i}"] = output.prompt
|
106
|
+
completion_attributes[f"{SemanticConvention.GEN_AI_CONTENT_COMPLETION}.{i}"] = output.outputs[0].text
|
107
107
|
input_tokens += general_tokens(output.prompt)
|
108
108
|
output_tokens += general_tokens(output.outputs[0].text)
|
109
109
|
|
110
110
|
# Add a single event for all prompts
|
111
111
|
span.add_event(
|
112
|
-
name=
|
112
|
+
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
113
113
|
attributes=prompt_attributes,
|
114
114
|
)
|
115
115
|
|
116
116
|
# Add a single event for all completions
|
117
117
|
span.add_event(
|
118
|
-
name=
|
118
|
+
name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
|
119
119
|
attributes=completion_attributes,
|
120
120
|
)
|
121
121
|
|
122
|
-
span.set_attribute(
|
122
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
|
123
123
|
input_tokens)
|
124
|
-
span.set_attribute(
|
124
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
|
125
125
|
output_tokens)
|
126
|
-
span.set_attribute(
|
126
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
|
127
127
|
input_tokens + output_tokens)
|
128
128
|
|
129
129
|
# Calculate cost of the operation
|
130
130
|
cost = get_chat_model_cost(request_model, pricing_info,
|
131
131
|
input_tokens, output_tokens)
|
132
|
-
span.set_attribute(
|
132
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
133
133
|
cost)
|
134
134
|
|
135
135
|
span.set_status(Status(StatusCode.OK))
|
@@ -138,8 +138,8 @@ def generate(version, environment, application_name,
|
|
138
138
|
attributes = create_metrics_attributes(
|
139
139
|
service_name=application_name,
|
140
140
|
deployment_environment=environment,
|
141
|
-
operation=
|
142
|
-
system=
|
141
|
+
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
|
142
|
+
system=SemanticConvention.GEN_AI_SYSTEM_VLLM,
|
143
143
|
request_model=request_model,
|
144
144
|
server_address=server_address,
|
145
145
|
server_port=server_port,
|