openlit 1.33.19__py3-none-any.whl → 1.33.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +64 -7
- openlit/__init__.py +3 -3
- openlit/evals/utils.py +7 -7
- openlit/guard/utils.py +7 -7
- openlit/instrumentation/ag2/ag2.py +24 -24
- openlit/instrumentation/ai21/ai21.py +3 -3
- openlit/instrumentation/ai21/async_ai21.py +3 -3
- openlit/instrumentation/ai21/utils.py +59 -59
- openlit/instrumentation/anthropic/anthropic.py +2 -2
- openlit/instrumentation/anthropic/async_anthropic.py +2 -2
- openlit/instrumentation/anthropic/utils.py +34 -34
- openlit/instrumentation/assemblyai/assemblyai.py +24 -24
- openlit/instrumentation/astra/astra.py +3 -3
- openlit/instrumentation/astra/async_astra.py +3 -3
- openlit/instrumentation/astra/utils.py +39 -39
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +2 -2
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +2 -2
- openlit/instrumentation/azure_ai_inference/utils.py +36 -36
- openlit/instrumentation/bedrock/bedrock.py +2 -2
- openlit/instrumentation/bedrock/utils.py +35 -35
- openlit/instrumentation/chroma/chroma.py +57 -57
- openlit/instrumentation/cohere/async_cohere.py +88 -88
- openlit/instrumentation/cohere/cohere.py +88 -88
- openlit/instrumentation/controlflow/controlflow.py +15 -15
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +14 -14
- openlit/instrumentation/crawl4ai/crawl4ai.py +14 -14
- openlit/instrumentation/crewai/crewai.py +22 -22
- openlit/instrumentation/dynamiq/dynamiq.py +19 -19
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +24 -25
- openlit/instrumentation/elevenlabs/elevenlabs.py +23 -25
- openlit/instrumentation/embedchain/embedchain.py +15 -15
- openlit/instrumentation/firecrawl/firecrawl.py +10 -10
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +33 -33
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +33 -33
- openlit/instrumentation/gpt4all/gpt4all.py +78 -78
- openlit/instrumentation/gpu/__init__.py +8 -8
- openlit/instrumentation/groq/async_groq.py +74 -74
- openlit/instrumentation/groq/groq.py +74 -74
- openlit/instrumentation/haystack/haystack.py +6 -6
- openlit/instrumentation/julep/async_julep.py +14 -14
- openlit/instrumentation/julep/julep.py +14 -14
- openlit/instrumentation/langchain/async_langchain.py +39 -39
- openlit/instrumentation/langchain/langchain.py +39 -39
- openlit/instrumentation/letta/letta.py +26 -26
- openlit/instrumentation/litellm/async_litellm.py +94 -94
- openlit/instrumentation/litellm/litellm.py +94 -94
- openlit/instrumentation/llamaindex/llamaindex.py +7 -7
- openlit/instrumentation/mem0/mem0.py +13 -13
- openlit/instrumentation/milvus/milvus.py +47 -47
- openlit/instrumentation/mistral/async_mistral.py +88 -88
- openlit/instrumentation/mistral/mistral.py +88 -88
- openlit/instrumentation/multion/async_multion.py +21 -21
- openlit/instrumentation/multion/multion.py +21 -21
- openlit/instrumentation/ollama/__init__.py +47 -34
- openlit/instrumentation/ollama/async_ollama.py +7 -5
- openlit/instrumentation/ollama/ollama.py +7 -5
- openlit/instrumentation/ollama/utils.py +58 -54
- openlit/instrumentation/openai/async_openai.py +225 -225
- openlit/instrumentation/openai/openai.py +225 -225
- openlit/instrumentation/openai_agents/openai_agents.py +11 -11
- openlit/instrumentation/phidata/phidata.py +15 -15
- openlit/instrumentation/pinecone/pinecone.py +43 -43
- openlit/instrumentation/premai/premai.py +86 -86
- openlit/instrumentation/qdrant/async_qdrant.py +95 -95
- openlit/instrumentation/qdrant/qdrant.py +99 -99
- openlit/instrumentation/reka/async_reka.py +33 -33
- openlit/instrumentation/reka/reka.py +33 -33
- openlit/instrumentation/together/async_together.py +90 -90
- openlit/instrumentation/together/together.py +90 -90
- openlit/instrumentation/transformers/__init__.py +11 -7
- openlit/instrumentation/transformers/transformers.py +32 -168
- openlit/instrumentation/transformers/utils.py +183 -0
- openlit/instrumentation/vertexai/async_vertexai.py +64 -64
- openlit/instrumentation/vertexai/vertexai.py +64 -64
- openlit/instrumentation/vllm/vllm.py +24 -24
- openlit/otel/metrics.py +11 -11
- openlit/semcov/__init__.py +3 -3
- {openlit-1.33.19.dist-info → openlit-1.33.21.dist-info}/METADATA +8 -8
- openlit-1.33.21.dist-info/RECORD +132 -0
- {openlit-1.33.19.dist-info → openlit-1.33.21.dist-info}/WHEEL +1 -1
- openlit-1.33.19.dist-info/RECORD +0 -131
- {openlit-1.33.19.dist-info → openlit-1.33.21.dist-info}/LICENSE +0 -0
@@ -13,7 +13,7 @@ from openlit.__helpers import (
|
|
13
13
|
create_metrics_attributes,
|
14
14
|
set_server_address_and_port
|
15
15
|
)
|
16
|
-
from openlit.semcov import
|
16
|
+
from openlit.semcov import SemanticConvention
|
17
17
|
|
18
18
|
# Initialize logger for logging potential issues and operations
|
19
19
|
logger = logging.getLogger(__name__)
|
@@ -56,7 +56,7 @@ def generate(version, environment, application_name,
|
|
56
56
|
server_address, server_port = set_server_address_and_port(instance, "generativelanguage.googleapis.com", 443)
|
57
57
|
request_model = kwargs.get("model", "gemini-2.0-flash")
|
58
58
|
|
59
|
-
span_name = f"{
|
59
|
+
span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
|
60
60
|
|
61
61
|
with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
|
62
62
|
start_time = time.time()
|
@@ -110,26 +110,26 @@ def generate(version, environment, application_name,
|
|
110
110
|
|
111
111
|
# Set base span attributes (OTel Semconv)
|
112
112
|
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
113
|
-
span.set_attribute(
|
114
|
-
|
115
|
-
span.set_attribute(
|
116
|
-
|
117
|
-
span.set_attribute(
|
113
|
+
span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
114
|
+
SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
|
115
|
+
span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
116
|
+
SemanticConvention.GEN_AI_SYSTEM_GEMINI)
|
117
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
118
118
|
request_model)
|
119
|
-
span.set_attribute(
|
119
|
+
span.set_attribute(SemanticConvention.SERVER_PORT,
|
120
120
|
server_port)
|
121
121
|
|
122
122
|
inference_config = kwargs.get('config', {})
|
123
123
|
|
124
124
|
# List of attributes and their config keys
|
125
125
|
attributes = [
|
126
|
-
(
|
127
|
-
(
|
128
|
-
(
|
129
|
-
(
|
130
|
-
(
|
131
|
-
(
|
132
|
-
(
|
126
|
+
(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
|
127
|
+
(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
|
128
|
+
(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
|
129
|
+
(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
|
130
|
+
(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
|
131
|
+
(SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
|
132
|
+
(SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
|
133
133
|
]
|
134
134
|
|
135
135
|
# Set each attribute if the corresponding value exists and is not None
|
@@ -139,15 +139,15 @@ def generate(version, environment, application_name,
|
|
139
139
|
if value is not None:
|
140
140
|
span.set_attribute(attribute, value)
|
141
141
|
|
142
|
-
span.set_attribute(
|
142
|
+
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
143
143
|
response_dict.get('model_version'))
|
144
|
-
span.set_attribute(
|
144
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
|
145
145
|
input_tokens)
|
146
|
-
span.set_attribute(
|
146
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
|
147
147
|
output_tokens)
|
148
|
-
span.set_attribute(
|
148
|
+
span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
149
149
|
server_address)
|
150
|
-
span.set_attribute(
|
150
|
+
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
|
151
151
|
[str(response_dict.get('candidates')[0].get('finish_reason'))])
|
152
152
|
|
153
153
|
# Set base span attributes (Extras)
|
@@ -155,35 +155,35 @@ def generate(version, environment, application_name,
|
|
155
155
|
environment)
|
156
156
|
span.set_attribute(SERVICE_NAME,
|
157
157
|
application_name)
|
158
|
-
span.set_attribute(
|
158
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
|
159
159
|
False)
|
160
|
-
span.set_attribute(
|
160
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
|
161
161
|
input_tokens + output_tokens)
|
162
|
-
span.set_attribute(
|
162
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
163
163
|
cost)
|
164
|
-
span.set_attribute(
|
164
|
+
span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
|
165
165
|
end_time - start_time)
|
166
|
-
span.set_attribute(
|
166
|
+
span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
167
167
|
version)
|
168
168
|
if capture_message_content:
|
169
169
|
span.add_event(
|
170
|
-
name=
|
170
|
+
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
171
171
|
attributes={
|
172
|
-
|
172
|
+
SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
|
173
173
|
},
|
174
174
|
)
|
175
175
|
span.add_event(
|
176
|
-
name=
|
176
|
+
name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
|
177
177
|
attributes={
|
178
|
-
|
178
|
+
SemanticConvention.GEN_AI_CONTENT_COMPLETION: response.text,
|
179
179
|
},
|
180
180
|
)
|
181
181
|
|
182
182
|
if isinstance(response_dict.get('text'), str):
|
183
|
-
span.set_attribute(
|
183
|
+
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
184
184
|
"text")
|
185
185
|
elif response_dict.get('text') is not None:
|
186
|
-
span.set_attribute(
|
186
|
+
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
187
187
|
"json")
|
188
188
|
|
189
189
|
span.set_status(Status(StatusCode.OK))
|
@@ -192,8 +192,8 @@ def generate(version, environment, application_name,
|
|
192
192
|
attributes = create_metrics_attributes(
|
193
193
|
service_name=application_name,
|
194
194
|
deployment_environment=environment,
|
195
|
-
operation=
|
196
|
-
system=
|
195
|
+
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
|
196
|
+
system=SemanticConvention.GEN_AI_SYSTEM_GEMINI,
|
197
197
|
request_model=request_model,
|
198
198
|
server_address=server_address,
|
199
199
|
server_port=server_port,
|
@@ -14,7 +14,7 @@ from openlit.__helpers import (
|
|
14
14
|
calculate_tbt,
|
15
15
|
calculate_ttft
|
16
16
|
)
|
17
|
-
from openlit.semcov import
|
17
|
+
from openlit.semcov import SemanticConvention
|
18
18
|
|
19
19
|
# Initialize logger for logging potential issues and operations
|
20
20
|
logger = logging.getLogger(__name__)
|
@@ -116,39 +116,39 @@ def generate(version, environment, application_name,
|
|
116
116
|
|
117
117
|
# Set Span attributes (OTel Semconv)
|
118
118
|
self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
119
|
-
self._span.set_attribute(
|
120
|
-
|
121
|
-
self._span.set_attribute(
|
122
|
-
|
123
|
-
self._span.set_attribute(
|
119
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
120
|
+
SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
|
121
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
122
|
+
SemanticConvention.GEN_AI_SYSTEM_GPT4ALL)
|
123
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
124
124
|
self._request_model)
|
125
|
-
self._span.set_attribute(
|
125
|
+
self._span.set_attribute(SemanticConvention.SERVER_PORT,
|
126
126
|
self._server_port)
|
127
|
-
self._span.set_attribute(
|
127
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
|
128
128
|
self._kwargs.get("repeat_penalty", 1.18))
|
129
|
-
self._span.set_attribute(
|
129
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
|
130
130
|
self._kwargs.get("max_tokens", 200))
|
131
|
-
self._span.set_attribute(
|
131
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
|
132
132
|
self._kwargs.get("presence_penalty", 0.0))
|
133
|
-
self._span.set_attribute(
|
133
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
|
134
134
|
self._kwargs.get("temp", 0.7))
|
135
|
-
self._span.set_attribute(
|
135
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
|
136
136
|
self._kwargs.get("top_p", 0.4))
|
137
|
-
self._span.set_attribute(
|
137
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K,
|
138
138
|
self._kwargs.get("top_k", 40))
|
139
|
-
self._span.set_attribute(
|
139
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
140
140
|
self._request_model)
|
141
|
-
self._span.set_attribute(
|
141
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
|
142
142
|
input_tokens)
|
143
|
-
self._span.set_attribute(
|
143
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
|
144
144
|
output_tokens)
|
145
|
-
self._span.set_attribute(
|
145
|
+
self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
146
146
|
self._server_address)
|
147
147
|
if isinstance(self._llmresponse, str):
|
148
|
-
self._span.set_attribute(
|
148
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
149
149
|
"text")
|
150
150
|
else:
|
151
|
-
self._span.set_attribute(
|
151
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
152
152
|
"json")
|
153
153
|
|
154
154
|
# Set Span attributes (Extra)
|
@@ -156,29 +156,29 @@ def generate(version, environment, application_name,
|
|
156
156
|
environment)
|
157
157
|
self._span.set_attribute(SERVICE_NAME,
|
158
158
|
application_name)
|
159
|
-
self._span.set_attribute(
|
159
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
|
160
160
|
True)
|
161
|
-
self._span.set_attribute(
|
161
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
|
162
162
|
input_tokens + output_tokens)
|
163
|
-
self._span.set_attribute(
|
163
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
|
164
164
|
self._tbt)
|
165
|
-
self._span.set_attribute(
|
165
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
|
166
166
|
self._ttft)
|
167
|
-
self._span.set_attribute(
|
167
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
168
168
|
version)
|
169
|
-
self._span.set_attribute(
|
169
|
+
self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
170
170
|
0)
|
171
171
|
if capture_message_content:
|
172
172
|
self._span.add_event(
|
173
|
-
name=
|
173
|
+
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
174
174
|
attributes={
|
175
|
-
|
175
|
+
SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
|
176
176
|
},
|
177
177
|
)
|
178
178
|
self._span.add_event(
|
179
|
-
name=
|
179
|
+
name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
|
180
180
|
attributes={
|
181
|
-
|
181
|
+
SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
|
182
182
|
},
|
183
183
|
)
|
184
184
|
|
@@ -188,8 +188,8 @@ def generate(version, environment, application_name,
|
|
188
188
|
attributes = create_metrics_attributes(
|
189
189
|
service_name=application_name,
|
190
190
|
deployment_environment=environment,
|
191
|
-
operation=
|
192
|
-
system=
|
191
|
+
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
|
192
|
+
system=SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
|
193
193
|
request_model=self._request_model,
|
194
194
|
server_address=self._server_address,
|
195
195
|
server_port=self._server_port,
|
@@ -243,7 +243,7 @@ def generate(version, environment, application_name,
|
|
243
243
|
server_address, server_port = set_server_address_and_port(instance, "localhost", 80)
|
244
244
|
request_model = str(instance.model.model_path).rsplit('/', maxsplit=1)[-1] or "orca-mini-3b-gguf2-q4_0.gguf"
|
245
245
|
|
246
|
-
span_name = f"{
|
246
|
+
span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
|
247
247
|
|
248
248
|
# pylint: disable=no-else-return
|
249
249
|
if streaming:
|
@@ -269,39 +269,39 @@ def generate(version, environment, application_name,
|
|
269
269
|
|
270
270
|
# Set Span attributes (OTel Semconv)
|
271
271
|
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
272
|
-
span.set_attribute(
|
273
|
-
|
274
|
-
span.set_attribute(
|
275
|
-
|
276
|
-
span.set_attribute(
|
272
|
+
span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
273
|
+
SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
|
274
|
+
span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
275
|
+
SemanticConvention.GEN_AI_SYSTEM_GPT4ALL)
|
276
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
277
277
|
request_model)
|
278
|
-
span.set_attribute(
|
278
|
+
span.set_attribute(SemanticConvention.SERVER_PORT,
|
279
279
|
server_port)
|
280
|
-
span.set_attribute(
|
280
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
|
281
281
|
kwargs.get("repeat_penalty", 1.18))
|
282
|
-
span.set_attribute(
|
282
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
|
283
283
|
kwargs.get("max_tokens", 200))
|
284
|
-
span.set_attribute(
|
284
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
|
285
285
|
kwargs.get("presence_penalty", 0.0))
|
286
|
-
span.set_attribute(
|
286
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
|
287
287
|
kwargs.get("temp", 0.7))
|
288
|
-
span.set_attribute(
|
288
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
|
289
289
|
kwargs.get("top_p", 0.4))
|
290
|
-
span.set_attribute(
|
290
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K,
|
291
291
|
kwargs.get("top_k", 40))
|
292
|
-
span.set_attribute(
|
292
|
+
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
293
293
|
request_model)
|
294
|
-
span.set_attribute(
|
294
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
|
295
295
|
input_tokens)
|
296
|
-
span.set_attribute(
|
296
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
|
297
297
|
output_tokens)
|
298
|
-
span.set_attribute(
|
298
|
+
span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
299
299
|
server_address)
|
300
300
|
if isinstance(response, str):
|
301
|
-
span.set_attribute(
|
301
|
+
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
302
302
|
"text")
|
303
303
|
else:
|
304
|
-
span.set_attribute(
|
304
|
+
span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
|
305
305
|
"json")
|
306
306
|
|
307
307
|
# Set Span attributes (Extra)
|
@@ -309,27 +309,27 @@ def generate(version, environment, application_name,
|
|
309
309
|
environment)
|
310
310
|
span.set_attribute(SERVICE_NAME,
|
311
311
|
application_name)
|
312
|
-
span.set_attribute(
|
312
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
|
313
313
|
False)
|
314
|
-
span.set_attribute(
|
314
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
|
315
315
|
input_tokens + output_tokens)
|
316
|
-
span.set_attribute(
|
316
|
+
span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
|
317
317
|
end_time - start_time)
|
318
|
-
span.set_attribute(
|
318
|
+
span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
319
319
|
version)
|
320
|
-
span.set_attribute(
|
320
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
321
321
|
0)
|
322
322
|
if capture_message_content:
|
323
323
|
span.add_event(
|
324
|
-
name=
|
324
|
+
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
325
325
|
attributes={
|
326
|
-
|
326
|
+
SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
|
327
327
|
},
|
328
328
|
)
|
329
329
|
span.add_event(
|
330
|
-
name=
|
330
|
+
name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
|
331
331
|
attributes={
|
332
|
-
|
332
|
+
SemanticConvention.GEN_AI_CONTENT_COMPLETION: response,
|
333
333
|
},
|
334
334
|
)
|
335
335
|
|
@@ -339,8 +339,8 @@ def generate(version, environment, application_name,
|
|
339
339
|
attributes = create_metrics_attributes(
|
340
340
|
service_name=application_name,
|
341
341
|
deployment_environment=environment,
|
342
|
-
operation=
|
343
|
-
system=
|
342
|
+
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
|
343
|
+
system=SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
|
344
344
|
request_model=request_model,
|
345
345
|
server_address=server_address,
|
346
346
|
server_port=server_port,
|
@@ -412,7 +412,7 @@ def embed(version, environment, application_name,
|
|
412
412
|
# pylint: disable=line-too-long
|
413
413
|
request_model = str(instance.gpt4all.model.model_path).rsplit('/', maxsplit=1)[-1] or "all-MiniLM-L6-v2.gguf2.f16.gguf"
|
414
414
|
|
415
|
-
span_name = f"{
|
415
|
+
span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
|
416
416
|
|
417
417
|
with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
|
418
418
|
start_time = time.time()
|
@@ -425,19 +425,19 @@ def embed(version, environment, application_name,
|
|
425
425
|
|
426
426
|
# Set Span attributes (OTel Semconv)
|
427
427
|
span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
|
428
|
-
span.set_attribute(
|
429
|
-
|
430
|
-
span.set_attribute(
|
431
|
-
|
432
|
-
span.set_attribute(
|
428
|
+
span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
|
429
|
+
SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING)
|
430
|
+
span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
|
431
|
+
SemanticConvention.GEN_AI_SYSTEM_GPT4ALL)
|
432
|
+
span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
|
433
433
|
request_model)
|
434
|
-
span.set_attribute(
|
434
|
+
span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
|
435
435
|
request_model)
|
436
|
-
span.set_attribute(
|
436
|
+
span.set_attribute(SemanticConvention.SERVER_ADDRESS,
|
437
437
|
server_address)
|
438
|
-
span.set_attribute(
|
438
|
+
span.set_attribute(SemanticConvention.SERVER_PORT,
|
439
439
|
server_port)
|
440
|
-
span.set_attribute(
|
440
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
|
441
441
|
input_tokens)
|
442
442
|
|
443
443
|
# Set Span attributes (Extras)
|
@@ -445,18 +445,18 @@ def embed(version, environment, application_name,
|
|
445
445
|
environment)
|
446
446
|
span.set_attribute(SERVICE_NAME,
|
447
447
|
application_name)
|
448
|
-
span.set_attribute(
|
448
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
|
449
449
|
input_tokens)
|
450
|
-
span.set_attribute(
|
450
|
+
span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
|
451
451
|
version)
|
452
|
-
span.set_attribute(
|
452
|
+
span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
|
453
453
|
0)
|
454
454
|
|
455
455
|
if capture_message_content:
|
456
456
|
span.add_event(
|
457
|
-
name=
|
457
|
+
name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
|
458
458
|
attributes={
|
459
|
-
|
459
|
+
SemanticConvention.GEN_AI_CONTENT_PROMPT: str(kwargs.get("input", "")),
|
460
460
|
},
|
461
461
|
)
|
462
462
|
|
@@ -466,8 +466,8 @@ def embed(version, environment, application_name,
|
|
466
466
|
attributes = create_metrics_attributes(
|
467
467
|
service_name=application_name,
|
468
468
|
deployment_environment=environment,
|
469
|
-
operation=
|
470
|
-
system=
|
469
|
+
operation=SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
|
470
|
+
system=SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
|
471
471
|
request_model=request_model,
|
472
472
|
server_address=server_address,
|
473
473
|
server_port=server_port,
|
@@ -7,7 +7,7 @@ from functools import partial
|
|
7
7
|
from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
|
8
8
|
from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
|
9
9
|
from opentelemetry.metrics import get_meter, CallbackOptions, Observation
|
10
|
-
from openlit.semcov import
|
10
|
+
from openlit.semcov import SemanticConvention
|
11
11
|
|
12
12
|
# Initialize logger for logging potential issues and operations
|
13
13
|
logger = logging.getLogger(__name__)
|
@@ -54,7 +54,7 @@ class GPUInstrumentor(BaseInstrumentor):
|
|
54
54
|
|
55
55
|
for semantic_name, internal_name in metric_names:
|
56
56
|
meter.create_observable_gauge(
|
57
|
-
name=getattr(
|
57
|
+
name=getattr(SemanticConvention, semantic_name),
|
58
58
|
callbacks=[partial(self._collect_metric,
|
59
59
|
environment, application_name, internal_name)],
|
60
60
|
description=f"GPU {internal_name.replace('_', ' ').title()}",
|
@@ -141,9 +141,9 @@ class GPUInstrumentor(BaseInstrumentor):
|
|
141
141
|
TELEMETRY_SDK_NAME: "openlit",
|
142
142
|
SERVICE_NAME: application_name,
|
143
143
|
DEPLOYMENT_ENVIRONMENT: environment,
|
144
|
-
|
145
|
-
|
146
|
-
|
144
|
+
SemanticConvention.GPU_INDEX: str(gpu_index),
|
145
|
+
SemanticConvention.GPU_UUID: safe_decode(pynvml.nvmlDeviceGetUUID(handle)),
|
146
|
+
SemanticConvention.GPU_NAME: safe_decode(pynvml.nvmlDeviceGetName(handle))
|
147
147
|
}
|
148
148
|
yield Observation(get_metric_value(handle, metric_name), attributes)
|
149
149
|
|
@@ -202,10 +202,10 @@ class GPUInstrumentor(BaseInstrumentor):
|
|
202
202
|
SERVICE_NAME: application_name,
|
203
203
|
DEPLOYMENT_ENVIRONMENT: environment,
|
204
204
|
# pylint: disable=line-too-long
|
205
|
-
|
205
|
+
SemanticConvention.GPU_INDEX: amdsmi.amdsmi_get_xgmi_info(device_handle)['index'],
|
206
206
|
# pylint: disable=line-too-long
|
207
|
-
|
208
|
-
|
207
|
+
SemanticConvention.GPU_UUID: amdsmi.amdsmi_get_gpu_asic_info(device_handle)['market_name'],
|
208
|
+
SemanticConvention.GPU_NAME: amdsmi.amdsmi_get_device_name(device_handle)
|
209
209
|
}
|
210
210
|
yield Observation(get_metric_value(device_handle, metric_name), attributes)
|
211
211
|
|