openlit 1.33.19__py3-none-any.whl → 1.33.21__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- openlit/__helpers.py +64 -7
- openlit/__init__.py +3 -3
- openlit/evals/utils.py +7 -7
- openlit/guard/utils.py +7 -7
- openlit/instrumentation/ag2/ag2.py +24 -24
- openlit/instrumentation/ai21/ai21.py +3 -3
- openlit/instrumentation/ai21/async_ai21.py +3 -3
- openlit/instrumentation/ai21/utils.py +59 -59
- openlit/instrumentation/anthropic/anthropic.py +2 -2
- openlit/instrumentation/anthropic/async_anthropic.py +2 -2
- openlit/instrumentation/anthropic/utils.py +34 -34
- openlit/instrumentation/assemblyai/assemblyai.py +24 -24
- openlit/instrumentation/astra/astra.py +3 -3
- openlit/instrumentation/astra/async_astra.py +3 -3
- openlit/instrumentation/astra/utils.py +39 -39
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +2 -2
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +2 -2
- openlit/instrumentation/azure_ai_inference/utils.py +36 -36
- openlit/instrumentation/bedrock/bedrock.py +2 -2
- openlit/instrumentation/bedrock/utils.py +35 -35
- openlit/instrumentation/chroma/chroma.py +57 -57
- openlit/instrumentation/cohere/async_cohere.py +88 -88
- openlit/instrumentation/cohere/cohere.py +88 -88
- openlit/instrumentation/controlflow/controlflow.py +15 -15
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +14 -14
- openlit/instrumentation/crawl4ai/crawl4ai.py +14 -14
- openlit/instrumentation/crewai/crewai.py +22 -22
- openlit/instrumentation/dynamiq/dynamiq.py +19 -19
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +24 -25
- openlit/instrumentation/elevenlabs/elevenlabs.py +23 -25
- openlit/instrumentation/embedchain/embedchain.py +15 -15
- openlit/instrumentation/firecrawl/firecrawl.py +10 -10
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +33 -33
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +33 -33
- openlit/instrumentation/gpt4all/gpt4all.py +78 -78
- openlit/instrumentation/gpu/__init__.py +8 -8
- openlit/instrumentation/groq/async_groq.py +74 -74
- openlit/instrumentation/groq/groq.py +74 -74
- openlit/instrumentation/haystack/haystack.py +6 -6
- openlit/instrumentation/julep/async_julep.py +14 -14
- openlit/instrumentation/julep/julep.py +14 -14
- openlit/instrumentation/langchain/async_langchain.py +39 -39
- openlit/instrumentation/langchain/langchain.py +39 -39
- openlit/instrumentation/letta/letta.py +26 -26
- openlit/instrumentation/litellm/async_litellm.py +94 -94
- openlit/instrumentation/litellm/litellm.py +94 -94
- openlit/instrumentation/llamaindex/llamaindex.py +7 -7
- openlit/instrumentation/mem0/mem0.py +13 -13
- openlit/instrumentation/milvus/milvus.py +47 -47
- openlit/instrumentation/mistral/async_mistral.py +88 -88
- openlit/instrumentation/mistral/mistral.py +88 -88
- openlit/instrumentation/multion/async_multion.py +21 -21
- openlit/instrumentation/multion/multion.py +21 -21
- openlit/instrumentation/ollama/__init__.py +47 -34
- openlit/instrumentation/ollama/async_ollama.py +7 -5
- openlit/instrumentation/ollama/ollama.py +7 -5
- openlit/instrumentation/ollama/utils.py +58 -54
- openlit/instrumentation/openai/async_openai.py +225 -225
- openlit/instrumentation/openai/openai.py +225 -225
- openlit/instrumentation/openai_agents/openai_agents.py +11 -11
- openlit/instrumentation/phidata/phidata.py +15 -15
- openlit/instrumentation/pinecone/pinecone.py +43 -43
- openlit/instrumentation/premai/premai.py +86 -86
- openlit/instrumentation/qdrant/async_qdrant.py +95 -95
- openlit/instrumentation/qdrant/qdrant.py +99 -99
- openlit/instrumentation/reka/async_reka.py +33 -33
- openlit/instrumentation/reka/reka.py +33 -33
- openlit/instrumentation/together/async_together.py +90 -90
- openlit/instrumentation/together/together.py +90 -90
- openlit/instrumentation/transformers/__init__.py +11 -7
- openlit/instrumentation/transformers/transformers.py +32 -168
- openlit/instrumentation/transformers/utils.py +183 -0
- openlit/instrumentation/vertexai/async_vertexai.py +64 -64
- openlit/instrumentation/vertexai/vertexai.py +64 -64
- openlit/instrumentation/vllm/vllm.py +24 -24
- openlit/otel/metrics.py +11 -11
- openlit/semcov/__init__.py +3 -3
- {openlit-1.33.19.dist-info → openlit-1.33.21.dist-info}/METADATA +8 -8
- openlit-1.33.21.dist-info/RECORD +132 -0
- {openlit-1.33.19.dist-info → openlit-1.33.21.dist-info}/WHEEL +1 -1
- openlit-1.33.19.dist-info/RECORD +0 -131
- {openlit-1.33.19.dist-info → openlit-1.33.21.dist-info}/LICENSE +0 -0
openlit/instrumentation/transformers/utils.py (new file):

```diff
--- /dev/null
+++ openlit/instrumentation/transformers/utils.py
@@ -0,0 +1,183 @@
+"""
+HF Transformers OpenTelemetry instrumentation utility functions
+"""
+import time
+
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    response_as_dict,
+    calculate_tbt,
+    general_tokens,
+    get_chat_model_cost,
+    create_metrics_attributes,
+    format_and_concatenate
+)
+from openlit.semcov import SemanticConvention
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content, disable_metrics, version, args, kwargs, is_stream):
+
+    """
+    Process chat request and generate Telemetry
+    """
+
+    scope._end_time = time.time()
+    if len(scope._timestamps) > 1:
+        scope._tbt = calculate_tbt(scope._timestamps)
+
+    forward_params = scope._instance._forward_params
+    request_model = scope._instance.model.config.name_or_path
+
+    input_tokens = general_tokens(scope._prompt)
+    output_tokens = general_tokens(scope._llmresponse)
+
+    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+    # Set Span attributes (OTel Semconv)
+    scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
+    scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
+
+    # List of attributes and their config keys
+    attributes = [
+        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, "temperature"),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_K, "top_k"),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_P, "top_p"),
+        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, "max_length"),
+    ]
+
+    # Set each attribute if the corresponding value exists and is not None
+    for attribute, key in attributes:
+        value = forward_params.get(key)
+        if value is not None:
+            scope._span.set_attribute(attribute, value)
+
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, request_model)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
+    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
+    scope._span.set_attribute(SERVICE_NAME, application_name)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
+
+    # To be removed one the change to span_attributes (from span events) is complete
+    if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, scope._prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse,)
+
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: scope._prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+            },
+        )
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    if not disable_metrics:
+        metrics_attributes = create_metrics_attributes(
+            service_name=application_name,
+            deployment_environment=environment,
+            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            system=SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE,
+            request_model=request_model,
+            server_address=scope._server_address,
+            server_port=scope._server_port,
+            response_model=request_model,
+        )
+
+        metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, metrics_attributes)
+        metrics["genai_client_operation_duration"].record(scope._end_time - scope._start_time, metrics_attributes)
+        metrics["genai_server_tbt"].record(scope._tbt, metrics_attributes)
+        metrics["genai_server_ttft"].record(scope._ttft, metrics_attributes)
+        metrics["genai_requests"].add(1, metrics_attributes)
+        metrics["genai_completion_tokens"].add(output_tokens, metrics_attributes)
+        metrics["genai_prompt_tokens"].add(input_tokens, metrics_attributes)
+        metrics["genai_cost"].record(cost, metrics_attributes)
+
+def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time,
+    span, args, kwargs, capture_message_content=False, disable_metrics=False, version="1.0.0"):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    self = type("GenericScope", (), {})()
+    response_dict = response_as_dict(response)
+
+    # pylint: disable = no-member
+    self._instance = instance
+    self._start_time = start_time
+    self._end_time = time.time()
+    self._span = span
+    self._timestamps = []
+    self._ttft, self._tbt = self._end_time - self._start_time, 0
+    self._server_address, self._server_port = server_address, server_port
+    self._kwargs = kwargs
+    self._args = args
+
+    if self._args and len(self._args) > 0:
+        self._prompt = args[0]
+    else:
+        self._prompt = (
+            kwargs.get("text_inputs") or
+            (kwargs.get("image") and kwargs.get("question") and
+             ("image: " + kwargs.get("image") + " question:" + kwargs.get("question"))) or
+            kwargs.get("fallback") or
+            ""
+        )
+    self._prompt = format_and_concatenate(self._prompt)
+
+    self._llmresponse = []
+    if self._kwargs.get("task", "text-generation") == "text-generation":
+        first_entry = response_dict[0]
+
+        if isinstance(first_entry, dict) and isinstance(first_entry.get("generated_text"), list):
+            last_element = first_entry.get("generated_text")[-1]
+            self._llmresponse = last_element.get("content", last_element)
+        else:
+            def extract_text(entry):
+                if isinstance(entry, dict):
+                    return entry.get("generated_text")
+                if isinstance(entry, list):
+                    return " ".join(
+                        extract_text(sub_entry) for sub_entry in entry if isinstance(sub_entry, dict)
+                    )
+                return ""
+
+            # Process and collect all generated texts
+            self._llmresponse = [
+                extract_text(entry) for entry in response_dict
+            ]
+
+            # Join all non-empty responses into a single string
+            self._llmresponse = " ".join(filter(None, self._llmresponse))
+
+    elif self._kwargs.get("task", "text-generation") == "automatic-speech-recognition":
+        self._llmresponse = response_dict.get("text", "")
+
+    elif self._kwargs.get("task", "text-generation") == "image-classification":
+        self._llmresponse = str(response_dict[0])
+
+    elif self._kwargs.get("task", "text-generation") == "visual-question-answering":
+        self._llmresponse = str(response_dict[0]).get("answer")
+
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, args, kwargs, is_stream=False)
+
+    return response
```
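This new module centralizes the span and metric logic that previously lived inline in `transformers.py` (note the matching `transformers/transformers.py +32 -168` entry in the file list). For orientation, here is a minimal sketch of how the module gets exercised, assuming `openlit` and `transformers` are installed and a small local model such as `gpt2` can be loaded. User code never calls `process_chat_response` directly; openlit's transformers instrumentor wraps the pipeline invocation and routes it through these helpers:

```python
# Minimal sketch (assumptions: openlit + transformers installed, "gpt2"
# downloadable). openlit patches the transformers pipeline call, so the
# invocation below is what ultimately flows through process_chat_response().
import openlit
from transformers import pipeline

openlit.init(application_name="demo-app", environment="dev")

generator = pipeline("text-generation", model="gpt2")
# max_length is one of the forward params common_chat_logic() reads back
# onto the span as gen_ai.request.max_tokens.
result = generator("OpenTelemetry makes LLM apps observable because", max_length=32)

# The instrumentation records the span (model, tokens, cost, TTFT/TBT) and,
# unless disabled, the genai_* metrics shown in common_chat_logic().
print(result[0]["generated_text"])
```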
openlit/instrumentation/vertexai/async_vertexai.py (representative of the rename applied across the instrumentation modules; the removed lines carry the legacy `SemanticConvetion` spelling):

```diff
@@ -13,7 +13,7 @@ from openlit.__helpers import (
     calculate_tbt,
     create_metrics_attributes,
 )
-from openlit.semcov import SemanticConvetion
+from openlit.semcov import SemanticConvention
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
@@ -139,26 +139,26 @@ def async_send_message(version, environment, application_name, tracer,
 
         # Set Span attributes (OTel Semconv)
         self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-        self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+        self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
+                                SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
+        self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
+                                SemanticConvention.GEN_AI_SYSTEM_VERTEXAI)
+        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
                                 self._request_model)
-        self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+        self._span.set_attribute(SemanticConvention.SERVER_PORT,
                                 self._server_port)
 
         inference_config = self._kwargs.get('generation_config', {})
 
         # List of attributes and their config keys
         attributes = [
-            (SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
-            (SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, 'max_output_tokens'),
-            (SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
-            (SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
-            (SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
-            (SemanticConvetion.GEN_AI_REQUEST_TOP_P, 'top_p'),
-            (SemanticConvetion.GEN_AI_REQUEST_TOP_K, 'top_k'),
+            (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
+            (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_output_tokens'),
+            (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
+            (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
+            (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
+            (SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
+            (SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
         ]
 
         # Set each attribute if the corresponding value exists and is not None
@@ -168,19 +168,19 @@ def async_send_message(version, environment, application_name, tracer,
             if value is not None:
                 self._span.set_attribute(attribute, value)
 
-        self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+        self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
                                 self._request_model)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+        self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
                                 self._input_tokens)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+        self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
                                 self._output_tokens)
-        self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+        self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
                                 self._server_address)
         if isinstance(self._llmresponse, str):
-            self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+            self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
                                 "text")
         else:
-            self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+            self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
                                 "json")
 
         # Set Span attributes (Extra)
@@ -188,29 +188,29 @@ def async_send_message(version, environment, application_name, tracer,
                                 environment)
         self._span.set_attribute(SERVICE_NAME,
                                 application_name)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
                                 True)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+        self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
                                 self._input_tokens + self._output_tokens)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+        self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
                                 cost)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+        self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
                                 self._tbt)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+        self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
                                 self._ttft)
-        self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+        self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
                                 version)
         if capture_message_content:
             self._span.add_event(
-                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
                 attributes={
-                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                    SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
                 },
             )
             self._span.add_event(
-                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
                 attributes={
-                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
+                    SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                 },
             )
             self._span.set_status(Status(StatusCode.OK))
@@ -219,8 +219,8 @@ def async_send_message(version, environment, application_name, tracer,
             attributes = create_metrics_attributes(
                 service_name=application_name,
                 deployment_environment=environment,
-                operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                system=SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
+                operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+                system=SemanticConvention.GEN_AI_SYSTEM_VERTEXAI,
                 request_model=self._request_model,
                 server_address=self._server_address,
                 server_port=self._server_port,
@@ -280,7 +280,7 @@ def async_send_message(version, environment, application_name, tracer,
 
     server_address, server_port = location + '-aiplatform.googleapis.com', 443
 
-    span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+    span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
     # pylint: disable=no-else-return
     if streaming:
@@ -342,26 +342,26 @@ def async_send_message(version, environment, application_name, tracer,
 
             # Set base span attribues (OTel Semconv)
             span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-            span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-            span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
-            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+            span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
+                                SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
+            span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
+                                SemanticConvention.GEN_AI_SYSTEM_VERTEXAI)
+            span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
                                 request_model)
-            span.set_attribute(SemanticConvetion.SERVER_PORT,
+            span.set_attribute(SemanticConvention.SERVER_PORT,
                                 server_port)
 
             inference_config = kwargs.get('generation_config', {})
 
             # List of attributes and their config keys
             attributes = [
-                (SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
-                (SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, 'max_output_tokens'),
-                (SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
-                (SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
-                (SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
-                (SemanticConvetion.GEN_AI_REQUEST_TOP_P, 'top_p'),
-                (SemanticConvetion.GEN_AI_REQUEST_TOP_K, 'top_k'),
+                (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
+                (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_output_tokens'),
+                (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
+                (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
+                (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
+                (SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
+                (SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
             ]
 
             # Set each attribute if the corresponding value exists and is not None
@@ -371,15 +371,15 @@ def async_send_message(version, environment, application_name, tracer,
                 if value is not None:
                     span.set_attribute(attribute, value)
 
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+            span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
                                 request_model)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+            span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
                                 input_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+            span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
                                 output_tokens)
-            span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+            span.set_attribute(SemanticConvention.SERVER_ADDRESS,
                                 server_address)
-            # span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+            # span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
             # [str(response.candidates[0].finish_reason)])
 
             # Set base span attribues (Extras)
@@ -387,35 +387,35 @@ def async_send_message(version, environment, application_name, tracer,
                                 environment)
             span.set_attribute(SERVICE_NAME,
                                 application_name)
-            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+            span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
                                 False)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+            span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
                                 input_tokens + output_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+            span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
                                 cost)
-            span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+            span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
                                 end_time - start_time)
-            span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+            span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
                                 version)
             if capture_message_content:
                 span.add_event(
-                    name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                    name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
                     attributes={
-                        SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
                     },
                 )
                 span.add_event(
-                    name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                    name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
                     attributes={
-                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
+                        SemanticConvention.GEN_AI_CONTENT_COMPLETION: response.text,
                     },
                 )
 
             if isinstance(response.text, str):
-                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
                                     "text")
             elif response.text is not None:
-                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
                                     "json")
 
             span.set_status(Status(StatusCode.OK))
@@ -424,8 +424,8 @@ def async_send_message(version, environment, application_name, tracer,
             attributes = create_metrics_attributes(
                 service_name=application_name,
                 deployment_environment=environment,
-                operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                system=SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
+                operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+                system=SemanticConvention.GEN_AI_SYSTEM_VERTEXAI,
                 request_model=request_model,
                 server_address=server_address,
                 server_port=server_port,
```
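These vertexai hunks, like the symmetric ±N counts on most other files in the list above, boil down to one mechanical change: the class exported by `openlit.semcov` is now spelled `SemanticConvention`. Downstream code that imported the class directly can bridge both spellings with a small shim; this is a sketch under the assumption that older releases exported only the legacy name, so feature-detect rather than pinning a version:

```python
# Sketch of a dual-spelling import shim for code that uses openlit.semcov
# directly (assumption: pre-rename releases only export the legacy name).
from opentelemetry import trace

try:
    from openlit.semcov import SemanticConvention  # current spelling
except ImportError:
    from openlit.semcov import SemanticConvetion as SemanticConvention  # legacy spelling

tracer = trace.get_tracer(__name__)
with tracer.start_as_current_span("chat example") as span:
    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, "gemini-1.5-pro")
```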