openlit-1.33.9-py3-none-any.whl → openlit-1.33.10-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +5 -0
- openlit/__init__.py +3 -2
- openlit/instrumentation/ag2/ag2.py +3 -3
- openlit/instrumentation/ai21/ai21.py +1 -1
- openlit/instrumentation/ai21/async_ai21.py +1 -1
- openlit/instrumentation/anthropic/anthropic.py +1 -1
- openlit/instrumentation/anthropic/async_anthropic.py +1 -1
- openlit/instrumentation/astra/astra.py +5 -5
- openlit/instrumentation/astra/async_astra.py +5 -5
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +3 -3
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +3 -3
- openlit/instrumentation/chroma/chroma.py +5 -5
- openlit/instrumentation/cohere/async_cohere.py +1 -1
- openlit/instrumentation/cohere/cohere.py +2 -2
- openlit/instrumentation/controlflow/controlflow.py +3 -3
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +3 -3
- openlit/instrumentation/crawl4ai/crawl4ai.py +3 -3
- openlit/instrumentation/crewai/crewai.py +4 -2
- openlit/instrumentation/dynamiq/dynamiq.py +3 -3
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +1 -2
- openlit/instrumentation/elevenlabs/elevenlabs.py +1 -2
- openlit/instrumentation/embedchain/embedchain.py +5 -5
- openlit/instrumentation/firecrawl/firecrawl.py +3 -3
- openlit/instrumentation/gpt4all/__init__.py +2 -2
- openlit/instrumentation/gpt4all/gpt4all.py +345 -220
- openlit/instrumentation/gpu/__init__.py +5 -5
- openlit/instrumentation/groq/__init__.py +2 -2
- openlit/instrumentation/groq/async_groq.py +356 -240
- openlit/instrumentation/groq/groq.py +356 -240
- openlit/instrumentation/haystack/haystack.py +3 -3
- openlit/instrumentation/julep/async_julep.py +3 -3
- openlit/instrumentation/julep/julep.py +3 -3
- openlit/instrumentation/langchain/__init__.py +13 -7
- openlit/instrumentation/langchain/async_langchain.py +384 -0
- openlit/instrumentation/langchain/langchain.py +98 -490
- openlit/instrumentation/letta/letta.py +5 -3
- openlit/instrumentation/litellm/__init__.py +4 -5
- openlit/instrumentation/litellm/async_litellm.py +316 -245
- openlit/instrumentation/litellm/litellm.py +312 -241
- openlit/instrumentation/llamaindex/llamaindex.py +3 -3
- openlit/instrumentation/mem0/mem0.py +3 -3
- openlit/instrumentation/milvus/milvus.py +5 -5
- openlit/instrumentation/mistral/__init__.py +6 -6
- openlit/instrumentation/mistral/async_mistral.py +421 -248
- openlit/instrumentation/mistral/mistral.py +418 -244
- openlit/instrumentation/multion/async_multion.py +4 -2
- openlit/instrumentation/multion/multion.py +4 -2
- openlit/instrumentation/ollama/__init__.py +8 -30
- openlit/instrumentation/ollama/async_ollama.py +385 -417
- openlit/instrumentation/ollama/ollama.py +384 -417
- openlit/instrumentation/openai/async_openai.py +7 -9
- openlit/instrumentation/openai/openai.py +7 -9
- openlit/instrumentation/phidata/phidata.py +4 -2
- openlit/instrumentation/pinecone/pinecone.py +5 -5
- openlit/instrumentation/premai/__init__.py +2 -2
- openlit/instrumentation/premai/premai.py +262 -213
- openlit/instrumentation/qdrant/async_qdrant.py +5 -5
- openlit/instrumentation/qdrant/qdrant.py +5 -5
- openlit/instrumentation/reka/__init__.py +2 -2
- openlit/instrumentation/reka/async_reka.py +90 -52
- openlit/instrumentation/reka/reka.py +90 -52
- openlit/instrumentation/together/__init__.py +4 -4
- openlit/instrumentation/together/async_together.py +278 -236
- openlit/instrumentation/together/together.py +278 -236
- openlit/instrumentation/transformers/__init__.py +1 -1
- openlit/instrumentation/transformers/transformers.py +75 -44
- openlit/instrumentation/vertexai/__init__.py +14 -64
- openlit/instrumentation/vertexai/async_vertexai.py +329 -986
- openlit/instrumentation/vertexai/vertexai.py +329 -986
- openlit/instrumentation/vllm/__init__.py +1 -1
- openlit/instrumentation/vllm/vllm.py +62 -32
- openlit/semcov/__init__.py +3 -3
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
- openlit-1.33.10.dist-info/RECORD +122 -0
- openlit-1.33.9.dist-info/RECORD +0 -121
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/WHEEL +0 -0
openlit/instrumentation/together/together.py

@@ -1,32 +1,35 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches, too-many-instance-attributes
 """
 Module for monitoring Together calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
     get_chat_model_cost,
     get_image_model_cost,
     handle_exception,
     response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def completion(gen_ai_endpoint, version, environment, application_name,
-               tracer, pricing_info, trace_content, metrics, disable_metrics):
+def completion(version, environment, application_name,
+               tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Together AI
+        application_name: Name of the application using the Together AI API.
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of Together AI usage.
         trace_content: Flag indicating whether to trace the actual content.
@@ -38,6 +41,7 @@ def completion(gen_ai_endpoint, version, environment, application_name,
     class TracedSyncStream:
         """
         Wrapper for streaming responses to collect metrics and trace data.
+        Wraps the response to collect message IDs and aggregated response.
 
         This class implements the '__aiter__' and '__anext__' methods that
         handle asynchronous streaming responses.
@@ -50,6 +54,8 @@ def completion(gen_ai_endpoint, version, environment, application_name,
                 wrapped,
                 span,
                 kwargs,
+                server_address,
+                server_port,
                 **args,
             ):
             self.__wrapped__ = wrapped
@@ -57,12 +63,20 @@ def completion(gen_ai_endpoint, version, environment, application_name,
             # Placeholder for aggregating streaming response
             self._llmresponse = ""
             self._response_id = ""
-            self.
-            self.
-            self.
+            self._response_model = ""
+            self._finish_reason = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
 
             self._args = args
             self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
 
         def __enter__(self):
             self.__wrapped__.__enter__()
@@ -81,6 +95,14 @@ def completion(gen_ai_endpoint, version, environment, application_name,
         def __next__(self):
             try:
                 chunk = self.__wrapped__.__next__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 chunked = response_as_dict(chunk)
                 # Collect message IDs and aggregated response from events
                 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
@@ -89,15 +111,22 @@ def completion(gen_ai_endpoint, version, environment, application_name,
                     content = chunked.get('choices')[0].get('delta').get('content')
                     if content:
                         self._llmresponse += content
-
-
-                self.
-                self.
-
+
+                if chunked.get('usage'):
+                    self._response_id = chunked.get('id')
+                    self._response_model = chunked.get('model')
+                    self._finish_reason = str(chunked.get('choices')[0].get('finish_reason'))
+                    self._input_tokens = chunked.get('usage').get('prompt_tokens')
+                    self._output_tokens = chunked.get('usage').get('completion_tokens')
+
                 return chunk
             except StopIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
@@ -106,68 +135,89 @@ def completion(gen_ai_endpoint, version, environment, application_name,
                         content = message["content"]
 
                         if isinstance(content, list):
-
-
-
-
-
-
+                            content_str_list = []
+                            for item in content:
+                                if item["type"] == "text":
+                                    content_str_list.append(f'text: {item["text"]}')
+                                elif (item["type"] == "image_url" and
+                                      not item["image_url"]["url"].startswith("data:")):
+                                    content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+                            content_str = ", ".join(content_str_list)
                             formatted_messages.append(f"{role}: {content_str}")
                         else:
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
+                    request_model = self._kwargs.get("model", "gpt-4o")
+
                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(
-
-
-
-
-                        self._completion_tokens)
-
-                    # Set Span attributes
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, self._input_tokens,
+                                               self._output_tokens)
+
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                             SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                              SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.
-
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                             SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                             request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                             self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                             self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                             self._kwargs.get("frequency_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                             self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                             self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                             self._kwargs.get("stop", []))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                             self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                             self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                             [self._finish_reason])
                     self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                              self._response_id)
-                    self._span.set_attribute(SemanticConvetion.
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                             self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                             self._input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                             self._output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                             self._server_address)
+
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                              environment)
-                    self._span.set_attribute(
+                    self._span.set_attribute(SERVICE_NAME,
                                              application_name)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                             self._kwargs.get(
-                                                 "model",
-                                                 "meta-llama/Llama-3.3-70B-Instruct-Turbo"
-                                             ))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                              self._kwargs.get("user", ""))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                             self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                             self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                             self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                             self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                             self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                             self._kwargs.get("seed", ""))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                              True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                             self._prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                             self._completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                             self.
+                                             self._input_tokens + self._output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                              cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                             self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                             self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                             version)
                     if trace_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -181,36 +231,35 @@ def completion(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes =
-
-
-                        SemanticConvetion.
-
-
-
-
-
-
-                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                        self._kwargs.get("model",
-                        "meta-llama/Llama-3.3-70B-Instruct-Turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_TOGETHER,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )
 
-                        metrics["
-
-                        self._total_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            self._input_tokens + self._output_tokens, attributes
                         )
-                        metrics["
-                        self.
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
                         )
-                        metrics["
-                        self.
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
                         )
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                 except Exception as e:
@@ -219,7 +268,6 @@ def completion(gen_ai_endpoint, version, environment, application_name,
                 finally:
                     self._span.end()
                 raise
-
     def wrapper(wrapped, instance, args, kwargs):
         """
         Wraps the 'chat.completions' API call to add telemetry.
@@ -239,20 +287,25 @@ def completion(gen_ai_endpoint, version, environment, application_name,
 
         # Check if streaming is enabled for the API call
        streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.together.xyz", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
        # pylint: disable=no-else-return
        if streaming:
            # Special handling for streaming response to accommodate the nature of data flow
            awaited_wrapped = wrapped(*args, **kwargs)
-            span = tracer.start_span(
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-            return TracedSyncStream(awaited_wrapped, span, kwargs)
+            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
        # Handling for non-streaming responses
        else:
-
-
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                response = wrapped(*args, **kwargs)
+                end_time = time.time()
 
                response_dict = response_as_dict(response)
 
@@ -266,7 +319,6 @@ def completion(gen_ai_endpoint, version, environment, application_name,
 
                        if isinstance(content, list):
                            content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                if "type" in item else f'text: {item["text"]}'
                                for item in content
@@ -276,39 +328,66 @@ def completion(gen_ai_endpoint, version, environment, application_name,
                            formatted_messages.append(f"{role}: {content}")
                    prompt = "\n".join(formatted_messages)
 
-
+                    input_tokens = response_dict.get('usage').get('prompt_tokens')
+                    output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)
+
+                    # Set base span attribues (OTel Semconv)
                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                       SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.
-
+                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                       SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                       kwargs.get("seed", ""))
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                       kwargs.get("frequency_penalty", 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                       kwargs.get("max_tokens", -1))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                       kwargs.get("presence_penalty", 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                       kwargs.get("stop", []))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                       kwargs.get("temperature", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                       kwargs.get("top_p", 1.0))
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                       response_dict.get("id"))
-                    span.set_attribute(SemanticConvetion.
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       response_dict.get('model'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                       input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                       output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+
+                    # Set base span attribues (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                       environment)
-                    span.set_attribute(
+                    span.set_attribute(SERVICE_NAME,
                                       application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model",
-                                       "meta-llama/Llama-3.3-70B-Instruct-Turbo"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                       kwargs.get("top_p", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                       kwargs.get("max_tokens", -1))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                       kwargs.get("user", ""))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                       kwargs.get("temperature", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                       kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                       kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                       kwargs.get("seed", ""))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                       False)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                       input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                       cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                       end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
                    if trace_content:
                        span.add_event(
                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -317,103 +396,54 @@ def completion(gen_ai_endpoint, version, environment, application_name,
                            },
                        )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        response_dict.get('usage', {}).get('completion_tokens', None))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        response_dict.get('usage', {}).get('total_tokens', None))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                        cost)
-
-                    # Set span attributes for when n = 1 (default)
-                    if "n" not in kwargs or kwargs["n"] == 1:
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices', [])[0].get("message").get("content"),
-                                },
-                            )
-
-                    # Set span attributes for when n > 0
-                    else:
-                        i = 0
-                        while i < kwargs["n"] and trace_content is True:
-                            attribute_name = f"gen_ai.content.completion.{i}"
-                            span.add_event(
-                                name=attribute_name,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                                },
-                            )
-                            i += 1
-
-                    # Return original response
-                    return response
-
-                # Set span attributes when tools is passed to the function call
-                elif "tools" in kwargs:
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get(
-                        "model",
-                        "meta-llama/Llama-3.3-70B-Instruct-Turbo"
-                    ),
-                    pricing_info,
-                    response_dict.get('usage').get('prompt_tokens'),
-                    response_dict.get('usage').get('completion_tokens'))
+                    for i in range(kwargs.get('n',1)):
+                        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                           [str(response_dict.get('choices')[i].get('finish_reason'))])
+                        if trace_content:
+                            span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                attributes={
+                                    # pylint: disable=line-too-long
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                                },
+                            )
+                        if kwargs.get('tools'):
+                            span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                               str(response_dict.get('choices')[i].get('message').get('tool_calls')))
 
-
-
-
-
-
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                       response_dict.get('usage').get('prompt_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                       response_dict.get('usage').get('completion_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       response_dict.get('usage').get('total_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                       cost)
+                        if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                               "text")
+                        elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                               "json")
 
                    span.set_status(Status(StatusCode.OK))
 
                    if disable_metrics is False:
-                        attributes =
-
-
-                        SemanticConvetion.
-
-
-
-
-
-
-                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                        kwargs.get("model", "meta-llama/Llama-3.3-70B-Instruct-Turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_TOGETHER,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=response_dict.get('model'),
+                        )
 
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                        metrics["genai_requests"].add(1, attributes)
-                        metrics["
-
-                        metrics["genai_completion_tokens"].add(
-                            response_dict.get('usage').get('completion_tokens'), attributes)
-                        metrics["genai_prompt_tokens"].add(
-                            response_dict.get('usage').get('prompt_tokens'), attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                        metrics["genai_cost"].record(cost, attributes)
 
                    # Return original response
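The streaming path above now derives two latency signals from a list of per-chunk timestamps: time to first token (TTFT, computed as soon as the first chunk arrives) and time between tokens (TBT, computed once the stream is exhausted and only when more than one chunk was received). The `calculate_ttft` and `calculate_tbt` helpers come from `openlit/__helpers.py` and their bodies are not part of this diff, so the following is a minimal sketch of the assumed semantics, not openlit's actual implementation:

```python
import time

# Hypothetical stand-ins for the real helpers in openlit/__helpers.py,
# whose internals are not shown in this diff. The arithmetic below is an
# assumption about their semantics, kept only for illustration.
def calculate_ttft(timestamps, start_time):
    # Assumed: delay from request start to the first streamed chunk.
    return timestamps[0] - start_time if timestamps else 0

def calculate_tbt(timestamps):
    # Assumed: average gap between consecutive chunks.
    gaps = [b - a for a, b in zip(timestamps, timestamps[1:])]
    return sum(gaps) / len(gaps) if gaps else 0

# Usage mirroring TracedSyncStream: one timestamp per chunk, TTFT once the
# first chunk arrives, TBT only after the stream ends.
start_time = time.time()
timestamps = []
for chunk in ("Hello", ",", " world"):  # stand-in for a streamed response
    time.sleep(0.01)
    timestamps.append(time.time())
    if len(timestamps) == 1:
        ttft = calculate_ttft(timestamps, start_time)
tbt = calculate_tbt(timestamps) if len(timestamps) > 1 else 0
```

Guarding TBT behind `len(self._timestamps) > 1`, as the wrapper does, avoids a degenerate average when the whole response arrives as a single chunk.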
@@ -428,18 +458,17 @@ def completion(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def image_generate(gen_ai_endpoint, version, environment, application_name,
+def image_generate(version, environment, application_name,
                    tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for image generation to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Together API.
+        application_name: Name of the application using the Together AI API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Together image generation.
+        pricing_info: Information used for calculating the cost of Together AI image generation.
         trace_content: Flag indicating whether to trace the input prompt and generated images.
 
     Returns:
@@ -463,8 +492,16 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
            The response from the original 'images.generate' method.
        """
 
-
+        server_address, server_port = set_server_address_and_port(instance, "api.together.xyz", 443)
+        request_model = kwargs.get("model", "dall-e-2")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
            response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
            images_count = 0
 
            try:
@@ -474,37 +511,43 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                else:
                    image = "url"
 
+                image_size = str(kwargs.get('width')) + 'x' + str(kwargs.get('height'))
+
                # Calculate cost of the operation
-
-
-                    "model", "black-forest-labs/FLUX.1-dev"
-                ),
-                    pricing_info, "1000000",
+                cost = get_image_model_cost(request_model,
+                                            pricing_info, image_size,
                                            kwargs.get("quality", "standard"))
-                pixels = kwargs.get("width", 1024) * kwargs.get("height", 1024)
-                cost = pixels / 1_000_000 * cost_per_million
 
                for items in response.data:
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                       SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
-                    span.set_attribute(SemanticConvetion.
-
+                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                       SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                       response.id)
-                    span.set_attribute(SemanticConvetion.
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       response.model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                       "image")
+
+                    # Set Span attributes (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                       environment)
-                    span.set_attribute(
+                    span.set_attribute(SERVICE_NAME,
                                       application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "black-forest-labs/FLUX.1-dev"))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                                       image_size)
-                    span.set_attribute(SemanticConvetion.
-
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
+
                    if trace_content:
                        span.add_event(
                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -512,7 +555,7 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
                            },
                        )
-                        attribute_name = f"
+                        attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
                        span.add_event(
                            name=attribute_name,
                            attributes={
@@ -527,21 +570,20 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                span.set_status(Status(StatusCode.OK))
 
                if disable_metrics is False:
-                    attributes =
-
-
-                    SemanticConvetion.
-
-
-
-
-
-
-
-
-
-
-
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_TOGETHER,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response.model,
+                    )
+
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                    metrics["genai_requests"].add(1, attributes)
                    metrics["genai_cost"].record(cost, attributes)
 
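Both `completion` and `image_generate` now resolve the span's `server.address` and `server.port` through `set_server_address_and_port(instance, "api.together.xyz", 443)` and feed the shared dimensions into `create_metrics_attributes` for every recorded metric. The helper's implementation lives in `openlit/__helpers.py` (extended by `+5 -0` in this release) and is outside this diff; a plausible sketch, assuming it reads the client's configured `base_url` and falls back to the supplied defaults (the attribute name and the `FakeTogetherClient` class below are illustrative assumptions):

```python
from urllib.parse import urlparse

# Illustrative sketch only: the real helper is in openlit/__helpers.py and
# its exact lookup logic is not shown in this diff.
def set_server_address_and_port(instance, default_address, default_port):
    # Assumed: prefer the client's configured base_url, else the defaults.
    base_url = getattr(instance, "base_url", None)  # attribute name assumed
    if not base_url:
        return default_address, default_port
    parsed = urlparse(str(base_url))
    return parsed.hostname or default_address, parsed.port or default_port

class FakeTogetherClient:  # hypothetical stand-in for a Together SDK client
    base_url = "https://api.together.xyz/v1"

print(set_server_address_and_port(FakeTogetherClient(), "api.together.xyz", 443))
# ('api.together.xyz', 443) -- no explicit port in the URL, so the default applies
```

Resolving the address once per call and passing it into both the span attributes and the metric dimensions keeps traces and metrics joinable on the same `server.address`/`server.port` pair.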