openlit 1.34.13__py3-none-any.whl → 1.34.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,288 @@
+"""
+LiteLLM OpenTelemetry instrumentation utility functions
+"""
+import time
+
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    calculate_ttft,
+    response_as_dict,
+    calculate_tbt,
+    get_chat_model_cost,
+    get_embed_model_cost,
+    common_span_attributes,
+    record_completion_metrics,
+    record_embedding_metrics,
+)
+from openlit.semcov import SemanticConvention
+
+def format_content(messages):
+    """
+    Process a list of messages to extract content.
+    """
+
+    formatted_messages = []
+    for message in messages:
+        role = message['role']
+        content = message['content']
+
+        if isinstance(content, list):
+            content_str = ", ".join(
+                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                if "type" in item else f'text: {item["text"]}'
+                for item in content
+            )
+            formatted_messages.append(f'{role}: {content_str}')
+        else:
+            formatted_messages.append(f'{role}: {content}')
+
+    return '\n'.join(formatted_messages)
+
+def process_chunk(scope, chunk):
+    """
+    Process a chunk of response data and update state.
+    """
+
+    end_time = time.time()
+    # Record the timestamp for the current chunk
+    scope._timestamps.append(end_time)
+
+    if len(scope._timestamps) == 1:
+        # Calculate time to first chunk
+        scope._ttft = calculate_ttft(scope._timestamps, scope._start_time)
+
+    chunked = response_as_dict(chunk)
+
+    # Collect message IDs and aggregated response from events
+    if (len(chunked.get('choices', [])) > 0 and ('delta' in chunked.get('choices')[0] and
+            'content' in chunked.get('choices')[0].get('delta', {}))):
+
+        content = chunked.get('choices')[0].get('delta').get('content')
+        if content:
+            scope._llmresponse += content
+
+    # Handle tool calls in streaming - optimized
+    delta_tools = chunked.get('choices', [{}])[0].get('delta', {}).get('tool_calls')
+    if delta_tools:
+        scope._tools = scope._tools or []
+
+        for tool in delta_tools:
+            idx = tool.get('index', 0)
+
+            # Extend list if needed
+            scope._tools.extend([{}] * (idx + 1 - len(scope._tools)))
+
+            if tool.get('id'):  # New tool (id exists)
+                func = tool.get('function', {})
+                scope._tools[idx] = {
+                    'id': tool['id'],
+                    'function': {'name': func.get('name', ''), 'arguments': func.get('arguments', '')},
+                    'type': tool.get('type', 'function')
+                }
+            elif scope._tools[idx] and 'function' in tool:  # Append args (id is None)
+                scope._tools[idx]['function']['arguments'] += tool['function'].get('arguments', '')
+
+    if chunked.get('usage'):
+        scope._input_tokens = chunked.get('usage').get('prompt_tokens', 0)
+        scope._output_tokens = chunked.get('usage').get('completion_tokens', 0)
+        scope._response_id = chunked.get('id')
+        scope._response_model = chunked.get('model')
+        scope._finish_reason = chunked.get('choices', [{}])[0].get('finish_reason')
+        scope._response_service_tier = str(chunked.get('system_fingerprint', ''))
+        scope._end_time = time.time()
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content, disable_metrics, version, is_stream):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    if len(scope._timestamps) > 1:
+        scope._tbt = calculate_tbt(scope._timestamps)
+
+    prompt = format_content(scope._kwargs.get('messages', []))
+    request_model = scope._kwargs.get('model', 'openai/gpt-4o')
+
+    cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_LITELLM,
+        scope._server_address, scope._server_port, request_model, scope._response_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, scope._kwargs.get('seed', ''))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, scope._kwargs.get('frequency_penalty', 0.0))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get('max_tokens', -1))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, scope._kwargs.get('presence_penalty', 0.0))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get('stop', []))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get('temperature', 1.0))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get('top_p', 1.0))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER, scope._kwargs.get('user', ''))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER, scope._kwargs.get('service_tier', 'auto'))
+
+    # Span Attributes for Response parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SERVICE_TIER, scope._response_service_tier)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT, scope._response_service_tier)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+
+    # Span Attributes for Cost and Tokens
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Span Attributes for Tools - optimized
+    if scope._tools:
+        tools = scope._tools if isinstance(scope._tools, list) else [scope._tools]
+
+        names, ids, args = zip(*[
+            (t.get("function", {}).get("name", ""),
+            str(t.get("id", "")),
+            str(t.get("function", {}).get("arguments", "")))
+            for t in tools if isinstance(t, dict) and t
+        ]) if tools else ([], [], [])
+
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, ", ".join(filter(None, names)))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, ", ".join(filter(None, ids)))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, ", ".join(filter(None, args)))
+
+    # Span Attributes for Content
+    if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+        # To be removed once the change to span_attributes (from span events) is complete
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+            },
+        )
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    # Metrics
+    if not disable_metrics:
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_LITELLM,
+            scope._server_address, scope._server_port, request_model, scope._response_model, environment,
+            application_name, scope._start_time, scope._end_time, scope._input_tokens, scope._output_tokens,
+            cost, scope._tbt, scope._ttft)
+
+def process_streaming_chat_response(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content=False, disable_metrics=False, version=""):
+    """
+    Process streaming chat request and generate Telemetry
+    """
+
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, is_stream=True)
+
+def process_chat_response(response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time, span, capture_message_content=False,
+    disable_metrics=False, version="1.0.0", **kwargs):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    # Create scope object
+    scope = type("GenericScope", (), {})()
+    response_dict = response_as_dict(response)
+
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._llmresponse = " ".join(
+        (choice.get("message", {}).get("content") or "")
+        for choice in response_dict.get("choices", [])
+    )
+    scope._input_tokens = response_dict.get('usage', {}).get('prompt_tokens', 0)
+    scope._output_tokens = response_dict.get('usage', {}).get('completion_tokens', 0)
+    scope._response_id = response_dict.get('id')
+    scope._response_model = response_dict.get('model')
+    scope._finish_reason = str(response_dict.get('choices', [])[0].get('finish_reason', ''))
+    scope._response_service_tier = str(response_dict.get('system_fingerprint', ''))
+    scope._timestamps = []
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address, scope._server_port = server_address, server_port
+    scope._kwargs = kwargs
+
+    # Handle tool calls
+    if scope._kwargs.get("tools"):
+        scope._tools = response_dict.get("choices", [{}])[0].get("message", {}).get("tool_calls")
+    else:
+        scope._tools = None
+
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, is_stream=False)
+
+    return response
+
+def process_embedding_response(response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time, span, capture_message_content=False,
+    disable_metrics=False, version="1.0.0", **kwargs):
+    """
+    Process embedding request and generate Telemetry
+    """
+
+    # Create scope object
+    scope = type("GenericScope", (), {})()
+    response_dict = response_as_dict(response)
+
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._input_tokens = response_dict.get('usage', {}).get('prompt_tokens', 0)
+    scope._response_model = response_dict.get('model')
+    scope._server_address, scope._server_port = server_address, server_port
+    scope._kwargs = kwargs
+
+    # Calculate cost of the operation
+    cost = get_embed_model_cost(request_model, pricing_info, scope._input_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_LITELLM,
+        scope._server_address, scope._server_port, request_model, scope._response_model,
+        environment, application_name, False, 0, scope._end_time - scope._start_time, version)
+
+    # Span Attributes for Request parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_ENCODING_FORMATS, [scope._kwargs.get('encoding_format', 'float')])
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER, scope._kwargs.get('user', ''))
+
+    # Span Attributes for Cost and Tokens
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Span Attributes for Content
+    if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, str(scope._kwargs.get('input', '')))
+
+        # To be removed once the change to span_attributes (from span events) is complete
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: str(scope._kwargs.get('input', '')),
+            },
+        )
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    # Metrics
+    if not disable_metrics:
+        record_embedding_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_LITELLM,
+            scope._server_address, scope._server_port, request_model, scope._response_model, environment,
+            application_name, scope._start_time, scope._end_time, scope._input_tokens, cost)
+
+    return response
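
The new module follows openlit's scope-object pattern: a streaming wrapper accumulates per-chunk state on a throwaway object and hands it to `process_streaming_chat_response` once the stream is exhausted. A minimal sketch of such a driver — not the package's actual wrapper — assuming the module path `openlit.instrumentation.litellm.utils`, an already-started OTel `span`, and placeholder pricing/config values:

```python
import time

# Assumed import path for the new module shown above
from openlit.instrumentation.litellm.utils import (
    process_chunk,
    process_streaming_chat_response,
)

def trace_litellm_stream(span, stream, server_address, server_port, **kwargs):
    # Build the same ad-hoc "GenericScope" object the non-streaming path uses
    scope = type("GenericScope", (), {})()
    scope._span = span
    scope._start_time = time.time()
    scope._timestamps = []                # appended to by process_chunk
    scope._llmresponse = ""               # accumulated delta content
    scope._tools = None                   # accumulated tool calls
    scope._input_tokens = scope._output_tokens = 0
    scope._ttft = scope._tbt = 0
    scope._response_id = scope._response_model = ""
    scope._finish_reason = scope._response_service_tier = ""
    scope._end_time = scope._start_time
    scope._server_address, scope._server_port = server_address, server_port
    scope._kwargs = kwargs                # original request kwargs (model, messages, ...)

    for chunk in stream:
        process_chunk(scope, chunk)       # updates TTFT, content, tools, usage
        yield chunk

    # One span/metrics emission after the last chunk; placeholder config values
    process_streaming_chat_response(scope, pricing_info={}, environment="dev",
        application_name="demo", metrics=None, capture_message_content=False,
        disable_metrics=True, version="1.0.0")
```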
@@ -30,12 +30,19 @@ class TransformersInstrumentor(BaseInstrumentor):
         version = importlib.metadata.version("transformers")
 
         wrap_function_wrapper(
-            "transformers",
-            "TextGenerationPipeline.__call__",
-            pipeline_wrapper(version, environment, application_name,
-                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+            "transformers",
+            "TextGenerationPipeline.__call__",
+            pipeline_wrapper(
+                version,
+                environment,
+                application_name,
+                tracer,
+                pricing_info,
+                capture_message_content,
+                metrics,
+                disable_metrics
+            ),
         )
 
     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
         pass
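
For context on the `wrap_function_wrapper` call being reformatted here: it comes from the `wrapt` library, which patches the named attribute in place and always invokes the installed callback as `(wrapped, instance, args, kwargs)`. A tiny standalone illustration of that contract:

```python
from wrapt import wrap_function_wrapper

def logging_wrapper(wrapped, instance, args, kwargs):
    # wrapped: the original TextGenerationPipeline.__call__
    # instance: the pipeline object the method was called on
    print(f"calling {wrapped.__name__} on {type(instance).__name__}")
    return wrapped(*args, **kwargs)

# Same patch target as in the instrumentor above
wrap_function_wrapper("transformers", "TextGenerationPipeline.__call__", logging_wrapper)
```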
@@ -2,22 +2,15 @@
 Module for monitoring HF Transformers API calls.
 """
 
-import logging
 import time
 from opentelemetry.trace import SpanKind
-from openlit.__helpers import (
-    set_server_address_and_port
-)
-from openlit.instrumentation.transformers.utils import (
-    process_chat_response,
-)
+from openlit.__helpers import set_server_address_and_port
+from openlit.instrumentation.transformers.utils import process_chat_response
 from openlit.semcov import SemanticConvention
 
-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
 
-def pipeline_wrapper(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def pipeline_wrapper(version, environment, application_name, tracer, pricing_info,
+    capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -32,27 +25,27 @@ def pipeline_wrapper(version, environment, application_name,
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
             start_time = time.time()
             response = wrapped(*args, **kwargs)
 
             response = process_chat_response(
-                instance = instance,
-                response=response,
-                request_model=request_model,
-                pricing_info=pricing_info,
-                server_port=server_port,
-                server_address=server_address,
-                environment=environment,
-                application_name=application_name,
-                metrics=metrics,
-                start_time=start_time,
-                span=span,
-                args=args,
-                kwargs=kwargs,
-                capture_message_content=capture_message_content,
-                disable_metrics=disable_metrics,
-                version=version,
+                instance=instance,
+                response=response,
+                request_model=request_model,
+                pricing_info=pricing_info,
+                server_port=server_port,
+                server_address=server_address,
+                environment=environment,
+                application_name=application_name,
+                metrics=metrics,
+                start_time=start_time,
+                span=span,
+                args=args,
+                kwargs=kwargs,
+                capture_message_content=capture_message_content,
+                disable_metrics=disable_metrics,
+                version=version,
             )
 
         return response
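
Taken together, the transformers changes are formatting and dead-code cleanup (the unused `logging` import and module logger are dropped); runtime behavior is unchanged. A hypothetical end-to-end check, with the model choice purely illustrative:

```python
import openlit
from transformers import pipeline

# openlit.init() auto-instruments supported libraries, installing the
# TextGenerationPipeline.__call__ wrapper shown above
openlit.init(application_name="demo", environment="dev")

generator = pipeline("text-generation", model="gpt2")
out = generator("Hello, world", max_new_tokens=16)  # traced via pipeline_wrapper
```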