openlit 1.33.20__py3-none-any.whl → 1.33.21__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to its public registry. It is provided for informational purposes only.
openlit/__helpers.py CHANGED
@@ -240,6 +240,11 @@ def extract_and_format_input(messages):
     fixed_roles = ['user', 'assistant', 'system', 'tool', 'developer']
     formatted_messages = {role_key: {'role': '', 'content': ''} for role_key in fixed_roles}
 
+    # Check if input is a simple string
+    if isinstance(messages, str):
+        formatted_messages['user'] = {'role': 'user', 'content': messages}
+        return formatted_messages
+
     for message in messages:
         message = response_as_dict(message)
 
@@ -276,3 +281,55 @@ def concatenate_all_contents(formatted_messages):
         for message_data in formatted_messages.values()
         if message_data['content']
     )
+
+def format_and_concatenate(messages):
+    """
+    Process a list of messages to extract content, categorize them by role,
+    and concatenate all 'content' fields into a single string with role: content format.
+    """
+
+    formatted_messages = {}
+
+    # Check if input is a simple string
+    if isinstance(messages, str):
+        formatted_messages['user'] = {'role': 'user', 'content': messages}
+    elif isinstance(messages, list) and all(isinstance(m, str) for m in messages):
+        # If it's a list of strings, each string is 'user' input
+        user_content = ' '.join(messages)
+        formatted_messages['user'] = {'role': 'user', 'content': user_content}
+    else:
+        for message in messages:
+            message = response_as_dict(message)
+            role = message.get('role', 'unknown')  # Default to 'unknown' if no role is specified
+            content = message.get('content', '')
+
+            # Initialize role in formatted messages if not present
+            if role not in formatted_messages:
+                formatted_messages[role] = {'role': role, 'content': ''}
+
+            # Handle list of dictionaries in content
+            if isinstance(content, list):
+                content_str = []
+                for item in content:
+                    if isinstance(item, dict):
+                        # Collect text or other attributes as needed
+                        text = item.get('text', '')
+                        image_url = item.get('image_url', '')
+                        content_str.append(text)
+                        content_str.append(image_url)
+                content_str = ", ".join(filter(None, content_str))
+            else:
+                content_str = content
+
+            # Concatenate content
+            if formatted_messages[role]['content']:
+                formatted_messages[role]['content'] += ' ' + content_str
+            else:
+                formatted_messages[role]['content'] = content_str
+
+    # Concatenate role and content for all messages
+    return ' '.join(
+        f"{message_data['role']}: {message_data['content']}"
+        for message_data in formatted_messages.values()
+        if message_data['content']
+    )
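
For orientation, here is a minimal sketch of how the two helpers above behave after this change (illustrative inputs, not part of the diff):

    from openlit.__helpers import extract_and_format_input, format_and_concatenate

    # Both helpers now accept a bare string and treat it as a single user message
    extract_and_format_input("Hello")['user']
    # -> {'role': 'user', 'content': 'Hello'}

    format_and_concatenate([
        {'role': 'system', 'content': 'Be terse.'},
        {'role': 'user', 'content': 'Hi'},
    ])
    # -> "system: Be terse. user: Hi"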

openlit/instrumentation/ollama/__init__.py CHANGED
@@ -16,6 +16,29 @@ from openlit.instrumentation.ollama.async_ollama import (
 
 _instruments = ("ollama >= 0.2.0",)
 
+# Dispatch wrapper to route instrumentation to chat or embeddings based on path
+def _dispatch(sync_chat_wrap, sync_emb_wrap):
+    def wrapper(wrapped, instance, args, kwargs):
+        if len(args) > 2 and isinstance(args[2], str):
+            op = args[2].rstrip("/").split("/")[-1]
+            if op == "chat":
+                return sync_chat_wrap(wrapped, instance, args, kwargs)
+            if op == "embeddings":
+                return sync_emb_wrap(wrapped, instance, args, kwargs)
+        return wrapped(*args, **kwargs)
+    return wrapper
+
+def _dispatch_async(async_chat_wrap, async_emb_wrap):
+    async def wrapper(wrapped, instance, args, kwargs):
+        if len(args) > 2 and isinstance(args[2], str):
+            op = args[2].rstrip("/").split("/")[-1]
+            if op == "chat":
+                return await async_chat_wrap(wrapped, instance, args, kwargs)
+            if op == "embeddings":
+                return await async_emb_wrap(wrapped, instance, args, kwargs)
+        return await wrapped(*args, **kwargs)
+    return wrapper
+
 class OllamaInstrumentor(BaseInstrumentor):
     """
     An instrumentor for Ollama's client library.
@@ -35,48 +58,38 @@ class OllamaInstrumentor(BaseInstrumentor):
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("ollama")
 
-        # sync chat
-        wrap_function_wrapper(
-            "ollama",
-            "chat",
-            chat(version, environment, application_name,
-                 tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+        # Build wrapper factories for chat and embeddings
+        sync_chat_wrap = chat(
+            version, environment, application_name,
+            tracer, event_provider, pricing_info,
+            capture_message_content, metrics, disable_metrics
         )
-        wrap_function_wrapper(
-            "ollama",
-            "Client.chat",
-            chat(version, environment, application_name,
-                 tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+        sync_emb_wrap = embeddings(
+            version, environment, application_name,
+            tracer, event_provider, pricing_info,
+            capture_message_content, metrics, disable_metrics
        )
-
-        # sync embeddings
-        wrap_function_wrapper(
-            "ollama",
-            "embeddings",
-            embeddings(version, environment, application_name,
-                 tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+        async_chat_wrap = async_chat(
+            version, environment, application_name,
+            tracer, event_provider, pricing_info,
+            capture_message_content, metrics, disable_metrics
        )
-        wrap_function_wrapper(
-            "ollama",
-            "Client.embeddings",
-            embeddings(version, environment, application_name,
-                 tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+        async_emb_wrap = async_embeddings(
+            version, environment, application_name,
+            tracer, event_provider, pricing_info,
+            capture_message_content, metrics, disable_metrics
        )
 
-        # async chat
+        # Patch underlying request methods to ensure instrumentation regardless of import order
         wrap_function_wrapper(
-            "ollama",
-            "AsyncClient.chat",
-            async_chat(version, environment, application_name,
-                 tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+            "ollama._client",
+            "Client._request",
+            _dispatch(sync_chat_wrap, sync_emb_wrap),
         )
-
-        # async embeddings
         wrap_function_wrapper(
-            "ollama",
-            "AsyncClient.embeddings",
-            async_embeddings(version, environment, application_name,
-                 tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
+            "ollama._client",
+            "AsyncClient._request",
+            _dispatch_async(async_chat_wrap, async_emb_wrap),
        )
 
     def _uninstrument(self, **kwargs):
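
The routing key is simply the tail of the request path that `Client._request` receives as its third positional argument; a quick sketch of the dispatch logic above (paths are illustrative):

    "/api/chat".rstrip("/").split("/")[-1]        # -> "chat": routed to the chat wrapper
    "/api/embeddings".rstrip("/").split("/")[-1]  # -> "embeddings": routed to the embeddings wrapper
    "/api/tags".rstrip("/").split("/")[-1]        # -> "tags": falls through to the unwrapped call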

openlit/instrumentation/ollama/async_ollama.py CHANGED
@@ -106,7 +106,8 @@ def async_chat(version, environment, application_name,
         streaming = kwargs.get("stream", False)
 
         server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
-        request_model = kwargs.get("model", "gpt-4o")
+        json_body = kwargs.get("json", {}) or {}
+        request_model = json_body.get("model") or kwargs.get("model")
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
@@ -154,7 +155,8 @@ def async_embeddings(version, environment, application_name,
         """
 
         server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
-        request_model = kwargs.get('model', 'all-minilm')
+        json_body = kwargs.get('json', {}) or {}
+        request_model = json_body.get('model') or kwargs.get('model')
 
         span_name = f'{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
 

openlit/instrumentation/ollama/ollama.py CHANGED
@@ -106,7 +106,8 @@ def chat(version, environment, application_name,
         streaming = kwargs.get("stream", False)
 
         server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 11434)
-        request_model = kwargs.get("model", "gpt-4o")
+        json_body = kwargs.get("json", {}) or {}
+        request_model = json_body.get("model") or kwargs.get("model")
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
@@ -154,7 +155,8 @@ def embeddings(version, environment, application_name,
         """
 
         server_address, server_port = set_server_address_and_port(instance, '127.0.0.1', 11434)
-        request_model = kwargs.get('model', 'all-minilm')
+        json_body = kwargs.get('json', {}) or {}
+        request_model = json_body.get('model') or kwargs.get('model')
 
         span_name = f'{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
 
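
All four wrappers now read the model from the request's json payload first, since `Client._request` receives it there rather than as a top-level keyword argument. A hedged sketch of the assumed call shape:

    # Assumed shape of the intercepted call (illustrative, not from the diff):
    #   client._request("POST", "/api/chat", json={"model": "llama3", "messages": [...]})
    json_body = kwargs.get("json", {}) or {}            # {} when the payload is absent or None
    request_model = json_body.get("model") or kwargs.get("model")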

openlit/instrumentation/ollama/utils.py CHANGED
@@ -57,8 +57,10 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     if len(scope._timestamps) > 1:
         scope._tbt = calculate_tbt(scope._timestamps)
 
-    formatted_messages = extract_and_format_input(scope._kwargs.get("messages", ""))
-    request_model = scope._kwargs.get("model", "gpt-4o")
+    json_body = scope._kwargs.get("json", {}) or {}
+    request_model = json_body.get("model") or scope._kwargs.get("model")
+    messages = json_body.get("messages", scope._kwargs.get("messages", ""))
+    formatted_messages = extract_and_format_input(messages)
 
     cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
 
@@ -252,7 +254,9 @@ def process_embedding_response(response, request_model, pricing_info, server_por
     end_time = time.time()
 
     try:
-        input_tokens = general_tokens(str(kwargs.get('prompt')))
+        json_body = kwargs.get("json", {}) or {}
+        prompt_val = json_body.get('prompt', kwargs.get('prompt', ''))
+        input_tokens = general_tokens(str(prompt_val))
 
         # Calculate cost of the operation
         cost = get_embed_model_cost(request_model,
@@ -293,7 +297,7 @@ def process_embedding_response(response, request_model, pricing_info, server_por
             SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_OLLAMA
         },
         body={
-            **({"content": kwargs.get('prompt', '')} if capture_message_content else {}),
+            **({"content": prompt_val} if capture_message_content else {}),
             "role": 'user'
         }
    )
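
The embedding telemetry applies the same pattern to the prompt, preferring the json payload and falling back to the old keyword (values here are hypothetical):

    kwargs = {"json": {"model": "all-minilm", "prompt": "hello world"}}
    json_body = kwargs.get("json", {}) or {}
    prompt_val = json_body.get('prompt', kwargs.get('prompt', ''))  # -> "hello world"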

openlit/instrumentation/transformers/__init__.py CHANGED
@@ -1,16 +1,20 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
-"""Initializer of Auto Instrumentation of HuggingFace Transformer Functions"""
+"""
+Initializer of Auto Instrumentation of HuggingFace Transformer Functions
+"""
+
 from typing import Collection
 import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper
 
-from openlit.instrumentation.transformers.transformers import text_wrap
+from openlit.instrumentation.transformers.transformers import pipeline_wrapper
 
-_instruments = ("transformers >= 4.39.3",)
+_instruments = ("transformers >= 4.48.0",)
 
 class TransformersInstrumentor(BaseInstrumentor):
-    """An instrumentor for HuggingFace Transformer Functions."""
+    """
+    An instrumentor for HuggingFace Transformer library.
+    """
 
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments
@@ -28,10 +32,10 @@ class TransformersInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "transformers",
             "TextGenerationPipeline.__call__",
-            text_wrap(version, environment, application_name,
+            pipeline_wrapper(version, environment, application_name,
                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
-    @staticmethod
     def _uninstrument(self, **kwargs):
+        # Proper uninstrumentation logic to revert patched methods
         pass
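
A hedged usage sketch of what this instrumentor ends up patching, assuming it is activated through openlit's standard entry point (that openlit.init() wires up TransformersInstrumentor is an assumption here):

    import openlit
    from transformers import pipeline

    openlit.init()  # assumed to register TransformersInstrumentor among others

    pipe = pipeline("text-generation", model="gpt2")
    pipe("Hello")   # TextGenerationPipeline.__call__ is now wrapped, so this call emits a span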

openlit/instrumentation/transformers/transformers.py CHANGED
@@ -1,63 +1,31 @@
 """
-Module for monitoring ChromaDB.
+Module for monitoring HF Transformers API calls.
 """
 
 import logging
 import time
-from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
-    get_chat_model_cost,
-    handle_exception,
-    general_tokens,
-    create_metrics_attributes,
     set_server_address_and_port
 )
+
+from openlit.instrumentation.transformers.utils import (
+    process_chat_response,
+)
 from openlit.semcov import SemanticConvention
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def text_wrap(version, environment, application_name,
-              tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def pipeline_wrapper(version, environment, application_name,
+                     tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Creates a wrapper around a function call to trace and log its execution metrics.
-
-    This function wraps any given function to measure its execution time,
-    log its operation, and trace its execution using OpenTelemetry.
-
-    Parameters:
-    - version (str): The version of the Langchain application.
-    - environment (str): The deployment environment (e.g., 'production', 'development').
-    - application_name (str): Name of the Langchain application.
-    - tracer (opentelemetry.trace.Tracer): The tracer object used for OpenTelemetry tracing.
-    - pricing_info (dict): Information about the pricing for internal metrics (currently not used).
-    - capture_message_content (bool): Flag indicating whether to trace the content of the response.
-
-    Returns:
-    - function: A higher-order function that takes a function 'wrapped' and returns
-      a new function that wraps 'wrapped' with additional tracing and logging.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     def wrapper(wrapped, instance, args, kwargs):
         """
-        An inner wrapper function that executes the wrapped function, measures execution
-        time, and records trace data using OpenTelemetry.
-
-        Parameters:
-        - wrapped (Callable): The original function that this wrapper will execute.
-        - instance (object): The instance to which the wrapped function belongs. This
-                             is used for instance methods. For static and classmethods,
-                             this may be None.
-        - args (tuple): Positional arguments passed to the wrapped function.
-        - kwargs (dict): Keyword arguments passed to the wrapped function.
-
-        Returns:
-        - The result of the wrapped function call.
-
-        The wrapper initiates a span with the provided tracer, sets various attributes
-        on the span based on the function's execution and response, and ensures
-        errors are handled and logged appropriately.
+        Wraps the GenAI function call.
         """
 
         server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 80)
@@ -68,130 +36,26 @@ def text_wrap(version, environment, application_name,
         with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
             start_time = time.time()
             response = wrapped(*args, **kwargs)
-            end_time = time.time()
-
-            # pylint: disable=protected-access
-            forward_params = instance._forward_params
-
-            try:
-                if args and len(args) > 0:
-                    prompt = args[0]
-                else:
-                    prompt = kwargs.get("args", "")
-
-                input_tokens = general_tokens(prompt[0])
-
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                                   SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                                   SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                                   request_model)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                                   forward_params.get("temperature", "null"))
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                                   forward_params.get("top_p", "null"))
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                                   forward_params.get("max_length", -1))
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                                   input_tokens)
-                span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                                   server_address)
-                span.set_attribute(SemanticConvention.SERVER_PORT,
-                                   server_port)
-                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                                   request_model)
-
-                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                   environment)
-                span.set_attribute(SERVICE_NAME,
-                                   application_name)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                                   False)
-                span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                                   end_time - start_time)
-                span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                                   version)
-                if capture_message_content:
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                        },
-                    )
-
-                i = 0
-                output_tokens = 0
-                for completion in response:
-                    if len(response) > 1:
-                        attribute_name = f"gen_ai.content.completion.{i}"
-                    else:
-                        attribute_name = SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT
-                    if capture_message_content:
-                        # pylint: disable=bare-except
-                        try:
-                            llm_response = completion.get('generated_text', '')
-                        except:
-                            llm_response = completion[i].get('generated_text', '')
-
-                        span.add_event(
-                            name=attribute_name,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: llm_response,
-                            },
-                        )
-                        output_tokens += general_tokens(llm_response)
-
-                    i=i+1
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                   output_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   input_tokens + output_tokens)
-
-                # Calculate cost of the operation
-                cost = get_chat_model_cost(request_model,
-                                           pricing_info, input_tokens,
-                                           output_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                                   cost)
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = create_metrics_attributes(
-                        service_name=application_name,
-                        deployment_environment=environment,
-                        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                        system=SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE,
-                        request_model=request_model,
-                        server_address=server_address,
-                        server_port=server_port,
-                        response_model=request_model,
-                    )
-
-                    metrics["genai_client_usage_tokens"].record(
-                        input_tokens + output_tokens, attributes
-                    )
-                    metrics["genai_client_operation_duration"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_server_ttft"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
-
-                # Return original response
-                return response
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
 
-            # Return original response
-            return response
+            response = process_chat_response(
+                instance = instance,
+                response=response,
+                request_model=request_model,
+                pricing_info=pricing_info,
+                server_port=server_port,
+                server_address=server_address,
+                environment=environment,
+                application_name=application_name,
+                metrics=metrics,
+                start_time=start_time,
+                span=span,
+                args=args,
+                kwargs=kwargs,
+                capture_message_content=capture_message_content,
+                disable_metrics=disable_metrics,
+                version=version,
+            )
+
+            return response
 
     return wrapper

openlit/instrumentation/transformers/utils.py ADDED
@@ -0,0 +1,183 @@
+"""
+HF Transformers OpenTelemetry instrumentation utility functions
+"""
+import time
+
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    response_as_dict,
+    calculate_tbt,
+    general_tokens,
+    get_chat_model_cost,
+    create_metrics_attributes,
+    format_and_concatenate
+)
+from openlit.semcov import SemanticConvention
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+                      capture_message_content, disable_metrics, version, args, kwargs, is_stream):
+
+    """
+    Process chat request and generate Telemetry
+    """
+
+    scope._end_time = time.time()
+    if len(scope._timestamps) > 1:
+        scope._tbt = calculate_tbt(scope._timestamps)
+
+    forward_params = scope._instance._forward_params
+    request_model = scope._instance.model.config.name_or_path
+
+    input_tokens = general_tokens(scope._prompt)
+    output_tokens = general_tokens(scope._llmresponse)
+
+    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+    # Set Span attributes (OTel Semconv)
+    scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
+    scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
+
+    # List of attributes and their config keys
+    attributes = [
+        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, "temperature"),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_K, "top_k"),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_P, "top_p"),
+        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, "max_length"),
+    ]
+
+    # Set each attribute if the corresponding value exists and is not None
+    for attribute, key in attributes:
+        value = forward_params.get(key)
+        if value is not None:
+            scope._span.set_attribute(attribute, value)
+
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, request_model)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
+    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
+    scope._span.set_attribute(SERVICE_NAME, application_name)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
+
+    # To be removed once the change to span_attributes (from span events) is complete
+    if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, scope._prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: scope._prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+            },
+        )
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    if not disable_metrics:
+        metrics_attributes = create_metrics_attributes(
+            service_name=application_name,
+            deployment_environment=environment,
+            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            system=SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE,
+            request_model=request_model,
+            server_address=scope._server_address,
+            server_port=scope._server_port,
+            response_model=request_model,
+        )
+
+        metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, metrics_attributes)
+        metrics["genai_client_operation_duration"].record(scope._end_time - scope._start_time, metrics_attributes)
+        metrics["genai_server_tbt"].record(scope._tbt, metrics_attributes)
+        metrics["genai_server_ttft"].record(scope._ttft, metrics_attributes)
+        metrics["genai_requests"].add(1, metrics_attributes)
+        metrics["genai_completion_tokens"].add(output_tokens, metrics_attributes)
+        metrics["genai_prompt_tokens"].add(input_tokens, metrics_attributes)
+        metrics["genai_cost"].record(cost, metrics_attributes)
+
+def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
+                          environment, application_name, metrics, start_time,
+                          span, args, kwargs, capture_message_content=False, disable_metrics=False, version="1.0.0"):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    self = type("GenericScope", (), {})()
+    response_dict = response_as_dict(response)
+
+    # pylint: disable = no-member
+    self._instance = instance
+    self._start_time = start_time
+    self._end_time = time.time()
+    self._span = span
+    self._timestamps = []
+    self._ttft, self._tbt = self._end_time - self._start_time, 0
+    self._server_address, self._server_port = server_address, server_port
+    self._kwargs = kwargs
+    self._args = args
+
+    if self._args and len(self._args) > 0:
+        self._prompt = args[0]
+    else:
+        self._prompt = (
+            kwargs.get("text_inputs") or
+            (kwargs.get("image") and kwargs.get("question") and
+             ("image: " + kwargs.get("image") + " question:" + kwargs.get("question"))) or
+            kwargs.get("fallback") or
+            ""
+        )
+    self._prompt = format_and_concatenate(self._prompt)
+
+    self._llmresponse = []
+    if self._kwargs.get("task", "text-generation") == "text-generation":
+        first_entry = response_dict[0]
+
+        if isinstance(first_entry, dict) and isinstance(first_entry.get("generated_text"), list):
+            last_element = first_entry.get("generated_text")[-1]
+            self._llmresponse = last_element.get("content", last_element)
+        else:
+            def extract_text(entry):
+                if isinstance(entry, dict):
+                    return entry.get("generated_text")
+                if isinstance(entry, list):
+                    return " ".join(
+                        extract_text(sub_entry) for sub_entry in entry if isinstance(sub_entry, dict)
+                    )
+                return ""
+
+            # Process and collect all generated texts
+            self._llmresponse = [
+                extract_text(entry) for entry in response_dict
+            ]
+
+            # Join all non-empty responses into a single string
+            self._llmresponse = " ".join(filter(None, self._llmresponse))
+
+    elif self._kwargs.get("task", "text-generation") == "automatic-speech-recognition":
+        self._llmresponse = response_dict.get("text", "")
+
+    elif self._kwargs.get("task", "text-generation") == "image-classification":
+        self._llmresponse = str(response_dict[0])
+
+    elif self._kwargs.get("task", "text-generation") == "visual-question-answering":
+        self._llmresponse = response_dict[0].get("answer")
+
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+                      capture_message_content, disable_metrics, version, args, kwargs, is_stream=False)
+
+    return response
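
The text-generation branch above handles the two output shapes the pipeline can return; illustrative examples (shapes only, values hypothetical):

    # Classic shape: a list of dicts whose generated_text is a string
    response = [{"generated_text": "Once upon a time..."}]

    # Chat-style shape: generated_text is a list of role/content messages;
    # the content of the last element is taken as the response
    response = [{"generated_text": [
        {"role": "user", "content": "Hi"},
        {"role": "assistant", "content": "Hello!"},
    ]}]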

openlit-1.33.21.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.33.20
+Version: 1.33.21
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu

openlit-1.33.21.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
-openlit/__helpers.py,sha256=mbcQvTwjf0R3POo2vDPc0Ms94pNCmAAu9OWXeenFHC4,9068
+openlit/__helpers.py,sha256=sg0EGJGC_OlZePR84cLK77l_lZRBPJwdjWjq_RuaYS0,11444
 openlit/__init__.py,sha256=1OzJQmiZrTlT3Aze_l8GOf1GXH7dAHztJn0Uzd1LAPc,23924
 openlit/evals/__init__.py,sha256=nJe99nuLo1b5rf7pt9U9BCdSDedzbVi2Fj96cgl7msM,380
 openlit/evals/all.py,sha256=oWrue3PotE-rB5WePG3MRYSA-ro6WivkclSHjYlAqGs,7154
@@ -90,10 +90,10 @@ openlit/instrumentation/mistral/mistral.py,sha256=_2qM8v4RCL-S0Mm1vbW77m5vUm8aPD
 openlit/instrumentation/multion/__init__.py,sha256=Wr3lcDyG_YbOLkCUzBFhraAedF6E113tce8eSWlcz10,3149
 openlit/instrumentation/multion/async_multion.py,sha256=XutZnayCJOZ_NA9bvE1NUoej41KOGR7FRn2tpoGKMEU,6092
 openlit/instrumentation/multion/multion.py,sha256=-WqRAcu5qiEMY9XDmlJTQHuQiWfdwms9JDn127QCNb8,6074
-openlit/instrumentation/ollama/__init__.py,sha256=JjxSqEegmRoRqIVz7ZAq9dLyXPZ2DqV2wGmgXCENNpw,3004
-openlit/instrumentation/ollama/async_ollama.py,sha256=rQ637cpOenezcbyJPV16LLQ2UKuROrO31-0lbfyGhoA,6541
-openlit/instrumentation/ollama/ollama.py,sha256=9C-XtUZ9FoR1cjpFdO3BHjF6NZqRhzNLt-z2z0cJIyE,6437
-openlit/instrumentation/ollama/utils.py,sha256=vAFCWv4qWd-_jThR0IQGoBhjp99uy1QUtJk6Kj6y_Js,14604
+openlit/instrumentation/ollama/__init__.py,sha256=v7VhVxHw_c6QtMznxe6a7z6QrYHZsH_NSXfiXao83Ns,3707
+openlit/instrumentation/ollama/async_ollama.py,sha256=zJPDr2ROh1nvFGoxgdTbe04Zr1KhmgJUYFPeuRLQGLk,6667
+openlit/instrumentation/ollama/ollama.py,sha256=MNUltiP9XVT4azmO_-E2vjhFaoHQyJ0Z6c-HnB0_jCE,6563
+openlit/instrumentation/ollama/utils.py,sha256=41uvYaYkGwWfRyHYqhOOwrFy6cMzBlG1urJYUat9Q24,14819
 openlit/instrumentation/openai/__init__.py,sha256=FiL4OHDhs957spa3k9sNC_VLt0-txtwbnujQwnevQ5I,5564
 openlit/instrumentation/openai/async_openai.py,sha256=CiyBpn8Evnd_gh3Cm1WbfkN7eUpDmFh4KMvxka-B4og,71764
 openlit/instrumentation/openai/openai.py,sha256=r8ZNVoAFTPuCUf18a5v1Lp48LXwCeT9paEB-3USSiiU,71507
@@ -114,8 +114,9 @@ openlit/instrumentation/reka/reka.py,sha256=L6gH7j94tcYlc_FCkQP6SrxH7yBr4uSgtN8B
 openlit/instrumentation/together/__init__.py,sha256=MLLL2t8FyrytpfMueqcwekiqTKn-JN40HBD_LbZS_jQ,2661
 openlit/instrumentation/together/async_together.py,sha256=ToSeYqE0mCgSsCNSO0pqoyS7WU6YarHxa3I7ZrzH-d8,30634
 openlit/instrumentation/together/together.py,sha256=7Da9fjHaZk_ObXMnSZA79-RktgwHRVYevsZAA-OpcXY,30530
-openlit/instrumentation/transformers/__init__.py,sha256=f-kWX6_VdiOXqDa64iK6oJyYF3xkHuKxIBPTll-W7Lw,1467
-openlit/instrumentation/transformers/transformers.py,sha256=naSIvynfI16RWboB4cZb728AqubU8lUt-zYSRUg6X_k,9174
+openlit/instrumentation/transformers/__init__.py,sha256=9Ubss5nlumcypxprxff8Fv3sst7II27SsvCzqkBX9Kg,1457
+openlit/instrumentation/transformers/transformers.py,sha256=zCAwfXu77HPlhy7vuU-nvNvsmmU4fs4aaFMCBG5AOLA,1993
+openlit/instrumentation/transformers/utils.py,sha256=UP-aB_hP4SVLQ1A0I-PrNXC3mPJkVZnS7UOkQGe6OXc,8087
 openlit/instrumentation/vertexai/__init__.py,sha256=mT28WCBvQfRCkAWGL6bd0EjEPHvMjaNcz6T3jsLZh8k,3745
 openlit/instrumentation/vertexai/async_vertexai.py,sha256=-kpg-eiL76O5_XopUPghCYwJHf0Nrxi00_Z5tCwq6zM,23086
 openlit/instrumentation/vertexai/vertexai.py,sha256=5NB090aWlm9DnlccNNLRO6A97P_RN-JnHb5JS01tYyw,23000
@@ -125,7 +126,7 @@ openlit/otel/events.py,sha256=VrMjTpvnLtYRBHCiFwJojTQqqNpRCxoD4yJYeQrtPsk,3560
 openlit/otel/metrics.py,sha256=urpadRfC_BjLCPxWgk5J6NGStECeJA55LFkyTD43Jd4,6837
 openlit/otel/tracing.py,sha256=tjV2bEbEDPUB1Z46gE-UsJsb04sRdFrfbhIDkxViZc0,3103
 openlit/semcov/__init__.py,sha256=JF9MwflazC8jHOiQdPYshfv1q5Z9bhB4OGa0N_fr9d4,13305
-openlit-1.33.20.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-openlit-1.33.20.dist-info/METADATA,sha256=GZtU_l6KXa6MR28a4KEa0TzHsNh5d0mwmPnZ2dNFMfQ,23470
-openlit-1.33.20.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
-openlit-1.33.20.dist-info/RECORD,,
+openlit-1.33.21.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+openlit-1.33.21.dist-info/METADATA,sha256=RP9M2CRa9gXnTVN6I_YjyjoGS8C3zUI04w4lbI6q3yE,23470
+openlit-1.33.21.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+openlit-1.33.21.dist-info/RECORD,,

openlit-1.33.21.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 2.1.2
+Generator: poetry-core 2.1.3
 Root-Is-Purelib: true
 Tag: py3-none-any