openlit 1.34.12__py3-none-any.whl → 1.34.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

openlit/instrumentation/transformers/__init__.py
@@ -30,12 +30,19 @@ class TransformersInstrumentor(BaseInstrumentor):
         version = importlib.metadata.version("transformers")

         wrap_function_wrapper(
-            "transformers",
-            "TextGenerationPipeline.__call__",
-            pipeline_wrapper(version, environment, application_name,
-                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+            "transformers",
+            "TextGenerationPipeline.__call__",
+            pipeline_wrapper(
+                version,
+                environment,
+                application_name,
+                tracer,
+                pricing_info,
+                capture_message_content,
+                metrics,
+                disable_metrics
+            ),
         )

     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
         pass
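
For orientation, a minimal usage sketch (not part of the diff) of the call path this instrumentor patches: openlit.init() registers TransformersInstrumentor, after which Hugging Face text-generation pipeline calls flow through the rewrapped TextGenerationPipeline.__call__. The application name, environment, model, and prompt below are illustrative.

    import openlit
    from transformers import pipeline

    # Registers the instrumentors, including the TextGenerationPipeline wrapper above
    openlit.init(application_name="demo-app", environment="dev")

    # This call is now routed through pipeline_wrapper and produces a chat span
    generator = pipeline("text-generation", model="gpt2")
    print(generator("Hello, world", max_new_tokens=16))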

openlit/instrumentation/transformers/transformers.py
@@ -2,22 +2,15 @@
 Module for monitoring HF Transformers API calls.
 """

-import logging
 import time
 from opentelemetry.trace import SpanKind
-from openlit.__helpers import (
-    set_server_address_and_port
-)
-from openlit.instrumentation.transformers.utils import (
-    process_chat_response,
-)
+from openlit.__helpers import set_server_address_and_port
+from openlit.instrumentation.transformers.utils import process_chat_response
 from openlit.semcov import SemanticConvention

-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)

-def pipeline_wrapper(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def pipeline_wrapper(version, environment, application_name, tracer, pricing_info,
+    capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -32,27 +25,27 @@ def pipeline_wrapper(version, environment, application_name,

         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

-        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
             start_time = time.time()
             response = wrapped(*args, **kwargs)

             response = process_chat_response(
-                instance = instance,
-                response=response,
-                request_model=request_model,
-                pricing_info=pricing_info,
-                server_port=server_port,
-                server_address=server_address,
-                environment=environment,
-                application_name=application_name,
-                metrics=metrics,
-                start_time=start_time,
-                span=span,
-                args=args,
-                kwargs=kwargs,
-                capture_message_content=capture_message_content,
-                disable_metrics=disable_metrics,
-                version=version,
+                instance=instance,
+                response=response,
+                request_model=request_model,
+                pricing_info=pricing_info,
+                server_port=server_port,
+                server_address=server_address,
+                environment=environment,
+                application_name=application_name,
+                metrics=metrics,
+                start_time=start_time,
+                span=span,
+                args=args,
+                kwargs=kwargs,
+                capture_message_content=capture_message_content,
+                disable_metrics=disable_metrics,
+                version=version,
             )

             return response

openlit/instrumentation/transformers/utils.py
@@ -3,19 +3,61 @@ HF Transformers OpenTelemetry instrumentation utility functions
 """
 import time

-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from opentelemetry.trace import Status, StatusCode

 from openlit.__helpers import (
-    response_as_dict,
-    calculate_tbt,
     general_tokens,
     get_chat_model_cost,
-    create_metrics_attributes,
-    format_and_concatenate
+    common_span_attributes,
+    record_completion_metrics,
 )
 from openlit.semcov import SemanticConvention

+def format_content(content):
+    """
+    Format content to a consistent structure.
+    """
+    if isinstance(content, str):
+        return content
+    elif isinstance(content, list):
+        # Check if its a list of chat messages (like in the test case)
+        if (len(content) > 0 and isinstance(content[0], dict) and
+            "role" in content[0] and "content" in content[0]):
+            # Handle chat message format like Groq
+            formatted_messages = []
+            for message in content:
+                role = message["role"]
+                msg_content = message["content"]
+
+                if isinstance(msg_content, list):
+                    content_str = ", ".join(
+                        f'{item["type"]}: {item["text"] if "text" in item else item.get("image_url", str(item))}'
+                        if isinstance(item, dict) and "type" in item
+                        else str(item)
+                        for item in msg_content
+                    )
+                    formatted_messages.append(f"{role}: {content_str}")
+                else:
+                    formatted_messages.append(f"{role}: {msg_content}")
+            return "\n".join(formatted_messages)
+        else:
+            # Handle other list formats (transformers responses)
+            formatted_content = []
+            for item in content:
+                if isinstance(item, str):
+                    formatted_content.append(item)
+                elif isinstance(item, dict):
+                    # Handle dict format for transformers
+                    if "generated_text" in item:
+                        formatted_content.append(str(item["generated_text"]))
+                    else:
+                        formatted_content.append(str(item))
+                else:
+                    formatted_content.append(str(item))
+            return " ".join(formatted_content)
+    else:
+        return str(content)
+
 def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
     capture_message_content, disable_metrics, version, args, kwargs, is_stream):

@@ -24,56 +66,42 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     """

     scope._end_time = time.time()
-    if len(scope._timestamps) > 1:
-        scope._tbt = calculate_tbt(scope._timestamps)
-
     forward_params = scope._instance._forward_params
     request_model = scope._instance.model.config.name_or_path

     input_tokens = general_tokens(scope._prompt)
-    output_tokens = general_tokens(scope._llmresponse)
+    output_tokens = general_tokens(scope._completion)

     cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)

-    # Set Span attributes (OTel Semconv)
-    scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
-    scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
-
-    # List of attributes and their config keys
-    attributes = [
-        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, "temperature"),
-        (SemanticConvention.GEN_AI_REQUEST_TOP_K, "top_k"),
-        (SemanticConvention.GEN_AI_REQUEST_TOP_P, "top_p"),
-        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, "max_length"),
-    ]
-
-    # Set each attribute if the corresponding value exists and is not None
-    for attribute, key in attributes:
-        value = forward_params.get(key)
-        if value is not None:
-            scope._span.set_attribute(attribute, value)
-
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, request_model)
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Set request parameters from forward_params
+    if forward_params.get("temperature") is not None:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, forward_params["temperature"])
+    if forward_params.get("top_k") is not None:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, forward_params["top_k"])
+    if forward_params.get("top_p") is not None:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, forward_params["top_p"])
+    if forward_params.get("max_length") is not None:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, forward_params["max_length"])
+
+    # Set token usage and cost attributes
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
-    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
-    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
-    scope._span.set_attribute(SERVICE_NAME, application_name)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
     scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)

-    # To be removed one the change to span_attributes (from span events) is complete
+    # Span Attributes for Content
     if capture_message_content:
         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, scope._prompt)
-        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._completion)

+        # To be removed once the change to span_attributes (from span events) is complete
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
             attributes={
@@ -83,32 +111,18 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
             attributes={
-                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._completion,
             },
         )

     scope._span.set_status(Status(StatusCode.OK))

+    # Record metrics using the standardized helper function
     if not disable_metrics:
-        metrics_attributes = create_metrics_attributes(
-            service_name=application_name,
-            deployment_environment=environment,
-            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-            system=SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE,
-            request_model=request_model,
-            server_address=scope._server_address,
-            server_port=scope._server_port,
-            response_model=request_model,
-        )
-
-        metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, metrics_attributes)
-        metrics["genai_client_operation_duration"].record(scope._end_time - scope._start_time, metrics_attributes)
-        metrics["genai_server_tbt"].record(scope._tbt, metrics_attributes)
-        metrics["genai_server_ttft"].record(scope._ttft, metrics_attributes)
-        metrics["genai_requests"].add(1, metrics_attributes)
-        metrics["genai_completion_tokens"].add(output_tokens, metrics_attributes)
-        metrics["genai_prompt_tokens"].add(input_tokens, metrics_attributes)
-        metrics["genai_cost"].record(cost, metrics_attributes)
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE, scope._server_address, scope._server_port,
+            request_model, request_model, environment, application_name, scope._start_time, scope._end_time,
+            cost, input_tokens, output_tokens, scope._tbt, scope._ttft)

 def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
     environment, application_name, metrics, start_time,
@@ -117,67 +131,69 @@ def process_chat_response(instance, response, request_model, pricing_info, serve
     Process chat request and generate Telemetry
     """

-    self = type("GenericScope", (), {})()
-    response_dict = response_as_dict(response)
-
-    # pylint: disable = no-member
-    self._instance = instance
-    self._start_time = start_time
-    self._end_time = time.time()
-    self._span = span
-    self._timestamps = []
-    self._ttft, self._tbt = self._end_time - self._start_time, 0
-    self._server_address, self._server_port = server_address, server_port
-    self._kwargs = kwargs
-    self._args = args
-
-    if self._args and len(self._args) > 0:
-        self._prompt = args[0]
+    scope = type("GenericScope", (), {})()
+    scope._instance = instance
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._server_address = server_address
+    scope._server_port = server_port
+    scope._kwargs = kwargs
+    scope._args = args
+
+    # Extract prompt from args or kwargs
+    if args and len(args) > 0:
+        scope._prompt = args[0]
     else:
-        self._prompt = (
+        scope._prompt = (
             kwargs.get("text_inputs") or
             (kwargs.get("image") and kwargs.get("question") and
-            ("image: " + kwargs.get("image") + " question:" + kwargs.get("question"))) or
+            ("image: " + kwargs.get("image") + " question:" + kwargs.get("question"))) or
             kwargs.get("fallback") or
             ""
         )
-    self._prompt = format_and_concatenate(self._prompt)
-
-    self._llmresponse = []
-    if self._kwargs.get("task", "text-generation") == "text-generation":
-        first_entry = response_dict[0]
-
-        if isinstance(first_entry, dict) and isinstance(first_entry.get("generated_text"), list):
-            last_element = first_entry.get("generated_text")[-1]
-            self._llmresponse = last_element.get("content", last_element)
+    scope._prompt = format_content(scope._prompt)
+
+    # Process response based on task type
+    task = kwargs.get("task", "text-generation")
+
+    if task == "text-generation":
+        # Handle text generation responses
+        if isinstance(response, list) and len(response) > 0:
+            first_entry = response[0]
+            if isinstance(first_entry, dict):
+                if isinstance(first_entry.get("generated_text"), list):
+                    # Handle nested list format
+                    last_element = first_entry.get("generated_text")[-1]
+                    scope._completion = last_element.get("content", str(last_element))
+                else:
+                    # Handle standard format
+                    scope._completion = first_entry.get("generated_text", "")
+            else:
+                scope._completion = str(first_entry)
         else:
-            def extract_text(entry):
-                if isinstance(entry, dict):
-                    return entry.get("generated_text")
-                if isinstance(entry, list):
-                    return " ".join(
-                        extract_text(sub_entry) for sub_entry in entry if isinstance(sub_entry, dict)
-                    )
-                return ""
-
-            # Process and collect all generated texts
-            self._llmresponse = [
-                extract_text(entry) for entry in response_dict
-            ]
+            scope._completion = ""

-        # Join all non-empty responses into a single string
-        self._llmresponse = " ".join(filter(None, self._llmresponse))
+    elif task == "automatic-speech-recognition":
+        scope._completion = response.get("text", "") if isinstance(response, dict) else ""

-    elif self._kwargs.get("task", "text-generation") == "automatic-speech-recognition":
-        self._llmresponse = response_dict.get("text", "")
+    elif task == "image-classification":
+        scope._completion = str(response[0]) if isinstance(response, list) and len(response) > 0 else ""

-    elif self._kwargs.get("task", "text-generation") == "image-classification":
-        self._llmresponse = str(response_dict[0])
+    elif task == "visual-question-answering":
+        if isinstance(response, list) and len(response) > 0 and isinstance(response[0], dict):
+            scope._completion = response[0].get("answer", "")
+        else:
+            scope._completion = ""
+    else:
+        # Default handling for other tasks
+        scope._completion = format_content(response)

-    elif self._kwargs.get("task", "text-generation") == "visual-question-answering":
-        self._llmresponse = str(response_dict[0]).get("answer")
+    # Initialize timing attributes
+    scope._tbt = 0
+    scope._ttft = scope._end_time - scope._start_time

-    common_chat_logic(self, pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version, args, kwargs, is_stream=False)
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, args, kwargs, is_stream=False)

     return response
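
A small illustration (not part of the diff) of what the new format_content helper in openlit.instrumentation.transformers.utils does with the two input shapes handled above; the sample messages are made up, and the expected output in the comments follows directly from the code in this hunk.

    from openlit.instrumentation.transformers.utils import format_content

    # Chat-style input: a list of {"role", "content"} dicts is joined per message
    chat = [
        {"role": "user", "content": "What is the capital of France?"},
        {"role": "assistant", "content": "Paris."},
    ]
    print(format_content(chat))
    # user: What is the capital of France?
    # assistant: Paris.

    # Transformers pipeline output style: dicts with "generated_text" are flattened
    outputs = [{"generated_text": "Paris is the capital of France."}]
    print(format_content(outputs))
    # Paris is the capital of France.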

openlit/instrumentation/vllm/__init__.py
@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of vLLM Functions"""

 from typing import Collection
@@ -14,15 +13,15 @@ _instruments = ("vllm >= 0.5.4",)

 class VLLMInstrumentor(BaseInstrumentor):
     """
-    An instrumentor for vLLM's client library.
+    An instrumentor for vLLM client library.
     """

     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments

     def _instrument(self, **kwargs):
-        application_name = kwargs.get("application_name", "default_application")
-        environment = kwargs.get("environment", "default_environment")
+        application_name = kwargs.get("application_name", "default")
+        environment = kwargs.get("environment", "default")
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info", {})
@@ -30,14 +29,13 @@ class VLLMInstrumentor(BaseInstrumentor):
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("vllm")

-        # sync chat
+        # Chat completions
         wrap_function_wrapper(
             "vllm.entrypoints.llm",
             "LLM.generate",
             generate(version, environment, application_name,
-                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )

     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
         pass
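
For reference, a hedged sketch (not part of the diff) of the call path this wrapper instruments: openlit.init() installs the LLM.generate wrapper, and the model and sampling parameters below are illustrative.

    import openlit
    from vllm import LLM, SamplingParams

    openlit.init(application_name="demo-app", environment="dev")

    # LLM.generate is the patched entry point; each call produces a chat span
    llm = LLM(model="facebook/opt-125m")
    outputs = llm.generate(["Hello, my name is"], SamplingParams(temperature=0.8, top_p=0.95))
    for output in outputs:
        print(output.prompt, output.outputs[0].text)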

openlit/instrumentation/vllm/utils.py
@@ -1,15 +1,15 @@
 """
-Utility functions for vLLM instrumentation.
+vLLM OpenTelemetry instrumentation utility functions
 """
-
 import time
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+
 from opentelemetry.trace import Status, StatusCode
+
 from openlit.__helpers import (
-    calculate_tbt,
-    get_chat_model_cost,
     general_tokens,
-    create_metrics_attributes,
+    get_chat_model_cost,
+    common_span_attributes,
+    record_completion_metrics,
 )
 from openlit.semcov import SemanticConvention

@@ -24,77 +24,81 @@ def get_inference_config(args, kwargs):
         return args[1]
     return None

+def format_content(prompts):
+    """
+    Process a list of prompts to extract content.
+    """
+
+    if isinstance(prompts, str):
+        return prompts
+    elif isinstance(prompts, list):
+        return "\n".join(str(prompt) for prompt in prompts)
+    else:
+        return str(prompts)
+
 def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
     capture_message_content, disable_metrics, version, is_stream):
     """
     Process chat request and generate Telemetry
     """

-    scope._end_time = time.time()
-    if len(scope._timestamps) > 1:
-        scope._tbt = calculate_tbt(scope._timestamps)
-
-    # Set base span attributes
-    scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_VLLM)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, scope._request_model)
-    scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
-    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
-
-    # Handle inference configuration
+    request_model = scope._request_model
+
+    # Extract prompts and completions from vLLM response
+    input_tokens = 0
+    output_tokens = 0
+    prompt = ""
+    completion = ""
+
+    for output in scope._response:
+        prompt += output.prompt + "\n"
+        if output.outputs and len(output.outputs) > 0:
+            completion += output.outputs[0].text + "\n"
+            input_tokens += general_tokens(output.prompt)
+            output_tokens += general_tokens(output.outputs[0].text)
+
+    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
     inference_config = get_inference_config(scope._args, scope._kwargs)
     if inference_config:
-        attributes = [
-            (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
-            (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
-            (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
-            (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
-            (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
-            (SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
-            (SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
-        ]
-
-        for attribute, key in attributes:
-            value = getattr(inference_config, key, None)
-            if value is not None:
-                scope._span.set_attribute(attribute, value)
-
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._request_model)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, getattr(inference_config, 'max_tokens', -1))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, getattr(inference_config, 'stop_sequences', []))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, getattr(inference_config, 'temperature', 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, getattr(inference_config, 'top_p', 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, getattr(inference_config, 'top_k', -1))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+            getattr(inference_config, 'presence_penalty', 0.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+            getattr(inference_config, 'frequency_penalty', 0.0))
+
+    # Span Attributes for Response parameters
     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")

-    # Set base span attributes (Extras)
-    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
-    scope._span.set_attribute(SERVICE_NAME, application_name)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
-
-    input_tokens = 0
-    output_tokens = 0
-    cost = 0
+    # Span Attributes for Cost and Tokens
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)

+    # Span Attributes for Content
     if capture_message_content:
-        prompt = ""
-        completion = ""
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, completion)

-        for output in scope._response:
-            prompt += output.prompt + "\n"
-            if output.outputs and len(output.outputs) > 0:
-                completion += output.outputs[0].text + "\n"
-                input_tokens += general_tokens(output.prompt)
-                output_tokens += general_tokens(output.outputs[0].text)
-
-        # Add a single event for prompt
+        # To be removed once the change to span_attributes (from span events) is complete
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
             attributes={
                 SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
             },
         )
-
-        # Add a single event for completion
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
             attributes={
@@ -102,39 +106,14 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
             },
         )

-        scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-            input_tokens)
-        scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-            output_tokens)
-        scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
-            input_tokens + output_tokens)
-
-        # Calculate cost of the operation
-        cost = get_chat_model_cost(scope._request_model, pricing_info, input_tokens, output_tokens)
-        scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-
     scope._span.set_status(Status(StatusCode.OK))

-    if disable_metrics is False:
-        metrics_attributes = create_metrics_attributes(
-            service_name=application_name,
-            deployment_environment=environment,
-            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-            system=SemanticConvention.GEN_AI_SYSTEM_VLLM,
-            request_model=scope._request_model,
-            server_address=scope._server_address,
-            server_port=scope._server_port,
-            response_model=scope._request_model,
-        )
-        metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
-        metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
-        metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
-        metrics['genai_requests'].add(1, metrics_attributes)
-        metrics['genai_completion_tokens'].add(output_tokens, metrics_attributes)
-        metrics['genai_prompt_tokens'].add(input_tokens, metrics_attributes)
-        metrics['genai_cost'].record(cost, metrics_attributes)
-        metrics['genai_client_usage_tokens'].record(
-            input_tokens + output_tokens, metrics_attributes)
+    # Metrics
+    if not disable_metrics:
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+            scope._server_address, scope._server_port, request_model, request_model, environment,
+            application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
+            cost, scope._tbt, scope._ttft)

 def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
     environment, application_name, metrics, start_time, span, args, kwargs,
@@ -142,20 +121,23 @@ def process_chat_response(instance, response, request_model, pricing_info, serve
     """
     Process chat request and generate Telemetry
     """
-    self = type('GenericScope', (), {})()
-    self._response = response
-    self._start_time = start_time
-    self._end_time = time.time()
-    self._span = span
-    self._ttft, self._tbt = self._end_time - self._start_time, 0
-    self._server_address = server_address
-    self._server_port = server_port
-    self._request_model = request_model
-    self._timestamps = []
-    self._args = args
-    self._kwargs = kwargs
-
-    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+
+    # Create scope object
+    scope = type("GenericScope", (), {})()
+
+    scope._response = response
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address = server_address
+    scope._server_port = server_port
+    scope._request_model = request_model
+    scope._timestamps = []
+    scope._args = args
+    scope._kwargs = kwargs
+
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
         capture_message_content, disable_metrics, version, is_stream=False)

     return response
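
To make the new aggregation logic concrete, here is a self-contained sketch (not part of the diff) of the loop common_chat_logic now runs over the LLM.generate() result; FakeCompletion/FakeOutput stand in for vLLM's CompletionOutput/RequestOutput, and the whitespace token count stands in for openlit's general_tokens helper.

    from dataclasses import dataclass, field

    @dataclass
    class FakeCompletion:
        text: str

    @dataclass
    class FakeOutput:
        prompt: str
        outputs: list = field(default_factory=list)

    def naive_tokens(text):
        # Stand-in for openlit.__helpers.general_tokens
        return len(text.split())

    response = [
        FakeOutput("Tell me a joke", [FakeCompletion("Why did the chicken cross the road?")]),
        FakeOutput("Say hi", [FakeCompletion("Hi!")]),
    ]

    prompt, completion = "", ""
    input_tokens = output_tokens = 0
    for output in response:
        prompt += output.prompt + "\n"
        if output.outputs and len(output.outputs) > 0:
            completion += output.outputs[0].text + "\n"
            input_tokens += naive_tokens(output.prompt)
            output_tokens += naive_tokens(output.outputs[0].text)

    print(prompt, completion, input_tokens, output_tokens)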

openlit/instrumentation/vllm/vllm.py
@@ -2,7 +2,6 @@
 Module for monitoring vLLM API calls.
 """

-import logging
 import time
 from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
@@ -14,11 +13,8 @@ from openlit.instrumentation.vllm.utils import (
 )
 from openlit.semcov import SemanticConvention

-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
-
-def generate(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def generate(version, environment, application_name, tracer, pricing_info,
+    capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -27,7 +23,6 @@ def generate(version, environment, application_name,
         """
         Wraps the GenAI function call.
         """
-
         server_address, server_port = set_server_address_and_port(instance, "http://127.0.0.1", 443)
         request_model = instance.llm_engine.model_config.model or "facebook/opt-125m"

@@ -56,9 +51,9 @@ def generate(version, environment, application_name,
                     disable_metrics=disable_metrics,
                     version=version,
                 )
+
             except Exception as e:
                 handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)

             return response


openlit-1.34.14.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.34.12
+Version: 1.34.14
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu

openlit-1.34.14.dist-info/RECORD
@@ -125,20 +125,20 @@ openlit/instrumentation/together/__init__.py,sha256=0UmUqQtppyK3oopb4lTjX2LITgVC
 openlit/instrumentation/together/async_together.py,sha256=0-h5fKw6rIwN_fvWVpGuvVqizIuM9xFCzz8Z4oGgOj0,6822
 openlit/instrumentation/together/together.py,sha256=nY6mzHmHgoMbbnB_9eL0EBQjP0ltJVdkQj4pbamHAj0,6723
 openlit/instrumentation/together/utils.py,sha256=n7r_pM_sqFnJEAkL7OhPydr0Uct0A74vXdcYELdbeW0,14368
-openlit/instrumentation/transformers/__init__.py,sha256=9Ubss5nlumcypxprxff8Fv3sst7II27SsvCzqkBX9Kg,1457
-openlit/instrumentation/transformers/transformers.py,sha256=y--t7PXhUfPC81w-aEE7qowMah3os9gnKBQ5bN4QLGc,1980
-openlit/instrumentation/transformers/utils.py,sha256=3f-ewpUpduaBrTVIFJKaabACjz-6Vf8K7NEU0EzQ4Nk,8042
+openlit/instrumentation/transformers/__init__.py,sha256=hXq0WUZNl6Sz0Ihk29kA9i8Q1j0e1URFb7v7etnQpxI,1511
+openlit/instrumentation/transformers/transformers.py,sha256=MHnHVo_6NP0gSIqxen6qQpCrZ0fs8Ec80EdZumMpVNo,1797
+openlit/instrumentation/transformers/utils.py,sha256=MMy_SyRyDI4X-0mqbBwStac0xabmw0ZRvv_VWLA_Nkg,8426
 openlit/instrumentation/vertexai/__init__.py,sha256=mT28WCBvQfRCkAWGL6bd0EjEPHvMjaNcz6T3jsLZh8k,3745
 openlit/instrumentation/vertexai/async_vertexai.py,sha256=-kpg-eiL76O5_XopUPghCYwJHf0Nrxi00_Z5tCwq6zM,23086
 openlit/instrumentation/vertexai/vertexai.py,sha256=5NB090aWlm9DnlccNNLRO6A97P_RN-JnHb5JS01tYyw,23000
-openlit/instrumentation/vllm/__init__.py,sha256=VUWsjtYEe1_u4hJRDAZI5abrRfZ2L85LxZmc76irBrk,1524
-openlit/instrumentation/vllm/utils.py,sha256=hPVG_UKLY7xTvmmHbBdPy8HT7y_8VIILn37a5zOTYzU,6822
-openlit/instrumentation/vllm/vllm.py,sha256=SZosSwnkBUKspPtsm_k6VQaAWnD4kdcFWj2n-StWJus,2175
+openlit/instrumentation/vllm/__init__.py,sha256=uaSzQmgDuKJ-sh61sfVdzVt2qAZaozZIQ8sbmQ0XpZE,1357
+openlit/instrumentation/vllm/utils.py,sha256=HuCPNBgChWg9vA7DHNFCij_y8qj27DjZxdZ0Nvdt2fg,5751
+openlit/instrumentation/vllm/vllm.py,sha256=VzazF2f4LLwjZDO_G8lIN_d622oSJM0fIO9wjxXbhyg,2004
 openlit/otel/events.py,sha256=VrMjTpvnLtYRBHCiFwJojTQqqNpRCxoD4yJYeQrtPsk,3560
 openlit/otel/metrics.py,sha256=GM2PDloBGRhBTkHHkYaqmOwIAQkY124ZhW4sEqW1Fgk,7086
 openlit/otel/tracing.py,sha256=tjV2bEbEDPUB1Z46gE-UsJsb04sRdFrfbhIDkxViZc0,3103
 openlit/semcov/__init__.py,sha256=ptyo37PY-FHDx_PShEvbdns71cD4YvvXw15bCRXKCKM,13461
-openlit-1.34.12.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-openlit-1.34.12.dist-info/METADATA,sha256=1GWoYLlOr6o5nSG3t4_n58xc9QJjcTSPUGNEGJJfEn8,23470
-openlit-1.34.12.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-openlit-1.34.12.dist-info/RECORD,,
+openlit-1.34.14.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+openlit-1.34.14.dist-info/METADATA,sha256=qaOh__y9R5tT0z7qveai1LH4KWY7ampN3PzVhhm7D0M,23470
+openlit-1.34.14.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+openlit-1.34.14.dist-info/RECORD,,