openlit 1.34.12__py3-none-any.whl → 1.34.13__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
openlit/instrumentation/vllm/__init__.py

@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of vLLM Functions"""
 
 from typing import Collection
@@ -14,15 +13,15 @@ _instruments = ("vllm >= 0.5.4",)
 
 class VLLMInstrumentor(BaseInstrumentor):
     """
-    An instrumentor for vLLM's client library.
+    An instrumentor for vLLM client library.
     """
 
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments
 
     def _instrument(self, **kwargs):
-        application_name = kwargs.get("application_name", "default_application")
-        environment = kwargs.get("environment", "default_environment")
+        application_name = kwargs.get("application_name", "default")
+        environment = kwargs.get("environment", "default")
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info", {})
@@ -30,14 +29,13 @@ class VLLMInstrumentor(BaseInstrumentor):
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("vllm")
 
-        # sync chat
+        # Chat completions
        wrap_function_wrapper(
             "vllm.entrypoints.llm",
             "LLM.generate",
             generate(version, environment, application_name,
-                      tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                     tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
         pass
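The _instrument hook above relies on wrapt-style monkey-patching: wrap_function_wrapper swaps LLM.generate for the telemetry closure returned by generate(...). As a rough sketch only (not from the package -- openlit normally wires this up via openlit.init()), the instrumentor could be driven directly like this, with keyword names mirroring the kwargs.get(...) lookups in the hunk:

    # Hedged sketch: exercising VLLMInstrumentor by hand. The kwargs shown
    # mirror the kwargs.get(...) calls in _instrument() above; passing
    # disable_metrics=True avoids needing real metric instruments.
    from openlit.instrumentation.vllm import VLLMInstrumentor

    VLLMInstrumentor().instrument(
        application_name="my-app",   # falls back to "default" in 1.34.13
        environment="staging",       # falls back to "default" in 1.34.13
        tracer=None,                 # an OpenTelemetry tracer in real use
        pricing_info={},
        capture_message_content=False,
        disable_metrics=True,
    )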
openlit/instrumentation/vllm/utils.py

@@ -1,15 +1,15 @@
 """
-Utility functions for vLLM instrumentation.
+vLLM OpenTelemetry instrumentation utility functions
 """
-
 import time
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+
 from opentelemetry.trace import Status, StatusCode
+
 from openlit.__helpers import (
-    calculate_tbt,
-    get_chat_model_cost,
     general_tokens,
-    create_metrics_attributes,
+    get_chat_model_cost,
+    common_span_attributes,
+    record_completion_metrics,
 )
 from openlit.semcov import SemanticConvention
 
@@ -24,77 +24,81 @@ def get_inference_config(args, kwargs):
         return args[1]
     return None
 
+def format_content(prompts):
+    """
+    Process a list of prompts to extract content.
+    """
+
+    if isinstance(prompts, str):
+        return prompts
+    elif isinstance(prompts, list):
+        return "\n".join(str(prompt) for prompt in prompts)
+    else:
+        return str(prompts)
+
 def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
                       capture_message_content, disable_metrics, version, is_stream):
     """
     Process chat request and generate Telemetry
     """
 
-    scope._end_time = time.time()
-    if len(scope._timestamps) > 1:
-        scope._tbt = calculate_tbt(scope._timestamps)
-
-    # Set base span attributes
-    scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_VLLM)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, scope._request_model)
-    scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
-    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
-
-    # Handle inference configuration
+    request_model = scope._request_model
+
+    # Extract prompts and completions from vLLM response
+    input_tokens = 0
+    output_tokens = 0
+    prompt = ""
+    completion = ""
+
+    for output in scope._response:
+        prompt += output.prompt + "\n"
+        if output.outputs and len(output.outputs) > 0:
+            completion += output.outputs[0].text + "\n"
+        input_tokens += general_tokens(output.prompt)
+        output_tokens += general_tokens(output.outputs[0].text)
+
+    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
     inference_config = get_inference_config(scope._args, scope._kwargs)
     if inference_config:
-        attributes = [
-            (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
-            (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
-            (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
-            (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
-            (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
-            (SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
-            (SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
-        ]
-
-        for attribute, key in attributes:
-            value = getattr(inference_config, key, None)
-            if value is not None:
-                scope._span.set_attribute(attribute, value)
-
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._request_model)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, getattr(inference_config, 'max_tokens', -1))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, getattr(inference_config, 'stop_sequences', []))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, getattr(inference_config, 'temperature', 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, getattr(inference_config, 'top_p', 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, getattr(inference_config, 'top_k', -1))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+            getattr(inference_config, 'presence_penalty', 0.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+            getattr(inference_config, 'frequency_penalty', 0.0))
+
+    # Span Attributes for Response parameters
     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
 
-    # Set base span attributes (Extras)
-    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
-    scope._span.set_attribute(SERVICE_NAME, application_name)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
-
-    input_tokens = 0
-    output_tokens = 0
-    cost = 0
+    # Span Attributes for Cost and Tokens
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
 
+    # Span Attributes for Content
     if capture_message_content:
-        prompt = ""
-        completion = ""
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, completion)
 
-        for output in scope._response:
-            prompt += output.prompt + "\n"
-            if output.outputs and len(output.outputs) > 0:
-                completion += output.outputs[0].text + "\n"
-            input_tokens += general_tokens(output.prompt)
-            output_tokens += general_tokens(output.outputs[0].text)
-
-        # Add a single event for prompt
+        # To be removed once the change to span_attributes (from span events) is complete
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
             attributes={
                 SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
             },
         )
-
-        # Add a single event for completion
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
             attributes={
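Two patterns in this hunk are worth noting: token usage is now recomputed up front with general_tokens() over the request and response text, and each sampling parameter is read with getattr(config, name, default), so an absent field is recorded with a sentinel value instead of being skipped (the old code silently dropped None values). A small self-contained sketch of the getattr pattern; FakeSamplingParams is invented for illustration and merely stands in for vllm.SamplingParams:

    # Hedged sketch of the getattr-with-default pattern used above.
    class FakeSamplingParams:        # hypothetical stand-in for vllm.SamplingParams
        temperature = 0.7            # only some fields are set

    cfg = FakeSamplingParams()
    print(getattr(cfg, "temperature", 1.0))  # 0.7 -> attribute present
    print(getattr(cfg, "top_k", -1))         # -1  -> sentinel for absent field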
@@ -102,39 +106,14 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
             },
         )
 
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                              input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                              output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
-                              input_tokens + output_tokens)
-
-    # Calculate cost of the operation
-    cost = get_chat_model_cost(scope._request_model, pricing_info, input_tokens, output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-
     scope._span.set_status(Status(StatusCode.OK))
 
-    if disable_metrics is False:
-        metrics_attributes = create_metrics_attributes(
-            service_name=application_name,
-            deployment_environment=environment,
-            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-            system=SemanticConvention.GEN_AI_SYSTEM_VLLM,
-            request_model=scope._request_model,
-            server_address=scope._server_address,
-            server_port=scope._server_port,
-            response_model=scope._request_model,
-        )
-        metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
-        metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
-        metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
-        metrics['genai_requests'].add(1, metrics_attributes)
-        metrics['genai_completion_tokens'].add(output_tokens, metrics_attributes)
-        metrics['genai_prompt_tokens'].add(input_tokens, metrics_attributes)
-        metrics['genai_cost'].record(cost, metrics_attributes)
-        metrics['genai_client_usage_tokens'].record(
-            input_tokens + output_tokens, metrics_attributes)
+    # Metrics
+    if not disable_metrics:
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+            scope._server_address, scope._server_port, request_model, request_model, environment,
+            application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
+            cost, scope._tbt, scope._ttft)
 
 def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
                           environment, application_name, metrics, start_time, span, args, kwargs,
@@ -142,20 +121,23 @@ def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
     """
     Process chat request and generate Telemetry
     """
-    self = type('GenericScope', (), {})()
-    self._response = response
-    self._start_time = start_time
-    self._end_time = time.time()
-    self._span = span
-    self._ttft, self._tbt = self._end_time - self._start_time, 0
-    self._server_address = server_address
-    self._server_port = server_port
-    self._request_model = request_model
-    self._timestamps = []
-    self._args = args
-    self._kwargs = kwargs
-
-    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+
+    # Create scope object
+    scope = type("GenericScope", (), {})()
+
+    scope._response = response
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address = server_address
+    scope._server_port = server_port
+    scope._request_model = request_model
+    scope._timestamps = []
+    scope._args = args
+    scope._kwargs = kwargs
+
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
                       capture_message_content, disable_metrics, version, is_stream=False)
 
     return response
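process_chat_response() collects its state on an ad-hoc object built with type("GenericScope", (), {})() -- an anonymous class instantiated once so attributes can be attached freely without defining a named container. A minimal sketch of the idiom; types.SimpleNamespace is the standard-library equivalent:

    # Hedged sketch of the "GenericScope" attribute-bag idiom.
    scope = type("GenericScope", (), {})()   # anonymous class, one instance
    scope._response = ["output-1"]           # attributes attach freely
    scope._start_time = 0.0
    print(scope._response, scope._start_time)

    # Equivalent using the standard library:
    from types import SimpleNamespace
    ns = SimpleNamespace(_response=["output-1"], _start_time=0.0)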
openlit/instrumentation/vllm/vllm.py

@@ -2,7 +2,6 @@
 Module for monitoring vLLM API calls.
 """
 
-import logging
 import time
 from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
@@ -14,11 +13,8 @@ from openlit.instrumentation.vllm.utils import (
 )
 from openlit.semcov import SemanticConvention
 
-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
-
-def generate(version, environment, application_name,
-             tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def generate(version, environment, application_name, tracer, pricing_info,
+             capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -27,7 +23,6 @@ def generate(version, environment, application_name,
         """
         Wraps the GenAI function call.
         """
-
         server_address, server_port = set_server_address_and_port(instance, "http://127.0.0.1", 443)
         request_model = instance.llm_engine.model_config.model or "facebook/opt-125m"
 
@@ -56,9 +51,9 @@ def generate(version, environment, application_name,
                 disable_metrics=disable_metrics,
                 version=version,
             )
+
         except Exception as e:
             handle_exception(span, e)
-            logger.error("Error in trace creation: %s", e)
 
         return response
 
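The generate() factory returns a closure with the wrapt wrapper signature (wrapped, instance, args, kwargs), which wrap_function_wrapper installs over LLM.generate. A hedged, generic sketch of that shape; my_module and the label are placeholders, and the span/telemetry logic is elided:

    # Hedged sketch of the wrapt wrapper convention used by generate().
    from wrapt import wrap_function_wrapper

    def make_wrapper(label):
        def wrapper(wrapped, instance, args, kwargs):
            # wrapped: the original callable; instance: its bound object (or None)
            print(f"[{label}] calling {wrapped.__name__}")
            return wrapped(*args, **kwargs)  # always return the real result
        return wrapper

    # Analogous to the call in __init__.py above:
    # wrap_function_wrapper("vllm.entrypoints.llm", "LLM.generate", generate(...))
    # wrap_function_wrapper("my_module", "f", make_wrapper("trace"))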
openlit-1.34.13.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.34.12
+Version: 1.34.13
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
openlit-1.34.13.dist-info/RECORD

@@ -131,14 +131,14 @@ openlit/instrumentation/transformers/utils.py,sha256=3f-ewpUpduaBrTVIFJKaabACjz-
 openlit/instrumentation/vertexai/__init__.py,sha256=mT28WCBvQfRCkAWGL6bd0EjEPHvMjaNcz6T3jsLZh8k,3745
 openlit/instrumentation/vertexai/async_vertexai.py,sha256=-kpg-eiL76O5_XopUPghCYwJHf0Nrxi00_Z5tCwq6zM,23086
 openlit/instrumentation/vertexai/vertexai.py,sha256=5NB090aWlm9DnlccNNLRO6A97P_RN-JnHb5JS01tYyw,23000
-openlit/instrumentation/vllm/__init__.py,sha256=VUWsjtYEe1_u4hJRDAZI5abrRfZ2L85LxZmc76irBrk,1524
-openlit/instrumentation/vllm/utils.py,sha256=hPVG_UKLY7xTvmmHbBdPy8HT7y_8VIILn37a5zOTYzU,6822
-openlit/instrumentation/vllm/vllm.py,sha256=SZosSwnkBUKspPtsm_k6VQaAWnD4kdcFWj2n-StWJus,2175
+openlit/instrumentation/vllm/__init__.py,sha256=uaSzQmgDuKJ-sh61sfVdzVt2qAZaozZIQ8sbmQ0XpZE,1357
+openlit/instrumentation/vllm/utils.py,sha256=HuCPNBgChWg9vA7DHNFCij_y8qj27DjZxdZ0Nvdt2fg,5751
+openlit/instrumentation/vllm/vllm.py,sha256=VzazF2f4LLwjZDO_G8lIN_d622oSJM0fIO9wjxXbhyg,2004
 openlit/otel/events.py,sha256=VrMjTpvnLtYRBHCiFwJojTQqqNpRCxoD4yJYeQrtPsk,3560
 openlit/otel/metrics.py,sha256=GM2PDloBGRhBTkHHkYaqmOwIAQkY124ZhW4sEqW1Fgk,7086
 openlit/otel/tracing.py,sha256=tjV2bEbEDPUB1Z46gE-UsJsb04sRdFrfbhIDkxViZc0,3103
 openlit/semcov/__init__.py,sha256=ptyo37PY-FHDx_PShEvbdns71cD4YvvXw15bCRXKCKM,13461
-openlit-1.34.12.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-openlit-1.34.12.dist-info/METADATA,sha256=1GWoYLlOr6o5nSG3t4_n58xc9QJjcTSPUGNEGJJfEn8,23470
-openlit-1.34.12.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-openlit-1.34.12.dist-info/RECORD,,
+openlit-1.34.13.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+openlit-1.34.13.dist-info/METADATA,sha256=4uHfQSKnuT-yfoNz7kj78yd53TBFDCDYVhOIsz7XF8k,23470
+openlit-1.34.13.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+openlit-1.34.13.dist-info/RECORD,,