openlit 1.34.30__py3-none-any.whl → 1.34.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168)
  1. openlit/__helpers.py +235 -86
  2. openlit/__init__.py +16 -13
  3. openlit/_instrumentors.py +2 -1
  4. openlit/evals/all.py +50 -21
  5. openlit/evals/bias_detection.py +47 -20
  6. openlit/evals/hallucination.py +53 -22
  7. openlit/evals/toxicity.py +50 -21
  8. openlit/evals/utils.py +54 -30
  9. openlit/guard/all.py +61 -19
  10. openlit/guard/prompt_injection.py +34 -14
  11. openlit/guard/restrict_topic.py +46 -15
  12. openlit/guard/sensitive_topic.py +34 -14
  13. openlit/guard/utils.py +58 -22
  14. openlit/instrumentation/ag2/__init__.py +24 -8
  15. openlit/instrumentation/ag2/ag2.py +34 -13
  16. openlit/instrumentation/ag2/async_ag2.py +34 -13
  17. openlit/instrumentation/ag2/utils.py +133 -30
  18. openlit/instrumentation/ai21/__init__.py +43 -14
  19. openlit/instrumentation/ai21/ai21.py +47 -21
  20. openlit/instrumentation/ai21/async_ai21.py +47 -21
  21. openlit/instrumentation/ai21/utils.py +299 -78
  22. openlit/instrumentation/anthropic/__init__.py +21 -4
  23. openlit/instrumentation/anthropic/anthropic.py +28 -17
  24. openlit/instrumentation/anthropic/async_anthropic.py +28 -17
  25. openlit/instrumentation/anthropic/utils.py +145 -35
  26. openlit/instrumentation/assemblyai/__init__.py +11 -2
  27. openlit/instrumentation/assemblyai/assemblyai.py +15 -4
  28. openlit/instrumentation/assemblyai/utils.py +120 -25
  29. openlit/instrumentation/astra/__init__.py +43 -10
  30. openlit/instrumentation/astra/astra.py +28 -5
  31. openlit/instrumentation/astra/async_astra.py +28 -5
  32. openlit/instrumentation/astra/utils.py +151 -55
  33. openlit/instrumentation/azure_ai_inference/__init__.py +43 -10
  34. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +53 -21
  35. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +53 -21
  36. openlit/instrumentation/azure_ai_inference/utils.py +307 -83
  37. openlit/instrumentation/bedrock/__init__.py +21 -4
  38. openlit/instrumentation/bedrock/bedrock.py +63 -25
  39. openlit/instrumentation/bedrock/utils.py +139 -30
  40. openlit/instrumentation/chroma/__init__.py +89 -16
  41. openlit/instrumentation/chroma/chroma.py +28 -6
  42. openlit/instrumentation/chroma/utils.py +167 -51
  43. openlit/instrumentation/cohere/__init__.py +63 -18
  44. openlit/instrumentation/cohere/async_cohere.py +63 -24
  45. openlit/instrumentation/cohere/cohere.py +63 -24
  46. openlit/instrumentation/cohere/utils.py +286 -73
  47. openlit/instrumentation/controlflow/__init__.py +35 -9
  48. openlit/instrumentation/controlflow/controlflow.py +66 -33
  49. openlit/instrumentation/crawl4ai/__init__.py +25 -10
  50. openlit/instrumentation/crawl4ai/async_crawl4ai.py +78 -31
  51. openlit/instrumentation/crawl4ai/crawl4ai.py +78 -31
  52. openlit/instrumentation/crewai/__init__.py +40 -15
  53. openlit/instrumentation/crewai/async_crewai.py +32 -7
  54. openlit/instrumentation/crewai/crewai.py +32 -7
  55. openlit/instrumentation/crewai/utils.py +159 -56
  56. openlit/instrumentation/dynamiq/__init__.py +46 -12
  57. openlit/instrumentation/dynamiq/dynamiq.py +74 -33
  58. openlit/instrumentation/elevenlabs/__init__.py +23 -4
  59. openlit/instrumentation/elevenlabs/async_elevenlabs.py +16 -4
  60. openlit/instrumentation/elevenlabs/elevenlabs.py +16 -4
  61. openlit/instrumentation/elevenlabs/utils.py +128 -25
  62. openlit/instrumentation/embedchain/__init__.py +11 -2
  63. openlit/instrumentation/embedchain/embedchain.py +68 -35
  64. openlit/instrumentation/firecrawl/__init__.py +24 -7
  65. openlit/instrumentation/firecrawl/firecrawl.py +46 -20
  66. openlit/instrumentation/google_ai_studio/__init__.py +45 -10
  67. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +67 -44
  68. openlit/instrumentation/google_ai_studio/google_ai_studio.py +67 -44
  69. openlit/instrumentation/google_ai_studio/utils.py +180 -67
  70. openlit/instrumentation/gpt4all/__init__.py +22 -7
  71. openlit/instrumentation/gpt4all/gpt4all.py +67 -29
  72. openlit/instrumentation/gpt4all/utils.py +285 -61
  73. openlit/instrumentation/gpu/__init__.py +128 -47
  74. openlit/instrumentation/groq/__init__.py +21 -4
  75. openlit/instrumentation/groq/async_groq.py +33 -21
  76. openlit/instrumentation/groq/groq.py +33 -21
  77. openlit/instrumentation/groq/utils.py +192 -55
  78. openlit/instrumentation/haystack/__init__.py +70 -24
  79. openlit/instrumentation/haystack/async_haystack.py +28 -6
  80. openlit/instrumentation/haystack/haystack.py +28 -6
  81. openlit/instrumentation/haystack/utils.py +196 -74
  82. openlit/instrumentation/julep/__init__.py +69 -19
  83. openlit/instrumentation/julep/async_julep.py +53 -27
  84. openlit/instrumentation/julep/julep.py +53 -28
  85. openlit/instrumentation/langchain/__init__.py +74 -63
  86. openlit/instrumentation/langchain/callback_handler.py +1100 -0
  87. openlit/instrumentation/langchain_community/__init__.py +13 -2
  88. openlit/instrumentation/langchain_community/async_langchain_community.py +23 -5
  89. openlit/instrumentation/langchain_community/langchain_community.py +23 -5
  90. openlit/instrumentation/langchain_community/utils.py +35 -9
  91. openlit/instrumentation/letta/__init__.py +68 -15
  92. openlit/instrumentation/letta/letta.py +99 -54
  93. openlit/instrumentation/litellm/__init__.py +43 -14
  94. openlit/instrumentation/litellm/async_litellm.py +51 -26
  95. openlit/instrumentation/litellm/litellm.py +51 -26
  96. openlit/instrumentation/litellm/utils.py +304 -102
  97. openlit/instrumentation/llamaindex/__init__.py +267 -90
  98. openlit/instrumentation/llamaindex/async_llamaindex.py +28 -6
  99. openlit/instrumentation/llamaindex/llamaindex.py +28 -6
  100. openlit/instrumentation/llamaindex/utils.py +204 -91
  101. openlit/instrumentation/mem0/__init__.py +11 -2
  102. openlit/instrumentation/mem0/mem0.py +50 -29
  103. openlit/instrumentation/milvus/__init__.py +10 -2
  104. openlit/instrumentation/milvus/milvus.py +31 -6
  105. openlit/instrumentation/milvus/utils.py +166 -67
  106. openlit/instrumentation/mistral/__init__.py +63 -18
  107. openlit/instrumentation/mistral/async_mistral.py +63 -24
  108. openlit/instrumentation/mistral/mistral.py +63 -24
  109. openlit/instrumentation/mistral/utils.py +277 -69
  110. openlit/instrumentation/multion/__init__.py +69 -19
  111. openlit/instrumentation/multion/async_multion.py +57 -26
  112. openlit/instrumentation/multion/multion.py +57 -26
  113. openlit/instrumentation/ollama/__init__.py +39 -18
  114. openlit/instrumentation/ollama/async_ollama.py +57 -26
  115. openlit/instrumentation/ollama/ollama.py +57 -26
  116. openlit/instrumentation/ollama/utils.py +226 -50
  117. openlit/instrumentation/openai/__init__.py +156 -32
  118. openlit/instrumentation/openai/async_openai.py +147 -67
  119. openlit/instrumentation/openai/openai.py +150 -67
  120. openlit/instrumentation/openai/utils.py +657 -185
  121. openlit/instrumentation/openai_agents/__init__.py +5 -1
  122. openlit/instrumentation/openai_agents/processor.py +110 -90
  123. openlit/instrumentation/phidata/__init__.py +13 -5
  124. openlit/instrumentation/phidata/phidata.py +67 -32
  125. openlit/instrumentation/pinecone/__init__.py +48 -9
  126. openlit/instrumentation/pinecone/async_pinecone.py +27 -5
  127. openlit/instrumentation/pinecone/pinecone.py +27 -5
  128. openlit/instrumentation/pinecone/utils.py +153 -47
  129. openlit/instrumentation/premai/__init__.py +22 -7
  130. openlit/instrumentation/premai/premai.py +51 -26
  131. openlit/instrumentation/premai/utils.py +246 -59
  132. openlit/instrumentation/pydantic_ai/__init__.py +49 -22
  133. openlit/instrumentation/pydantic_ai/pydantic_ai.py +69 -16
  134. openlit/instrumentation/pydantic_ai/utils.py +89 -24
  135. openlit/instrumentation/qdrant/__init__.py +19 -4
  136. openlit/instrumentation/qdrant/async_qdrant.py +33 -7
  137. openlit/instrumentation/qdrant/qdrant.py +33 -7
  138. openlit/instrumentation/qdrant/utils.py +228 -93
  139. openlit/instrumentation/reka/__init__.py +23 -10
  140. openlit/instrumentation/reka/async_reka.py +17 -11
  141. openlit/instrumentation/reka/reka.py +17 -11
  142. openlit/instrumentation/reka/utils.py +138 -36
  143. openlit/instrumentation/together/__init__.py +44 -12
  144. openlit/instrumentation/together/async_together.py +50 -27
  145. openlit/instrumentation/together/together.py +50 -27
  146. openlit/instrumentation/together/utils.py +301 -71
  147. openlit/instrumentation/transformers/__init__.py +2 -1
  148. openlit/instrumentation/transformers/transformers.py +13 -3
  149. openlit/instrumentation/transformers/utils.py +139 -36
  150. openlit/instrumentation/vertexai/__init__.py +81 -16
  151. openlit/instrumentation/vertexai/async_vertexai.py +33 -15
  152. openlit/instrumentation/vertexai/utils.py +123 -27
  153. openlit/instrumentation/vertexai/vertexai.py +33 -15
  154. openlit/instrumentation/vllm/__init__.py +12 -5
  155. openlit/instrumentation/vllm/utils.py +121 -31
  156. openlit/instrumentation/vllm/vllm.py +16 -10
  157. openlit/otel/events.py +35 -10
  158. openlit/otel/metrics.py +32 -24
  159. openlit/otel/tracing.py +24 -9
  160. openlit/semcov/__init__.py +72 -6
  161. {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/METADATA +2 -1
  162. openlit-1.34.31.dist-info/RECORD +166 -0
  163. openlit/instrumentation/langchain/async_langchain.py +0 -102
  164. openlit/instrumentation/langchain/langchain.py +0 -102
  165. openlit/instrumentation/langchain/utils.py +0 -252
  166. openlit-1.34.30.dist-info/RECORD +0 -168
  167. {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/LICENSE +0 -0
  168. {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/WHEEL +0 -0
openlit/instrumentation/vertexai/utils.py CHANGED
@@ -1,6 +1,7 @@
 """
 VertexAI OpenTelemetry instrumentation utility functions
 """
+
 import time
 
 from opentelemetry.trace import Status, StatusCode
@@ -14,6 +15,7 @@ from openlit.__helpers import (
 )
 from openlit.semcov import SemanticConvention
 
+
 def format_content(contents):
     """
     Format the VertexAI contents into a string for span events.
@@ -37,7 +39,9 @@ def format_content(contents):
             if part.thought:
                 content_str.append(f"thought: {part.thought}")
             if part.code_execution_result:
-                content_str.append(f"code_execution_result: {part.code_execution_result}")
+                content_str.append(
+                    f"code_execution_result: {part.code_execution_result}"
+                )
             if part.executable_code:
                 content_str.append(f"executable_code: {part.executable_code}")
             if part.file_data:
@@ -53,6 +57,7 @@ def format_content(contents):
 
     return "\n".join(formatted_messages)
 
+
 def process_chunk(scope, chunk):
     """
     Process a chunk of response data and update state.
@@ -71,8 +76,18 @@ def process_chunk(scope, chunk):
         scope._input_tokens = chunk.usage_metadata.prompt_token_count
         scope._output_tokens = chunk.usage_metadata.candidates_token_count
 
-def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-                      capture_message_content, disable_metrics, version, is_stream):
+
+def common_chat_logic(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content,
+    disable_metrics,
+    version,
+    is_stream,
+):
     """
     Process chat request and generate Telemetry
     """
@@ -86,13 +101,26 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     formatted_messages = format_content(contents)
     prompt = formatted_messages or str(scope._args[0][0])
 
-    cost = get_chat_model_cost(scope._request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+    cost = get_chat_model_cost(
+        scope._request_model, pricing_info, scope._input_tokens, scope._output_tokens
+    )
 
     # Common Span Attributes
-    common_span_attributes(scope,
-        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VERTEXAI,
-        scope._server_address, scope._server_port, scope._request_model, scope._request_model,
-        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+    common_span_attributes(
+        scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+        SemanticConvention.GEN_AI_SYSTEM_VERTEXAI,
+        scope._server_address,
+        scope._server_port,
+        scope._request_model,
+        scope._request_model,
+        environment,
+        application_name,
+        is_stream,
+        scope._tbt,
+        scope._ttft,
+        version,
+    )
 
     # Span Attributes for Request parameters (VertexAI-specific)
     inference_config = scope._kwargs.get("generation_config", {})
@@ -115,18 +143,30 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
             scope._span.set_attribute(attribute, value)
 
     # Span Attributes for Response parameters
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_OUTPUT_TYPE,
+        "text" if isinstance(scope._llmresponse, str) else "json",
+    )
 
     # Span Attributes for Cost and Tokens
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
+        scope._input_tokens + scope._output_tokens,
+    )
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
 
     # Span Attributes for Content
     if capture_message_content:
         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
-        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse
+        )
 
         # To be removed once the change to span_attributes (from span events) is complete
         scope._span.add_event(
@@ -146,23 +186,69 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
 
     # Record metrics
     if not disable_metrics:
-        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VERTEXAI,
-            scope._server_address, scope._server_port, scope._request_model, scope._request_model, environment,
-            application_name, scope._start_time, scope._end_time, scope._input_tokens, scope._output_tokens,
-            cost, scope._tbt, scope._ttft)
+        record_completion_metrics(
+            metrics,
+            SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            SemanticConvention.GEN_AI_SYSTEM_VERTEXAI,
+            scope._server_address,
+            scope._server_port,
+            scope._request_model,
+            scope._request_model,
+            environment,
+            application_name,
+            scope._start_time,
+            scope._end_time,
+            scope._input_tokens,
+            scope._output_tokens,
+            cost,
+            scope._tbt,
+            scope._ttft,
+        )
+
 
-def process_streaming_chat_response(scope, pricing_info, environment, application_name, metrics,
-                                    capture_message_content=False, disable_metrics=False, version=""):
+def process_streaming_chat_response(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="",
+):
     """
     Process streaming chat response and generate telemetry.
     """
 
-    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-                      capture_message_content, disable_metrics, version, is_stream=True)
-
-def process_chat_response(response, request_model, pricing_info, server_port, server_address,
-                          environment, application_name, metrics, start_time,
-                          span, capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
+    common_chat_logic(
+        scope,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=True,
+    )
+
+
+def process_chat_response(
+    response,
+    request_model,
+    pricing_info,
+    server_port,
+    server_address,
+    environment,
+    application_name,
+    metrics,
+    start_time,
+    span,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="1.0.0",
+    **kwargs,
+):
     """
     Process non-streaming chat response and generate telemetry.
     """
@@ -182,11 +268,21 @@ def process_chat_response(response, request_model, pricing_info, server_port, se
     scope._kwargs = kwargs
     scope._args = [kwargs.get("contents", [])]
 
-    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-                      capture_message_content, disable_metrics, version, is_stream=False)
+    common_chat_logic(
+        scope,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=False,
+    )
 
     return response
 
+
 def extract_vertexai_details(instance):
     """
     Extract VertexAI-specific details like location and model name.
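Note: in common_chat_logic above, total client token usage is recorded as the plain sum of input and output tokens. A minimal sketch of the underlying span-attribute pattern, assuming a configured OpenTelemetry tracer; the literal attribute-name strings below are illustrative stand-ins for the SemanticConvention constants used in the diff.

    from opentelemetry import trace

    tracer = trace.get_tracer(__name__)

    # Record token usage on a span the way common_chat_logic does.
    with tracer.start_as_current_span("chat gemini-pro") as span:
        input_tokens, output_tokens = 12, 34
        span.set_attribute("gen_ai.usage.input_tokens", input_tokens)
        span.set_attribute("gen_ai.usage.output_tokens", output_tokens)
        # Total usage is the plain sum, mirroring GEN_AI_CLIENT_TOKEN_USAGE.
        span.set_attribute("gen_ai.client.token.usage", input_tokens + output_tokens)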
openlit/instrumentation/vertexai/vertexai.py CHANGED
@@ -19,8 +19,17 @@ from openlit.semcov import SemanticConvention
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def send_message(version, environment, application_name, tracer,
-                 pricing_info, capture_message_content, metrics, disable_metrics):
+
+def send_message(
+    version,
+    environment,
+    application_name,
+    tracer,
+    pricing_info,
+    capture_message_content,
+    metrics,
+    disable_metrics,
+):
     """
     Generates a telemetry wrapper for VertexAI messages to collect metrics.
     """
@@ -31,16 +40,16 @@ def send_message(version, environment, application_name, tracer,
         """
 
         def __init__(
-                self,
-                wrapped,
-                span,
-                span_name,
-                kwargs,
-                server_address,
-                server_port,
-                request_model,
-                args,
-        ):
+            self,
+            wrapped,
+            span,
+            span_name,
+            kwargs,
+            server_address,
+            server_port,
+            request_model,
+            args,
+        ):
             self.__wrapped__ = wrapped
             self._span = span
             self._span_name = span_name
@@ -88,7 +97,7 @@ def send_message(version, environment, application_name, tracer,
                     metrics=metrics,
                     capture_message_content=capture_message_content,
                     disable_metrics=disable_metrics,
-                    version=version
+                    version=version,
                 )
             except Exception as e:
                 handle_exception(self._span, e)
@@ -108,7 +117,16 @@ def send_message(version, environment, application_name, tracer,
             awaited_wrapped = wrapped(*args, **kwargs)
             span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-            return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port, request_model, args)
+            return TracedSyncStream(
+                awaited_wrapped,
+                span,
+                span_name,
+                kwargs,
+                server_address,
+                server_port,
+                request_model,
+                args,
+            )
 
         else:
             with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
@@ -130,7 +148,7 @@ def send_message(version, environment, application_name, tracer,
                         capture_message_content=capture_message_content,
                         disable_metrics=disable_metrics,
                         version=version,
-                        **kwargs
+                        **kwargs,
                     )
 
             except Exception as e:
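Note: TracedSyncStream.__init__ above assigns self.__wrapped__, the attribute name wrapt uses for the proxied object. A sketch of that proxy pattern under the assumption it follows wrapt's ObjectProxy convention; CountingStream is a made-up name for illustration, not openlit code.

    from wrapt import ObjectProxy

    class CountingStream(ObjectProxy):
        def __init__(self, wrapped):
            super().__init__(wrapped)  # stores the stream as self.__wrapped__
            self._self_count = 0  # proxy-local state needs wrapt's _self_ prefix

        def __iter__(self):
            for item in self.__wrapped__:
                self._self_count += 1  # observe each chunk
                yield item  # pass it through untouched

    stream = CountingStream(iter([1, 2, 3]))
    assert list(stream) == [1, 2, 3] and stream._self_count == 3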
openlit/instrumentation/vllm/__init__.py CHANGED
@@ -5,12 +5,11 @@ import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper
 
-from openlit.instrumentation.vllm.vllm import (
-    generate
-)
+from openlit.instrumentation.vllm.vllm import generate
 
 _instruments = ("vllm >= 0.5.4",)
 
+
 class VLLMInstrumentor(BaseInstrumentor):
     """
     An instrumentor for vLLM client library.
@@ -33,8 +32,16 @@ class VLLMInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "vllm.entrypoints.llm",
             "LLM.generate",
-            generate(version, environment, application_name,
-                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+            generate(
+                version,
+                environment,
+                application_name,
+                tracer,
+                pricing_info,
+                capture_message_content,
+                metrics,
+                disable_metrics,
+            ),
         )
 
     def _uninstrument(self, **kwargs):
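Note: an instrumentor like VLLMInstrumentor is typically activated through openlit.init(), which sets up tracing and then applies the wrap_function_wrapper patch shown above. A hedged usage sketch; argument values are examples, and it assumes openlit and vllm are installed.

    import openlit

    openlit.init(application_name="vllm-demo", environment="dev")

    # From here on, calls to vllm.entrypoints.llm.LLM.generate emit spans.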
openlit/instrumentation/vllm/utils.py CHANGED
@@ -1,6 +1,7 @@
 """
 vLLM OpenTelemetry instrumentation utility functions
 """
+
 import time
 
 from opentelemetry.trace import Status, StatusCode
@@ -13,17 +14,19 @@ from openlit.__helpers import (
 )
 from openlit.semcov import SemanticConvention
 
+
 def get_inference_config(args, kwargs):
     """
     Safely extract inference configuration from args or kwargs.
     """
 
-    if 'sampling_params' in kwargs:
-        return kwargs['sampling_params']
+    if "sampling_params" in kwargs:
+        return kwargs["sampling_params"]
     if len(args) > 1:
         return args[1]
     return None
 
+
 def format_content(prompts):
     """
     Process a list of prompts to extract content.
@@ -36,8 +39,18 @@ def format_content(prompts):
     else:
         return str(prompts)
 
-def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-                      capture_message_content, disable_metrics, version, is_stream):
+
+def common_chat_logic(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content,
+    disable_metrics,
+    version,
+    is_stream,
+):
     """
     Process chat request and generate Telemetry
     """
@@ -60,37 +73,75 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
 
     # Common Span Attributes
-    common_span_attributes(scope,
-        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
-        scope._server_address, scope._server_port, request_model, request_model,
-        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+    common_span_attributes(
+        scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+        SemanticConvention.GEN_AI_SYSTEM_VLLM,
+        scope._server_address,
+        scope._server_port,
+        request_model,
+        request_model,
+        environment,
+        application_name,
+        is_stream,
+        scope._tbt,
+        scope._ttft,
+        version,
+    )
 
     # Span Attributes for Request parameters
     inference_config = get_inference_config(scope._args, scope._kwargs)
     if inference_config:
-        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, getattr(inference_config, 'max_tokens', -1))
-        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, getattr(inference_config, 'stop_sequences', []))
-        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, getattr(inference_config, 'temperature', 1.0))
-        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, getattr(inference_config, 'top_p', 1.0))
-        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, getattr(inference_config, 'top_k', -1))
-        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-            getattr(inference_config, 'presence_penalty', 0.0))
-        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-            getattr(inference_config, 'frequency_penalty', 0.0))
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
+            getattr(inference_config, "max_tokens", -1),
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
+            getattr(inference_config, "stop_sequences", []),
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
+            getattr(inference_config, "temperature", 1.0),
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_REQUEST_TOP_P,
+            getattr(inference_config, "top_p", 1.0),
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_REQUEST_TOP_K,
+            getattr(inference_config, "top_k", -1),
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+            getattr(inference_config, "presence_penalty", 0.0),
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+            getattr(inference_config, "frequency_penalty", 0.0),
+        )
 
     # Span Attributes for Response parameters
     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
 
     # Span Attributes for Cost and Tokens
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens
+    )
    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
 
     # Span Attributes for Content
     if capture_message_content:
         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
-        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, completion)
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_CONTENT_COMPLETION, completion
+        )
 
         # To be removed once the change to span_attributes (from span events) is complete
         scope._span.add_event(
@@ -110,14 +161,44 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
 
     # Metrics
     if not disable_metrics:
-        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
-            scope._server_address, scope._server_port, request_model, request_model, environment,
-            application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
-            cost, scope._tbt, scope._ttft)
-
-def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
-                          environment, application_name, metrics, start_time, span, args, kwargs,
-                          capture_message_content=False, disable_metrics=False, version="1.0.0"):
+        record_completion_metrics(
+            metrics,
+            SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            SemanticConvention.GEN_AI_SYSTEM_VLLM,
+            scope._server_address,
+            scope._server_port,
+            request_model,
+            request_model,
+            environment,
+            application_name,
+            scope._start_time,
+            scope._end_time,
+            input_tokens,
+            output_tokens,
+            cost,
+            scope._tbt,
+            scope._ttft,
+        )
+
+
+def process_chat_response(
+    instance,
+    response,
+    request_model,
+    pricing_info,
+    server_port,
+    server_address,
+    environment,
+    application_name,
+    metrics,
+    start_time,
+    span,
+    args,
+    kwargs,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="1.0.0",
+):
     """
     Process chat request and generate Telemetry
     """
@@ -137,7 +218,16 @@ def process_chat_response(instance, response, request_model, pricing_info, serve
     scope._args = args
     scope._kwargs = kwargs
 
-    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-                      capture_message_content, disable_metrics, version, is_stream=False)
+    common_chat_logic(
+        scope,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=False,
+    )
 
     return response
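Note: the request-parameter block above relies on getattr with a default, so sampling fields missing from the SamplingParams-like object fall back to neutral values instead of raising AttributeError. A tiny self-contained illustration; Params is a stand-in class, not a vLLM type.

    class Params:
        temperature = 0.7

    params = Params()
    print(getattr(params, "temperature", 1.0))  # 0.7 -- attribute present
    print(getattr(params, "top_k", -1))         # -1  -- falls back to the default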
openlit/instrumentation/vllm/vllm.py CHANGED
@@ -4,17 +4,21 @@ Module for monitoring vLLM API calls.
 
 import time
 from opentelemetry.trace import SpanKind
-from openlit.__helpers import (
-    handle_exception,
-    set_server_address_and_port
-)
-from openlit.instrumentation.vllm.utils import (
-    process_chat_response
-)
+from openlit.__helpers import handle_exception, set_server_address_and_port
+from openlit.instrumentation.vllm.utils import process_chat_response
 from openlit.semcov import SemanticConvention
 
-def generate(version, environment, application_name, tracer, pricing_info,
-             capture_message_content, metrics, disable_metrics):
+
+def generate(
+    version,
+    environment,
+    application_name,
+    tracer,
+    pricing_info,
+    capture_message_content,
+    metrics,
+    disable_metrics,
+):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -23,7 +27,9 @@ def generate(version, environment, application_name, tracer, pricing_info,
         """
         Wraps the GenAI function call.
        """
-        server_address, server_port = set_server_address_and_port(instance, "http://127.0.0.1", 443)
+        server_address, server_port = set_server_address_and_port(
+            instance, "http://127.0.0.1", 443
+        )
         request_model = instance.llm_engine.model_config.model or "facebook/opt-125m"
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
openlit/otel/events.py CHANGED
@@ -4,10 +4,17 @@ Setups up OpenTelemetry events emitter
 
 import os
 from opentelemetry import _events, _logs
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.sdk.resources import (
+    SERVICE_NAME,
+    TELEMETRY_SDK_NAME,
+    DEPLOYMENT_ENVIRONMENT,
+)
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk._events import EventLoggerProvider
-from opentelemetry.sdk._logs.export import BatchLogRecordProcessor, SimpleLogRecordProcessor
+from opentelemetry.sdk._logs.export import (
+    BatchLogRecordProcessor,
+    SimpleLogRecordProcessor,
+)
 from opentelemetry.sdk._logs import LoggerProvider
 from opentelemetry.sdk._logs.export import ConsoleLogExporter
 
@@ -19,7 +26,15 @@ else:
 # Global flag to check if the events provider initialization is complete.
 EVENTS_SET = False
 
-def setup_events(application_name, environment, event_logger, otlp_endpoint, otlp_headers, disable_batch):
+
+def setup_events(
+    application_name,
+    environment,
+    event_logger,
+    otlp_endpoint,
+    otlp_headers,
+    disable_batch,
+):
     """Setup OpenTelemetry events with the given configuration.
 
     Args:
@@ -42,10 +57,12 @@ def setup_events(application_name, environment, event_logger, otlp_endpoint, otl
     try:
         if not EVENTS_SET:
             # Create resource with service and environment information
-            resource = Resource.create(attributes={
-                SERVICE_NAME: application_name,
-                DEPLOYMENT_ENVIRONMENT: environment,
-                TELEMETRY_SDK_NAME: "openlit"}
+            resource = Resource.create(
+                attributes={
+                    SERVICE_NAME: application_name,
+                    DEPLOYMENT_ENVIRONMENT: environment,
+                    TELEMETRY_SDK_NAME: "openlit",
+                }
             )
 
             # Initialize the LoggerProvider with the created resource.
@@ -57,7 +74,9 @@ def setup_events(application_name, environment, event_logger, otlp_endpoint, otl
 
             if otlp_headers is not None:
                 if isinstance(otlp_headers, dict):
-                    headers_str = ','.join(f"{key}={value}" for key, value in otlp_headers.items())
+                    headers_str = ",".join(
+                        f"{key}={value}" for key, value in otlp_headers.items()
+                    )
                 else:
                     headers_str = otlp_headers
 
@@ -67,10 +86,16 @@ def setup_events(application_name, environment, event_logger, otlp_endpoint, otl
             if os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT"):
                 event_exporter = OTLPLogExporter()
                 # pylint: disable=line-too-long
-                logger_provider.add_log_record_processor(SimpleLogRecordProcessor(event_exporter)) if disable_batch else logger_provider.add_log_record_processor(BatchLogRecordProcessor(event_exporter))
+                logger_provider.add_log_record_processor(
+                    SimpleLogRecordProcessor(event_exporter)
+                ) if disable_batch else logger_provider.add_log_record_processor(
+                    BatchLogRecordProcessor(event_exporter)
+                )
             else:
                 event_exporter = ConsoleLogExporter()
-                logger_provider.add_log_record_processor(SimpleLogRecordProcessor(event_exporter))
+                logger_provider.add_log_record_processor(
+                    SimpleLogRecordProcessor(event_exporter)
+                )
 
             _logs.set_logger_provider(logger_provider)
             event_provider = EventLoggerProvider()
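Note: the Black-wrapped conditional expression in the last hunk is evaluated purely for its side effect of registering exactly one log-record processor. An equivalent explicit if/else, shown as a sketch only; register_processor is a hypothetical helper name, not part of the package.

    from opentelemetry.sdk._logs.export import (
        BatchLogRecordProcessor,
        SimpleLogRecordProcessor,
    )

    def register_processor(logger_provider, event_exporter, disable_batch):
        # Same behavior as the conditional expression: exactly one processor,
        # batched unless batching is explicitly disabled.
        if disable_batch:
            logger_provider.add_log_record_processor(
                SimpleLogRecordProcessor(event_exporter)
            )
        else:
            logger_provider.add_log_record_processor(
                BatchLogRecordProcessor(event_exporter)
            )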