openlit 1.33.8__py3-none-any.whl → 1.33.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. openlit/__helpers.py +88 -0
  2. openlit/__init__.py +4 -3
  3. openlit/instrumentation/ag2/ag2.py +5 -5
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +9 -9
  13. openlit/instrumentation/astra/async_astra.py +9 -9
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +9 -9
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +5 -5
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
  26. openlit/instrumentation/crewai/crewai.py +6 -4
  27. openlit/instrumentation/dynamiq/dynamiq.py +5 -5
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +71 -46
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +71 -51
  30. openlit/instrumentation/embedchain/embedchain.py +9 -9
  31. openlit/instrumentation/firecrawl/firecrawl.py +5 -5
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/__init__.py +2 -2
  36. openlit/instrumentation/gpt4all/gpt4all.py +345 -220
  37. openlit/instrumentation/gpu/__init__.py +5 -5
  38. openlit/instrumentation/groq/__init__.py +2 -2
  39. openlit/instrumentation/groq/async_groq.py +356 -240
  40. openlit/instrumentation/groq/groq.py +356 -240
  41. openlit/instrumentation/haystack/haystack.py +5 -5
  42. openlit/instrumentation/julep/async_julep.py +5 -5
  43. openlit/instrumentation/julep/julep.py +5 -5
  44. openlit/instrumentation/langchain/__init__.py +13 -7
  45. openlit/instrumentation/langchain/async_langchain.py +384 -0
  46. openlit/instrumentation/langchain/langchain.py +105 -492
  47. openlit/instrumentation/letta/letta.py +11 -9
  48. openlit/instrumentation/litellm/__init__.py +4 -5
  49. openlit/instrumentation/litellm/async_litellm.py +318 -247
  50. openlit/instrumentation/litellm/litellm.py +314 -243
  51. openlit/instrumentation/llamaindex/llamaindex.py +5 -5
  52. openlit/instrumentation/mem0/mem0.py +5 -5
  53. openlit/instrumentation/milvus/milvus.py +9 -9
  54. openlit/instrumentation/mistral/__init__.py +6 -6
  55. openlit/instrumentation/mistral/async_mistral.py +423 -250
  56. openlit/instrumentation/mistral/mistral.py +420 -246
  57. openlit/instrumentation/multion/async_multion.py +6 -4
  58. openlit/instrumentation/multion/multion.py +6 -4
  59. openlit/instrumentation/ollama/__init__.py +8 -30
  60. openlit/instrumentation/ollama/async_ollama.py +385 -417
  61. openlit/instrumentation/ollama/ollama.py +384 -417
  62. openlit/instrumentation/openai/__init__.py +11 -230
  63. openlit/instrumentation/openai/async_openai.py +433 -410
  64. openlit/instrumentation/openai/openai.py +414 -394
  65. openlit/instrumentation/phidata/phidata.py +6 -4
  66. openlit/instrumentation/pinecone/pinecone.py +9 -9
  67. openlit/instrumentation/premai/__init__.py +2 -2
  68. openlit/instrumentation/premai/premai.py +262 -213
  69. openlit/instrumentation/qdrant/async_qdrant.py +9 -9
  70. openlit/instrumentation/qdrant/qdrant.py +9 -9
  71. openlit/instrumentation/reka/__init__.py +2 -2
  72. openlit/instrumentation/reka/async_reka.py +90 -52
  73. openlit/instrumentation/reka/reka.py +90 -52
  74. openlit/instrumentation/together/__init__.py +4 -4
  75. openlit/instrumentation/together/async_together.py +278 -236
  76. openlit/instrumentation/together/together.py +278 -236
  77. openlit/instrumentation/transformers/__init__.py +1 -1
  78. openlit/instrumentation/transformers/transformers.py +76 -45
  79. openlit/instrumentation/vertexai/__init__.py +14 -64
  80. openlit/instrumentation/vertexai/async_vertexai.py +330 -987
  81. openlit/instrumentation/vertexai/vertexai.py +330 -987
  82. openlit/instrumentation/vllm/__init__.py +1 -1
  83. openlit/instrumentation/vllm/vllm.py +66 -36
  84. openlit/otel/metrics.py +98 -7
  85. openlit/semcov/__init__.py +113 -80
  86. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
  87. openlit-1.33.10.dist-info/RECORD +122 -0
  88. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/WHEEL +1 -1
  89. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  90. openlit/instrumentation/openai/azure_openai.py +0 -898
  91. openlit-1.33.8.dist-info/RECORD +0 -122
  92. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
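
The rewritten instrumentations in this release (including the vertexai.py diff shown below, item 81 above) import new timing helpers (calculate_ttft, calculate_tbt) and a create_metrics_attributes factory from openlit/__helpers.py (+88 lines). Their implementations are not part of the excerpt shown here; a minimal sketch of the behavior the call sites imply (an assumption, not the package's exact code):

    def calculate_ttft(timestamps, start_time):
        # Time to first token: delay from request start to the first streamed chunk.
        return timestamps[0] - start_time if timestamps else 0

    def calculate_tbt(timestamps):
        # Time between tokens: average gap between consecutive streamed chunks.
        if len(timestamps) < 2:
            return 0
        gaps = [t2 - t1 for t1, t2 in zip(timestamps, timestamps[1:])]
        return sum(gaps) / len(gaps)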
@@ -1,423 +1,402 @@
- # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, protected-access, too-many-lines
  """
  Module for monitoring VertexAI API calls.
  """
  
  import logging
- import math
+ import time
  from opentelemetry.trace import SpanKind, Status, StatusCode
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
- from openlit.__helpers import get_chat_model_cost, get_embed_model_cost, handle_exception
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+ from openlit.__helpers import (
+     get_chat_model_cost,
+     handle_exception,
+     calculate_ttft,
+     calculate_tbt,
+     create_metrics_attributes,
+ )
  from openlit.semcov import SemanticConvetion
  
  # Initialize logger for logging potential issues and operations
  logger = logging.getLogger(__name__)
  
- def generate_content(gen_ai_endpoint, version, environment, application_name, tracer,
+ def send_message(version, environment, application_name, tracer,
                   pricing_info, trace_content, metrics, disable_metrics):
      """
      Generates a telemetry wrapper for messages to collect metrics.
  
      Args:
-         gen_ai_endpoint: Endpoint identifier for logging and tracing.
          version: Version of the monitoring package.
          environment: Deployment environment (e.g., production, staging).
-         application_name: Name of the application using the OpenAI API.
+         application_name: Name of the application using the VertexAI API.
          tracer: OpenTelemetry tracer for creating spans.
-         pricing_info: Information used for calculating the cost of OpenAI usage.
+         pricing_info: Information used for calculating the cost of VertexAI usage.
          trace_content: Flag indicating whether to trace the actual content.
  
      Returns:
          A function that wraps the chat method to add telemetry.
      """
  
-     def wrapper(wrapped, instance, args, kwargs):
+     class TracedSyncStream:
          """
-         Wraps the 'generate_content' API call to add telemetry.
-
-         This collects metrics such as execution time, cost, and token usage, and handles errors
-         gracefully, adding details to the trace for observability.
+         Wrapper for streaming responses to collect metrics and trace data.
+         Wraps the response to collect message IDs and aggregated response.
  
-         Args:
-             wrapped: The original 'generate_content' method to be wrapped.
-             instance: The instance of the class where the original method is defined.
-             args: Positional arguments for the 'generate_content' method.
-             kwargs: Keyword arguments for the 'generate_content' method.
+         This class implements the '__aiter__' and '__anext__' methods that
+         handle asynchronous streaming responses.
  
-         Returns:
-             The response from the original 'generate_content' method.
+         This class also implements '__aenter__' and '__aexit__' methods that
+         handle asynchronous context management protocol.
          """
+         def __init__(
+                 self,
+                 wrapped,
+                 span,
+                 kwargs,
+                 server_address,
+                 server_port,
+                 request_model,
+                 args,
+             ):
+             self.__wrapped__ = wrapped
+             self._span = span
+             # Placeholder for aggregating streaming response
+             self._llmresponse = ""
+             self._input_tokens = ""
+             self._output_tokens = ""
+
+             self._args = args
+             self._kwargs = kwargs
+             self._start_time = time.time()
+             self._end_time = None
+             self._timestamps = []
+             self._ttft = 0
+             self._tbt = 0
+             self._server_address = server_address
+             self._server_port = server_port
+             self._request_model = request_model
+
+         def __enter__(self):
+             self.__wrapped__.__enter__()
+             return self
+
+         def __exit__(self, exc_type, exc_value, traceback):
+             self.__wrapped__.__exit__(exc_type, exc_value, traceback)
+
+         def __iter__(self):
+             return self
+
+         def __getattr__(self, name):
+             """Delegate attribute access to the wrapped object."""
+             return getattr(self.__wrapped__, name)
+
+         def __next__(self):
+             try:
+                 chunk = self.__wrapped__.__next__()
+                 end_time = time.time()
+                 # Record the timestamp for the current chunk
+                 self._timestamps.append(end_time)
  
-         # Check if streaming is enabled for the API call
-         streaming = kwargs.get("stream", False)
-
-         # pylint: disable=no-else-return
-         if streaming:
-             # Special handling for streaming response to accommodate the nature of data flow
-             def stream_generator():
-                 with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                     # Placeholder for aggregating streaming response
-                     llmresponse = ""
-
-                     # Loop through streaming events capturing relevant details
-                     for event in wrapped(*args, **kwargs):
-                         llmresponse += str(event.text)
-                         prompt_tokens = event.usage_metadata.prompt_token_count
-                         completion_tokens = event.usage_metadata.candidates_token_count
-                         total_tokens = event.usage_metadata.total_token_count
-                         yield event
-
-                     # Handling exception ensure observability without disrupting operation
-                     try:
-                         prompt = str(args[0][0])
-
-                         model = "/".join(instance._model_name.split("/")[3:])
-
-                         # Calculate cost of the operation
-                         cost = get_chat_model_cost(model,
-                             pricing_info, prompt_tokens,
-                             completion_tokens)
-
-                         # Set Span attributes
-                         span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                         span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                             SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
-                         span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                             SemanticConvetion.GEN_AI_TYPE_CHAT)
-                         span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                             gen_ai_endpoint)
-                         span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                             environment)
-                         span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                             application_name)
-                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                             model)
-                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                             True)
-                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                             prompt_tokens)
-                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                             completion_tokens)
-                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                             total_tokens)
-                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                             cost)
-                         if trace_content:
-                             span.add_event(
-                                 name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                                 attributes={
-                                     SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                                 },
-                             )
-                             span.add_event(
-                                 name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                 attributes={
-                                     SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                                 },
-                             )
-
-                         span.set_status(Status(StatusCode.OK))
-
-                         if disable_metrics is False:
-                             attributes = {
-                                 TELEMETRY_SDK_NAME:
-                                     "openlit",
-                                 SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                     application_name,
-                                 SemanticConvetion.GEN_AI_SYSTEM:
-                                     SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
-                                 SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                     environment,
-                                 SemanticConvetion.GEN_AI_TYPE:
-                                     SemanticConvetion.GEN_AI_TYPE_CHAT,
-                                 SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                     model
-                             }
-
-                             metrics["genai_requests"].add(1, attributes)
-                             metrics["genai_total_tokens"].add(
-                                 total_tokens, attributes
-                             )
-                             metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                             metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                             metrics["genai_cost"].record(cost, attributes)
-
-                     except Exception as e:
-                         handle_exception(span, e)
-                         logger.error("Error in trace creation: %s", e)
-
-             return stream_generator()
+                 if len(self._timestamps) == 1:
+                     # Calculate time to first chunk
+                     self._ttft = calculate_ttft(self._timestamps, self._start_time)
  
-         # Handling for non-streaming responses
-         else:
-             with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
-                 response = wrapped(*args, **kwargs)
+                 self._llmresponse += str(chunk.text)
+                 self._input_tokens = chunk.usage_metadata.prompt_token_count
+                 self._output_tokens = chunk.usage_metadata.candidates_token_count
  
+                 return chunk
+             except StopIteration:
+                 # Handling exception ensure observability without disrupting operation
                  try:
+                     self._end_time = time.time()
+                     if len(self._timestamps) > 1:
+                         self._tbt = calculate_tbt(self._timestamps)
+
                      # Format 'messages' into a single string
-                     prompt = str(args[0][0])
+                     message_prompt = self._kwargs.get("messages", "")
+                     formatted_messages = []
+                     for message in message_prompt:
+                         role = message["role"]
+                         content = message["content"]
+
+                         if isinstance(content, list):
+                             content_str_list = []
+                             for item in content:
+                                 if item["type"] == "text":
+                                     content_str_list.append(f'text: {item["text"]}')
+                                 elif (item["type"] == "image_url" and
+                                         not item["image_url"]["url"].startswith("data:")):
+                                     content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+                             content_str = ", ".join(content_str_list)
+                             formatted_messages.append(f"{role}: {content_str}")
+                         else:
+                             formatted_messages.append(f"{role}: {content}")
+                     prompt = "\n".join(formatted_messages) or str(self._args[0][0])
  
-                     model = "/".join(instance._model_name.split("/")[3:])
                      # Calculate cost of the operation
-                     cost = get_chat_model_cost(model,
-                         pricing_info,
-                         response.usage_metadata.prompt_token_count,
-                         response.usage_metadata.candidates_token_count)
-
-                     # Set Span attribues
-                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                     cost = get_chat_model_cost(self._request_model,
+                         pricing_info, self._input_tokens,
+                         self._output_tokens)
+
+                     # Set Span attributes (OTel Semconv)
+                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                     self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                         SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+                     self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                          SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
-                     span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                         SemanticConvetion.GEN_AI_TYPE_CHAT)
-                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                         gen_ai_endpoint)
-                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                         self._request_model)
+                     self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                         self._server_port)
+
+                     inference_config = self._kwargs.get('generation_config', {})
+
+                     # List of attributes and their config keys
+                     attributes = [
+                         (SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
+                         (SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, 'max_output_tokens'),
+                         (SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
+                         (SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
+                         (SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
+                         (SemanticConvetion.GEN_AI_REQUEST_TOP_P, 'top_p'),
+                         (SemanticConvetion.GEN_AI_REQUEST_TOP_K, 'top_k'),
+                     ]
+
+                     # Set each attribute if the corresponding value exists and is not None
+                     for attribute, key in attributes:
+                         # Use the `get` method to safely access keys in the dictionary
+                         value = inference_config.get(key)
+                         if value is not None:
+                             self._span.set_attribute(attribute, value)
+
+                     self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                         self._request_model)
+                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                         self._input_tokens)
+                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                         self._output_tokens)
+                     self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                         self._server_address)
+                     if isinstance(self._llmresponse, str):
+                         self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                             "text")
+                     else:
+                         self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                             "json")
+
+                     # Set Span attributes (Extra)
+                     self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                          environment)
-                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                     self._span.set_attribute(SERVICE_NAME,
                          application_name)
-                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                         model)
-                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                         False)
-                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                         response.usage_metadata.prompt_token_count)
-                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                         response.usage_metadata.candidates_token_count)
-                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                         response.usage_metadata.total_token_count)
-                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                         True)
+                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                         self._input_tokens + self._output_tokens)
+                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                          cost)
+                     self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                         self._tbt)
+                     self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                         self._ttft)
+                     self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                         version)
                      if trace_content:
-                         span.add_event(
+                         self._span.add_event(
                              name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                              attributes={
                                  SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
                              },
                          )
-                         span.add_event(
+                         self._span.add_event(
                              name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                              attributes={
-                                 # pylint: disable=line-too-long
-                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.candidates[0].content.parts[0].text,
+                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                              },
                          )
-
-                     span.set_status(Status(StatusCode.OK))
+                     self._span.set_status(Status(StatusCode.OK))
  
                      if disable_metrics is False:
-                         attributes = {
-                             TELEMETRY_SDK_NAME:
-                                 "openlit",
-                             SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                 application_name,
-                             SemanticConvetion.GEN_AI_SYSTEM:
-                                 SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
-                             SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                 environment,
-                             SemanticConvetion.GEN_AI_TYPE:
-                                 SemanticConvetion.GEN_AI_TYPE_CHAT,
-                             SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                 model
-                         }
+                         attributes = create_metrics_attributes(
+                             service_name=application_name,
+                             deployment_environment=environment,
+                             operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                             system=SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
+                             request_model=self._request_model,
+                             server_address=self._server_address,
+                             server_port=self._server_port,
+                             response_model=self._request_model,
+                         )
  
+                         metrics["genai_client_usage_tokens"].record(
+                             self._input_tokens + self._output_tokens, attributes
+                         )
+                         metrics["genai_client_operation_duration"].record(
+                             self._end_time - self._start_time, attributes
+                         )
+                         metrics["genai_server_tbt"].record(
+                             self._tbt, attributes
+                         )
+                         metrics["genai_server_ttft"].record(
+                             self._ttft, attributes
+                         )
                          metrics["genai_requests"].add(1, attributes)
-                         metrics["genai_total_tokens"].add(
-                             response.usage_metadata.total_token_count, attributes)
-                         metrics["genai_completion_tokens"].add(
-                             response.usage_metadata.candidates_token_count, attributes)
-                         metrics["genai_prompt_tokens"].add(
-                             response.usage_metadata.prompt_token_count, attributes)
+                         metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
+                         metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
                          metrics["genai_cost"].record(cost, attributes)
  
-                     # Return original response
-                     return response
-
                  except Exception as e:
-                     handle_exception(span, e)
+                     handle_exception(self._span, e)
                      logger.error("Error in trace creation: %s", e)
-
-                     # Return original response
-                     return response
-
-     return wrapper
-
-
- def send_message(gen_ai_endpoint, version, environment, application_name, tracer,
-                  pricing_info, trace_content, metrics, disable_metrics):
-     """
-     Generates a telemetry wrapper for messages to collect metrics.
-
-     Args:
-         gen_ai_endpoint: Endpoint identifier for logging and tracing.
-         version: Version of the monitoring package.
-         environment: Deployment environment (e.g., production, staging).
-         application_name: Name of the application using the OpenAI API.
-         tracer: OpenTelemetry tracer for creating spans.
-         pricing_info: Information used for calculating the cost of OpenAI usage.
-         trace_content: Flag indicating whether to trace the actual content.
-
-     Returns:
-         A function that wraps the chat method to add telemetry.
-     """
+                 finally:
+                     self._span.end()
+                 raise
  
      def wrapper(wrapped, instance, args, kwargs):
          """
-         Wraps the 'generate_content' API call to add telemetry.
+         Wraps the 'messages' API call to add telemetry.
  
          This collects metrics such as execution time, cost, and token usage, and handles errors
          gracefully, adding details to the trace for observability.
  
          Args:
-             wrapped: The original 'generate_content' method to be wrapped.
+             wrapped: The original 'messages' method to be wrapped.
              instance: The instance of the class where the original method is defined.
-             args: Positional arguments for the 'generate_content' method.
-             kwargs: Keyword arguments for the 'generate_content' method.
+             args: Positional arguments for the 'messages' method.
+             kwargs: Keyword arguments for the 'messages' method.
  
          Returns:
-             The response from the original 'generate_content' method.
+             The response from the original 'messages' method.
          """
  
          # Check if streaming is enabled for the API call
          streaming = kwargs.get("stream", False)
  
-         # pylint: disable=no-else-return
-         if streaming:
-             # Special handling for streaming response to accommodate the nature of data flow
-             def stream_generator():
-                 with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                     # Placeholder for aggregating streaming response
-                     llmresponse = ""
-
-                     # Loop through streaming events capturing relevant details
-                     for event in wrapped(*args, **kwargs):
-                         llmresponse += str(event.text)
-                         prompt_tokens = event.usage_metadata.prompt_token_count
-                         completion_tokens = event.usage_metadata.candidates_token_count
-                         total_tokens = event.usage_metadata.total_token_count
-                         yield event
-
-                     # Handling exception ensure observability without disrupting operation
-                     try:
-                         prompt = args[0][0]
-
-                         model = "/".join(instance._model._model_name.split("/")[3:])
-
-                         # Calculate cost of the operation
-                         cost = get_chat_model_cost(model,
-                             pricing_info, prompt_tokens,
-                             completion_tokens)
-
-                         # Set Span attributes
-                         span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                         span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                             SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
-                         span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                             SemanticConvetion.GEN_AI_TYPE_CHAT)
-                         span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                             gen_ai_endpoint)
-                         span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                             environment)
-                         span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                             application_name)
-                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                             model)
-                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                             True)
-                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                             prompt_tokens)
-                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                             completion_tokens)
-                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                             total_tokens)
-                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                             cost)
-                         if trace_content:
-                             span.add_event(
-                                 name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                                 attributes={
-                                     SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                                 },
-                             )
-                             span.add_event(
-                                 name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                 attributes={
-                                     SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                                 },
-                             )
+         try:
+             location = instance._model._location
+             request_model = "/".join(instance._model._model_name.split("/")[3:])
+         except:
+             location = instance._location
+             request_model = "/".join(instance._model_name.split("/")[3:])
  
-                         span.set_status(Status(StatusCode.OK))
+         server_address, server_port = location + '-aiplatform.googleapis.com', 443
  
-                         if disable_metrics is False:
-                             attributes = {
-                                 TELEMETRY_SDK_NAME:
-                                     "openlit",
-                                 SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                     application_name,
-                                 SemanticConvetion.GEN_AI_SYSTEM:
-                                     SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
-                                 SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                     environment,
-                                 SemanticConvetion.GEN_AI_TYPE:
-                                     SemanticConvetion.GEN_AI_TYPE_CHAT,
-                                 SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                     model
-                             }
+         span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
  
-                             metrics["genai_requests"].add(1, attributes)
-                             metrics["genai_total_tokens"].add(
-                                 total_tokens, attributes
-                             )
-                             metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                             metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                             metrics["genai_cost"].record(cost, attributes)
-
-                     except Exception as e:
-                         handle_exception(span, e)
-                         logger.error("Error in trace creation: %s", e)
+         # pylint: disable=no-else-return
+         if streaming:
+             # Special handling for streaming response to accommodate the nature of data flow
+             awaited_wrapped = wrapped(*args, **kwargs)
+             span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
  
-             return stream_generator()
+             return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port, request_model, args)
  
          # Handling for non-streaming responses
          else:
-             with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+             with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+                 start_time = time.time()
                  response = wrapped(*args, **kwargs)
+                 end_time = time.time()
  
                  try:
                      # Format 'messages' into a single string
-                     prompt = args[0]
+                     message_prompt = kwargs.get("contents", [])
+                     formatted_messages = []
+
+                     for content in message_prompt:
+                         role = content.role
+                         parts = content.parts
+                         content_str = []
+
+                         for part in parts:
+                             # Collect relevant fields and handle each type of data that Part could contain
+                             if part.text:
+                                 content_str.append(f"text: {part.text}")
+                             if part.video_metadata:
+                                 content_str.append(f"video_metadata: {part.video_metadata}")
+                             if part.thought:
+                                 content_str.append(f"thought: {part.thought}")
+                             if part.code_execution_result:
+                                 content_str.append(f"code_execution_result: {part.code_execution_result}")
+                             if part.executable_code:
+                                 content_str.append(f"executable_code: {part.executable_code}")
+                             if part.file_data:
+                                 content_str.append(f"file_data: {part.file_data}")
+                             if part.function_call:
+                                 content_str.append(f"function_call: {part.function_call}")
+                             if part.function_response:
+                                 content_str.append(f"function_response: {part.function_response}")
+                             if part.inline_data:
+                                 content_str.append(f"inline_data: {part.inline_data}")
+
+                         formatted_messages.append(f"{role}: {', '.join(content_str)}")
+
+                     prompt = "\n".join(formatted_messages) or str(args[0][0])
+
+                     input_tokens = response.usage_metadata.prompt_token_count
+                     output_tokens = response.usage_metadata.candidates_token_count
  
-                     model = "/".join(instance._model._model_name.split("/")[3:])
                      # Calculate cost of the operation
-                     cost = get_chat_model_cost(model,
-                         pricing_info,
-                         response.usage_metadata.prompt_token_count,
-                         response.usage_metadata.candidates_token_count)
+                     cost = get_chat_model_cost(request_model,
+                         pricing_info, input_tokens,
+                         output_tokens)
  
-                     # Set Span attribues
+                     # Set base span attribues (OTel Semconv)
                      span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                     span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                         SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                      span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                          SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
-                     span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                         SemanticConvetion.GEN_AI_TYPE_CHAT)
-                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                         gen_ai_endpoint)
-                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                         request_model)
+                     span.set_attribute(SemanticConvetion.SERVER_PORT,
+                         server_port)
+
+                     inference_config = kwargs.get('generation_config', {})
+
+                     # List of attributes and their config keys
+                     attributes = [
+                         (SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
+                         (SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, 'max_output_tokens'),
+                         (SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
+                         (SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
+                         (SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
+                         (SemanticConvetion.GEN_AI_REQUEST_TOP_P, 'top_p'),
+                         (SemanticConvetion.GEN_AI_REQUEST_TOP_K, 'top_k'),
+                     ]
+
+                     # Set each attribute if the corresponding value exists and is not None
+                     for attribute, key in attributes:
+                         # Use the `get` method to safely access keys in the dictionary
+                         value = inference_config.get(key)
+                         if value is not None:
+                             span.set_attribute(attribute, value)
+
+                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                         request_model)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                         input_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                         output_tokens)
+                     span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                         server_address)
+                     # span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                     #     [str(response.candidates[0].finish_reason)])
+
+                     # Set base span attribues (Extras)
+                     span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                          environment)
-                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                     span.set_attribute(SERVICE_NAME,
                          application_name)
-                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                         model)
                      span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                          False)
-                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                         response.usage_metadata.prompt_token_count)
-                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                         response.usage_metadata.candidates_token_count)
                      span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                         response.usage_metadata.total_token_count)
+                         input_tokens + output_tokens)
                      span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                          cost)
+                     span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                         end_time - start_time)
+                     span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                         version)
                      if trace_content:
                          span.add_event(
                              name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -428,36 +407,43 @@ def send_message(gen_ai_endpoint, version, environment, application_name, tracer
                          span.add_event(
                              name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                              attributes={
-                                 # pylint: disable=line-too-long
-                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.candidates[0].content.parts[0].text,
+                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
                              },
                          )
  
+                     if isinstance(response.text, str):
+                         span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                             "text")
+                     elif response.text is not None:
+                         span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                             "json")
+
                      span.set_status(Status(StatusCode.OK))
  
                      if disable_metrics is False:
-                         attributes = {
-                             TELEMETRY_SDK_NAME:
-                                 "openlit",
-                             SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                 application_name,
-                             SemanticConvetion.GEN_AI_SYSTEM:
-                                 SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
-                             SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                 environment,
-                             SemanticConvetion.GEN_AI_TYPE:
-                                 SemanticConvetion.GEN_AI_TYPE_CHAT,
-                             SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                 model
-                         }
+                         attributes = create_metrics_attributes(
+                             service_name=application_name,
+                             deployment_environment=environment,
+                             operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                             system=SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
+                             request_model=request_model,
+                             server_address=server_address,
+                             server_port=server_port,
+                             response_model=request_model,
+                         )
  
+                         metrics["genai_client_usage_tokens"].record(
+                             input_tokens + output_tokens, attributes
+                         )
+                         metrics["genai_client_operation_duration"].record(
+                             end_time - start_time, attributes
+                         )
+                         metrics["genai_server_ttft"].record(
+                             end_time - start_time, attributes
+                         )
                          metrics["genai_requests"].add(1, attributes)
-                         metrics["genai_total_tokens"].add(
-                             response.usage_metadata.total_token_count, attributes)
-                         metrics["genai_completion_tokens"].add(
-                             response.usage_metadata.candidates_token_count, attributes)
-                         metrics["genai_prompt_tokens"].add(
-                             response.usage_metadata.prompt_token_count, attributes)
+                         metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                         metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                          metrics["genai_cost"].record(cost, attributes)
  
                      # Return original response
@@ -471,646 +457,3 @@ def send_message(gen_ai_endpoint, version, environment, application_name, tracer
                      return response
  
      return wrapper
-
- def predict(gen_ai_endpoint, version, environment, application_name, tracer,
-             pricing_info, trace_content, metrics, disable_metrics):
-     """
-     Generates a telemetry wrapper for messages to collect metrics.
-
-     Args:
-         gen_ai_endpoint: Endpoint identifier for logging and tracing.
-         version: Version of the monitoring package.
-         environment: Deployment environment (e.g., production, staging).
-         application_name: Name of the application using the OpenAI API.
-         tracer: OpenTelemetry tracer for creating spans.
-         pricing_info: Information used for calculating the cost of OpenAI usage.
-         trace_content: Flag indicating whether to trace the actual content.
-
-     Returns:
-         A function that wraps the chat method to add telemetry.
-     """
-
-     def wrapper(wrapped, instance, args, kwargs):
-         """
-         Wraps the 'predict' API call to add telemetry.
-
-         This collects metrics such as execution time, cost, and token usage, and handles errors
-         gracefully, adding details to the trace for observability.
-
-         Args:
-             wrapped: The original 'predict' method to be wrapped.
-             instance: The instance of the class where the original method is defined.
-             args: Positional arguments for the 'predict' method.
-             kwargs: Keyword arguments for the 'predict' method.
-
-         Returns:
-             The response from the original 'predict' method.
-         """
-
-         with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
-             response = wrapped(*args, **kwargs)
-
-             try:
-                 prompt = args[0]
-
-                 model = instance._model_id
-                 #pylint: disable=line-too-long
-                 prompt_tokens = response._prediction_response.metadata["tokenMetadata"]["inputTokenCount"]["totalTokens"]
-                 completion_tokens = response._prediction_response.metadata["tokenMetadata"]["outputTokenCount"]["totalTokens"]
-                 total_tokens = prompt_tokens + completion_tokens
-
-                 #Calculate cost of the operation
-                 cost = get_chat_model_cost(model,
-                     pricing_info, prompt_tokens,
-                     completion_tokens)
-
-                 # Set Span attribues
-                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                     SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
-                 span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                     SemanticConvetion.GEN_AI_TYPE_CHAT)
-                 span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                     gen_ai_endpoint)
-                 span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                     environment)
-                 span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                     application_name)
-                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                     model)
-                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                     False)
-                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                     prompt_tokens)
-                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                     completion_tokens)
-                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                     total_tokens)
-                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                     cost)
-                 if trace_content:
-                     span.add_event(
-                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                         attributes={
-                             SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                         },
-                     )
-                     span.add_event(
-                         name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                         attributes={
-                             SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
-                         },
-                     )
-
-                 span.set_status(Status(StatusCode.OK))
-
-                 if disable_metrics is False:
-                     attributes = {
-                         TELEMETRY_SDK_NAME:
-                             "openlit",
-                         SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                             application_name,
-                         SemanticConvetion.GEN_AI_SYSTEM:
-                             SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
-                         SemanticConvetion.GEN_AI_ENVIRONMENT:
-                             environment,
-                         SemanticConvetion.GEN_AI_TYPE:
-                             SemanticConvetion.GEN_AI_TYPE_CHAT,
-                         SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                             model
-                     }
-
-                     metrics["genai_requests"].add(1, attributes)
-                     metrics["genai_total_tokens"].add(
-                         total_tokens, attributes)
-                     metrics["genai_completion_tokens"].add(
-                         completion_tokens, attributes)
-                     metrics["genai_prompt_tokens"].add(
-                         prompt_tokens, attributes)
-                     metrics["genai_cost"].record(cost, attributes)
-
-                 # Return original response
-                 return response
-
-             except Exception as e:
-                 handle_exception(span, e)
-                 logger.error("Error in trace creation: %s", e)
-
-                 # Return original response
-                 return response
-
-     return wrapper
-
- def predict_streaming(gen_ai_endpoint, version, environment, application_name, tracer,
-                       pricing_info, trace_content, metrics, disable_metrics):
-     """
-     Generates a telemetry wrapper for messages to collect metrics.
-
-     Args:
-         gen_ai_endpoint: Endpoint identifier for logging and tracing.
-         version: Version of the monitoring package.
-         environment: Deployment environment (e.g., production, staging).
-         application_name: Name of the application using the OpenAI API.
-         tracer: OpenTelemetry tracer for creating spans.
-         pricing_info: Information used for calculating the cost of OpenAI usage.
-         trace_content: Flag indicating whether to trace the actual content.
-
-     Returns:
-         A function that wraps the chat method to add telemetry.
-     """
-
-     def wrapper(wrapped, instance, args, kwargs):
-         """
-         Wraps the 'predict' API call to add telemetry.
-
-         This collects metrics such as execution time, cost, and token usage, and handles errors
-         gracefully, adding details to the trace for observability.
-
-         Args:
-             wrapped: The original 'predict' method to be wrapped.
-             instance: The instance of the class where the original method is defined.
-             args: Positional arguments for the 'predict' method.
-             kwargs: Keyword arguments for the 'predict' method.
-
-         Returns:
-             The response from the original 'predict' method.
-         """
-
-         # Special handling for streaming response to accommodate the nature of data flow
-         def stream_generator():
-             with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                 # Placeholder for aggregating streaming response
-                 llmresponse = ""
-
-                 # Loop through streaming events capturing relevant details
-                 for event in wrapped(*args, **kwargs):
-                     llmresponse += str(event)
-                     yield event
-
-                 # Handling exception ensure observability without disrupting operation
-                 try:
-                     prompt = args[0]
-                     llmresponse = llmresponse.split('TextGenerationResponse',
-                         maxsplit=1)[0].rstrip()
-
-                     prompt_tokens = math.ceil(len(prompt) / 4)
-                     completion_tokens = math.ceil(len(llmresponse) / 4)
-                     total_tokens = prompt_tokens + completion_tokens
-
-                     model = instance._model_id
-
-                     # Calculate cost of the operation
-                     cost = get_chat_model_cost(model,
-                         pricing_info, prompt_tokens,
-                         completion_tokens)
-
-                     # Set Span attributes
-                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                         SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
-                     span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                         SemanticConvetion.GEN_AI_TYPE_CHAT)
-                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                         gen_ai_endpoint)
-                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                         environment)
-                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                         application_name)
-                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                         model)
-                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                         True)
-                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                         prompt_tokens)
-                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                         completion_tokens)
-                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                         total_tokens)
-                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                         cost)
-                     if trace_content:
-                         span.add_event(
-                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                             attributes={
-                                 SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                             },
-                         )
-                         span.add_event(
-                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                             attributes={
-                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                             },
-                         )
-
-                     span.set_status(Status(StatusCode.OK))
-
-                     if disable_metrics is False:
-                         attributes = {
-                             TELEMETRY_SDK_NAME:
-                                 "openlit",
-                             SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                 application_name,
-                             SemanticConvetion.GEN_AI_SYSTEM:
-                                 SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
-                             SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                 environment,
-                             SemanticConvetion.GEN_AI_TYPE:
-                                 SemanticConvetion.GEN_AI_TYPE_CHAT,
-                             SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                 model
-                         }
-
-                         metrics["genai_requests"].add(1, attributes)
-                         metrics["genai_total_tokens"].add(
-                             total_tokens, attributes
-                         )
-                         metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                         metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                         metrics["genai_cost"].record(cost, attributes)
-
-                 except Exception as e:
-                     handle_exception(span, e)
-                     logger.error("Error in trace creation: %s", e)
-
-         return stream_generator()
-
-     return wrapper
-
- def start_chat(gen_ai_endpoint, version, environment, application_name, tracer,
-                pricing_info, trace_content, metrics, disable_metrics):
-     """
-     Generates a telemetry wrapper for messages to collect metrics.
-
-     Args:
-         gen_ai_endpoint: Endpoint identifier for logging and tracing.
-         version: Version of the monitoring package.
-         environment: Deployment environment (e.g., production, staging).
-         application_name: Name of the application using the OpenAI API.
-         tracer: OpenTelemetry tracer for creating spans.
-         pricing_info: Information used for calculating the cost of OpenAI usage.
-         trace_content: Flag indicating whether to trace the actual content.
-
-     Returns:
-         A function that wraps the chat method to add telemetry.
-     """
-
-     def wrapper(wrapped, instance, args, kwargs):
-         """
-         Wraps the 'start_chat' API call to add telemetry.
-
-         This collects metrics such as execution time, cost, and token usage, and handles errors
-         gracefully, adding details to the trace for observability.
-
-         Args:
-             wrapped: The original 'start_chat' method to be wrapped.
-             instance: The instance of the class where the original method is defined.
-             args: Positional arguments for the 'start_chat' method.
-             kwargs: Keyword arguments for the 'start_chat' method.
-
-         Returns:
-             The response from the original 'start_chat' method.
-         """
-
-         with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
-             response = wrapped(*args, **kwargs)
-
-             try:
-                 prompt = args[0]
-
-                 model = instance._model._model_id
-
-                 #pylint: disable=line-too-long
-                 prompt_tokens = response._prediction_response.metadata["tokenMetadata"]["inputTokenCount"]["totalTokens"]
-                 completion_tokens = response._prediction_response.metadata["tokenMetadata"]["outputTokenCount"]["totalTokens"]
-                 total_tokens = prompt_tokens + completion_tokens
-
-                 #Calculate cost of the operation
-                 cost = get_chat_model_cost(model,
-                     pricing_info, prompt_tokens,
-                     completion_tokens)
-
-                 # Set Span attribues
-                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                     SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
-                 span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                     SemanticConvetion.GEN_AI_TYPE_CHAT)
-                 span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                     gen_ai_endpoint)
-                 span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                     environment)
-                 span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                     application_name)
-                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                     model)
-                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                     False)
-                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                     prompt_tokens)
-                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                     completion_tokens)
-                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                     total_tokens)
-                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                     cost)
-                 if trace_content:
-                     span.add_event(
-                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                         attributes={
-                             SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                         },
-                     )
-                     span.add_event(
-                         name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                         attributes={
-                             SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
-                         },
-                     )
-
-                 span.set_status(Status(StatusCode.OK))
-
-                 if disable_metrics is False:
-                     attributes = {
-                         TELEMETRY_SDK_NAME:
-                             "openlit",
-                         SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                             application_name,
-                         SemanticConvetion.GEN_AI_SYSTEM:
-                             SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
-                         SemanticConvetion.GEN_AI_ENVIRONMENT:
-                             environment,
-                         SemanticConvetion.GEN_AI_TYPE:
-                             SemanticConvetion.GEN_AI_TYPE_CHAT,
-                         SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                             model
-                     }
-
-                     metrics["genai_requests"].add(1, attributes)
-                     metrics["genai_total_tokens"].add(
-                         total_tokens, attributes)
-                     metrics["genai_completion_tokens"].add(
-                         completion_tokens, attributes)
-                     metrics["genai_prompt_tokens"].add(
-                         prompt_tokens, attributes)
-                     metrics["genai_cost"].record(cost, attributes)
-
-                 # Return original response
-                 return response
-
-             except Exception as e:
-                 handle_exception(span, e)
-                 logger.error("Error in trace creation: %s", e)
-
-                 # Return original response
-                 return response
-
-     return wrapper
-
- def start_chat_streaming(gen_ai_endpoint, version, environment, application_name, tracer,
-                          pricing_info, trace_content, metrics, disable_metrics):
-     """
-     Generates a telemetry wrapper for messages to collect metrics.
-
-     Args:
-         gen_ai_endpoint: Endpoint identifier for logging and tracing.
-         version: Version of the monitoring package.
-         environment: Deployment environment (e.g., production, staging).
-         application_name: Name of the application using the OpenAI API.
-         tracer: OpenTelemetry tracer for creating spans.
-         pricing_info: Information used for calculating the cost of OpenAI usage.
-         trace_content: Flag indicating whether to trace the actual content.
-
-     Returns:
-         A function that wraps the chat method to add telemetry.
-     """
-
-     def wrapper(wrapped, instance, args, kwargs):
-         """
-         Wraps the 'start_chat' API call to add telemetry.
-
-         This collects metrics such as execution time, cost, and token usage, and handles errors
-         gracefully, adding details to the trace for observability.
-
-         Args:
-             wrapped: The original 'start_chat' method to be wrapped.
-             instance: The instance of the class where the original method is defined.
-             args: Positional arguments for the 'start_chat' method.
-             kwargs: Keyword arguments for the 'start_chat' method.
-
-         Returns:
-             The response from the original 'start_chat' method.
-         """
-
-         # Special handling for streaming response to accommodate the nature of data flow
-         def stream_generator():
-             with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                 # Placeholder for aggregating streaming response
-                 llmresponse = ""
-
-                 # Loop through streaming events capturing relevant details
-                 for event in wrapped(*args, **kwargs):
-                     llmresponse += str(event.text)
-                     yield event
-
-                 # Handling exception ensure observability without disrupting operation
-                 try:
-                     prompt = args[0]
-
-                     prompt_tokens = math.ceil(len(prompt) / 4)
-                     completion_tokens = math.ceil(len(llmresponse) / 4)
-                     total_tokens = prompt_tokens + completion_tokens
-
-                     model = instance._model._model_id
-
-                     # Calculate cost of the operation
-                     cost = get_chat_model_cost(model,
-                         pricing_info, prompt_tokens,
-                         completion_tokens)
-
-                     # Set Span attributes
-                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                         SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
-                     span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                         SemanticConvetion.GEN_AI_TYPE_CHAT)
-                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                         gen_ai_endpoint)
-                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                         environment)
-                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                         application_name)
-                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                         model)
-                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                         True)
-                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                         prompt_tokens)
-                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                         completion_tokens)
-                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                         total_tokens)
-                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                         cost)
-                     if trace_content:
-                         span.add_event(
-                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                             attributes={
-                                 SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                             },
-                         )
-                         span.add_event(
-                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                             attributes={
-                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                             },
-                         )
-
-                     span.set_status(Status(StatusCode.OK))
-
-                     if disable_metrics is False:
-                         attributes = {
-                             TELEMETRY_SDK_NAME:
-                                 "openlit",
-                             SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                 application_name,
-                             SemanticConvetion.GEN_AI_SYSTEM:
-                                 SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
-                             SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                 environment,
-                             SemanticConvetion.GEN_AI_TYPE:
-                                 SemanticConvetion.GEN_AI_TYPE_CHAT,
-                             SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                 model
-                         }
-
-                         metrics["genai_requests"].add(1, attributes)
-                         metrics["genai_total_tokens"].add(
-                             total_tokens, attributes
-                         )
-                         metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                         metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                         metrics["genai_cost"].record(cost, attributes)
-
-                 except Exception as e:
-                     handle_exception(span, e)
-                     logger.error("Error in trace creation: %s", e)
-
-         return stream_generator()
-
-     return wrapper
-
- def embeddings(gen_ai_endpoint, version, environment, application_name, tracer,
-                pricing_info, trace_content, metrics, disable_metrics):
-     """
-     Generates a telemetry wrapper for messages to collect metrics.
-
-     Args:
-         gen_ai_endpoint: Endpoint identifier for logging and tracing.
-         version: Version of the monitoring package.
-         environment: Deployment environment (e.g., production, staging).
-         application_name: Name of the application using the OpenAI API.
-         tracer: OpenTelemetry tracer for creating spans.
-         pricing_info: Information used for calculating the cost of OpenAI usage.
-         trace_content: Flag indicating whether to trace the actual content.
-
-     Returns:
-         A function that wraps the chat method to add telemetry.
-     """
-
-     def wrapper(wrapped, instance, args, kwargs):
-         """
-         Wraps the 'generate_content' API call to add telemetry.
-
-         This collects metrics such as execution time, cost, and token usage, and handles errors
-         gracefully, adding details to the trace for observability.
-
-         Args:
-             wrapped: The original 'generate_content' method to be wrapped.
-             instance: The instance of the class where the original method is defined.
-             args: Positional arguments for the 'generate_content' method.
-             kwargs: Keyword arguments for the 'generate_content' method.
-
-         Returns:
-             The response from the original 'generate_content' method.
-         """
-
-         with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
-             response = wrapped(*args, **kwargs)
-
-             try:
-                 prompt = args[0][0]
-
-                 model = instance._model_id
-
-                 prompt_tokens = int(response[0].statistics.token_count)
-
-                 #Calculate cost of the operation
-                 cost = get_embed_model_cost(model,
-                     pricing_info, prompt_tokens)
-
-                 # Set Span attribues
-                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                     SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
-                 span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                     SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
-                 span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                     gen_ai_endpoint)
-                 span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                     environment)
-                 span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                     application_name)
-                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                     model)
-                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                     False)
-                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                     prompt_tokens)
-                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                     prompt_tokens)
-                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                     cost)
-                 if trace_content:
-                     span.add_event(
-                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                         attributes={
-                             SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                         },
-                     )
-
-                 span.set_status(Status(StatusCode.OK))
-
-                 if disable_metrics is False:
-                     attributes = {
-                         TELEMETRY_SDK_NAME:
-                             "openlit",
-                         SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                             application_name,
-                         SemanticConvetion.GEN_AI_SYSTEM:
-                             SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
-                         SemanticConvetion.GEN_AI_ENVIRONMENT:
-                             environment,
-                         SemanticConvetion.GEN_AI_TYPE:
-                             SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
-                         SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                             model
-                     }
-
-                     metrics["genai_requests"].add(1, attributes)
-                     metrics["genai_total_tokens"].add(
-                         prompt_tokens, attributes)
-                     metrics["genai_prompt_tokens"].add(
-                         prompt_tokens, attributes)
-                     metrics["genai_cost"].record(cost, attributes)
-
-                 # Return original response
-                 return response
-
-             except Exception as e:
-                 handle_exception(span, e)
-                 logger.error("Error in trace creation: %s", e)
-
-                 # Return original response
-                 return response
-
-     return wrapper
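
The net effect of the vertexai.py rewrite: the per-endpoint wrappers (generate_content, predict, predict_streaming, start_chat, start_chat_streaming, embeddings) are consolidated into a single send_message wrapper, and streaming is handled by proxying the response iterator instead of wrapping it in a generator. A stripped-down sketch of that proxy pattern (illustrative only; the real class above also records span attributes, metrics, and cost):

    import time

    class TracedSyncStream:
        def __init__(self, wrapped, span):
            self.__wrapped__ = wrapped      # the underlying stream iterator
            self._span = span               # an already-started OTel span
            self._llmresponse = ""          # aggregated completion text
            self._timestamps = []           # per-chunk arrival times
            self._start_time = time.time()

        def __iter__(self):
            return self

        def __next__(self):
            try:
                chunk = self.__wrapped__.__next__()
                self._timestamps.append(time.time())
                self._llmresponse += str(chunk.text)
                return chunk
            except StopIteration:
                # Stream exhausted: finalize telemetry, end the span, and
                # re-raise so callers see normal iterator termination.
                self._span.end()
                raise

Because the span is created with tracer.start_span() rather than a context manager, it stays open across the whole stream and is only ended once the iterator is exhausted.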