openlit 1.33.8__py3-none-any.whl → 1.33.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. openlit/__helpers.py +88 -0
  2. openlit/__init__.py +4 -3
  3. openlit/instrumentation/ag2/ag2.py +5 -5
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +9 -9
  13. openlit/instrumentation/astra/async_astra.py +9 -9
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +9 -9
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +5 -5
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
  26. openlit/instrumentation/crewai/crewai.py +6 -4
  27. openlit/instrumentation/dynamiq/dynamiq.py +5 -5
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +71 -46
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +71 -51
  30. openlit/instrumentation/embedchain/embedchain.py +9 -9
  31. openlit/instrumentation/firecrawl/firecrawl.py +5 -5
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/__init__.py +2 -2
  36. openlit/instrumentation/gpt4all/gpt4all.py +345 -220
  37. openlit/instrumentation/gpu/__init__.py +5 -5
  38. openlit/instrumentation/groq/__init__.py +2 -2
  39. openlit/instrumentation/groq/async_groq.py +356 -240
  40. openlit/instrumentation/groq/groq.py +356 -240
  41. openlit/instrumentation/haystack/haystack.py +5 -5
  42. openlit/instrumentation/julep/async_julep.py +5 -5
  43. openlit/instrumentation/julep/julep.py +5 -5
  44. openlit/instrumentation/langchain/__init__.py +13 -7
  45. openlit/instrumentation/langchain/async_langchain.py +384 -0
  46. openlit/instrumentation/langchain/langchain.py +105 -492
  47. openlit/instrumentation/letta/letta.py +11 -9
  48. openlit/instrumentation/litellm/__init__.py +4 -5
  49. openlit/instrumentation/litellm/async_litellm.py +318 -247
  50. openlit/instrumentation/litellm/litellm.py +314 -243
  51. openlit/instrumentation/llamaindex/llamaindex.py +5 -5
  52. openlit/instrumentation/mem0/mem0.py +5 -5
  53. openlit/instrumentation/milvus/milvus.py +9 -9
  54. openlit/instrumentation/mistral/__init__.py +6 -6
  55. openlit/instrumentation/mistral/async_mistral.py +423 -250
  56. openlit/instrumentation/mistral/mistral.py +420 -246
  57. openlit/instrumentation/multion/async_multion.py +6 -4
  58. openlit/instrumentation/multion/multion.py +6 -4
  59. openlit/instrumentation/ollama/__init__.py +8 -30
  60. openlit/instrumentation/ollama/async_ollama.py +385 -417
  61. openlit/instrumentation/ollama/ollama.py +384 -417
  62. openlit/instrumentation/openai/__init__.py +11 -230
  63. openlit/instrumentation/openai/async_openai.py +433 -410
  64. openlit/instrumentation/openai/openai.py +414 -394
  65. openlit/instrumentation/phidata/phidata.py +6 -4
  66. openlit/instrumentation/pinecone/pinecone.py +9 -9
  67. openlit/instrumentation/premai/__init__.py +2 -2
  68. openlit/instrumentation/premai/premai.py +262 -213
  69. openlit/instrumentation/qdrant/async_qdrant.py +9 -9
  70. openlit/instrumentation/qdrant/qdrant.py +9 -9
  71. openlit/instrumentation/reka/__init__.py +2 -2
  72. openlit/instrumentation/reka/async_reka.py +90 -52
  73. openlit/instrumentation/reka/reka.py +90 -52
  74. openlit/instrumentation/together/__init__.py +4 -4
  75. openlit/instrumentation/together/async_together.py +278 -236
  76. openlit/instrumentation/together/together.py +278 -236
  77. openlit/instrumentation/transformers/__init__.py +1 -1
  78. openlit/instrumentation/transformers/transformers.py +76 -45
  79. openlit/instrumentation/vertexai/__init__.py +14 -64
  80. openlit/instrumentation/vertexai/async_vertexai.py +330 -987
  81. openlit/instrumentation/vertexai/vertexai.py +330 -987
  82. openlit/instrumentation/vllm/__init__.py +1 -1
  83. openlit/instrumentation/vllm/vllm.py +66 -36
  84. openlit/otel/metrics.py +98 -7
  85. openlit/semcov/__init__.py +113 -80
  86. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
  87. openlit-1.33.10.dist-info/RECORD +122 -0
  88. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/WHEEL +1 -1
  89. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  90. openlit/instrumentation/openai/azure_openai.py +0 -898
  91. openlit-1.33.8.dist-info/RECORD +0 -122
  92. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
openlit/instrumentation/anthropic/__init__.py

@@ -20,8 +20,8 @@ class AnthropicInstrumentor(BaseInstrumentor):
         return _instruments
 
     def _instrument(self, **kwargs):
-        application_name = kwargs.get("application_name", "default_application")
-        environment = kwargs.get("environment", "default_environment")
+        application_name = kwargs.get("application_name", "default")
+        environment = kwargs.get("environment", "default")
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info", {})
@@ -33,7 +33,7 @@ class AnthropicInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "anthropic.resources.messages",
             "Messages.create",
-            messages("anthropic.messages", version, environment, application_name,
+            messages(version, environment, application_name,
                      tracer, pricing_info, trace_content, metrics, disable_metrics),
         )
 
@@ -41,7 +41,7 @@ class AnthropicInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "anthropic.resources.messages",
             "AsyncMessages.create",
-            async_messages("anthropic.messages", version, environment, application_name,
+            async_messages(version, environment, application_name,
                            tracer, pricing_info, trace_content, metrics, disable_metrics),
         )
 
openlit/instrumentation/anthropic/anthropic.py

@@ -1,35 +1,267 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
 """
 Module for monitoring Anthropic API calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
-from openlit.__helpers import get_chat_model_cost, handle_exception
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from openlit.__helpers import (
+    get_chat_model_cost,
+    handle_exception,
+    response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
+)
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def messages(gen_ai_endpoint, version, environment, application_name, tracer,
+def messages(version, environment, application_name, tracer,
              pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for messages to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the OpenAI API.
+        application_name: Name of the application using the Anthropic API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of OpenAI usage.
+        pricing_info: Information used for calculating the cost of Anthropic usage.
         trace_content: Flag indicating whether to trace the actual content.
 
     Returns:
         A function that wraps the chat method to add telemetry.
     """
 
+    class TracedSyncStream:
+        """
+        Wrapper for streaming responses to collect metrics and trace data.
+        Wraps the response to collect message IDs and aggregated response.
+
+        This class implements the '__aiter__' and '__anext__' methods that
+        handle asynchronous streaming responses.
+
+        This class also implements '__aenter__' and '__aexit__' methods that
+        handle asynchronous context management protocol.
+        """
+        def __init__(
+                self,
+                wrapped,
+                span,
+                kwargs,
+                server_address,
+                server_port,
+                **args,
+            ):
+            self.__wrapped__ = wrapped
+            self._span = span
+            # Placeholder for aggregating streaming response
+            self._llmresponse = ""
+            self._response_id = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._input_tokens = ""
+            self._output_tokens = ""
+
+            self._args = args
+            self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
+
+        def __enter__(self):
+            self.__wrapped__.__enter__()
+            return self
+
+        def __exit__(self, exc_type, exc_value, traceback):
+            self.__wrapped__.__exit__(exc_type, exc_value, traceback)
+
+        def __iter__(self):
+            return self
+
+        def __getattr__(self, name):
+            """Delegate attribute access to the wrapped object."""
+            return getattr(self.__wrapped__, name)
+
+        def __next__(self):
+            try:
+                chunk = self.__wrapped__.__next__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+                chunked = response_as_dict(chunk)
+
+                # Collect message IDs and input token from events
+                if chunked.get('type') == "message_start":
+                    self._response_id = chunked.get('message').get('id')
+                    self._input_tokens = chunked.get('message').get('usage').get('input_tokens')
+                    self._response_model = chunked.get('message').get('model')
+                # Collect message IDs and aggregated response from events
+                if chunked.get('type') == "content_block_delta":
+                    content = chunked.get('delta').get('text')
+                    if content:
+                        self._llmresponse += content
+                # Collect output tokens and stop reason from events
+                if chunked.get('type') == "message_delta":
+                    self._output_tokens = chunked.get('usage').get('output_tokens')
+                    self._finish_reason = chunked.get('delta').get('stop_reason')
+
+                return chunk
+            except StopIteration:
+                # Handling exception ensure observability without disrupting operation
+                try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
+                    # Format 'messages' into a single string
+                    message_prompt = self._kwargs.get("messages", "")
+                    formatted_messages = []
+                    for message in message_prompt:
+                        role = message["role"]
+                        content = message["content"]
+
+                        if isinstance(content, list):
+                            content_str_list = []
+                            for item in content:
+                                if item["type"] == "text":
+                                    content_str_list.append(f'text: {item["text"]}')
+                                elif (item["type"] == "image_url" and
+                                      not item["image_url"]["url"].startswith("data:")):
+                                    content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+                            content_str = ", ".join(content_str_list)
+                            formatted_messages.append(f"{role}: {content_str}")
+                        else:
+                            formatted_messages.append(f"{role}: {content}")
+                    prompt = "\n".join(formatted_messages)
+
+                    request_model = self._kwargs.get("model", "claude-3-5-sonnet-latest")
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, self._input_tokens,
+                                               self._output_tokens)
+
+                    # Set Span attributes (OTel Semconv)
+                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                             SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                             SemanticConvetion.GEN_AI_SYSTEM_ANTHROPIC)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                             request_model)
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                             self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                             self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                             self._kwargs.get("stop_sequences", []))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                             self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
+                                             self._kwargs.get("top_k", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                             self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                             [self._finish_reason])
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                             self._response_id)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                             self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                             self._input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                             self._output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                             self._server_address)
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                             environment)
+                    self._span.set_attribute(SERVICE_NAME,
+                                             application_name)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                             True)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                             self._input_tokens + self._output_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                             cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                             self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                             self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                             version)
+                    if trace_content:
+                        self._span.add_event(
+                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                            attributes={
+                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                            },
+                        )
+                        self._span.add_event(
+                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                            attributes={
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
+                            },
+                        )
+                    self._span.set_status(Status(StatusCode.OK))
+
+                    if disable_metrics is False:
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_ANTHROPIC,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )
+
+                        metrics["genai_client_usage_tokens"].record(
+                            self._input_tokens + self._output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
+                        )
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
+                        metrics["genai_cost"].record(cost, attributes)
+
+                except Exception as e:
+                    handle_exception(self._span, e)
+                    logger.error("Error in trace creation: %s", e)
+                finally:
+                    self._span.end()
+                raise
+
     def wrapper(wrapped, instance, args, kwargs):
         """
         Wraps the 'messages' API call to add telemetry.
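The calculate_ttft and calculate_tbt calls above resolve to openlit/__helpers.py, which gains 88 lines in this release but is not shown in this excerpt. A plausible sketch of their semantics, inferred from the call sites; treat these bodies as assumptions, not the actual helpers:

# Assumed semantics; the real bodies live in openlit/__helpers.py.
def calculate_ttft(timestamps, start_time):
    # Time to first token: delay from request start to the first streamed chunk.
    return timestamps[0] - start_time

def calculate_tbt(timestamps):
    # Time between tokens: average gap between consecutive chunk arrivals.
    gaps = [later - earlier for earlier, later in zip(timestamps, timestamps[1:])]
    return sum(gaps) / len(gaps)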
@@ -49,147 +281,27 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.anthropic.com", 443)
+        request_model = kwargs.get("model", "claude-3-5-sonnet-latest")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
-            def stream_generator():
-                with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                    # Placeholder for aggregating streaming response
-                    llmresponse = ""
-
-                    # Loop through streaming events capturing relevant details
-                    for event in wrapped(*args, **kwargs):
-
-                        # Collect message IDs and input token from events
-                        if event.type == "message_start":
-                            response_id = event.message.id
-                            prompt_tokens = event.message.usage.input_tokens
-
-                        # Aggregate response content
-                        if event.type == "content_block_delta":
-                            llmresponse += event.delta.text
-
-                        # Collect output tokens and stop reason from events
-                        if event.type == "message_delta":
-                            completion_tokens = event.usage.output_tokens
-                            finish_reason = event.delta.stop_reason
-                        yield event
-
-                    # Handling exception ensure observability without disrupting operation
-                    try:
-                        # Format 'messages' into a single string
-                        message_prompt = kwargs.get("messages", "")
-                        formatted_messages = []
-                        for message in message_prompt:
-                            role = message["role"]
-                            content = message["content"]
-
-                            if isinstance(content, list):
-                                content_str = ", ".join(
-                                    # pylint: disable=line-too-long
-                                    f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                    if "type" in item else f'text: {item["text"]}'
-                                    for item in content
-                                )
-                                formatted_messages.append(f"{role}: {content_str}")
-                            else:
-                                formatted_messages.append(f"{role}: {content}")
-                        prompt = "\n".join(formatted_messages)
-
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(
-                            kwargs.get("model", "claude-3-sonnet-20240229"),
-                            pricing_info, prompt_tokens, completion_tokens
-                        )
-
-                        # Set Span attributes
-                        span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                           SemanticConvetion.GEN_AI_SYSTEM_ANTHROPIC)
-                        span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                           SemanticConvetion.GEN_AI_TYPE_CHAT)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                           gen_ai_endpoint)
-                        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                           response_id)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                           environment)
-                        span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                           application_name)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                           kwargs.get("model", "claude-3-sonnet-20240229"))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                           kwargs.get("max_tokens", -1))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                           True)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                           kwargs.get("temperature", 1.0))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                           kwargs.get("top_p", ""))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
-                                           kwargs.get("top_k", ""))
-                        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                           [finish_reason])
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                           prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                           completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           prompt_tokens + completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                                },
-                            )
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                                },
-                            )
-
-                        span.set_status(Status(StatusCode.OK))
-
-                        if disable_metrics is False:
-                            attributes = {
-                                TELEMETRY_SDK_NAME:
-                                    "openlit",
-                                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                    application_name,
-                                SemanticConvetion.GEN_AI_SYSTEM:
-                                    SemanticConvetion.GEN_AI_SYSTEM_ANTHROPIC,
-                                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                    environment,
-                                SemanticConvetion.GEN_AI_TYPE:
-                                    SemanticConvetion.GEN_AI_TYPE_CHAT,
-                                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                    kwargs.get("model", "claude-3-sonnet-20240229")
-                            }
-
-                            metrics["genai_requests"].add(1, attributes)
-                            metrics["genai_total_tokens"].add(
-                                prompt_tokens + completion_tokens, attributes
-                            )
-                            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                            metrics["genai_cost"].record(cost, attributes)
-
-                    except Exception as e:
-                        handle_exception(span, e)
-                        logger.error("Error in trace creation: %s", e)
+            awaited_wrapped = wrapped(*args, **kwargs)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-            return stream_generator()
+            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
         # Handling for non-streaming responses
         else:
-            with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = wrapped(*args, **kwargs)
+                end_time = time.time()
 
+                response_dict = response_as_dict(response)
                 try:
                     # Format 'messages' into a single string
                     message_prompt = kwargs.get("messages", "")
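With this hunk the streaming branch stops wrapping the events in a local generator and instead hands back the TracedSyncStream proxy defined above, so telemetry is recorded however the caller consumes or abandons the stream. Nothing changes from the caller's side; a minimal usage sketch (the client setup and prompt are placeholders, not part of this diff):

import anthropic

client = anthropic.Anthropic()  # assumed to be instrumented by openlit
stream = client.messages.create(
    model="claude-3-5-sonnet-latest",
    max_tokens=64,
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
)
for event in stream:
    pass  # events pass through unchanged; __next__ records per-chunk timestamps
# When iteration finishes (StopIteration), the proxy sets span attributes,
# records metrics, and ends the span it was constructed with.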
@@ -200,7 +312,6 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
 
                     if isinstance(content, list):
                         content_str = ", ".join(
-                            # pylint: disable=line-too-long
                             f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                             if "type" in item else f'text: {item["text"]}'
                             for item in content
@@ -210,48 +321,70 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
+                    input_tokens = response_dict.get('usage').get('input_tokens')
+                    output_tokens = response_dict.get('usage').get('output_tokens')
+
                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "claude-3-sonnet-20240229"),
-                                               pricing_info, response.usage.input_tokens,
-                                               response.usage.output_tokens)
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)
+
+                    llm_response = ""
+                    for i in range(len(response_dict.get('content'))):
+                        if response_dict.get('content')[i].get('type') == 'text':
+                            llm_response = response_dict.get('content')[i].get('text')
 
-                    # Set Span attribues
+                    # Set Span attributes (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                        SemanticConvetion.GEN_AI_SYSTEM_ANTHROPIC)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                       response.id)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                       environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                       application_name)
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "claude-3-sonnet-20240229"))
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                                        kwargs.get("max_tokens", -1))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                       False)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                       kwargs.get("stop_sequences", []))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
                                        kwargs.get("temperature", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                       kwargs.get("top_p", ""))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
-                                       kwargs.get("top_k", ""))
+                                       kwargs.get("top_k", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                       kwargs.get("top_p", 1.0))
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                       [response.stop_reason])
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                       response.usage.input_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                       response.usage.output_tokens)
+                                       [response_dict.get('stop_reason')])
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                       response_dict.get('id'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       response_dict.get('model'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                       input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                       output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                       response_dict.get('content')[0].get('type'))
+
+                    # Set Span attributes (Extra)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                       environment)
+                    span.set_attribute(SERVICE_NAME,
+                                       application_name)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                       False)
                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       response.usage.input_tokens +
-                                       response.usage.output_tokens)
+                                       input_tokens + output_tokens)
                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                        cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                       end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
 
                     if trace_content:
                         span.add_event(
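Taken together, the replaced attribute keys in this hunk amount to a migration from openlit's earlier ad-hoc names to the OTel GenAI semantic conventions. Collected as a reference mapping, drawn directly from the removed and added lines:

# Old span attribute -> new span attribute, as visible in this hunk.
ATTRIBUTE_RENAMES = {
    "SemanticConvetion.GEN_AI_TYPE": "SemanticConvetion.GEN_AI_OPERATION",
    "SemanticConvetion.GEN_AI_ENVIRONMENT": "DEPLOYMENT_ENVIRONMENT",  # OTel resource key
    "SemanticConvetion.GEN_AI_APPLICATION_NAME": "SERVICE_NAME",       # OTel resource key
    "SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS": "SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS",
    "SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS": "SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS",
}
# GEN_AI_ENDPOINT is dropped outright; SERVER_ADDRESS, SERVER_PORT, GEN_AI_RESPONSE_MODEL,
# GEN_AI_OUTPUT_TYPE, GEN_AI_SERVER_TTFT and GEN_AI_SDK_VERSION are newly recorded.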
@@ -263,37 +396,36 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                             attributes={
-                                # pylint: disable=line-too-long
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.content[0].text if response.content else "",
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llm_response,
                             },
                         )
 
                     span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_ANTHROPIC,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "claude-3-sonnet-20240229")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_ANTHROPIC,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=response_dict.get('model'),
+                        )
 
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            response.usage.input_tokens +
-                            response.usage.output_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(
-                            response.usage.output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(
-                            response.usage.input_tokens, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                 # Return original response
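None of the above changes the public entry point; combined with the __init__.py hunk, the only user-visible default change is the fallback service metadata, now "default" for both fields instead of "default_application"/"default_environment". A minimal sketch using the documented initializer:

import openlit

# Explicit values behave as before; omitting them in 1.33.10 reports "default"
# for both fields, where 1.33.8 reported "default_application"/"default_environment".
openlit.init(application_name="my-app", environment="production")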