openlit 1.33.9__py3-none-any.whl → 1.33.11__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (113)
  1. openlit/__helpers.py +78 -0
  2. openlit/__init__.py +41 -13
  3. openlit/instrumentation/ag2/__init__.py +9 -10
  4. openlit/instrumentation/ag2/ag2.py +134 -69
  5. openlit/instrumentation/ai21/__init__.py +6 -5
  6. openlit/instrumentation/ai21/ai21.py +71 -534
  7. openlit/instrumentation/ai21/async_ai21.py +71 -534
  8. openlit/instrumentation/ai21/utils.py +407 -0
  9. openlit/instrumentation/anthropic/__init__.py +3 -3
  10. openlit/instrumentation/anthropic/anthropic.py +5 -5
  11. openlit/instrumentation/anthropic/async_anthropic.py +5 -5
  12. openlit/instrumentation/assemblyai/__init__.py +2 -2
  13. openlit/instrumentation/assemblyai/assemblyai.py +3 -3
  14. openlit/instrumentation/astra/__init__.py +25 -25
  15. openlit/instrumentation/astra/astra.py +7 -7
  16. openlit/instrumentation/astra/async_astra.py +7 -7
  17. openlit/instrumentation/azure_ai_inference/__init__.py +5 -5
  18. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +11 -11
  19. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +11 -11
  20. openlit/instrumentation/bedrock/__init__.py +2 -2
  21. openlit/instrumentation/bedrock/bedrock.py +3 -3
  22. openlit/instrumentation/chroma/__init__.py +9 -9
  23. openlit/instrumentation/chroma/chroma.py +7 -7
  24. openlit/instrumentation/cohere/__init__.py +7 -7
  25. openlit/instrumentation/cohere/async_cohere.py +10 -10
  26. openlit/instrumentation/cohere/cohere.py +11 -11
  27. openlit/instrumentation/controlflow/__init__.py +4 -4
  28. openlit/instrumentation/controlflow/controlflow.py +5 -5
  29. openlit/instrumentation/crawl4ai/__init__.py +3 -3
  30. openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
  31. openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
  32. openlit/instrumentation/crewai/__init__.py +3 -3
  33. openlit/instrumentation/crewai/crewai.py +6 -4
  34. openlit/instrumentation/dynamiq/__init__.py +5 -5
  35. openlit/instrumentation/dynamiq/dynamiq.py +5 -5
  36. openlit/instrumentation/elevenlabs/__init__.py +5 -5
  37. openlit/instrumentation/elevenlabs/async_elevenlabs.py +4 -5
  38. openlit/instrumentation/elevenlabs/elevenlabs.py +4 -5
  39. openlit/instrumentation/embedchain/__init__.py +2 -2
  40. openlit/instrumentation/embedchain/embedchain.py +9 -9
  41. openlit/instrumentation/firecrawl/__init__.py +3 -3
  42. openlit/instrumentation/firecrawl/firecrawl.py +5 -5
  43. openlit/instrumentation/google_ai_studio/__init__.py +3 -3
  44. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +3 -3
  45. openlit/instrumentation/google_ai_studio/google_ai_studio.py +3 -3
  46. openlit/instrumentation/gpt4all/__init__.py +5 -5
  47. openlit/instrumentation/gpt4all/gpt4all.py +350 -225
  48. openlit/instrumentation/gpu/__init__.py +5 -5
  49. openlit/instrumentation/groq/__init__.py +5 -5
  50. openlit/instrumentation/groq/async_groq.py +359 -243
  51. openlit/instrumentation/groq/groq.py +359 -243
  52. openlit/instrumentation/haystack/__init__.py +2 -2
  53. openlit/instrumentation/haystack/haystack.py +5 -5
  54. openlit/instrumentation/julep/__init__.py +7 -7
  55. openlit/instrumentation/julep/async_julep.py +6 -6
  56. openlit/instrumentation/julep/julep.py +6 -6
  57. openlit/instrumentation/langchain/__init__.py +15 -9
  58. openlit/instrumentation/langchain/async_langchain.py +388 -0
  59. openlit/instrumentation/langchain/langchain.py +110 -497
  60. openlit/instrumentation/letta/__init__.py +7 -7
  61. openlit/instrumentation/letta/letta.py +10 -8
  62. openlit/instrumentation/litellm/__init__.py +9 -10
  63. openlit/instrumentation/litellm/async_litellm.py +321 -250
  64. openlit/instrumentation/litellm/litellm.py +319 -248
  65. openlit/instrumentation/llamaindex/__init__.py +2 -2
  66. openlit/instrumentation/llamaindex/llamaindex.py +5 -5
  67. openlit/instrumentation/mem0/__init__.py +2 -2
  68. openlit/instrumentation/mem0/mem0.py +5 -5
  69. openlit/instrumentation/milvus/__init__.py +2 -2
  70. openlit/instrumentation/milvus/milvus.py +7 -7
  71. openlit/instrumentation/mistral/__init__.py +13 -13
  72. openlit/instrumentation/mistral/async_mistral.py +426 -253
  73. openlit/instrumentation/mistral/mistral.py +424 -250
  74. openlit/instrumentation/multion/__init__.py +7 -7
  75. openlit/instrumentation/multion/async_multion.py +9 -7
  76. openlit/instrumentation/multion/multion.py +9 -7
  77. openlit/instrumentation/ollama/__init__.py +19 -39
  78. openlit/instrumentation/ollama/async_ollama.py +137 -563
  79. openlit/instrumentation/ollama/ollama.py +136 -563
  80. openlit/instrumentation/ollama/utils.py +333 -0
  81. openlit/instrumentation/openai/__init__.py +11 -11
  82. openlit/instrumentation/openai/async_openai.py +25 -27
  83. openlit/instrumentation/openai/openai.py +25 -27
  84. openlit/instrumentation/phidata/__init__.py +2 -2
  85. openlit/instrumentation/phidata/phidata.py +6 -4
  86. openlit/instrumentation/pinecone/__init__.py +6 -6
  87. openlit/instrumentation/pinecone/pinecone.py +7 -7
  88. openlit/instrumentation/premai/__init__.py +5 -5
  89. openlit/instrumentation/premai/premai.py +268 -219
  90. openlit/instrumentation/qdrant/__init__.py +2 -2
  91. openlit/instrumentation/qdrant/async_qdrant.py +7 -7
  92. openlit/instrumentation/qdrant/qdrant.py +7 -7
  93. openlit/instrumentation/reka/__init__.py +5 -5
  94. openlit/instrumentation/reka/async_reka.py +93 -55
  95. openlit/instrumentation/reka/reka.py +93 -55
  96. openlit/instrumentation/together/__init__.py +9 -9
  97. openlit/instrumentation/together/async_together.py +284 -242
  98. openlit/instrumentation/together/together.py +284 -242
  99. openlit/instrumentation/transformers/__init__.py +3 -3
  100. openlit/instrumentation/transformers/transformers.py +79 -48
  101. openlit/instrumentation/vertexai/__init__.py +19 -69
  102. openlit/instrumentation/vertexai/async_vertexai.py +333 -990
  103. openlit/instrumentation/vertexai/vertexai.py +333 -990
  104. openlit/instrumentation/vllm/__init__.py +3 -3
  105. openlit/instrumentation/vllm/vllm.py +65 -35
  106. openlit/otel/events.py +85 -0
  107. openlit/otel/tracing.py +3 -13
  108. openlit/semcov/__init__.py +16 -4
  109. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/METADATA +2 -2
  110. openlit-1.33.11.dist-info/RECORD +125 -0
  111. openlit-1.33.9.dist-info/RECORD +0 -121
  112. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/LICENSE +0 -0
  113. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/WHEEL +0 -0
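The diff rendered below is openlit/instrumentation/groq/groq.py, the synchronous half of the Groq instrumentation rewrite (async_groq.py receives the matching +359/-243 change). In summary: the gen_ai_endpoint parameter and endpoint-named spans are replaced by "{operation} {model}" span names, trace_content is renamed capture_message_content, the ad-hoc stream_generator() closure gives way to a TracedSyncStream wrapper class that records per-chunk timestamps for time-to-first-token (TTFT) and time-between-tokens (TBT), and span and metric attributes move onto the OTel GenAI semantic conventions (SERVICE_NAME, DEPLOYMENT_ENVIRONMENT, server address and port). Hedged sketches of the assumed helper semantics and wiring follow the first hunk and the end of the diff.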
@@ -1,39 +1,281 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, used-before-assignment, too-many-branches
 """
 Module for monitoring Groq API calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
-from openlit.__helpers import get_chat_model_cost, handle_exception
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from openlit.__helpers import (
+    get_chat_model_cost,
+    handle_exception,
+    response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
+)
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def chat(gen_ai_endpoint, version, environment, application_name,
-         tracer, pricing_info, trace_content, metrics, disable_metrics):
+def chat(version, environment, application_name,
+         tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the Groq API.
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of Groq usage.
-        trace_content: Flag indicating whether to trace the actual content.
+        capture_message_content: Flag indicating whether to trace the actual content.
 
     Returns:
        A function that wraps the chat completions method to add telemetry.
    """
 
+    class TracedSyncStream:
+        """
+        Wrapper for streaming responses to collect metrics and trace data.
+        Wraps the response to collect message IDs and aggregated response.
+
+        This class implements the '__aiter__' and '__anext__' methods that
+        handle asynchronous streaming responses.
+
+        This class also implements '__aenter__' and '__aexit__' methods that
+        handle asynchronous context management protocol.
+        """
+        def __init__(
+                self,
+                wrapped,
+                span,
+                kwargs,
+                server_address,
+                server_port,
+                **args,
+            ):
+            self.__wrapped__ = wrapped
+            self._span = span
+            # Placeholder for aggregating streaming response
+            self._llmresponse = ""
+            self._response_id = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._system_fingerprint = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
+
+            self._args = args
+            self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
+
+        def __enter__(self):
+            self.__wrapped__.__enter__()
+            return self
+
+        def __exit__(self, exc_type, exc_value, traceback):
+            self.__wrapped__.__exit__(exc_type, exc_value, traceback)
+
+        def __iter__(self):
+            return self
+
+        def __getattr__(self, name):
+            """Delegate attribute access to the wrapped object."""
+            return getattr(self.__wrapped__, name)
+
+        def __next__(self):
+            try:
+                chunk = self.__wrapped__.__next__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+                chunked = response_as_dict(chunk)
+                # Collect message IDs and aggregated response from events
+                if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
+                        'content' in chunked.get('choices')[0].get('delta'))):
+
+                    content = chunked.get('choices')[0].get('delta').get('content')
+                    if content:
+                        self._llmresponse += content
+
+                if chunked.get('usage'):
+                    self._input_tokens = chunked.get('usage').get('prompt_tokens')
+                    self._output_tokens = chunked.get('usage').get('completion_tokens')
+                self._response_id = chunked.get('id')
+                self._response_model = chunked.get('model')
+                self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+                self._system_fingerprint = chunked.get('system_fingerprint')
+                return chunk
+            except StopIteration:
+                # Handling exception ensure observability without disrupting operation
+                try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
+                    # Format 'messages' into a single string
+                    message_prompt = self._kwargs.get("messages", "")
+                    formatted_messages = []
+                    for message in message_prompt:
+                        role = message["role"]
+                        content = message["content"]
+
+                        if isinstance(content, list):
+                            content_str_list = []
+                            for item in content:
+                                if item["type"] == "text":
+                                    content_str_list.append(f'text: {item["text"]}')
+                                elif (item["type"] == "image_url" and
+                                      not item["image_url"]["url"].startswith("data:")):
+                                    content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+                            content_str = ", ".join(content_str_list)
+                            formatted_messages.append(f"{role}: {content_str}")
+                        else:
+                            formatted_messages.append(f"{role}: {content}")
+                    prompt = "\n".join(formatted_messages)
+
+                    request_model = self._kwargs.get("model", "gpt-4o")
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, self._input_tokens,
+                                               self._output_tokens)
+
+                    # Set Span attributes (OTel Semconv)
+                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                             SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                             SemanticConvetion.GEN_AI_SYSTEM_GROQ)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                             request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                             self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                             self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                             self._kwargs.get("frequency_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                             self._kwargs.get("max_completion_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                             self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                             self._kwargs.get("stop", []))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                             self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                             self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                             [self._finish_reason])
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                             self._response_id)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                             self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                             self._input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                             self._output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                             self._server_address)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SERVICE_TIER,
+                                             self._kwargs.get("service_tier", "on_demand"))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+                                             self._system_fingerprint)
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                             environment)
+                    self._span.set_attribute(SERVICE_NAME,
+                                             application_name)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                             self._kwargs.get("user", ""))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                             True)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                             self._input_tokens + self._output_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                             cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                             self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                             self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                             version)
+                    if capture_message_content:
+                        self._span.add_event(
+                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                            attributes={
+                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                            },
+                        )
+                        self._span.add_event(
+                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                            attributes={
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
+                            },
+                        )
+                    self._span.set_status(Status(StatusCode.OK))
+
+                    if disable_metrics is False:
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_GROQ,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )
+
+                        metrics["genai_client_usage_tokens"].record(
+                            self._input_tokens + self._output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
+                        )
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
+                        metrics["genai_cost"].record(cost, attributes)
+
+                except Exception as e:
+                    handle_exception(self._span, e)
+                    logger.error("Error in trace creation: %s", e)
+                finally:
+                    self._span.end()
+                raise
+
     def wrapper(wrapped, instance, args, kwargs):
         """
         Wraps the 'chat.completions' API call to add telemetry.
-
+
         This collects metrics such as execution time, cost, and token usage, and handles errors
         gracefully, adding details to the trace for observability.
 
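The stream wrapper above delegates its timing math to calculate_ttft and calculate_tbt, imported from openlit/__helpers.py (+78 lines in this release, not displayed on this page). Their bodies are therefore assumptions here; a minimal sketch consistent with how the wrapper calls them (calculate_ttft(timestamps, start_time) once the first chunk arrives, calculate_tbt(timestamps) only when two or more chunks were seen) could look like this:

# Hypothetical stand-ins for openlit.__helpers.calculate_ttft / calculate_tbt.
# Only the call signatures are taken from the diff above; the bodies are
# assumptions made for illustration.

def calculate_ttft(timestamps: list, start_time: float) -> float:
    """Time to first token: delay from request start to the first chunk."""
    if timestamps:
        return timestamps[0] - start_time
    return 0.0

def calculate_tbt(timestamps: list) -> float:
    """Time between tokens: mean gap between consecutive chunk arrivals."""
    if len(timestamps) < 2:
        return 0.0
    gaps = [t2 - t1 for t1, t2 in zip(timestamps, timestamps[1:])]
    return sum(gaps) / len(gaps)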
@@ -49,148 +291,27 @@ def chat(gen_ai_endpoint, version, environment, application_name,
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.groq.com", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
-            def stream_generator():
-                with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                    # Placeholder for aggregating streaming response
-                    llmresponse = ""
-
-                    # Loop through streaming events capturing relevant details
-                    for chunk in wrapped(*args, **kwargs):
-                        # Collect message IDs and aggregated response from events
-                        if len(chunk.choices) > 0:
-                            # pylint: disable=line-too-long
-                            if hasattr(chunk.choices[0], "delta") and hasattr(chunk.choices[0].delta, "content"):
-                                content = chunk.choices[0].delta.content
-                                if content:
-                                    llmresponse += content
-                        if chunk.x_groq is not None and chunk.x_groq.usage is not None:
-                            prompt_tokens = chunk.x_groq.usage.prompt_tokens
-                            completion_tokens = chunk.x_groq.usage.completion_tokens
-                            total_tokens = chunk.x_groq.usage.total_tokens
-                            response_id = chunk.x_groq.id
-                        yield chunk
-
-                    # Handling exception ensure observability without disrupting operation
-                    try:
-                        # Format 'messages' into a single string
-                        message_prompt = kwargs.get("messages", "")
-                        formatted_messages = []
-                        for message in message_prompt:
-                            role = message["role"]
-                            content = message["content"]
-
-                            if isinstance(content, list):
-                                content_str = ", ".join(
-                                    # pylint: disable=line-too-long
-                                    f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                    if "type" in item else f'text: {item["text"]}'
-                                    for item in content
-                                )
-                                formatted_messages.append(f"{role}: {content_str}")
-                            else:
-                                formatted_messages.append(f"{role}: {content}")
-                        prompt = "\n".join(formatted_messages)
-
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                   pricing_info, prompt_tokens,
-                                                   completion_tokens)
-
-                        # Set Span attributes
-                        span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                           SemanticConvetion.GEN_AI_SYSTEM_GROQ)
-                        span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                           SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                           gen_ai_endpoint)
-                        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                           response_id)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                           environment)
-                        span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                           application_name)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                           kwargs.get("model", "gpt-3.5-turbo"))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                           kwargs.get("user", ""))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                           kwargs.get("top_p", 1.0))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                           kwargs.get("max_tokens", -1))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                           kwargs.get("temperature", 1.0))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                           kwargs.get("presence_penalty", 0.0))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                           kwargs.get("frequency_penalty", 0.0))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                           kwargs.get("seed", ""))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                           True)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                           prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                           completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           prompt_tokens + completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                                },
-                            )
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                                },
-                            )
+            awaited_wrapped = wrapped(*args, **kwargs)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-                        span.set_status(Status(StatusCode.OK))
-
-                        if disable_metrics is False:
-                            attributes = {
-                                TELEMETRY_SDK_NAME:
-                                    "openlit",
-                                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                    application_name,
-                                SemanticConvetion.GEN_AI_SYSTEM:
-                                    SemanticConvetion.GEN_AI_SYSTEM_GROQ,
-                                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                    environment,
-                                SemanticConvetion.GEN_AI_OPERATION:
-                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                    kwargs.get("model", "gpt-3.5-turbo")
-                            }
-
-                            metrics["genai_requests"].add(1, attributes)
-                            metrics["genai_total_tokens"].add(
-                                total_tokens, attributes
-                            )
-                            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                            metrics["genai_cost"].record(cost, attributes)
-
-                    except Exception as e:
-                        handle_exception(span, e)
-                        logger.error("Error in trace creation: %s", e)
-
-            return stream_generator()
+            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
         # Handling for non-streaming responses
         else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = wrapped(*args, **kwargs)
+                end_time = time.time()
+
+                response_dict = response_as_dict(response)
 
                 try:
                     # Format 'messages' into a single string
@@ -202,7 +323,6 @@ def chat(gen_ai_endpoint, version, environment, application_name,
 
                     if isinstance(content, list):
                         content_str = ", ".join(
-                            # pylint: disable=line-too-long
                            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                            if "type" in item else f'text: {item["text"]}'
                            for item in content
@@ -212,39 +332,71 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                         formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
-                    # Set base span attribues
+                    input_tokens = response_dict.get('usage').get('prompt_tokens')
+                    output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)
+
+                    # Set base span attribues (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                       SemanticConvetion.GEN_AI_SYSTEM_GROQ)
                     span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                       response.x_groq["id"])
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                       environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                       application_name)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                       SemanticConvetion.GEN_AI_SYSTEM_GROQ)
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "llama3-8b-8192"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                       kwargs.get("top_p", 1.0))
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                       kwargs.get("seed", ""))
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                       kwargs.get("frequency_penalty", 0.0))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                       kwargs.get("max_tokens", -1))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                       kwargs.get("name", ""))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                       kwargs.get("temperature", 1.0))
+                                       kwargs.get("max_completion_tokens", -1))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
                                        kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                       kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                       kwargs.get("seed", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                       kwargs.get("stop", []))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                       kwargs.get("temperature", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                       kwargs.get("top_p", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                       response_dict.get("id"))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       response_dict.get('model'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                       input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                       output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SERVICE_TIER,
+                                       kwargs.get("service_tier", "on_demand"))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+                                       response_dict.get('system_fingerprint'))
+
+                    # Set base span attribues (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                       environment)
+                    span.set_attribute(SERVICE_NAME,
+                                       application_name)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                       kwargs.get("user", ""))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                        False)
-                    if trace_content:
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                       input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                       cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                       end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
+                    if capture_message_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                             attributes={
@@ -252,90 +404,54 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                             },
                         )
 
-                    # Set span attributes when tools is not passed to the function call
-                    if "tools" not in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "llama3-8b-8192"),
-                                                   pricing_info, response.usage.prompt_tokens,
-                                                   response.usage.completion_tokens)
-
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                           response.usage.prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                           response.usage.completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           response.usage.total_tokens)
+                    for i in range(kwargs.get('n',1)):
                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                           [response.choices[0].finish_reason])
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
-
-                        # Set span attributes for when n = 1 (default)
-                        if "n" not in kwargs or kwargs["n"] == 1:
-                            if trace_content:
-                                span.add_event(
-                                    name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[0].message.content,
-                                    },
-                                )
-
-                        # Set span attributes for when n > 0
-                        else:
-                            i = 0
-                            while i < kwargs["n"] and trace_content is True:
-                                attribute_name = f"gen_ai.completion.{i}"
-                                span.add_event(
-                                    name=attribute_name,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[i].message.content,
-                                    },
-                                )
-                                i += 1
-
-                        # Return original response
-                        return response
-
-                    # Set span attributes when tools is passed to the function call
-                    elif "tools" in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                   pricing_info, response.usage.prompt_tokens,
-                                                   response.usage.completion_tokens)
-
-                        span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                           "Function called with tools")
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                           response.usage.prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                           response.usage.completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           response.usage.total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
+                                           [response_dict.get('choices')[i].get('finish_reason')])
+                        if capture_message_content:
+                            span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                attributes={
+                                    # pylint: disable=line-too-long
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                                },
+                            )
+                        if kwargs.get('tools'):
+                            span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                               str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                        if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                               "text")
+                        elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                               "json")
 
                     span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_GROQ,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_GROQ,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=response_dict.get('model'),
+                        )
 
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(response.usage.completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                     # Return original response
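
For orientation, this is how a factory like chat() typically ends up wrapping the SDK: the registration lives in openlit/instrumentation/groq/__init__.py (changed +5/-5 above) and follows openlit's usual wrapt pattern. The sketch below is an assumption about that wiring rather than a quote from the package; the groq module path and the placeholder arguments are illustrative only.

# Hypothetical wiring sketch; the real registration code is in
# openlit/instrumentation/groq/__init__.py, which this page does not display.
from opentelemetry import trace
from wrapt import wrap_function_wrapper

from openlit.instrumentation.groq.groq import chat

wrap_function_wrapper(
    "groq.resources.chat.completions",   # assumed path of the sync client in the groq SDK
    "Completions.create",                # chat-completions entry point being wrapped
    chat(
        "1.33.11",                       # version
        "staging",                       # environment
        "demo-app",                      # application_name
        trace.get_tracer(__name__),      # tracer
        {},                              # pricing_info (normally a fetched price table)
        True,                            # capture_message_content
        None,                            # metrics dict (unused here)
        True,                            # disable_metrics, so metrics=None is safe
    ),
)

Once registered, every Completions.create call runs through wrapper(), which returns either a TracedSyncStream (stream=True) or the original response annotated with the span attributes shown above.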