openlit 1.33.9__py3-none-any.whl → 1.33.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. openlit/__helpers.py +78 -0
  2. openlit/__init__.py +41 -13
  3. openlit/instrumentation/ag2/__init__.py +9 -10
  4. openlit/instrumentation/ag2/ag2.py +134 -69
  5. openlit/instrumentation/ai21/__init__.py +6 -5
  6. openlit/instrumentation/ai21/ai21.py +71 -534
  7. openlit/instrumentation/ai21/async_ai21.py +71 -534
  8. openlit/instrumentation/ai21/utils.py +407 -0
  9. openlit/instrumentation/anthropic/__init__.py +3 -3
  10. openlit/instrumentation/anthropic/anthropic.py +5 -5
  11. openlit/instrumentation/anthropic/async_anthropic.py +5 -5
  12. openlit/instrumentation/assemblyai/__init__.py +2 -2
  13. openlit/instrumentation/assemblyai/assemblyai.py +3 -3
  14. openlit/instrumentation/astra/__init__.py +25 -25
  15. openlit/instrumentation/astra/astra.py +7 -7
  16. openlit/instrumentation/astra/async_astra.py +7 -7
  17. openlit/instrumentation/azure_ai_inference/__init__.py +5 -5
  18. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +11 -11
  19. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +11 -11
  20. openlit/instrumentation/bedrock/__init__.py +2 -2
  21. openlit/instrumentation/bedrock/bedrock.py +3 -3
  22. openlit/instrumentation/chroma/__init__.py +9 -9
  23. openlit/instrumentation/chroma/chroma.py +7 -7
  24. openlit/instrumentation/cohere/__init__.py +7 -7
  25. openlit/instrumentation/cohere/async_cohere.py +10 -10
  26. openlit/instrumentation/cohere/cohere.py +11 -11
  27. openlit/instrumentation/controlflow/__init__.py +4 -4
  28. openlit/instrumentation/controlflow/controlflow.py +5 -5
  29. openlit/instrumentation/crawl4ai/__init__.py +3 -3
  30. openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
  31. openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
  32. openlit/instrumentation/crewai/__init__.py +3 -3
  33. openlit/instrumentation/crewai/crewai.py +6 -4
  34. openlit/instrumentation/dynamiq/__init__.py +5 -5
  35. openlit/instrumentation/dynamiq/dynamiq.py +5 -5
  36. openlit/instrumentation/elevenlabs/__init__.py +5 -5
  37. openlit/instrumentation/elevenlabs/async_elevenlabs.py +4 -5
  38. openlit/instrumentation/elevenlabs/elevenlabs.py +4 -5
  39. openlit/instrumentation/embedchain/__init__.py +2 -2
  40. openlit/instrumentation/embedchain/embedchain.py +9 -9
  41. openlit/instrumentation/firecrawl/__init__.py +3 -3
  42. openlit/instrumentation/firecrawl/firecrawl.py +5 -5
  43. openlit/instrumentation/google_ai_studio/__init__.py +3 -3
  44. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +3 -3
  45. openlit/instrumentation/google_ai_studio/google_ai_studio.py +3 -3
  46. openlit/instrumentation/gpt4all/__init__.py +5 -5
  47. openlit/instrumentation/gpt4all/gpt4all.py +350 -225
  48. openlit/instrumentation/gpu/__init__.py +5 -5
  49. openlit/instrumentation/groq/__init__.py +5 -5
  50. openlit/instrumentation/groq/async_groq.py +359 -243
  51. openlit/instrumentation/groq/groq.py +359 -243
  52. openlit/instrumentation/haystack/__init__.py +2 -2
  53. openlit/instrumentation/haystack/haystack.py +5 -5
  54. openlit/instrumentation/julep/__init__.py +7 -7
  55. openlit/instrumentation/julep/async_julep.py +6 -6
  56. openlit/instrumentation/julep/julep.py +6 -6
  57. openlit/instrumentation/langchain/__init__.py +15 -9
  58. openlit/instrumentation/langchain/async_langchain.py +388 -0
  59. openlit/instrumentation/langchain/langchain.py +110 -497
  60. openlit/instrumentation/letta/__init__.py +7 -7
  61. openlit/instrumentation/letta/letta.py +10 -8
  62. openlit/instrumentation/litellm/__init__.py +9 -10
  63. openlit/instrumentation/litellm/async_litellm.py +321 -250
  64. openlit/instrumentation/litellm/litellm.py +319 -248
  65. openlit/instrumentation/llamaindex/__init__.py +2 -2
  66. openlit/instrumentation/llamaindex/llamaindex.py +5 -5
  67. openlit/instrumentation/mem0/__init__.py +2 -2
  68. openlit/instrumentation/mem0/mem0.py +5 -5
  69. openlit/instrumentation/milvus/__init__.py +2 -2
  70. openlit/instrumentation/milvus/milvus.py +7 -7
  71. openlit/instrumentation/mistral/__init__.py +13 -13
  72. openlit/instrumentation/mistral/async_mistral.py +426 -253
  73. openlit/instrumentation/mistral/mistral.py +424 -250
  74. openlit/instrumentation/multion/__init__.py +7 -7
  75. openlit/instrumentation/multion/async_multion.py +9 -7
  76. openlit/instrumentation/multion/multion.py +9 -7
  77. openlit/instrumentation/ollama/__init__.py +19 -39
  78. openlit/instrumentation/ollama/async_ollama.py +137 -563
  79. openlit/instrumentation/ollama/ollama.py +136 -563
  80. openlit/instrumentation/ollama/utils.py +333 -0
  81. openlit/instrumentation/openai/__init__.py +11 -11
  82. openlit/instrumentation/openai/async_openai.py +25 -27
  83. openlit/instrumentation/openai/openai.py +25 -27
  84. openlit/instrumentation/phidata/__init__.py +2 -2
  85. openlit/instrumentation/phidata/phidata.py +6 -4
  86. openlit/instrumentation/pinecone/__init__.py +6 -6
  87. openlit/instrumentation/pinecone/pinecone.py +7 -7
  88. openlit/instrumentation/premai/__init__.py +5 -5
  89. openlit/instrumentation/premai/premai.py +268 -219
  90. openlit/instrumentation/qdrant/__init__.py +2 -2
  91. openlit/instrumentation/qdrant/async_qdrant.py +7 -7
  92. openlit/instrumentation/qdrant/qdrant.py +7 -7
  93. openlit/instrumentation/reka/__init__.py +5 -5
  94. openlit/instrumentation/reka/async_reka.py +93 -55
  95. openlit/instrumentation/reka/reka.py +93 -55
  96. openlit/instrumentation/together/__init__.py +9 -9
  97. openlit/instrumentation/together/async_together.py +284 -242
  98. openlit/instrumentation/together/together.py +284 -242
  99. openlit/instrumentation/transformers/__init__.py +3 -3
  100. openlit/instrumentation/transformers/transformers.py +79 -48
  101. openlit/instrumentation/vertexai/__init__.py +19 -69
  102. openlit/instrumentation/vertexai/async_vertexai.py +333 -990
  103. openlit/instrumentation/vertexai/vertexai.py +333 -990
  104. openlit/instrumentation/vllm/__init__.py +3 -3
  105. openlit/instrumentation/vllm/vllm.py +65 -35
  106. openlit/otel/events.py +85 -0
  107. openlit/otel/tracing.py +3 -13
  108. openlit/semcov/__init__.py +16 -4
  109. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/METADATA +2 -2
  110. openlit-1.33.11.dist-info/RECORD +125 -0
  111. openlit-1.33.9.dist-info/RECORD +0 -121
  112. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/LICENSE +0 -0
  113. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/WHEEL +0 -0
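The changes across these files follow one pattern, shown here for openlit/instrumentation/together/async_together.py: the per-wrapper gen_ai_endpoint argument is dropped in favour of OTel-style span names, trace_content is renamed to capture_message_content, and streaming paths gain TTFT/TBT timing. A minimal sketch of opting in from application code, assuming openlit.init in 1.33.11 accepts these keyword arguments (the rename is visible in the wrapper signatures below; the surrounding parameter names are assumptions):

import openlit

# Hypothetical initialization sketch; capture_message_content replaces the
# old trace_content flag that the wrappers below no longer accept.
openlit.init(
    application_name="demo-app",      # recorded on spans as service.name
    environment="staging",            # recorded as deployment.environment
    capture_message_content=True,     # formerly trace_content
    disable_metrics=False,
)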
openlit/instrumentation/together/async_together.py

@@ -1,35 +1,38 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches, too-many-instance-attributes
 """
 Module for monitoring Together calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
     get_chat_model_cost,
     get_image_model_cost,
     handle_exception,
     response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def async_completion(gen_ai_endpoint, version, environment, application_name,
-                     tracer, pricing_info, trace_content, metrics, disable_metrics):
+def async_completion(version, environment, application_name,
+                     tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Together AI SDK.
+        application_name: Name of the application using the Together AI API.
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of Together AI usage.
-        trace_content: Flag indicating whether to trace the actual content.
+        capture_message_content: Flag indicating whether to trace the actual content.
 
     Returns:
         A function that wraps the chat completions method to add telemetry.
@@ -38,6 +41,7 @@ def async_completion(gen_ai_endpoint, version, environment, application_name,
     class TracedAsyncStream:
         """
         Wrapper for streaming responses to collect metrics and trace data.
+        Wraps the response to collect message IDs and aggregated response.
 
         This class implements the '__aiter__' and '__anext__' methods that
        handle asynchronous streaming responses.
@@ -50,6 +54,8 @@ def async_completion(gen_ai_endpoint, version, environment, application_name,
                 wrapped,
                 span,
                 kwargs,
+                server_address,
+                server_port,
                 **args,
             ):
             self.__wrapped__ = wrapped
@@ -57,12 +63,20 @@ def async_completion(gen_ai_endpoint, version, environment, application_name,
             # Placeholder for aggregating streaming response
             self._llmresponse = ""
             self._response_id = ""
-            self._prompt_tokens = 0
-            self._completion_tokens = 0
-            self._total_tokens = 0
+            self._response_model = ""
+            self._finish_reason = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
 
             self._args = args
             self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
 
         async def __aenter__(self):
             await self.__wrapped__.__aenter__()
@@ -81,6 +95,14 @@ def async_completion(gen_ai_endpoint, version, environment, application_name,
         async def __anext__(self):
             try:
                 chunk = await self.__wrapped__.__anext__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 chunked = response_as_dict(chunk)
                 # Collect message IDs and aggregated response from events
                 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
@@ -89,15 +111,22 @@ def async_completion(gen_ai_endpoint, version, environment, application_name,
                     content = chunked.get('choices')[0].get('delta').get('content')
                     if content:
                         self._llmresponse += content
-                if chunked.get("usage"):
-                    self._prompt_tokens = chunked.get("usage").get("prompt_tokens")
-                    self._completion_tokens = chunked.get("usage").get("completion_tokens")
-                    self._total_tokens = chunked.get("usage").get("total_tokens")
-                self._response_id = chunked.get('id')
+
+                if chunked.get('usage'):
+                    self._response_id = chunked.get('id')
+                    self._response_model = chunked.get('model')
+                    self._finish_reason = str(chunked.get('choices')[0].get('finish_reason'))
+                    self._input_tokens = chunked.get('usage').get('prompt_tokens')
+                    self._output_tokens = chunked.get('usage').get('completion_tokens')
+
                 return chunk
             except StopAsyncIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
@@ -106,69 +135,90 @@ def async_completion(gen_ai_endpoint, version, environment, application_name,
                         content = message["content"]
 
                         if isinstance(content, list):
-                            content_str = ", ".join(
-                                # pylint: disable=line-too-long
-                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
+                            content_str_list = []
+                            for item in content:
+                                if item["type"] == "text":
+                                    content_str_list.append(f'text: {item["text"]}')
+                                elif (item["type"] == "image_url" and
+                                      not item["image_url"]["url"].startswith("data:")):
+                                    content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+                            content_str = ", ".join(content_str_list)
                             formatted_messages.append(f"{role}: {content_str}")
                         else:
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
+                    request_model = self._kwargs.get("model", "gpt-4o")
+
                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._kwargs.get(
-                        "model",
-                        "meta-llama/Llama-3.3-70B-Instruct-Turbo"
-                        ),
-                        pricing_info, self._prompt_tokens,
-                        self._completion_tokens)
-
-                    # Set Span attributes
+                    cost = get_chat_model_cost(request_model,
+                        pricing_info, self._input_tokens,
+                        self._output_tokens)
+
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                        SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                         SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                        gen_ai_endpoint)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                        SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                        request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                        self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                        self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                        self._kwargs.get("frequency_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                        self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                        self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                        self._kwargs.get("stop", []))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                        self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                        self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                        [self._finish_reason])
                     self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                         self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                        self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                        self._input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                        self._output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                        self._server_address)
+
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                         environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    self._span.set_attribute(SERVICE_NAME,
                         application_name)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                        self._kwargs.get(
-                            "model",
-                            "meta-llama/Llama-3.3-70B-Instruct-Turbo"
-                        ))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                         self._kwargs.get("user", ""))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                        self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                        self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                        self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                        self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                        self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                        self._kwargs.get("seed", ""))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                         True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                        self._prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        self._completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        self._total_tokens)
+                        self._input_tokens + self._output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                         cost)
-                    if trace_content:
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                        self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                        self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                        version)
+                    if capture_message_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                             attributes={
@@ -181,36 +231,35 @@ def async_completion(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_TOGETHER,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model",
-                                    "meta-llama/Llama-3.3-70B-Instruct-Turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_TOGETHER,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )
 
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            self._total_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            self._input_tokens + self._output_tokens, attributes
                         )
-                        metrics["genai_completion_tokens"].add(
-                            self._completion_tokens, attributes
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
                         )
-                        metrics["genai_prompt_tokens"].add(
-                            self._prompt_tokens, attributes
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
                         )
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                 except Exception as e:
@@ -219,7 +268,6 @@ def async_completion(gen_ai_endpoint, version, environment, application_name,
                 finally:
                     self._span.end()
                 raise
-
    async def wrapper(wrapped, instance, args, kwargs):
        """
        Wraps the 'chat.completions' API call to add telemetry.
@@ -239,20 +287,25 @@ def async_completion(gen_ai_endpoint, version, environment, application_name,
 
        # Check if streaming is enabled for the API call
        streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.together.xyz", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
        # pylint: disable=no-else-return
        if streaming:
            # Special handling for streaming response to accommodate the nature of data flow
            awaited_wrapped = await wrapped(*args, **kwargs)
-            span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-            return TracedAsyncStream(awaited_wrapped, span, kwargs)
+            return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
        # Handling for non-streaming responses
        else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                response = await wrapped(*args, **kwargs)
+                end_time = time.time()
 
                response_dict = response_as_dict(response)
 
@@ -266,7 +319,6 @@ def async_completion(gen_ai_endpoint, version, environment, application_name,
 
                    if isinstance(content, list):
                        content_str = ", ".join(
-                            # pylint: disable=line-too-long
                            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                            if "type" in item else f'text: {item["text"]}'
                            for item in content
@@ -276,40 +328,67 @@ def async_completion(gen_ai_endpoint, version, environment, application_name,
                        formatted_messages.append(f"{role}: {content}")
                prompt = "\n".join(formatted_messages)
 
-                # Set base span attribues
+                input_tokens = response_dict.get('usage').get('prompt_tokens')
+                output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(request_model,
+                    pricing_info, input_tokens,
+                    output_tokens)
+
+                # Set base span attribues (OTel Semconv)
                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                    SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                    gen_ai_endpoint)
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                    SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                    request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                    kwargs.get("seed", ""))
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                    server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                    kwargs.get("frequency_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                    kwargs.get("max_tokens", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                    kwargs.get("presence_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                    kwargs.get("stop", []))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                    kwargs.get("temperature", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                    kwargs.get("top_p", 1.0))
                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                    response_dict.get("id"))
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                    response_dict.get('model'))
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                    input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                    output_tokens)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                    server_address)
+
+                # Set base span attribues (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                    kwargs.get("model",
-                        "meta-llama/Llama-3.3-70B-Instruct-Turbo"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                    kwargs.get("top_p", 1.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                    kwargs.get("max_tokens", -1))
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                    kwargs.get("user", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                    kwargs.get("temperature", 1.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                    kwargs.get("presence_penalty", 0.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                    kwargs.get("frequency_penalty", 0.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                    kwargs.get("seed", ""))
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                    False)
-                if trace_content:
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                    input_tokens + output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                    cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                    end_time - start_time)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                    version)
+                if capture_message_content:
                    span.add_event(
                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                        attributes={
@@ -317,103 +396,54 @@ def async_completion(gen_ai_endpoint, version, environment, application_name,
                        },
                    )
 
-                # Set span attributes when tools is not passed to the function call
-                if "tools" not in kwargs:
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get(
-                        "model",
-                        "meta-llama/Llama-3.3-70B-Instruct-Turbo"
-                        ),
-                        pricing_info,
-                        response_dict.get('usage', {}).get('prompt_tokens', None),
-                        response_dict.get('usage', {}).get('completion_tokens', None))
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                        response_dict.get('usage', {}).get('prompt_tokens', None))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        response_dict.get('usage', {}).get('completion_tokens', None))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        response_dict.get('usage', {}).get('total_tokens', None))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                        cost)
-
-                    # Set span attributes for when n = 1 (default)
-                    if "n" not in kwargs or kwargs["n"] == 1:
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices', [])[0].get("message").get("content"),
-                                },
-                            )
-
-                    # Set span attributes for when n > 0
-                    else:
-                        i = 0
-                        while i < kwargs["n"] and trace_content is True:
-                            attribute_name = f"gen_ai.content.completion.{i}"
-                            span.add_event(
-                                name=attribute_name,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                                },
-                            )
-                            i += 1
-
-                    # Return original response
-                    return response
-
-                # Set span attributes when tools is passed to the function call
-                elif "tools" in kwargs:
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get(
-                        "model",
-                        "meta-llama/Llama-3.3-70B-Instruct-Turbo"
-                        ),
-                        pricing_info,
-                        response_dict.get('usage').get('prompt_tokens'),
-                        response_dict.get('usage').get('completion_tokens'))
+                for i in range(kwargs.get('n',1)):
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                        [str(response_dict.get('choices')[i].get('finish_reason'))])
+                    if capture_message_content:
+                        span.add_event(
+                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                            attributes={
+                                # pylint: disable=line-too-long
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                            },
+                        )
+                    if kwargs.get('tools'):
+                        span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                            str(response_dict.get('choices')[i].get('message').get('tool_calls')))
 
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
-                        },
-                    )
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                        response_dict.get('usage').get('prompt_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        response_dict.get('usage').get('completion_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        response_dict.get('usage').get('total_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                        cost)
+                    if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            "text")
+                    elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            "json")
 
                span.set_status(Status(StatusCode.OK))
 
                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_TOGETHER,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_OPERATION:
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "meta-llama/Llama-3.3-70B-Instruct-Turbo")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_TOGETHER,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response_dict.get('model'),
+                    )
 
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_server_ttft"].record(
+                        end_time - start_time, attributes
+                    )
                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        response_dict.get('usage').get('total_tokens'), attributes)
-                    metrics["genai_completion_tokens"].add(
-                        response_dict.get('usage').get('completion_tokens'), attributes)
-                    metrics["genai_prompt_tokens"].add(
-                        response_dict.get('usage').get('prompt_tokens'), attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                    metrics["genai_cost"].record(cost, attributes)
 
                # Return original response
@@ -428,19 +458,18 @@ def async_completion(gen_ai_endpoint, version, environment, application_name,
 
    return wrapper
 
-def async_image_generate(gen_ai_endpoint, version, environment, application_name,
-                         tracer, pricing_info, trace_content, metrics, disable_metrics):
+def async_image_generate(version, environment, application_name,
+                         tracer, pricing_info, capture_message_content, metrics, disable_metrics):
    """
    Generates a telemetry wrapper for image generation to collect metrics.
 
    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Together API.
+        application_name: Name of the application using the Together AI API.
        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Together image generation.
-        trace_content: Flag indicating whether to trace the input prompt and generated images.
+        pricing_info: Information used for calculating the cost of Together AI image generation.
+        capture_message_content: Flag indicating whether to trace the input prompt and generated images.
 
    Returns:
        A function that wraps the image generation method to add telemetry.
@@ -463,8 +492,16 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name
            The response from the original 'images.generate' method.
        """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.together.xyz", 443)
+        request_model = kwargs.get("model", "dall-e-2")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
            response = await wrapped(*args, **kwargs)
+            end_time = time.time()
+
            images_count = 0
 
            try:
@@ -474,45 +511,51 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name
                else:
                    image = "url"
 
+                image_size = str(kwargs.get('width')) + 'x' + str(kwargs.get('height'))
+
                # Calculate cost of the operation
-                image_size = str(kwargs.get("width", 1024)) + "x" + str(kwargs.get("height", 1024))
-                cost_per_million = get_image_model_cost(kwargs.get(
-                    "model", "black-forest-labs/FLUX.1-dev"
-                    ),
-                    pricing_info, "1000000",
+                cost = get_image_model_cost(request_model,
+                    pricing_info, image_size,
                    kwargs.get("quality", "standard"))
-                pixels = kwargs.get("width", 1024) * kwargs.get("height", 1024)
-                cost = pixels / 1_000_000 * cost_per_million
 
                for items in response.data:
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                        SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                        SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                        gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                        SemanticConvetion.GEN_AI_SYSTEM_TOGETHER)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                        request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                        server_address)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                        server_port)
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                        response.id)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                        response.model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                        "image")
+
+                    # Set Span attributes (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                        kwargs.get("model", "black-forest-labs/FLUX.1-dev"))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                        image_size)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
-                        kwargs.get("quality", "standard"))
-                    if trace_content:
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                        version)
+
+                    if capture_message_content:
                        span.add_event(
                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                            attributes={
                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
                            },
                        )
-                        attribute_name = f"gen_ai.response.image.{images_count}"
+                        attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
                        span.add_event(
                            name=attribute_name,
                            attributes={
@@ -527,21 +570,20 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name
                span.set_status(Status(StatusCode.OK))
 
                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_TOGETHER,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_OPERATION:
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "black-forest-labs/FLUX.1-dev")
-                    }
-
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_TOGETHER,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response.model,
+                    )
+
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                    metrics["genai_requests"].add(1, attributes)
                    metrics["genai_cost"].record(cost, attributes)
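The streaming wrapper above records a timestamp per chunk and reduces them with calculate_ttft and calculate_tbt, newly imported from openlit.__helpers (+78 lines in this release). A minimal sketch of what those reductions compute, assuming the conventional definitions of time-to-first-token and mean time-between-tokens; the shipped implementations live in __helpers.py and may differ:

def calculate_ttft(timestamps, start_time):
    # Time to first token: delay from request start until the first chunk arrives.
    if timestamps:
        return timestamps[0] - start_time
    return 0

def calculate_tbt(timestamps):
    # Time between tokens: mean gap between consecutive chunk arrivals.
    if len(timestamps) > 1:
        gaps = [later - earlier for earlier, later in zip(timestamps, timestamps[1:])]
        return sum(gaps) / len(gaps)
    return 0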