openlit 1.33.9__py3-none-any.whl → 1.33.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. openlit/__helpers.py +78 -0
  2. openlit/__init__.py +41 -13
  3. openlit/instrumentation/ag2/__init__.py +9 -10
  4. openlit/instrumentation/ag2/ag2.py +134 -69
  5. openlit/instrumentation/ai21/__init__.py +6 -5
  6. openlit/instrumentation/ai21/ai21.py +71 -534
  7. openlit/instrumentation/ai21/async_ai21.py +71 -534
  8. openlit/instrumentation/ai21/utils.py +407 -0
  9. openlit/instrumentation/anthropic/__init__.py +3 -3
  10. openlit/instrumentation/anthropic/anthropic.py +5 -5
  11. openlit/instrumentation/anthropic/async_anthropic.py +5 -5
  12. openlit/instrumentation/assemblyai/__init__.py +2 -2
  13. openlit/instrumentation/assemblyai/assemblyai.py +3 -3
  14. openlit/instrumentation/astra/__init__.py +25 -25
  15. openlit/instrumentation/astra/astra.py +7 -7
  16. openlit/instrumentation/astra/async_astra.py +7 -7
  17. openlit/instrumentation/azure_ai_inference/__init__.py +5 -5
  18. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +11 -11
  19. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +11 -11
  20. openlit/instrumentation/bedrock/__init__.py +2 -2
  21. openlit/instrumentation/bedrock/bedrock.py +3 -3
  22. openlit/instrumentation/chroma/__init__.py +9 -9
  23. openlit/instrumentation/chroma/chroma.py +7 -7
  24. openlit/instrumentation/cohere/__init__.py +7 -7
  25. openlit/instrumentation/cohere/async_cohere.py +10 -10
  26. openlit/instrumentation/cohere/cohere.py +11 -11
  27. openlit/instrumentation/controlflow/__init__.py +4 -4
  28. openlit/instrumentation/controlflow/controlflow.py +5 -5
  29. openlit/instrumentation/crawl4ai/__init__.py +3 -3
  30. openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
  31. openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
  32. openlit/instrumentation/crewai/__init__.py +3 -3
  33. openlit/instrumentation/crewai/crewai.py +6 -4
  34. openlit/instrumentation/dynamiq/__init__.py +5 -5
  35. openlit/instrumentation/dynamiq/dynamiq.py +5 -5
  36. openlit/instrumentation/elevenlabs/__init__.py +5 -5
  37. openlit/instrumentation/elevenlabs/async_elevenlabs.py +4 -5
  38. openlit/instrumentation/elevenlabs/elevenlabs.py +4 -5
  39. openlit/instrumentation/embedchain/__init__.py +2 -2
  40. openlit/instrumentation/embedchain/embedchain.py +9 -9
  41. openlit/instrumentation/firecrawl/__init__.py +3 -3
  42. openlit/instrumentation/firecrawl/firecrawl.py +5 -5
  43. openlit/instrumentation/google_ai_studio/__init__.py +3 -3
  44. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +3 -3
  45. openlit/instrumentation/google_ai_studio/google_ai_studio.py +3 -3
  46. openlit/instrumentation/gpt4all/__init__.py +5 -5
  47. openlit/instrumentation/gpt4all/gpt4all.py +350 -225
  48. openlit/instrumentation/gpu/__init__.py +5 -5
  49. openlit/instrumentation/groq/__init__.py +5 -5
  50. openlit/instrumentation/groq/async_groq.py +359 -243
  51. openlit/instrumentation/groq/groq.py +359 -243
  52. openlit/instrumentation/haystack/__init__.py +2 -2
  53. openlit/instrumentation/haystack/haystack.py +5 -5
  54. openlit/instrumentation/julep/__init__.py +7 -7
  55. openlit/instrumentation/julep/async_julep.py +6 -6
  56. openlit/instrumentation/julep/julep.py +6 -6
  57. openlit/instrumentation/langchain/__init__.py +15 -9
  58. openlit/instrumentation/langchain/async_langchain.py +388 -0
  59. openlit/instrumentation/langchain/langchain.py +110 -497
  60. openlit/instrumentation/letta/__init__.py +7 -7
  61. openlit/instrumentation/letta/letta.py +10 -8
  62. openlit/instrumentation/litellm/__init__.py +9 -10
  63. openlit/instrumentation/litellm/async_litellm.py +321 -250
  64. openlit/instrumentation/litellm/litellm.py +319 -248
  65. openlit/instrumentation/llamaindex/__init__.py +2 -2
  66. openlit/instrumentation/llamaindex/llamaindex.py +5 -5
  67. openlit/instrumentation/mem0/__init__.py +2 -2
  68. openlit/instrumentation/mem0/mem0.py +5 -5
  69. openlit/instrumentation/milvus/__init__.py +2 -2
  70. openlit/instrumentation/milvus/milvus.py +7 -7
  71. openlit/instrumentation/mistral/__init__.py +13 -13
  72. openlit/instrumentation/mistral/async_mistral.py +426 -253
  73. openlit/instrumentation/mistral/mistral.py +424 -250
  74. openlit/instrumentation/multion/__init__.py +7 -7
  75. openlit/instrumentation/multion/async_multion.py +9 -7
  76. openlit/instrumentation/multion/multion.py +9 -7
  77. openlit/instrumentation/ollama/__init__.py +19 -39
  78. openlit/instrumentation/ollama/async_ollama.py +137 -563
  79. openlit/instrumentation/ollama/ollama.py +136 -563
  80. openlit/instrumentation/ollama/utils.py +333 -0
  81. openlit/instrumentation/openai/__init__.py +11 -11
  82. openlit/instrumentation/openai/async_openai.py +25 -27
  83. openlit/instrumentation/openai/openai.py +25 -27
  84. openlit/instrumentation/phidata/__init__.py +2 -2
  85. openlit/instrumentation/phidata/phidata.py +6 -4
  86. openlit/instrumentation/pinecone/__init__.py +6 -6
  87. openlit/instrumentation/pinecone/pinecone.py +7 -7
  88. openlit/instrumentation/premai/__init__.py +5 -5
  89. openlit/instrumentation/premai/premai.py +268 -219
  90. openlit/instrumentation/qdrant/__init__.py +2 -2
  91. openlit/instrumentation/qdrant/async_qdrant.py +7 -7
  92. openlit/instrumentation/qdrant/qdrant.py +7 -7
  93. openlit/instrumentation/reka/__init__.py +5 -5
  94. openlit/instrumentation/reka/async_reka.py +93 -55
  95. openlit/instrumentation/reka/reka.py +93 -55
  96. openlit/instrumentation/together/__init__.py +9 -9
  97. openlit/instrumentation/together/async_together.py +284 -242
  98. openlit/instrumentation/together/together.py +284 -242
  99. openlit/instrumentation/transformers/__init__.py +3 -3
  100. openlit/instrumentation/transformers/transformers.py +79 -48
  101. openlit/instrumentation/vertexai/__init__.py +19 -69
  102. openlit/instrumentation/vertexai/async_vertexai.py +333 -990
  103. openlit/instrumentation/vertexai/vertexai.py +333 -990
  104. openlit/instrumentation/vllm/__init__.py +3 -3
  105. openlit/instrumentation/vllm/vllm.py +65 -35
  106. openlit/otel/events.py +85 -0
  107. openlit/otel/tracing.py +3 -13
  108. openlit/semcov/__init__.py +16 -4
  109. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/METADATA +2 -2
  110. openlit-1.33.11.dist-info/RECORD +125 -0
  111. openlit-1.33.9.dist-info/RECORD +0 -121
  112. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/LICENSE +0 -0
  113. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/WHEEL +0 -0
openlit/instrumentation/gpt4all/gpt4all.py
@@ -1,208 +1,325 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
 """
 Module for monitoring GPT4All API calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
-from openlit.__helpers import handle_exception, general_tokens
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from openlit.__helpers import (
+    handle_exception,
+    general_tokens,
+    create_metrics_attributes,
+    set_server_address_and_port,
+    calculate_tbt,
+    calculate_ttft
+)
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def generate(gen_ai_endpoint, version, environment, application_name,
-             tracer, pricing_info, trace_content, metrics, disable_metrics):
+def generate(version, environment, application_name,
+             tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for generate to collect metrics.
+    Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the GPT4All API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of GPT4All usage.
-        trace_content: Flag indicating whether to trace the actual content.
+        pricing_info: Information used for calculating GPT4All usage.
+        capture_message_content: Flag indicating whether to trace the actual content.
 
     Returns:
-        A function that wraps the generate method to add telemetry.
+        A function that wraps the chat completions method to add telemetry.
     """
 
+    class TracedSyncStream:
+        """
+        Wrapper for streaming responses to collect metrics and trace data.
+        Wraps the response to collect message IDs and aggregated response.
+
+        This class implements the '__aiter__' and '__anext__' methods that
+        handle asynchronous streaming responses.
+
+        This class also implements '__aenter__' and '__aexit__' methods that
+        handle asynchronous context management protocol.
+        """
+        def __init__(
+                self,
+                wrapped,
+                span,
+                kwargs,
+                server_address,
+                server_port,
+                request_model,
+                **args,
+            ):
+            self.__wrapped__ = wrapped
+            self._span = span
+            # Placeholder for aggregating streaming response
+            self._llmresponse = ""
+
+            self._args = args
+            self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
+            self._request_model = request_model
+
+        def __enter__(self):
+            self.__wrapped__.__enter__()
+            return self
+
+        def __exit__(self, exc_type, exc_value, traceback):
+            self.__wrapped__.__exit__(exc_type, exc_value, traceback)
+
+        def __iter__(self):
+            return self
+
+        def __getattr__(self, name):
+            """Delegate attribute access to the wrapped object."""
+            return getattr(self.__wrapped__, name)
+
+        def __next__(self):
+            try:
+                chunk = self.__wrapped__.__next__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+                self._llmresponse += chunk
+                return chunk
+            except StopIteration:
+                # Handling exception ensure LLM observability without disrupting operation
+                try:
+                    self._end_time = time.time()
+
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
+                    prompt = self._kwargs.get("prompt") or self._args[0] or ""
+
+                    # Calculate tokens using input prompt and aggregated response
+                    input_tokens = general_tokens(prompt)
+                    output_tokens = general_tokens(self._llmresponse)
+
+                    # Set Span attributes (OTel Semconv)
+                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                             SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                             SemanticConvetion.GEN_AI_SYSTEM_GPT4ALL)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                             self._request_model)
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                             self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                             self._kwargs.get("repeat_penalty", 1.18))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                             self._kwargs.get("max_tokens", 200))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                             self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                             self._kwargs.get("temp", 0.7))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                             self._kwargs.get("top_p", 0.4))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
+                                             self._kwargs.get("top_k", 40))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                             self._request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                             input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                             output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                             self._server_address)
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                             environment)
+                    self._span.set_attribute(SERVICE_NAME,
+                                             application_name)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                             True)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                             input_tokens + output_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                             self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                             self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                             version)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                             0)
+                    if capture_message_content:
+                        self._span.add_event(
+                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                            attributes={
+                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                            },
+                        )
+                        self._span.add_event(
+                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                            attributes={
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
+                            },
+                        )
+
+                    self._span.set_status(Status(StatusCode.OK))
+
+                    if disable_metrics is False:
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_GPT4ALL,
+                            request_model=self._request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._request_model,
+                        )
+
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
+                        )
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                        metrics["genai_cost"].record(0, attributes)
+
+                except Exception as e:
+                    handle_exception(self._span, e)
+                    logger.error("Error in trace creation: %s", e)
+                finally:
+                    self._span.end()
+                raise
+
     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'generate' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
+        Wraps the 'chat.completions' API call to add telemetry.
+
+        This collects metrics such as execution time, and token usage, and handles errors
         gracefully, adding details to the trace for observability.
 
         Args:
-            wrapped: The original 'generate' method to be wrapped.
+            wrapped: The original 'chat.completions' method to be wrapped.
             instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'generate' method.
-            kwargs: Keyword arguments for the 'generate' method.
+            args: Positional arguments for the 'chat.completions' method.
+            kwargs: Keyword arguments for the 'chat.completions' method.
 
         Returns:
-            The response from the original 'generate' method.
+            The response from the original 'chat.completions' method.
         """
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("streaming", False)
 
+        server_address, server_port = set_server_address_and_port(instance, "localhost", 80)
+        request_model = str(instance.model.model_path).rsplit('/', maxsplit=1)[-1] or "orca-mini-3b-gguf2-q4_0.gguf"
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
        # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
-            def stream_generator():
-                with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                    # Placeholder for aggregating streaming response
-                    llmresponse = ""
-
-                    # Loop through streaming events capturing relevant details
-                    for chunk in wrapped(*args, **kwargs):
-                        # Collect aggregated response from events
-                        llmresponse += chunk
-
-                        yield chunk
-
-                    # Handling exception ensure observability without disrupting operation
-                    try:
-                        # Calculate cost of the operation
-                        cost = 0
-
-                        # pylint: disable=line-too-long
-                        model = str(instance.model.model_path).rsplit('/', maxsplit=1)[-1] or "orca-mini-3b-gguf2-q4_0.gguf"
-                        prompt = kwargs.get("prompt") or args[0] or ""
-
-                        # Calculate cost of the operation
-                        cost = 0
-                        prompt_tokens = general_tokens(prompt)
-                        completion_tokens = general_tokens(llmresponse)
-                        total_tokens = prompt_tokens + completion_tokens
-
-                        # Set base span attribues
-                        span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                           SemanticConvetion.GEN_AI_SYSTEM_GPT4ALL)
-                        span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                           SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                           gen_ai_endpoint)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                           environment)
-                        span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                           application_name)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                           model)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
-                                           kwargs.get("top_k", 40))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                           kwargs.get("top_p", 0.4))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                           kwargs.get("max_tokens", 200))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                           kwargs.get("temperature", 0.7))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                           kwargs.get("frequency_penalty", 1.18))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                           True)
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                                },
-                            )
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                                },
-                            )
-
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                           prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                           completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
-
-                        span.set_status(Status(StatusCode.OK))
-
-                        if disable_metrics is False:
-                            attributes = {
-                                TELEMETRY_SDK_NAME:
-                                    "openlit",
-                                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                    application_name,
-                                SemanticConvetion.GEN_AI_SYSTEM:
-                                    SemanticConvetion.GEN_AI_SYSTEM_GPT4ALL,
-                                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                    environment,
-                                SemanticConvetion.GEN_AI_OPERATION:
-                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                    model
-                            }
-
-                            metrics["genai_requests"].add(1, attributes)
-                            metrics["genai_total_tokens"].add(total_tokens, attributes)
-                            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                            metrics["genai_cost"].record(cost, attributes)
-
-                    except Exception as e:
-                        handle_exception(span, e)
-                        logger.error("Error in trace creation: %s", e)
-
-            return stream_generator()
+            awaited_wrapped = wrapped(*args, **kwargs)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
+
+            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port, request_model)
 
         # Handling for non-streaming responses
         else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = wrapped(*args, **kwargs)
+                end_time = time.time()
 
-                # pylint: disable=line-too-long
-                model = str(instance.model.model_path).rsplit('/', maxsplit=1)[-1] or "orca-mini-3b-gguf2-q4_0.gguf"
-                prompt = kwargs.get("prompt") or args[0] or ""
+                try:
+                    prompt = kwargs.get("prompt") or args[0] or ""
 
-                # Calculate cost of the operation
-                cost = 0
-                prompt_tokens = general_tokens(prompt)
-                completion_tokens = general_tokens(response)
-                total_tokens = prompt_tokens + completion_tokens
+                    # Calculate tokens using input prompt and aggregated response
+                    input_tokens = general_tokens(str(prompt))
+                    output_tokens = general_tokens(str(response))
 
-                try:
-                    # Set base span attribues
+                    # Set Span attributes (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                       SemanticConvetion.GEN_AI_SYSTEM_GPT4ALL)
                     span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                       environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                       application_name)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                       SemanticConvetion.GEN_AI_SYSTEM_GPT4ALL)
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       model)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
-                                       kwargs.get("top_k", 40))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                       kwargs.get("top_p", 0.4))
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                       kwargs.get("repeat_penalty", 1.18))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                                        kwargs.get("max_tokens", 200))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                       kwargs.get("presence_penalty", 0.0))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                       kwargs.get("temperature", 0.7))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                       kwargs.get("frequency_penalty", 1.18))
+                                       kwargs.get("temp", 0.7))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                       kwargs.get("top_p", 0.4))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
+                                       kwargs.get("top_k", 40))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                       input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                       output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    if isinstance(response, str):
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                           "text")
+                    else:
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                           "json")
+
+                    # Set Span attributes (Extra)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                       environment)
+                    span.set_attribute(SERVICE_NAME,
+                                       application_name)
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                        False)
-                    if trace_content:
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                       input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                       end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                       0)
+                    if capture_message_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                            attributes={
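The streaming wrapper above does no timing math of its own: it only appends a wall-clock timestamp per chunk and defers to the calculate_ttft and calculate_tbt helpers this release adds to openlit/__helpers.py (+78 lines, not shown in this view). Judging purely from the call sites — calculate_ttft(timestamps, start_time) once the first chunk arrives, calculate_tbt(timestamps) once at least two chunks exist — a minimal sketch consistent with that usage might look like this; the shipped implementations may differ:

    # Hypothetical reconstruction of the timing helpers, inferred from their
    # call sites in TracedSyncStream above; not the shipped implementation.

    def calculate_ttft(timestamps, start_time):
        # Time to first token: delay from request start to the first chunk.
        if timestamps:
            return timestamps[0] - start_time
        return 0

    def calculate_tbt(timestamps):
        # Time between tokens: average gap between consecutive chunk arrivals.
        if len(timestamps) < 2:
            return 0
        gaps = [later - earlier for earlier, later in zip(timestamps, timestamps[1:])]
        return sum(gaps) / len(gaps)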
@@ -216,38 +333,33 @@ def generate(gen_ai_endpoint, version, environment, application_name,
                             },
                         )
 
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                       prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                       completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       total_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                       cost)
-
                    span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_GPT4ALL,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                model
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_GPT4ALL,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=request_model,
+                        )
 
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                        metrics["genai_cost"].record(0, attributes)
 
                     # Return original response
                     return response
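Both code paths now build their metric labels through create_metrics_attributes instead of an inline dict, so every genai_* instrument carries the same label set. The helper itself ships in openlit/__helpers.py and is not part of this hunk; inferred purely from the keyword arguments passed above, a plausible sketch is:

    # Hypothetical sketch of create_metrics_attributes, inferred from its
    # call sites in this file; the shipped helper may differ in detail.

    from opentelemetry.sdk.resources import (
        SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
    )
    from openlit.semcov import SemanticConvetion

    def create_metrics_attributes(service_name, deployment_environment, operation,
                                  system, request_model, server_address, server_port,
                                  response_model):
        # One shared attribute dict keeps all genai_* metrics labeled consistently.
        return {
            TELEMETRY_SDK_NAME: "openlit",
            SERVICE_NAME: service_name,
            DEPLOYMENT_ENVIRONMENT: deployment_environment,
            SemanticConvetion.GEN_AI_OPERATION: operation,
            SemanticConvetion.GEN_AI_SYSTEM: system,
            SemanticConvetion.GEN_AI_REQUEST_MODEL: request_model,
            SemanticConvetion.SERVER_ADDRESS: server_address,
            SemanticConvetion.SERVER_PORT: server_port,
            SemanticConvetion.GEN_AI_RESPONSE_MODEL: response_model,
        }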
@@ -261,20 +373,19 @@ def generate(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def embed(gen_ai_endpoint, version, environment, application_name,
-          tracer, pricing_info, trace_content, metrics, disable_metrics):
+def embed(version, environment, application_name,
+          tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the GPT4All API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of GPT4All usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
+        pricing_info: Information used for calculating GPT4All usage.
+        capture_message_content: Flag indicating whether to trace the actual content.
+
     Returns:
         A function that wraps the embeddings method to add telemetry.
     """
@@ -283,7 +394,7 @@ def embed(gen_ai_endpoint, version, environment, application_name,
         """
         Wraps the 'embeddings' API call to add telemetry.
 
-        This collects metrics such as execution time, cost, and token usage, and handles errors
+        This collects metrics such as execution time, and token usage, and handles errors
         gracefully, adding details to the trace for observability.
 
         Args:
@@ -296,68 +407,82 @@ def embed(gen_ai_endpoint, version, environment, application_name,
             The response from the original 'embeddings' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "localhost", 80)
+
+        # pylint: disable=line-too-long
+        request_model = str(instance.gpt4all.model.model_path).rsplit('/', maxsplit=1)[-1] or "all-MiniLM-L6-v2.gguf2.f16.gguf"
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
 
             try:
-                # pylint: disable=line-too-long
-                model = str(instance.gpt4all.model.model_path).rsplit('/', maxsplit=1)[-1] or "all-MiniLM-L6-v2.gguf2.f16.gguf"
                 prompt = kwargs.get("prompt") or args[0] or ""
+                input_tokens = general_tokens(prompt)
 
-                # Calculate cost of the operation
-                cost = 0
-                prompt_tokens = general_tokens(prompt)
-
-                # Set Span attributes
+                # Set Span attributes (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                   SemanticConvetion.GEN_AI_SYSTEM_GPT4ALL)
                 span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                   environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                   application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                   SemanticConvetion.GEN_AI_SYSTEM_GPT4ALL)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   model)
+                                   request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                   prompt_tokens)
+                                   input_tokens)
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                   environment)
+                span.set_attribute(SERVICE_NAME,
+                                   application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   prompt_tokens)
+                                   input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                   cost)
-                if trace_content:
+                                   0)
+
+                if capture_message_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                         attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: str(kwargs.get("input", "")),
                         },
                     )
 
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_GPT4ALL,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_OPERATION:
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            model
-                    }
-
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_GPT4ALL,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(prompt_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                    metrics["genai_cost"].record(0, attributes)
+
 
                 # Return original response
                 return response
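For context, these wrappers are what openlit's GPT4All instrumentor attaches to the GPT4All client methods, so they fire on ordinary calls once openlit is initialized. The snippet below is illustrative only — the model file name and sampling parameters are arbitrary examples, not values required by this diff:

    # Illustrative usage; assumes the gpt4all package and a local model file.
    import openlit
    from gpt4all import GPT4All

    openlit.init(application_name="demo-app", environment="staging")

    llm = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")

    # Non-streaming: the wrapper times the whole call and emits one
    # "chat <model>" span plus the genai_* metrics recorded above.
    print(llm.generate("Why is the sky blue?", max_tokens=64, temp=0.7))

    # Streaming: the wrapper hands back a TracedSyncStream; TTFT/TBT come
    # from per-chunk timestamps and the span ends when iteration stops.
    for token in llm.generate("Tell me a short story", streaming=True):
        print(token, end="")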