openlit 1.34.7__py3-none-any.whl → 1.34.8__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in their public registry.
- openlit/instrumentation/gpt4all/__init__.py +3 -6
- openlit/instrumentation/gpt4all/gpt4all.py +75 -383
- openlit/instrumentation/gpt4all/utils.py +281 -0
- openlit/instrumentation/premai/__init__.py +2 -2
- openlit/instrumentation/premai/utils.py +4 -3
- openlit/instrumentation/reka/utils.py +3 -3
- openlit/instrumentation/together/utils.py +3 -3
- {openlit-1.34.7.dist-info → openlit-1.34.8.dist-info}/METADATA +1 -1
- {openlit-1.34.7.dist-info → openlit-1.34.8.dist-info}/RECORD +11 -10
- {openlit-1.34.7.dist-info → openlit-1.34.8.dist-info}/LICENSE +0 -0
- {openlit-1.34.7.dist-info → openlit-1.34.8.dist-info}/WHEEL +0 -0
openlit/instrumentation/gpt4all/__init__.py

@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of GPT4All Functions"""
 
 from typing import Collection

@@ -14,15 +13,15 @@ _instruments = ("gpt4all >= 2.6.0",)
 
 class GPT4AllInstrumentor(BaseInstrumentor):
     """
-    An instrumentor for GPT4All
+    An instrumentor for GPT4All client library.
     """
 
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments
 
     def _instrument(self, **kwargs):
-        application_name = kwargs.get("application_name", "default_application")
-        environment = kwargs.get("environment", "default_environment")
+        application_name = kwargs.get("application_name", "default")
+        environment = kwargs.get("environment", "default")
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info", {})

@@ -46,7 +45,5 @@ class GPT4AllInstrumentor(BaseInstrumentor):
                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
-
     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
        pass
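The `_instrument` hook above now falls back to "default" for both application_name and environment. As a hedged usage sketch (the argument values are illustrative, not prescribed), these kwargs normally arrive through openlit's top-level entry point:

import openlit

# openlit.init() forwards application_name/environment into each
# instrumentor's _instrument() kwargs; omitting them now yields the
# shorter "default" fallback introduced in this release.
openlit.init(
    application_name="my-gpt4all-app",
    environment="staging",
)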
openlit/instrumentation/gpt4all/gpt4all.py

@@ -2,66 +2,47 @@
 Module for monitoring GPT4All API calls.
 """
 
-import logging
 import time
-from opentelemetry.trace import SpanKind
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
     handle_exception,
-
-
-
-
-
+    set_server_address_and_port
+)
+from openlit.instrumentation.gpt4all.utils import (
+    process_generate_response,
+    process_chunk,
+    process_streaming_generate_response,
+    process_embedding_response
 )
 from openlit.semcov import SemanticConvention
 
-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
-
 def generate(version, environment, application_name,
-
+    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the GPT4All API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating GPT4All usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat completions method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     class TracedSyncStream:
         """
-        Wrapper for streaming responses to collect
-        Wraps the response to collect message IDs and aggregated response.
-
-        This class implements the '__aiter__' and '__anext__' methods that
-        handle asynchronous streaming responses.
-
-        This class also implements '__aenter__' and '__aexit__' methods that
-        handle asynchronous context management protocol.
+        Wrapper for streaming responses to collect telemetry.
         """
+
         def __init__(
             self,
             wrapped,
             span,
+            span_name,
+            args,
             kwargs,
             server_address,
             server_port,
             request_model,
-            **args,
         ):
             self.__wrapped__ = wrapped
             self._span = span
-
+            self._span_name = span_name
             self._llmresponse = ""
-
+            self._request_model = request_model
             self._args = args
             self._kwargs = kwargs
             self._start_time = time.time()

@@ -71,7 +52,7 @@ def generate(version, environment, application_name,
             self._tbt = 0
             self._server_address = server_address
             self._server_port = server_port
-            self.
+            self._tools = None
 
         def __enter__(self):
             self.__wrapped__.__enter__()

@@ -90,408 +71,119 @@ def generate(version, environment, application_name,
         def __next__(self):
             try:
                 chunk = self.__wrapped__.__next__()
-
-                # Record the timestamp for the current chunk
-                self._timestamps.append(end_time)
-
-                if len(self._timestamps) == 1:
-                    # Calculate time to first chunk
-                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
-
-                self._llmresponse += chunk
+                process_chunk(self, chunk)
                 return chunk
             except StopIteration:
-                # Handling exception ensure LLM observability without disrupting operation
                 try:
-                    self.
-
-
-
-
-
-
-
-
-
-
-                    # Set Span attributes (OTel Semconv)
-                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                                             SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                                             SemanticConvention.GEN_AI_SYSTEM_GPT4ALL)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                                             self._request_model)
-                    self._span.set_attribute(SemanticConvention.SERVER_PORT,
-                                             self._server_port)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                             self._kwargs.get("repeat_penalty", 1.18))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                                             self._kwargs.get("max_tokens", 200))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                             self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                                             self._kwargs.get("temp", 0.7))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                                             self._kwargs.get("top_p", 0.4))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K,
-                                             self._kwargs.get("top_k", 40))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                                             self._request_model)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                                             input_tokens)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                             output_tokens)
-                    self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                                             self._server_address)
-                    if isinstance(self._llmresponse, str):
-                        self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                                 "text")
-                    else:
-                        self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                                 "json")
-
-                    # Set Span attributes (Extra)
-                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                             environment)
-                    self._span.set_attribute(SERVICE_NAME,
-                                             application_name)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                                             True)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                                             input_tokens + output_tokens)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
-                                             self._tbt)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                                             self._ttft)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                                             version)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                                             0)
-                    if capture_message_content:
-                        self._span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        self._span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
-                            },
+                    with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
+                        process_streaming_generate_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
                         )
 
-                    self._span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
-                            request_model=self._request_model,
-                            server_address=self._server_address,
-                            server_port=self._server_port,
-                            response_model=self._request_model,
-                        )
-
-                        metrics["genai_client_usage_tokens"].record(
-                            input_tokens + output_tokens, attributes
-                        )
-                        metrics["genai_client_operation_duration"].record(
-                            self._end_time - self._start_time, attributes
-                        )
-                        metrics["genai_server_tbt"].record(
-                            self._tbt, attributes
-                        )
-                        metrics["genai_server_ttft"].record(
-                            self._ttft, attributes
-                        )
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                        metrics["genai_cost"].record(0, attributes)
-
                 except Exception as e:
                     handle_exception(self._span, e)
-
-                finally:
-                    self._span.end()
+
                 raise
 
     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the
-
-        This collects metrics such as execution time, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'chat.completions' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat.completions' method.
-            kwargs: Keyword arguments for the 'chat.completions' method.
-
-        Returns:
-            The response from the original 'chat.completions' method.
+        Wraps the GenAI function call.
        """
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("streaming", False)
 
-        server_address, server_port = set_server_address_and_port(instance, "
-        request_model = str(instance.model.model_path).rsplit(
+        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 80)
+        request_model = str(instance.model.model_path).rsplit("/", maxsplit=1)[-1] or "orca-mini-3b-gguf2-q4_0.gguf"
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-        # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = wrapped(*args, **kwargs)
             span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
-
-            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port, request_model)
+            return TracedSyncStream(awaited_wrapped, span, span_name, args, kwargs, server_address, server_port, request_model)
 
         # Handling for non-streaming responses
         else:
-            with tracer.start_as_current_span(span_name, kind=
+            with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
                 start_time = time.time()
                 response = wrapped(*args, **kwargs)
-                end_time = time.time()
 
                 try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                                       kwargs.get("repeat_penalty", 1.18))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                                       kwargs.get("max_tokens", 200))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                       kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                                       kwargs.get("temp", 0.7))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                                       kwargs.get("top_p", 0.4))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K,
-                                       kwargs.get("top_k", 40))
-                    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                                       request_model)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                                       input_tokens)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                       output_tokens)
-                    span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                                       server_address)
-                    if isinstance(response, str):
-                        span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                           "text")
-                    else:
-                        span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                           "json")
-
-                    # Set Span attributes (Extra)
-                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                       environment)
-                    span.set_attribute(SERVICE_NAME,
-                                       application_name)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                                       False)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       input_tokens + output_tokens)
-                    span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                                       end_time - start_time)
-                    span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                                       version)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                                       0)
-                    if capture_message_content:
-                        span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: response,
-                            },
-                        )
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
-                            request_model=request_model,
-                            server_address=server_address,
-                            server_port=server_port,
-                            response_model=request_model,
-                        )
-
-                        metrics["genai_client_usage_tokens"].record(
-                            input_tokens + output_tokens, attributes
-                        )
-                        metrics["genai_client_operation_duration"].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics["genai_server_ttft"].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                        metrics["genai_cost"].record(0, attributes)
-
-                    # Return original response
-                    return response
+                    response = process_generate_response(
+                        response=response,
+                        request_model=request_model,
+                        pricing_info=pricing_info,
+                        server_port=server_port,
+                        server_address=server_address,
+                        environment=environment,
+                        application_name=application_name,
+                        metrics=metrics,
+                        start_time=start_time,
+                        span=span,
+                        args=args,
+                        kwargs=kwargs,
+                        capture_message_content=capture_message_content,
+                        disable_metrics=disable_metrics,
+                        version=version
+                    )
 
                 except Exception as e:
                     handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
 
-
-                return response
+                return response
 
     return wrapper
 
 def embed(version, environment, application_name,
-
+    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the GPT4All API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating GPT4All usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the embeddings method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the
-
-        This collects metrics such as execution time, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'embeddings' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'embeddings' method.
-            kwargs: Keyword arguments for the 'embeddings' method.
-
-        Returns:
-            The response from the original 'embeddings' method.
+        Wraps the GenAI function call.
         """
 
-        server_address, server_port = set_server_address_and_port(instance, "
-
-        # pylint: disable=line-too-long
-        request_model = str(instance.gpt4all.model.model_path).rsplit('/', maxsplit=1)[-1] or "all-MiniLM-L6-v2.gguf2.f16.gguf"
+        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 80)
+        request_model = str(instance.gpt4all.model.model_path).rsplit("/", maxsplit=1)[-1] or "all-MiniLM-L6-v2.gguf2.f16.gguf"
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
 
-        with tracer.start_as_current_span(span_name, kind=
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
             start_time = time.time()
             response = wrapped(*args, **kwargs)
-            end_time = time.time()
 
             try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                                   server_port)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                                   input_tokens)
-
-                # Set Span attributes (Extras)
-                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                   environment)
-                span.set_attribute(SERVICE_NAME,
-                                   application_name)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   input_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                                   version)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                                   0)
-
-                if capture_message_content:
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvention.GEN_AI_CONTENT_PROMPT: str(kwargs.get("input", "")),
-                        },
-                    )
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = create_metrics_attributes(
-                        service_name=application_name,
-                        deployment_environment=environment,
-                        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                        system=SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
-                        request_model=request_model,
-                        server_address=server_address,
-                        server_port=server_port,
-                        response_model=request_model,
-                    )
-                    metrics["genai_client_usage_tokens"].record(
-                        input_tokens, attributes
-                    )
-                    metrics["genai_client_operation_duration"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(0, attributes)
-
-
-                # Return original response
-                return response
+                response = process_embedding_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )
 
             except Exception as e:
                 handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
 
-
-            return response
+            return response
 
     return wrapper
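TracedSyncStream above is a delegating-iterator wrapper: each chunk is folded into shared state by process_chunk, and the span work happens only once StopIteration signals the end of the stream. A self-contained sketch of the same pattern (illustrative names, not openlit's API):

import time

class TelemetryStream:
    """Delegate to a wrapped iterator, timing each chunk as it passes through."""

    def __init__(self, wrapped):
        self.__wrapped__ = wrapped
        self._timestamps = []  # one arrival time per chunk
        self._buffer = ""      # aggregated response text

    def __iter__(self):
        return self

    def __next__(self):
        try:
            chunk = next(self.__wrapped__)
            self._timestamps.append(time.time())  # per-chunk timing
            self._buffer += chunk
            return chunk
        except StopIteration:
            # Stream exhausted: only here are the full response and all chunk
            # timings known, so telemetry is emitted before re-raising.
            print(f"chunks={len(self._timestamps)} chars={len(self._buffer)}")
            raise

# Usage: wrap any string-yielding iterator; consumption is unchanged.
stream = TelemetryStream(iter(["Hello", ", ", "world"]))
print("".join(stream))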
openlit/instrumentation/gpt4all/utils.py (new file)

@@ -0,0 +1,281 @@
+"""
+GPT4All OpenTelemetry instrumentation utility functions
+"""
+import time
+
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    calculate_ttft,
+    calculate_tbt,
+    general_tokens,
+    create_metrics_attributes,
+    get_chat_model_cost,
+    get_embed_model_cost,
+)
+from openlit.semcov import SemanticConvention
+
+def format_content(prompt):
+    """
+    Process a prompt to extract content.
+    """
+    return str(prompt) if prompt else ""
+
+def process_chunk(scope, chunk):
+    """
+    Process a chunk of response data and update state.
+    """
+
+    end_time = time.time()
+    # Record the timestamp for the current chunk
+    scope._timestamps.append(end_time)
+
+    if len(scope._timestamps) == 1:
+        # Calculate time to first chunk
+        scope._ttft = calculate_ttft(scope._timestamps, scope._start_time)
+
+    scope._llmresponse += chunk
+    scope._end_time = time.time()
+
+def common_span_attributes(scope, gen_ai_operation, gen_ai_system, server_address, server_port,
+    request_model, response_model, environment, application_name, is_stream, tbt, ttft, version):
+    """
+    Set common span attributes for both generate and embed operations.
+    """
+
+    scope._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, gen_ai_operation)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, gen_ai_system)
+    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
+    scope._span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, response_model)
+    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
+    scope._span.set_attribute(SERVICE_NAME, application_name)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, tbt)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, ttft)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
+
+def record_completion_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
+    request_model, response_model, environment, application_name, start_time, end_time,
+    input_tokens, output_tokens, cost, tbt=None, ttft=None):
+    """
+    Record completion-specific metrics for the operation.
+    """
+
+    attributes = create_metrics_attributes(
+        operation=gen_ai_operation,
+        system=gen_ai_system,
+        server_address=server_address,
+        server_port=server_port,
+        request_model=request_model,
+        response_model=response_model,
+        service_name=application_name,
+        deployment_environment=environment,
+    )
+    metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
+    metrics["genai_requests"].add(1, attributes)
+    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+    metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, attributes)
+    metrics["genai_cost"].record(cost, attributes)
+    if tbt is not None:
+        metrics["genai_server_tbt"].record(tbt, attributes)
+    if ttft is not None:
+        metrics["genai_server_ttft"].record(ttft, attributes)
+
+def record_embedding_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
+    request_model, response_model, environment, application_name, start_time, end_time,
+    input_tokens, cost):
+    """
+    Record embedding-specific metrics for the operation.
+    """
+
+    attributes = create_metrics_attributes(
+        operation=gen_ai_operation,
+        system=gen_ai_system,
+        server_address=server_address,
+        server_port=server_port,
+        request_model=request_model,
+        response_model=response_model,
+        service_name=application_name,
+        deployment_environment=environment,
+    )
+    metrics["genai_client_usage_tokens"].record(input_tokens, attributes)
+    metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
+    metrics["genai_requests"].add(1, attributes)
+    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+    metrics["genai_cost"].record(cost, attributes)
+
+def common_generate_logic(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content, disable_metrics, version, is_stream):
+    """
+    Process generate request and generate Telemetry
+    """
+
+    if len(scope._timestamps) > 1:
+        scope._tbt = calculate_tbt(scope._timestamps)
+
+    prompt = format_content(scope._kwargs.get("prompt") or (scope._args[0] if scope._args else "") or "")
+    request_model = scope._request_model
+
+    # Calculate tokens using input prompt and aggregated response
+    input_tokens = general_tokens(prompt)
+    output_tokens = general_tokens(scope._llmresponse)
+
+    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, scope._kwargs.get("repeat_penalty", 1.18))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get("max_tokens", 200))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, scope._kwargs.get("presence_penalty", 0.0))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get("temp", 0.7))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get("top_p", 0.4))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, scope._kwargs.get("top_k", 40))
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+
+    # Span Attributes for Cost and Tokens
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Span Attributes for Tools
+    if scope._tools:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, scope._tools.get("function", {}).get("name", ""))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, str(scope._tools.get("id", "")))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, str(scope._tools.get("function", {}).get("arguments", "")))
+
+    # Span Attributes for Content
+    if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+        # To be removed once the change to span_attributes (from span events) is complete
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+            },
+        )
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    # Metrics
+    if not disable_metrics:
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
+            scope._server_address, scope._server_port, request_model, request_model, environment,
+            application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
+            cost, scope._tbt, scope._ttft)
+
+def common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content, disable_metrics, version):
+    """
+    Process embedding request and generate Telemetry
+    """
+
+    prompt = format_content(scope._kwargs.get("text") or "")
+    request_model = scope._request_model
+
+    input_tokens = general_tokens(prompt)
+
+    cost = get_embed_model_cost(request_model, pricing_info, input_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, False, scope._tbt, scope._ttft, version)
+
+    # Embedding-specific span attributes
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+    # Span Attributes for Content
+    if capture_message_content:
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: str(scope._kwargs.get("input", "")),
+            },
+        )
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    # Metrics
+    if not disable_metrics:
+        record_embedding_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
+            scope._server_address, scope._server_port, request_model, request_model, environment,
+            application_name, scope._start_time, scope._end_time, input_tokens, cost)
+
+def process_streaming_generate_response(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content=False, disable_metrics=False, version=""):
+    """
+    Process generate request and generate Telemetry
+    """
+    common_generate_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, is_stream=True)
+
+def process_generate_response(response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time, span, args, kwargs, capture_message_content=False,
+    disable_metrics=False, version="1.0.0"):
+    """
+    Process generate request and generate Telemetry
+    """
+
+    scope = type("GenericScope", (), {})()
+
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._llmresponse = str(response)
+    scope._request_model = request_model
+    scope._timestamps = []
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address, scope._server_port = server_address, server_port
+    scope._kwargs = kwargs
+    scope._args = args
+    scope._tools = None
+
+    common_generate_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, is_stream=False)
+
+    return response
+
+def process_embedding_response(response, request_model, pricing_info, server_port, server_address,
+    environment, application_name, metrics, start_time, span, capture_message_content=False,
+    disable_metrics=False, version="1.0.0", **kwargs):
+    """
+    Process embedding request and generate Telemetry
+    """
+
+    scope = type("GenericScope", (), {})()
+
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._request_model = request_model
+    scope._timestamps = []
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address, scope._server_port = server_address, server_port
+    scope._kwargs = kwargs
+
+    common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version)
+
+    return response
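process_generate_response and process_embedding_response above reuse the shared logic by building a throwaway scope object with `type("GenericScope", (), {})()`. A minimal sketch of why that works: the downstream functions rely only on an attribute protocol, so a non-streaming call can impersonate a stream scope (names below are illustrative):

import time

def finalize(scope):
    # The consumer cares only about attributes, not the concrete type.
    duration = scope._end_time - scope._start_time
    print(f"model={scope._request_model} duration={duration:.3f}s")

# An instance of an empty, anonymous class acts as a mutable attribute bag.
scope = type("GenericScope", (), {})()
scope._start_time = time.time()
scope._end_time = time.time()
scope._request_model = "orca-mini-3b-gguf2-q4_0.gguf"

finalize(scope)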
openlit/instrumentation/premai/__init__.py

@@ -20,8 +20,8 @@ class PremAIInstrumentor(BaseInstrumentor):
         return _instruments
 
     def _instrument(self, **kwargs):
-        application_name = kwargs.get("application_name", "default_application")
-        environment = kwargs.get("environment", "default_environment")
+        application_name = kwargs.get("application_name", "default")
+        environment = kwargs.get("environment", "default")
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info", {})
openlit/instrumentation/premai/utils.py

@@ -64,6 +64,7 @@ def process_chunk(scope, chunk):
         scope._finish_reason = chunked.get("choices")[0].get("finish_reason")
         scope._response_id = chunked.get("id")
         scope._response_model = chunked.get("model")
+        scope._end_time = time.time()
 
 def common_span_attributes(scope, gen_ai_operation, gen_ai_system, server_address, server_port,
     request_model, response_model, environment, application_name, is_stream, tbt, ttft, version):

@@ -77,12 +78,12 @@ def common_span_attributes(scope, gen_ai_operation, gen_ai_system, server_address,
     scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
     scope._span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, response_model)
     scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
     scope._span.set_attribute(SERVICE_NAME, application_name)
     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, tbt)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, ttft)
     scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
 
 def record_completion_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
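The one-line premai fix above records scope._end_time on every chunk, so duration metrics measure up to the last chunk rather than relying on an unset or stale value. A sketch of the timing arithmetic, assuming calculate_ttft and calculate_tbt behave as their names suggest (first-chunk latency, and mean gap between consecutive chunks):

def ttft_sketch(timestamps, start_time):
    # Time to first token: delay before the first chunk arrives.
    return timestamps[0] - start_time

def tbt_sketch(timestamps):
    # Mean time between consecutive chunks.
    gaps = [b - a for a, b in zip(timestamps, timestamps[1:])]
    return sum(gaps) / len(gaps) if gaps else 0.0

start = 100.0
stamps = [100.4, 100.5, 100.7]      # three chunk arrival times
print(ttft_sketch(stamps, start))   # ~0.4
print(tbt_sketch(stamps))           # ~0.15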
openlit/instrumentation/reka/utils.py

@@ -48,12 +48,12 @@ def common_span_attributes(scope, gen_ai_operation, gen_ai_system, server_address,
     scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
     scope._span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, response_model)
     scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
     scope._span.set_attribute(SERVICE_NAME, application_name)
     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, tbt)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, ttft)
     scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
 
 def record_common_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
openlit/instrumentation/together/utils.py

@@ -80,12 +80,12 @@ def common_span_attributes(scope, gen_ai_operation, gen_ai_system, server_address,
     scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
     scope._span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, response_model)
     scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
     scope._span.set_attribute(SERVICE_NAME, application_name)
     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, tbt)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, ttft)
     scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
 
 def record_common_metrics(metrics, gen_ai_operation, gen_ai_system, server_address, server_port,
{openlit-1.34.7.dist-info → openlit-1.34.8.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.34.7
+Version: 1.34.8
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
{openlit-1.34.7.dist-info → openlit-1.34.8.dist-info}/RECORD

@@ -60,8 +60,9 @@ openlit/instrumentation/google_ai_studio/__init__.py,sha256=VLNOlaTFzjOpuUzloynv
 openlit/instrumentation/google_ai_studio/async_google_ai_studio.py,sha256=UL5AdTwkzdTKUomTfETMgYjUl00qL7BB8U0izuXfKFo,5527
 openlit/instrumentation/google_ai_studio/google_ai_studio.py,sha256=nanOoXz-1uJtdh39aD438_yMk0no3AM7VVNKzDganHo,5429
 openlit/instrumentation/google_ai_studio/utils.py,sha256=-X5sHk216ajJrl4cP35f5vT8YAZaIE4yLKI7nWEKHkQ,11140
-openlit/instrumentation/gpt4all/__init__.py,sha256=
-openlit/instrumentation/gpt4all/gpt4all.py,sha256=
+openlit/instrumentation/gpt4all/__init__.py,sha256=kXciJbQMZYnTeAYLCjriVYXV7XzUUQrwEZPmyv1WXxI,1627
+openlit/instrumentation/gpt4all/gpt4all.py,sha256=6VkJbaPIDv5sbFXFiadH4IB0KljljnOZ1HaGAPuyp_E,6704
+openlit/instrumentation/gpt4all/utils.py,sha256=VQaQAdAFLQp_5IOC8ioC6GUIpJ-iztJWfTSNRmazxag,12485
 openlit/instrumentation/gpu/__init__.py,sha256=QQCFVEbRfdeTjmdFe-UeEiy19vEEWSIBpj2B1wYGhUs,11036
 openlit/instrumentation/groq/__init__.py,sha256=RszPvlPMD1j_uRu9MweyO_F_BRIqEExuB6sVQB2py4o,1901
 openlit/instrumentation/groq/async_groq.py,sha256=BDiGSS1C5uradPLDyfPCqyLWw7f5Emwe4KA1Zd7nXU8,24770

@@ -104,9 +105,9 @@ openlit/instrumentation/phidata/__init__.py,sha256=tqls5-UI6FzbjxYgq_qqAfALhWJm8
 openlit/instrumentation/phidata/phidata.py,sha256=ohrxs6i0Oik75P2BrjNGbK71tdZg94ZMmaXixrXwV5M,4834
 openlit/instrumentation/pinecone/__init__.py,sha256=0guSEPmObaZiOF8yHExpOGY-qW_egHXfZGog3rKGi8M,2596
 openlit/instrumentation/pinecone/pinecone.py,sha256=7hVUlC0HOj0yQyvLasfdb6kS46hRJQdoSRzZQ4ixIkk,8850
-openlit/instrumentation/premai/__init__.py,sha256=
+openlit/instrumentation/premai/__init__.py,sha256=3YlqyV-eNA_4aVUHDVUQUvGJRW8iVVcRtREw91yhbyw,1728
 openlit/instrumentation/premai/premai.py,sha256=rWRqfoIZUbTz-M7zgC2Z92gTVv9fCj1Z4iJcsG86YeI,6438
-openlit/instrumentation/premai/utils.py,sha256=
+openlit/instrumentation/premai/utils.py,sha256=v2DWazztCuwDMFW1IaWbWlyPLsBq9_2vTyU8zIle-ns,14942
 openlit/instrumentation/pydantic_ai/__init__.py,sha256=mq52QanFI4xDx6JK-qW5yzhFPXwznJqIYsuxRoBA2Xg,2023
 openlit/instrumentation/pydantic_ai/pydantic_ai.py,sha256=2F2hrowGqcPjTDLG9IeLY8OO-lXZKhLSU93XtZ3tt5A,1868
 openlit/instrumentation/pydantic_ai/utils.py,sha256=b0TqhSDnRqkPdM_qsOgMuXT3lwTvHzMYpaBv2qibiVo,4307

@@ -116,11 +117,11 @@ openlit/instrumentation/qdrant/qdrant.py,sha256=pafjlAzMPzYLRYFfTtWXsLKYVQls-grk
 openlit/instrumentation/reka/__init__.py,sha256=wI5KUYyTAD8ni4E98uziy9WPqoQqlzybDXanFOqDan0,1720
 openlit/instrumentation/reka/async_reka.py,sha256=CZk5rr7njThDkmrauRAJmNtMBgsLarTbQ54raPQb92A,1909
 openlit/instrumentation/reka/reka.py,sha256=wou7vVdN_1Y5UZd4tpkLpTPAtgmAl6gmh_onLn4k4GE,1908
-openlit/instrumentation/reka/utils.py,sha256=
+openlit/instrumentation/reka/utils.py,sha256=SmwP52XBcDkgLJrozHvNSqJQMSX_vQcmjpidavjNyq0,9211
 openlit/instrumentation/together/__init__.py,sha256=0UmUqQtppyK3oopb4lTjX2LITgVCR8VtH46IAV1rpA8,2484
 openlit/instrumentation/together/async_together.py,sha256=0-h5fKw6rIwN_fvWVpGuvVqizIuM9xFCzz8Z4oGgOj0,6822
 openlit/instrumentation/together/together.py,sha256=nY6mzHmHgoMbbnB_9eL0EBQjP0ltJVdkQj4pbamHAj0,6723
-openlit/instrumentation/together/utils.py,sha256=
+openlit/instrumentation/together/utils.py,sha256=n7r_pM_sqFnJEAkL7OhPydr0Uct0A74vXdcYELdbeW0,14368
 openlit/instrumentation/transformers/__init__.py,sha256=9Ubss5nlumcypxprxff8Fv3sst7II27SsvCzqkBX9Kg,1457
 openlit/instrumentation/transformers/transformers.py,sha256=y--t7PXhUfPC81w-aEE7qowMah3os9gnKBQ5bN4QLGc,1980
 openlit/instrumentation/transformers/utils.py,sha256=3f-ewpUpduaBrTVIFJKaabACjz-6Vf8K7NEU0EzQ4Nk,8042

@@ -134,7 +135,7 @@ openlit/otel/events.py,sha256=VrMjTpvnLtYRBHCiFwJojTQqqNpRCxoD4yJYeQrtPsk,3560
 openlit/otel/metrics.py,sha256=GM2PDloBGRhBTkHHkYaqmOwIAQkY124ZhW4sEqW1Fgk,7086
 openlit/otel/tracing.py,sha256=tjV2bEbEDPUB1Z46gE-UsJsb04sRdFrfbhIDkxViZc0,3103
 openlit/semcov/__init__.py,sha256=ptyo37PY-FHDx_PShEvbdns71cD4YvvXw15bCRXKCKM,13461
-openlit-1.34.7.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-openlit-1.34.7.dist-info/METADATA,sha256=
-openlit-1.34.7.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-openlit-1.34.7.dist-info/RECORD,,
+openlit-1.34.8.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+openlit-1.34.8.dist-info/METADATA,sha256=eAza_iFpQukjA6yYb6qqkLQ9aTms9b7jFjx_5r6sz3U,23469
+openlit-1.34.8.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+openlit-1.34.8.dist-info/RECORD,,

{openlit-1.34.7.dist-info → openlit-1.34.8.dist-info}/LICENSE: File without changes
{openlit-1.34.7.dist-info → openlit-1.34.8.dist-info}/WHEEL: File without changes