openlit 1.34.7__py3-none-any.whl → 1.34.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +46 -0
- openlit/instrumentation/assemblyai/__init__.py +14 -18
- openlit/instrumentation/assemblyai/assemblyai.py +29 -120
- openlit/instrumentation/assemblyai/utils.py +142 -0
- openlit/instrumentation/elevenlabs/__init__.py +5 -27
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +29 -119
- openlit/instrumentation/elevenlabs/elevenlabs.py +28 -118
- openlit/instrumentation/elevenlabs/utils.py +133 -0
- openlit/instrumentation/gpt4all/__init__.py +3 -6
- openlit/instrumentation/gpt4all/gpt4all.py +75 -383
- openlit/instrumentation/gpt4all/utils.py +281 -0
- openlit/instrumentation/ollama/__init__.py +5 -6
- openlit/instrumentation/ollama/async_ollama.py +65 -62
- openlit/instrumentation/ollama/ollama.py +65 -62
- openlit/instrumentation/ollama/utils.py +180 -239
- openlit/instrumentation/premai/__init__.py +2 -2
- openlit/instrumentation/premai/utils.py +4 -3
- openlit/instrumentation/reka/utils.py +3 -3
- openlit/instrumentation/together/utils.py +3 -3
- {openlit-1.34.7.dist-info → openlit-1.34.10.dist-info}/METADATA +1 -1
- {openlit-1.34.7.dist-info → openlit-1.34.10.dist-info}/RECORD +23 -20
- {openlit-1.34.7.dist-info → openlit-1.34.10.dist-info}/LICENSE +0 -0
- {openlit-1.34.7.dist-info → openlit-1.34.10.dist-info}/WHEEL +0 -0
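
The biggest change in this range is the rewrite of the GPT4All instrumentation (`openlit/instrumentation/gpt4all/gpt4all.py`, diffed below): the inline span-attribute, event, and metric code moves into helpers in the new `openlit/instrumentation/gpt4all/utils.py` (`process_chunk`, `process_generate_response`, `process_streaming_generate_response`, `process_embedding_response`), and the wrappers now only open a span and delegate to those helpers. A minimal sketch of that wrapper/helper split is shown here; the `traced` and `record_telemetry` names are illustrative only and are not part of openlit.

```python
# Illustrative sketch of the wrapper/helper split adopted in gpt4all.py.
# `record_telemetry` stands in for helpers such as process_generate_response;
# it is a hypothetical name, not an openlit API.
import time
from opentelemetry import trace

tracer = trace.get_tracer("sketch")

def record_telemetry(span, response, start_time, **kwargs):
    # In openlit, the utils.py helpers set span attributes, events, and metrics here.
    span.set_attribute("demo.duration_seconds", time.time() - start_time)  # illustrative key
    return response

def traced(wrapped):
    # The wrapper only manages the span lifecycle; all attribute logic is delegated.
    def wrapper(*args, **kwargs):
        with tracer.start_as_current_span("chat sketch") as span:
            start_time = time.time()
            response = wrapped(*args, **kwargs)
            return record_telemetry(span, response, start_time, **kwargs)
    return wrapper
```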
```diff
--- a/openlit/instrumentation/gpt4all/gpt4all.py
+++ b/openlit/instrumentation/gpt4all/gpt4all.py
@@ -2,66 +2,47 @@
 Module for monitoring GPT4All API calls.
 """
 
-import logging
 import time
-from opentelemetry.trace import SpanKind
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
     handle_exception,
-
-
-
-
-
+    set_server_address_and_port
+)
+from openlit.instrumentation.gpt4all.utils import (
+    process_generate_response,
+    process_chunk,
+    process_streaming_generate_response,
+    process_embedding_response
 )
 from openlit.semcov import SemanticConvention
 
-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
-
 def generate(version, environment, application_name,
-
+    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the GPT4All API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating GPT4All usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat completions method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     class TracedSyncStream:
         """
-        Wrapper for streaming responses to collect
-        Wraps the response to collect message IDs and aggregated response.
-
-        This class implements the '__aiter__' and '__anext__' methods that
-        handle asynchronous streaming responses.
-
-        This class also implements '__aenter__' and '__aexit__' methods that
-        handle asynchronous context management protocol.
+        Wrapper for streaming responses to collect telemetry.
         """
+
         def __init__(
             self,
             wrapped,
             span,
+            span_name,
+            args,
             kwargs,
             server_address,
             server_port,
             request_model,
-            **args,
         ):
             self.__wrapped__ = wrapped
             self._span = span
-
+            self._span_name = span_name
             self._llmresponse = ""
-
+            self._request_model = request_model
             self._args = args
             self._kwargs = kwargs
             self._start_time = time.time()
@@ -71,7 +52,7 @@ def generate(version, environment, application_name,
             self._tbt = 0
             self._server_address = server_address
             self._server_port = server_port
-            self.
+            self._tools = None
 
         def __enter__(self):
             self.__wrapped__.__enter__()
@@ -90,408 +71,119 @@ def generate(version, environment, application_name,
         def __next__(self):
             try:
                 chunk = self.__wrapped__.__next__()
-
-                # Record the timestamp for the current chunk
-                self._timestamps.append(end_time)
-
-                if len(self._timestamps) == 1:
-                    # Calculate time to first chunk
-                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
-
-                self._llmresponse += chunk
+                process_chunk(self, chunk)
                 return chunk
             except StopIteration:
-                # Handling exception ensure LLM observability without disrupting operation
                 try:
-                    self.
-
-
-
-
-
-
-
-
-
-
-                    # Set Span attributes (OTel Semconv)
-                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                        SemanticConvention.GEN_AI_SYSTEM_GPT4ALL)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                        self._request_model)
-                    self._span.set_attribute(SemanticConvention.SERVER_PORT,
-                        self._server_port)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                        self._kwargs.get("repeat_penalty", 1.18))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                        self._kwargs.get("max_tokens", 200))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                        self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                        self._kwargs.get("temp", 0.7))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                        self._kwargs.get("top_p", 0.4))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K,
-                        self._kwargs.get("top_k", 40))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                        self._request_model)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                        input_tokens)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        output_tokens)
-                    self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                        self._server_address)
-                    if isinstance(self._llmresponse, str):
-                        self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                            "text")
-                    else:
-                        self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                            "json")
-
-                    # Set Span attributes (Extra)
-                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                        environment)
-                    self._span.set_attribute(SERVICE_NAME,
-                        application_name)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                        True)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                        input_tokens + output_tokens)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
-                        self._tbt)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                        self._ttft)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                        version)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                        0)
-                    if capture_message_content:
-                        self._span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        self._span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
-                            },
+                    with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
+                        process_streaming_generate_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
                         )
 
-                    self._span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
-                            request_model=self._request_model,
-                            server_address=self._server_address,
-                            server_port=self._server_port,
-                            response_model=self._request_model,
-                        )
-
-                        metrics["genai_client_usage_tokens"].record(
-                            input_tokens + output_tokens, attributes
-                        )
-                        metrics["genai_client_operation_duration"].record(
-                            self._end_time - self._start_time, attributes
-                        )
-                        metrics["genai_server_tbt"].record(
-                            self._tbt, attributes
-                        )
-                        metrics["genai_server_ttft"].record(
-                            self._ttft, attributes
-                        )
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                        metrics["genai_cost"].record(0, attributes)
-
                 except Exception as e:
                     handle_exception(self._span, e)
-
-                finally:
-                    self._span.end()
+
                 raise
 
     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the
-
-        This collects metrics such as execution time, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'chat.completions' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat.completions' method.
-            kwargs: Keyword arguments for the 'chat.completions' method.
-
-        Returns:
-            The response from the original 'chat.completions' method.
+        Wraps the GenAI function call.
         """
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("streaming", False)
 
-        server_address, server_port = set_server_address_and_port(instance, "
-        request_model = str(instance.model.model_path).rsplit(
+        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 80)
+        request_model = str(instance.model.model_path).rsplit("/", maxsplit=1)[-1] or "orca-mini-3b-gguf2-q4_0.gguf"
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-        # pylint: disable=no-else-return
        if streaming:
            # Special handling for streaming response to accommodate the nature of data flow
            awaited_wrapped = wrapped(*args, **kwargs)
            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
-
-            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port, request_model)
+            return TracedSyncStream(awaited_wrapped, span, span_name, args, kwargs, server_address, server_port, request_model)
 
        # Handling for non-streaming responses
        else:
-            with tracer.start_as_current_span(span_name, kind=
+            with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
                start_time = time.time()
                response = wrapped(*args, **kwargs)
-                end_time = time.time()
 
                try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        kwargs.get("repeat_penalty", 1.18))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                        kwargs.get("max_tokens", 200))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                        kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                        kwargs.get("temp", 0.7))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                        kwargs.get("top_p", 0.4))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K,
-                        kwargs.get("top_k", 40))
-                    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                        request_model)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                        input_tokens)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        output_tokens)
-                    span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                        server_address)
-                    if isinstance(response, str):
-                        span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                            "text")
-                    else:
-                        span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                            "json")
-
-                    # Set Span attributes (Extra)
-                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                        environment)
-                    span.set_attribute(SERVICE_NAME,
-                        application_name)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                        False)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                        input_tokens + output_tokens)
-                    span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                        end_time - start_time)
-                    span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                        version)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                        0)
-                    if capture_message_content:
-                        span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: response,
-                            },
-                        )
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
-                            request_model=request_model,
-                            server_address=server_address,
-                            server_port=server_port,
-                            response_model=request_model,
-                        )
-
-                        metrics["genai_client_usage_tokens"].record(
-                            input_tokens + output_tokens, attributes
-                        )
-                        metrics["genai_client_operation_duration"].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics["genai_server_ttft"].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                        metrics["genai_cost"].record(0, attributes)
-
-                    # Return original response
-                    return response
+                    response = process_generate_response(
+                        response=response,
+                        request_model=request_model,
+                        pricing_info=pricing_info,
+                        server_port=server_port,
+                        server_address=server_address,
+                        environment=environment,
+                        application_name=application_name,
+                        metrics=metrics,
+                        start_time=start_time,
+                        span=span,
+                        args=args,
+                        kwargs=kwargs,
+                        capture_message_content=capture_message_content,
+                        disable_metrics=disable_metrics,
+                        version=version
+                    )
 
                except Exception as e:
                    handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
 
-
-                return response
+                return response
 
     return wrapper
 
 def embed(version, environment, application_name,
-
+    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the GPT4All API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating GPT4All usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the embeddings method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the
-
-        This collects metrics such as execution time, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'embeddings' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'embeddings' method.
-            kwargs: Keyword arguments for the 'embeddings' method.
-
-        Returns:
-            The response from the original 'embeddings' method.
+        Wraps the GenAI function call.
         """
 
-        server_address, server_port = set_server_address_and_port(instance, "
-
-        # pylint: disable=line-too-long
-        request_model = str(instance.gpt4all.model.model_path).rsplit('/', maxsplit=1)[-1] or "all-MiniLM-L6-v2.gguf2.f16.gguf"
+        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 80)
+        request_model = str(instance.gpt4all.model.model_path).rsplit("/", maxsplit=1)[-1] or "all-MiniLM-L6-v2.gguf2.f16.gguf"
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
 
-        with tracer.start_as_current_span(span_name, kind=
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
            start_time = time.time()
            response = wrapped(*args, **kwargs)
-            end_time = time.time()
 
            try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    server_port)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                    input_tokens)
-
-                # Set Span attributes (Extras)
-                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                    environment)
-                span.set_attribute(SERVICE_NAME,
-                    application_name)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                    input_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                    version)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                    0)
-
-                if capture_message_content:
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvention.GEN_AI_CONTENT_PROMPT: str(kwargs.get("input", "")),
-                        },
-                    )
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = create_metrics_attributes(
-                        service_name=application_name,
-                        deployment_environment=environment,
-                        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                        system=SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
-                        request_model=request_model,
-                        server_address=server_address,
-                        server_port=server_port,
-                        response_model=request_model,
-                    )
-                    metrics["genai_client_usage_tokens"].record(
-                        input_tokens, attributes
-                    )
-                    metrics["genai_client_operation_duration"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(0, attributes)
-
-
-                # Return original response
-                return response
+                response = process_embedding_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )
 
            except Exception as e:
                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
 
-
-            return response
+            return response
 
     return wrapper
```
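
For context, the refactored paths above run whenever GPT4All calls are made with openlit initialized. A minimal usage sketch follows; the OTLP endpoint is a placeholder, and the model file is simply the default name referenced in the diff.

```python
# Hypothetical usage sketch: instrument GPT4All calls with openlit.
# The OTLP endpoint is a placeholder; point it at your own collector.
import openlit
from gpt4all import GPT4All

openlit.init(otlp_endpoint="http://127.0.0.1:4318")

model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")

# Non-streaming call -> handled by the rewritten generate() wrapper.
print(model.generate("Say hello in one short sentence.", max_tokens=32))

# Streaming call -> handled by the new TracedSyncStream + process_chunk path.
for token in model.generate("Count to five.", max_tokens=32, streaming=True):
    print(token, end="")
```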