openlit 1.34.5__py3-none-any.whl → 1.34.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openlit/instrumentation/gpt4all/__init__.py

@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of GPT4All Functions"""
 
 from typing import Collection
@@ -14,15 +13,15 @@ _instruments = ("gpt4all >= 2.6.0",)
 
 class GPT4AllInstrumentor(BaseInstrumentor):
     """
-    An instrumentor for GPT4All's client library.
+    An instrumentor for GPT4All client library.
     """
 
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments
 
     def _instrument(self, **kwargs):
-        application_name = kwargs.get("application_name", "default_application")
-        environment = kwargs.get("environment", "default_environment")
+        application_name = kwargs.get("application_name", "default")
+        environment = kwargs.get("environment", "default")
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info", {})
@@ -46,7 +45,5 @@ class GPT4AllInstrumentor(BaseInstrumentor):
                   tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
-
     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
        pass
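
Note: the only behavioral change in __init__.py is the pair of fallback values, so spans from an otherwise unconfigured setup are now tagged "default" rather than "default_application" / "default_environment". A minimal usage sketch of where those kwargs come from (illustrative, not part of the diff; it assumes the standard BaseInstrumentor.instrument(**kwargs) behavior of forwarding keyword arguments to _instrument(), and openlit normally supplies them for you via openlit.init()):

    from opentelemetry import trace
    from openlit.instrumentation.gpt4all import GPT4AllInstrumentor

    # Hedged sketch: instrument() forwards its kwargs to _instrument().
    GPT4AllInstrumentor().instrument(
        application_name="my-app",          # falls back to "default" if omitted
        environment="staging",              # falls back to "default" if omitted
        tracer=trace.get_tracer(__name__),  # stand-in tracer for illustration
    )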
openlit/instrumentation/gpt4all/gpt4all.py

@@ -2,66 +2,47 @@
 Module for monitoring GPT4All API calls.
 """
 
-import logging
 import time
-from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
     handle_exception,
-    general_tokens,
-    create_metrics_attributes,
-    set_server_address_and_port,
-    calculate_tbt,
-    calculate_ttft
+    set_server_address_and_port
+)
+from openlit.instrumentation.gpt4all.utils import (
+    process_generate_response,
+    process_chunk,
+    process_streaming_generate_response,
+    process_embedding_response
 )
 from openlit.semcov import SemanticConvention
 
-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
-
 def generate(version, environment, application_name,
-             tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for chat completions to collect metrics.
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the GPT4All API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating GPT4All usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat completions method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     class TracedSyncStream:
         """
-        Wrapper for streaming responses to collect metrics and trace data.
-        Wraps the response to collect message IDs and aggregated response.
-
-        This class implements the '__aiter__' and '__anext__' methods that
-        handle asynchronous streaming responses.
-
-        This class also implements '__aenter__' and '__aexit__' methods that
-        handle asynchronous context management protocol.
+        Wrapper for streaming responses to collect telemetry.
         """
+
         def __init__(
             self,
             wrapped,
             span,
+            span_name,
+            args,
             kwargs,
             server_address,
             server_port,
             request_model,
-            **args,
         ):
             self.__wrapped__ = wrapped
             self._span = span
-            # Placeholder for aggregating streaming response
+            self._span_name = span_name
             self._llmresponse = ""
-
+            self._request_model = request_model
             self._args = args
             self._kwargs = kwargs
             self._start_time = time.time()
@@ -71,7 +52,7 @@ def generate(version, environment, application_name,
             self._tbt = 0
             self._server_address = server_address
             self._server_port = server_port
-            self._request_model = request_model
+            self._tools = None
 
         def __enter__(self):
             self.__wrapped__.__enter__()
@@ -90,408 +71,119 @@ def generate(version, environment, application_name,
         def __next__(self):
             try:
                 chunk = self.__wrapped__.__next__()
-                end_time = time.time()
-                # Record the timestamp for the current chunk
-                self._timestamps.append(end_time)
-
-                if len(self._timestamps) == 1:
-                    # Calculate time to first chunk
-                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
-
-                self._llmresponse += chunk
+                process_chunk(self, chunk)
                 return chunk
             except StopIteration:
-                # Handling exception ensure LLM observability without disrupting operation
                 try:
-                    self._end_time = time.time()
-
-                    if len(self._timestamps) > 1:
-                        self._tbt = calculate_tbt(self._timestamps)
-
-                    prompt = self._kwargs.get("prompt") or self._args[0] or ""
-
-                    # Calculate tokens using input prompt and aggregated response
-                    input_tokens = general_tokens(prompt)
-                    output_tokens = general_tokens(self._llmresponse)
-
-                    # Set Span attributes (OTel Semconv)
-                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                        SemanticConvention.GEN_AI_SYSTEM_GPT4ALL)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                        self._request_model)
-                    self._span.set_attribute(SemanticConvention.SERVER_PORT,
-                        self._server_port)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                        self._kwargs.get("repeat_penalty", 1.18))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                        self._kwargs.get("max_tokens", 200))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                        self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                        self._kwargs.get("temp", 0.7))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                        self._kwargs.get("top_p", 0.4))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K,
-                        self._kwargs.get("top_k", 40))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                        self._request_model)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                        input_tokens)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        output_tokens)
-                    self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                        self._server_address)
-                    if isinstance(self._llmresponse, str):
-                        self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                            "text")
-                    else:
-                        self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                            "json")
-
-                    # Set Span attributes (Extra)
-                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                        environment)
-                    self._span.set_attribute(SERVICE_NAME,
-                        application_name)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                        True)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                        input_tokens + output_tokens)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
-                        self._tbt)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                        self._ttft)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                        version)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                        0)
-                    if capture_message_content:
-                        self._span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        self._span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
-                            },
+                    with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
+                        process_streaming_generate_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
                         )
 
-                    self._span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
-                            request_model=self._request_model,
-                            server_address=self._server_address,
-                            server_port=self._server_port,
-                            response_model=self._request_model,
-                        )
-
-                        metrics["genai_client_usage_tokens"].record(
-                            input_tokens + output_tokens, attributes
-                        )
-                        metrics["genai_client_operation_duration"].record(
-                            self._end_time - self._start_time, attributes
-                        )
-                        metrics["genai_server_tbt"].record(
-                            self._tbt, attributes
-                        )
-                        metrics["genai_server_ttft"].record(
-                            self._ttft, attributes
-                        )
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                        metrics["genai_cost"].record(0, attributes)
-
                 except Exception as e:
                     handle_exception(self._span, e)
-                    logger.error("Error in trace creation: %s", e)
-                finally:
-                    self._span.end()
+
                 raise
 
     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'chat.completions' API call to add telemetry.
-
-        This collects metrics such as execution time, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'chat.completions' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat.completions' method.
-            kwargs: Keyword arguments for the 'chat.completions' method.
-
-        Returns:
-            The response from the original 'chat.completions' method.
+        Wraps the GenAI function call.
        """
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("streaming", False)
 
-        server_address, server_port = set_server_address_and_port(instance, "localhost", 80)
-        request_model = str(instance.model.model_path).rsplit('/', maxsplit=1)[-1] or "orca-mini-3b-gguf2-q4_0.gguf"
+        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 80)
+        request_model = str(instance.model.model_path).rsplit("/", maxsplit=1)[-1] or "orca-mini-3b-gguf2-q4_0.gguf"
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-        # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = wrapped(*args, **kwargs)
             span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
-
-            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port, request_model)
+            return TracedSyncStream(awaited_wrapped, span, span_name, args, kwargs, server_address, server_port, request_model)
 
         # Handling for non-streaming responses
         else:
-            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
                 start_time = time.time()
                 response = wrapped(*args, **kwargs)
-                end_time = time.time()
 
                 try:
-                    prompt = kwargs.get("prompt") or args[0] or ""
-
-                    # Calculate tokens using input prompt and aggregated response
-                    input_tokens = general_tokens(str(prompt))
-                    output_tokens = general_tokens(str(response))
-
-                    # Set Span attributes (OTel Semconv)
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                        SemanticConvention.GEN_AI_SYSTEM_GPT4ALL)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                        request_model)
-                    span.set_attribute(SemanticConvention.SERVER_PORT,
-                        server_port)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                        kwargs.get("repeat_penalty", 1.18))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                        kwargs.get("max_tokens", 200))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                        kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                        kwargs.get("temp", 0.7))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                        kwargs.get("top_p", 0.4))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K,
-                        kwargs.get("top_k", 40))
-                    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                        request_model)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                        input_tokens)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        output_tokens)
-                    span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                        server_address)
-                    if isinstance(response, str):
-                        span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                            "text")
-                    else:
-                        span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                            "json")
-
-                    # Set Span attributes (Extra)
-                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                        environment)
-                    span.set_attribute(SERVICE_NAME,
-                        application_name)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                        False)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                        input_tokens + output_tokens)
-                    span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                        end_time - start_time)
-                    span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                        version)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                        0)
-                    if capture_message_content:
-                        span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: response,
-                            },
-                        )
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
-                            request_model=request_model,
-                            server_address=server_address,
-                            server_port=server_port,
-                            response_model=request_model,
-                        )
-
-                        metrics["genai_client_usage_tokens"].record(
-                            input_tokens + output_tokens, attributes
-                        )
-                        metrics["genai_client_operation_duration"].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics["genai_server_ttft"].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                        metrics["genai_cost"].record(0, attributes)
-
-                    # Return original response
-                    return response
+                    response = process_generate_response(
+                        response=response,
+                        request_model=request_model,
+                        pricing_info=pricing_info,
+                        server_port=server_port,
+                        server_address=server_address,
+                        environment=environment,
+                        application_name=application_name,
+                        metrics=metrics,
+                        start_time=start_time,
+                        span=span,
+                        args=args,
+                        kwargs=kwargs,
+                        capture_message_content=capture_message_content,
+                        disable_metrics=disable_metrics,
+                        version=version
+                    )
 
                 except Exception as e:
                     handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
 
-                    # Return original response
-                    return response
+                return response
 
     return wrapper
 
 def embed(version, environment, application_name,
-          tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for embeddings to collect metrics.
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the GPT4All API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating GPT4All usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the embeddings method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'embeddings' API call to add telemetry.
-
-        This collects metrics such as execution time, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'embeddings' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'embeddings' method.
-            kwargs: Keyword arguments for the 'embeddings' method.
-
-        Returns:
-            The response from the original 'embeddings' method.
+        Wraps the GenAI function call.
         """
 
-        server_address, server_port = set_server_address_and_port(instance, "localhost", 80)
-
-        # pylint: disable=line-too-long
-        request_model = str(instance.gpt4all.model.model_path).rsplit('/', maxsplit=1)[-1] or "all-MiniLM-L6-v2.gguf2.f16.gguf"
+        server_address, server_port = set_server_address_and_port(instance, "127.0.0.1", 80)
+        request_model = str(instance.gpt4all.model.model_path).rsplit("/", maxsplit=1)[-1] or "all-MiniLM-L6-v2.gguf2.f16.gguf"
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
 
-        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
             start_time = time.time()
             response = wrapped(*args, **kwargs)
-            end_time = time.time()
 
             try:
-                prompt = kwargs.get("prompt") or args[0] or ""
-                input_tokens = general_tokens(prompt)
-
-                # Set Span attributes (OTel Semconv)
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                    SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                    SemanticConvention.GEN_AI_SYSTEM_GPT4ALL)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                    request_model)
-                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                    request_model)
-                span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                    server_address)
-                span.set_attribute(SemanticConvention.SERVER_PORT,
-                    server_port)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                    input_tokens)
-
-                # Set Span attributes (Extras)
-                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                    environment)
-                span.set_attribute(SERVICE_NAME,
-                    application_name)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                    input_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                    version)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                    0)
-
-                if capture_message_content:
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvention.GEN_AI_CONTENT_PROMPT: str(kwargs.get("input", "")),
-                        },
-                    )
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = create_metrics_attributes(
-                        service_name=application_name,
-                        deployment_environment=environment,
-                        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                        system=SemanticConvention.GEN_AI_SYSTEM_GPT4ALL,
-                        request_model=request_model,
-                        server_address=server_address,
-                        server_port=server_port,
-                        response_model=request_model,
-                    )
-                    metrics["genai_client_usage_tokens"].record(
-                        input_tokens, attributes
-                    )
-                    metrics["genai_client_operation_duration"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(0, attributes)
-
-
-                # Return original response
-                return response
+                response = process_embedding_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )
 
             except Exception as e:
                 handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
 
-                # Return original response
-                return response
+            return response
 
     return wrapper
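
Note: most of the deleted bulk in this file is inline span-attribute and metrics code that moved into openlit.instrumentation.gpt4all.utils; what remains here is control flow. The structural idea worth calling out is TracedSyncStream: per-chunk bookkeeping happens in __next__, and telemetry is finalized only when the wrapped iterator raises StopIteration, leaving the consumer's loop untouched. A self-contained sketch of that pattern, using hypothetical hook names rather than openlit's actual process_chunk / process_streaming_generate_response signatures:

    import time

    class InstrumentedStream:
        # Proxy iterator: observe every chunk, emit telemetry once the
        # underlying stream is exhausted (mirrors TracedSyncStream.__next__).
        def __init__(self, wrapped, on_chunk, on_finish):
            self._wrapped = wrapped
            self._on_chunk = on_chunk      # per-chunk hook (TTFT, aggregation, ...)
            self._on_finish = on_finish    # runs once, at end of stream
            self._start_time = time.time()
            self._chunks = []

        def __iter__(self):
            return self

        def __next__(self):
            try:
                chunk = next(self._wrapped)
            except StopIteration:
                # Stream exhausted: flush telemetry, then re-raise so the
                # caller's loop terminates normally.
                self._on_finish(self._chunks, self._start_time)
                raise
            self._chunks.append(chunk)
            self._on_chunk(chunk)
            return chunk

    # Usage: iterate exactly as if it were the bare generator.
    stream = InstrumentedStream(
        iter(["Hel", "lo"]),
        on_chunk=lambda chunk: None,
        on_finish=lambda chunks, t0: print(len(chunks), "chunks in", round(time.time() - t0, 3), "s"),
    )
    print("".join(stream))

Re-raising StopIteration after the finish hook is what keeps the proxy a drop-in replacement: for-loops and "".join(...) terminate exactly as they would on the unwrapped stream.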