paid-python 0.3.4-py3-none-any.whl → 0.3.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paid/_vendor/__init__.py +0 -0
- paid/_vendor/opentelemetry/__init__.py +0 -0
- paid/_vendor/opentelemetry/instrumentation/__init__.py +0 -0
- paid/_vendor/opentelemetry/instrumentation/openai/__init__.py +54 -0
- paid/_vendor/opentelemetry/instrumentation/openai/shared/__init__.py +399 -0
- paid/_vendor/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +1192 -0
- paid/_vendor/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +292 -0
- paid/_vendor/opentelemetry/instrumentation/openai/shared/config.py +15 -0
- paid/_vendor/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +311 -0
- paid/_vendor/opentelemetry/instrumentation/openai/shared/event_emitter.py +108 -0
- paid/_vendor/opentelemetry/instrumentation/openai/shared/event_models.py +41 -0
- paid/_vendor/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +68 -0
- paid/_vendor/opentelemetry/instrumentation/openai/shared/span_utils.py +0 -0
- paid/_vendor/opentelemetry/instrumentation/openai/utils.py +190 -0
- paid/_vendor/opentelemetry/instrumentation/openai/v0/__init__.py +176 -0
- paid/_vendor/opentelemetry/instrumentation/openai/v1/__init__.py +358 -0
- paid/_vendor/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +329 -0
- paid/_vendor/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +134 -0
- paid/_vendor/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +996 -0
- paid/_vendor/opentelemetry/instrumentation/openai/version.py +1 -0
- paid/tracing/autoinstrumentation.py +5 -6
- paid/tracing/tracing.py +14 -3
- {paid_python-0.3.4.dist-info → paid_python-0.3.6.dist-info}/METADATA +2 -3
- {paid_python-0.3.4.dist-info → paid_python-0.3.6.dist-info}/RECORD +26 -6
- {paid_python-0.3.4.dist-info → paid_python-0.3.6.dist-info}/LICENSE +0 -0
- {paid_python-0.3.4.dist-info → paid_python-0.3.6.dist-info}/WHEEL +0 -0
paid/_vendor/opentelemetry/instrumentation/openai/shared/completion_wrappers.py
@@ -0,0 +1,292 @@
+import logging
+
+from opentelemetry import context as context_api
+from opentelemetry import trace
+from paid._vendor.opentelemetry.instrumentation.openai.shared import (
+    _set_client_attributes,
+    _set_functions_attributes,
+    _set_request_attributes,
+    _set_response_attributes,
+    _set_span_attribute,
+    _set_span_stream_usage,
+    is_streaming_response,
+    model_as_dict,
+    propagate_trace_context,
+)
+from paid._vendor.opentelemetry.instrumentation.openai.shared.config import Config
+from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
+from paid._vendor.opentelemetry.instrumentation.openai.shared.event_emitter import emit_event
+from paid._vendor.opentelemetry.instrumentation.openai.shared.event_models import (
+    ChoiceEvent,
+    MessageEvent,
+)
+from paid._vendor.opentelemetry.instrumentation.openai.utils import (
+    _with_tracer_wrapper,
+    dont_throw,
+    is_openai_v1,
+    should_emit_events,
+    should_send_prompts,
+)
+from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
+from opentelemetry.semconv._incubating.attributes import (
+    gen_ai_attributes as GenAIAttributes,
+)
+from opentelemetry.semconv_ai import (
+    SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
+    LLMRequestTypeValues,
+    SpanAttributes,
+)
+from opentelemetry.trace import SpanKind
+from opentelemetry.trace.status import Status, StatusCode
+
+SPAN_NAME = "openai.completion"
+LLM_REQUEST_TYPE = LLMRequestTypeValues.COMPLETION
+
+logger = logging.getLogger(__name__)
+
+
+@_with_tracer_wrapper
+def completion_wrapper(tracer, wrapped, instance, args, kwargs):
+    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
+        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
+    ):
+        return wrapped(*args, **kwargs)
+
+    # span needs to be opened and closed manually because the response is a generator
+    span = tracer.start_span(
+        SPAN_NAME,
+        kind=SpanKind.CLIENT,
+        attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
+    )
+
+    # Use the span as current context to ensure events get proper trace context
+    with trace.use_span(span, end_on_exit=False):
+        _handle_request(span, kwargs, instance)
+
+        try:
+            response = wrapped(*args, **kwargs)
+        except Exception as e:
+            span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+            span.record_exception(e)
+            span.set_status(Status(StatusCode.ERROR, str(e)))
+            span.end()
+            raise
+
+        if is_streaming_response(response):
+            # span will be closed after the generator is done
+            return _build_from_streaming_response(span, kwargs, response)
+        else:
+            _handle_response(response, span, instance)
+
+        span.end()
+        return response
+
+
+@_with_tracer_wrapper
+async def acompletion_wrapper(tracer, wrapped, instance, args, kwargs):
+    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
+        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
+    ):
+        return await wrapped(*args, **kwargs)
+
+    span = tracer.start_span(
+        name=SPAN_NAME,
+        kind=SpanKind.CLIENT,
+        attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
+    )
+
+    # Use the span as current context to ensure events get proper trace context
+    with trace.use_span(span, end_on_exit=False):
+        _handle_request(span, kwargs, instance)
+
+        try:
+            response = await wrapped(*args, **kwargs)
+        except Exception as e:
+            span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+            span.record_exception(e)
+            span.set_status(Status(StatusCode.ERROR, str(e)))
+            span.end()
+            raise
+
+        if is_streaming_response(response):
+            # span will be closed after the generator is done
+            return _abuild_from_streaming_response(span, kwargs, response)
+        else:
+            _handle_response(response, span, instance)
+
+        span.end()
+        return response
+
+
+@dont_throw
+def _handle_request(span, kwargs, instance):
+    _set_request_attributes(span, kwargs, instance)
+    if should_emit_events():
+        _emit_prompts_events(kwargs)
+    else:
+        if should_send_prompts():
+            _set_prompts(span, kwargs.get("prompt"))
+            _set_functions_attributes(span, kwargs.get("functions"))
+    _set_client_attributes(span, instance)
+    if Config.enable_trace_context_propagation:
+        propagate_trace_context(span, kwargs)
+
+
+def _emit_prompts_events(kwargs):
+    prompt = kwargs.get("prompt")
+    if isinstance(prompt, list):
+        for p in prompt:
+            emit_event(MessageEvent(content=p))
+    elif isinstance(prompt, str):
+        emit_event(MessageEvent(content=prompt))
+
+
+@dont_throw
+def _handle_response(response, span, instance=None):
+    if is_openai_v1():
+        response_dict = model_as_dict(response)
+    else:
+        response_dict = response
+
+    _set_response_attributes(span, response_dict)
+    if should_emit_events():
+        for choice in response.choices:
+            emit_event(_parse_choice_event(choice))
+    else:
+        if should_send_prompts():
+            _set_completions(span, response_dict.get("choices"))
+
+
+def _set_prompts(span, prompt):
+    if not span.is_recording() or not prompt:
+        return
+
+    _set_span_attribute(
+        span,
+        f"{GenAIAttributes.GEN_AI_PROMPT}.0.user",
+        prompt[0] if isinstance(prompt, list) else prompt,
+    )
+
+
+@dont_throw
+def _set_completions(span, choices):
+    if not span.is_recording() or not choices:
+        return
+
+    for choice in choices:
+        index = choice.get("index")
+        prefix = f"{GenAIAttributes.GEN_AI_COMPLETION}.{index}"
+        _set_span_attribute(
+            span, f"{prefix}.finish_reason", choice.get("finish_reason")
+        )
+        _set_span_attribute(span, f"{prefix}.content", choice.get("text"))
+
+
+@dont_throw
+def _build_from_streaming_response(span, request_kwargs, response):
+    complete_response = {"choices": [], "model": "", "id": ""}
+    for item in response:
+        yield item
+        _accumulate_streaming_response(complete_response, item)
+
+    _set_response_attributes(span, complete_response)
+
+    _set_token_usage(span, request_kwargs, complete_response)
+
+    if should_emit_events():
+        _emit_streaming_response_events(complete_response)
+    else:
+        if should_send_prompts():
+            _set_completions(span, complete_response.get("choices"))
+
+    span.set_status(Status(StatusCode.OK))
+    span.end()
+
+
+@dont_throw
+async def _abuild_from_streaming_response(span, request_kwargs, response):
+    complete_response = {"choices": [], "model": "", "id": ""}
+    async for item in response:
+        yield item
+        _accumulate_streaming_response(complete_response, item)
+
+    _set_response_attributes(span, complete_response)
+
+    _set_token_usage(span, request_kwargs, complete_response)
+
+    if should_emit_events():
+        _emit_streaming_response_events(complete_response)
+    else:
+        if should_send_prompts():
+            _set_completions(span, complete_response.get("choices"))
+
+    span.set_status(Status(StatusCode.OK))
+    span.end()
+
+
+def _emit_streaming_response_events(complete_response):
+    for i, choice in enumerate(complete_response["choices"]):
+        emit_event(
+            ChoiceEvent(
+                index=choice.get("index", i),
+                message={"content": choice.get("text"), "role": "assistant"},
+                finish_reason=choice.get("finish_reason", "unknown"),
+            )
+        )
+
+
+@dont_throw
+def _set_token_usage(span, request_kwargs, complete_response):
+    prompt_usage = -1
+    completion_usage = -1
+
+    # Use token usage from API response only
+    if complete_response.get("usage"):
+        usage = complete_response["usage"]
+        if usage.get("prompt_tokens"):
+            prompt_usage = usage["prompt_tokens"]
+        if usage.get("completion_tokens"):
+            completion_usage = usage["completion_tokens"]
+
+    # span record
+    _set_span_stream_usage(span, prompt_usage, completion_usage)
+
+
+@dont_throw
+def _accumulate_streaming_response(complete_response, item):
+    if is_openai_v1():
+        item = model_as_dict(item)
+
+    complete_response["model"] = item.get("model")
+    complete_response["id"] = item.get("id")
+
+    # capture usage information from the stream chunks
+    if item.get("usage"):
+        complete_response["usage"] = item.get("usage")
+
+    for choice in item.get("choices"):
+        index = choice.get("index")
+        if len(complete_response.get("choices")) <= index:
+            complete_response["choices"].append({"index": index, "text": ""})
+        complete_choice = complete_response.get("choices")[index]
+        if choice.get("finish_reason"):
+            complete_choice["finish_reason"] = choice.get("finish_reason")
+
+        if choice.get("text"):
+            complete_choice["text"] += choice.get("text")
+
+    return complete_response
+
+
+def _parse_choice_event(choice) -> ChoiceEvent:
+    has_message = choice.text is not None
+    has_finish_reason = choice.finish_reason is not None
+
+    content = choice.text if has_message else None
+    finish_reason = choice.finish_reason if has_finish_reason else "unknown"
+
+    return ChoiceEvent(
+        index=choice.index,
+        message={"content": content, "role": "assistant"},
+        finish_reason=finish_reason,
+    )
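
The streaming branch above never buffers the response for the caller: _build_from_streaming_response re-yields every chunk as it arrives, folds it into complete_response, and only records attributes and ends the span once the generator is exhausted. Below is a minimal, self-contained sketch of that accumulate-while-yielding pattern; the chunk dicts and print are illustrative stand-ins, not part of the package.

# Illustration only: the same forward-then-accumulate shape as
# _build_from_streaming_response, without spans or OpenTelemetry.
def accumulate_stream(chunks):
    complete = {"choices": [], "model": "", "id": ""}
    for chunk in chunks:
        yield chunk  # the caller still sees every chunk unchanged
        complete["model"] = chunk.get("model")
        complete["id"] = chunk.get("id")
        for choice in chunk.get("choices", []):
            index = choice.get("index", 0)
            if len(complete["choices"]) <= index:
                complete["choices"].append({"index": index, "text": ""})
            if choice.get("text"):
                complete["choices"][index]["text"] += choice["text"]
    # in the wrapper, span attributes are set and the span is ended here
    print(complete)

# Hypothetical chunks, shaped like the dicts the accumulator expects
chunks = [
    {"id": "cmpl-1", "model": "demo", "choices": [{"index": 0, "text": "Hel"}]},
    {"id": "cmpl-1", "model": "demo", "choices": [{"index": 0, "text": "lo"}]},
]
for _ in accumulate_stream(chunks):
    pass
# prints: {'choices': [{'index': 0, 'text': 'Hello'}], 'model': 'demo', 'id': 'cmpl-1'}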
paid/_vendor/opentelemetry/instrumentation/openai/shared/config.py
@@ -0,0 +1,15 @@
+from typing import Callable, Optional
+
+from opentelemetry._logs import Logger
+
+
+class Config:
+    enrich_assistant = False
+    exception_logger = None
+    get_common_metrics_attributes: Callable[[], dict] = lambda: {}
+    upload_base64_image: Callable[[str, str, str, str], str] = (
+        lambda trace_id, span_id, image_name, base64_string: str
+    )
+    enable_trace_context_propagation: bool = True
+    use_legacy_attributes = True
+    event_logger: Optional[Logger] = None
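
Config is a plain class whose class-level attributes act as process-wide switches; the wrappers read them on every call (for example, _handle_request checks Config.enable_trace_context_propagation), so flipping a flag at runtime takes effect immediately. A small sketch of how a caller might adjust these flags directly; whether the surrounding SDK exposes a higher-level knob for this is not shown in this diff.

from paid._vendor.opentelemetry.instrumentation.openai.shared.config import Config

# Stop injecting trace context into outgoing OpenAI request kwargs;
# both _handle_request implementations in this diff consult this flag.
Config.enable_trace_context_propagation = False

# Keep prompt/completion data on span attributes rather than emitting
# gen_ai events (assumption: should_emit_events() in utils.py keys off
# use_legacy_attributes / event_logger, which are not shown in this diff).
Config.use_legacy_attributes = True
Config.event_logger = None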
paid/_vendor/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py
@@ -0,0 +1,311 @@
+import logging
+import time
+from collections.abc import Iterable
+
+from opentelemetry import context as context_api
+from paid._vendor.opentelemetry.instrumentation.openai.shared import (
+    OPENAI_LLM_USAGE_TOKEN_TYPES,
+    _get_openai_base_url,
+    _set_client_attributes,
+    _set_request_attributes,
+    _set_response_attributes,
+    _set_span_attribute,
+    _token_type,
+    metric_shared_attributes,
+    model_as_dict,
+    propagate_trace_context,
+)
+from paid._vendor.opentelemetry.instrumentation.openai.shared.config import Config
+from paid._vendor.opentelemetry.instrumentation.openai.shared.event_emitter import emit_event
+from paid._vendor.opentelemetry.instrumentation.openai.shared.event_models import (
+    ChoiceEvent,
+    MessageEvent,
+)
+from paid._vendor.opentelemetry.instrumentation.openai.utils import (
+    _with_embeddings_telemetry_wrapper,
+    dont_throw,
+    is_openai_v1,
+    should_emit_events,
+    should_send_prompts,
+    start_as_current_span_async,
+)
+from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
+from opentelemetry.metrics import Counter, Histogram
+from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
+from opentelemetry.semconv._incubating.attributes import (
+    gen_ai_attributes as GenAIAttributes,
+)
+from opentelemetry.semconv_ai import (
+    SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
+    LLMRequestTypeValues,
+    SpanAttributes,
+)
+from opentelemetry.trace import SpanKind, Status, StatusCode
+
+from openai._legacy_response import LegacyAPIResponse
+from openai.types.create_embedding_response import CreateEmbeddingResponse
+
+SPAN_NAME = "openai.embeddings"
+LLM_REQUEST_TYPE = LLMRequestTypeValues.EMBEDDING
+
+logger = logging.getLogger(__name__)
+
+
+@_with_embeddings_telemetry_wrapper
+def embeddings_wrapper(
+    tracer,
+    token_counter: Counter,
+    vector_size_counter: Counter,
+    duration_histogram: Histogram,
+    exception_counter: Counter,
+    wrapped,
+    instance,
+    args,
+    kwargs,
+):
+    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
+        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
+    ):
+        return wrapped(*args, **kwargs)
+
+    with tracer.start_as_current_span(
+        name=SPAN_NAME,
+        kind=SpanKind.CLIENT,
+        attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
+    ) as span:
+        _handle_request(span, kwargs, instance)
+
+        try:
+            # record time for duration
+            start_time = time.time()
+            response = wrapped(*args, **kwargs)
+            end_time = time.time()
+        except Exception as e:  # pylint: disable=broad-except
+            end_time = time.time()
+            duration = end_time - start_time if "start_time" in locals() else 0
+            attributes = {
+                "error.type": e.__class__.__name__,
+            }
+
+            # if there are legal duration, record it
+            if duration > 0 and duration_histogram:
+                duration_histogram.record(duration, attributes=attributes)
+            if exception_counter:
+                exception_counter.add(1, attributes=attributes)
+
+            span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+            span.record_exception(e)
+            span.set_status(Status(StatusCode.ERROR, str(e)))
+            span.end()
+
+            raise
+
+        duration = end_time - start_time
+
+        _handle_response(
+            response,
+            span,
+            instance,
+            token_counter,
+            vector_size_counter,
+            duration_histogram,
+            duration,
+        )
+
+        return response
+
+
+@_with_embeddings_telemetry_wrapper
+async def aembeddings_wrapper(
+    tracer,
+    token_counter: Counter,
+    vector_size_counter: Counter,
+    duration_histogram: Histogram,
+    exception_counter: Counter,
+    wrapped,
+    instance,
+    args,
+    kwargs,
+):
+    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
+        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
+    ):
+        return await wrapped(*args, **kwargs)
+
+    async with start_as_current_span_async(
+        tracer=tracer,
+        name=SPAN_NAME,
+        kind=SpanKind.CLIENT,
+        attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
+    ) as span:
+        _handle_request(span, kwargs, instance)
+
+        try:
+            # record time for duration
+            start_time = time.time()
+            response = await wrapped(*args, **kwargs)
+            end_time = time.time()
+        except Exception as e:  # pylint: disable=broad-except
+            end_time = time.time()
+            duration = end_time - start_time if "start_time" in locals() else 0
+            attributes = {
+                "error.type": e.__class__.__name__,
+            }
+
+            # if there are legal duration, record it
+            if duration > 0 and duration_histogram:
+                duration_histogram.record(duration, attributes=attributes)
+            if exception_counter:
+                exception_counter.add(1, attributes=attributes)
+
+            span.set_attribute(ERROR_TYPE, e.__class__.__name__)
+            span.record_exception(e)
+            span.set_status(Status(StatusCode.ERROR, str(e)))
+            span.end()
+
+            raise
+
+        duration = end_time - start_time
+
+        _handle_response(
+            response,
+            span,
+            instance,
+            token_counter,
+            vector_size_counter,
+            duration_histogram,
+            duration,
+        )
+
+        return response
+
+
+@dont_throw
+def _handle_request(span, kwargs, instance):
+    _set_request_attributes(span, kwargs, instance)
+
+    if should_emit_events():
+        _emit_embeddings_message_event(kwargs.get("input"))
+    else:
+        if should_send_prompts():
+            _set_prompts(span, kwargs.get("input"))
+
+    _set_client_attributes(span, instance)
+
+    if Config.enable_trace_context_propagation:
+        propagate_trace_context(span, kwargs)
+
+
+@dont_throw
+def _handle_response(
+    response,
+    span,
+    instance=None,
+    token_counter=None,
+    vector_size_counter=None,
+    duration_histogram=None,
+    duration=None,
+):
+    if is_openai_v1():
+        response_dict = model_as_dict(response)
+    else:
+        response_dict = response
+    # metrics record
+    _set_embeddings_metrics(
+        instance,
+        token_counter,
+        vector_size_counter,
+        duration_histogram,
+        response_dict,
+        duration,
+    )
+    # span attributes
+    _set_response_attributes(span, response_dict)
+
+    # emit events
+    if should_emit_events():
+        _emit_embeddings_choice_event(response)
+
+
+def _set_embeddings_metrics(
+    instance,
+    token_counter,
+    vector_size_counter,
+    duration_histogram,
+    response_dict,
+    duration,
+):
+    shared_attributes = metric_shared_attributes(
+        response_model=response_dict.get("model") or None,
+        operation="embeddings",
+        server_address=_get_openai_base_url(instance),
+    )
+
+    # token count metrics
+    usage = response_dict.get("usage")
+    if usage and token_counter:
+        for name, val in usage.items():
+            if name in OPENAI_LLM_USAGE_TOKEN_TYPES:
+                if val is None:
+                    logging.error(f"Received None value for {name} in usage")
+                    continue
+                attributes_with_token_type = {
+                    **shared_attributes,
+                    GenAIAttributes.GEN_AI_TOKEN_TYPE: _token_type(name),
+                }
+                token_counter.record(val, attributes=attributes_with_token_type)
+
+    # vec size metrics
+    # should use counter for vector_size?
+    vec_embedding = (response_dict.get("data") or [{}])[0].get("embedding", [])
+    vec_size = len(vec_embedding)
+    if vector_size_counter:
+        vector_size_counter.add(vec_size, attributes=shared_attributes)
+
+    # duration metrics
+    if duration and isinstance(duration, (float, int)) and duration_histogram:
+        duration_histogram.record(duration, attributes=shared_attributes)
+
+
+def _set_prompts(span, prompt):
+    if not span.is_recording() or not prompt:
+        return
+
+    if isinstance(prompt, list):
+        for i, p in enumerate(prompt):
+            _set_span_attribute(span, f"{GenAIAttributes.GEN_AI_PROMPT}.{i}.content", p)
+    else:
+        _set_span_attribute(
+            span,
+            f"{GenAIAttributes.GEN_AI_PROMPT}.0.content",
+            prompt,
+        )
+
+
+def _emit_embeddings_message_event(embeddings) -> None:
+    if isinstance(embeddings, str):
+        emit_event(MessageEvent(content=embeddings))
+    elif isinstance(embeddings, Iterable):
+        for i in embeddings:
+            emit_event(MessageEvent(content=i))
+
+
+def _emit_embeddings_choice_event(response) -> None:
+    if isinstance(response, CreateEmbeddingResponse):
+        for embedding in response.data:
+            emit_event(
+                ChoiceEvent(
+                    index=embedding.index,
+                    message={"content": embedding.embedding, "role": "assistant"},
+                )
+            )
+
+    elif isinstance(response, LegacyAPIResponse):
+        parsed_response = response.parse()
+        for embedding in parsed_response.data:
+            emit_event(
+                ChoiceEvent(
+                    index=embedding.index,
+                    message={"content": embedding.embedding, "role": "assistant"},
+                )
+            )
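
All four wrappers in this diff start with the same guard: if either _SUPPRESS_INSTRUMENTATION_KEY or SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY is set on the active context, the original client call is returned untouched. A caller can use the standard OpenTelemetry context API to opt a block of code out of this vendored instrumentation; the sketch below uses plain OTel calls, nothing paid-python specific.

from opentelemetry import context as context_api
from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY

# Attach a context with the suppression key set; the wrappers above see
# the key and fall through to wrapped(*args, **kwargs) with no span,
# no metrics, and no events.
token = context_api.attach(
    context_api.set_value(_SUPPRESS_INSTRUMENTATION_KEY, True)
)
try:
    pass  # OpenAI calls made here are not instrumented
finally:
    context_api.detach(token)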