lmnr 0.6.16__py3-none-any.whl → 0.7.26__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries and is provided for informational purposes only.
- lmnr/__init__.py +6 -15
- lmnr/cli/__init__.py +270 -0
- lmnr/cli/datasets.py +371 -0
- lmnr/{cli.py → cli/evals.py} +20 -102
- lmnr/cli/rules.py +42 -0
- lmnr/opentelemetry_lib/__init__.py +9 -2
- lmnr/opentelemetry_lib/decorators/__init__.py +274 -168
- lmnr/opentelemetry_lib/litellm/__init__.py +352 -38
- lmnr/opentelemetry_lib/litellm/utils.py +82 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/__init__.py +849 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/config.py +13 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_emitter.py +211 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_models.py +41 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/span_utils.py +401 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/streaming.py +425 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/utils.py +332 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/version.py +1 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/claude_agent/__init__.py +451 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/claude_agent/proxy.py +144 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_agent/__init__.py +100 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_computer/__init__.py +476 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_computer/utils.py +12 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py +191 -129
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/schema_utils.py +26 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/utils.py +126 -41
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/__init__.py +488 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/config.py +8 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_emitter.py +143 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_models.py +41 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/span_utils.py +229 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/utils.py +92 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/version.py +1 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/kernel/__init__.py +381 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/kernel/utils.py +36 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/langgraph/__init__.py +16 -16
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/__init__.py +61 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py +472 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +1185 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +305 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/config.py +16 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +312 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_emitter.py +100 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_models.py +41 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +68 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/utils.py +197 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v0/__init__.py +176 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/__init__.py +368 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +325 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +135 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +786 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/version.py +1 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openhands_ai/__init__.py +388 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/opentelemetry/__init__.py +69 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/skyvern/__init__.py +59 -61
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/threading/__init__.py +197 -0
- lmnr/opentelemetry_lib/tracing/__init__.py +119 -18
- lmnr/opentelemetry_lib/tracing/_instrument_initializers.py +124 -25
- lmnr/opentelemetry_lib/tracing/attributes.py +4 -0
- lmnr/opentelemetry_lib/tracing/context.py +200 -0
- lmnr/opentelemetry_lib/tracing/exporter.py +109 -15
- lmnr/opentelemetry_lib/tracing/instruments.py +22 -5
- lmnr/opentelemetry_lib/tracing/processor.py +128 -30
- lmnr/opentelemetry_lib/tracing/span.py +398 -0
- lmnr/opentelemetry_lib/tracing/tracer.py +40 -1
- lmnr/opentelemetry_lib/tracing/utils.py +62 -0
- lmnr/opentelemetry_lib/utils/package_check.py +9 -0
- lmnr/opentelemetry_lib/utils/wrappers.py +11 -0
- lmnr/sdk/browser/background_send_events.py +158 -0
- lmnr/sdk/browser/browser_use_cdp_otel.py +100 -0
- lmnr/sdk/browser/browser_use_otel.py +12 -12
- lmnr/sdk/browser/bubus_otel.py +71 -0
- lmnr/sdk/browser/cdp_utils.py +518 -0
- lmnr/sdk/browser/inject_script.js +514 -0
- lmnr/sdk/browser/patchright_otel.py +18 -44
- lmnr/sdk/browser/playwright_otel.py +104 -187
- lmnr/sdk/browser/pw_utils.py +249 -210
- lmnr/sdk/browser/recorder/record.umd.min.cjs +84 -0
- lmnr/sdk/browser/utils.py +1 -1
- lmnr/sdk/client/asynchronous/async_client.py +47 -15
- lmnr/sdk/client/asynchronous/resources/__init__.py +2 -7
- lmnr/sdk/client/asynchronous/resources/browser_events.py +1 -0
- lmnr/sdk/client/asynchronous/resources/datasets.py +131 -0
- lmnr/sdk/client/asynchronous/resources/evals.py +122 -18
- lmnr/sdk/client/asynchronous/resources/evaluators.py +85 -0
- lmnr/sdk/client/asynchronous/resources/tags.py +4 -10
- lmnr/sdk/client/synchronous/resources/__init__.py +2 -2
- lmnr/sdk/client/synchronous/resources/datasets.py +131 -0
- lmnr/sdk/client/synchronous/resources/evals.py +83 -17
- lmnr/sdk/client/synchronous/resources/evaluators.py +85 -0
- lmnr/sdk/client/synchronous/resources/tags.py +4 -10
- lmnr/sdk/client/synchronous/sync_client.py +47 -15
- lmnr/sdk/datasets/__init__.py +94 -0
- lmnr/sdk/datasets/file_utils.py +91 -0
- lmnr/sdk/decorators.py +103 -23
- lmnr/sdk/evaluations.py +122 -33
- lmnr/sdk/laminar.py +816 -333
- lmnr/sdk/log.py +7 -2
- lmnr/sdk/types.py +124 -143
- lmnr/sdk/utils.py +115 -2
- lmnr/version.py +1 -1
- {lmnr-0.6.16.dist-info → lmnr-0.7.26.dist-info}/METADATA +71 -78
- lmnr-0.7.26.dist-info/RECORD +116 -0
- lmnr-0.7.26.dist-info/WHEEL +4 -0
- lmnr-0.7.26.dist-info/entry_points.txt +3 -0
- lmnr/opentelemetry_lib/tracing/context_properties.py +0 -65
- lmnr/sdk/browser/rrweb/rrweb.umd.min.cjs +0 -98
- lmnr/sdk/client/asynchronous/resources/agent.py +0 -329
- lmnr/sdk/client/synchronous/resources/agent.py +0 -323
- lmnr/sdk/datasets.py +0 -60
- lmnr-0.6.16.dist-info/LICENSE +0 -75
- lmnr-0.6.16.dist-info/RECORD +0 -61
- lmnr-0.6.16.dist-info/WHEEL +0 -4
- lmnr-0.6.16.dist-info/entry_points.txt +0 -3
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py (new file)

@@ -0,0 +1,1185 @@

import copy
import json
import logging
import threading
import time
from functools import singledispatch
from typing import List, Optional, Union

from opentelemetry import context as context_api
from ..shared import (
    OPENAI_LLM_USAGE_TOKEN_TYPES,
    _get_openai_base_url,
    _set_client_attributes,
    _set_functions_attributes,
    _set_request_attributes,
    _set_response_attributes,
    _set_span_attribute,
    _set_span_stream_usage,
    _token_type,
    get_token_count_from_string,
    is_streaming_response,
    metric_shared_attributes,
    model_as_dict,
    propagate_trace_context,
    set_tools_attributes,
    should_record_stream_token_usage,
)
from ..shared.config import Config
from ..shared.event_emitter import emit_event
from ..shared.event_models import (
    ChoiceEvent,
    MessageEvent,
    ToolCall,
)
from ..utils import (
    _with_chat_telemetry_wrapper,
    dont_throw,
    is_openai_v1,
    run_async,
    should_emit_events,
    should_send_prompts,
)
from lmnr.opentelemetry_lib.tracing.context import (
    get_current_context,
    get_event_attributes_from_context,
)
from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
from opentelemetry.metrics import Counter, Histogram
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
from opentelemetry.semconv_ai import (
    SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
    LLMRequestTypeValues,
    SpanAttributes,
)
from opentelemetry.trace import SpanKind, Tracer
from opentelemetry.trace.status import Status, StatusCode
from wrapt import ObjectProxy

import pydantic

SPAN_NAME = "openai.chat"
PROMPT_FILTER_KEY = "prompt_filter_results"
CONTENT_FILTER_KEY = "content_filter_results"

LLM_REQUEST_TYPE = LLMRequestTypeValues.CHAT

logger = logging.getLogger(__name__)


@_with_chat_telemetry_wrapper
def chat_wrapper(
    tracer: Tracer,
    token_counter: Counter,
    choice_counter: Counter,
    duration_histogram: Histogram,
    exception_counter: Counter,
    streaming_time_to_first_token: Histogram,
    streaming_time_to_generate: Histogram,
    wrapped,
    instance,
    args,
    kwargs,
):
    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
    ):
        return wrapped(*args, **kwargs)
    # span needs to be opened and closed manually because the response is a generator

    span = tracer.start_span(
        SPAN_NAME,
        kind=SpanKind.CLIENT,
        attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
        context=get_current_context(),
    )

    run_async(_handle_request(span, kwargs, instance))

    try:
        start_time = time.time()
        response = wrapped(*args, **kwargs)
        end_time = time.time()
    except Exception as e:  # pylint: disable=broad-except
        end_time = time.time()
        duration = end_time - start_time if "start_time" in locals() else 0

        attributes = {
            "error.type": e.__class__.__name__,
        }

        if duration > 0 and duration_histogram:
            duration_histogram.record(duration, attributes=attributes)
        if exception_counter:
            exception_counter.add(1, attributes=attributes)

        span.set_attribute(ERROR_TYPE, e.__class__.__name__)
        attributes = get_event_attributes_from_context()
        span.record_exception(e, attributes=attributes)
        span.set_status(Status(StatusCode.ERROR, str(e)))
        span.end()

        raise

    if is_streaming_response(response):
        # span will be closed after the generator is done
        if is_openai_v1():
            return ChatStream(
                span,
                response,
                instance,
                token_counter,
                choice_counter,
                duration_histogram,
                streaming_time_to_first_token,
                streaming_time_to_generate,
                start_time,
                kwargs,
            )
        else:
            return _build_from_streaming_response(
                span,
                response,
                instance,
                token_counter,
                choice_counter,
                duration_histogram,
                streaming_time_to_first_token,
                streaming_time_to_generate,
                start_time,
                kwargs,
            )

    duration = end_time - start_time

    _handle_response(
        response,
        span,
        instance,
        token_counter,
        choice_counter,
        duration_histogram,
        duration,
    )

    span.end()

    return response


@_with_chat_telemetry_wrapper
async def achat_wrapper(
    tracer: Tracer,
    token_counter: Counter,
    choice_counter: Counter,
    duration_histogram: Histogram,
    exception_counter: Counter,
    streaming_time_to_first_token: Histogram,
    streaming_time_to_generate: Histogram,
    wrapped,
    instance,
    args,
    kwargs,
):
    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY) or context_api.get_value(
        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
    ):
        return await wrapped(*args, **kwargs)

    span = tracer.start_span(
        SPAN_NAME,
        kind=SpanKind.CLIENT,
        attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
        context=get_current_context(),
    )

    await _handle_request(span, kwargs, instance)

    try:
        start_time = time.time()
        response = await wrapped(*args, **kwargs)
        end_time = time.time()
    except Exception as e:  # pylint: disable=broad-except
        end_time = time.time()
        duration = end_time - start_time if "start_time" in locals() else 0

        common_attributes = Config.get_common_metrics_attributes()
        attributes = {
            **common_attributes,
            "error.type": e.__class__.__name__,
        }

        if duration > 0 and duration_histogram:
            duration_histogram.record(duration, attributes=attributes)
        if exception_counter:
            exception_counter.add(1, attributes=attributes)

        span.set_attribute(ERROR_TYPE, e.__class__.__name__)
        attributes = get_event_attributes_from_context()
        span.record_exception(e, attributes=attributes)
        span.set_status(Status(StatusCode.ERROR, str(e)))
        span.end()

        raise

    if is_streaming_response(response):
        # span will be closed after the generator is done
        if is_openai_v1():
            return ChatStream(
                span,
                response,
                instance,
                token_counter,
                choice_counter,
                duration_histogram,
                streaming_time_to_first_token,
                streaming_time_to_generate,
                start_time,
                kwargs,
            )
        else:
            return _abuild_from_streaming_response(
                span,
                response,
                instance,
                token_counter,
                choice_counter,
                duration_histogram,
                streaming_time_to_first_token,
                streaming_time_to_generate,
                start_time,
                kwargs,
            )

    duration = end_time - start_time

    _handle_response(
        response,
        span,
        instance,
        token_counter,
        choice_counter,
        duration_histogram,
        duration,
    )

    span.end()

    return response


@dont_throw
async def _handle_request(span, kwargs, instance):
    _set_request_attributes(span, kwargs, instance)
    _set_client_attributes(span, instance)
    if should_emit_events():
        for message in kwargs.get("messages", []):
            emit_event(
                MessageEvent(
                    content=message.get("content"),
                    role=message.get("role"),
                    tool_calls=_parse_tool_calls(message.get("tool_calls", None)),
                )
            )
    else:
        if should_send_prompts():
            await _set_prompts(span, kwargs.get("messages"))
            if kwargs.get("functions"):
                _set_functions_attributes(span, kwargs.get("functions"))
            elif kwargs.get("tools"):
                set_tools_attributes(span, kwargs.get("tools"))
    if Config.enable_trace_context_propagation:
        propagate_trace_context(span, kwargs)


@dont_throw
def _handle_response(
    response,
    span,
    instance=None,
    token_counter=None,
    choice_counter=None,
    duration_histogram=None,
    duration=None,
    is_streaming: bool = False,
):
    if is_openai_v1():
        response_dict = model_as_dict(response)
    else:
        response_dict = response

    # metrics record
    _set_chat_metrics(
        instance,
        token_counter,
        choice_counter,
        duration_histogram,
        response_dict,
        duration,
        is_streaming,
    )

    # span attributes
    _set_response_attributes(span, response_dict)

    if should_emit_events():
        if response.choices is not None:
            for choice in response.choices:
                emit_event(_parse_choice_event(choice))
    else:
        if should_send_prompts():
            _set_completions(span, response_dict.get("choices"))

    return response


def _set_chat_metrics(
    instance,
    token_counter,
    choice_counter,
    duration_histogram,
    response_dict,
    duration,
    is_streaming: bool = False,
):
    shared_attributes = metric_shared_attributes(
        response_model=response_dict.get("model") or None,
        operation="chat",
        server_address=_get_openai_base_url(instance),
        is_streaming=is_streaming,
    )

    # token metrics
    usage = response_dict.get("usage")  # type: dict
    if usage and token_counter:
        _set_token_counter_metrics(token_counter, usage, shared_attributes)

    # choices metrics
    choices = response_dict.get("choices")
    if choices and choice_counter:
        _set_choice_counter_metrics(choice_counter, choices, shared_attributes)

    # duration metrics
    if duration and isinstance(duration, (float, int)) and duration_histogram:
        duration_histogram.record(duration, attributes=shared_attributes)


def _set_choice_counter_metrics(choice_counter, choices, shared_attributes):
    choice_counter.add(len(choices), attributes=shared_attributes)
    for choice in choices:
        attributes_with_reason = {**shared_attributes}
        if choice.get("finish_reason"):
            attributes_with_reason[SpanAttributes.LLM_RESPONSE_FINISH_REASON] = (
                choice.get("finish_reason")
            )
        choice_counter.add(1, attributes=attributes_with_reason)


def _set_token_counter_metrics(token_counter, usage, shared_attributes):
    for name, val in usage.items():
        if name in OPENAI_LLM_USAGE_TOKEN_TYPES:
            attributes_with_token_type = {
                **shared_attributes,
                SpanAttributes.LLM_TOKEN_TYPE: _token_type(name),
            }
            token_counter.record(val, attributes=attributes_with_token_type)


def _is_base64_image(item):
    if not isinstance(item, dict):
        return False

    if not isinstance(item.get("image_url"), dict):
        return False

    if "data:image/" not in item.get("image_url", {}).get("url", ""):
        return False

    return True


async def _process_image_item(item, trace_id, span_id, message_index, content_index):
    if not Config.upload_base64_image:
        return item

    image_format = item["image_url"]["url"].split(";")[0].split("/")[1]
    image_name = f"message_{message_index}_content_{content_index}.{image_format}"
    base64_string = item["image_url"]["url"].split(",")[1]
    url = await Config.upload_base64_image(trace_id, span_id, image_name, base64_string)

    return {"type": "image_url", "image_url": {"url": url}}


@dont_throw
async def _set_prompts(span, messages):
    if not span.is_recording() or messages is None:
        return

    for i, msg in enumerate(messages):
        prefix = f"{SpanAttributes.LLM_PROMPTS}.{i}"
        msg = msg if isinstance(msg, dict) else model_as_dict(msg)

        _set_span_attribute(span, f"{prefix}.role", msg.get("role"))
        if msg.get("content"):
            content = copy.deepcopy(msg.get("content"))
            if isinstance(content, list):
                content = [
                    (
                        await _process_image_item(
                            item, span.context.trace_id, span.context.span_id, i, j
                        )
                        if _is_base64_image(item)
                        else item
                    )
                    for j, item in enumerate(content)
                ]

                content = json.dumps(content)
            _set_span_attribute(span, f"{prefix}.content", content)
        if msg.get("tool_call_id"):
            _set_span_attribute(span, f"{prefix}.tool_call_id", msg.get("tool_call_id"))
        tool_calls = msg.get("tool_calls")
        if tool_calls:
            for i, tool_call in enumerate(tool_calls):
                if is_openai_v1():
                    tool_call = model_as_dict(tool_call)

                function = tool_call.get("function")
                _set_span_attribute(
                    span,
                    f"{prefix}.tool_calls.{i}.id",
                    tool_call.get("id"),
                )
                _set_span_attribute(
                    span,
                    f"{prefix}.tool_calls.{i}.name",
                    function.get("name"),
                )
                _set_span_attribute(
                    span,
                    f"{prefix}.tool_calls.{i}.arguments",
                    function.get("arguments"),
                )


def _set_completions(span, choices):
    if choices is None:
        return

    for choice in choices:
        index = choice.get("index")
        prefix = f"{SpanAttributes.LLM_COMPLETIONS}.{index}"
        _set_span_attribute(
            span, f"{prefix}.finish_reason", choice.get("finish_reason")
        )

        if choice.get("content_filter_results"):
            _set_span_attribute(
                span,
                f"{prefix}.{CONTENT_FILTER_KEY}",
                json.dumps(choice.get("content_filter_results")),
            )

        if choice.get("finish_reason") == "content_filter":
            _set_span_attribute(span, f"{prefix}.role", "assistant")
            _set_span_attribute(span, f"{prefix}.content", "FILTERED")

            return

        message = choice.get("message")
        if not message:
            return

        _set_span_attribute(span, f"{prefix}.role", message.get("role"))

        if message.get("refusal"):
            _set_span_attribute(span, f"{prefix}.refusal", message.get("refusal"))
        else:
            _set_span_attribute(span, f"{prefix}.content", message.get("content"))

        function_call = message.get("function_call")
        if function_call:
            _set_span_attribute(
                span, f"{prefix}.tool_calls.0.name", function_call.get("name")
            )
            _set_span_attribute(
                span,
                f"{prefix}.tool_calls.0.arguments",
                function_call.get("arguments"),
            )

        tool_calls = message.get("tool_calls")
        if tool_calls:
            for i, tool_call in enumerate(tool_calls):
                function = tool_call.get("function")
                _set_span_attribute(
                    span,
                    f"{prefix}.tool_calls.{i}.id",
                    tool_call.get("id"),
                )
                _set_span_attribute(
                    span,
                    f"{prefix}.tool_calls.{i}.name",
                    function.get("name"),
                )
                _set_span_attribute(
                    span,
                    f"{prefix}.tool_calls.{i}.arguments",
                    function.get("arguments"),
                )


@dont_throw
def _set_streaming_token_metrics(
    request_kwargs, complete_response, span, token_counter, shared_attributes
):
    if not should_record_stream_token_usage():
        return

    prompt_usage = -1
    completion_usage = -1

    # prompt_usage
    if request_kwargs and request_kwargs.get("messages"):
        prompt_content = ""
        # setting the default model_name as gpt-4. As this uses the embedding "cl100k_base" that
        # is used by most of the other model.
        model_name = (
            complete_response.get("model") or request_kwargs.get("model") or "gpt-4"
        )
        for msg in request_kwargs.get("messages"):
            if msg.get("content"):
                prompt_content += msg.get("content")
        if model_name:
            prompt_usage = get_token_count_from_string(prompt_content, model_name)

    # completion_usage
    if complete_response.get("choices"):
        completion_content = ""
        # setting the default model_name as gpt-4. As this uses the embedding "cl100k_base" that
        # is used by most of the other model.
        model_name = complete_response.get("model") or "gpt-4"

        for choice in complete_response.get("choices"):
            if choice.get("message") and choice.get("message").get("content"):
                completion_content += choice["message"]["content"]

        if model_name:
            completion_usage = get_token_count_from_string(
                completion_content, model_name
            )

    # span record
    _set_span_stream_usage(span, prompt_usage, completion_usage)

    # metrics record
    if token_counter:
        if isinstance(prompt_usage, int) and prompt_usage >= 0:
            attributes_with_token_type = {
                **shared_attributes,
                SpanAttributes.LLM_TOKEN_TYPE: "input",
            }
            token_counter.record(prompt_usage, attributes=attributes_with_token_type)

        if isinstance(completion_usage, int) and completion_usage >= 0:
            attributes_with_token_type = {
                **shared_attributes,
                SpanAttributes.LLM_TOKEN_TYPE: "output",
            }
            token_counter.record(
                completion_usage, attributes=attributes_with_token_type
            )


class ChatStream(ObjectProxy):
    _span = None
    _instance = None
    _token_counter = None
    _choice_counter = None
    _duration_histogram = None
    _streaming_time_to_first_token = None
    _streaming_time_to_generate = None
    _start_time = None
    _request_kwargs = None

    def __init__(
        self,
        span,
        response,
        instance=None,
        token_counter=None,
        choice_counter=None,
        duration_histogram=None,
        streaming_time_to_first_token=None,
        streaming_time_to_generate=None,
        start_time=None,
        request_kwargs=None,
    ):
        super().__init__(response)

        self._span = span
        self._instance = instance
        self._token_counter = token_counter
        self._choice_counter = choice_counter
        self._duration_histogram = duration_histogram
        self._streaming_time_to_first_token = streaming_time_to_first_token
        self._streaming_time_to_generate = streaming_time_to_generate
        self._start_time = start_time
        self._request_kwargs = request_kwargs

        self._first_token = True
        # will be updated when first token is received
        self._time_of_first_token = self._start_time
        self._complete_response = {
            "choices": [],
            "model": "",
            "id": "",
            "service_tier": None,
        }

        # Cleanup state tracking to prevent duplicate operations
        self._cleanup_completed = False
        self._cleanup_lock = threading.Lock()

    def __del__(self):
        """Cleanup when object is garbage collected"""
        if hasattr(self, "_cleanup_completed") and not self._cleanup_completed:
            self._ensure_cleanup()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        cleanup_exception = None
        try:
            self._ensure_cleanup()
        except Exception as e:
            cleanup_exception = e
            # Don't re-raise to avoid masking original exception

        result = self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)

        if cleanup_exception:
            # Log cleanup exception but don't affect context manager behavior
            logger.debug(
                "Error during ChatStream cleanup in __exit__: %s", cleanup_exception
            )

        return result

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.__wrapped__.__aexit__(exc_type, exc_val, exc_tb)

    def __iter__(self):
        return self

    def __aiter__(self):
        return self

    def __next__(self):
        try:
            chunk = self.__wrapped__.__next__()
        except Exception as e:
            if isinstance(e, StopIteration):
                self._process_complete_response()
            else:
                # Handle cleanup for other exceptions during stream iteration
                self._ensure_cleanup()
                if self._span and self._span.is_recording():
                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
            raise
        else:
            self._process_item(chunk)
            return chunk

    async def __anext__(self):
        try:
            chunk = await self.__wrapped__.__anext__()
        except Exception as e:
            if isinstance(e, StopAsyncIteration):
                self._process_complete_response()
            else:
                # Handle cleanup for other exceptions during stream iteration
                self._ensure_cleanup()
                if self._span and self._span.is_recording():
                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
            raise
        else:
            self._process_item(chunk)
            return chunk

    def _process_item(self, item):
        self._span.add_event(name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")
        self._complete_response["id"] = item.id if hasattr(item, "id") else ""
        self._complete_response["service_tier"] = (
            item.service_tier if hasattr(item, "service_tier") else ""
        )

        if self._first_token and self._streaming_time_to_first_token:
            self._time_of_first_token = time.time()
            self._streaming_time_to_first_token.record(
                self._time_of_first_token - self._start_time,
                attributes=self._shared_attributes(),
            )
            self._first_token = False

        _accumulate_stream_items(item, self._complete_response)

    def _shared_attributes(self):
        return metric_shared_attributes(
            response_model=self._complete_response.get("model")
            or self._request_kwargs.get("model")
            or None,
            operation="chat",
            server_address=_get_openai_base_url(self._instance),
            is_streaming=True,
        )

    @dont_throw
    def _process_complete_response(self):
        _set_streaming_token_metrics(
            self._request_kwargs,
            self._complete_response,
            self._span,
            self._token_counter,
            self._shared_attributes(),
        )

        # choice metrics
        if self._choice_counter and self._complete_response.get("choices"):
            _set_choice_counter_metrics(
                self._choice_counter,
                self._complete_response.get("choices"),
                self._shared_attributes(),
            )

        # duration metrics
        if self._start_time and isinstance(self._start_time, (float, int)):
            duration = time.time() - self._start_time
        else:
            duration = None
        if duration and isinstance(duration, (float, int)) and self._duration_histogram:
            self._duration_histogram.record(
                duration, attributes=self._shared_attributes()
            )
        if self._streaming_time_to_generate and self._time_of_first_token:
            self._streaming_time_to_generate.record(
                time.time() - self._time_of_first_token,
                attributes=self._shared_attributes(),
            )

        _set_response_attributes(self._span, self._complete_response)
        if should_emit_events():
            for choice in self._complete_response.get("choices", []):
                emit_event(_parse_choice_event(choice))
        else:
            if should_send_prompts():
                _set_completions(self._span, self._complete_response.get("choices"))

        self._span.set_status(Status(StatusCode.OK))
        self._span.end()
        self._cleanup_completed = True

    @dont_throw
    def _ensure_cleanup(self):
        """Thread-safe cleanup method that handles different cleanup scenarios"""
        with self._cleanup_lock:
            if self._cleanup_completed:
                logger.debug("ChatStream cleanup already completed, skipping")
                return

            try:
                logger.debug("Starting ChatStream cleanup")

                # Set span status and close it
                if self._span and self._span.is_recording():
                    self._span.set_status(Status(StatusCode.OK))
                    self._span.end()
                    logger.debug("ChatStream span closed successfully")

                # Calculate partial metrics based on available data
                self._record_partial_metrics()

                self._cleanup_completed = True
                logger.debug("ChatStream cleanup completed successfully")

            except Exception as e:
                # Log cleanup errors but don't propagate to avoid masking original issues
                logger.debug("Error during ChatStream cleanup: %s", str(e))

                # Still try to close the span even if metrics recording failed
                try:
                    if self._span and self._span.is_recording():
                        self._span.set_status(
                            Status(StatusCode.ERROR, "Cleanup failed")
                        )
                        self._span.end()
                    self._cleanup_completed = True
                except Exception:
                    # Final fallback - just mark as completed to prevent infinite loops
                    self._cleanup_completed = True

    @dont_throw
    def _record_partial_metrics(self):
        """Record metrics based on available partial data"""
        # Always record duration if we have start time
        if (
            self._start_time
            and isinstance(self._start_time, (float, int))
            and self._duration_histogram
        ):
            duration = time.time() - self._start_time
            self._duration_histogram.record(
                duration, attributes=self._shared_attributes()
            )

        # Record basic span attributes even without complete response
        if self._span and self._span.is_recording():
            _set_response_attributes(self._span, self._complete_response)

        # Record partial token metrics if we have any data
        if self._complete_response.get("choices") or self._request_kwargs:
            _set_streaming_token_metrics(
                self._request_kwargs,
                self._complete_response,
                self._span,
                self._token_counter,
                self._shared_attributes(),
            )

        # Record choice metrics if we have any choices processed
        if self._choice_counter and self._complete_response.get("choices"):
            _set_choice_counter_metrics(
                self._choice_counter,
                self._complete_response.get("choices"),
                self._shared_attributes(),
            )


# Backward compatibility with OpenAI v0


@dont_throw
def _build_from_streaming_response(
    span,
    response,
    instance=None,
    token_counter=None,
    choice_counter=None,
    duration_histogram=None,
    streaming_time_to_first_token=None,
    streaming_time_to_generate=None,
    start_time=None,
    request_kwargs=None,
):
    complete_response = {"choices": [], "model": "", "id": "", "service_tier": None}

    first_token = True
    time_of_first_token = start_time  # will be updated when first token is received

    for item in response:
        span.add_event(name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")

        item_to_yield = item

        if first_token and streaming_time_to_first_token:
            time_of_first_token = time.time()
            streaming_time_to_first_token.record(time_of_first_token - start_time)
            first_token = False

        _accumulate_stream_items(item, complete_response)

        yield item_to_yield

    shared_attributes = {
        SpanAttributes.LLM_RESPONSE_MODEL: complete_response.get("model") or None,
        "server.address": _get_openai_base_url(instance),
        "stream": True,
    }

    _set_streaming_token_metrics(
        request_kwargs, complete_response, span, token_counter, shared_attributes
    )

    # choice metrics
    if choice_counter and complete_response.get("choices"):
        _set_choice_counter_metrics(
            choice_counter, complete_response.get("choices"), shared_attributes
        )

    # duration metrics
    if start_time and isinstance(start_time, (float, int)):
        duration = time.time() - start_time
    else:
        duration = None
    if duration and isinstance(duration, (float, int)) and duration_histogram:
        duration_histogram.record(duration, attributes=shared_attributes)
    if streaming_time_to_generate and time_of_first_token:
        streaming_time_to_generate.record(time.time() - time_of_first_token)

    _set_response_attributes(span, complete_response)
    if should_emit_events():
        for choice in complete_response.get("choices", []):
            emit_event(_parse_choice_event(choice))
    else:
        if should_send_prompts():
            _set_completions(span, complete_response.get("choices"))

    span.set_status(Status(StatusCode.OK))
    span.end()


@dont_throw
async def _abuild_from_streaming_response(
    span,
    response,
    instance=None,
    token_counter=None,
    choice_counter=None,
    duration_histogram=None,
    streaming_time_to_first_token=None,
    streaming_time_to_generate=None,
    start_time=None,
    request_kwargs=None,
):
    complete_response = {"choices": [], "model": "", "id": "", "service_tier": None}

    first_token = True
    time_of_first_token = start_time  # will be updated when first token is received

    async for item in response:
        span.add_event(name=f"{SpanAttributes.LLM_CONTENT_COMPLETION_CHUNK}")

        item_to_yield = item

        if first_token and streaming_time_to_first_token:
            time_of_first_token = time.time()
            streaming_time_to_first_token.record(time_of_first_token - start_time)
            first_token = False

        _accumulate_stream_items(item, complete_response)

        yield item_to_yield

    shared_attributes = {
        SpanAttributes.LLM_RESPONSE_MODEL: complete_response.get("model") or None,
        "server.address": _get_openai_base_url(instance),
        "stream": True,
    }

    _set_streaming_token_metrics(
        request_kwargs, complete_response, span, token_counter, shared_attributes
    )

    # choice metrics
    if choice_counter and complete_response.get("choices"):
        _set_choice_counter_metrics(
            choice_counter, complete_response.get("choices"), shared_attributes
        )

    # duration metrics
    if start_time and isinstance(start_time, (float, int)):
        duration = time.time() - start_time
    else:
        duration = None
    if duration and isinstance(duration, (float, int)) and duration_histogram:
        duration_histogram.record(duration, attributes=shared_attributes)
    if streaming_time_to_generate and time_of_first_token:
        streaming_time_to_generate.record(time.time() - time_of_first_token)

    _set_response_attributes(span, complete_response)
    if should_emit_events():
        for choice in complete_response.get("choices", []):
            emit_event(_parse_choice_event(choice))
    else:
        if should_send_prompts():
            _set_completions(span, complete_response.get("choices"))

    span.set_status(Status(StatusCode.OK))
    span.end()


def _parse_tool_calls(
    tool_calls: Optional[List[Union[dict, pydantic.BaseModel]]],
) -> Union[List[ToolCall], None]:
    """
    Util to correctly parse the tool calls data from the OpenAI API to this module's
    standard `ToolCall`.
    """
    if tool_calls is None:
        return tool_calls

    result = []

    for tool_call in tool_calls:
        tool_call_data = None

        # Handle dict or ChatCompletionMessageToolCall
        if isinstance(tool_call, dict):
            tool_call_data = copy.deepcopy(tool_call)
        elif _is_tool_call_model(tool_call):
            tool_call_data = tool_call.model_dump()
        elif _is_function_call(tool_call):
            function_call = tool_call.model_dump()
            tool_call_data = ToolCall(
                id="",
                function={
                    "name": function_call.get("name"),
                    "arguments": function_call.get("arguments"),
                },
                type="function",
            )

        result.append(tool_call_data)
    return result


def _is_tool_call_model(tool_call):
    try:
        from openai.types.chat.chat_completion_message_tool_call import (
            ChatCompletionMessageFunctionToolCall,
        )

        return isinstance(tool_call, ChatCompletionMessageFunctionToolCall)
    except Exception:
        return False


def _is_function_call(model: Union[dict, pydantic.BaseModel]) -> bool:
    try:
        from openai.types.chat.chat_completion_message import FunctionCall

        return isinstance(model, FunctionCall)
    except Exception:
        return False


@singledispatch
def _parse_choice_event(choice) -> ChoiceEvent:
    has_message = choice.message is not None
    has_finish_reason = choice.finish_reason is not None
    has_tool_calls = has_message and choice.message.tool_calls
    has_function_call = has_message and choice.message.function_call

    content = choice.message.content if has_message else None
    role = choice.message.role if has_message else "unknown"
    finish_reason = choice.finish_reason if has_finish_reason else "unknown"

    if has_tool_calls and has_function_call:
        tool_calls = choice.message.tool_calls + [choice.message.function_call]
    elif has_tool_calls:
        tool_calls = choice.message.tool_calls
    elif has_function_call:
        tool_calls = [choice.message.function_call]
    else:
        tool_calls = None

    return ChoiceEvent(
        index=choice.index,
        message={"content": content, "role": role},
        finish_reason=finish_reason,
        tool_calls=_parse_tool_calls(tool_calls),
    )


@_parse_choice_event.register
def _(choice: dict) -> ChoiceEvent:
    message = choice.get("message")
    has_message = message is not None
    has_finish_reason = choice.get("finish_reason") is not None
    has_tool_calls = has_message and message.get("tool_calls")
    has_function_call = has_message and message.get("function_call")

    content = choice.get("message").get("content", "") if has_message else None
    role = choice.get("message").get("role") if has_message else "unknown"
    finish_reason = choice.get("finish_reason") if has_finish_reason else "unknown"

    if has_tool_calls and has_function_call:
        tool_calls = message.get("tool_calls") + [message.get("function_call")]
    elif has_tool_calls:
        tool_calls = message.get("tool_calls")
    elif has_function_call:
        tool_calls = [message.get("function_call")]
    else:
        tool_calls = None

    if tool_calls is not None:
        for tool_call in tool_calls:
            tool_call["type"] = "function"

    return ChoiceEvent(
        index=choice.get("index"),
        message={"content": content, "role": role},
        finish_reason=finish_reason,
        tool_calls=tool_calls,
    )


def _accumulate_stream_items(item, complete_response):
    if is_openai_v1():
        item = model_as_dict(item)

    complete_response["model"] = item.get("model")
    complete_response["id"] = item.get("id")
    complete_response["service_tier"] = item.get("service_tier")

    # capture usage information from the last stream chunks
    if item.get("usage"):
        complete_response["usage"] = item.get("usage")
    elif item.get("choices") and item["choices"][0].get("usage"):
        # Some LLM providers like moonshot mistakenly place token usage information within choices[0], handle this.
        complete_response["usage"] = item["choices"][0].get("usage")

    # prompt filter results
    if item.get("prompt_filter_results"):
        complete_response["prompt_filter_results"] = item.get("prompt_filter_results")

    for choice in item.get("choices"):
        index = choice.get("index")
        if len(complete_response.get("choices")) <= index:
            complete_response["choices"].append(
                {"index": index, "message": {"content": "", "role": ""}}
            )
        complete_choice = complete_response.get("choices")[index]
        if choice.get("finish_reason"):
            complete_choice["finish_reason"] = choice.get("finish_reason")
        if choice.get("content_filter_results"):
            complete_choice["content_filter_results"] = choice.get(
                "content_filter_results"
            )

        delta = choice.get("delta")

        if delta and delta.get("content"):
            complete_choice["message"]["content"] += delta.get("content")

        if delta and delta.get("role"):
            complete_choice["message"]["role"] = delta.get("role")
        if delta and delta.get("tool_calls"):
            tool_calls = delta.get("tool_calls")
            if not isinstance(tool_calls, list) or len(tool_calls) == 0:
                continue

            if not complete_choice["message"].get("tool_calls"):
                complete_choice["message"]["tool_calls"] = []

            for tool_call in tool_calls:
                i = int(tool_call["index"])
                if len(complete_choice["message"]["tool_calls"]) <= i:
                    complete_choice["message"]["tool_calls"].append(
                        {"id": "", "function": {"name": "", "arguments": ""}}
                    )

                span_tool_call = complete_choice["message"]["tool_calls"][i]
                span_function = span_tool_call["function"]
                tool_call_function = tool_call.get("function")

                if tool_call.get("id"):
                    span_tool_call["id"] = tool_call.get("id")
                if tool_call_function and tool_call_function.get("name"):
                    span_function["name"] = tool_call_function.get("name")
                if tool_call_function and tool_call_function.get("arguments"):
                    span_function["arguments"] += tool_call_function.get("arguments")