lmnr 0.6.21__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- lmnr/__init__.py +0 -4
- lmnr/opentelemetry_lib/decorators/__init__.py +81 -32
- lmnr/opentelemetry_lib/litellm/__init__.py +5 -2
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/__init__.py +6 -2
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py +11 -2
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/__init__.py +3 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/langgraph/__init__.py +16 -16
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py +6 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +141 -9
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +10 -2
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +6 -2
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +8 -2
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +4 -1
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +20 -4
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/threading/__init__.py +190 -0
- lmnr/opentelemetry_lib/tracing/__init__.py +89 -1
- lmnr/opentelemetry_lib/tracing/context.py +126 -0
- lmnr/opentelemetry_lib/tracing/processor.py +5 -6
- lmnr/opentelemetry_lib/tracing/tracer.py +29 -0
- lmnr/sdk/browser/browser_use_otel.py +5 -5
- lmnr/sdk/browser/patchright_otel.py +14 -0
- lmnr/sdk/browser/playwright_otel.py +32 -6
- lmnr/sdk/browser/pw_utils.py +119 -112
- lmnr/sdk/browser/recorder/record.umd.min.cjs +84 -0
- lmnr/sdk/client/asynchronous/resources/browser_events.py +1 -0
- lmnr/sdk/laminar.py +156 -186
- lmnr/sdk/types.py +17 -11
- lmnr/version.py +1 -1
- {lmnr-0.6.21.dist-info → lmnr-0.7.1.dist-info}/METADATA +3 -2
- {lmnr-0.6.21.dist-info → lmnr-0.7.1.dist-info}/RECORD +32 -31
- {lmnr-0.6.21.dist-info → lmnr-0.7.1.dist-info}/WHEEL +1 -1
- lmnr/opentelemetry_lib/tracing/context_properties.py +0 -65
- lmnr/sdk/browser/rrweb/rrweb.umd.min.cjs +0 -98
- {lmnr-0.6.21.dist-info → lmnr-0.7.1.dist-info}/entry_points.txt +0 -0
lmnr/__init__.py
CHANGED
@@ -9,7 +9,6 @@ from .sdk.types import (
     HumanEvaluator,
     RunAgentResponseChunk,
     StepChunkContent,
-    TracingLevel,
 )
 from .sdk.decorators import observe
 from .sdk.types import LaminarSpanContext
@@ -18,7 +17,6 @@ from .opentelemetry_lib.tracing.attributes import Attributes
 from .opentelemetry_lib.tracing.instruments import Instruments
 from .opentelemetry_lib.tracing.processor import LaminarSpanProcessor
 from .opentelemetry_lib.tracing.tracer import get_laminar_tracer_provider, get_tracer
-from opentelemetry.trace import use_span
 
 __all__ = [
     "AgentOutput",
@@ -36,10 +34,8 @@ __all__ = [
     "LaminarSpanProcessor",
     "RunAgentResponseChunk",
     "StepChunkContent",
-    "TracingLevel",
     "get_laminar_tracer_provider",
     "get_tracer",
     "evaluate",
     "observe",
-    "use_span",
 ]
lmnr/opentelemetry_lib/decorators/__init__.py
CHANGED
@@ -5,13 +5,19 @@ import orjson
 import types
 from typing import Any, AsyncGenerator, Callable, Generator, Literal
 
-from opentelemetry import trace
 from opentelemetry import context as context_api
-from opentelemetry.trace import Span
-
+from opentelemetry.trace import Span, Status, StatusCode
+
+from lmnr.opentelemetry_lib.tracing.context import (
+    CONTEXT_SESSION_ID_KEY,
+    CONTEXT_USER_ID_KEY,
+    attach_context,
+    detach_context,
+    get_event_attributes_from_context,
+)
 from lmnr.sdk.utils import get_input_from_func_args, is_method
 from lmnr.opentelemetry_lib import MAX_MANUAL_SPAN_PAYLOAD_SIZE
-from lmnr.opentelemetry_lib.tracing.tracer import
+from lmnr.opentelemetry_lib.tracing.tracer import get_tracer_with_context
 from lmnr.opentelemetry_lib.tracing.attributes import (
     ASSOCIATION_PROPERTIES,
     SPAN_INPUT,
@@ -37,6 +43,7 @@ def default_json(o):
     try:
         return str(o)
     except Exception:
+        logger.debug("Failed to serialize data to JSON, inner type: %s", type(o))
         pass
     return DEFAULT_PLACEHOLDER
 
@@ -61,8 +68,13 @@ def _setup_span(
     span_name: str, span_type: str, association_properties: dict[str, Any] | None
 ):
     """Set up a span with the given name, type, and association properties."""
-    with
-    span
+    with get_tracer_with_context() as (tracer, isolated_context):
+        # Create span in isolated context
+        span = tracer.start_span(
+            span_name,
+            context=isolated_context,
+            attributes={SPAN_TYPE: span_type},
+        )
 
     if association_properties is not None:
         for key, value in association_properties.items():
@@ -148,10 +160,10 @@ def _process_output(
         pass
 
 
-def _cleanup_span(span: Span,
+def _cleanup_span(span: Span, wrapper: TracerWrapper):
     """Clean up span and context."""
     span.end()
-
+    wrapper.pop_span_context()
 
 
 def observe_base(
@@ -171,10 +183,25 @@ def observe_base(
         return fn(*args, **kwargs)
 
     span_name = name or fn.__name__
+    wrapper = TracerWrapper()
 
     span = _setup_span(span_name, span_type, association_properties)
-
-
+    new_context = wrapper.push_span_context(span)
+    if session_id := association_properties.get("session_id"):
+        new_context = context_api.set_value(
+            CONTEXT_SESSION_ID_KEY, session_id, new_context
+        )
+    if user_id := association_properties.get("user_id"):
+        new_context = context_api.set_value(
+            CONTEXT_USER_ID_KEY, user_id, new_context
+        )
+    # Some auto-instrumentations are not under our control, so they
+    # don't have access to our isolated context. We attach the context
+    # to the OTEL global context, so that spans know their parent
+    # span and trace_id.
+    ctx_token = context_api.attach(new_context)
+    # update our isolated context too
+    isolated_ctx_token = attach_context(new_context)
 
     _process_input(
         span, fn, args, kwargs, ignore_input, ignore_inputs, input_formatter
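Note on the hunk above: the decorator now keeps two contexts in sync, Laminar's isolated context and the OTEL global one, and restores both through tokens. A minimal standalone sketch of the token-based attach/detach pattern, using only the stock OpenTelemetry context API (the key name and helper are illustrative, not Laminar's):

from opentelemetry import context as context_api

SESSION_ID_KEY = context_api.create_key("sketch.session_id")  # illustrative key

def run_with_session(session_id: str, fn):
    # Build a context carrying the value, attach it globally, and
    # always detach with the token so the previous context is restored.
    token = context_api.attach(context_api.set_value(SESSION_ID_KEY, session_id))
    try:
        return fn()
    finally:
        context_api.detach(token)

print(run_with_session("abc", lambda: context_api.get_value(SESSION_ID_KEY)))  # -> abc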
@@ -184,9 +211,12 @@ def observe_base(
         res = fn(*args, **kwargs)
     except Exception as e:
         _process_exception(span, e)
-        _cleanup_span(span,
+        _cleanup_span(span, wrapper)
         raise e
-
+    finally:
+        # Always restore global context
+        context_api.detach(ctx_token)
+        detach_context(isolated_ctx_token)
     # span will be ended in the generator
     if isinstance(res, types.GeneratorType):
         return _handle_generator(span, ctx_token, res)
@@ -201,7 +231,7 @@
         return _ahandle_generator(span, ctx_token, res)
 
     _process_output(span, res, ignore_output, output_formatter)
-    _cleanup_span(span,
+    _cleanup_span(span, wrapper)
     return res
 
     return wrap
@@ -227,10 +257,25 @@ def async_observe_base(
         return await fn(*args, **kwargs)
 
     span_name = name or fn.__name__
+    wrapper = TracerWrapper()
 
     span = _setup_span(span_name, span_type, association_properties)
-
-
+    new_context = wrapper.push_span_context(span)
+    if session_id := association_properties.get("session_id"):
+        new_context = context_api.set_value(
+            CONTEXT_SESSION_ID_KEY, session_id, new_context
+        )
+    if user_id := association_properties.get("user_id"):
+        new_context = context_api.set_value(
+            CONTEXT_USER_ID_KEY, user_id, new_context
+        )
+    # Some auto-instrumentations are not under our control, so they
+    # don't have access to our isolated context. We attach the context
+    # to the OTEL global context, so that spans know their parent
+    # span and trace_id.
+    ctx_token = context_api.attach(new_context)
+    # update our isolated context too
+    isolated_ctx_token = attach_context(new_context)
 
     _process_input(
         span, fn, args, kwargs, ignore_input, ignore_inputs, input_formatter
@@ -240,8 +285,12 @@ def async_observe_base(
         res = await fn(*args, **kwargs)
     except Exception as e:
         _process_exception(span, e)
-        _cleanup_span(span,
+        _cleanup_span(span, wrapper)
         raise e
+    finally:
+        # Always restore global context
+        context_api.detach(ctx_token)
+        detach_context(isolated_ctx_token)
 
     # span will be ended in the generator
     if isinstance(res, types.AsyncGeneratorType):
@@ -250,7 +299,7 @@
         return await _ahandle_generator(span, ctx_token, res)
 
     _process_output(span, res, ignore_output, output_formatter)
-    _cleanup_span(span,
+    _cleanup_span(span, wrapper)
     return res
 
     return wrap
@@ -258,24 +307,24 @@
     return decorate
 
 
-def _handle_generator(span: Span,
-
-
-
-
-    context_api.detach(ctx_token)
-
+def _handle_generator(span: Span, wrapper: TracerWrapper, res: Generator):
+    try:
+        yield from res
+    finally:
+        _cleanup_span(span, wrapper)
 
-async def _ahandle_generator(span: Span, ctx_token, res: AsyncGenerator[Any, Any]):
-    # async with contextlib.aclosing(res) as closing_gen:
-    async for part in res:
-        yield part
 
-
-
-
+async def _ahandle_generator(span: Span, wrapper: TracerWrapper, res: AsyncGenerator):
+    try:
+        async for part in res:
+            yield part
+    finally:
+        _cleanup_span(span, wrapper)
 
 
 def _process_exception(span: Span, e: Exception):
     # Note that this `escaped` is sent as a StringValue("True"), not a boolean.
-    span.record_exception(
+    span.record_exception(
+        e, attributes=get_event_attributes_from_context(), escaped=True
+    )
+    span.set_status(Status(StatusCode.ERROR, str(e)))
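The `_process_exception` change above records the exception event with attributes pulled from context and marks the span as errored. A self-contained sketch of that error-recording pattern with the stock OpenTelemetry API (names here are illustrative):

from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode

tracer = trace.get_tracer("sketch")

def call_with_error_recording(fn, event_attributes: dict):
    # Disable the context manager's own exception handling so the
    # event is recorded exactly once, with our attributes.
    with tracer.start_as_current_span(
        "guarded-call", record_exception=False, set_status_on_exception=False
    ) as span:
        try:
            return fn()
        except Exception as e:
            # The attributes land on the recorded exception event,
            # not on the span itself.
            span.record_exception(e, attributes=event_attributes)
            span.set_status(Status(StatusCode.ERROR, str(e)))
            raise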
lmnr/opentelemetry_lib/litellm/__init__.py
CHANGED
@@ -7,6 +7,7 @@ from opentelemetry.trace import SpanKind, Status, StatusCode, Tracer
 from lmnr.opentelemetry_lib.litellm.utils import model_as_dict, set_span_attribute
 from lmnr.opentelemetry_lib.tracing import TracerWrapper
 
+from lmnr.opentelemetry_lib.tracing.context import get_event_attributes_from_context
 from lmnr.opentelemetry_lib.utils.package_check import is_package_installed
 from lmnr.sdk.log import get_default_logger
 
@@ -141,10 +142,12 @@ try:
             else:
                 span.set_status(Status(StatusCode.ERROR))
                 if isinstance(response_obj, Exception):
-                    span.record_exception(response_obj)
+                    attributes = get_event_attributes_from_context()
+                    span.record_exception(response_obj, attributes=attributes)
 
         except Exception as e:
-            span.record_exception(e)
+            attributes = get_event_attributes_from_context()
+            span.record_exception(e, attributes=attributes)
             logger.error(f"Error in Laminar LiteLLM instrumentation: {e}")
         finally:
             span.end(int(end_time.timestamp() * 1e9))
lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/__init__.py
CHANGED
@@ -30,6 +30,8 @@ from .utils import (
     should_emit_events,
 )
 from .version import __version__
+
+from lmnr.opentelemetry_lib.tracing.context import get_current_context
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY, unwrap
 from opentelemetry.metrics import Counter, Histogram, Meter, get_meter
@@ -396,9 +398,10 @@ def _wrap(
         name,
         kind=SpanKind.CLIENT,
         attributes={
-            SpanAttributes.LLM_SYSTEM: "
+            SpanAttributes.LLM_SYSTEM: "anthropic",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
 
     _handle_input(span, event_logger, kwargs)
@@ -493,9 +496,10 @@ async def _awrap(
         name,
         kind=SpanKind.CLIENT,
         attributes={
-            SpanAttributes.LLM_SYSTEM: "
+            SpanAttributes.LLM_SYSTEM: "anthropic",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
     await _ahandle_input(span, event_logger, kwargs)
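A recurring change across the instrumentation modules (here and below) is passing `context=get_current_context()` to `start_span`, so spans parent to the SDK's resolved context instead of whatever the implicit current context happens to be. A minimal sketch of explicit-parent span creation, where `get_current_context` is a stand-in for any function returning an OpenTelemetry `Context`:

from opentelemetry import context as context_api, trace
from opentelemetry.trace import SpanKind

def get_current_context() -> context_api.Context:
    # Placeholder: the SDK would resolve its own isolated context here.
    return context_api.get_current()

tracer = trace.get_tracer("sketch")
span = tracer.start_span(
    "llm.request",
    kind=SpanKind.CLIENT,
    context=get_current_context(),  # explicit parent instead of implicit current
)
try:
    pass  # instrumented work happens here
finally:
    span.end()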
lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py
CHANGED
@@ -8,6 +8,11 @@ from typing import AsyncGenerator, Callable, Collection, Generator
 
 from google.genai import types
 
+from lmnr.opentelemetry_lib.tracing.context import (
+    get_current_context,
+    get_event_attributes_from_context,
+)
+
 from .config import (
     Config,
 )
@@ -474,6 +479,7 @@ def _wrap(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
             SpanAttributes.LLM_SYSTEM: "gemini",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
 
     if span.is_recording():
@@ -488,8 +494,9 @@ def _wrap(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
         span.end()
         return response
     except Exception as e:
+        attributes = get_event_attributes_from_context()
        span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
         raise e
@@ -509,6 +516,7 @@ async def _awrap(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
             SpanAttributes.LLM_SYSTEM: "gemini",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
 
     if span.is_recording():
@@ -525,8 +533,9 @@ async def _awrap(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
         span.end()
         return response
     except Exception as e:
+        attributes = get_event_attributes_from_context()
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
         raise e
lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/__init__.py
CHANGED
@@ -27,6 +27,7 @@ from .utils import (
     should_emit_events,
 )
 from .version import __version__
+from lmnr.opentelemetry_lib.tracing.context import get_current_context
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY, unwrap
 from opentelemetry.metrics import Counter, Histogram, Meter, get_meter
@@ -245,6 +246,7 @@ def _wrap(
             SpanAttributes.LLM_SYSTEM: "Groq",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
 
     _handle_input(span, kwargs, event_logger)
@@ -327,6 +329,7 @@ async def _awrap(
             SpanAttributes.LLM_SYSTEM: "Groq",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
 
     _handle_input(span, kwargs, event_logger)
lmnr/opentelemetry_lib/opentelemetry/instrumentation/langgraph/__init__.py
CHANGED
@@ -12,10 +12,7 @@ from langchain_core.runnables.graph import Graph
 from opentelemetry.trace import Tracer
 from wrapt import wrap_function_wrapper
 from opentelemetry.trace import get_tracer
-
-from lmnr.opentelemetry_lib.tracing.context_properties import (
-    update_association_properties,
-)
+from opentelemetry.context import get_value, attach, set_value
 
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.utils import unwrap
@@ -45,12 +42,13 @@ def wrap_pregel_stream(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs)
         }
         for edge in graph.edges
     ]
-    update_association_properties(
-        {
-            "langgraph.edges": json.dumps(edges),
-            "langgraph.nodes": json.dumps(nodes),
-        }
-    )
+    d = {
+        "langgraph.edges": json.dumps(edges),
+        "langgraph.nodes": json.dumps(nodes),
+    }
+    association_properties = get_value("lmnr.langgraph.graph") or {}
+    association_properties.update(d)
+    attach(set_value("lmnr.langgraph.graph", association_properties))
     return wrapped(*args, **kwargs)
 
 
@@ -75,12 +73,14 @@ async def async_wrap_pregel_stream(
         }
         for edge in graph.edges
     ]
-    update_association_properties(
-        {
-            "langgraph.edges": json.dumps(edges),
-            "langgraph.nodes": json.dumps(nodes),
-        }
-    )
+
+    d = {
+        "langgraph.edges": json.dumps(edges),
+        "langgraph.nodes": json.dumps(nodes),
+    }
+    association_properties = get_value("lmnr.langgraph.graph") or {}
+    association_properties.update(d)
+    attach(set_value("lmnr.langgraph.graph", association_properties))
 
     async for item in wrapped(*args, **kwargs):
         yield item
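The replacement for `update_association_properties` stores the graph metadata directly in the OpenTelemetry context under a single key. A standalone sketch of that get/merge/attach pattern (the key name mirrors the one above; the helper is illustrative):

from opentelemetry.context import attach, get_value, set_value

GRAPH_KEY = "lmnr.langgraph.graph"

def merge_graph_properties(new_props: dict) -> None:
    # Read the current mapping (if any), merge, and attach the updated
    # context so downstream code sees the merged value.
    existing = dict(get_value(GRAPH_KEY) or {})
    existing.update(new_props)
    attach(set_value(GRAPH_KEY, existing))

merge_graph_properties({"langgraph.nodes": "[]"})
merge_graph_properties({"langgraph.edges": "[]"})
assert get_value(GRAPH_KEY) == {"langgraph.nodes": "[]", "langgraph.edges": "[]"}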
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py
CHANGED
@@ -395,6 +395,12 @@ def get_token_count_from_string(string: str, model_name: str):
                f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
            )
            return None
+        except Exception as ex:
+            # Other exceptions in tiktoken
+            logger.warning(
+                f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
+            )
+            return None
 
        tiktoken_encodings[model_name] = encoding
    else:
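The new `except Exception` branch makes the tiktoken lookup failure-tolerant instead of letting unexpected tiktoken errors escape. A standalone sketch of the same defensive, cached lookup, assuming `tiktoken` is installed (helper and cache names are illustrative):

import logging
import tiktoken

logger = logging.getLogger(__name__)
_encodings: dict[str, tiktoken.Encoding] = {}

def count_tokens(text: str, model_name: str) -> int | None:
    encoding = _encodings.get(model_name)
    if encoding is None:
        try:
            encoding = tiktoken.encoding_for_model(model_name)
        except KeyError:
            # Unknown model name.
            logger.warning("No tiktoken encoding for %s", model_name)
            return None
        except Exception as ex:
            # Any other tiktoken failure: degrade gracefully.
            logger.warning("tiktoken failed for %s: %s", model_name, ex)
            return None
        _encodings[model_name] = encoding
    return len(encoding.encode(text))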
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py
CHANGED
@@ -1,6 +1,7 @@
 import copy
 import json
 import logging
+import threading
 import time
 from functools import singledispatch
 from typing import List, Optional, Union
@@ -39,6 +40,10 @@ from ..utils import (
     should_emit_events,
     should_send_prompts,
 )
+from lmnr.opentelemetry_lib.tracing.context import (
+    get_current_context,
+    get_event_attributes_from_context,
+)
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
 from opentelemetry.metrics import Counter, Histogram
 from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
@@ -87,6 +92,7 @@ def chat_wrapper(
         SPAN_NAME,
         kind=SpanKind.CLIENT,
         attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
+        context=get_current_context(),
     )
 
     run_async(_handle_request(span, kwargs, instance))
@@ -109,7 +115,8 @@ def chat_wrapper(
         exception_counter.add(1, attributes=attributes)
 
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        attributes = get_event_attributes_from_context()
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
 
@@ -184,6 +191,7 @@ async def achat_wrapper(
         SPAN_NAME,
         kind=SpanKind.CLIENT,
         attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
+        context=get_current_context(),
     )
 
     await _handle_request(span, kwargs, instance)
@@ -208,7 +216,8 @@ async def achat_wrapper(
         exception_counter.add(1, attributes=attributes)
 
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        attributes = get_event_attributes_from_context()
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
 
@@ -293,6 +302,7 @@ def _handle_response(
     choice_counter=None,
     duration_histogram=None,
     duration=None,
+    is_streaming: bool = False,
 ):
     if is_openai_v1():
         response_dict = model_as_dict(response)
@@ -307,6 +317,7 @@ def _handle_response(
             duration_histogram,
             response_dict,
             duration,
+            is_streaming,
         )
 
     # span attributes
@@ -324,13 +335,19 @@ def _handle_response(
 
 
 def _set_chat_metrics(
-    instance,
+    instance,
+    token_counter,
+    choice_counter,
+    duration_histogram,
+    response_dict,
+    duration,
+    is_streaming: bool = False,
 ):
     shared_attributes = metric_shared_attributes(
         response_model=response_dict.get("model") or None,
         operation="chat",
         server_address=_get_openai_base_url(instance),
-        is_streaming=
+        is_streaming=is_streaming,
     )
 
     # token metrics
@@ -517,11 +534,9 @@ def _set_completions(span, choices):
 def _set_streaming_token_metrics(
     request_kwargs, complete_response, span, token_counter, shared_attributes
 ):
-    # use tiktoken calculate token usage
     if not should_record_stream_token_usage():
         return
 
-    # kwargs={'model': 'gpt-3.5', 'messages': [{'role': 'user', 'content': '...'}], 'stream': True}
     prompt_usage = -1
     completion_usage = -1
 
@@ -618,11 +633,35 @@ class ChatStream(ObjectProxy):
         self._time_of_first_token = self._start_time
         self._complete_response = {"choices": [], "model": ""}
 
+        # Cleanup state tracking to prevent duplicate operations
+        self._cleanup_completed = False
+        self._cleanup_lock = threading.Lock()
+
+    def __del__(self):
+        """Cleanup when object is garbage collected"""
+        if hasattr(self, "_cleanup_completed") and not self._cleanup_completed:
+            self._ensure_cleanup()
+
     def __enter__(self):
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-        return self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+        cleanup_exception = None
+        try:
+            self._ensure_cleanup()
+        except Exception as e:
+            cleanup_exception = e
+            # Don't re-raise to avoid masking original exception
+
+        result = self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+
+        if cleanup_exception:
+            # Log cleanup exception but don't affect context manager behavior
+            logger.debug(
+                "Error during ChatStream cleanup in __exit__: %s", cleanup_exception
+            )
+
+        return result
 
     async def __aenter__(self):
         return self
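The `__del__`/`__exit__` additions funnel every teardown path through `_ensure_cleanup`, which a flag plus a lock make run at most once. A generic sketch of that run-once cleanup guard, independent of this class:

import threading

class OnceCleanup:
    def __init__(self):
        self._done = False
        self._lock = threading.Lock()

    def close(self) -> None:
        with self._lock:
            if self._done:           # already cleaned up: no-op
                return
            self._done = True
        self._release_resources()    # runs exactly once

    def _release_resources(self) -> None:
        print("resources released")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
        return False                 # never swallow the caller's exception

    def __del__(self):
        self.close()                 # safety net on garbage collection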
@@ -642,7 +681,12 @@ class ChatStream(ObjectProxy):
         except Exception as e:
             if isinstance(e, StopIteration):
                 self._process_complete_response()
-            raise
+            else:
+                # Handle cleanup for other exceptions during stream iteration
+                self._ensure_cleanup()
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
+            raise
         else:
             self._process_item(chunk)
             return chunk
@@ -653,7 +697,12 @@ class ChatStream(ObjectProxy):
         except Exception as e:
             if isinstance(e, StopAsyncIteration):
                 self._process_complete_response()
-            raise
+            else:
+                # Handle cleanup for other exceptions during stream iteration
+                self._ensure_cleanup()
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
+            raise
         else:
             self._process_item(chunk)
             return chunk
@@ -724,6 +773,82 @@ class ChatStream(ObjectProxy):
 
         self._span.set_status(Status(StatusCode.OK))
         self._span.end()
+        self._cleanup_completed = True
+
+    @dont_throw
+    def _ensure_cleanup(self):
+        """Thread-safe cleanup method that handles different cleanup scenarios"""
+        with self._cleanup_lock:
+            if self._cleanup_completed:
+                logger.debug("ChatStream cleanup already completed, skipping")
+                return
+
+            try:
+                logger.debug("Starting ChatStream cleanup")
+
+                # Set span status and close it
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.OK))
+                    self._span.end()
+                    logger.debug("ChatStream span closed successfully")
+
+                # Calculate partial metrics based on available data
+                self._record_partial_metrics()
+
+                self._cleanup_completed = True
+                logger.debug("ChatStream cleanup completed successfully")
+
+            except Exception as e:
+                # Log cleanup errors but don't propagate to avoid masking original issues
+                logger.debug("Error during ChatStream cleanup: %s", str(e))
+
+                # Still try to close the span even if metrics recording failed
+                try:
+                    if self._span and self._span.is_recording():
+                        self._span.set_status(
+                            Status(StatusCode.ERROR, "Cleanup failed")
+                        )
+                        self._span.end()
+                    self._cleanup_completed = True
+                except Exception:
+                    # Final fallback - just mark as completed to prevent infinite loops
+                    self._cleanup_completed = True
+
+    @dont_throw
+    def _record_partial_metrics(self):
+        """Record metrics based on available partial data"""
+        # Always record duration if we have start time
+        if (
+            self._start_time
+            and isinstance(self._start_time, (float, int))
+            and self._duration_histogram
+        ):
+            duration = time.time() - self._start_time
+            self._duration_histogram.record(
+                duration, attributes=self._shared_attributes()
+            )
+
+        # Record basic span attributes even without complete response
+        if self._span and self._span.is_recording():
+            _set_response_attributes(self._span, self._complete_response)
+
+        # Record partial token metrics if we have any data
+        if self._complete_response.get("choices") or self._request_kwargs:
+            _set_streaming_token_metrics(
+                self._request_kwargs,
+                self._complete_response,
+                self._span,
+                self._token_counter,
+                self._shared_attributes(),
+            )
+
+        # Record choice metrics if we have any choices processed
+        if self._choice_counter and self._complete_response.get("choices"):
+            _set_choice_counter_metrics(
+                self._choice_counter,
+                self._complete_response.get("choices"),
+                self._shared_attributes(),
+            )
 
 
 # Backward compatibility with OpenAI v0
@@ -972,6 +1097,13 @@ def _accumulate_stream_items(item, complete_response):
         complete_response["model"] = item.get("model")
         complete_response["id"] = item.get("id")
 
+        # capture usage information from the last stream chunks
+        if item.get("usage"):
+            complete_response["usage"] = item.get("usage")
+        elif item.get("choices") and item["choices"][0].get("usage"):
+            # Some LLM providers like moonshot mistakenly place token usage information within choices[0], handle this.
+            complete_response["usage"] = item["choices"][0].get("usage")
+
     # prompt filter results
     if item.get("prompt_filter_results"):
         complete_response["prompt_filter_results"] = item.get("prompt_filter_results")
|