lmnr 0.6.21__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff compares the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (34)
  1. lmnr/__init__.py +0 -4
  2. lmnr/opentelemetry_lib/decorators/__init__.py +81 -32
  3. lmnr/opentelemetry_lib/litellm/__init__.py +5 -2
  4. lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/__init__.py +6 -2
  5. lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py +11 -2
  6. lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/__init__.py +3 -0
  7. lmnr/opentelemetry_lib/opentelemetry/instrumentation/langgraph/__init__.py +16 -16
  8. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py +6 -0
  9. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +141 -9
  10. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +10 -2
  11. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +6 -2
  12. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +8 -2
  13. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +4 -1
  14. lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +20 -4
  15. lmnr/opentelemetry_lib/opentelemetry/instrumentation/threading/__init__.py +190 -0
  16. lmnr/opentelemetry_lib/tracing/__init__.py +89 -1
  17. lmnr/opentelemetry_lib/tracing/context.py +126 -0
  18. lmnr/opentelemetry_lib/tracing/processor.py +5 -6
  19. lmnr/opentelemetry_lib/tracing/tracer.py +29 -0
  20. lmnr/sdk/browser/browser_use_otel.py +5 -5
  21. lmnr/sdk/browser/patchright_otel.py +14 -0
  22. lmnr/sdk/browser/playwright_otel.py +32 -6
  23. lmnr/sdk/browser/pw_utils.py +119 -112
  24. lmnr/sdk/browser/recorder/record.umd.min.cjs +84 -0
  25. lmnr/sdk/client/asynchronous/resources/browser_events.py +1 -0
  26. lmnr/sdk/laminar.py +156 -186
  27. lmnr/sdk/types.py +17 -11
  28. lmnr/version.py +1 -1
  29. {lmnr-0.6.21.dist-info → lmnr-0.7.1.dist-info}/METADATA +3 -2
  30. {lmnr-0.6.21.dist-info → lmnr-0.7.1.dist-info}/RECORD +32 -31
  31. {lmnr-0.6.21.dist-info → lmnr-0.7.1.dist-info}/WHEEL +1 -1
  32. lmnr/opentelemetry_lib/tracing/context_properties.py +0 -65
  33. lmnr/sdk/browser/rrweb/rrweb.umd.min.cjs +0 -98
  34. {lmnr-0.6.21.dist-info → lmnr-0.7.1.dist-info}/entry_points.txt +0 -0
lmnr/__init__.py CHANGED
@@ -9,7 +9,6 @@ from .sdk.types import (
     HumanEvaluator,
     RunAgentResponseChunk,
     StepChunkContent,
-    TracingLevel,
 )
 from .sdk.decorators import observe
 from .sdk.types import LaminarSpanContext
@@ -18,7 +17,6 @@ from .opentelemetry_lib.tracing.attributes import Attributes
 from .opentelemetry_lib.tracing.instruments import Instruments
 from .opentelemetry_lib.tracing.processor import LaminarSpanProcessor
 from .opentelemetry_lib.tracing.tracer import get_laminar_tracer_provider, get_tracer
-from opentelemetry.trace import use_span
 
 __all__ = [
     "AgentOutput",
@@ -36,10 +34,8 @@ __all__ = [
     "LaminarSpanProcessor",
     "RunAgentResponseChunk",
     "StepChunkContent",
-    "TracingLevel",
     "get_laminar_tracer_provider",
     "get_tracer",
     "evaluate",
     "observe",
-    "use_span",
 ]
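The removed exports narrow the package's public surface: `TracingLevel` and the re-exported OpenTelemetry `use_span` are no longer importable from the package root. A minimal migration sketch for downstream code that only needs `use_span` (the upstream OpenTelemetry import path is unchanged):

# 0.6.21:
# from lmnr import use_span
# 0.7.1: import the helper from the OpenTelemetry API directly.
from opentelemetry.trace import use_span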
lmnr/opentelemetry_lib/decorators/__init__.py CHANGED
@@ -5,13 +5,19 @@ import orjson
 import types
 from typing import Any, AsyncGenerator, Callable, Generator, Literal
 
-from opentelemetry import trace
 from opentelemetry import context as context_api
-from opentelemetry.trace import Span
-
+from opentelemetry.trace import Span, Status, StatusCode
+
+from lmnr.opentelemetry_lib.tracing.context import (
+    CONTEXT_SESSION_ID_KEY,
+    CONTEXT_USER_ID_KEY,
+    attach_context,
+    detach_context,
+    get_event_attributes_from_context,
+)
 from lmnr.sdk.utils import get_input_from_func_args, is_method
 from lmnr.opentelemetry_lib import MAX_MANUAL_SPAN_PAYLOAD_SIZE
-from lmnr.opentelemetry_lib.tracing.tracer import get_tracer
+from lmnr.opentelemetry_lib.tracing.tracer import get_tracer_with_context
 from lmnr.opentelemetry_lib.tracing.attributes import (
     ASSOCIATION_PROPERTIES,
     SPAN_INPUT,
@@ -37,6 +43,7 @@ def default_json(o):
     try:
         return str(o)
     except Exception:
+        logger.debug("Failed to serialize data to JSON, inner type: %s", type(o))
         pass
     return DEFAULT_PLACEHOLDER
 
@@ -61,8 +68,13 @@ def _setup_span(
     span_name: str, span_type: str, association_properties: dict[str, Any] | None
 ):
     """Set up a span with the given name, type, and association properties."""
-    with get_tracer() as tracer:
-        span = tracer.start_span(span_name, attributes={SPAN_TYPE: span_type})
+    with get_tracer_with_context() as (tracer, isolated_context):
+        # Create span in isolated context
+        span = tracer.start_span(
+            span_name,
+            context=isolated_context,
+            attributes={SPAN_TYPE: span_type},
+        )
 
         if association_properties is not None:
             for key, value in association_properties.items():
@@ -148,10 +160,10 @@ def _process_output(
         pass
 
 
-def _cleanup_span(span: Span, ctx_token):
+def _cleanup_span(span: Span, wrapper: TracerWrapper):
     """Clean up span and context."""
     span.end()
-    context_api.detach(ctx_token)
+    wrapper.pop_span_context()
 
 
 def observe_base(
@@ -171,10 +183,25 @@ def observe_base(
             return fn(*args, **kwargs)
 
         span_name = name or fn.__name__
+        wrapper = TracerWrapper()
 
        span = _setup_span(span_name, span_type, association_properties)
-        ctx = trace.set_span_in_context(span, context_api.get_current())
-        ctx_token = context_api.attach(ctx)
+        new_context = wrapper.push_span_context(span)
+        if session_id := association_properties.get("session_id"):
+            new_context = context_api.set_value(
+                CONTEXT_SESSION_ID_KEY, session_id, new_context
+            )
+        if user_id := association_properties.get("user_id"):
+            new_context = context_api.set_value(
+                CONTEXT_USER_ID_KEY, user_id, new_context
+            )
+        # Some auto-instrumentations are not under our control, so they
+        # don't have access to our isolated context. We attach the context
+        # to the OTEL global context, so that spans know their parent
+        # span and trace_id.
+        ctx_token = context_api.attach(new_context)
+        # update our isolated context too
+        isolated_ctx_token = attach_context(new_context)
 
         _process_input(
             span, fn, args, kwargs, ignore_input, ignore_inputs, input_formatter
@@ -184,9 +211,12 @@ def observe_base(
             res = fn(*args, **kwargs)
         except Exception as e:
             _process_exception(span, e)
-            _cleanup_span(span, ctx_token)
+            _cleanup_span(span, wrapper)
             raise e
-
+        finally:
+            # Always restore global context
+            context_api.detach(ctx_token)
+            detach_context(isolated_ctx_token)
         # span will be ended in the generator
         if isinstance(res, types.GeneratorType):
             return _handle_generator(span, ctx_token, res)
@@ -201,7 +231,7 @@ def observe_base(
             return await _ahandle_generator(span, ctx_token, res)
 
         _process_output(span, res, ignore_output, output_formatter)
-        _cleanup_span(span, ctx_token)
+        _cleanup_span(span, wrapper)
         return res
 
     return wrap
@@ -227,10 +257,25 @@ def async_observe_base(
             return await fn(*args, **kwargs)
 
         span_name = name or fn.__name__
+        wrapper = TracerWrapper()
 
         span = _setup_span(span_name, span_type, association_properties)
-        ctx = trace.set_span_in_context(span, context_api.get_current())
-        ctx_token = context_api.attach(ctx)
+        new_context = wrapper.push_span_context(span)
+        if session_id := association_properties.get("session_id"):
+            new_context = context_api.set_value(
+                CONTEXT_SESSION_ID_KEY, session_id, new_context
+            )
+        if user_id := association_properties.get("user_id"):
+            new_context = context_api.set_value(
+                CONTEXT_USER_ID_KEY, user_id, new_context
+            )
+        # Some auto-instrumentations are not under our control, so they
+        # don't have access to our isolated context. We attach the context
+        # to the OTEL global context, so that spans know their parent
+        # span and trace_id.
+        ctx_token = context_api.attach(new_context)
+        # update our isolated context too
+        isolated_ctx_token = attach_context(new_context)
 
         _process_input(
             span, fn, args, kwargs, ignore_input, ignore_inputs, input_formatter
@@ -240,8 +285,12 @@ def async_observe_base(
             res = await fn(*args, **kwargs)
         except Exception as e:
             _process_exception(span, e)
-            _cleanup_span(span, ctx_token)
+            _cleanup_span(span, wrapper)
             raise e
+        finally:
+            # Always restore global context
+            context_api.detach(ctx_token)
+            detach_context(isolated_ctx_token)
 
         # span will be ended in the generator
         if isinstance(res, types.AsyncGeneratorType):
@@ -250,7 +299,7 @@ def async_observe_base(
             return await _ahandle_generator(span, ctx_token, res)
 
         _process_output(span, res, ignore_output, output_formatter)
-        _cleanup_span(span, ctx_token)
+        _cleanup_span(span, wrapper)
         return res
 
     return wrap
@@ -258,24 +307,24 @@ def async_observe_base(
     return decorate
 
 
-def _handle_generator(span: Span, ctx_token, res: Generator[Any, Any, Any]):
-    yield from res
-
-    span.end()
-    if ctx_token is not None:
-        context_api.detach(ctx_token)
-
+def _handle_generator(span: Span, wrapper: TracerWrapper, res: Generator):
+    try:
+        yield from res
+    finally:
+        _cleanup_span(span, wrapper)
 
-async def _ahandle_generator(span: Span, ctx_token, res: AsyncGenerator[Any, Any]):
-    # async with contextlib.aclosing(res) as closing_gen:
-    async for part in res:
-        yield part
 
-    span.end()
-    if ctx_token is not None:
-        context_api.detach(ctx_token)
+async def _ahandle_generator(span: Span, wrapper: TracerWrapper, res: AsyncGenerator):
+    try:
+        async for part in res:
+            yield part
+    finally:
+        _cleanup_span(span, wrapper)
 
 
 def _process_exception(span: Span, e: Exception):
     # Note that this `escaped` is sent as a StringValue("True"), not a boolean.
-    span.record_exception(e, escaped=True)
+    span.record_exception(
+        e, attributes=get_event_attributes_from_context(), escaped=True
+    )
+    span.set_status(Status(StatusCode.ERROR, str(e)))
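The rewritten decorator keeps two contexts in sync: spans are parented in Laminar's isolated context (via `push_span_context`/`pop_span_context` on `TracerWrapper`), and the same context is also attached to the global OpenTelemetry context so third-party auto-instrumentations still see the correct parent. The token discipline both paths rely on is the standard `opentelemetry.context` attach/detach pattern; a minimal standalone sketch, with a hypothetical key standing in for `CONTEXT_SESSION_ID_KEY`:

from opentelemetry import context as context_api

SESSION_KEY = "example.session_id"  # hypothetical stand-in for CONTEXT_SESSION_ID_KEY

def call_with_session(session_id, fn):
    # Derive a new context from the current one without mutating it.
    new_ctx = context_api.set_value(SESSION_KEY, session_id)
    # attach() returns a token that restores the previous context.
    token = context_api.attach(new_ctx)
    try:
        return fn()
    finally:
        # Always detach, even on error, mirroring the decorator's new `finally` block.
        context_api.detach(token)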
lmnr/opentelemetry_lib/litellm/__init__.py CHANGED
@@ -7,6 +7,7 @@ from opentelemetry.trace import SpanKind, Status, StatusCode, Tracer
 from lmnr.opentelemetry_lib.litellm.utils import model_as_dict, set_span_attribute
 from lmnr.opentelemetry_lib.tracing import TracerWrapper
 
+from lmnr.opentelemetry_lib.tracing.context import get_event_attributes_from_context
 from lmnr.opentelemetry_lib.utils.package_check import is_package_installed
 from lmnr.sdk.log import get_default_logger
 
@@ -141,10 +142,12 @@ try:
             else:
                 span.set_status(Status(StatusCode.ERROR))
                 if isinstance(response_obj, Exception):
-                    span.record_exception(response_obj)
+                    attributes = get_event_attributes_from_context()
+                    span.record_exception(response_obj, attributes=attributes)
 
         except Exception as e:
-            span.record_exception(e)
+            attributes = get_event_attributes_from_context()
+            span.record_exception(e, attributes=attributes)
             logger.error(f"Error in Laminar LiteLLM instrumentation: {e}")
         finally:
             span.end(int(end_time.timestamp() * 1e9))
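Both exception paths now attach context-derived attributes to the recorded exception. `Span.record_exception` accepts an optional `attributes` mapping that lands on the exception event rather than on the span itself; a sketch of the call shape, with a made-up attribute key in place of whatever `get_event_attributes_from_context` returns:

from opentelemetry import trace

tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("litellm.completion") as span:
    try:
        raise RuntimeError("provider error")
    except RuntimeError as exc:
        # The attributes are recorded on the exception event itself.
        span.record_exception(exc, attributes={"example.session_id": "abc"})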
lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/__init__.py CHANGED
@@ -30,6 +30,8 @@ from .utils import (
     should_emit_events,
 )
 from .version import __version__
+
+from lmnr.opentelemetry_lib.tracing.context import get_current_context
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY, unwrap
 from opentelemetry.metrics import Counter, Histogram, Meter, get_meter
@@ -396,9 +398,10 @@ def _wrap(
         name,
         kind=SpanKind.CLIENT,
         attributes={
-            SpanAttributes.LLM_SYSTEM: "Anthropic",
+            SpanAttributes.LLM_SYSTEM: "anthropic",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
 
     _handle_input(span, event_logger, kwargs)
@@ -493,9 +496,10 @@ async def _awrap(
         name,
         kind=SpanKind.CLIENT,
         attributes={
-            SpanAttributes.LLM_SYSTEM: "Anthropic",
+            SpanAttributes.LLM_SYSTEM: "anthropic",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
     await _ahandle_input(span, event_logger, kwargs)
 
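Besides lowercasing the `LLM_SYSTEM` value to `anthropic`, this release passes `context=get_current_context()` to `start_span` here and, as shown below, in the google_genai, groq, and OpenAI wrappers, so the client span is parented from an explicit context instead of the implicit current one. The underlying OpenTelemetry call shape, as a sketch:

from opentelemetry import trace
from opentelemetry.trace import SpanKind

tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("parent"):
    # Build an explicit Context carrying the desired parent span...
    parent_ctx = trace.set_span_in_context(trace.get_current_span())
    # ...and hand it to start_span instead of relying on the implicit global context.
    child = tracer.start_span("llm.completion", kind=SpanKind.CLIENT, context=parent_ctx)
    child.end()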
lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py CHANGED
@@ -8,6 +8,11 @@ from typing import AsyncGenerator, Callable, Collection, Generator
 
 from google.genai import types
 
+from lmnr.opentelemetry_lib.tracing.context import (
+    get_current_context,
+    get_event_attributes_from_context,
+)
+
 from .config import (
     Config,
 )
@@ -474,6 +479,7 @@ def _wrap(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
             SpanAttributes.LLM_SYSTEM: "gemini",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
 
     if span.is_recording():
@@ -488,8 +494,9 @@ def _wrap(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
             span.end()
         return response
     except Exception as e:
+        attributes = get_event_attributes_from_context()
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
         raise e
@@ -509,6 +516,7 @@ async def _awrap(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
             SpanAttributes.LLM_SYSTEM: "gemini",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
 
     if span.is_recording():
@@ -525,8 +533,9 @@ async def _awrap(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs):
             span.end()
         return response
     except Exception as e:
+        attributes = get_event_attributes_from_context()
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
         raise e
lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/__init__.py CHANGED
@@ -27,6 +27,7 @@ from .utils import (
     should_emit_events,
 )
 from .version import __version__
+from lmnr.opentelemetry_lib.tracing.context import get_current_context
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY, unwrap
 from opentelemetry.metrics import Counter, Histogram, Meter, get_meter
@@ -245,6 +246,7 @@ def _wrap(
             SpanAttributes.LLM_SYSTEM: "Groq",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
 
     _handle_input(span, kwargs, event_logger)
@@ -327,6 +329,7 @@ async def _awrap(
             SpanAttributes.LLM_SYSTEM: "Groq",
             SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.COMPLETION.value,
         },
+        context=get_current_context(),
     )
 
     _handle_input(span, kwargs, event_logger)
lmnr/opentelemetry_lib/opentelemetry/instrumentation/langgraph/__init__.py CHANGED
@@ -12,10 +12,7 @@ from langchain_core.runnables.graph import Graph
 from opentelemetry.trace import Tracer
 from wrapt import wrap_function_wrapper
 from opentelemetry.trace import get_tracer
-
-from lmnr.opentelemetry_lib.tracing.context_properties import (
-    update_association_properties,
-)
+from opentelemetry.context import get_value, attach, set_value
 
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.utils import unwrap
@@ -45,12 +42,13 @@ def wrap_pregel_stream(tracer: Tracer, to_wrap, wrapped, instance, args, kwargs)
         }
         for edge in graph.edges
     ]
-    update_association_properties(
-        {
-            "langgraph.edges": json.dumps(edges),
-            "langgraph.nodes": json.dumps(nodes),
-        },
-    )
+    d = {
+        "langgraph.edges": json.dumps(edges),
+        "langgraph.nodes": json.dumps(nodes),
+    }
+    association_properties = get_value("lmnr.langgraph.graph") or {}
+    association_properties.update(d)
+    attach(set_value("lmnr.langgraph.graph", association_properties))
     return wrapped(*args, **kwargs)
 
 
@@ -75,12 +73,14 @@ async def async_wrap_pregel_stream(
         }
         for edge in graph.edges
     ]
-    update_association_properties(
-        {
-            "langgraph.edges": json.dumps(edges),
-            "langgraph.nodes": json.dumps(nodes),
-        },
-    )
+
+    d = {
+        "langgraph.edges": json.dumps(edges),
+        "langgraph.nodes": json.dumps(nodes),
+    }
+    association_properties = get_value("lmnr.langgraph.graph") or {}
+    association_properties.update(d)
+    attach(set_value("lmnr.langgraph.graph", association_properties))
 
     async for item in wrapped(*args, **kwargs):
         yield item
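With `update_association_properties` gone (the whole `context_properties` module is deleted in this release, per the file list), the graph topology is kept directly in the OpenTelemetry context under the `lmnr.langgraph.graph` key. A sketch of the read-merge-reattach pattern the wrappers use (they keep no token from `attach`, so the merged value stays visible to downstream spans):

from opentelemetry.context import attach, get_value, set_value

GRAPH_KEY = "lmnr.langgraph.graph"

def merge_graph_metadata(entries: dict) -> None:
    # Read the existing mapping (if any), merge, and re-attach the new context.
    merged = dict(get_value(GRAPH_KEY) or {})
    merged.update(entries)
    attach(set_value(GRAPH_KEY, merged))

merge_graph_metadata({"langgraph.nodes": "[]", "langgraph.edges": "[]"})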
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py CHANGED
@@ -395,6 +395,12 @@ def get_token_count_from_string(string: str, model_name: str):
                 f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
             )
             return None
+        except Exception as ex:
+            # Other exceptions in tiktoken
+            logger.warning(
+                f"Failed to get tiktoken encoding for model_name {model_name}, error: {str(ex)}"
+            )
+            return None
 
         tiktoken_encodings[model_name] = encoding
     else:
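The new catch-all mirrors the existing unknown-model handler, so any tiktoken failure degrades to `None` (no token counts) instead of breaking the instrumented call. A standalone sketch of the same fallback shape, assuming `tiktoken` is installed:

import tiktoken

def safe_encoding_for_model(model_name: str):
    try:
        return tiktoken.encoding_for_model(model_name)
    except KeyError:
        # tiktoken raises KeyError for model names it does not know.
        return None
    except Exception:
        # Any other tiktoken failure also degrades to "no token usage".
        return None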
lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py CHANGED
@@ -1,6 +1,7 @@
 import copy
 import json
 import logging
+import threading
 import time
 from functools import singledispatch
 from typing import List, Optional, Union
@@ -39,6 +40,10 @@ from ..utils import (
     should_emit_events,
     should_send_prompts,
 )
+from lmnr.opentelemetry_lib.tracing.context import (
+    get_current_context,
+    get_event_attributes_from_context,
+)
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
 from opentelemetry.metrics import Counter, Histogram
 from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
@@ -87,6 +92,7 @@ def chat_wrapper(
         SPAN_NAME,
         kind=SpanKind.CLIENT,
         attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
+        context=get_current_context(),
     )
 
     run_async(_handle_request(span, kwargs, instance))
@@ -109,7 +115,8 @@
             exception_counter.add(1, attributes=attributes)
 
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        attributes = get_event_attributes_from_context()
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
 
@@ -184,6 +191,7 @@ async def achat_wrapper(
         SPAN_NAME,
         kind=SpanKind.CLIENT,
         attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
+        context=get_current_context(),
     )
 
     await _handle_request(span, kwargs, instance)
@@ -208,7 +216,8 @@
             exception_counter.add(1, attributes=attributes)
 
         span.set_attribute(ERROR_TYPE, e.__class__.__name__)
-        span.record_exception(e)
+        attributes = get_event_attributes_from_context()
+        span.record_exception(e, attributes=attributes)
         span.set_status(Status(StatusCode.ERROR, str(e)))
         span.end()
 
@@ -293,6 +302,7 @@ def _handle_response(
     choice_counter=None,
     duration_histogram=None,
     duration=None,
+    is_streaming: bool = False,
 ):
     if is_openai_v1():
         response_dict = model_as_dict(response)
@@ -307,6 +317,7 @@
             duration_histogram,
             response_dict,
             duration,
+            is_streaming,
         )
 
     # span attributes
@@ -324,13 +335,19 @@
 
 
 def _set_chat_metrics(
-    instance, token_counter, choice_counter, duration_histogram, response_dict, duration
+    instance,
+    token_counter,
+    choice_counter,
+    duration_histogram,
+    response_dict,
+    duration,
+    is_streaming: bool = False,
 ):
     shared_attributes = metric_shared_attributes(
         response_model=response_dict.get("model") or None,
         operation="chat",
         server_address=_get_openai_base_url(instance),
-        is_streaming=False,
+        is_streaming=is_streaming,
     )
 
     # token metrics
@@ -517,11 +534,9 @@ def _set_completions(span, choices):
 def _set_streaming_token_metrics(
     request_kwargs, complete_response, span, token_counter, shared_attributes
 ):
-    # use tiktoken calculate token usage
     if not should_record_stream_token_usage():
         return
 
-    # kwargs={'model': 'gpt-3.5', 'messages': [{'role': 'user', 'content': '...'}], 'stream': True}
     prompt_usage = -1
     completion_usage = -1
@@ -618,11 +633,35 @@ class ChatStream(ObjectProxy):
         self._time_of_first_token = self._start_time
         self._complete_response = {"choices": [], "model": ""}
 
+        # Cleanup state tracking to prevent duplicate operations
+        self._cleanup_completed = False
+        self._cleanup_lock = threading.Lock()
+
+    def __del__(self):
+        """Cleanup when object is garbage collected"""
+        if hasattr(self, "_cleanup_completed") and not self._cleanup_completed:
+            self._ensure_cleanup()
+
     def __enter__(self):
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-        self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+        cleanup_exception = None
+        try:
+            self._ensure_cleanup()
+        except Exception as e:
+            cleanup_exception = e
+            # Don't re-raise to avoid masking original exception
+
+        result = self.__wrapped__.__exit__(exc_type, exc_val, exc_tb)
+
+        if cleanup_exception:
+            # Log cleanup exception but don't affect context manager behavior
+            logger.debug(
+                "Error during ChatStream cleanup in __exit__: %s", cleanup_exception
+            )
+
+        return result
 
     async def __aenter__(self):
         return self
@@ -642,7 +681,12 @@ class ChatStream(ObjectProxy):
         except Exception as e:
             if isinstance(e, StopIteration):
                 self._process_complete_response()
-            raise e
+            else:
+                # Handle cleanup for other exceptions during stream iteration
+                self._ensure_cleanup()
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
+            raise
         else:
             self._process_item(chunk)
             return chunk
@@ -653,7 +697,12 @@ class ChatStream(ObjectProxy):
         except Exception as e:
             if isinstance(e, StopAsyncIteration):
                 self._process_complete_response()
-            raise e
+            else:
+                # Handle cleanup for other exceptions during stream iteration
+                self._ensure_cleanup()
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.ERROR, str(e)))
+            raise
         else:
             self._process_item(chunk)
             return chunk
@@ -724,6 +773,82 @@ class ChatStream(ObjectProxy):
 
         self._span.set_status(Status(StatusCode.OK))
         self._span.end()
+        self._cleanup_completed = True
+
+    @dont_throw
+    def _ensure_cleanup(self):
+        """Thread-safe cleanup method that handles different cleanup scenarios"""
+        with self._cleanup_lock:
+            if self._cleanup_completed:
+                logger.debug("ChatStream cleanup already completed, skipping")
+                return
+
+            try:
+                logger.debug("Starting ChatStream cleanup")
+
+                # Set span status and close it
+                if self._span and self._span.is_recording():
+                    self._span.set_status(Status(StatusCode.OK))
+                    self._span.end()
+                    logger.debug("ChatStream span closed successfully")
+
+                # Calculate partial metrics based on available data
+                self._record_partial_metrics()
+
+                self._cleanup_completed = True
+                logger.debug("ChatStream cleanup completed successfully")
+
+            except Exception as e:
+                # Log cleanup errors but don't propagate to avoid masking original issues
+                logger.debug("Error during ChatStream cleanup: %s", str(e))
+
+                # Still try to close the span even if metrics recording failed
+                try:
+                    if self._span and self._span.is_recording():
+                        self._span.set_status(
+                            Status(StatusCode.ERROR, "Cleanup failed")
+                        )
+                        self._span.end()
+                    self._cleanup_completed = True
+                except Exception:
+                    # Final fallback - just mark as completed to prevent infinite loops
+                    self._cleanup_completed = True
+
+    @dont_throw
+    def _record_partial_metrics(self):
+        """Record metrics based on available partial data"""
+        # Always record duration if we have start time
+        if (
+            self._start_time
+            and isinstance(self._start_time, (float, int))
+            and self._duration_histogram
+        ):
+            duration = time.time() - self._start_time
+            self._duration_histogram.record(
+                duration, attributes=self._shared_attributes()
+            )
+
+        # Record basic span attributes even without complete response
+        if self._span and self._span.is_recording():
+            _set_response_attributes(self._span, self._complete_response)
+
+        # Record partial token metrics if we have any data
+        if self._complete_response.get("choices") or self._request_kwargs:
+            _set_streaming_token_metrics(
+                self._request_kwargs,
+                self._complete_response,
+                self._span,
+                self._token_counter,
+                self._shared_attributes(),
+            )
+
+        # Record choice metrics if we have any choices processed
+        if self._choice_counter and self._complete_response.get("choices"):
+            _set_choice_counter_metrics(
+                self._choice_counter,
+                self._complete_response.get("choices"),
+                self._shared_attributes(),
+            )
 
 
 # Backward compatibility with OpenAI v0
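The `_ensure_cleanup` guard above is a lock-protected run-once pattern: `__exit__`, `__del__`, and the mid-iteration error paths can all race to close the same span, and `_cleanup_completed` under `_cleanup_lock` makes the close idempotent. Reduced to its skeleton (`close_span` is a hypothetical stand-in for the span and metrics work):

import threading

class OnceCleanup:
    def __init__(self, close_span):
        self._close_span = close_span
        self._cleanup_completed = False
        self._cleanup_lock = threading.Lock()

    def ensure_cleanup(self):
        with self._cleanup_lock:
            if self._cleanup_completed:
                return  # another path already cleaned up
            try:
                self._close_span()
            finally:
                # Mark done even on failure so later callers never retry forever.
                self._cleanup_completed = True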
@@ -972,6 +1097,13 @@ def _accumulate_stream_items(item, complete_response):
         complete_response["model"] = item.get("model")
         complete_response["id"] = item.get("id")
 
+        # capture usage information from the last stream chunks
+        if item.get("usage"):
+            complete_response["usage"] = item.get("usage")
+        elif item.get("choices") and item["choices"][0].get("usage"):
+            # Some LLM providers like moonshot mistakenly place token usage information within choices[0], handle this.
+            complete_response["usage"] = item["choices"][0].get("usage")
+
     # prompt filter results
     if item.get("prompt_filter_results"):
         complete_response["prompt_filter_results"] = item.get("prompt_filter_results")
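Usage on streamed completions only arrives when the caller opts in: with `stream_options={"include_usage": True}` the OpenAI API sends a final chunk whose `usage` field is populated (and, per the comment above, some providers put it under `choices[0]` instead, which the accumulator also handles). A minimal consumer sketch, assuming the standard `openai` Python client and a placeholder model name:

from openai import OpenAI

client = OpenAI()
stream = client.chat.completions.create(
    model="gpt-4o-mini",  # placeholder model
    messages=[{"role": "user", "content": "hi"}],
    stream=True,
    stream_options={"include_usage": True},  # request the trailing usage chunk
)

usage = None
for chunk in stream:
    # Content chunks carry deltas; the final chunk carries only usage.
    if chunk.usage is not None:
        usage = chunk.usage

print(usage)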