mingx 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,646 @@
1
+ """
2
+ LangChain & LangGraph OpenTelemetry callback adapter.
3
+
4
+ Maps on_*_start / on_*_end / on_*_error to GenAI semantic layer spans.
5
+ Use with: config={"callbacks": [OpenTelemetryCallbackHandler()]}
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import threading
11
+ from typing import Any, Dict, List, Literal, Optional, Tuple
12
+ from uuid import UUID
13
+
14
+ from langchain_core.callbacks.base import BaseCallbackHandler
15
+ from langchain_core.outputs import LLMResult
16
+ from opentelemetry.trace import SpanKind, Status, StatusCode
17
+
18
+ from mingx._default_attributes import (
19
+ MINGX_SPAN_TYPE,
20
+ SPAN_TYPE_CUSTOM,
21
+ )
22
+ from mingx._trace import get_tracer
23
+ from mingx.genai import attributes as attrs
24
+ from mingx.genai.io import (
25
+ build_input_messages_from_langchain_messages,
26
+ build_input_messages_from_prompts,
27
+ build_output_messages_from_llm_result,
28
+ record_llm_input_output,
29
+ record_span_input_output,
30
+ )
31
+ from mingx.genai.span_attributes import (
32
+ ChainSpanAttributes,
33
+ InferenceInputOutput,
34
+ InferenceResponseAttributes,
35
+ InferenceSpanAttributes,
36
+ RetrieverSpanAttributes,
37
+ TokenUsage,
38
+ ToolSpanAttributes,
39
+ )
40
+
41
+ def _get_float(d: Dict[str, Any], key: str) -> Optional[float]:
42
+ v = d.get(key)
43
+ if v is None:
44
+ return None
45
+ try:
46
+ return float(v)
47
+ except (TypeError, ValueError):
48
+ return None
49
+
50
+
51
+ def _get_int(d: Dict[str, Any], key: str) -> Optional[int]:
52
+ v = d.get(key)
53
+ if v is None:
54
+ return None
55
+ try:
56
+ return int(v)
57
+ except (TypeError, ValueError):
58
+ return None
59
+
60
+
61
+ def _normalize_stop_sequences(stop: Any) -> Optional[List[str]]:
62
+ """将 LangChain stop(str 或 list)转为 gen_ai.request.stop_sequences 的 list[str]。"""
63
+ if stop is None:
64
+ return None
65
+ if isinstance(stop, str):
66
+ return [stop] if stop.strip() else None
67
+ if isinstance(stop, (list, tuple)):
68
+ out = [str(s).strip() for s in stop if s is not None and str(s).strip()]
69
+ return out if out else None
70
+ return None
71
+
72
+
73
def _token_usage_from_llm_result(response: LLMResult) -> TokenUsage:
    """Parse token usage from a LangChain ``LLMResult``.

    Looks in ``response.llm_output["token_usage"]`` (or ``"usage"``) for
    input/prompt and output/completion token counts. Missing or unparsable
    values are reported as None.
    """
    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None
    out = response.llm_output
    if isinstance(out, dict) and out:
        usage = out.get("token_usage") or out.get("usage") or {}
        # Explicit `is None` checks: a legitimate count of 0 must not be
        # discarded by `or`-chaining (0 is falsy and would fall through
        # to the alternate key).
        input_tokens = _get_int(usage, "input_tokens")
        if input_tokens is None:
            input_tokens = _get_int(usage, "prompt_tokens")
        output_tokens = _get_int(usage, "output_tokens")
        if output_tokens is None:
            output_tokens = _get_int(usage, "completion_tokens")
    return TokenUsage(input_tokens=input_tokens, output_tokens=output_tokens)
84
+
85
+
86
def _response_attrs_from_llm_result(response: LLMResult) -> InferenceResponseAttributes:
    """Extract response metadata (response model name, finish reasons) from an ``LLMResult``.

    Only the first generation of the first batch is inspected; metadata is
    read from the chat message's ``response_metadata`` dict when present.
    """
    model: Optional[str] = None
    reasons: Optional[List[str]] = None
    gens = response.generations
    if gens and gens[0]:
        first = gens[0][0]
        message = getattr(first, "message", None)
        if message and hasattr(message, "response_metadata"):
            meta = message.response_metadata or {}
            if isinstance(meta, dict):
                if "model_name" in meta:
                    model = str(meta["model_name"])
                if "finish_reason" in meta:
                    reasons = [str(meta["finish_reason"])]
    return InferenceResponseAttributes(
        response_model=model,
        finish_reasons=reasons,
    )
104
+
105
+
106
# run_id -> (context_manager, optional before_exit callable to run inside context)
_span_contexts: Dict[UUID, Tuple[Any, Optional[Any]]] = {}
# run_id -> input data (used by on_*_end to record input/output when trace_content is enabled)
_llm_prompts_store: Dict[UUID, List[str]] = {}
_llm_messages_store: Dict[UUID, List[Any]] = {}  # messages stored by on_chat_model_start for chat models
_chain_io_store: Dict[UUID, Dict[str, Any]] = {}
_tool_io_store: Dict[UUID, str] = {}
_retriever_io_store: Dict[UUID, str] = {}
# Guards all of the run_id-keyed stores above.
_lock = threading.Lock()
115
+
116
+
117
def _start_span(name: str, kind: SpanKind, attributes: Optional[Dict[str, Any]] = None) -> Any:
    """Open a span as the current span and return its (already-entered) context manager.

    The context manager is entered manually here; the matching ``__exit__``
    happens later in ``_end_span`` / ``_record_error_span``.
    """
    span_ctx = get_tracer().start_as_current_span(name, kind=kind, attributes=attributes or {})
    span_ctx.__enter__()
    return span_ctx
122
+
123
+
124
def _end_span(
    run_id: UUID,
    before_exit: Optional[Any] = None,
) -> bool:
    """Close the span registered for *run_id*.

    Runs *before_exit* (if given) while the span is still current, then exits
    the stored context manager. Returns True when a span entry was found.
    """
    # Hold the lock only for the pop: before_exit callbacks may themselves
    # acquire _lock (the on_*_end closures drain the io stores under it),
    # and threading.Lock is not reentrant — running them under the lock
    # would deadlock.
    with _lock:
        entry = _span_contexts.pop(run_id, None)
    if not entry:
        return False
    ctx, _ = entry  # the stored before_exit slot is unused; callers pass one in
    if before_exit is not None:
        try:
            before_exit()
        except Exception:
            # Best effort: attribute/event recording must never break the app.
            pass
    try:
        ctx.__exit__(None, None, None)
    except Exception:
        pass
    return True
143
+
144
+
145
def _record_error_span(run_id: UUID, exception: BaseException) -> bool:
    """Mark the span registered for *run_id* as errored and close it.

    Sets ERROR status, records the exception and its type on the span (while
    it is still current), then exits the stored context manager with the
    exception info. Returns True when a span entry was found.
    """
    # Minimal lock scope — see _end_span: keep only the pop under _lock so
    # nothing re-acquiring the non-reentrant lock can deadlock.
    with _lock:
        entry = _span_contexts.pop(run_id, None)
    if not entry:
        return False
    ctx, _ = entry
    try:
        from opentelemetry import trace

        # The span stays current until ctx.__exit__ runs below.
        span = trace.get_current_span()
        if span.is_recording():
            span.set_status(Status(StatusCode.ERROR, str(exception)))
            span.record_exception(exception)
            span.set_attribute(attrs.ERROR_TYPE, type(exception).__name__)
    except Exception:
        pass
    try:
        ctx.__exit__(type(exception), exception, exception.__traceback__)
    except Exception:
        pass
    return True
165
+
166
+
167
def _store(run_id: UUID, ctx: Any, before_exit: Optional[Any] = None) -> None:
    """Register an already-entered span context under *run_id* for later closing."""
    with _lock:
        _span_contexts[run_id] = (ctx, before_exit)
170
+
171
+
172
# How recorded input/output is attached to a span: as span events,
# as span attributes, or not recorded at all.
RecordInputOutputAs = Literal["events", "attributes", "none"]
173
+
174
+
175
class OpenTelemetryCallbackHandler(BaseCallbackHandler):
    """
    LangChain/LangGraph callback handler that creates OpenTelemetry spans
    following GenAI semantic conventions.

    Each ``on_*_start`` callback opens a span keyed by the LangChain
    ``run_id``; the matching ``on_*_end`` / ``on_*_error`` callback closes it,
    optionally recording inputs/outputs (as span events or attributes) when
    ``trace_content`` is enabled.
    """

    # Default: record inputs and outputs for every node; recorded as empty when
    # a node has no return value.
    trace_content: bool = True
    # "events" is recommended — consistent with Traceloop and similar tooling.
    record_input_output_as: RecordInputOutputAs = "events"
    # Optional truncation length (in characters) applied per content item.
    max_content_length: Optional[int] = None

    def __init__(
        self,
        *,
        trace_content: bool = True,
        record_input_output_as: RecordInputOutputAs = "events",
        max_content_length: Optional[int] = None,
    ) -> None:
        """Configure content tracing, recording mode, and optional truncation."""
        super().__init__()
        self.trace_content = trace_content
        self.record_input_output_as = record_input_output_as
        self.max_content_length = max_content_length

    def _start_span(
        self,
        name: str,
        kind: SpanKind,
        attributes: Optional[Dict[str, Any]] = None,
        run_id: Optional[UUID] = None,
    ) -> None:
        """Open a span and register it under *run_id*; no-op without a run_id."""
        if not run_id:
            return
        # NOTE(review): this local name shadows the module-level
        # `mingx.genai.attributes` alias `attrs`; harmless here because the
        # module alias is not referenced below, but worth renaming.
        attrs = dict(attributes or {})
        # In adapter mode, default to "custom" when no span type was provided.
        if not (attrs.get(MINGX_SPAN_TYPE) or "").strip():
            attrs[MINGX_SPAN_TYPE] = SPAN_TYPE_CUSTOM
        ctx = _start_span(name, kind, attrs)
        _store(run_id, ctx, None)

    def on_llm_start(
        self,
        serialized: Dict[str, Any],
        prompts: List[str],
        *,
        run_id: UUID,
        **kwargs: Any,
    ) -> None:
        """Open a CLIENT span for a (non-chat) LLM call and stash the prompts."""
        serialized = serialized or {}
        lc_kwargs = serialized.get("kwargs") or {}
        model = lc_kwargs.get("model_name") or (serialized.get("id", [])[-1] if serialized.get("id") else "llm")
        provider = self._infer_provider(serialized)
        model_str = str(model) if model else "unknown"
        # Span name: just the name or the model id — no concatenation.
        name = serialized.get("name") or kwargs.get("name") or model_str
        if not isinstance(name, str):
            name = str(name)
        # Extract gen_ai.request.* parameters from the LangChain LLM kwargs
        # (aligned with the GenAI semantic conventions).
        temperature = _get_float(lc_kwargs, "temperature")
        max_tokens = _get_int(lc_kwargs, "max_tokens") or _get_int(lc_kwargs, "max_tokens_limit")
        top_p = _get_float(lc_kwargs, "top_p")
        top_k = _get_int(lc_kwargs, "top_k")
        frequency_penalty = _get_float(lc_kwargs, "frequency_penalty")
        presence_penalty = _get_float(lc_kwargs, "presence_penalty")
        # Fall back to model_kwargs for parameters not set at the top level.
        model_kwargs = lc_kwargs.get("model_kwargs") or {}
        if top_p is None:
            top_p = _get_float(model_kwargs, "top_p")
        if top_k is None:
            top_k = _get_int(model_kwargs, "top_k")
        if frequency_penalty is None:
            frequency_penalty = _get_float(model_kwargs, "frequency_penalty")
        if presence_penalty is None:
            presence_penalty = _get_float(model_kwargs, "presence_penalty")
        stop_raw = lc_kwargs.get("stop") or model_kwargs.get("stop")
        stop_sequences = _normalize_stop_sequences(stop_raw)
        span_attrs = InferenceSpanAttributes(
            operation_name=attrs.OPERATION_CHAT,
            provider_name=provider,
            run_id=str(run_id),
            model=model_str,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            top_k=top_k,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
            stop_sequences=stop_sequences,
        )
        self._start_span(name, SpanKind.CLIENT, span_attrs.to_attributes(), run_id)
        if self.trace_content and prompts:
            with _lock:
                _llm_prompts_store[run_id] = list(prompts)

    def on_chat_model_start(
        self,
        serialized: Dict[str, Any],
        messages: List[List[Any]],
        *,
        run_id: UUID,
        **kwargs: Any,
    ) -> None:
        """Chat-model entry point: receives real Message lists; store them so
        on_llm_end can emit JSON input/output events."""
        serialized = serialized or {}
        lc_kwargs = serialized.get("kwargs") or {}
        model = lc_kwargs.get("model_name") or (serialized.get("id", [])[-1] if serialized.get("id") else "llm")
        provider = self._infer_provider(serialized)
        model_str = str(model) if model else "unknown"
        name = serialized.get("name") or kwargs.get("name") or model_str
        if not isinstance(name, str):
            name = str(name)
        # Same gen_ai.request.* extraction as on_llm_start.
        temperature = _get_float(lc_kwargs, "temperature")
        max_tokens = _get_int(lc_kwargs, "max_tokens") or _get_int(lc_kwargs, "max_tokens_limit")
        top_p = _get_float(lc_kwargs, "top_p")
        top_k = _get_int(lc_kwargs, "top_k")
        frequency_penalty = _get_float(lc_kwargs, "frequency_penalty")
        presence_penalty = _get_float(lc_kwargs, "presence_penalty")
        model_kwargs = lc_kwargs.get("model_kwargs") or {}
        if top_p is None:
            top_p = _get_float(model_kwargs, "top_p")
        if top_k is None:
            top_k = _get_int(model_kwargs, "top_k")
        if frequency_penalty is None:
            frequency_penalty = _get_float(model_kwargs, "frequency_penalty")
        if presence_penalty is None:
            presence_penalty = _get_float(model_kwargs, "presence_penalty")
        stop_raw = lc_kwargs.get("stop") or model_kwargs.get("stop")
        stop_sequences = _normalize_stop_sequences(stop_raw)
        span_attrs = InferenceSpanAttributes(
            operation_name=attrs.OPERATION_CHAT,
            provider_name=provider,
            run_id=str(run_id),
            model=model_str,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            top_k=top_k,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
            stop_sequences=stop_sequences,
        )
        self._start_span(name, SpanKind.CLIENT, span_attrs.to_attributes(), run_id)
        if self.trace_content and messages:
            with _lock:
                # LangChain may pass list[list[BaseMessage]] (a batch) or a flat
                # list[BaseMessage]; normalize to the first batch.
                first_batch = messages[0] if messages else []
                if isinstance(first_batch, (list, tuple)):
                    _llm_messages_store[run_id] = list(first_batch)
                else:
                    _llm_messages_store[run_id] = list(messages)

    def _infer_provider(self, serialized: Dict[str, Any]) -> str:
        """Guess the gen_ai provider from the last segment of serialized['id'].

        Falls back to "langchain" when the class path gives no hint.
        """
        serialized = serialized or {}
        id_list = serialized.get("id") or []
        if isinstance(id_list, list):
            last = id_list[-1] if id_list else ""
            if "openai" in str(last).lower():
                return attrs.PROVIDER_OPENAI
            if "anthropic" in str(last).lower():
                return attrs.PROVIDER_ANTHROPIC
            if "vertex" in str(last).lower() or "gemini" in str(last).lower():
                return attrs.PROVIDER_GCP_VERTEX_AI
            if "bedrock" in str(last).lower():
                return attrs.PROVIDER_AWS_BEDROCK
        return "langchain"

    def on_llm_end(self, response: LLMResult, *, run_id: UUID, **kwargs: Any) -> None:
        """Close the LLM span: apply token usage, response metadata, and (when
        enabled) the recorded input/output messages, then end the span."""
        usage = _token_usage_from_llm_result(response)
        response_attrs = _response_attrs_from_llm_result(response)

        def set_llm_response_attrs():
            # Runs inside _end_span while the span is still current.
            from opentelemetry import trace
            span = trace.get_current_span()
            if not span.is_recording():
                return
            span.set_status(Status(StatusCode.OK))
            try:
                usage.apply_to_span(span)
                response_attrs.apply_to_span(span)
                if self.trace_content and self.record_input_output_as != "none":
                    # Drain the stores under the lock; build messages outside it.
                    with _lock:
                        stored_messages = _llm_messages_store.pop(run_id, None)
                        prompts = _llm_prompts_store.pop(run_id, None) if stored_messages is None else None
                    output_messages = build_output_messages_from_llm_result(response)
                    # Prefer real chat messages (on_chat_model_start) over raw prompts.
                    if stored_messages is not None:
                        input_messages = build_input_messages_from_langchain_messages(
                            stored_messages, max_length=self.max_content_length
                        )
                    elif prompts is not None:
                        input_messages = build_input_messages_from_prompts(prompts)
                    else:
                        input_messages = []
                    iio = InferenceInputOutput(
                        input_messages=input_messages,
                        output_messages=output_messages,
                    )
                    record_llm_input_output(
                        span,
                        iio.input_messages,
                        iio.output_messages,
                        record_as=self.record_input_output_as,
                        max_length=self.max_content_length,
                    )
            except Exception:
                # Recording must never break the application.
                pass

        _end_span(run_id, before_exit=set_llm_response_attrs)

    def on_llm_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
        """Record stored inputs plus a synthetic error output, then close the span as errored."""
        stored_messages = None
        prompts = None
        with _lock:
            stored_messages = _llm_messages_store.pop(run_id, None)
            if stored_messages is None:
                prompts = _llm_prompts_store.pop(run_id, None)
        if (stored_messages is not None or prompts is not None) and self.trace_content and self.record_input_output_as != "none":
            try:
                from opentelemetry import trace
                span = trace.get_current_span()
                if span.is_recording():
                    if stored_messages is not None:
                        input_messages = build_input_messages_from_langchain_messages(
                            stored_messages, max_length=self.max_content_length
                        )
                    else:
                        input_messages = build_input_messages_from_prompts(prompts or [])
                    record_llm_input_output(
                        span,
                        input_messages,
                        [{"role": "assistant", "content": f"[Error] {type(error).__name__}: {error!s}"}],
                        record_as=self.record_input_output_as,
                        max_length=self.max_content_length,
                    )
            except Exception:
                pass
        _record_error_span(run_id, error)

    def on_chain_start(
        self,
        serialized: Dict[str, Any],
        inputs: Dict[str, Any],
        *,
        run_id: UUID,
        **kwargs: Any,
    ) -> None:
        """Open an INTERNAL span for a chain/runnable and stash its inputs."""
        serialized = serialized or {}
        # Observed in practice: for some runnables (RunnableSequence,
        # RunnableLambda, StrOutputParser) `serialized` is None and the name
        # arrives in kwargs["name"]; when serialized is present, prefer
        # id[-1], then serialized["name"].
        name = "chain"
        if serialized:
            id_list = serialized.get("id")
            if isinstance(id_list, list) and len(id_list) > 0:
                name = id_list[-1]
            elif serialized.get("name"):
                name = serialized.get("name")
        if name == "chain" and kwargs:
            kw_name = kwargs.get("name")
            if kw_name is not None and str(kw_name).strip():
                name = kw_name
        if not isinstance(name, str):
            name = str(name)
        span_attrs = ChainSpanAttributes(run_id=str(run_id))
        self._start_span(name, SpanKind.INTERNAL, span_attrs.to_attributes(), run_id)
        if self.trace_content and inputs is not None:
            with _lock:
                try:
                    # Copy dict inputs so later mutation by the chain is not reflected.
                    _chain_io_store[run_id] = dict(inputs) if isinstance(inputs, dict) else {"inputs": inputs}
                except (TypeError, ValueError):
                    _chain_io_store[run_id] = {"inputs": inputs}

    def on_chain_end(self, outputs: Dict[str, Any], *, run_id: UUID, **kwargs: Any) -> None:
        """Close the chain span, recording stored inputs and final outputs when enabled."""
        def set_chain_io():
            from opentelemetry import trace
            span = trace.get_current_span()
            if not span.is_recording():
                return
            span.set_status(Status(StatusCode.OK))
            if not self.trace_content or self.record_input_output_as == "none":
                return
            try:
                with _lock:
                    stored_inputs = _chain_io_store.pop(run_id, None)
                if stored_inputs is not None:
                    record_span_input_output(
                        span,
                        stored_inputs,
                        outputs if outputs is not None else {},
                        record_as=self.record_input_output_as,
                        max_length=self.max_content_length,
                    )
            except Exception:
                pass

        _end_span(run_id, before_exit=set_chain_io)

    def on_chain_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
        """Record stored inputs plus error details, then close the chain span as errored."""
        stored_inputs = None
        with _lock:
            stored_inputs = _chain_io_store.pop(run_id, None)
        if stored_inputs is not None and self.trace_content and self.record_input_output_as != "none":
            try:
                from opentelemetry import trace
                span = trace.get_current_span()
                if span.is_recording():
                    record_span_input_output(
                        span,
                        stored_inputs,
                        {"error": str(error), "error_type": type(error).__name__},
                        record_as=self.record_input_output_as,
                        max_length=self.max_content_length,
                    )
            except Exception:
                pass
        _record_error_span(run_id, error)

    def on_tool_start(
        self,
        serialized: Dict[str, Any],
        input_str: str,
        *,
        run_id: UUID,
        **kwargs: Any,
    ) -> None:
        """Open an INTERNAL span for a tool call and stash its input string."""
        serialized = serialized or {}
        # Span name: just the name (or the last segment of the id).
        name = serialized.get("name") or (serialized.get("id", [])[-1] if serialized.get("id") else "tool")
        if not isinstance(name, str):
            name = str(name)
        span_attrs = ToolSpanAttributes(tool_name=name, run_id=str(run_id))
        self._start_span(name, SpanKind.INTERNAL, span_attrs.to_attributes(), run_id)
        if self.trace_content:
            with _lock:
                _tool_io_store[run_id] = input_str

    def on_tool_end(self, output: Any, *, run_id: UUID, **kwargs: Any) -> None:
        """Close the tool span, recording stored input and the tool output when enabled."""
        def set_tool_io():
            from opentelemetry import trace
            span = trace.get_current_span()
            if not span.is_recording():
                return
            span.set_status(Status(StatusCode.OK))
            if not self.trace_content or self.record_input_output_as == "none":
                return
            try:
                with _lock:
                    stored_input = _tool_io_store.pop(run_id, None)
                if stored_input is not None:
                    record_span_input_output(
                        span,
                        {"input": stored_input},
                        {"output": output} if output is not None else {},
                        record_as=self.record_input_output_as,
                        max_length=self.max_content_length,
                    )
            except Exception:
                pass

        _end_span(run_id, before_exit=set_tool_io)

    def on_tool_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
        """Record stored input plus error details, then close the tool span as errored."""
        stored_input = None
        with _lock:
            stored_input = _tool_io_store.pop(run_id, None)
        if stored_input is not None and self.trace_content and self.record_input_output_as != "none":
            try:
                from opentelemetry import trace
                span = trace.get_current_span()
                if span.is_recording():
                    record_span_input_output(
                        span,
                        {"input": stored_input},
                        {"error": str(error), "error_type": type(error).__name__},
                        record_as=self.record_input_output_as,
                        max_length=self.max_content_length,
                    )
            except Exception:
                pass
        _record_error_span(run_id, error)

    def on_retriever_start(
        self,
        serialized: Dict[str, Any],
        query: str,
        *,
        run_id: UUID,
        **kwargs: Any,
    ) -> None:
        """Open an INTERNAL span for a retriever call and stash the query."""
        serialized = serialized or {}
        # Span name: just the name (or the last segment of the id).
        name = serialized.get("name") or (serialized.get("id", [])[-1] if serialized.get("id") else None) or kwargs.get("name") or "retriever"
        if not isinstance(name, str):
            name = str(name)
        span_attrs = RetrieverSpanAttributes(run_id=str(run_id))
        self._start_span(name, SpanKind.INTERNAL, span_attrs.to_attributes(), run_id)
        if self.trace_content:
            with _lock:
                _retriever_io_store[run_id] = query

    def on_retriever_end(self, documents: Any, *, run_id: UUID, **kwargs: Any) -> None:
        """Close the retriever span, recording the query and serialized documents when enabled."""
        def set_retriever_io():
            from opentelemetry import trace
            span = trace.get_current_span()
            if not span.is_recording():
                return
            span.set_status(Status(StatusCode.OK))
            if not self.trace_content or self.record_input_output_as == "none":
                return
            try:
                with _lock:
                    stored_query = _retriever_io_store.pop(run_id, None)
                if stored_query is not None:
                    docs_ser = _documents_to_serializable(documents)
                    record_span_input_output(
                        span,
                        {"query": stored_query},
                        {"documents": docs_ser},
                        record_as=self.record_input_output_as,
                        max_length=self.max_content_length,
                    )
            except Exception:
                pass

        _end_span(run_id, before_exit=set_retriever_io)

    def on_retriever_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
        """Record the stored query plus error details, then close the retriever span as errored."""
        stored_query = None
        with _lock:
            stored_query = _retriever_io_store.pop(run_id, None)
        if stored_query is not None and self.trace_content and self.record_input_output_as != "none":
            try:
                from opentelemetry import trace
                span = trace.get_current_span()
                if span.is_recording():
                    record_span_input_output(
                        span,
                        {"query": stored_query},
                        {"error": str(error), "error_type": type(error).__name__},
                        record_as=self.record_input_output_as,
                        max_length=self.max_content_length,
                    )
            except Exception:
                pass
        _record_error_span(run_id, error)
615
+
616
+
617
+ def _documents_to_serializable(documents: Any) -> List[Dict[str, Any]]:
618
+ """将 LangChain Document 列表转为可 JSON 序列化的 list of dict。"""
619
+ if documents is None:
620
+ return []
621
+ out: List[Dict[str, Any]] = []
622
+ try:
623
+ for d in documents:
624
+ if hasattr(d, "page_content") and hasattr(d, "metadata"):
625
+ out.append({
626
+ "page_content": getattr(d, "page_content", "") or "",
627
+ "metadata": dict(getattr(d, "metadata", None) or {}),
628
+ })
629
+ else:
630
+ out.append({"raw": str(d)})
631
+ except Exception:
632
+ out = [{"raw": str(documents)}]
633
+ return out
634
+
635
+
636
def get_langchain_callback(
    trace_content: bool = True,
    record_input_output_as: RecordInputOutputAs = "events",
    max_content_length: Optional[int] = None,
) -> OpenTelemetryCallbackHandler:
    """Build a default OpenTelemetryCallbackHandler for LangChain/LangGraph.

    By default every node's inputs (arguments) and outputs (return values)
    are recorded.
    """
    handler = OpenTelemetryCallbackHandler(
        trace_content=trace_content,
        record_input_output_as=record_input_output_as,
        max_content_length=max_content_length,
    )
    return handler