mingx 0.1.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the differences between package versions as they appear in their public registries.
- mingx/__init__.py +50 -0
- mingx/_default_attributes.py +76 -0
- mingx/_trace.py +147 -0
- mingx/adapters/__init__.py +21 -0
- mingx/adapters/base.py +77 -0
- mingx/adapters/langchain.py +646 -0
- mingx/decorator.py +185 -0
- mingx/genai/__init__.py +99 -0
- mingx/genai/attributes.py +176 -0
- mingx/genai/io.py +439 -0
- mingx/genai/span_attributes.py +172 -0
- mingx/genai/spans.py +175 -0
- mingx-0.1.0.dist-info/METADATA +373 -0
- mingx-0.1.0.dist-info/RECORD +15 -0
- mingx-0.1.0.dist-info/WHEEL +4 -0
mingx/adapters/langchain.py
@@ -0,0 +1,646 @@

```python
"""
LangChain & LangGraph OpenTelemetry callback adapter.

Maps on_*_start / on_*_end / on_*_error to GenAI semantic layer spans.
Use with: config={"callbacks": [OpenTelemetryCallbackHandler()]}
"""

from __future__ import annotations

import threading
from typing import Any, Dict, List, Literal, Optional, Tuple
from uuid import UUID

from langchain_core.callbacks.base import BaseCallbackHandler
from langchain_core.outputs import LLMResult
from opentelemetry.trace import SpanKind, Status, StatusCode

from mingx._default_attributes import (
    MINGX_SPAN_TYPE,
    SPAN_TYPE_CUSTOM,
)
from mingx._trace import get_tracer
from mingx.genai import attributes as attrs
from mingx.genai.io import (
    build_input_messages_from_langchain_messages,
    build_input_messages_from_prompts,
    build_output_messages_from_llm_result,
    record_llm_input_output,
    record_span_input_output,
)
from mingx.genai.span_attributes import (
    ChainSpanAttributes,
    InferenceInputOutput,
    InferenceResponseAttributes,
    InferenceSpanAttributes,
    RetrieverSpanAttributes,
    TokenUsage,
    ToolSpanAttributes,
)

def _get_float(d: Dict[str, Any], key: str) -> Optional[float]:
    v = d.get(key)
    if v is None:
        return None
    try:
        return float(v)
    except (TypeError, ValueError):
        return None


def _get_int(d: Dict[str, Any], key: str) -> Optional[int]:
    v = d.get(key)
    if v is None:
        return None
    try:
        return int(v)
    except (TypeError, ValueError):
        return None


def _normalize_stop_sequences(stop: Any) -> Optional[List[str]]:
    """Convert a LangChain stop value (str or list) into the list[str] expected by gen_ai.request.stop_sequences."""
    if stop is None:
        return None
    if isinstance(stop, str):
        return [stop] if stop.strip() else None
    if isinstance(stop, (list, tuple)):
        out = [str(s).strip() for s in stop if s is not None and str(s).strip()]
        return out if out else None
    return None


def _token_usage_from_llm_result(response: LLMResult) -> TokenUsage:
    """Parse token usage from a LangChain LLMResult."""
    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None
    if response.llm_output and len(response.llm_output) > 0:
        out = response.llm_output
        if isinstance(out, dict):
            usage = out.get("token_usage") or out.get("usage") or {}
            input_tokens = _get_int(usage, "input_tokens") or _get_int(usage, "prompt_tokens")
            output_tokens = _get_int(usage, "output_tokens") or _get_int(usage, "completion_tokens")
    return TokenUsage(input_tokens=input_tokens, output_tokens=output_tokens)


def _response_attrs_from_llm_result(response: LLMResult) -> InferenceResponseAttributes:
    """Parse response metadata (response_model, finish_reasons) from a LangChain LLMResult."""
    response_model: Optional[str] = None
    finish_reasons: Optional[List[str]] = None
    generations = response.generations
    if generations and len(generations) > 0 and len(generations[0]) > 0:
        gen = generations[0][0]
        if hasattr(gen, "message") and gen.message and hasattr(gen.message, "response_metadata"):
            meta = getattr(gen.message, "response_metadata") or {}
            if isinstance(meta, dict):
                if "model_name" in meta:
                    response_model = str(meta["model_name"])
                if "finish_reason" in meta:
                    finish_reasons = [str(meta["finish_reason"])]
    return InferenceResponseAttributes(
        response_model=response_model,
        finish_reasons=finish_reasons,
    )


# run_id -> (context_manager, optional before_exit callable to run inside context)
_span_contexts: Dict[UUID, Tuple[Any, Optional[Any]]] = {}
# run_id -> input data (used by on_*_end to record input/output when trace_content is enabled)
_llm_prompts_store: Dict[UUID, List[str]] = {}
_llm_messages_store: Dict[UUID, List[Any]] = {}  # messages stored by on_chat_model_start for chat models
_chain_io_store: Dict[UUID, Dict[str, Any]] = {}
_tool_io_store: Dict[UUID, str] = {}
_retriever_io_store: Dict[UUID, str] = {}
_lock = threading.Lock()


def _start_span(name: str, kind: SpanKind, attributes: Optional[Dict[str, Any]] = None) -> Any:
    tracer = get_tracer()
    ctx = tracer.start_as_current_span(name, kind=kind, attributes=attributes or {})
    ctx.__enter__()
    return ctx


def _end_span(
    run_id: UUID,
    before_exit: Optional[Any] = None,
) -> bool:
    with _lock:
        entry = _span_contexts.pop(run_id, None)
    if not entry:
        return False
    ctx, _ = entry
    if before_exit is not None:
        try:
            before_exit()
        except Exception:
            pass
    try:
        ctx.__exit__(None, None, None)
    except Exception:
        pass
    return True


def _record_error_span(run_id: UUID, exception: BaseException) -> bool:
    with _lock:
        entry = _span_contexts.pop(run_id, None)
    if not entry:
        return False
    ctx, _ = entry
    try:
        from opentelemetry import trace

        span = trace.get_current_span()
        if span.is_recording():
            span.set_status(Status(StatusCode.ERROR, str(exception)))
            span.record_exception(exception)
            span.set_attribute(attrs.ERROR_TYPE, type(exception).__name__)
    except Exception:
        pass
    try:
        ctx.__exit__(type(exception), exception, exception.__traceback__)
    except Exception:
        pass
    return True


def _store(run_id: UUID, ctx: Any, before_exit: Optional[Any] = None) -> None:
    with _lock:
        _span_contexts[run_id] = (ctx, before_exit)


RecordInputOutputAs = Literal["events", "attributes", "none"]


class OpenTelemetryCallbackHandler(BaseCallbackHandler):
    """
    LangChain/LangGraph callback handler that creates OpenTelemetry spans
    following GenAI semantic conventions.
    """

    trace_content: bool = True  # record every node's inputs and outputs by default; record empty when there is no return value
    record_input_output_as: RecordInputOutputAs = "events"  # "events" recommended, consistent with Traceloop and similar tools
    max_content_length: Optional[int] = None  # optional truncation length (characters) for a single content item

    def __init__(
        self,
        *,
        trace_content: bool = True,
        record_input_output_as: RecordInputOutputAs = "events",
        max_content_length: Optional[int] = None,
    ) -> None:
        super().__init__()
        self.trace_content = trace_content
        self.record_input_output_as = record_input_output_as
        self.max_content_length = max_content_length

    def _start_span(
        self,
        name: str,
        kind: SpanKind,
        attributes: Optional[Dict[str, Any]] = None,
        run_id: Optional[UUID] = None,
    ) -> None:
        if not run_id:
            return
        # NOTE: this local name shadows the module-level `attrs` import within this method
        attrs = dict(attributes or {})
        # In adapter mode, default span_type to custom when it is not set
        if not (attrs.get(MINGX_SPAN_TYPE) or "").strip():
            attrs[MINGX_SPAN_TYPE] = SPAN_TYPE_CUSTOM
        ctx = _start_span(name, kind, attrs)
        _store(run_id, ctx, None)

    def on_llm_start(
        self,
        serialized: Dict[str, Any],
        prompts: List[str],
        *,
        run_id: UUID,
        **kwargs: Any,
    ) -> None:
        serialized = serialized or {}
        lc_kwargs = serialized.get("kwargs") or {}
        model = lc_kwargs.get("model_name") or (serialized.get("id", [])[-1] if serialized.get("id") else "llm")
        provider = self._infer_provider(serialized)
        model_str = str(model) if model else "unknown"
        # Span name: use the name or model id as-is, without concatenation
        name = serialized.get("name") or kwargs.get("name") or model_str
        if not isinstance(name, str):
            name = str(name)
        # Extract gen_ai.request.* parameters from the LangChain LLM kwargs (matching the semantic conventions)
        temperature = _get_float(lc_kwargs, "temperature")
        max_tokens = _get_int(lc_kwargs, "max_tokens") or _get_int(lc_kwargs, "max_tokens_limit")
        top_p = _get_float(lc_kwargs, "top_p")
        top_k = _get_int(lc_kwargs, "top_k")
        frequency_penalty = _get_float(lc_kwargs, "frequency_penalty")
        presence_penalty = _get_float(lc_kwargs, "presence_penalty")
        model_kwargs = lc_kwargs.get("model_kwargs") or {}
        if top_p is None:
            top_p = _get_float(model_kwargs, "top_p")
        if top_k is None:
            top_k = _get_int(model_kwargs, "top_k")
        if frequency_penalty is None:
            frequency_penalty = _get_float(model_kwargs, "frequency_penalty")
        if presence_penalty is None:
            presence_penalty = _get_float(model_kwargs, "presence_penalty")
        stop_raw = lc_kwargs.get("stop") or model_kwargs.get("stop")
        stop_sequences = _normalize_stop_sequences(stop_raw)
        span_attrs = InferenceSpanAttributes(
            operation_name=attrs.OPERATION_CHAT,
            provider_name=provider,
            run_id=str(run_id),
            model=model_str,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            top_k=top_k,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
            stop_sequences=stop_sequences,
        )
        self._start_span(name, SpanKind.CLIENT, span_attrs.to_attributes(), run_id)
        if self.trace_content and prompts:
            with _lock:
                _llm_prompts_store[run_id] = list(prompts)

    def on_chat_model_start(
        self,
        serialized: Dict[str, Any],
        messages: List[List[Any]],
        *,
        run_id: UUID,
        **kwargs: Any,
    ) -> None:
        """Chat-model entry point: receives the actual Message lists and stores them so on_llm_end can emit JSON input/output events."""
        serialized = serialized or {}
        lc_kwargs = serialized.get("kwargs") or {}
        model = lc_kwargs.get("model_name") or (serialized.get("id", [])[-1] if serialized.get("id") else "llm")
        provider = self._infer_provider(serialized)
        model_str = str(model) if model else "unknown"
        name = serialized.get("name") or kwargs.get("name") or model_str
        if not isinstance(name, str):
            name = str(name)
        temperature = _get_float(lc_kwargs, "temperature")
        max_tokens = _get_int(lc_kwargs, "max_tokens") or _get_int(lc_kwargs, "max_tokens_limit")
        top_p = _get_float(lc_kwargs, "top_p")
        top_k = _get_int(lc_kwargs, "top_k")
        frequency_penalty = _get_float(lc_kwargs, "frequency_penalty")
        presence_penalty = _get_float(lc_kwargs, "presence_penalty")
        model_kwargs = lc_kwargs.get("model_kwargs") or {}
        if top_p is None:
            top_p = _get_float(model_kwargs, "top_p")
        if top_k is None:
            top_k = _get_int(model_kwargs, "top_k")
        if frequency_penalty is None:
            frequency_penalty = _get_float(model_kwargs, "frequency_penalty")
        if presence_penalty is None:
            presence_penalty = _get_float(model_kwargs, "presence_penalty")
        stop_raw = lc_kwargs.get("stop") or model_kwargs.get("stop")
        stop_sequences = _normalize_stop_sequences(stop_raw)
        span_attrs = InferenceSpanAttributes(
            operation_name=attrs.OPERATION_CHAT,
            provider_name=provider,
            run_id=str(run_id),
            model=model_str,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            top_k=top_k,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
            stop_sequences=stop_sequences,
        )
        self._start_span(name, SpanKind.CLIENT, span_attrs.to_attributes(), run_id)
        if self.trace_content and messages:
            with _lock:
                # LangChain may pass list[list[BaseMessage]] (a batch) or list[BaseMessage]; always take the first batch
                first_batch = messages[0] if messages else []
                if isinstance(first_batch, (list, tuple)):
                    _llm_messages_store[run_id] = list(first_batch)
                else:
                    _llm_messages_store[run_id] = list(messages)

    def _infer_provider(self, serialized: Dict[str, Any]) -> str:
        serialized = serialized or {}
        id_list = serialized.get("id") or []
        if isinstance(id_list, list):
            last = id_list[-1] if id_list else ""
            if "openai" in str(last).lower():
                return attrs.PROVIDER_OPENAI
            if "anthropic" in str(last).lower():
                return attrs.PROVIDER_ANTHROPIC
            if "vertex" in str(last).lower() or "gemini" in str(last).lower():
                return attrs.PROVIDER_GCP_VERTEX_AI
            if "bedrock" in str(last).lower():
                return attrs.PROVIDER_AWS_BEDROCK
        return "langchain"

    def on_llm_end(self, response: LLMResult, *, run_id: UUID, **kwargs: Any) -> None:
        usage = _token_usage_from_llm_result(response)
        response_attrs = _response_attrs_from_llm_result(response)

        def set_llm_response_attrs():
            from opentelemetry import trace

            span = trace.get_current_span()
            if not span.is_recording():
                return
            span.set_status(Status(StatusCode.OK))
            try:
                usage.apply_to_span(span)
                response_attrs.apply_to_span(span)
                if self.trace_content and self.record_input_output_as != "none":
                    with _lock:
                        stored_messages = _llm_messages_store.pop(run_id, None)
                        prompts = _llm_prompts_store.pop(run_id, None) if stored_messages is None else None
                    output_messages = build_output_messages_from_llm_result(response)
                    if stored_messages is not None:
                        input_messages = build_input_messages_from_langchain_messages(
                            stored_messages, max_length=self.max_content_length
                        )
                    elif prompts is not None:
                        input_messages = build_input_messages_from_prompts(prompts)
                    else:
                        input_messages = []
                    iio = InferenceInputOutput(
                        input_messages=input_messages,
                        output_messages=output_messages,
                    )
                    record_llm_input_output(
                        span,
                        iio.input_messages,
                        iio.output_messages,
                        record_as=self.record_input_output_as,
                        max_length=self.max_content_length,
                    )
            except Exception:
                pass

        _end_span(run_id, before_exit=set_llm_response_attrs)

    def on_llm_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
        stored_messages = None
        prompts = None
        with _lock:
            stored_messages = _llm_messages_store.pop(run_id, None)
            if stored_messages is None:
                prompts = _llm_prompts_store.pop(run_id, None)
        if (stored_messages is not None or prompts is not None) and self.trace_content and self.record_input_output_as != "none":
            try:
                from opentelemetry import trace

                span = trace.get_current_span()
                if span.is_recording():
                    if stored_messages is not None:
                        input_messages = build_input_messages_from_langchain_messages(
                            stored_messages, max_length=self.max_content_length
                        )
                    else:
                        input_messages = build_input_messages_from_prompts(prompts or [])
                    record_llm_input_output(
                        span,
                        input_messages,
                        [{"role": "assistant", "content": f"[Error] {type(error).__name__}: {error!s}"}],
                        record_as=self.record_input_output_as,
                        max_length=self.max_content_length,
                    )
            except Exception:
                pass
        _record_error_span(run_id, error)

    def on_chain_start(
        self,
        serialized: Dict[str, Any],
        inputs: Dict[str, Any],
        *,
        run_id: UUID,
        **kwargs: Any,
    ) -> None:
        serialized = serialized or {}
        # Observed in practice: for some runnables (RunnableSequence, RunnableLambda, StrOutputParser) serialized is None
        # and the name lives in kwargs["name"]; when serialized is present, use id[-1] or serialized["name"]
        name = "chain"
        if serialized:
            id_list = serialized.get("id")
            if isinstance(id_list, list) and len(id_list) > 0:
                name = id_list[-1]
            elif serialized.get("name"):
                name = serialized.get("name")
        if name == "chain" and kwargs:
            kw_name = kwargs.get("name")
            if kw_name is not None and str(kw_name).strip():
                name = kw_name
        if not isinstance(name, str):
            name = str(name)
        span_attrs = ChainSpanAttributes(run_id=str(run_id))
        self._start_span(name, SpanKind.INTERNAL, span_attrs.to_attributes(), run_id)
        if self.trace_content and inputs is not None:
            with _lock:
                try:
                    _chain_io_store[run_id] = dict(inputs) if isinstance(inputs, dict) else {"inputs": inputs}
                except (TypeError, ValueError):
                    _chain_io_store[run_id] = {"inputs": inputs}

    def on_chain_end(self, outputs: Dict[str, Any], *, run_id: UUID, **kwargs: Any) -> None:
        def set_chain_io():
            from opentelemetry import trace

            span = trace.get_current_span()
            if not span.is_recording():
                return
            span.set_status(Status(StatusCode.OK))
            if not self.trace_content or self.record_input_output_as == "none":
                return
            try:
                with _lock:
                    stored_inputs = _chain_io_store.pop(run_id, None)
                if stored_inputs is not None:
                    record_span_input_output(
                        span,
                        stored_inputs,
                        outputs if outputs is not None else {},
                        record_as=self.record_input_output_as,
                        max_length=self.max_content_length,
                    )
            except Exception:
                pass

        _end_span(run_id, before_exit=set_chain_io)

    def on_chain_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
        stored_inputs = None
        with _lock:
            stored_inputs = _chain_io_store.pop(run_id, None)
        if stored_inputs is not None and self.trace_content and self.record_input_output_as != "none":
            try:
                from opentelemetry import trace

                span = trace.get_current_span()
                if span.is_recording():
                    record_span_input_output(
                        span,
                        stored_inputs,
                        {"error": str(error), "error_type": type(error).__name__},
                        record_as=self.record_input_output_as,
                        max_length=self.max_content_length,
                    )
            except Exception:
                pass
        _record_error_span(run_id, error)

    def on_tool_start(
        self,
        serialized: Dict[str, Any],
        input_str: str,
        *,
        run_id: UUID,
        **kwargs: Any,
    ) -> None:
        serialized = serialized or {}
        # Span name: just the name (or the last segment of the id)
        name = serialized.get("name") or (serialized.get("id", [])[-1] if serialized.get("id") else "tool")
        if not isinstance(name, str):
            name = str(name)
        span_attrs = ToolSpanAttributes(tool_name=name, run_id=str(run_id))
        self._start_span(name, SpanKind.INTERNAL, span_attrs.to_attributes(), run_id)
        if self.trace_content:
            with _lock:
                _tool_io_store[run_id] = input_str

    def on_tool_end(self, output: Any, *, run_id: UUID, **kwargs: Any) -> None:
        def set_tool_io():
            from opentelemetry import trace

            span = trace.get_current_span()
            if not span.is_recording():
                return
            span.set_status(Status(StatusCode.OK))
            if not self.trace_content or self.record_input_output_as == "none":
                return
            try:
                with _lock:
                    stored_input = _tool_io_store.pop(run_id, None)
                if stored_input is not None:
                    record_span_input_output(
                        span,
                        {"input": stored_input},
                        {"output": output} if output is not None else {},
                        record_as=self.record_input_output_as,
                        max_length=self.max_content_length,
                    )
            except Exception:
                pass

        _end_span(run_id, before_exit=set_tool_io)

    def on_tool_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
        stored_input = None
        with _lock:
            stored_input = _tool_io_store.pop(run_id, None)
        if stored_input is not None and self.trace_content and self.record_input_output_as != "none":
            try:
                from opentelemetry import trace

                span = trace.get_current_span()
                if span.is_recording():
                    record_span_input_output(
                        span,
                        {"input": stored_input},
                        {"error": str(error), "error_type": type(error).__name__},
                        record_as=self.record_input_output_as,
                        max_length=self.max_content_length,
                    )
            except Exception:
                pass
        _record_error_span(run_id, error)

    def on_retriever_start(
        self,
        serialized: Dict[str, Any],
        query: str,
        *,
        run_id: UUID,
        **kwargs: Any,
    ) -> None:
        serialized = serialized or {}
        # Span name: just the name (or the last segment of the id)
        name = serialized.get("name") or (serialized.get("id", [])[-1] if serialized.get("id") else None) or kwargs.get("name") or "retriever"
        if not isinstance(name, str):
            name = str(name)
        span_attrs = RetrieverSpanAttributes(run_id=str(run_id))
        self._start_span(name, SpanKind.INTERNAL, span_attrs.to_attributes(), run_id)
        if self.trace_content:
            with _lock:
                _retriever_io_store[run_id] = query

    def on_retriever_end(self, documents: Any, *, run_id: UUID, **kwargs: Any) -> None:
        def set_retriever_io():
            from opentelemetry import trace

            span = trace.get_current_span()
            if not span.is_recording():
                return
            span.set_status(Status(StatusCode.OK))
            if not self.trace_content or self.record_input_output_as == "none":
                return
            try:
                with _lock:
                    stored_query = _retriever_io_store.pop(run_id, None)
                if stored_query is not None:
                    docs_ser = _documents_to_serializable(documents)
                    record_span_input_output(
                        span,
                        {"query": stored_query},
                        {"documents": docs_ser},
                        record_as=self.record_input_output_as,
                        max_length=self.max_content_length,
                    )
            except Exception:
                pass

        _end_span(run_id, before_exit=set_retriever_io)

    def on_retriever_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
        stored_query = None
        with _lock:
            stored_query = _retriever_io_store.pop(run_id, None)
        if stored_query is not None and self.trace_content and self.record_input_output_as != "none":
            try:
                from opentelemetry import trace

                span = trace.get_current_span()
                if span.is_recording():
                    record_span_input_output(
                        span,
                        {"query": stored_query},
                        {"error": str(error), "error_type": type(error).__name__},
                        record_as=self.record_input_output_as,
                        max_length=self.max_content_length,
                    )
            except Exception:
                pass
        _record_error_span(run_id, error)


def _documents_to_serializable(documents: Any) -> List[Dict[str, Any]]:
    """Convert a list of LangChain Documents into a JSON-serializable list of dicts."""
    if documents is None:
        return []
    out: List[Dict[str, Any]] = []
    try:
        for d in documents:
            if hasattr(d, "page_content") and hasattr(d, "metadata"):
                out.append({
                    "page_content": getattr(d, "page_content", "") or "",
                    "metadata": dict(getattr(d, "metadata", None) or {}),
                })
            else:
                out.append({"raw": str(d)})
    except Exception:
        out = [{"raw": str(documents)}]
    return out


def get_langchain_callback(
    trace_content: bool = True,
    record_input_output_as: RecordInputOutputAs = "events",
    max_content_length: Optional[int] = None,
) -> OpenTelemetryCallbackHandler:
    """Return a default OpenTelemetryCallbackHandler for LangChain/LangGraph; records every node's inputs and outputs by default."""
    return OpenTelemetryCallbackHandler(
        trace_content=trace_content,
        record_input_output_as=record_input_output_as,
        max_content_length=max_content_length,
    )
```
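For orientation, here is a minimal usage sketch of the adapter above, following the module docstring's `config={"callbacks": [...]}` hint. The OpenTelemetry provider setup and the `ChatOpenAI` model are illustrative assumptions, not part of this package: the diff does not show `mingx/_trace.py`, so the sketch assumes `get_tracer()` resolves the global tracer provider.

```python
# Hypothetical usage sketch -- assumes mingx's get_tracer() picks up the global
# OpenTelemetry tracer provider (mingx/_trace.py is not shown in this diff).
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI  # illustrative; any chat model should work

from mingx.adapters.langchain import get_langchain_callback

# Route spans to stdout so the adapter's output is visible.
provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)

handler = get_langchain_callback(
    trace_content=True,               # store prompts/messages for the on_*_end events
    record_input_output_as="events",  # the default; "attributes" or "none" also accepted
    max_content_length=2000,          # truncate long content items
)

chain = ChatPromptTemplate.from_template("Summarize: {text}") | ChatOpenAI()
# Each runnable / LLM / tool in the chain gets its own span via the callback handler.
result = chain.invoke(
    {"text": "OpenTelemetry for LangChain"},
    config={"callbacks": [handler]},
)
```

Because the handler keys its bookkeeping on LangChain's `run_id`, a single handler instance can be shared across concurrent chains; the module-level stores are guarded by `_lock`.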