mcp-debugger 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_debugger/__init__.py +5 -0
- mcp_debugger/analytics.py +443 -0
- mcp_debugger/cli.py +2185 -0
- mcp_debugger/config.py +377 -0
- mcp_debugger/display/__init__.py +0 -0
- mcp_debugger/exporters/__init__.py +6 -0
- mcp_debugger/exporters/json_exporter.py +178 -0
- mcp_debugger/exporters/markdown_exporter.py +196 -0
- mcp_debugger/exporters/otlp_exporter.py +206 -0
- mcp_debugger/exporters/otlp_replay_exporter.py +221 -0
- mcp_debugger/protocol/__init__.py +0 -0
- mcp_debugger/protocol/error_classifier.py +108 -0
- mcp_debugger/protocol/schemas.py +92 -0
- mcp_debugger/protocol/validator.py +471 -0
- mcp_debugger/proxy/__init__.py +0 -0
- mcp_debugger/proxy/stdio_proxy.py +408 -0
- mcp_debugger/py.typed +1 -0
- mcp_debugger/replay/__init__.py +14 -0
- mcp_debugger/replay/diff.py +168 -0
- mcp_debugger/replay/engine.py +446 -0
- mcp_debugger/storage/__init__.py +0 -0
- mcp_debugger/storage/database.py +959 -0
- mcp_debugger/validate_live.py +250 -0
- mcp_debugger/version.py +3 -0
- mcp_debugger-0.1.0.dist-info/METADATA +207 -0
- mcp_debugger-0.1.0.dist-info/RECORD +29 -0
- mcp_debugger-0.1.0.dist-info/WHEEL +4 -0
- mcp_debugger-0.1.0.dist-info/entry_points.txt +2 -0
- mcp_debugger-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""Markdown exporter – generates a human-readable session report."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from typing import Any, Dict, IO, List, Optional
|
|
6
|
+
|
|
7
|
+
from mcp_debugger.analytics import SessionStats
|
|
8
|
+
|
|
9
|
+
# Maximum raw-JSON characters included inside a <details> block per message
# (the limit is compared against len() of the serialised string, not its encoded size).
|
|
10
|
+
_RAW_TRUNCATE_BYTES = 4096
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _fmt_duration(seconds: Optional[int]) -> str:
|
|
14
|
+
if seconds is None:
|
|
15
|
+
return "N/A"
|
|
16
|
+
m, s = divmod(seconds, 60)
|
|
17
|
+
return f"{m}m {s}s" if m > 0 else f"{s}s"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _fmt_latency(ms: Optional[float]) -> str:
|
|
21
|
+
if ms is None:
|
|
22
|
+
return "—"
|
|
23
|
+
return f"{ms:.1f}ms"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _direction_arrow(direction: str) -> str:
|
|
27
|
+
return "→ server" if direction == "client_to_server" else "← client"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _decode_json_field(raw: Any) -> Any:
|
|
31
|
+
if isinstance(raw, str):
|
|
32
|
+
try:
|
|
33
|
+
return json.loads(raw)
|
|
34
|
+
except (json.JSONDecodeError, ValueError):
|
|
35
|
+
return raw
|
|
36
|
+
return raw
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class MarkdownExporter:
    """Generate a Markdown session report.

    The report contains:

    * **Metadata** – session header table.
    * **Summary** – key totals.
    * **Tool Inventory** – per-tool stats table.
    * **Errors** – classified errors table.
    * **Message Log** – one table row per message; with ``include_raw=True``
      the table is followed by one ``<details>`` block per message containing
      its JSON.

    Every data value interpolated into a table row goes through
    :meth:`_cell`, so pipes and line breaks inside the data cannot add
    columns or cut a row short.
    """

    def __init__(self, include_raw: bool = False, pretty: bool = False) -> None:
        """Initialise the exporter.

        Args:
            include_raw: If ``True``, append a ``<details>`` JSON block for
                each message.
            pretty: If ``True``, pretty-print the JSON inside ``<details>``
                blocks (only relevant when *include_raw* is ``True``).
        """
        self.include_raw = include_raw
        self.pretty = pretty

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def export(
        self,
        session: Dict[str, Any],
        messages: List[Dict[str, Any]],
        tools: List[Dict[str, Any]],
        errors: List[Dict[str, Any]],
        stats: SessionStats,
        out: IO[str],
    ) -> None:
        """Write the full Markdown report to *out*.

        Args:
            session: Session row; ``id``, ``friendly_name``,
                ``server_command``, ``started_at``, ``ended_at`` and
                ``status`` are read.
            messages: Message rows rendered in the message log.
            tools: Accepted for interface compatibility; this method does not
                read it (the tool inventory is rendered from
                ``stats.top_tools``).
            errors: Error rows rendered in the errors table.
            stats: Aggregated statistics for the session.
            out: Text stream the report is written to.
        """
        session_id = session.get("id", "?")
        name = session.get("friendly_name") or f"Session #{session_id}"

        out.write(f"# MCP Session Report – {name}\n\n")
        self._write_metadata(session, stats, out)
        self._write_summary(stats, out)
        self._write_tools(stats, out)
        self._write_errors(errors, out)
        self._write_message_log(messages, out)

    # ------------------------------------------------------------------
    # Section writers
    # ------------------------------------------------------------------

    @staticmethod
    def _cell(value: Any) -> str:
        """Return *value* as text that is safe inside a single table cell.

        Line breaks are collapsed to spaces (a raw newline would end the
        table row) and pipes are backslash-escaped (an unescaped pipe would
        start a new column).
        """
        text = value if isinstance(value, str) else str(value)
        return " ".join(text.splitlines()).replace("|", "\\|")

    @staticmethod
    def _write_metadata(
        session: Dict[str, Any],
        stats: SessionStats,
        out: IO[str],
    ) -> None:
        """Write the ``## Metadata`` property/value table."""
        cell = MarkdownExporter._cell
        out.write("## Metadata\n\n")
        out.write("| Property | Value |\n")
        out.write("| :------- | :---- |\n")
        rows = [
            ("Session ID", str(session.get("id", ""))),
            ("Name", session.get("friendly_name") or "—"),
            ("Server command", f"`{session.get('server_command', '')}`"),
            ("Started", str(session.get("started_at") or "—")),
            ("Ended", str(session.get("ended_at") or "—")),
            ("Duration", _fmt_duration(stats.duration_seconds)),
            ("Status", str(session.get("status") or "—")),
        ]
        for label, value in rows:
            # Server commands are often shell pipelines, so the value must be
            # escaped even though it sits inside a code span.
            out.write(f"| {label} | {cell(value)} |\n")
        out.write("\n")

    @staticmethod
    def _write_summary(stats: SessionStats, out: IO[str]) -> None:
        """Write the ``## Summary`` bullet list of session totals."""
        total_errors = sum(stats.errors_by_category.values())
        total = stats.total_messages
        # Guard the division: a session with no messages reports "0%".
        error_rate_pct = f"{(total_errors / total * 100):.1f}%" if total > 0 else "0%"
        out.write("## Summary\n\n")
        out.write(
            f"- **Total messages:** {total} "
            f"({stats.client_to_server_count} → server, "
            f"{stats.server_to_client_count} ← client)\n"
        )
        out.write(f"- **Errors:** {total_errors} ({error_rate_pct} error rate)\n")
        out.write(f"- **Tools called:** {len(stats.top_tools)}\n")
        if stats.latency_avg is not None:
            out.write(f"- **Avg latency:** {_fmt_latency(stats.latency_avg)}\n")
        out.write("\n")

    @staticmethod
    def _write_tools(stats: SessionStats, out: IO[str]) -> None:
        """Write the ``## Tool Inventory`` table from ``stats.top_tools``."""
        out.write("## Tool Inventory\n\n")
        if not stats.top_tools:
            out.write("_No tools discovered in this session._\n\n")
            return
        cell = MarkdownExporter._cell
        out.write("| Tool | Calls | Avg Latency | Error Rate |\n")
        out.write("| :--- | :---: | :---: | :---: |\n")
        for tool in stats.top_tools:
            avg_lat = _fmt_latency(tool.avg_latency_ms)
            err_pct = f"{tool.error_rate * 100:.0f}%"
            out.write(f"| {cell(tool.name)} | {tool.calls} | {avg_lat} | {err_pct} |\n")
        out.write("\n")

    @staticmethod
    def _write_errors(errors: List[Dict[str, Any]], out: IO[str]) -> None:
        """Write the ``## Errors`` table, one row per entry of *errors*.

        Missing type/suggestion values are shown as an em dash and a missing
        message as an empty cell. An error code of ``0`` is a real value and
        is printed as ``0``.
        """
        out.write("## Errors\n\n")
        if not errors:
            out.write("_No errors recorded._\n\n")
            return
        cell = MarkdownExporter._cell
        out.write("| Type | Code | Message | Suggestion |\n")
        out.write("| :--- | :---: | :--- | :--- |\n")
        for err in errors:
            etype = cell(err.get("error_type") or "—")
            raw_code = err.get("error_code")
            # Only "absent" falls back to the dash; a plain truthiness test
            # would also hide a legitimate code of 0.
            code = "—" if raw_code is None or raw_code == "" else cell(raw_code)
            msg = cell(err.get("error_message") or "")
            sug = cell(err.get("suggestion") or "—")
            out.write(f"| {etype} | {code} | {msg} | {sug} |\n")
        out.write("\n")

    def _write_message_log(self, messages: List[Dict[str, Any]], out: IO[str]) -> None:
        """Write the ``## Message Log`` table and, optionally, raw JSON blocks.

        Numeric timestamps are treated as epoch milliseconds and shown as a
        UTC ``HH:MM:SS.mmm`` time; any other timestamp value is shown via
        ``str()``. When ``self.include_raw`` is set, one ``<details>`` block
        per message follows the table.
        """
        out.write("## Message Log\n\n")
        if not messages:
            out.write("_No messages recorded._\n\n")
            return

        cell = self._cell
        out.write("| # | Direction | Method | Timestamp | Latency |\n")
        out.write("| :- | :-------- | :----- | :-------- | :------ |\n")
        for i, msg in enumerate(messages, start=1):
            direction = _direction_arrow(msg.get("direction") or "")
            method = msg.get("method") or "—"
            ts_raw = msg.get("timestamp")
            ts_str: str
            if isinstance(ts_raw, (int, float)):
                ts_str = datetime.fromtimestamp(ts_raw / 1000.0, tz=timezone.utc).strftime(
                    "%H:%M:%S.%f"
                )[:-3]  # trim microseconds to milliseconds
            else:
                ts_str = str(ts_raw or "—")
            latency = _fmt_latency(msg.get("latency_ms"))
            out.write(
                f"| {i} | {direction} | `{cell(method)}` | {cell(ts_str)} | {latency} |\n"
            )

        if self.include_raw:
            out.write("\n")
            for i, msg in enumerate(messages, start=1):
                direction = msg.get("direction") or ""
                method = msg.get("method") or "message"
                out.write(f"<details>\n<summary>Message #{i}: {method} ({direction})</summary>\n\n")
                # Build a clean dict (decode JSON fields); session_id is dropped.
                clean: Dict[str, Any] = {
                    k: _decode_json_field(v) for k, v in msg.items() if k not in ("session_id",)
                }
                raw_json = json.dumps(clean, indent=2 if self.pretty else None, default=str)
                # The limit is applied to the character count of the string.
                if len(raw_json) > _RAW_TRUNCATE_BYTES:
                    raw_json = raw_json[:_RAW_TRUNCATE_BYTES] + "\n... [truncated]"
                out.write(f"```json\n{raw_json}\n```\n\n</details>\n\n")
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"""OTLP exporter – converts MCP session messages into OpenTelemetry spans.
|
|
2
|
+
|
|
3
|
+
Requires the optional ``[export]`` dependency group::
|
|
4
|
+
|
|
5
|
+
pip install mcp-debugger[export]
|
|
6
|
+
|
|
7
|
+
If the ``opentelemetry-sdk`` package is not installed, this module can still be
imported; constructing :class:`OTLPExporter` then raises ``ImportError`` with a
helpful message rather than crashing silently.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
from opentelemetry.sdk.resources import Resource
|
|
18
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
19
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
20
|
+
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
|
21
|
+
|
|
22
|
+
_OTLP_AVAILABLE = True
|
|
23
|
+
except ImportError: # pragma: no cover
|
|
24
|
+
_OTLP_AVAILABLE = False
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _get_tool_name(params_raw: Optional[str]) -> Optional[str]:
|
|
28
|
+
"""Extract tool name from a serialised ``params`` JSON string."""
|
|
29
|
+
if not params_raw:
|
|
30
|
+
return None
|
|
31
|
+
try:
|
|
32
|
+
params = json.loads(params_raw)
|
|
33
|
+
if isinstance(params, dict):
|
|
34
|
+
return params.get("name")
|
|
35
|
+
except (json.JSONDecodeError, ValueError):
|
|
36
|
+
pass
|
|
37
|
+
return None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class OTLPExporter:
    """Convert a session's messages into OpenTelemetry spans and export them.

    Each request–response pair becomes one span.  Notifications (no ID)
    become zero-duration spans.  All message spans are children of a root
    session span so the full timeline is visible in one trace.

    Args:
        endpoint: OTLP gRPC collector endpoint (default ``http://localhost:4317``).
        insecure: Disable TLS verification (useful for local testing).
        service_name: Service name attached to all spans.
        limit: If given, export only the first *limit* messages.

    Raises:
        ImportError: If the OpenTelemetry packages could not be imported.
    """

    def __init__(
        self,
        endpoint: str = "http://localhost:4317",
        insecure: bool = True,
        service_name: str = "mcp-debugger",
        limit: Optional[int] = None,
    ) -> None:
        if not _OTLP_AVAILABLE:
            raise ImportError(
                "OpenTelemetry SDK is not installed. Run: pip install 'mcp-debugger[export]'"
            )
        self.endpoint = endpoint
        self.insecure = insecure
        self.service_name = service_name
        self.limit = limit

    def export(
        self,
        session: Dict[str, Any],
        messages: List[Dict[str, Any]],
    ) -> int:
        """Build and export spans for all request–response pairs.

        Args:
            session: Session row; ``id``, ``friendly_name``,
                ``server_command`` and ``status`` are read.
            messages: Message rows; truncated to ``self.limit`` when set.

        Returns:
            The number of spans exported (excluding the root span).
        """
        msgs = messages[: self.limit] if self.limit is not None else messages

        resource = Resource.create({"service.name": self.service_name})
        provider = TracerProvider(resource=resource)
        exporter_obj = OTLPSpanExporter(
            endpoint=self.endpoint,
            insecure=self.insecure,
        )
        provider.add_span_processor(BatchSpanProcessor(exporter_obj))
        tracer = provider.get_tracer("mcp-debugger")

        session_id = session.get("id", 0)
        session_name = session.get("friendly_name") or f"session-{session_id}"

        span_count = 0
        try:
            # --- root session span ---
            with tracer.start_as_current_span(
                name=f"mcp-session {session_name}",
                attributes={
                    "mcp.session.id": str(session_id),
                    "mcp.server.command": str(session.get("server_command") or ""),
                    "mcp.session.status": str(session.get("status") or ""),
                },
            ):
                for req, resp in self._pair_messages(msgs):
                    self._emit_span(tracer, req, resp, session)
                    span_count += 1
        finally:
            # Always release the provider (and the processor attached above),
            # even when building a span raised part-way through.
            provider.shutdown()
        return span_count

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _pair_messages(
        messages: List[Dict[str, Any]],
    ) -> List[Tuple[Dict[str, Any], Optional[Dict[str, Any]]]]:
        """Match requests with their responses by message_id.

        Notifications are emitted as solo pairs ``(msg, None)``, as are
        client requests that carry no message_id.  Responses without a
        matching request are skipped.  Requests that never received a
        response are appended at the end as ``(req, None)``.

        An id counts as present unless it is ``None`` or an empty string, so
        the valid JSON-RPC id ``0`` is paired like any other id.
        """

        def has_id(value: Any) -> bool:
            return value is not None and value != ""

        request_map: Dict[Any, Dict[str, Any]] = {}
        pairs: List[Tuple[Dict[str, Any], Optional[Dict[str, Any]]]] = []

        for msg in messages:
            direction = msg.get("direction")
            msg_type = msg.get("message_type")
            msg_id = msg.get("message_id")

            if msg_type == "notification":
                pairs.append((msg, None))
            elif msg_type == "request" and direction == "client_to_server":
                if has_id(msg_id):
                    request_map[msg_id] = msg
                else:
                    pairs.append((msg, None))
            elif msg_type == "response" and direction == "server_to_client":
                if has_id(msg_id) and msg_id in request_map:
                    pairs.append((request_map.pop(msg_id), msg))

        # Any unmatched requests (no response received)
        pairs.extend((req, None) for req in request_map.values())
        return pairs

    @staticmethod
    def _extract_error_code(err_raw: Any) -> Optional[int]:
        """Return the integer ``code`` of an error payload, if it has one.

        *err_raw* may be a JSON string or an already-decoded dict; any other
        shape, an undecodable string, or a non-integer code yields ``None``.
        """
        if isinstance(err_raw, str):
            try:
                err_raw = json.loads(err_raw)
            except (json.JSONDecodeError, ValueError):
                return None
        if not isinstance(err_raw, dict):
            return None
        code = err_raw.get("code")
        if code is None:
            return None
        try:
            return int(code)
        except (TypeError, ValueError):
            return None

    def _emit_span(
        self,
        tracer: Any,
        req: Dict[str, Any],
        resp: Optional[Dict[str, Any]],
        session: Dict[str, Any],
    ) -> None:
        """Create one span named ``mcp.<method>`` for a request/response pair.

        Args:
            tracer: Tracer used to start the span.
            req: The request (or notification) message row.
            resp: The matching response row, or ``None`` when there is none.
            session: Session row; only ``server_command`` is read.

        The span carries ``mcp.error`` (``True`` when the response has an
        ``error`` or its result has a truthy ``isError``), plus
        ``mcp.error_code``, ``mcp.tool.name`` and ``mcp.latency_ms`` when
        those values are available.
        """
        method = req.get("method") or "unknown"
        span_name = f"mcp.{method}"
        latency_ms = resp.get("latency_ms") if resp else None

        # Build attributes
        attrs: Dict[str, Any] = {
            "mcp.method": method,
            "mcp.direction": str(req.get("direction") or ""),
            "mcp.server.command": str(session.get("server_command") or ""),
        }

        if method == "tools/call":
            tool_name = _get_tool_name(req.get("params"))
            if tool_name:
                attrs["mcp.tool.name"] = tool_name

        has_error = False
        error_code: Optional[int] = None
        if resp:
            err_raw = resp.get("error")
            if err_raw:
                has_error = True
                error_code = self._extract_error_code(err_raw)
            elif resp.get("result"):
                raw_result = resp["result"]
                try:
                    result = json.loads(raw_result) if isinstance(raw_result, str) else raw_result
                except (json.JSONDecodeError, ValueError):
                    result = None
                # A result flagged with isError counts as an error as well.
                if isinstance(result, dict) and result.get("isError"):
                    has_error = True

        attrs["mcp.error"] = has_error
        if error_code is not None:
            attrs["mcp.error_code"] = error_code

        if latency_ms is not None:
            attrs["mcp.latency_ms"] = float(latency_ms)

        with tracer.start_as_current_span(span_name, attributes=attrs):
            pass  # span lifecycle managed by context manager
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"""OTLP exporter for replay results.
|
|
2
|
+
|
|
3
|
+
Converts a :class:`~mcp_debugger.replay.engine.ReplayResult` into
|
|
4
|
+
OpenTelemetry trace spans and exports them to an OTLP collector (e.g. Jaeger,
|
|
5
|
+
Grafana Tempo, or any compatible backend).
|
|
6
|
+
|
|
7
|
+
Requires the optional ``[otlp]`` dependency group::
|
|
8
|
+
|
|
9
|
+
pip install 'mcp-debugger[otlp]'
|
|
10
|
+
|
|
11
|
+
If the ``opentelemetry-sdk`` package is not installed, this module can still be
imported; constructing :class:`OTLPReplayExporter` then raises ``ImportError``
with a helpful message rather than crashing silently.
|
|
13
|
+
|
|
14
|
+
Trace structure
|
|
15
|
+
---------------
|
|
16
|
+
* One trace per replay run.
|
|
17
|
+
* Root span – ``mcp.replay <session_id>`` – carries summary attributes.
|
|
18
|
+
* One child span per replayed message – ``mcp.replay.<method>`` – carries
|
|
19
|
+
per-message attributes and, for mismatches, a structured diff event.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import json
|
|
25
|
+
from typing import TYPE_CHECKING, Any, Dict
|
|
26
|
+
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
# Optional OpenTelemetry imports – guarded so the module can be imported even
|
|
29
|
+
# when the SDK is not installed. The actual classes are only used inside
|
|
30
|
+
# OTLPReplayExporter, which refuses to construct itself without the SDK.
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
try:
|
|
33
|
+
from opentelemetry import trace as _otel_trace # noqa: F401
|
|
34
|
+
from opentelemetry.sdk.resources import Resource
|
|
35
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
36
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
37
|
+
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
|
|
38
|
+
OTLPSpanExporter,
|
|
39
|
+
)
|
|
40
|
+
from opentelemetry.trace import Status, StatusCode
|
|
41
|
+
|
|
42
|
+
_OTLP_AVAILABLE = True
|
|
43
|
+
except ImportError: # pragma: no cover
|
|
44
|
+
_OTLP_AVAILABLE = False
|
|
45
|
+
# Sentinel stubs so patch.object() in tests can always find these names,
|
|
46
|
+
# even when the SDK is not installed.
|
|
47
|
+
Resource = None # type: ignore
|
|
48
|
+
TracerProvider = None # type: ignore
|
|
49
|
+
BatchSpanProcessor = None # type: ignore
|
|
50
|
+
OTLPSpanExporter = None # type: ignore
|
|
51
|
+
Status = None # type: ignore
|
|
52
|
+
StatusCode = None # type: ignore
|
|
53
|
+
|
|
54
|
+
if TYPE_CHECKING: # pragma: no cover
|
|
55
|
+
from mcp_debugger.replay.engine import ReplayResult, ReplayedMessage
|
|
56
|
+
|
|
57
|
+
_DIFF_SUMMARY_MAX = 255 # OTLP attribute character limit for diff summaries
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class OTLPReplayExporter:
    """Convert a :class:`~mcp_debugger.replay.engine.ReplayResult` into OTLP spans.

    Each replay run becomes a single trace:

    * The root span carries aggregate statistics (total, matches, mismatches,
      timeouts, errors, match percentage).
    * Each replayed message becomes a child span carrying per-message data
      (method, matched, latency, diff summary).  Mismatched messages also
      receive a structured ``mcp.replay.diff`` event.

    Args:
        endpoint: OTLP gRPC collector endpoint (default ``http://localhost:4317``).
        insecure: Disable TLS verification (useful for local testing).
        service_name: Service name attached to all spans.

    Raises:
        ImportError: If the OpenTelemetry packages could not be imported.
    """

    def __init__(
        self,
        endpoint: str = "http://localhost:4317",
        insecure: bool = True,
        service_name: str = "mcp-debugger",
    ) -> None:
        if not _OTLP_AVAILABLE:
            raise ImportError(
                "OpenTelemetry SDK is not installed. Run: pip install 'mcp-debugger[otlp]'"
            )
        self.endpoint = endpoint
        self.insecure = insecure
        self.service_name = service_name

    def export(self, result: "ReplayResult") -> int:
        """Export *result* as an OTLP trace.

        Returns the number of child spans (replayed messages) created, not
        counting the root span.  Exceptions raised while the spans are being
        built propagate to the caller; the provider is shut down either way.
        """
        resource = Resource.create({"service.name": self.service_name})
        provider = TracerProvider(resource=resource)
        otlp_exporter = OTLPSpanExporter(
            endpoint=self.endpoint,
            insecure=self.insecure,
        )
        provider.add_span_processor(BatchSpanProcessor(otlp_exporter))
        tracer = provider.get_tracer("mcp-debugger.replay")

        total = result.total_messages_replayed
        matches = sum(1 for m in result.messages if m.matches)
        mismatches = result.mismatched_responses
        timeouts = result.timed_out
        errors = result.failed_responses
        match_pct = round(matches / total * 100, 1) if total else 0.0
        duration_s = (result.ended_at - result.started_at).total_seconds()

        root_attrs: Dict[str, Any] = {
            "replay.session_id": str(result.session_id),
            "replay.target_server_command": result.target_server_command,
            "replay.total_messages": total,
            "replay.matches": matches,
            "replay.mismatches": mismatches,
            "replay.timeouts": timeouts,
            "replay.errors": errors,
            "replay.match_percentage": match_pct,
            "replay.duration_seconds": round(duration_s, 3),
        }
        if result.replay_id is not None:
            root_attrs["replay.id"] = str(result.replay_id)

        span_count = 0

        try:
            with tracer.start_as_current_span(
                name=f"mcp.replay session-{result.session_id}",
                attributes=root_attrs,
            ) as root_span:
                # Mark root span as error if there were any mismatches,
                # timeouts or errors; the description names all three so an
                # errors-only run does not read as "0 mismatches, 0 timeouts".
                if mismatches > 0 or timeouts > 0 or errors > 0:
                    root_span.set_status(
                        Status(
                            StatusCode.ERROR,
                            f"{mismatches} mismatch(es), {timeouts} timeout(s), "
                            f"{errors} error(s)",
                        )
                    )

                for msg in result.messages:
                    self._emit_message_span(tracer, msg)
                    span_count += 1

            provider.force_flush()
        finally:
            # Release the provider even when building a span raised.
            provider.shutdown()
        return span_count

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _tool_name_from_params(params: Any) -> Any:
        """Return the ``name`` entry of a ``tools/call`` params value.

        *params* may be a dict or a JSON string; any other shape, an
        undecodable string, or JSON that is not an object yields ``None``.
        """
        if isinstance(params, str):
            try:
                params = json.loads(params)
            except (json.JSONDecodeError, ValueError):
                return None
        if isinstance(params, dict):
            return params.get("name")
        return None

    def _emit_message_span(
        self,
        tracer: Any,
        msg: "ReplayedMessage",
    ) -> None:
        """Create a child span for a single replayed message.

        The span is named ``mcp.replay.<method>``.  It is given an error
        status when the message did not match or carries an error, a
        ``mcp.replay.diff`` event when a structured diff is present, and a
        checksum of the original response for grouping.
        """
        import zlib  # local import: only needed for the response checksum

        method = msg.method or "unknown"
        span_name = f"mcp.replay.{method}"

        attrs: Dict[str, Any] = {
            "mcp.method": method,
            "mcp.direction": "client_to_server",
            "mcp.replay.original_message_id": str(msg.original_message_id),
            "mcp.replay.matched": msg.matches,
        }
        # A missing latency is skipped rather than passed to round().
        if msg.latency_ms is not None:
            attrs["mcp.replay.latency_ms"] = round(msg.latency_ms, 3)

        # Extract tool name for tools/call spans
        if method == "tools/call" and msg.request_sent:
            tool_name = self._tool_name_from_params(msg.request_sent.get("params"))
            if tool_name:
                attrs["mcp.tool.name"] = str(tool_name)

        # Truncated diff summary for quick scanning in trace UIs
        if msg.diff_text:
            attrs["mcp.replay.diff_summary"] = msg.diff_text[:_DIFF_SUMMARY_MAX]

        if msg.error:
            attrs["mcp.replay.error"] = msg.error[:_DIFF_SUMMARY_MAX]

        with tracer.start_as_current_span(span_name, attributes=attrs) as span:
            if not msg.matches or msg.error:
                description = msg.error or f"Response mismatch for {method}"
                span.set_status(Status(StatusCode.ERROR, description))

            # Add structured diff event for mismatched messages
            if not msg.matches and msg.diff:
                try:
                    diff_json = json.dumps(
                        [d.model_dump() for d in msg.diff],
                        separators=(",", ":"),
                    )
                    # Truncate to a safe size for OTLP event payloads
                    span.add_event(
                        "mcp.replay.diff",
                        attributes={"diff": diff_json[:1024]},
                    )
                except Exception:
                    # Deliberate best-effort: never let serialisation errors
                    # break the export.
                    pass

            # Add a checksum of the original response for grouping.  CRC-32 is
            # used instead of the built-in hash(), whose value for strings
            # differs between interpreter runs, so identical responses get
            # the same 32-bit value in every export.
            if msg.original_response is not None:
                try:
                    orig_str = json.dumps(msg.original_response, sort_keys=True)
                except (TypeError, ValueError):
                    # Unserialisable response: skip the attribute.
                    pass
                else:
                    span.set_attribute(
                        "mcp.replay.original_response_hash",
                        str(zlib.crc32(orig_str.encode("utf-8"))),
                    )
|
|
File without changes
|