pcp-mcp 0.1.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only and reflects the changes between the two versions.
- pcp_mcp/AGENTS.md +70 -0
- pcp_mcp/__init__.py +4 -0
- pcp_mcp/client.py +28 -0
- pcp_mcp/config.py +58 -1
- pcp_mcp/context.py +48 -1
- pcp_mcp/icons.py +31 -0
- pcp_mcp/middleware.py +75 -0
- pcp_mcp/models.py +10 -0
- pcp_mcp/prompts/__init__.py +18 -5
- pcp_mcp/py.typed +0 -0
- pcp_mcp/resources/catalog.py +76 -2
- pcp_mcp/resources/health.py +70 -27
- pcp_mcp/server.py +20 -0
- pcp_mcp/tools/AGENTS.md +61 -0
- pcp_mcp/tools/metrics.py +126 -88
- pcp_mcp/tools/system.py +311 -59
- pcp_mcp/utils/__init__.py +0 -4
- pcp_mcp/utils/extractors.py +18 -0
- {pcp_mcp-0.1.0.dist-info → pcp_mcp-1.0.1.dist-info}/METADATA +22 -10
- pcp_mcp-1.0.1.dist-info/RECORD +26 -0
- pcp_mcp/utils/decorators.py +0 -38
- pcp_mcp-0.1.0.dist-info/RECORD +0 -22
- {pcp_mcp-0.1.0.dist-info → pcp_mcp-1.0.1.dist-info}/WHEEL +0 -0
- {pcp_mcp-0.1.0.dist-info → pcp_mcp-1.0.1.dist-info}/entry_points.txt +0 -0
pcp_mcp/tools/system.py
CHANGED
@@ -3,13 +3,24 @@
 from __future__ import annotations
 
 from datetime import datetime, timezone
-from typing import TYPE_CHECKING, Annotated, Literal
+from typing import TYPE_CHECKING, Annotated, Literal, Optional
 
 from fastmcp import Context
+from mcp.types import ToolAnnotations
 from pydantic import Field
 
-from pcp_mcp.context import
-from pcp_mcp.
+from pcp_mcp.context import get_client_for_host
+from pcp_mcp.icons import (
+    ICON_DIAGNOSE,
+    ICON_HEALTH,
+    ICON_PROCESS,
+    ICON_SYSTEM,
+    TAGS_DIAGNOSE,
+    TAGS_HEALTH,
+    TAGS_PROCESS,
+    TAGS_SYSTEM,
+)
+from pcp_mcp.models import DiagnosisResult, ProcessTopResult, SystemSnapshot
 from pcp_mcp.utils.builders import (
     assess_processes,
     build_cpu_metrics,
@@ -25,6 +36,8 @@ from pcp_mcp.utils.extractors import get_scalar_value
 if TYPE_CHECKING:
     from fastmcp import FastMCP
 
+TOOL_ANNOTATIONS = ToolAnnotations(readOnlyHint=True, openWorldHint=True)
+
 SNAPSHOT_METRICS = {
     "cpu": [
         "kernel.all.cpu.user",
@@ -86,17 +99,75 @@ PROCESS_METRICS = {
 }
 
 
+async def _fetch_system_snapshot(
+    ctx: Context,
+    categories: list[str],
+    sample_interval: float,
+    host: str | None,
+) -> SystemSnapshot:
+    """Core logic for fetching a system snapshot."""
+    from pcp_mcp.errors import handle_pcp_error
+
+    all_metrics: list[str] = []
+    for cat in categories:
+        if cat in SNAPSHOT_METRICS:
+            all_metrics.extend(SNAPSHOT_METRICS[cat])
+
+    async def report_progress(current: float, total: float, message: str) -> None:
+        await ctx.report_progress(current, total, message)
+
+    async with get_client_for_host(ctx, host) as client:
+        try:
+            data = await client.fetch_with_rates(
+                all_metrics,
+                COUNTER_METRICS,
+                sample_interval,
+                progress_callback=report_progress,
+            )
+        except Exception as e:
+            raise handle_pcp_error(e, "fetching system snapshot") from e
+
+        await ctx.report_progress(95, 100, "Building snapshot...")
+
+        snapshot = SystemSnapshot(
+            timestamp=datetime.now(timezone.utc).isoformat(),
+            hostname=client.target_host,
+        )
+
+        if "cpu" in categories:
+            snapshot.cpu = build_cpu_metrics(data)
+        if "memory" in categories:
+            snapshot.memory = build_memory_metrics(data)
+        if "load" in categories:
+            snapshot.load = build_load_metrics(data)
+        if "disk" in categories:
+            snapshot.disk = build_disk_metrics(data)
+        if "network" in categories:
+            snapshot.network = build_network_metrics(data)
+
+        await ctx.report_progress(100, 100, "Complete")
+        return snapshot
+
+
 def register_system_tools(mcp: FastMCP) -> None:
     """Register system health tools with the MCP server."""
 
-    @mcp.tool(
+    @mcp.tool(
+        annotations=TOOL_ANNOTATIONS,
+        output_schema=SystemSnapshot.model_json_schema(),
+        icons=[ICON_SYSTEM],
+        tags=TAGS_SYSTEM,
+    )
     async def get_system_snapshot(
         ctx: Context,
         categories: Annotated[
-            list[str]
+            Optional[list[str]],
             Field(
                 default=None,
-                description=
+                description=(
+                    "Categories to include: cpu, memory, disk, network, load. "
+                    "Defaults to all five if not specified."
+                ),
             ),
         ] = None,
         sample_interval: Annotated[
@@ -108,53 +179,62 @@ def register_system_tools(mcp: FastMCP) -> None:
                 description="Seconds between samples for rate calculation",
             ),
         ] = 1.0,
+        host: Annotated[
+            Optional[str],
+            Field(description="Target pmcd host to query (default: server's configured target)"),
+        ] = None,
     ) -> SystemSnapshot:
         """Get a point-in-time system health overview.
 
         Returns CPU, memory, disk I/O, network I/O, and load metrics in a single
         call. For rate metrics (CPU %, disk I/O, network throughput), takes two
         samples to calculate per-second rates.
-        """
-        from pcp_mcp.errors import handle_pcp_error
 
-
+        Use this tool FIRST for system troubleshooting. It automatically handles
+        counter-to-rate conversion. Do NOT use query_metrics() for CPU, disk, or
+        network counters - those return raw cumulative values since boot.
 
+        Examples:
+            get_system_snapshot() - Quick health check (all categories)
+            get_system_snapshot(categories=["cpu", "memory"]) - CPU and memory only
+            get_system_snapshot(categories=["cpu", "load"]) - CPU and load averages
+            get_system_snapshot(categories=["disk", "network"]) - I/O analysis
+            get_system_snapshot(host="web1.example.com") - Query remote host
+        """
         if categories is None:
             categories = ["cpu", "memory", "disk", "network", "load"]
+        return await _fetch_system_snapshot(ctx, categories, sample_interval, host)
 
-
-
-
-
-
-
-
-
-
-
-        )
-
-
-
-        snapshot = SystemSnapshot(
-            timestamp=datetime.now(timezone.utc).isoformat(),
-            hostname=client.target_host,
-        )
+    @mcp.tool(
+        annotations=TOOL_ANNOTATIONS,
+        output_schema=SystemSnapshot.model_json_schema(),
+        icons=[ICON_HEALTH],
+        tags=TAGS_HEALTH,
+    )
+    async def quick_health(
+        ctx: Context,
+        host: Annotated[
+            Optional[str],
+            Field(description="Target pmcd host to query (default: server's configured target)"),
+        ] = None,
+    ) -> SystemSnapshot:
+        """Fast system health check returning only CPU and memory metrics.
 
-
-
-        if "memory" in categories:
-            snapshot.memory = build_memory_metrics(data)
-        if "load" in categories:
-            snapshot.load = build_load_metrics(data)
-        if "disk" in categories:
-            snapshot.disk = build_disk_metrics(data)
-        if "network" in categories:
-            snapshot.network = build_network_metrics(data)
+        Use this for rapid status checks when you don't need disk/network/load
+        details. Uses a shorter sample interval (0.5s) for faster results.
 
-
+        Examples:
+            quick_health() - Fast health check on default host
+            quick_health(host="web1.example.com") - Fast check on remote host
+        """
+        return await _fetch_system_snapshot(ctx, ["cpu", "memory"], 0.5, host)
 
-    @mcp.tool(
+    @mcp.tool(
+        annotations=TOOL_ANNOTATIONS,
+        output_schema=ProcessTopResult.model_json_schema(),
+        icons=[ICON_PROCESS],
+        tags=TAGS_PROCESS,
+    )
     async def get_process_top(
         ctx: Context,
         sort_by: Annotated[
@@ -174,14 +254,22 @@
                 description="Seconds to sample for CPU/IO rates",
             ),
         ] = 1.0,
+        host: Annotated[
+            Optional[str],
+            Field(description="Target pmcd host to query (default: server's configured target)"),
+        ] = None,
    ) -> ProcessTopResult:
        """Get top processes by resource consumption.
 
        For CPU and I/O, takes two samples to calculate rates. Memory is instantaneous.
        Returns the top N processes sorted by the requested resource.
-        """
-        client = get_client(ctx)
 
+        Examples:
+            get_process_top() - Top 10 by CPU (default)
+            get_process_top(sort_by="memory", limit=20) - Top 20 memory consumers
+            get_process_top(sort_by="io", sample_interval=2.0) - Top I/O with longer sample
+            get_process_top(host="db1.example.com") - Query remote host
+        """
        all_metrics = (
            PROCESS_METRICS["info"] + PROCESS_METRICS["memory"] + PROCESS_METRICS.get(sort_by, [])
        )
@@ -202,28 +290,192 @@
 
         from pcp_mcp.errors import handle_pcp_error
 
+        async def report_progress(current: float, total: float, message: str) -> None:
+            await ctx.report_progress(current, total, message)
+
+        async with get_client_for_host(ctx, host) as client:
+            try:
+                proc_data = await client.fetch_with_rates(
+                    all_metrics, counter_metrics, sample_interval, progress_callback=report_progress
+                )
+                sys_data = await client.fetch(system_metrics)
+            except Exception as e:
+                raise handle_pcp_error(e, "fetching process data") from e
+
+            await ctx.report_progress(92, 100, "Processing results...")
+
+            ncpu = get_scalar_value(sys_data, "hinv.ncpu", 1)
+            total_mem = get_scalar_value(sys_data, "mem.physmem", 1) * 1024
+
+            processes = build_process_list(proc_data, sort_by, total_mem, ncpu)
+            processes.sort(key=lambda p: get_sort_key(p, sort_by), reverse=True)
+            processes = processes[:limit]
+
+            assessment = assess_processes(processes, sort_by, ncpu)
+
+            await ctx.report_progress(100, 100, "Complete")
+            return ProcessTopResult(
+                timestamp=datetime.now(timezone.utc).isoformat(),
+                hostname=client.target_host,
+                sort_by=sort_by,
+                sample_interval=sample_interval,
+                processes=processes,
+                total_memory_bytes=int(total_mem),
+                ncpu=ncpu,
+                assessment=assessment,
+            )
+
+    @mcp.tool(
+        annotations=TOOL_ANNOTATIONS,
+        output_schema=DiagnosisResult.model_json_schema(),
+        icons=[ICON_DIAGNOSE],
+        tags=TAGS_DIAGNOSE,
+    )
+    async def smart_diagnose(
+        ctx: Context,
+        host: Annotated[
+            Optional[str],
+            Field(description="Target pmcd host to query (default: server's configured target)"),
+        ] = None,
+    ) -> DiagnosisResult:
+        """Use LLM to analyze system metrics and provide diagnosis.
+
+        Collects a quick system snapshot (CPU, memory, load) and asks the
+        connected LLM to analyze the metrics and provide actionable insights.
+
+        This tool demonstrates FastMCP's LLM sampling capability, where the
+        MCP server can request LLM assistance for complex analysis tasks.
+
+        Examples:
+            smart_diagnose() - Analyze default host
+            smart_diagnose(host="db1.example.com") - Analyze remote host
+        """
+        from pcp_mcp.errors import handle_pcp_error
+
         try:
-
-            sys_data = await client.fetch(system_metrics)
+            snapshot = await _fetch_system_snapshot(ctx, ["cpu", "memory", "load"], 0.5, host)
         except Exception as e:
-            raise handle_pcp_error(e, "fetching
+            raise handle_pcp_error(e, "fetching metrics for diagnosis") from e
 
-
-        total_mem = get_scalar_value(sys_data, "mem.physmem", 1) * 1024
+        metrics_summary = _format_snapshot_for_llm(snapshot)
 
-
-
-
+        system_prompt = (
+            "You are a system performance analyst. Analyze the metrics and provide:\n"
+            "1. A brief diagnosis (2-3 sentences)\n"
+            "2. A severity level: 'healthy', 'warning', or 'critical'\n"
+            "3. Up to 3 actionable recommendations\n\n"
+            "Be concise and focus on actionable insights."
+        )
 
-
+        try:
+            sampling_result = await ctx.sample(
+                messages=f"Analyze these system metrics:\n\n{metrics_summary}",
+                system_prompt=system_prompt,
+                max_tokens=500,
+                result_type=DiagnosisResult,
+            )
+            result = sampling_result.result
+            result.timestamp = snapshot.timestamp
+            result.hostname = snapshot.hostname
+            return result
+        except Exception:
+            return _build_fallback_diagnosis(snapshot)
 
-
-
-
-
-
-
-
-
-
+
+def _format_snapshot_for_llm(snapshot: SystemSnapshot) -> str:
+    """Format a system snapshot as text for LLM analysis."""
+    lines = [f"Host: {snapshot.hostname}", f"Time: {snapshot.timestamp}", ""]
+
+    if snapshot.cpu:
+        lines.extend(
+            [
+                "CPU:",
+                f" User: {snapshot.cpu.user_percent:.1f}%",
+                f" System: {snapshot.cpu.system_percent:.1f}%",
+                f" Idle: {snapshot.cpu.idle_percent:.1f}%",
+                f" I/O Wait: {snapshot.cpu.iowait_percent:.1f}%",
+                f" CPUs: {snapshot.cpu.ncpu}",
+                "",
+            ]
+        )
+
+    if snapshot.memory:
+        total_gb = snapshot.memory.total_bytes / (1024**3)
+        avail_gb = snapshot.memory.available_bytes / (1024**3)
+        lines.extend(
+            [
+                "Memory:",
+                f" Total: {total_gb:.1f} GB",
+                f" Available: {avail_gb:.1f} GB",
+                f" Used: {snapshot.memory.used_percent:.1f}%",
+                f" Swap Used: {snapshot.memory.swap_used_bytes / (1024**3):.1f} GB",
+                "",
+            ]
+        )
+
+    if snapshot.load:
+        lines.extend(
+            [
+                "Load:",
+                f" 1m/5m/15m: {snapshot.load.load_1m:.2f} / "
+                f"{snapshot.load.load_5m:.2f} / {snapshot.load.load_15m:.2f}",
+                f" Runnable: {snapshot.load.runnable}",
+                f" Total procs: {snapshot.load.nprocs}",
+            ]
        )
+
+    return "\n".join(lines)
+
+
+def _build_fallback_diagnosis(snapshot: SystemSnapshot) -> DiagnosisResult:
+    """Build a basic diagnosis when LLM sampling isn't available."""
+    issues: list[str] = []
+    recommendations: list[str] = []
+    severity = "healthy"
+
+    if snapshot.cpu:
+        cpu_busy = 100 - snapshot.cpu.idle_percent
+        if cpu_busy > 90:
+            issues.append(f"CPU is heavily loaded ({cpu_busy:.0f}% busy)")
+            recommendations.append("Identify high-CPU processes with get_process_top")
+            severity = "critical"
+        elif cpu_busy > 70:
+            issues.append(f"CPU moderately busy ({cpu_busy:.0f}%)")
+            severity = "warning" if severity == "healthy" else severity
+
+    if snapshot.memory:
+        if snapshot.memory.used_percent > 90:
+            issues.append(f"Memory pressure high ({snapshot.memory.used_percent:.0f}% used)")
+            recommendations.append("Check for memory leaks or increase RAM")
+            severity = "critical"
+        elif snapshot.memory.used_percent > 75:
+            issues.append(f"Memory usage elevated ({snapshot.memory.used_percent:.0f}%)")
+            severity = "warning" if severity == "healthy" else severity
+
+    if snapshot.load and snapshot.cpu:
+        load_per_cpu = snapshot.load.load_1m / snapshot.cpu.ncpu
+        if load_per_cpu > 2.0:
+            issues.append(
+                f"Load very high ({snapshot.load.load_1m:.1f} for {snapshot.cpu.ncpu} CPUs)"
+            )
+            recommendations.append("Reduce concurrent workload or add capacity")
+            severity = "critical"
+        elif load_per_cpu > 1.0:
+            issues.append(f"Load elevated ({snapshot.load.load_1m:.1f})")
+            severity = "warning" if severity == "healthy" else severity
+
+    if not issues:
+        diagnosis = "System is operating normally. No issues detected."
+    else:
+        diagnosis = " ".join(issues)
+
+    if not recommendations:
+        recommendations = ["Continue monitoring"]
+
+    return DiagnosisResult(
+        timestamp=snapshot.timestamp,
+        hostname=snapshot.hostname,
+        diagnosis=diagnosis,
+        severity=severity,
+        recommendations=recommendations,
+    )
pcp_mcp/utils/__init__.py
CHANGED
@@ -2,7 +2,6 @@
 
 This package contains reusable utilities extracted to follow DRY principles:
 - extractors: Metric data extraction helpers
-- decorators: Error handling and other decorators
 - builders: Metric transformation and builder functions
 """
 
@@ -14,7 +13,6 @@ from pcp_mcp.utils.builders import (
     build_network_metrics,
     build_process_list,
 )
-from pcp_mcp.utils.decorators import handle_pcp_errors
 from pcp_mcp.utils.extractors import (
     extract_help_text,
     extract_timestamp,
@@ -30,8 +28,6 @@ __all__ = [
     "sum_instances",
     "extract_help_text",
     "extract_timestamp",
-    # Decorators
-    "handle_pcp_errors",
     # Builders
     "build_cpu_metrics",
     "build_memory_metrics",
pcp_mcp/utils/extractors.py
CHANGED
@@ -51,10 +51,28 @@ def extract_timestamp(response: dict) -> float:
     return float(ts)
 
 
+def format_units(info: dict) -> str:
+    """Format PCP units into a human-readable string."""
+    units = info.get("units", "")
+    if units:
+        return units
+
+    parts = []
+    if info.get("units-space"):
+        parts.append(info["units-space"])
+    if info.get("units-time"):
+        parts.append(info["units-time"])
+    if info.get("units-count"):
+        parts.append(info["units-count"])
+
+    return " / ".join(parts) if parts else "none"
+
+
 __all__ = [
     "get_first_value",
     "get_scalar_value",
     "sum_instances",
     "extract_help_text",
     "extract_timestamp",
+    "format_units",
 ]
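
The new `format_units` helper collapses per-metric unit metadata into a single display string. A small usage sketch; the metadata dicts are hypothetical examples containing only the keys the function inspects:

```python
# Hypothetical metric-metadata dicts; only the keys format_units() reads are shown.
from pcp_mcp.utils.extractors import format_units

print(format_units({"units": "Kbyte"}))                             # -> "Kbyte"
print(format_units({"units-space": "byte", "units-time": "sec"}))   # -> "byte / sec"
print(format_units({}))                                             # -> "none"
```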
{pcp_mcp-0.1.0.dist-info → pcp_mcp-1.0.1.dist-info}/METADATA
CHANGED
@@ -1,12 +1,12 @@
 Metadata-Version: 2.4
 Name: pcp-mcp
-Version: 0.1.0
+Version: 1.0.1
 Summary: MCP server for Performance Co-Pilot
 Keywords: mcp,pcp,performance-co-pilot,monitoring,model-context-protocol
 Author: Major Hayden
 Author-email: Major Hayden <major@mhtx.net>
 License-Expression: MIT
-Classifier: Development Status ::
+Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: System Administrators
 Classifier: License :: OSI Approved :: MIT License
@@ -17,6 +17,7 @@ Classifier: Programming Language :: Python :: 3.13
 Classifier: Programming Language :: Python :: 3.14
 Classifier: Topic :: System :: Monitoring
 Classifier: Typing :: Typed
+Requires-Dist: cachetools>=5.0
 Requires-Dist: fastmcp>=2.0.0
 Requires-Dist: httpx>=0.27
 Requires-Dist: pydantic-settings>=2.0.0
@@ -32,6 +33,14 @@ MCP server for [Performance Co-Pilot (PCP)](https://pcp.io/) metrics.
 
 Query system performance metrics via the Model Context Protocol - CPU, memory, disk I/O, network, processes, and more.
 
+📖 **[Full Documentation](https://major.github.io/pcp-mcp)** | 🚀 **[Getting Started](https://major.github.io/pcp-mcp/getting-started/)**
+
+[](https://github.com/major/pcp-mcp/actions/workflows/ci.yml)
+[](https://codecov.io/gh/major/pcp-mcp)
+[](https://pypi.org/project/pcp-mcp/)
+[](https://www.python.org/downloads/)
+[](https://opensource.org/licenses/MIT)
+
 ## 🚀 Installation
 
 ```bash
@@ -47,15 +56,15 @@ uv add pcp-mcp
 ## 📋 Requirements
 
 - **Python**: 3.10+
-- **PCP**: Performance Co-Pilot with `pmproxy` running
+- **PCP**: Performance Co-Pilot with `pmcd` and `pmproxy` running
 ```bash
 # Fedora/RHEL/CentOS
 sudo dnf install pcp
-sudo systemctl enable --now pmproxy
+sudo systemctl enable --now pmcd pmproxy
 
 # Ubuntu/Debian
 sudo apt install pcp
-sudo systemctl enable --now pmproxy
+sudo systemctl enable --now pmcd pmproxy
 ```
 
 ## ⚙️ Configuration
@@ -68,9 +77,12 @@ Configure via environment variables:
 | `PCP_PORT` | pmproxy port | `44322` |
 | `PCP_TARGET_HOST` | Target pmcd host to monitor | `localhost` |
 | `PCP_USE_TLS` | Use HTTPS for pmproxy | `false` |
+| `PCP_TLS_VERIFY` | Verify TLS certificates | `true` |
+| `PCP_TLS_CA_BUNDLE` | Path to custom CA bundle | (optional) |
 | `PCP_TIMEOUT` | Request timeout (seconds) | `30` |
 | `PCP_USERNAME` | HTTP basic auth user | (optional) |
 | `PCP_PASSWORD` | HTTP basic auth password | (optional) |
+| `PCP_ALLOWED_HOSTS` | Hostspecs allowed via host param | (optional) |
 
 ## 🎯 Usage
 
@@ -163,9 +175,9 @@ For remote monitoring:
 
 Browse metrics via MCP resources:
 
-- `pcp://
-- `pcp://
-- `pcp://
+- `pcp://health` - Quick system health summary
+- `pcp://metrics/common` - Catalog of commonly used metrics
+- `pcp://namespaces` - Live-discovered metric namespaces
 
 ## 💡 Use Cases
 
@@ -191,9 +203,9 @@ Ask Claude to:
 ## 🏗️ Architecture
 
 ```
-┌─────────┐              ┌─────────┐
+┌─────────┐         ┌─────────┐          ┌─────────┐         ┌─────────┐
 │   LLM   │ ◄─MCP─► │ pcp-mcp │ ◄─HTTP─► │ pmproxy │ ◄─────► │  pmcd   │
-└─────────┘              └─────────┘
+└─────────┘         └─────────┘          └─────────┘         └─────────┘
              (REST API)              (metrics)
 ```
 
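The README table above documents three new settings (`PCP_TLS_VERIFY`, `PCP_TLS_CA_BUNDLE`, `PCP_ALLOWED_HOSTS`) alongside the existing ones. A sketch of how such `PCP_*` variables could bind with pydantic-settings (which the package declares as a dependency); the field names and defaults here mirror the table, not the package's actual `config.py`:

```python
# Sketch only: maps the documented PCP_* environment variables onto a
# pydantic-settings model. This is not the package's real config module.
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class PCPSettings(BaseSettings):
    model_config = SettingsConfigDict(env_prefix="PCP_")

    port: int = 44322                 # PCP_PORT
    target_host: str = "localhost"    # PCP_TARGET_HOST
    use_tls: bool = False             # PCP_USE_TLS
    tls_verify: bool = True           # PCP_TLS_VERIFY (new in 1.0.1)
    tls_ca_bundle: str | None = None  # PCP_TLS_CA_BUNDLE (new in 1.0.1)
    timeout: float = 30.0             # PCP_TIMEOUT
    username: str | None = None       # PCP_USERNAME
    password: str | None = None       # PCP_PASSWORD
    allowed_hosts: list[str] = Field(default_factory=list)  # PCP_ALLOWED_HOSTS (new)


settings = PCPSettings()  # values are read from the environment at instantiation
```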
pcp_mcp-1.0.1.dist-info/RECORD
ADDED
@@ -0,0 +1,26 @@
+pcp_mcp/AGENTS.md,sha256=1jIoPBgrOKEX4kQiUN7SR0rD6Gsu4ZdNKOxJq9Su7FA,2473
+pcp_mcp/__init__.py,sha256=5SKlrOQTqKxcWRvmBwmeXJapTqKggal8L89UxxwHTaQ,1949
+pcp_mcp/client.py,sha256=ZGWGXYn77_hbZ81O0vxjXubY9eRpOZWs2cLxqLO3pf8,9188
+pcp_mcp/config.py,sha256=gm-Sp1y-f3ZGZQk_ercMuKCojG145Fu6UjrvjRQUnpg,3526
+pcp_mcp/context.py,sha256=5M6l72phsijPr96PXpZDm1rd_Uvo7mY_LuiLdeiW2SE,2801
+pcp_mcp/errors.py,sha256=sIes9OSNdYQeOmwjFknhfXXjBjOOzXmc94bbB-4b_tg,1598
+pcp_mcp/icons.py,sha256=ZTMU4iDR0369NPOsaAyDhu-MngtdkbDY_QZbA6BjwN0,1497
+pcp_mcp/middleware.py,sha256=oUSdaCHSy1gVkKyeC2J8ASfhJep-3KvY8GFYRFWUvJ0,2387
+pcp_mcp/models.py,sha256=ugU3a-4PY5cVstxt6XAHGqBqXUt95HVYLq7z7sQDkTA,6835
+pcp_mcp/prompts/__init__.py,sha256=x3QDidJFt2CeLmFZWGLs673m9L9NUi2IC4Me5A9nxw4,12586
+pcp_mcp/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+pcp_mcp/resources/__init__.py,sha256=55Mm2ZCuyyQWWql9N8cLk6ld1pNj3Ezc416jgFX35dU,541
+pcp_mcp/resources/catalog.py,sha256=si1frqawgrxl2ENhFE9cMEzYmTk7VCzShbO5foUU5Ug,12560
+pcp_mcp/resources/health.py,sha256=cHnH2aRF4gdeo0AVUDi_qWqHocDSULhlsGpnkS-qobE,3925
+pcp_mcp/server.py,sha256=N_XknbUCoURd7JQsNO8Tl7i9sdQZetTTE5LFsRjZozs,5030
+pcp_mcp/tools/AGENTS.md,sha256=1yt_W-TYlGA0aWJYCM7D0i2D4899E6_kIhyoqP1np-g,1963
+pcp_mcp/tools/__init__.py,sha256=sXhOqqnUwzSf16QU6eS79LMvXJcv7jqSXQlrpQG4UV0,505
+pcp_mcp/tools/metrics.py,sha256=r-kiAAFaIZue1FEqQuSHdvxHrXnSYhY73oru2HlSOGI,6834
+pcp_mcp/tools/system.py,sha256=g1f5j1qztUNW_4SL1pwFufZ5ZxphAn2sC6Zt98MApLk,16845
+pcp_mcp/utils/__init__.py,sha256=tTbcqrCV9pBBm7N3MwEI37Lc0JM1CVbw_etw36ejRWc,884
+pcp_mcp/utils/builders.py,sha256=n13Ou6cb1-YToG-M31J8_jWajq8ioJx6tJTKnqaQiio,10293
+pcp_mcp/utils/extractors.py,sha256=fy6aCI23JuGt73oIDxwPW_K4B0fJkFCF1VxYkBst0Y4,2279
+pcp_mcp-1.0.1.dist-info/WHEEL,sha256=XV0cjMrO7zXhVAIyyc8aFf1VjZ33Fen4IiJk5zFlC3g,80
+pcp_mcp-1.0.1.dist-info/entry_points.txt,sha256=PhVo92EGoS05yEpHVRyKEsxKya_bWlPLodp-g4tr2Rg,42
+pcp_mcp-1.0.1.dist-info/METADATA,sha256=1b5nl2lT2GjZJRBJH8bkAAV6H8RPo26-F_uSdF8iyJo,6671
+pcp_mcp-1.0.1.dist-info/RECORD,,
pcp_mcp/utils/decorators.py
DELETED
@@ -1,38 +0,0 @@
-"""Error handling decorators for MCP tools."""
-
-from __future__ import annotations
-
-from functools import wraps
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from collections.abc import Callable
-    from typing import Any
-
-
-def handle_pcp_errors(operation: str) -> Callable:
-    """Decorator to convert PCP exceptions to ToolError.
-
-    Args:
-        operation: Description of the operation (e.g., "fetching metrics").
-
-    Returns:
-        Decorated async function that handles PCP errors.
-    """
-
-    def decorator(func: Callable) -> Callable:
-        @wraps(func)
-        async def wrapper(*args: Any, **kwargs: Any) -> Any:
-            from pcp_mcp.errors import handle_pcp_error
-
-            try:
-                return await func(*args, **kwargs)
-            except Exception as e:
-                raise handle_pcp_error(e, operation) from e
-
-        return wrapper
-
-    return decorator
-
-
-__all__ = ["handle_pcp_errors"]