pcp-mcp 1.3.0__py3-none-any.whl → 1.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pcp_mcp/AGENTS.md +0 -1
- pcp_mcp/server.py +0 -9
- {pcp_mcp-1.3.0.dist-info → pcp_mcp-1.3.2.dist-info}/METADATA +1 -9
- {pcp_mcp-1.3.0.dist-info → pcp_mcp-1.3.2.dist-info}/RECORD +6 -9
- {pcp_mcp-1.3.0.dist-info → pcp_mcp-1.3.2.dist-info}/WHEEL +1 -1
- pcp_mcp/resources/__init__.py +0 -21
- pcp_mcp/resources/catalog.py +0 -307
- pcp_mcp/resources/health.py +0 -117
- {pcp_mcp-1.3.0.dist-info → pcp_mcp-1.3.2.dist-info}/entry_points.txt +0 -0
pcp_mcp/AGENTS.md
CHANGED
|
@@ -18,7 +18,6 @@ pcp_mcp/
|
|
|
18
18
|
├── middleware.py # Request caching middleware
|
|
19
19
|
├── icons.py # System assessment icons (emoji mappings)
|
|
20
20
|
├── tools/ # MCP tools (see tools/AGENTS.md)
|
|
21
|
-
├── resources/ # MCP resources (health.py, catalog.py)
|
|
22
21
|
├── utils/ # Extractors, builders
|
|
23
22
|
└── prompts/ # LLM system prompts
|
|
24
23
|
```
|
pcp_mcp/server.py
CHANGED
|
@@ -108,13 +108,6 @@ Tools:
|
|
|
108
108
|
- get_system_snapshot: System overview (CPU, memory, disk, network) - USE THIS FIRST
|
|
109
109
|
- get_process_top: Top processes by resource consumption
|
|
110
110
|
|
|
111
|
-
Resources:
|
|
112
|
-
- pcp://health - Quick system health summary
|
|
113
|
-
- pcp://host/{{hostname}}/health - Per-host health summary (template)
|
|
114
|
-
- pcp://metric/{{name}}/info - Detailed metric metadata (template)
|
|
115
|
-
- pcp://metrics/common - Catalog of commonly used metrics
|
|
116
|
-
- pcp://namespaces - Dynamically discovered metric namespaces
|
|
117
|
-
|
|
118
111
|
Prompts (invoke for guided troubleshooting workflows):
|
|
119
112
|
- diagnose_slow_system: Complete slowness investigation
|
|
120
113
|
- investigate_memory_usage: Memory pressure analysis
|
|
@@ -134,11 +127,9 @@ Prompts (invoke for guided troubleshooting workflows):
|
|
|
134
127
|
mcp.add_middleware(MetricCacheMiddleware())
|
|
135
128
|
|
|
136
129
|
from pcp_mcp.prompts import register_prompts
|
|
137
|
-
from pcp_mcp.resources import register_resources
|
|
138
130
|
from pcp_mcp.tools import register_tools
|
|
139
131
|
|
|
140
132
|
register_tools(mcp)
|
|
141
|
-
register_resources(mcp)
|
|
142
133
|
register_prompts(mcp)
|
|
143
134
|
|
|
144
135
|
return mcp
|
|
{pcp_mcp-1.3.0.dist-info → pcp_mcp-1.3.2.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pcp-mcp
|
|
3
|
-
Version: 1.3.0
|
|
3
|
+
Version: 1.3.2
|
|
4
4
|
Summary: MCP server for Performance Co-Pilot
|
|
5
5
|
Keywords: mcp,pcp,performance-co-pilot,monitoring,model-context-protocol
|
|
6
6
|
Author: Major Hayden
|
|
@@ -189,14 +189,6 @@ For remote monitoring:
|
|
|
189
189
|
→ Uses describe_metric(name="kernel.all.load")
|
|
190
190
|
```
|
|
191
191
|
|
|
192
|
-
## 📚 Resources
|
|
193
|
-
|
|
194
|
-
Browse metrics via MCP resources:
|
|
195
|
-
|
|
196
|
-
- `pcp://health` - Quick system health summary
|
|
197
|
-
- `pcp://metrics/common` - Catalog of commonly used metrics
|
|
198
|
-
- `pcp://namespaces` - Live-discovered metric namespaces
|
|
199
|
-
|
|
200
192
|
## 💡 Use Cases
|
|
201
193
|
|
|
202
194
|
### Performance Troubleshooting
|
|
{pcp_mcp-1.3.0.dist-info → pcp_mcp-1.3.2.dist-info}/RECORD
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
pcp_mcp/AGENTS.md,sha256=
|
|
1
|
+
pcp_mcp/AGENTS.md,sha256=kfitTd6NuPieWLTAl9-m-i93URL1DD7yu-AnK8kA8Yw,2407
|
|
2
2
|
pcp_mcp/__init__.py,sha256=5SKlrOQTqKxcWRvmBwmeXJapTqKggal8L89UxxwHTaQ,1949
|
|
3
3
|
pcp_mcp/client.py,sha256=ZGWGXYn77_hbZ81O0vxjXubY9eRpOZWs2cLxqLO3pf8,9188
|
|
4
4
|
pcp_mcp/config.py,sha256=gm-Sp1y-f3ZGZQk_ercMuKCojG145Fu6UjrvjRQUnpg,3526
|
|
@@ -9,10 +9,7 @@ pcp_mcp/middleware.py,sha256=oUSdaCHSy1gVkKyeC2J8ASfhJep-3KvY8GFYRFWUvJ0,2387
|
|
|
9
9
|
pcp_mcp/models.py,sha256=EPm1R7_qRLDFoqAzwegiv15KpHrmTJTjPD1LpxQdgoc,8202
|
|
10
10
|
pcp_mcp/prompts/__init__.py,sha256=x3QDidJFt2CeLmFZWGLs673m9L9NUi2IC4Me5A9nxw4,12586
|
|
11
11
|
pcp_mcp/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
-
pcp_mcp/
|
|
13
|
-
pcp_mcp/resources/catalog.py,sha256=si1frqawgrxl2ENhFE9cMEzYmTk7VCzShbO5foUU5Ug,12560
|
|
14
|
-
pcp_mcp/resources/health.py,sha256=cHnH2aRF4gdeo0AVUDi_qWqHocDSULhlsGpnkS-qobE,3925
|
|
15
|
-
pcp_mcp/server.py,sha256=N_XknbUCoURd7JQsNO8Tl7i9sdQZetTTE5LFsRjZozs,5030
|
|
12
|
+
pcp_mcp/server.py,sha256=pyEZBJPQZyEFH9DJDiC6o8J6vQz1L9CKo0Xn5f-ghwI,4635
|
|
16
13
|
pcp_mcp/tools/AGENTS.md,sha256=1yt_W-TYlGA0aWJYCM7D0i2D4899E6_kIhyoqP1np-g,1963
|
|
17
14
|
pcp_mcp/tools/__init__.py,sha256=sXhOqqnUwzSf16QU6eS79LMvXJcv7jqSXQlrpQG4UV0,505
|
|
18
15
|
pcp_mcp/tools/metrics.py,sha256=x0HrO_EQMPVM_IE-nC2aIYFqUys9BX15gYyLDMQkEnA,6721
|
|
@@ -20,7 +17,7 @@ pcp_mcp/tools/system.py,sha256=m07X7lobooZ1v6oBmBz5ZQiIl8Fb5OefRmcFZBTLU6A,20832
|
|
|
20
17
|
pcp_mcp/utils/__init__.py,sha256=tTbcqrCV9pBBm7N3MwEI37Lc0JM1CVbw_etw36ejRWc,884
|
|
21
18
|
pcp_mcp/utils/builders.py,sha256=n13Ou6cb1-YToG-M31J8_jWajq8ioJx6tJTKnqaQiio,10293
|
|
22
19
|
pcp_mcp/utils/extractors.py,sha256=fy6aCI23JuGt73oIDxwPW_K4B0fJkFCF1VxYkBst0Y4,2279
|
|
23
|
-
pcp_mcp-1.3.
|
|
24
|
-
pcp_mcp-1.3.0.dist-info/entry_points.txt,sha256=PhVo92EGoS05yEpHVRyKEsxKya_bWlPLodp-g4tr2Rg,42
|
|
25
|
-
pcp_mcp-1.3.
|
|
26
|
-
pcp_mcp-1.3.0.dist-info/RECORD,,
|
|
20
|
+
pcp_mcp-1.3.2.dist-info/WHEEL,sha256=5DEXXimM34_d4Gx1AuF9ysMr1_maoEtGKjaILM3s4w4,80
|
|
21
|
+
pcp_mcp-1.3.2.dist-info/entry_points.txt,sha256=PhVo92EGoS05yEpHVRyKEsxKya_bWlPLodp-g4tr2Rg,42
|
|
22
|
+
pcp_mcp-1.3.2.dist-info/METADATA,sha256=FNHsWq-3UJT0L6WUiAC2AYQPw5LMaFy4O7vFHQJA9sQ,6845
|
|
23
|
+
pcp_mcp-1.3.2.dist-info/RECORD,,
|
pcp_mcp/resources/__init__.py
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
"""Resource registration for the PCP MCP server."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from typing import TYPE_CHECKING
|
|
6
|
-
|
|
7
|
-
if TYPE_CHECKING:
|
|
8
|
-
from fastmcp import FastMCP
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def register_resources(mcp: FastMCP) -> None:
|
|
12
|
-
"""Register all resources with the MCP server.
|
|
13
|
-
|
|
14
|
-
Args:
|
|
15
|
-
mcp: The FastMCP server instance.
|
|
16
|
-
"""
|
|
17
|
-
from pcp_mcp.resources.catalog import register_catalog_resources
|
|
18
|
-
from pcp_mcp.resources.health import register_health_resources
|
|
19
|
-
|
|
20
|
-
register_health_resources(mcp)
|
|
21
|
-
register_catalog_resources(mcp)
|
pcp_mcp/resources/catalog.py
DELETED
|
@@ -1,307 +0,0 @@
|
|
|
1
|
-
"""Catalog resources for common metrics and namespaces."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from typing import TYPE_CHECKING
|
|
6
|
-
|
|
7
|
-
from fastmcp import Context
|
|
8
|
-
|
|
9
|
-
from pcp_mcp.icons import (
|
|
10
|
-
ICON_CATALOG,
|
|
11
|
-
ICON_INFO,
|
|
12
|
-
ICON_NAMESPACE,
|
|
13
|
-
TAGS_CATALOG,
|
|
14
|
-
TAGS_DISCOVERY,
|
|
15
|
-
TAGS_METRICS,
|
|
16
|
-
)
|
|
17
|
-
from pcp_mcp.utils.extractors import extract_help_text, format_units
|
|
18
|
-
|
|
19
|
-
if TYPE_CHECKING:
|
|
20
|
-
from fastmcp import FastMCP
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def register_catalog_resources(mcp: FastMCP) -> None:
|
|
24
|
-
"""Register catalog resources with the MCP server.
|
|
25
|
-
|
|
26
|
-
Args:
|
|
27
|
-
mcp: The FastMCP server instance.
|
|
28
|
-
"""
|
|
29
|
-
|
|
30
|
-
@mcp.resource(
|
|
31
|
-
"pcp://metric/{metric_name}/info",
|
|
32
|
-
icons=[ICON_INFO],
|
|
33
|
-
tags=TAGS_METRICS | TAGS_DISCOVERY,
|
|
34
|
-
)
|
|
35
|
-
async def metric_info(ctx: Context, metric_name: str) -> str:
|
|
36
|
-
"""Detailed metadata for a specific PCP metric.
|
|
37
|
-
|
|
38
|
-
Returns type, semantics, units, and help text. Use to understand
|
|
39
|
-
what a metric measures and how to interpret its values.
|
|
40
|
-
"""
|
|
41
|
-
from pcp_mcp.context import get_client
|
|
42
|
-
from pcp_mcp.errors import handle_pcp_error
|
|
43
|
-
|
|
44
|
-
client = get_client(ctx)
|
|
45
|
-
|
|
46
|
-
try:
|
|
47
|
-
info = await client.describe(metric_name)
|
|
48
|
-
except Exception as e:
|
|
49
|
-
raise handle_pcp_error(e, "describing metric") from e
|
|
50
|
-
|
|
51
|
-
if not info:
|
|
52
|
-
return f"# Metric Not Found\n\nNo metric named `{metric_name}` was found."
|
|
53
|
-
|
|
54
|
-
semantics = info.get("sem", "unknown")
|
|
55
|
-
metric_type = info.get("type", "unknown")
|
|
56
|
-
units = format_units(info)
|
|
57
|
-
help_text = extract_help_text(info) or "No description available."
|
|
58
|
-
indom = info.get("indom")
|
|
59
|
-
|
|
60
|
-
is_counter = semantics == "counter"
|
|
61
|
-
counter_warning = (
|
|
62
|
-
"\n\n> **Warning**: This is a counter metric (cumulative since boot). "
|
|
63
|
-
"Use `get_system_snapshot()` or `get_process_top()` for rate calculation."
|
|
64
|
-
if is_counter
|
|
65
|
-
else ""
|
|
66
|
-
)
|
|
67
|
-
|
|
68
|
-
instances_info = (
|
|
69
|
-
f"\n- **Instance Domain**: {indom} (has per-instance values)"
|
|
70
|
-
if indom and indom != "PM_INDOM_NULL"
|
|
71
|
-
else ""
|
|
72
|
-
)
|
|
73
|
-
|
|
74
|
-
return f"""# Metric: {metric_name}
|
|
75
|
-
|
|
76
|
-
{help_text}{counter_warning}
|
|
77
|
-
|
|
78
|
-
## Properties
|
|
79
|
-
- **Type**: {metric_type}
|
|
80
|
-
- **Semantics**: {semantics}
|
|
81
|
-
- **Units**: {units}{instances_info}
|
|
82
|
-
|
|
83
|
-
## Usage
|
|
84
|
-
|
|
85
|
-
```python
|
|
86
|
-
# Query current value
|
|
87
|
-
query_metrics(["{metric_name}"])
|
|
88
|
-
|
|
89
|
-
# Search related metrics
|
|
90
|
-
search_metrics("{".".join(metric_name.split(".")[:2])}")
|
|
91
|
-
```
|
|
92
|
-
"""
|
|
93
|
-
|
|
94
|
-
@mcp.resource("pcp://metrics/common", icons=[ICON_CATALOG], tags=TAGS_CATALOG)
|
|
95
|
-
def common_metrics_catalog() -> str:
|
|
96
|
-
"""Catalog of commonly used metric groups.
|
|
97
|
-
|
|
98
|
-
Returns a structured guide to the most useful PCP metrics organized
|
|
99
|
-
by troubleshooting domain.
|
|
100
|
-
"""
|
|
101
|
-
return """# Common PCP Metric Groups
|
|
102
|
-
|
|
103
|
-
## CPU Performance
|
|
104
|
-
- kernel.all.cpu.user → User-space CPU time (counter) ⚠️
|
|
105
|
-
- kernel.all.cpu.sys → Kernel CPU time (counter) ⚠️
|
|
106
|
-
- kernel.all.cpu.idle → Idle CPU time (counter) ⚠️
|
|
107
|
-
- kernel.all.cpu.wait.total → I/O wait time (counter) ⚠️ High = disk bottleneck
|
|
108
|
-
- kernel.all.load → Load average (1, 5, 15 min) [instances: 1, 5, 15]
|
|
109
|
-
- kernel.all.runnable → Runnable processes (instant)
|
|
110
|
-
- kernel.all.nprocs → Total processes (instant)
|
|
111
|
-
- hinv.ncpu → Number of CPUs (instant)
|
|
112
|
-
|
|
113
|
-
## Memory
|
|
114
|
-
- mem.physmem → Total physical memory in KB (instant)
|
|
115
|
-
- mem.util.used → Used memory in KB (instant)
|
|
116
|
-
- mem.util.free → Free memory in KB (instant)
|
|
117
|
-
- mem.util.available → Available for apps in KB (instant) ⭐ Use this, not "free"
|
|
118
|
-
- mem.util.cached → Cached data in KB (instant)
|
|
119
|
-
- mem.util.bufmem → Buffer memory in KB (instant)
|
|
120
|
-
- mem.util.swapTotal → Total swap in KB (instant)
|
|
121
|
-
- mem.util.swapFree → Free swap in KB (instant)
|
|
122
|
-
- mem.util.slab → Kernel slab allocator in KB (instant)
|
|
123
|
-
|
|
124
|
-
## Disk I/O
|
|
125
|
-
- disk.all.read_bytes → Total bytes read (counter) ⚠️
|
|
126
|
-
- disk.all.write_bytes → Total bytes written (counter) ⚠️
|
|
127
|
-
- disk.all.read → Total read operations (counter) ⚠️
|
|
128
|
-
- disk.all.write → Total write operations (counter) ⚠️
|
|
129
|
-
- disk.dev.read_bytes → Per-disk reads in bytes [instances: sda, sdb, ...] (counter) ⚠️
|
|
130
|
-
- disk.dev.write_bytes → Per-disk writes in bytes [instances: sda, sdb, ...] (counter) ⚠️
|
|
131
|
-
- disk.dev.avactive → Average time disk was active (instant)
|
|
132
|
-
|
|
133
|
-
## Network
|
|
134
|
-
- network.interface.in.bytes → Bytes received [instances: eth0, lo, ...] (counter) ⚠️
|
|
135
|
-
- network.interface.out.bytes → Bytes sent [instances: eth0, lo, ...] (counter) ⚠️
|
|
136
|
-
- network.interface.in.packets → Packets received [instances] (counter) ⚠️
|
|
137
|
-
- network.interface.out.packets → Packets sent [instances] (counter) ⚠️
|
|
138
|
-
- network.interface.in.errors → Receive errors [instances] (counter) ⚠️
|
|
139
|
-
- network.interface.out.errors → Transmit errors [instances] (counter) ⚠️
|
|
140
|
-
|
|
141
|
-
## Process Metrics (⚠️ Use get_process_top instead of raw queries)
|
|
142
|
-
- proc.psinfo.pid → Process ID [instances: PIDs]
|
|
143
|
-
- proc.psinfo.cmd → Command name [instances: PIDs]
|
|
144
|
-
- proc.psinfo.psargs → Full command line [instances: PIDs]
|
|
145
|
-
- proc.memory.rss → Resident set size in KB [instances: PIDs] (instant)
|
|
146
|
-
- proc.memory.vmsize → Virtual memory size in KB [instances: PIDs] (instant)
|
|
147
|
-
- proc.psinfo.utime → User CPU time in ms [instances: PIDs] (counter) ⚠️
|
|
148
|
-
- proc.psinfo.stime → System CPU time in ms [instances: PIDs] (counter) ⚠️
|
|
149
|
-
- proc.io.read_bytes → Process I/O reads in bytes [instances: PIDs] (counter) ⚠️
|
|
150
|
-
- proc.io.write_bytes → Process I/O writes in bytes [instances: PIDs] (counter) ⚠️
|
|
151
|
-
|
|
152
|
-
## System Health
|
|
153
|
-
- kernel.all.uptime → System uptime in seconds (instant)
|
|
154
|
-
- kernel.all.nusers → Logged-in users (instant)
|
|
155
|
-
- pmcd.agent.status → PMDA agent health [instances: agent names] (instant)
|
|
156
|
-
- pmcd.pmlogger.host → Active pmlogger hosts [instances] (instant)
|
|
157
|
-
|
|
158
|
-
## Container Metrics (requires cgroups PMDA)
|
|
159
|
-
- cgroup.cpuacct.usage → CPU usage per cgroup [instances: cgroup paths] (counter) ⚠️
|
|
160
|
-
- cgroup.memory.usage → Memory usage per cgroup [instances: cgroup paths] (instant)
|
|
161
|
-
- cgroup.blkio.io_service_bytes → I/O per cgroup [instances: cgroup paths] (counter) ⚠️
|
|
162
|
-
|
|
163
|
-
---
|
|
164
|
-
|
|
165
|
-
## Legend
|
|
166
|
-
⚠️ = COUNTER METRIC - Use get_system_snapshot() or get_process_top() for rates
|
|
167
|
-
⭐ = Recommended over alternatives
|
|
168
|
-
[instances] = Returns multiple values (per-CPU, per-disk, per-process, etc.)
|
|
169
|
-
(instant) = Instantaneous gauge value
|
|
170
|
-
(counter) = Cumulative counter since boot
|
|
171
|
-
"""
|
|
172
|
-
|
|
173
|
-
@mcp.resource("pcp://namespaces", icons=[ICON_NAMESPACE], tags=TAGS_DISCOVERY)
|
|
174
|
-
async def metric_namespaces(ctx: Context) -> str:
|
|
175
|
-
"""List available PCP metric namespaces discovered from the live system.
|
|
176
|
-
|
|
177
|
-
Queries the connected PCP server to enumerate top-level namespaces
|
|
178
|
-
and active PMDAs, showing exactly what's available on this system.
|
|
179
|
-
"""
|
|
180
|
-
from pcp_mcp.context import get_client
|
|
181
|
-
from pcp_mcp.errors import handle_pcp_error
|
|
182
|
-
|
|
183
|
-
client = get_client(ctx)
|
|
184
|
-
|
|
185
|
-
try:
|
|
186
|
-
all_metrics = await client.search("")
|
|
187
|
-
namespaces = sorted(
|
|
188
|
-
{m.get("name", "").split(".")[0] for m in all_metrics if m.get("name")}
|
|
189
|
-
)
|
|
190
|
-
|
|
191
|
-
pmda_status = await client.fetch(["pmcd.agent.status"])
|
|
192
|
-
active_pmdas = []
|
|
193
|
-
for metric in pmda_status.get("values", []):
|
|
194
|
-
for inst in metric.get("instances", []):
|
|
195
|
-
instance_id = inst.get("instance")
|
|
196
|
-
status = inst.get("value")
|
|
197
|
-
if instance_id is not None and instance_id != -1 and status == 0:
|
|
198
|
-
active_pmdas.append(str(instance_id))
|
|
199
|
-
|
|
200
|
-
except Exception as e:
|
|
201
|
-
raise handle_pcp_error(e, "discovering namespaces") from e
|
|
202
|
-
|
|
203
|
-
output = f"""# PCP Metric Namespaces (Live Discovery)
|
|
204
|
-
|
|
205
|
-
Connected to: {client.target_host}
|
|
206
|
-
Active PMDAs: {len(active_pmdas)}
|
|
207
|
-
Top-level namespaces: {len(namespaces)}
|
|
208
|
-
|
|
209
|
-
## Available Namespaces
|
|
210
|
-
|
|
211
|
-
"""
|
|
212
|
-
|
|
213
|
-
namespace_docs = {
|
|
214
|
-
"kernel": "System-wide kernel statistics (CPU, load, interrupts, uptime)",
|
|
215
|
-
"mem": "Memory subsystem (physmem, swap, cache, buffers, NUMA)",
|
|
216
|
-
"disk": "Disk I/O (aggregates, per-device, partitions, device mapper)",
|
|
217
|
-
"network": "Network interfaces and protocols (TCP, UDP, IP)",
|
|
218
|
-
"proc": "Per-process metrics ⚠️ Use get_process_top instead of raw queries",
|
|
219
|
-
"hinv": "Hardware inventory (ncpu, physmem, architecture - static info)",
|
|
220
|
-
"pmcd": "PCP daemon health (agent status, clients, control)",
|
|
221
|
-
"pmproxy": "pmproxy daemon metrics (if pmproxy PMDA loaded)",
|
|
222
|
-
"cgroup": "Container/cgroup metrics (CPU, memory, I/O per cgroup)",
|
|
223
|
-
"containers": "Container metrics (Docker, Podman via PMDA)",
|
|
224
|
-
"filesys": "Filesystem metrics (capacity, used, free per mount point)",
|
|
225
|
-
"nfs": "NFS version-agnostic metrics",
|
|
226
|
-
"nfs3": "NFSv3 client and server metrics",
|
|
227
|
-
"nfs4": "NFSv4 client and server metrics",
|
|
228
|
-
"swap": "Swap device metrics (activity per swap device)",
|
|
229
|
-
"quota": "Filesystem quota metrics",
|
|
230
|
-
"xfs": "XFS filesystem-specific metrics",
|
|
231
|
-
"btrfs": "Btrfs filesystem-specific metrics",
|
|
232
|
-
"zfs": "ZFS filesystem-specific metrics",
|
|
233
|
-
"kvm": "KVM hypervisor metrics (guest VMs)",
|
|
234
|
-
"libvirt": "libvirt virtualization metrics",
|
|
235
|
-
"redis": "Redis server metrics (via redis PMDA)",
|
|
236
|
-
"postgresql": "PostgreSQL database metrics (via postgresql PMDA)",
|
|
237
|
-
"mysql": "MySQL database metrics (via mysql PMDA)",
|
|
238
|
-
"nginx": "nginx web server metrics",
|
|
239
|
-
"apache": "Apache web server metrics",
|
|
240
|
-
"haproxy": "HAProxy load balancer metrics",
|
|
241
|
-
"elasticsearch": "Elasticsearch metrics",
|
|
242
|
-
"mongodb": "MongoDB metrics",
|
|
243
|
-
"bcc": "eBPF-based advanced profiling (BPF PMDA - requires kernel 4.1+)",
|
|
244
|
-
"hotproc": "Hot process tracking (automatically tracks top resource consumers)",
|
|
245
|
-
"mmv": "Memory-mapped value metrics (custom app instrumentation)",
|
|
246
|
-
"sysfs": "Linux sysfs metrics",
|
|
247
|
-
"event": "System event tracing",
|
|
248
|
-
"ipc": "Inter-process communication metrics (SysV IPC)",
|
|
249
|
-
"jbd2": "JBD2 journal metrics (ext4 filesystem journaling)",
|
|
250
|
-
"rpc": "RPC statistics",
|
|
251
|
-
"acct": "Process accounting metrics",
|
|
252
|
-
"fchost": "Fibre Channel host metrics",
|
|
253
|
-
"tape": "Tape device metrics",
|
|
254
|
-
"hyperv": "Hyper-V guest metrics",
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
for ns in namespaces:
|
|
258
|
-
doc = namespace_docs.get(ns, "Namespace provided by PMDA (no built-in description)")
|
|
259
|
-
output += f"- **{ns}.***: {doc}\n"
|
|
260
|
-
|
|
261
|
-
output += f"""
|
|
262
|
-
## Active PMDAs on This System
|
|
263
|
-
|
|
264
|
-
{", ".join(active_pmdas) if active_pmdas else "Unable to enumerate PMDAs"}
|
|
265
|
-
|
|
266
|
-
Status 0 = Running, non-zero = Error
|
|
267
|
-
|
|
268
|
-
## Namespace Categories
|
|
269
|
-
|
|
270
|
-
### Core System (always available)
|
|
271
|
-
kernel, mem, disk, network, proc, hinv, pmcd
|
|
272
|
-
|
|
273
|
-
### Filesystems
|
|
274
|
-
filesys, xfs, btrfs, zfs, quota, swap
|
|
275
|
-
|
|
276
|
-
### Virtualization
|
|
277
|
-
kvm, libvirt, containers, cgroup, hyperv
|
|
278
|
-
|
|
279
|
-
### Databases
|
|
280
|
-
redis, postgresql, mysql, elasticsearch, mongodb
|
|
281
|
-
|
|
282
|
-
### Web Servers
|
|
283
|
-
nginx, apache, haproxy
|
|
284
|
-
|
|
285
|
-
### Advanced
|
|
286
|
-
bcc (eBPF), hotproc (auto-tracking), mmv (custom metrics), event (tracing)
|
|
287
|
-
|
|
288
|
-
## Discovery Workflow
|
|
289
|
-
|
|
290
|
-
1. **Explore a namespace**: search_metrics("{namespaces[0] if namespaces else "kernel"}")
|
|
291
|
-
2. **Count metrics in namespace**: search_metrics("disk") to see all disk.* metrics
|
|
292
|
-
3. **Get metric details**: describe_metric("full.metric.name")
|
|
293
|
-
4. **Query specific metrics**: query_metrics(["name1", "name2"])
|
|
294
|
-
|
|
295
|
-
## Navigation Strategy
|
|
296
|
-
|
|
297
|
-
**Top-down** (recommended for troubleshooting):
|
|
298
|
-
1. Start with get_system_snapshot() → Identifies problem domain
|
|
299
|
-
2. Drill into relevant namespace (e.g., "disk" issue → search_metrics("disk.dev"))
|
|
300
|
-
3. Query specific metrics with query_metrics([...])
|
|
301
|
-
|
|
302
|
-
**Bottom-up** (exploring new system):
|
|
303
|
-
1. Browse this pcp://namespaces resource → See what's available
|
|
304
|
-
2. search_metrics("interesting.namespace") → Explore subtree
|
|
305
|
-
3. describe_metric("full.name") → Understand semantics
|
|
306
|
-
"""
|
|
307
|
-
return output
|
pcp_mcp/resources/health.py
DELETED
|
@@ -1,117 +0,0 @@
|
|
|
1
|
-
"""Health summary resource for quick system status."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from datetime import datetime, timezone
|
|
6
|
-
from typing import TYPE_CHECKING
|
|
7
|
-
|
|
8
|
-
from fastmcp import Context
|
|
9
|
-
|
|
10
|
-
from pcp_mcp.client import PCPClient
|
|
11
|
-
from pcp_mcp.context import get_client, get_client_for_host, get_settings
|
|
12
|
-
from pcp_mcp.icons import ICON_HEALTH, TAGS_HEALTH
|
|
13
|
-
from pcp_mcp.models import CPUMetrics, LoadMetrics, MemoryMetrics
|
|
14
|
-
from pcp_mcp.tools.system import COUNTER_METRICS, SNAPSHOT_METRICS
|
|
15
|
-
from pcp_mcp.utils.builders import (
|
|
16
|
-
build_cpu_metrics,
|
|
17
|
-
build_load_metrics,
|
|
18
|
-
build_memory_metrics,
|
|
19
|
-
)
|
|
20
|
-
|
|
21
|
-
if TYPE_CHECKING:
|
|
22
|
-
from fastmcp import FastMCP
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def _format_health_summary(
|
|
26
|
-
client: PCPClient,
|
|
27
|
-
cpu: CPUMetrics,
|
|
28
|
-
memory: MemoryMetrics,
|
|
29
|
-
load: LoadMetrics,
|
|
30
|
-
) -> str:
|
|
31
|
-
"""Format health metrics into a markdown summary."""
|
|
32
|
-
timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
|
|
33
|
-
|
|
34
|
-
return f"""# System Health Summary
|
|
35
|
-
Host: {client.target_host}
|
|
36
|
-
Time: {timestamp}
|
|
37
|
-
|
|
38
|
-
## CPU
|
|
39
|
-
- User: {cpu.user_percent}%
|
|
40
|
-
- System: {cpu.system_percent}%
|
|
41
|
-
- Idle: {cpu.idle_percent}%
|
|
42
|
-
- I/O Wait: {cpu.iowait_percent}%
|
|
43
|
-
- CPUs: {cpu.ncpu}
|
|
44
|
-
- Assessment: {cpu.assessment}
|
|
45
|
-
|
|
46
|
-
## Memory
|
|
47
|
-
- Used: {memory.used_percent}% ({memory.used_bytes / 1e9:.1f} / {memory.total_bytes / 1e9:.1f} GB)
|
|
48
|
-
- Available: {memory.available_bytes / 1e9:.1f} GB
|
|
49
|
-
- Cached: {memory.cached_bytes / 1e9:.1f} GB
|
|
50
|
-
- Swap: {memory.swap_used_bytes / 1e9:.1f} GB / {memory.swap_total_bytes / 1e9:.1f} GB
|
|
51
|
-
- Assessment: {memory.assessment}
|
|
52
|
-
|
|
53
|
-
## Load
|
|
54
|
-
- 1 min: {load.load_1m}
|
|
55
|
-
- 5 min: {load.load_5m}
|
|
56
|
-
- 15 min: {load.load_15m}
|
|
57
|
-
- Runnable: {load.runnable}
|
|
58
|
-
- Processes: {load.nprocs}
|
|
59
|
-
- Assessment: {load.assessment}
|
|
60
|
-
"""
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
async def _fetch_health_data(client: PCPClient) -> tuple[CPUMetrics, MemoryMetrics, LoadMetrics]:
|
|
64
|
-
"""Fetch and build health metrics from a client."""
|
|
65
|
-
metrics = SNAPSHOT_METRICS["cpu"] + SNAPSHOT_METRICS["memory"] + SNAPSHOT_METRICS["load"]
|
|
66
|
-
data = await client.fetch_with_rates(metrics, COUNTER_METRICS, sample_interval=1.0)
|
|
67
|
-
|
|
68
|
-
return (
|
|
69
|
-
build_cpu_metrics(data),
|
|
70
|
-
build_memory_metrics(data),
|
|
71
|
-
build_load_metrics(data),
|
|
72
|
-
)
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def register_health_resources(mcp: FastMCP) -> None:
|
|
76
|
-
"""Register health resources with the MCP server."""
|
|
77
|
-
|
|
78
|
-
@mcp.resource("pcp://health", icons=[ICON_HEALTH], tags=TAGS_HEALTH)
|
|
79
|
-
async def health_summary(ctx: Context) -> str:
|
|
80
|
-
"""Quick system health summary for the default target host.
|
|
81
|
-
|
|
82
|
-
Returns a text summary of CPU, memory, and load status suitable
|
|
83
|
-
for quick health checks. For detailed metrics, use the
|
|
84
|
-
get_system_snapshot tool instead.
|
|
85
|
-
"""
|
|
86
|
-
client = get_client(ctx)
|
|
87
|
-
|
|
88
|
-
try:
|
|
89
|
-
cpu, memory, load = await _fetch_health_data(client)
|
|
90
|
-
except Exception as e:
|
|
91
|
-
return f"Error fetching health data: {e}"
|
|
92
|
-
|
|
93
|
-
return _format_health_summary(client, cpu, memory, load)
|
|
94
|
-
|
|
95
|
-
@mcp.resource("pcp://host/{hostname}/health", icons=[ICON_HEALTH], tags=TAGS_HEALTH)
|
|
96
|
-
async def host_health_summary(ctx: Context, hostname: str) -> str:
|
|
97
|
-
"""System health summary for a specific host.
|
|
98
|
-
|
|
99
|
-
Returns a text summary of CPU, memory, and load status for the
|
|
100
|
-
specified hostname. Requires PCP_ALLOWED_HOSTS to be configured
|
|
101
|
-
if querying hosts other than the default target.
|
|
102
|
-
"""
|
|
103
|
-
settings = get_settings(ctx)
|
|
104
|
-
|
|
105
|
-
if not settings.is_host_allowed(hostname):
|
|
106
|
-
return (
|
|
107
|
-
f"Error: Host '{hostname}' is not allowed. "
|
|
108
|
-
f"Configure PCP_ALLOWED_HOSTS to permit additional hosts."
|
|
109
|
-
)
|
|
110
|
-
|
|
111
|
-
async with get_client_for_host(ctx, hostname) as client:
|
|
112
|
-
try:
|
|
113
|
-
cpu, memory, load = await _fetch_health_data(client)
|
|
114
|
-
except Exception as e:
|
|
115
|
-
return f"Error fetching health data from {hostname}: {e}"
|
|
116
|
-
|
|
117
|
-
return _format_health_summary(client, cpu, memory, load)
|
|
File without changes
|