pcp-mcp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pcp_mcp/__init__.py +59 -0
- pcp_mcp/client.py +246 -0
- pcp_mcp/config.py +50 -0
- pcp_mcp/context.py +57 -0
- pcp_mcp/errors.py +47 -0
- pcp_mcp/models.py +142 -0
- pcp_mcp/prompts/__init__.py +295 -0
- pcp_mcp/resources/__init__.py +21 -0
- pcp_mcp/resources/catalog.py +233 -0
- pcp_mcp/resources/health.py +74 -0
- pcp_mcp/server.py +124 -0
- pcp_mcp/tools/__init__.py +21 -0
- pcp_mcp/tools/metrics.py +148 -0
- pcp_mcp/tools/system.py +229 -0
- pcp_mcp/utils/__init__.py +42 -0
- pcp_mcp/utils/builders.py +290 -0
- pcp_mcp/utils/decorators.py +38 -0
- pcp_mcp/utils/extractors.py +60 -0
- pcp_mcp-0.1.0.dist-info/METADATA +227 -0
- pcp_mcp-0.1.0.dist-info/RECORD +22 -0
- pcp_mcp-0.1.0.dist-info/WHEEL +4 -0
- pcp_mcp-0.1.0.dist-info/entry_points.txt +3 -0
pcp_mcp/server.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""FastMCP server setup and lifespan management."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import AsyncIterator
|
|
6
|
+
from contextlib import asynccontextmanager
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from fastmcp import FastMCP
|
|
10
|
+
|
|
11
|
+
from pcp_mcp.client import PCPClient
|
|
12
|
+
from pcp_mcp.config import PCPMCPSettings
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@asynccontextmanager
async def lifespan(mcp: FastMCP) -> AsyncIterator[dict[str, Any]]:
    """Open a PCPClient for as long as the server runs.

    Settings are loaded once on entry, a client is built from them, and the
    client stays open until the context manager exits.

    Args:
        mcp: The FastMCP server instance (not used by the body).

    Yields:
        A dict holding the open client under ``"client"`` and the loaded
        settings under ``"settings"``.
    """
    config = PCPMCPSettings()
    pcp_client = PCPClient(
        base_url=config.base_url,
        target_host=config.target_host,
        auth=config.auth,
        timeout=config.timeout,
    )

    # Leaving the ``async with`` block closes the client when the server stops.
    async with pcp_client as client:
        yield {"client": client, "settings": config}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def create_server() -> FastMCP:
    """Create and configure the MCP server.

    Loads ``PCPMCPSettings``, builds a ``FastMCP`` instance named ``"pcp"``
    whose instructions embed the configured target host and pmproxy URL,
    attaches the ``lifespan`` context manager, and then registers tools,
    resources and prompts (in that order).

    Returns:
        Configured FastMCP server instance.
    """
    settings = PCPMCPSettings()

    # The instructions are runtime text handed to the MCP client. The
    # "High CPU usage" workflow points at get_system_snapshot rather than a
    # raw query_metrics call: kernel.all.cpu.* are cumulative counters (see
    # the warning further down in the same text) and query_metrics takes
    # concrete metric names, not wildcards.
    mcp = FastMCP(
        name="pcp",
        instructions=f"""PCP MCP Server - Performance Co-Pilot Metrics

Monitoring target: {settings.target_host}
pmproxy endpoint: {settings.base_url}

🎯 QUICK START GUIDE FOR LLMs

Common troubleshooting workflows:
- "System is slow" → get_system_snapshot(categories=["cpu", "load", "memory"])
- "High CPU usage" → get_process_top(sort_by="cpu") + get_system_snapshot(categories=["cpu"])
- "Memory pressure" → get_system_snapshot(categories=["memory"]) + search_metrics("mem.util")
- "Disk I/O issues" → get_system_snapshot(categories=["disk"]) + search_metrics("disk.dev")
- "Network saturation" → get_system_snapshot(categories=["network"]) +
search_metrics("network.interface")

📊 METRIC NAMESPACE GUIDE

Key prefixes and what they measure:
- kernel.all.* → System-wide aggregates (load, CPU totals, interrupts)
- mem.* → Memory utilization (physmem, swap, buffers, cache)
- disk.* → Disk I/O (disk.all.* for totals, disk.dev.* per-device)
- network.* → Network I/O (network.interface.* per-interface)
- proc.* → Per-process metrics (use get_process_top instead of raw queries)
- hinv.* → Hardware inventory (ncpu, physmem, machine type)
- pmcd.* → PCP daemon health (agent status, timeouts)
- cgroup.* → Container/cgroup metrics (requires cgroups PMDA)

⚠️ COUNTER METRICS WARNING

These metrics are CUMULATIVE (values since boot):
- kernel.all.cpu.*
- disk.all.read_bytes, disk.all.write_bytes
- network.interface.in.bytes, network.interface.out.bytes
- proc.psinfo.utime, proc.psinfo.stime
- proc.io.read_bytes, proc.io.write_bytes

For meaningful rates, use:
- get_system_snapshot (handles rate calculation automatically)
- get_process_top (handles rate calculation automatically)

DO NOT query these directly with query_metrics expecting per-second rates!

🔍 DISCOVERY WORKFLOW

1. Start broad: get_system_snapshot() or get_process_top()
2. Drill down: search_metrics("prefix") to find specific metrics
3. Investigate: describe_metric("full.metric.name") for units/semantics
4. Query: query_metrics(["name1", "name2"]) for raw values (non-counters only)

Tools:
- query_metrics: Fetch specific metrics by name (use for instant/gauge metrics)
- search_metrics: Find metrics matching a pattern (e.g., 'kernel.all', 'mem')
- describe_metric: Get metadata for a metric (type, units, help text)
- get_system_snapshot: System overview (CPU, memory, disk, network) - USE THIS FIRST
- get_process_top: Top processes by resource consumption

Resources:
- pcp://health - Quick system health summary
- pcp://metrics/common - Catalog of commonly used metrics
- pcp://namespaces - Dynamically discovered metric namespaces
""",
        lifespan=lifespan,
    )

    # NOTE(review): these imports are deferred to call time, presumably to
    # avoid import cycles with the registration modules - confirm before
    # hoisting them to module level.
    from pcp_mcp.prompts import register_prompts
    from pcp_mcp.resources import register_resources
    from pcp_mcp.tools import register_tools

    register_tools(mcp)
    register_resources(mcp)
    register_prompts(mcp)

    return mcp
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Tool registration for the PCP MCP server."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from fastmcp import FastMCP
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def register_tools(mcp: FastMCP) -> None:
    """Attach every tool group to the MCP server.

    The metric tools are registered first, then the system tools.

    Args:
        mcp: The FastMCP server instance that receives the tools.
    """
    # Imported at call time, exactly as the registration modules are only
    # needed once a server is actually being assembled.
    from pcp_mcp.tools.metrics import register_metrics_tools
    from pcp_mcp.tools.system import register_system_tools

    for register in (register_metrics_tools, register_system_tools):
        register(mcp)
|
pcp_mcp/tools/metrics.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Core metric tools for querying PCP metrics."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, Annotated
|
|
6
|
+
|
|
7
|
+
from fastmcp import Context
|
|
8
|
+
from pydantic import Field
|
|
9
|
+
|
|
10
|
+
from pcp_mcp.context import get_client
|
|
11
|
+
from pcp_mcp.models import MetricInfo, MetricSearchResult, MetricValue
|
|
12
|
+
from pcp_mcp.utils.extractors import extract_help_text
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from fastmcp import FastMCP
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def register_metrics_tools(mcp: FastMCP) -> None:
    """Attach the core metric tools to the MCP server.

    Three tools are defined and registered through ``mcp.tool()``:
    ``query_metrics``, ``search_metrics`` and ``describe_metric``. Each one
    obtains the PCP client from the request context and converts any client
    failure through ``handle_pcp_error``.

    Args:
        mcp: The FastMCP server instance that receives the tools.
    """
    # NOTE(review): the tool docstrings and Field descriptions below are left
    # verbatim; presumably the framework publishes them to MCP clients.

    @mcp.tool()
    async def query_metrics(
        ctx: Context,
        names: Annotated[
            list[str],
            Field(description="List of PCP metric names to fetch (e.g., ['kernel.all.load'])"),
        ],
    ) -> list[MetricValue]:
        """Fetch current values for specific PCP metrics.

        Returns the current value for each requested metric. For metrics with
        instances (e.g., per-CPU, per-disk), returns one MetricValue per instance.
        """
        from pcp_mcp.errors import handle_pcp_error

        pcp = get_client(ctx)

        try:
            payload = await pcp.fetch(names)
        except Exception as exc:
            raise handle_pcp_error(exc, "fetching metrics") from exc

        collected: list[MetricValue] = []
        for entry in payload.get("values", []):
            metric_name = entry.get("name", "")
            for sample in entry.get("instances", []):
                raw_id = sample.get("instance")
                # A missing id or -1 yields no instance label; anything else
                # is reported as its string form.
                label = None if raw_id is None or raw_id == -1 else str(raw_id)
                collected.append(
                    MetricValue(
                        name=metric_name,
                        value=sample.get("value"),
                        instance=label,
                    )
                )

        return collected

    @mcp.tool()
    async def search_metrics(
        ctx: Context,
        pattern: Annotated[
            str,
            Field(description="Metric name prefix to search for (e.g., 'kernel.all', 'mem')"),
        ],
    ) -> list[MetricSearchResult]:
        """Find PCP metrics matching a name pattern.

        Use this to discover available metrics before querying them.
        Returns metric names and brief descriptions.
        """
        from pcp_mcp.errors import handle_pcp_error

        pcp = get_client(ctx)

        try:
            matches = await pcp.search(pattern)
        except Exception as exc:
            raise handle_pcp_error(exc, "searching metrics") from exc

        found: list[MetricSearchResult] = []
        for match in matches:
            found.append(
                MetricSearchResult(
                    name=match.get("name", ""),
                    help_text=extract_help_text(match),
                )
            )
        return found

    @mcp.tool()
    async def describe_metric(
        ctx: Context,
        name: Annotated[
            str,
            Field(description="Full PCP metric name (e.g., 'kernel.all.cpu.user')"),
        ],
    ) -> MetricInfo:
        """Get detailed metadata about a PCP metric.

        Returns type, semantics, units, and help text for the metric.
        Use this to understand what a metric measures and how to interpret it.
        """
        from fastmcp.exceptions import ToolError

        from pcp_mcp.errors import handle_pcp_error

        pcp = get_client(ctx)

        try:
            details = await pcp.describe(name)
        except Exception as exc:
            raise handle_pcp_error(exc, "describing metric") from exc

        # An empty/None description is reported as "not found".
        if not details:
            raise ToolError(f"Metric not found: {name}")

        metric_type = details.get("type", "unknown")
        semantics = details.get("sem", "unknown")
        return MetricInfo(
            name=details.get("name", name),
            type=metric_type,
            semantics=semantics,
            units=_format_units(details),
            help_text=extract_help_text(details),
            indom=details.get("indom"),
        )
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _format_units(info: dict) -> str:
    """Format PCP units into a human-readable string.

    The ``"units"`` entry is preferred when it holds something non-blank.
    Otherwise the ``"units-space"``, ``"units-time"`` and ``"units-count"``
    entries that are present and truthy are joined with ``" / "``.

    Args:
        info: Metric description mapping.

    Returns:
        The units text, always as a ``str``; ``"none"`` when nothing usable
        is present.
    """
    units = info.get("units", "")
    if units:
        # Coerce and trim so a non-string or whitespace-only value never
        # leaks out as a blank or non-str result.
        text = str(units).strip()
        if text:
            return text

    # Fallback: build from components if available. Values are coerced to
    # str so a numeric component cannot break the join.
    parts = [
        str(info[key])
        for key in ("units-space", "units-time", "units-count")
        if info.get(key)
    ]

    return " / ".join(parts) if parts else "none"
|
pcp_mcp/tools/system.py
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""System health tools that bundle related metrics into single queries."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from typing import TYPE_CHECKING, Annotated, Literal
|
|
7
|
+
|
|
8
|
+
from fastmcp import Context
|
|
9
|
+
from pydantic import Field
|
|
10
|
+
|
|
11
|
+
from pcp_mcp.context import get_client
|
|
12
|
+
from pcp_mcp.models import ProcessTopResult, SystemSnapshot
|
|
13
|
+
from pcp_mcp.utils.builders import (
|
|
14
|
+
assess_processes,
|
|
15
|
+
build_cpu_metrics,
|
|
16
|
+
build_disk_metrics,
|
|
17
|
+
build_load_metrics,
|
|
18
|
+
build_memory_metrics,
|
|
19
|
+
build_network_metrics,
|
|
20
|
+
build_process_list,
|
|
21
|
+
get_sort_key,
|
|
22
|
+
)
|
|
23
|
+
from pcp_mcp.utils.extractors import get_scalar_value
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from fastmcp import FastMCP
|
|
27
|
+
|
|
28
|
+
# Metric names requested for each snapshot category. "hinv.ncpu" is listed
# under both "cpu" and "load", so selecting both categories names it twice.
SNAPSHOT_METRICS: dict[str, list[str]] = {
    # CPU time metrics plus the CPU count.
    "cpu": [
        "kernel.all.cpu.user",
        "kernel.all.cpu.sys",
        "kernel.all.cpu.idle",
        "kernel.all.cpu.wait.total",
        "hinv.ncpu",
    ],
    # Physical memory, its utilisation breakdown, and swap.
    "memory": [
        "mem.physmem",
        "mem.util.used",
        "mem.util.free",
        "mem.util.available",
        "mem.util.cached",
        "mem.util.bufmem",
        "mem.util.swapTotal",
        "mem.util.swapFree",
    ],
    # Load metrics, run queue and process counts, plus the CPU count.
    "load": [
        "kernel.all.load",
        "kernel.all.runnable",
        "kernel.all.nprocs",
        "hinv.ncpu",
    ],
    # Aggregate disk traffic: bytes and operations.
    "disk": [
        "disk.all.read_bytes",
        "disk.all.write_bytes",
        "disk.all.read",
        "disk.all.write",
    ],
    # Network interface traffic: bytes and packets.
    "network": [
        "network.interface.in.bytes",
        "network.interface.out.bytes",
        "network.interface.in.packets",
        "network.interface.out.packets",
    ],
}
|
|
65
|
+
|
|
66
|
+
# Snapshot metrics passed to the client as "counters" when sampling rates.
COUNTER_METRICS: set[str] = {
    # CPU time
    "kernel.all.cpu.user",
    "kernel.all.cpu.sys",
    "kernel.all.cpu.idle",
    "kernel.all.cpu.wait.total",
    # Disk traffic
    "disk.all.read_bytes",
    "disk.all.write_bytes",
    "disk.all.read",
    "disk.all.write",
    # Network traffic
    "network.interface.in.bytes",
    "network.interface.out.bytes",
    "network.interface.in.packets",
    "network.interface.out.packets",
}
|
|
80
|
+
|
|
81
|
+
# Per-process metric names, grouped by the resource they describe. The "info"
# group identifies the process (pid, command, arguments).
PROCESS_METRICS: dict[str, list[str]] = {
    "cpu": [
        "proc.psinfo.utime",
        "proc.psinfo.stime",
    ],
    "memory": [
        "proc.memory.rss",
    ],
    "io": [
        "proc.io.read_bytes",
        "proc.io.write_bytes",
    ],
    "info": [
        "proc.psinfo.pid",
        "proc.psinfo.cmd",
        "proc.psinfo.psargs",
    ],
}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def register_system_tools(mcp: FastMCP) -> None:
    """Register system health tools with the MCP server.

    Two tools are defined and registered through ``mcp.tool()``:
    ``get_system_snapshot`` and ``get_process_top``. Both obtain the PCP
    client from the request context and convert client failures through
    ``handle_pcp_error``.

    Args:
        mcp: The FastMCP server instance.
    """

    @mcp.tool()
    async def get_system_snapshot(
        ctx: Context,
        categories: Annotated[
            list[str] | None,
            Field(
                default=None,
                description="Categories to include: cpu, memory, disk, network, load",
            ),
        ] = None,
        sample_interval: Annotated[
            float,
            Field(
                default=1.0,
                ge=0.1,
                le=10.0,
                description="Seconds between samples for rate calculation",
            ),
        ] = 1.0,
    ) -> SystemSnapshot:
        """Get a point-in-time system health overview.

        Returns CPU, memory, disk I/O, network I/O, and load metrics in a single
        call. For rate metrics (CPU %, disk I/O, network throughput), takes two
        samples to calculate per-second rates.
        """
        from pcp_mcp.errors import handle_pcp_error

        client = get_client(ctx)

        if categories is None:
            categories = ["cpu", "memory", "disk", "network", "load"]

        # Categories overlap ("cpu" and "load" both list hinv.ncpu) and a
        # caller may repeat a category, so de-duplicate while keeping
        # first-seen order. Category names that are not in SNAPSHOT_METRICS
        # contribute nothing, as before.
        all_metrics: list[str] = list(
            dict.fromkeys(
                metric
                for cat in categories
                for metric in SNAPSHOT_METRICS.get(cat, ())
            )
        )

        try:
            data = await client.fetch_with_rates(
                all_metrics,
                COUNTER_METRICS,
                sample_interval,
            )
        except Exception as e:
            raise handle_pcp_error(e, "fetching system snapshot") from e

        snapshot = SystemSnapshot(
            timestamp=datetime.now(timezone.utc).isoformat(),
            hostname=client.target_host,
        )

        # Only the requested sections are populated; the rest keep whatever
        # defaults SystemSnapshot defines.
        if "cpu" in categories:
            snapshot.cpu = build_cpu_metrics(data)
        if "memory" in categories:
            snapshot.memory = build_memory_metrics(data)
        if "load" in categories:
            snapshot.load = build_load_metrics(data)
        if "disk" in categories:
            snapshot.disk = build_disk_metrics(data)
        if "network" in categories:
            snapshot.network = build_network_metrics(data)

        return snapshot

    @mcp.tool()
    async def get_process_top(
        ctx: Context,
        sort_by: Annotated[
            Literal["cpu", "memory", "io"],
            Field(description="Resource to sort by"),
        ] = "cpu",
        limit: Annotated[
            int,
            Field(default=10, ge=1, le=50, description="Number of processes to return"),
        ] = 10,
        sample_interval: Annotated[
            float,
            Field(
                default=1.0,
                ge=0.5,
                le=5.0,
                description="Seconds to sample for CPU/IO rates",
            ),
        ] = 1.0,
    ) -> ProcessTopResult:
        """Get top processes by resource consumption.

        For CPU and I/O, takes two samples to calculate rates. Memory is instantaneous.
        Returns the top N processes sorted by the requested resource.
        """
        from pcp_mcp.errors import handle_pcp_error

        client = get_client(ctx)

        # Identity and memory metrics are always fetched; the sort category
        # adds its own metrics. dict.fromkeys removes the overlap (e.g.
        # sort_by="memory") while keeping a stable request order.
        all_metrics = list(
            dict.fromkeys(
                PROCESS_METRICS["info"]
                + PROCESS_METRICS["memory"]
                + PROCESS_METRICS.get(sort_by, [])
            )
        )
        system_metrics = ["hinv.ncpu", "mem.physmem"]

        # Metrics passed to the client as counters for rate sampling.
        counter_metrics = {
            "proc.psinfo.utime",
            "proc.psinfo.stime",
            "proc.io.read_bytes",
            "proc.io.write_bytes",
        }

        try:
            proc_data = await client.fetch_with_rates(all_metrics, counter_metrics, sample_interval)
            sys_data = await client.fetch(system_metrics)
        except Exception as e:
            raise handle_pcp_error(e, "fetching process data") from e

        ncpu = get_scalar_value(sys_data, "hinv.ncpu", 1)
        # NOTE(review): the x1024 presumably converts mem.physmem from KiB to
        # bytes (the result is reported as total_memory_bytes) - confirm.
        total_mem = get_scalar_value(sys_data, "mem.physmem", 1) * 1024

        processes = build_process_list(proc_data, sort_by, total_mem, ncpu)
        # Largest consumer first, then keep only the requested number.
        processes.sort(key=lambda p: get_sort_key(p, sort_by), reverse=True)
        processes = processes[:limit]

        assessment = assess_processes(processes, sort_by, ncpu)

        return ProcessTopResult(
            timestamp=datetime.now(timezone.utc).isoformat(),
            hostname=client.target_host,
            sort_by=sort_by,
            sample_interval=sample_interval,
            processes=processes,
            total_memory_bytes=int(total_mem),
            ncpu=ncpu,
            assessment=assessment,
        )
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Shared utility modules for PCP-MCP.
|
|
2
|
+
|
|
3
|
+
This package contains reusable utilities extracted to follow DRY principles:
|
|
4
|
+
- extractors: Metric data extraction helpers
|
|
5
|
+
- decorators: Error handling and other decorators
|
|
6
|
+
- builders: Metric transformation and builder functions
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from pcp_mcp.utils.builders import (
|
|
10
|
+
build_cpu_metrics,
|
|
11
|
+
build_disk_metrics,
|
|
12
|
+
build_load_metrics,
|
|
13
|
+
build_memory_metrics,
|
|
14
|
+
build_network_metrics,
|
|
15
|
+
build_process_list,
|
|
16
|
+
)
|
|
17
|
+
from pcp_mcp.utils.decorators import handle_pcp_errors
|
|
18
|
+
from pcp_mcp.utils.extractors import (
|
|
19
|
+
extract_help_text,
|
|
20
|
+
extract_timestamp,
|
|
21
|
+
get_first_value,
|
|
22
|
+
get_scalar_value,
|
|
23
|
+
sum_instances,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
# Public names re-exported by this package, grouped by originating module.

# Extractors
__all__ = [
    "get_first_value",
    "get_scalar_value",
    "sum_instances",
    "extract_help_text",
    "extract_timestamp",
]

# Decorators
__all__ += [
    "handle_pcp_errors",
]

# Builders
__all__ += [
    "build_cpu_metrics",
    "build_memory_metrics",
    "build_load_metrics",
    "build_disk_metrics",
    "build_network_metrics",
    "build_process_list",
]
|