pcp-mcp 0.1.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,10 @@ from typing import TYPE_CHECKING
7
7
 
8
8
  from fastmcp import Context
9
9
 
10
- from pcp_mcp.context import get_client
10
+ from pcp_mcp.client import PCPClient
11
+ from pcp_mcp.context import get_client, get_client_for_host, get_settings
12
+ from pcp_mcp.icons import ICON_HEALTH, TAGS_HEALTH
13
+ from pcp_mcp.models import CPUMetrics, LoadMetrics, MemoryMetrics
11
14
  from pcp_mcp.tools.system import COUNTER_METRICS, SNAPSHOT_METRICS
12
15
  from pcp_mcp.utils.builders import (
13
16
  build_cpu_metrics,
@@ -19,33 +22,16 @@ if TYPE_CHECKING:
19
22
  from fastmcp import FastMCP
20
23
 
21
24
 
22
- def register_health_resources(mcp: FastMCP) -> None:
23
- """Register health resources with the MCP server."""
24
-
25
- @mcp.resource("pcp://health")
26
- async def health_summary(ctx: Context) -> str:
27
- """Quick system health summary.
28
-
29
- Returns a text summary of CPU, memory, and load status suitable
30
- for quick health checks. For detailed metrics, use the
31
- get_system_snapshot tool instead.
32
- """
33
- client = get_client(ctx)
34
-
35
- metrics = SNAPSHOT_METRICS["cpu"] + SNAPSHOT_METRICS["memory"] + SNAPSHOT_METRICS["load"]
25
+ def _format_health_summary(
26
+ client: PCPClient,
27
+ cpu: CPUMetrics,
28
+ memory: MemoryMetrics,
29
+ load: LoadMetrics,
30
+ ) -> str:
31
+ """Format health metrics into a markdown summary."""
32
+ timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
36
33
 
37
- try:
38
- data = await client.fetch_with_rates(metrics, COUNTER_METRICS, sample_interval=1.0)
39
- except Exception as e:
40
- return f"Error fetching health data: {e}"
41
-
42
- cpu = build_cpu_metrics(data)
43
- memory = build_memory_metrics(data)
44
- load = build_load_metrics(data)
45
-
46
- timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
47
-
48
- return f"""# System Health Summary
34
+ return f"""# System Health Summary
49
35
  Host: {client.target_host}
50
36
  Time: {timestamp}
51
37
 
@@ -72,3 +58,60 @@ Time: {timestamp}
72
58
  - Processes: {load.nprocs}
73
59
  - Assessment: {load.assessment}
74
60
  """
61
+
62
+
63
+ async def _fetch_health_data(client: PCPClient) -> tuple[CPUMetrics, MemoryMetrics, LoadMetrics]:
64
+ """Fetch and build health metrics from a client."""
65
+ metrics = SNAPSHOT_METRICS["cpu"] + SNAPSHOT_METRICS["memory"] + SNAPSHOT_METRICS["load"]
66
+ data = await client.fetch_with_rates(metrics, COUNTER_METRICS, sample_interval=1.0)
67
+
68
+ return (
69
+ build_cpu_metrics(data),
70
+ build_memory_metrics(data),
71
+ build_load_metrics(data),
72
+ )
73
+
74
+
75
+ def register_health_resources(mcp: FastMCP) -> None:
76
+ """Register health resources with the MCP server."""
77
+
78
+ @mcp.resource("pcp://health", icons=[ICON_HEALTH], tags=TAGS_HEALTH)
79
+ async def health_summary(ctx: Context) -> str:
80
+ """Quick system health summary for the default target host.
81
+
82
+ Returns a text summary of CPU, memory, and load status suitable
83
+ for quick health checks. For detailed metrics, use the
84
+ get_system_snapshot tool instead.
85
+ """
86
+ client = get_client(ctx)
87
+
88
+ try:
89
+ cpu, memory, load = await _fetch_health_data(client)
90
+ except Exception as e:
91
+ return f"Error fetching health data: {e}"
92
+
93
+ return _format_health_summary(client, cpu, memory, load)
94
+
95
+ @mcp.resource("pcp://host/{hostname}/health", icons=[ICON_HEALTH], tags=TAGS_HEALTH)
96
+ async def host_health_summary(ctx: Context, hostname: str) -> str:
97
+ """System health summary for a specific host.
98
+
99
+ Returns a text summary of CPU, memory, and load status for the
100
+ specified hostname. Requires PCP_ALLOWED_HOSTS to be configured
101
+ if querying hosts other than the default target.
102
+ """
103
+ settings = get_settings(ctx)
104
+
105
+ if not settings.is_host_allowed(hostname):
106
+ return (
107
+ f"Error: Host '{hostname}' is not allowed. "
108
+ f"Configure PCP_ALLOWED_HOSTS to permit additional hosts."
109
+ )
110
+
111
+ async with get_client_for_host(ctx, hostname) as client:
112
+ try:
113
+ cpu, memory, load = await _fetch_health_data(client)
114
+ except Exception as e:
115
+ return f"Error fetching health data from {hostname}: {e}"
116
+
117
+ return _format_health_summary(client, cpu, memory, load)
pcp_mcp/server.py CHANGED
@@ -7,9 +7,11 @@ from contextlib import asynccontextmanager
7
7
  from typing import Any
8
8
 
9
9
  from fastmcp import FastMCP
10
+ from fastmcp.server.middleware.logging import StructuredLoggingMiddleware
10
11
 
11
12
  from pcp_mcp.client import PCPClient
12
13
  from pcp_mcp.config import PCPMCPSettings
14
+ from pcp_mcp.middleware import MetricCacheMiddleware
13
15
 
14
16
 
15
17
  @asynccontextmanager
@@ -32,6 +34,7 @@ async def lifespan(mcp: FastMCP) -> AsyncIterator[dict[str, Any]]:
32
34
  target_host=settings.target_host,
33
35
  auth=settings.auth,
34
36
  timeout=settings.timeout,
37
+ verify=settings.verify,
35
38
  ) as client:
36
39
  yield {
37
40
  "client": client,
@@ -107,12 +110,29 @@ Tools:
107
110
 
108
111
  Resources:
109
112
  - pcp://health - Quick system health summary
113
+ - pcp://host/{{hostname}}/health - Per-host health summary (template)
114
+ - pcp://metric/{{name}}/info - Detailed metric metadata (template)
110
115
  - pcp://metrics/common - Catalog of commonly used metrics
111
116
  - pcp://namespaces - Dynamically discovered metric namespaces
117
+
118
+ Prompts (invoke for guided troubleshooting workflows):
119
+ - diagnose_slow_system: Complete slowness investigation
120
+ - investigate_memory_usage: Memory pressure analysis
121
+ - find_io_bottleneck: Disk I/O troubleshooting
122
+ - analyze_cpu_usage: CPU utilization analysis
123
+ - check_network_performance: Network saturation detection
112
124
  """,
113
125
  lifespan=lifespan,
114
126
  )
115
127
 
128
+ mcp.add_middleware(
129
+ StructuredLoggingMiddleware(
130
+ include_payload_length=True,
131
+ estimate_payload_tokens=True,
132
+ )
133
+ )
134
+ mcp.add_middleware(MetricCacheMiddleware())
135
+
116
136
  from pcp_mcp.prompts import register_prompts
117
137
  from pcp_mcp.resources import register_resources
118
138
  from pcp_mcp.tools import register_tools
@@ -0,0 +1,61 @@
1
+ # MCP Tools
2
+
3
+ ## OVERVIEW
4
+
5
+ MCP tool implementations. Two modules: metrics (query/search/describe) and system (snapshot/top).
6
+
7
+ ## STRUCTURE
8
+
9
+ ```
10
+ tools/
11
+ ├── __init__.py # register_tools() → calls both modules
12
+ ├── metrics.py # query_metrics, search_metrics, describe_metric
13
+ └── system.py # get_system_snapshot, get_process_top
14
+ ```
15
+
16
+ ## REGISTRATION PATTERN
17
+
18
+ ```python
19
+ def register_metrics_tools(mcp: FastMCP) -> None:
20
+ @mcp.tool()
21
+ async def query_metrics(
22
+ ctx: Context,
23
+ names: Annotated[list[str], Field(description="Metric names to fetch")],
24
+ ) -> list[MetricValue]:
25
+ client = get_client(ctx)
26
+ # ...
27
+ ```
28
+
29
+ ## TOOL REQUIREMENTS
30
+
31
+ 1. **Decorator**: `@mcp.tool()`
32
+ 2. **Context**: First param is `ctx: Context`
33
+ 3. **Annotations**: `Annotated[type, Field(description="...")]` for all params except `ctx`
34
+ 4. **Return**: Pydantic model or list of models (not dict); text-summary tools such as `quick_health` and `smart_diagnose` return `str`
35
+ 5. **Errors**: Wrap with `handle_pcp_error()`
36
+
37
+ ## TOOLS
38
+
39
+ | Tool | Purpose | Returns |
40
+ |------|---------|---------|
41
+ | `query_metrics` | Fetch raw metric values | `list[MetricValue]` |
42
+ | `search_metrics` | Find metrics by prefix | `list[MetricSearchResult]` |
43
+ | `describe_metric` | Get metric metadata | `MetricInfo` |
44
+ | `get_system_snapshot` | System overview with rates | `SystemSnapshot` |
45
+ | `quick_health` | Fast health check (cached) | `str` (formatted summary) |
46
+ | `get_process_top` | Top N processes | `ProcessTopResult` |
47
+ | `smart_diagnose` | AI-assisted diagnosis | `str` (LLM-generated analysis) |
48
+
49
+ ## ANTI-PATTERNS
50
+
51
+ - **NEVER** return raw dicts (use Pydantic models)
52
+ - **NEVER** skip `Annotated[..., Field(...)]` on params
53
+ - **NEVER** let httpx exceptions escape (wrap with `handle_pcp_error`)
54
+ - **NEVER** block async (no `time.sleep()`, use `asyncio.sleep()`)
55
+
56
+ ## ADDING A NEW TOOL
57
+
58
+ 1. Add function in `metrics.py` or `system.py`
59
+ 2. Use `@mcp.tool()` decorator
60
+ 3. Add response model to `models.py` if needed
61
+ 4. Define it inside the module's `register_*_tools()` function so the decorator registers it with the server
pcp_mcp/tools/metrics.py CHANGED
@@ -1,148 +1,186 @@
1
1
  """Core metric tools for querying PCP metrics."""
2
2
 
3
- from __future__ import annotations
4
-
5
- from typing import TYPE_CHECKING, Annotated
3
+ from typing import TYPE_CHECKING, Annotated, Optional
6
4
 
7
5
  from fastmcp import Context
8
- from pydantic import Field
9
-
10
- from pcp_mcp.context import get_client
6
+ from mcp.types import ToolAnnotations
7
+ from pydantic import Field, TypeAdapter
8
+
9
+ from pcp_mcp.context import get_client_for_host
10
+ from pcp_mcp.icons import (
11
+ ICON_INFO,
12
+ ICON_METRICS,
13
+ ICON_SEARCH,
14
+ TAGS_DISCOVERY,
15
+ TAGS_METRICS,
16
+ )
11
17
  from pcp_mcp.models import MetricInfo, MetricSearchResult, MetricValue
12
- from pcp_mcp.utils.extractors import extract_help_text
18
+ from pcp_mcp.utils.extractors import extract_help_text, format_units
13
19
 
14
20
  if TYPE_CHECKING:
15
21
  from fastmcp import FastMCP
16
22
 
23
+ TOOL_ANNOTATIONS = ToolAnnotations(readOnlyHint=True, openWorldHint=True)
24
+ METRIC_VALUE_LIST_SCHEMA = TypeAdapter(list[MetricValue]).json_schema()
25
+ METRIC_SEARCH_LIST_SCHEMA = TypeAdapter(list[MetricSearchResult]).json_schema()
26
+
17
27
 
18
- def register_metrics_tools(mcp: FastMCP) -> None:
28
+ def register_metrics_tools(mcp: "FastMCP") -> None:
19
29
  """Register core metric tools with the MCP server."""
20
30
 
21
- @mcp.tool()
31
+ @mcp.tool(
32
+ annotations=TOOL_ANNOTATIONS,
33
+ output_schema=METRIC_VALUE_LIST_SCHEMA,
34
+ icons=[ICON_METRICS],
35
+ tags=TAGS_METRICS,
36
+ )
22
37
  async def query_metrics(
23
38
  ctx: Context,
24
39
  names: Annotated[
25
40
  list[str],
26
41
  Field(description="List of PCP metric names to fetch (e.g., ['kernel.all.load'])"),
27
42
  ],
43
+ host: Annotated[
44
+ Optional[str],
45
+ Field(description="Target pmcd host to query (default: server's configured target)"),
46
+ ] = None,
28
47
  ) -> list[MetricValue]:
29
48
  """Fetch current values for specific PCP metrics.
30
49
 
31
50
  Returns the current value for each requested metric. For metrics with
32
51
  instances (e.g., per-CPU, per-disk), returns one MetricValue per instance.
33
- """
34
- from pcp_mcp.errors import handle_pcp_error
35
52
 
36
- client = get_client(ctx)
53
+ Examples:
54
+ query_metrics(["kernel.all.load"]) - Get load averages
55
+ query_metrics(["mem.util.available", "mem.physmem"]) - Get memory stats
56
+ query_metrics(["hinv.ncpu"]) - Get CPU count
57
+ query_metrics(["kernel.all.load"], host="web1.example.com") - Query remote host
37
58
 
38
- try:
39
- response = await client.fetch(names)
40
- except Exception as e:
41
- raise handle_pcp_error(e, "fetching metrics") from e
42
-
43
- results: list[MetricValue] = []
44
- for metric in response.get("values", []):
45
- metric_name = metric.get("name", "")
46
- instances = metric.get("instances", [])
47
-
48
- for inst in instances:
49
- instance_id = inst.get("instance")
50
- value = inst.get("value")
51
-
52
- instance_name = None
53
- if instance_id is not None and instance_id != -1:
54
- instance_name = str(instance_id)
59
+ Warning: CPU, disk, and network metrics are counters (cumulative since boot).
60
+ Use get_system_snapshot() instead for rates.
61
+ """
62
+ from pcp_mcp.errors import handle_pcp_error
55
63
 
56
- results.append(
57
- MetricValue(
58
- name=metric_name,
59
- value=value,
60
- instance=instance_name,
64
+ async with get_client_for_host(ctx, host) as client:
65
+ try:
66
+ response = await client.fetch(names)
67
+ except Exception as e:
68
+ raise handle_pcp_error(e, "fetching metrics") from e
69
+
70
+ results: list[MetricValue] = []
71
+ for metric in response.get("values", []):
72
+ metric_name = metric.get("name", "")
73
+ instances = metric.get("instances", [])
74
+
75
+ for inst in instances:
76
+ instance_id = inst.get("instance")
77
+ value = inst.get("value")
78
+
79
+ instance_name = None
80
+ if instance_id is not None and instance_id != -1:
81
+ instance_name = str(instance_id)
82
+
83
+ results.append(
84
+ MetricValue(
85
+ name=metric_name,
86
+ value=value,
87
+ instance=instance_name,
88
+ )
61
89
  )
62
- )
63
90
 
64
- return results
91
+ return results
65
92
 
66
- @mcp.tool()
93
+ @mcp.tool(
94
+ annotations=TOOL_ANNOTATIONS,
95
+ output_schema=METRIC_SEARCH_LIST_SCHEMA,
96
+ icons=[ICON_SEARCH],
97
+ tags=TAGS_METRICS | TAGS_DISCOVERY,
98
+ )
67
99
  async def search_metrics(
68
100
  ctx: Context,
69
101
  pattern: Annotated[
70
102
  str,
71
103
  Field(description="Metric name prefix to search for (e.g., 'kernel.all', 'mem')"),
72
104
  ],
105
+ host: Annotated[
106
+ Optional[str],
107
+ Field(description="Target pmcd host to query (default: server's configured target)"),
108
+ ] = None,
73
109
  ) -> list[MetricSearchResult]:
74
110
  """Find PCP metrics matching a name pattern.
75
111
 
76
112
  Use this to discover available metrics before querying them.
77
113
  Returns metric names and brief descriptions.
114
+
115
+ Examples:
116
+ search_metrics("kernel.all") - Find kernel-wide metrics
117
+ search_metrics("mem.util") - Find memory utilization metrics
118
+ search_metrics("disk.dev") - Find per-disk metrics
119
+ search_metrics("network.interface") - Find per-interface metrics
120
+ search_metrics("kernel", host="db1.example.com") - Search on remote host
78
121
  """
79
122
  from pcp_mcp.errors import handle_pcp_error
80
123
 
81
- client = get_client(ctx)
124
+ async with get_client_for_host(ctx, host) as client:
125
+ try:
126
+ metrics = await client.search(pattern)
127
+ except Exception as e:
128
+ raise handle_pcp_error(e, "searching metrics") from e
82
129
 
83
- try:
84
- metrics = await client.search(pattern)
85
- except Exception as e:
86
- raise handle_pcp_error(e, "searching metrics") from e
87
-
88
- return [
89
- MetricSearchResult(
90
- name=m.get("name", ""),
91
- help_text=extract_help_text(m),
92
- )
93
- for m in metrics
94
- ]
95
-
96
- @mcp.tool()
130
+ return [
131
+ MetricSearchResult(
132
+ name=m.get("name", ""),
133
+ help_text=extract_help_text(m),
134
+ )
135
+ for m in metrics
136
+ ]
137
+
138
+ @mcp.tool(
139
+ annotations=TOOL_ANNOTATIONS,
140
+ output_schema=MetricInfo.model_json_schema(),
141
+ icons=[ICON_INFO],
142
+ tags=TAGS_METRICS | TAGS_DISCOVERY,
143
+ )
97
144
  async def describe_metric(
98
145
  ctx: Context,
99
146
  name: Annotated[
100
147
  str,
101
148
  Field(description="Full PCP metric name (e.g., 'kernel.all.cpu.user')"),
102
149
  ],
150
+ host: Annotated[
151
+ Optional[str],
152
+ Field(description="Target pmcd host to query (default: server's configured target)"),
153
+ ] = None,
103
154
  ) -> MetricInfo:
104
155
  """Get detailed metadata about a PCP metric.
105
156
 
106
157
  Returns type, semantics, units, and help text for the metric.
107
158
  Use this to understand what a metric measures and how to interpret it.
159
+
160
+ Examples:
161
+ describe_metric("kernel.all.load") - Learn about load average semantics
162
+ describe_metric("mem.util.available") - Understand available memory
163
+ describe_metric("disk.all.read_bytes") - Check if metric is counter vs instant
164
+ describe_metric("kernel.all.load", host="web1.example.com") - Describe on remote
108
165
  """
109
166
  from fastmcp.exceptions import ToolError
110
167
 
111
168
  from pcp_mcp.errors import handle_pcp_error
112
169
 
113
- client = get_client(ctx)
114
-
115
- try:
116
- info = await client.describe(name)
117
- except Exception as e:
118
- raise handle_pcp_error(e, "describing metric") from e
119
-
120
- if not info:
121
- raise ToolError(f"Metric not found: {name}")
122
-
123
- return MetricInfo(
124
- name=info.get("name", name),
125
- type=info.get("type", "unknown"),
126
- semantics=info.get("sem", "unknown"),
127
- units=_format_units(info),
128
- help_text=extract_help_text(info),
129
- indom=info.get("indom"),
130
- )
131
-
132
-
133
- def _format_units(info: dict) -> str:
134
- """Format PCP units into a human-readable string."""
135
- units = info.get("units", "")
136
- if units:
137
- return units
138
-
139
- # Fallback: build from components if available
140
- parts = []
141
- if info.get("units-space"):
142
- parts.append(info["units-space"])
143
- if info.get("units-time"):
144
- parts.append(info["units-time"])
145
- if info.get("units-count"):
146
- parts.append(info["units-count"])
147
-
148
- return " / ".join(parts) if parts else "none"
170
+ async with get_client_for_host(ctx, host) as client:
171
+ try:
172
+ info = await client.describe(name)
173
+ except Exception as e:
174
+ raise handle_pcp_error(e, "describing metric") from e
175
+
176
+ if not info:
177
+ raise ToolError(f"Metric not found: {name}")
178
+
179
+ return MetricInfo(
180
+ name=info.get("name", name),
181
+ type=info.get("type", "unknown"),
182
+ semantics=info.get("sem", "unknown"),
183
+ units=format_units(info),
184
+ help_text=extract_help_text(info),
185
+ indom=info.get("indom"),
186
+ )