@supernova123/docker-mcp-server 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +9 -0
- package/Dockerfile +29 -0
- package/README.md +21 -0
- package/dist/server.js +3 -1
- package/dist/tools/container.js +23 -2
- package/dist/tools/monitoring.d.ts +4 -0
- package/dist/tools/monitoring.js +309 -0
- package/dist/types.d.ts +57 -0
- package/dist/types.js +24 -0
- package/package.json +3 -3
- package/src/server.ts +3 -1
- package/src/tools/container.ts +20 -2
- package/src/tools/monitoring.ts +369 -0
- package/src/types.ts +31 -0
- package/tests/monitoring.test.ts +411 -0
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
import Dockerode from "dockerode";
|
|
2
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
3
|
+
import {
|
|
4
|
+
FleetStatusSchema,
|
|
5
|
+
FleetStatsSchema,
|
|
6
|
+
WatchEventsSchema,
|
|
7
|
+
SearchLogsSchema,
|
|
8
|
+
CheckThresholdsSchema,
|
|
9
|
+
MonitorDashboardSchema,
|
|
10
|
+
} from "../types.js";
|
|
11
|
+
|
|
12
|
+
export function registerMonitoringTools(server: McpServer, docker: Dockerode): void {
|
|
13
|
+
// Tool 1: fleet_status — one health/lifecycle record per running container.
server.tool(
  "fleet_status",
  "Get health status of all running containers. Returns name, state, health, uptime, and restart count for each.",
  FleetStatusSchema.shape,
  async (_args) => {
    try {
      // `all: false` limits the listing to containers that are currently running.
      const running = await docker.listContainers({ all: false });

      // Inspect every container concurrently: the list entry supplies the
      // summary fields, the inspect payload supplies health, start time and
      // restart count.
      const inspections = running.map(async (summary) => {
        const details = await docker.getContainer(summary.Id).inspect();
        const shortId = summary.Id.slice(0, 12);
        // Strip a leading "/" from the first name; fall back to the short ID
        // when the name is missing or empty.
        const displayName = summary.Names[0]?.replace(/^\//, "") || shortId;

        return {
          name: displayName,
          id: shortId,
          state: summary.State,
          status: summary.Status,
          health: details.State.Health?.Status || "no-healthcheck",
          // Reported as the raw StartedAt value from inspect, not a computed duration.
          uptime: details.State.StartedAt,
          restartCount: details.RestartCount,
          image: summary.Image,
        };
      });
      const report = await Promise.all(inspections);

      return {
        content: [{ type: "text", text: JSON.stringify(report, null, 2) }],
      };
    } catch (err: any) {
      // Any list/inspect failure is surfaced as a tool error result.
      const message = `Error: ${err.message}`;
      return { content: [{ type: "text", text: message }], isError: true };
    }
  }
);
|
|
47
|
+
|
|
48
|
+
// Tool 2: fleet_stats — CPU, memory and network usage per running container,
// sorted descending by the requested metric.
server.tool(
  "fleet_stats",
  "Get resource usage (CPU%, memory%, network I/O) for all running containers. Sorted by usage.",
  FleetStatsSchema.shape,
  async (params) => {
    // Rounds a value to two decimal places.
    const round2 = (value: number): number => Math.round(value * 100) / 100;

    try {
      const running = await docker.listContainers({ all: false });

      // One non-streaming stats sample per container, fetched concurrently.
      const rows = await Promise.all(
        running.map(async (summary) => {
          const sample = await docker.getContainer(summary.Id).stats({ stream: false });

          // CPU%: delta of `total_usage` over delta of `system_cpu_usage`
          // between the previous and current sample, scaled by `online_cpus`.
          // Missing previous values count as 0; a non-positive system delta
          // yields 0%.
          const previousCpu = sample.precpu_stats?.cpu_usage?.total_usage ?? 0;
          const previousSystem = sample.precpu_stats?.system_cpu_usage ?? 0;
          const cpuDelta = sample.cpu_stats.cpu_usage.total_usage - previousCpu;
          const systemDelta = sample.cpu_stats.system_cpu_usage - previousSystem;
          const cpus = sample.cpu_stats.online_cpus ?? 1;
          let cpuPercent = 0;
          if (systemDelta > 0) {
            cpuPercent = (cpuDelta / systemDelta) * cpus * 100;
          }

          // Memory%: usage over limit (limit defaults to 1 when absent).
          const memUsage = sample.memory_stats?.usage ?? 0;
          const memLimit = sample.memory_stats?.limit ?? 1;
          const memPercent = (memUsage / memLimit) * 100;

          // Network: sum received/transmitted bytes across every interface.
          let rxBytes = 0;
          let txBytes = 0;
          for (const nic of Object.values(sample.networks ?? {}) as any[]) {
            rxBytes += nic.rx_bytes ?? 0;
            txBytes += nic.tx_bytes ?? 0;
          }

          const shortId = summary.Id.slice(0, 12);
          return {
            name: summary.Names[0]?.replace(/^\//, "") || shortId,
            id: shortId,
            cpu_percent: round2(cpuPercent),
            memory_usage_mb: round2(memUsage / 1024 / 1024),
            memory_percent: round2(memPercent),
            network_rx_mb: round2(rxBytes / 1024 / 1024),
            network_tx_mb: round2(txBytes / 1024 / 1024),
          };
        })
      );

      // Sort in place, highest first. Anything other than "cpu" or "memory"
      // sorts by combined network traffic.
      const sortKey = params.sort_by || "cpu";
      rows.sort((left, right) => {
        switch (sortKey) {
          case "cpu":
            return right.cpu_percent - left.cpu_percent;
          case "memory":
            return right.memory_percent - left.memory_percent;
          default:
            return (
              (right.network_rx_mb + right.network_tx_mb) -
              (left.network_rx_mb + left.network_tx_mb)
            );
        }
      });

      return {
        content: [{ type: "text", text: JSON.stringify(rows, null, 2) }],
      };
    } catch (err: any) {
      const message = `Error: ${err.message}`;
      return { content: [{ type: "text", text: message }], isError: true };
    }
  }
);
|
|
99
|
+
|
|
100
|
+
// Tool 3: watch_events — collect Docker events for a bounded time window.
//
// Parameters (all optional, see WatchEventsSchema):
//   container  – only events for this container name or ID
//   event_type – only this event action ("all" or omitted = no event filter)
//   since      – timestamp string; events from that point on are replayed first
//   duration   – seconds to listen (default 30)
//
// Returns a JSON array of { type, action, container, time }, or a plain
// message when nothing was captured. Failures are returned as isError results.
server.tool(
  "watch_events",
  "Collect Docker events (start, stop, die, restart, health_status) over a time window. Filter by container or event type.",
  WatchEventsSchema.shape,
  async (params) => {
    // The value returned by getEvents is consumed as a readable stream; when
    // it also exposes `destroy`, that is used to release the connection.
    type EventStream = NodeJS.ReadableStream & { destroy?: () => void };
    let stream: EventStream | undefined;

    try {
      const durationMs = (params.duration || 30) * 1000;

      // Container/event filters must be nested under `filters`; passed as
      // top-level options they are not applied as event filters.
      const filters: { container?: string[]; event?: string[] } = {};
      if (params.container) filters.container = [params.container];
      if (params.event_type && params.event_type !== "all") filters.event = [params.event_type];

      const options: Dockerode.GetEventsOptions = {};
      if (Object.keys(filters).length > 0) options.filters = JSON.stringify(filters);
      if (params.since) {
        // dockerode types `since` as a number of seconds, so convert the
        // caller's timestamp string and reject anything unparseable.
        const sinceMs = Date.parse(params.since);
        if (Number.isNaN(sinceMs)) {
          throw new Error(`Invalid 'since' timestamp: ${params.since}`);
        }
        options.since = Math.floor(sinceMs / 1000);
      }

      const events: { type: unknown; action: unknown; container: unknown; time: string }[] = [];

      // Parses one newline-delimited JSON record and stores the fields we report.
      const record = (line: string): void => {
        if (!line.trim()) return;
        try {
          const event = JSON.parse(line);
          events.push({
            type: event.Type,
            action: event.Action,
            container: event.Actor?.Attributes?.name || event.Actor?.ID?.slice(0, 12),
            time: new Date(event.time * 1000).toISOString(),
          });
        } catch {
          // Malformed or truncated record: skip it rather than fail the whole window.
        }
      };

      stream = (await docker.getEvents(options)) as unknown as EventStream;
      const source = stream;

      await new Promise<void>((resolve) => {
        // Bytes received after the last newline; a chunk may end mid-record
        // or carry several records at once.
        let pending: Buffer = Buffer.alloc(0);
        let settled = false;

        const timer = setTimeout(() => finish(), durationMs);

        // Idempotent: the first of timeout / end / error wins.
        const finish = (): void => {
          if (settled) return;
          settled = true;
          clearTimeout(timer);
          record(pending.toString("utf8"));
          resolve();
        };

        source.on("data", (chunk: Buffer | string) => {
          if (settled) return;
          const bytes = typeof chunk === "string" ? Buffer.from(chunk) : chunk;
          pending = Buffer.concat([pending, bytes]);
          let newline = pending.indexOf(0x0a);
          while (newline !== -1) {
            record(pending.subarray(0, newline).toString("utf8"));
            pending = pending.subarray(newline + 1);
            newline = pending.indexOf(0x0a);
          }
        });

        // The error listener stays attached after settling so that an error
        // emitted while the stream is being torn down is not left unhandled.
        source.on("error", finish);
        source.on("end", finish);
      });

      return {
        content: [{ type: "text", text: events.length ? JSON.stringify(events, null, 2) : "No events captured in the time window." }],
      };
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      return {
        content: [{ type: "text", text: `Error: ${message}` }],
        isError: true,
      };
    } finally {
      // Always release the events connection; otherwise it stays open after
      // the tool call has returned.
      stream?.destroy?.();
    }
  }
);
|
|
155
|
+
|
|
156
|
+
// Tool 4: search_logs — regex search over the recent log lines of one or more containers.
//
// Parameters (see SearchLogsSchema):
//   pattern     – regular expression source (required)
//   containers  – container names/IDs to search (default: all running)
//   tail        – max lines fetched per container (default 500)
//   since       – only logs since this timestamp string
//   ignore_case – case-insensitive matching
//
// Returns a JSON array of { container, line }, or "No matches found.".
// An invalid pattern, an invalid `since`, or a failed lookup of an explicitly
// requested container is returned as an isError result.
server.tool(
  "search_logs",
  "Search logs across multiple containers with regex/grep pattern. Returns matching lines with container name and timestamp.",
  SearchLogsSchema.shape,
  async (params) => {
    // Docker multiplexes stdout/stderr of non-TTY containers into frames:
    // an 8-byte header [stream type 0-2, 0, 0, 0, uint32 big-endian payload
    // size] followed by the payload. Walk the frames on the raw bytes and keep
    // only the payloads. Data that does not look framed (e.g. a TTY
    // container's raw stream) is kept verbatim.
    const decodeLogs = (raw: Buffer): string => {
      const payloads: Buffer[] = [];
      let offset = 0;
      while (offset < raw.length) {
        const framed =
          offset + 8 <= raw.length &&
          raw.readUInt8(offset) <= 2 &&
          raw.readUInt8(offset + 1) === 0 &&
          raw.readUInt8(offset + 2) === 0 &&
          raw.readUInt8(offset + 3) === 0;
        if (!framed) {
          payloads.push(raw.subarray(offset));
          break;
        }
        const size = raw.readUInt32BE(offset + 4);
        const start = offset + 8;
        payloads.push(raw.subarray(start, start + size));
        offset = start + size;
      }
      return Buffer.concat(payloads).toString("utf-8");
    };

    try {
      const requested = params.containers || [];
      let containers: { id: string; name: string }[];

      if (requested.length > 0) {
        // Explicit targets: resolve each one so results carry the real name.
        containers = await Promise.all(
          requested.map(async (id) => {
            const info = await docker.getContainer(id).inspect();
            return { id, name: info.Name.replace(/^\//, "") };
          })
        );
      } else {
        const list = await docker.listContainers({ all: false });
        containers = list.map((c) => ({ id: c.Id, name: c.Names[0]?.replace(/^\//, "") || c.Id.slice(0, 12) }));
      }

      // Compiled once; no "g" flag, so `test` keeps no state between lines.
      const regex = new RegExp(params.pattern, params.ignore_case ? "i" : "");

      // Convert `since` to whole seconds up front so a bad value is reported
      // instead of being sent to Docker as NaN.
      let since: number | undefined;
      if (params.since) {
        const sinceMs = Date.parse(params.since);
        if (Number.isNaN(sinceMs)) {
          throw new Error(`Invalid 'since' timestamp: ${params.since}`);
        }
        since = Math.floor(sinceMs / 1000);
      }

      const matches: { container: string; line: string }[] = [];

      for (const container of containers) {
        try {
          const raw = await docker.getContainer(container.id).logs({
            stdout: true,
            stderr: true,
            tail: params.tail || 500,
            since,
          });
          const output = Buffer.isBuffer(raw) ? decodeLogs(raw) : String(raw);
          for (const line of output.split("\n")) {
            if (regex.test(line)) {
              matches.push({ container: container.name, line: line.trim() });
            }
          }
        } catch {
          // Best effort: a container whose logs cannot be read is skipped so
          // the remaining containers are still searched.
        }
      }

      return {
        content: [{ type: "text", text: matches.length ? JSON.stringify(matches, null, 2) : "No matches found." }],
      };
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      return {
        content: [{ type: "text", text: `Error: ${message}` }],
        isError: true,
      };
    }
  }
);
|
|
210
|
+
|
|
211
|
+
// Tool 5: check_thresholds — report running containers whose restart count,
// CPU% or memory% exceeds the given limits.
server.tool(
  "check_thresholds",
  "Check all containers against defined thresholds (CPU > X%, memory > Y%, restarts > Z). Returns violations.",
  CheckThresholdsSchema.shape,
  async (params) => {
    try {
      // Limits default to 80% CPU, 80% memory and 5 restarts when omitted.
      const limits = {
        cpu: params.cpu_percent ?? 80,
        memory: params.memory_percent ?? 80,
        restarts: params.restart_count ?? 5,
      };
      const running = await docker.listContainers({ all: false });
      const violations: any[] = [];

      // Containers are examined one after another.
      for (const summary of running) {
        const details = await docker.getContainer(summary.Id).inspect();
        const problems: string[] = [];

        if (details.RestartCount > limits.restarts) {
          problems.push(`restarts: ${details.RestartCount} > ${limits.restarts}`);
        }

        try {
          const sample = await docker.getContainer(summary.Id).stats({ stream: false });

          // CPU%: delta of `total_usage` over delta of `system_cpu_usage`,
          // scaled by `online_cpus`; 0 when the system delta is not positive.
          const previousCpu = sample.precpu_stats?.cpu_usage?.total_usage ?? 0;
          const previousSystem = sample.precpu_stats?.system_cpu_usage ?? 0;
          const cpuDelta = sample.cpu_stats.cpu_usage.total_usage - previousCpu;
          const systemDelta = sample.cpu_stats.system_cpu_usage - previousSystem;
          const cpus = sample.cpu_stats.online_cpus ?? 1;
          let cpuPercent = 0;
          if (systemDelta > 0) {
            cpuPercent = (cpuDelta / systemDelta) * cpus * 100;
          }

          const memUsage = sample.memory_stats?.usage ?? 0;
          const memLimit = sample.memory_stats?.limit ?? 1;
          const memPercent = (memUsage / memLimit) * 100;

          if (cpuPercent > limits.cpu) {
            problems.push(`cpu: ${Math.round(cpuPercent)}% > ${limits.cpu}%`);
          }
          if (memPercent > limits.memory) {
            problems.push(`memory: ${Math.round(memPercent)}% > ${limits.memory}%`);
          }
        } catch {
          // Stats could not be read or computed: the CPU/memory checks are
          // skipped for this container; a restart violation found above still
          // counts.
        }

        if (problems.length === 0) continue;

        const shortId = summary.Id.slice(0, 12);
        violations.push({
          container: summary.Names[0]?.replace(/^\//, "") || shortId,
          id: shortId,
          issues: problems,
        });
      }

      const checked = running.length;
      let text: string;
      if (violations.length) {
        text = JSON.stringify({ violations, checked }, null, 2);
      } else {
        text = JSON.stringify({ message: "All containers within thresholds.", checked });
      }

      return {
        content: [{ type: "text", text }],
      };
    } catch (err: any) {
      const message = `Error: ${err.message}`;
      return { content: [{ type: "text", text: message }], isError: true };
    }
  }
);
|
|
273
|
+
|
|
274
|
+
// Tool 6: monitor_dashboard — single-call fleet summary.
//
// Takes no parameters. Returns one JSON document with:
//   summary              – container counts (total, running, unhealthy)
//   health               – per-container state / health / restart count
//   top_cpu_consumers    – up to 5 containers with the highest CPU%
//   recent_events        – up to 10 most recent events from the last 5 minutes
//   threshold_violations – containers above 80% CPU or 80% memory
// A failure to list or inspect containers is returned as an isError result;
// stats and event collection are best effort.
server.tool(
  "monitor_dashboard",
  "Single-call fleet summary: health status, top resource consumers, recent events, threshold violations. Designed for agent quick-assessment.",
  MonitorDashboardSchema.shape,
  async (_args) => {
    try {
      const containers = await docker.listContainers({ all: false });

      // Strip a leading "/" from the first name; fall back to the short ID.
      const displayName = (c: { Id: string; Names: string[] }): string =>
        c.Names[0]?.replace(/^\//, "") || c.Id.slice(0, 12);

      // Fleet health
      const health = await Promise.all(
        containers.map(async (c) => {
          const info = await docker.getContainer(c.Id).inspect();
          return {
            name: displayName(c),
            state: c.State,
            health: info.State.Health?.Status || "no-healthcheck",
            restartCount: info.RestartCount,
          };
        })
      );

      // Resource usage; a container whose stats cannot be read yields null
      // and is left out of the rankings below.
      const stats = await Promise.all(
        containers.map(async (c) => {
          try {
            const s = await docker.getContainer(c.Id).stats({ stream: false });
            const cpuDelta = s.cpu_stats.cpu_usage.total_usage - (s.precpu_stats?.cpu_usage?.total_usage ?? 0);
            const systemDelta = s.cpu_stats.system_cpu_usage - (s.precpu_stats?.system_cpu_usage ?? 0);
            const cpuCount = s.cpu_stats.online_cpus ?? 1;
            const cpuPercent = systemDelta > 0 ? (cpuDelta / systemDelta) * cpuCount * 100 : 0;
            const memUsage = s.memory_stats?.usage ?? 0;
            const memLimit = s.memory_stats?.limit ?? 1;
            const memPercent = (memUsage / memLimit) * 100;
            return {
              name: displayName(c),
              cpu_percent: Math.round(cpuPercent * 100) / 100,
              memory_percent: Math.round(memPercent * 100) / 100,
            };
          } catch {
            return null;
          }
        })
      );
      const usage = stats.filter((s): s is NonNullable<typeof s> => s !== null);

      // Top 5 by CPU; sort a copy so `usage` keeps the listing order for the
      // violations list.
      const topConsumers = [...usage].sort((a, b) => b.cpu_percent - a.cpu_percent).slice(0, 5);

      // Recent events (last 5 minutes). `until` bounds the request to the
      // window so the stream can end on its own; the 2 s timer is only a
      // safety net.
      const recentEvents: { action: unknown; container: unknown; time: string }[] = [];
      type EventStream = NodeJS.ReadableStream & { destroy?: () => void };
      let eventStream: EventStream | undefined;
      try {
        const untilTs = Math.floor(Date.now() / 1000);
        const sinceTs = untilTs - 5 * 60;
        eventStream = (await docker.getEvents({ since: sinceTs, until: untilTs })) as unknown as EventStream;
        const source = eventStream;

        // Parses one newline-delimited JSON record.
        const record = (line: string): void => {
          if (!line.trim()) return;
          try {
            const e = JSON.parse(line);
            recentEvents.push({
              action: e.Action,
              container: e.Actor?.Attributes?.name || e.Actor?.ID?.slice(0, 12),
              time: new Date(e.time * 1000).toISOString(),
            });
          } catch {
            // Malformed or truncated record: skip it.
          }
        };

        await new Promise<void>((resolve) => {
          // A chunk may end mid-record or carry several records, so buffer
          // bytes and split on newlines.
          let pending: Buffer = Buffer.alloc(0);
          let settled = false;

          const timer = setTimeout(() => finish(), 2000);

          // Idempotent: the first of timeout / end / error wins.
          const finish = (): void => {
            if (settled) return;
            settled = true;
            clearTimeout(timer);
            record(pending.toString("utf8"));
            resolve();
          };

          source.on("data", (chunk: Buffer | string) => {
            if (settled) return;
            const bytes = typeof chunk === "string" ? Buffer.from(chunk) : chunk;
            pending = Buffer.concat([pending, bytes]);
            let newline = pending.indexOf(0x0a);
            while (newline !== -1) {
              record(pending.subarray(0, newline).toString("utf8"));
              pending = pending.subarray(newline + 1);
              newline = pending.indexOf(0x0a);
            }
          });
          // The error listener stays attached after settling so an error
          // emitted during teardown is not left unhandled.
          source.on("error", finish);
          source.on("end", finish);
        });
      } catch {
        // Events are optional for the dashboard; report what was collected.
      } finally {
        // Release the events connection instead of leaving it open.
        eventStream?.destroy?.();
      }

      // Threshold violations (fixed 80% CPU / 80% memory limits).
      const violations = usage.filter((s) => s.cpu_percent > 80 || s.memory_percent > 80);

      const dashboard = {
        summary: {
          total_containers: containers.length,
          running: containers.filter((c) => c.State === "running").length,
          unhealthy: health.filter((h) => h.health === "unhealthy").length,
        },
        health,
        top_cpu_consumers: topConsumers,
        // Events were recorded in arrival order; keep the last ten collected
        // as the most recent.
        recent_events: recentEvents.slice(-10),
        threshold_violations: violations,
      };

      return {
        content: [{ type: "text", text: JSON.stringify(dashboard, null, 2) }],
      };
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      return {
        content: [{ type: "text", text: `Error: ${message}` }],
        isError: true,
      };
    }
  }
);
|
|
369
|
+
}
|
package/src/types.ts
CHANGED
|
@@ -150,3 +150,34 @@ export const ListNetworksSchema = z.object({
|
|
|
150
150
|
/** Input schema for listing volumes: one optional free-text `filter`. */
export const ListVolumesSchema = z.object({
  filter: z
    .string()
    .optional()
    .describe("Filter by name or driver"),
});
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
// Monitoring schemas (v0.2.0)
|
|
156
|
+
/** Input schema for the `fleet_status` tool: an empty object, i.e. the tool takes no parameters. */
export const FleetStatusSchema = z.object({});
|
|
157
|
+
|
|
158
|
+
/** Input schema for the `fleet_stats` tool: an optional metric to sort the results by. */
export const FleetStatsSchema = z.object({
  sort_by: z
    .enum(["cpu", "memory", "network"])
    .optional()
    .describe("Sort results by metric (default: cpu)"),
});
|
|
161
|
+
|
|
162
|
+
/**
 * Input schema for the `watch_events` tool. Every field is optional; the
 * defaults named in the descriptions are applied by the tool handler, not by
 * this schema.
 */
export const WatchEventsSchema = z.object({
  container: z
    .string()
    .optional()
    .describe("Filter by container name or ID"),
  event_type: z
    .enum(["start", "stop", "die", "restart", "health_status", "oom", "all"])
    .optional()
    .describe("Filter by event type (default: all)"),
  since: z
    .string()
    .optional()
    .describe("Show events since timestamp (e.g., '2026-01-01T00:00:00Z')"),
  duration: z
    .number()
    .optional()
    .describe("Max seconds to listen (default: 30)"),
});
|
|
168
|
+
|
|
169
|
+
/**
 * Input schema for the `search_logs` tool. Only `pattern` is required; the
 * defaults named in the descriptions are applied by the tool handler, not by
 * this schema.
 */
export const SearchLogsSchema = z.object({
  pattern: z
    .string()
    .describe("Regex or grep pattern to search for"),
  containers: z
    .array(z.string())
    .optional()
    .describe("Specific containers to search (default: all running)"),
  tail: z
    .number()
    .optional()
    .describe("Max lines to scan per container (default: 500)"),
  since: z
    .string()
    .optional()
    .describe("Only search logs since timestamp"),
  ignore_case: z
    .boolean()
    .optional()
    .describe("Case-insensitive search (default: false)"),
});
|
|
176
|
+
|
|
177
|
+
/**
 * Input schema for the `check_thresholds` tool. Each limit is optional; the
 * defaults named in the descriptions are applied by the tool handler, not by
 * this schema.
 */
export const CheckThresholdsSchema = z.object({
  cpu_percent: z
    .number()
    .optional()
    .describe("Alert if CPU usage exceeds this % (default: 80)"),
  memory_percent: z
    .number()
    .optional()
    .describe("Alert if memory usage exceeds this % (default: 80)"),
  restart_count: z
    .number()
    .optional()
    .describe("Alert if restart count exceeds this (default: 5)"),
});
|
|
182
|
+
|
|
183
|
+
/** Input schema for the `monitor_dashboard` tool: an empty object, i.e. the tool takes no parameters. */
export const MonitorDashboardSchema = z.object({});
|