@supernova123/docker-mcp-server 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,369 @@
1
+ import Dockerode from "dockerode";
2
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
+ import {
4
+ FleetStatusSchema,
5
+ FleetStatsSchema,
6
+ WatchEventsSchema,
7
+ SearchLogsSchema,
8
+ CheckThresholdsSchema,
9
+ MonitorDashboardSchema,
10
+ } from "../types.js";
11
+
12
/** CPU/memory usage percentage above which `monitor_dashboard` reports a violation. */
const DASHBOARD_USAGE_THRESHOLD = 80;

/** How long `monitor_dashboard` waits for the event history before giving up (ms). */
const DASHBOARD_EVENT_WAIT_MS = 2000;

/**
 * Subset of the Docker stats payload read by {@link summarizeUsage}.
 * Every field is optional so that a missing value falls back to 0 instead of throwing.
 */
interface RawContainerStats {
  cpu_stats?: {
    cpu_usage?: { total_usage?: number; percpu_usage?: number[] };
    system_cpu_usage?: number;
    online_cpus?: number;
  };
  precpu_stats?: {
    cpu_usage?: { total_usage?: number };
    system_cpu_usage?: number;
  };
  memory_stats?: { usage?: number; limit?: number };
  networks?: Record<string, { rx_bytes?: number; tx_bytes?: number }>;
}

/** Query accepted by the Docker `/events` endpoint, as used by {@link collectEvents}. */
interface EventQuery {
  since?: number;
  until?: number;
  filters?: { container?: string[]; event?: string[] };
}

/** Minimal surface of the event stream that {@link collectEvents} relies on. */
interface EventStreamLike {
  on(event: string, listener: (...args: any[]) => void): unknown;
  destroy?: () => void;
}

/** Flattened Docker event as returned to the MCP client. */
interface MonitorEvent {
  type?: string;
  action?: string;
  container?: string;
  time?: string;
}

/** Extracts a printable message from anything that may have been thrown. */
function errorMessage(err: unknown): string {
  if (err instanceof Error) return err.message;
  if (typeof err === "object" && err !== null && "message" in err) {
    return String((err as { message: unknown }).message);
  }
  return String(err);
}

/** Builds a successful MCP tool result carrying a single text item. */
function textResult(text: string) {
  return { content: [{ type: "text" as const, text }] };
}

/** Builds an MCP tool error result in the `Error: <message>` format used by every tool here. */
function errorResult(err: unknown) {
  return {
    content: [{ type: "text" as const, text: `Error: ${errorMessage(err)}` }],
    isError: true,
  };
}

/** Returns the container's first name without the leading slash, or its 12-char short ID. */
function displayName(c: { Id: string; Names?: string[] }): string {
  return c.Names?.[0]?.replace(/^\//, "") || c.Id.slice(0, 12);
}

/** Rounds to two decimal places. */
function round2(value: number): number {
  return Math.round(value * 100) / 100;
}

/**
 * Derives CPU %, memory usage/%, and network totals from one stats sample.
 *
 * CPU % is the container's share of the system CPU delta scaled by the CPU count.
 * Both percentages are 0 when the denominator is missing or not positive, so the
 * result never contains `NaN`/`Infinity` (which `JSON.stringify` would emit as `null`).
 */
function summarizeUsage(stats: RawContainerStats) {
  const cpu = stats.cpu_stats;
  const pre = stats.precpu_stats;
  const cpuDelta = (cpu?.cpu_usage?.total_usage ?? 0) - (pre?.cpu_usage?.total_usage ?? 0);
  const systemDelta = (cpu?.system_cpu_usage ?? 0) - (pre?.system_cpu_usage ?? 0);
  // online_cpus can be absent or 0; fall back to the per-CPU array length, then 1.
  const cpuCount = cpu?.online_cpus || cpu?.cpu_usage?.percpu_usage?.length || 1;
  const cpuPercent = systemDelta > 0 && cpuDelta > 0 ? (cpuDelta / systemDelta) * cpuCount * 100 : 0;

  const memoryBytes = stats.memory_stats?.usage ?? 0;
  const memoryLimit = stats.memory_stats?.limit ?? 0;
  const memoryPercent = memoryLimit > 0 ? (memoryBytes / memoryLimit) * 100 : 0;

  let rxBytes = 0;
  let txBytes = 0;
  for (const iface of Object.values(stats.networks ?? {})) {
    rxBytes += iface.rx_bytes ?? 0;
    txBytes += iface.tx_bytes ?? 0;
  }

  return { cpuPercent, memoryBytes, memoryPercent, rxBytes, txBytes };
}

/**
 * Converts a user-supplied timestamp to whole Unix seconds.
 * Accepts anything `Date` can parse, or a plain number of seconds.
 *
 * @throws Error when the value cannot be interpreted as a point in time.
 */
function toUnixSeconds(value: string, label: string): number {
  const trimmed = value.trim();
  const ms = /^\d+(\.\d+)?$/.test(trimmed) ? Number(trimmed) * 1000 : new Date(trimmed).getTime();
  if (!Number.isFinite(ms)) {
    throw new Error(`Invalid ${label} timestamp: ${value}`);
  }
  return Math.floor(ms / 1000);
}

/**
 * Decodes the body returned by the container logs endpoint into plain text.
 *
 * Non-TTY containers return a multiplexed stream: each frame is an 8-byte header
 * (`[stream, 0, 0, 0, size as uint32 BE]`) followed by `size` payload bytes.
 * Frames are unwrapped and concatenated. If the buffer does not start with a valid
 * header it is assumed to be a raw TTY stream and is decoded as-is.
 */
function decodeDockerLogs(raw: Buffer | string): string {
  if (typeof raw === "string") return raw;

  const payloads: Buffer[] = [];
  let offset = 0;
  while (offset + 8 <= raw.length) {
    const looksFramed =
      raw.readUInt8(offset) <= 2 &&
      raw.readUInt8(offset + 1) === 0 &&
      raw.readUInt8(offset + 2) === 0 &&
      raw.readUInt8(offset + 3) === 0;
    if (!looksFramed) break;
    const size = raw.readUInt32BE(offset + 4);
    const start = offset + 8;
    const end = Math.min(start + size, raw.length);
    payloads.push(raw.subarray(start, end));
    offset = end;
  }

  if (offset === 0) return raw.toString("utf-8");
  // Keep any trailing bytes that did not form a complete frame rather than dropping them.
  if (offset < raw.length) payloads.push(raw.subarray(offset));
  return Buffer.concat(payloads).toString("utf-8");
}

/**
 * Reads Docker events until the stream ends, errors, or `maxWaitMs` elapses.
 *
 * The stream is newline-delimited JSON. Chunks are buffered and split on newlines so
 * that several events in one chunk, or one event split across chunks, are all parsed.
 * Malformed lines are skipped. The stream is always destroyed before returning so the
 * underlying daemon connection is not leaked.
 */
async function collectEvents(docker: Dockerode, query: EventQuery, maxWaitMs: number): Promise<MonitorEvent[]> {
  const stream: EventStreamLike = await docker.getEvents(query);
  const events: MonitorEvent[] = [];
  let pending = "";

  const consume = (line: string): void => {
    if (!line.trim()) return;
    try {
      const e = JSON.parse(line);
      events.push({
        type: e.Type,
        action: e.Action,
        container: e.Actor?.Attributes?.name || e.Actor?.ID?.slice(0, 12),
        time: new Date(e.time * 1000).toISOString(),
      });
    } catch {
      // Best effort: a malformed or timestamp-less event is skipped, not fatal.
    }
  };

  try {
    await new Promise<void>((resolve) => {
      const timer = setTimeout(resolve, maxWaitMs);
      const finish = (): void => {
        clearTimeout(timer);
        resolve();
      };
      stream.on("data", (chunk: Buffer | string) => {
        pending += chunk.toString();
        const lines = pending.split("\n");
        pending = lines.pop() ?? "";
        lines.forEach(consume);
      });
      stream.on("error", finish);
      stream.on("end", finish);
    });
    consume(pending);
  } finally {
    stream.destroy?.();
  }
  return events;
}

/**
 * Registers the six fleet-monitoring tools on the MCP server:
 * `fleet_status`, `fleet_stats`, `watch_events`, `search_logs`,
 * `check_thresholds` and `monitor_dashboard`.
 *
 * Every handler returns a single text item (JSON or a short message) and converts
 * any thrown error into an `isError` result instead of rejecting.
 *
 * @param server MCP server the tools are registered on.
 * @param docker Dockerode client used for all daemon calls.
 */
export function registerMonitoringTools(server: McpServer, docker: Dockerode): void {
  // 1. fleet_status — health status of all running containers
  server.tool(
    "fleet_status",
    "Get health status of all running containers. Returns name, state, health, uptime, and restart count for each.",
    FleetStatusSchema.shape,
    async () => {
      try {
        const containers = await docker.listContainers({ all: false });
        const results = await Promise.all(
          containers.map(async (c) => {
            const info = await docker.getContainer(c.Id).inspect();
            return {
              name: displayName(c),
              id: c.Id.slice(0, 12),
              state: c.State,
              status: c.Status,
              health: info.State.Health?.Status || "no-healthcheck",
              // Field name kept for compatibility; the value is the start timestamp.
              uptime: info.State.StartedAt,
              restartCount: info.RestartCount,
              image: c.Image,
            };
          })
        );
        return textResult(JSON.stringify(results, null, 2));
      } catch (err: unknown) {
        return errorResult(err);
      }
    }
  );

  // 2. fleet_stats — resource usage for all running containers
  server.tool(
    "fleet_stats",
    "Get resource usage (CPU%, memory%, network I/O) for all running containers. Sorted by usage.",
    FleetStatsSchema.shape,
    async (params) => {
      try {
        const containers = await docker.listContainers({ all: false });
        const results = await Promise.all(
          containers.map(async (c) => {
            const usage = summarizeUsage(await docker.getContainer(c.Id).stats({ stream: false }));
            return {
              name: displayName(c),
              id: c.Id.slice(0, 12),
              cpu_percent: round2(usage.cpuPercent),
              memory_usage_mb: round2(usage.memoryBytes / 1024 / 1024),
              memory_percent: round2(usage.memoryPercent),
              network_rx_mb: round2(usage.rxBytes / 1024 / 1024),
              network_tx_mb: round2(usage.txBytes / 1024 / 1024),
            };
          })
        );

        const sortBy = params.sort_by || "cpu";
        results.sort((a, b) => {
          if (sortBy === "cpu") return b.cpu_percent - a.cpu_percent;
          if (sortBy === "memory") return b.memory_percent - a.memory_percent;
          return (b.network_rx_mb + b.network_tx_mb) - (a.network_rx_mb + a.network_tx_mb);
        });

        return textResult(JSON.stringify(results, null, 2));
      } catch (err: unknown) {
        return errorResult(err);
      }
    }
  );

  // 3. watch_events — collect Docker events for a bounded duration
  server.tool(
    "watch_events",
    "Collect Docker events (start, stop, die, restart, health_status) over a time window. Filter by container or event type.",
    WatchEventsSchema.shape,
    async (params) => {
      try {
        // Missing, zero or negative durations fall back to the documented 30 s default.
        const seconds = typeof params.duration === "number" && params.duration > 0 ? params.duration : 30;

        // The daemon only honours container/event filters inside the `filters` object.
        const query: EventQuery = {};
        const filters: NonNullable<EventQuery["filters"]> = {};
        if (params.container) filters.container = [params.container];
        if (params.event_type && params.event_type !== "all") filters.event = [params.event_type];
        if (Object.keys(filters).length > 0) query.filters = filters;
        if (params.since) query.since = toUnixSeconds(params.since, "since");

        const events = await collectEvents(docker, query, seconds * 1000);
        return textResult(
          events.length ? JSON.stringify(events, null, 2) : "No events captured in the time window."
        );
      } catch (err: unknown) {
        return errorResult(err);
      }
    }
  );

  // 4. search_logs — search logs across multiple containers
  server.tool(
    "search_logs",
    "Search logs across multiple containers with regex/grep pattern. Returns matching lines with container name and timestamp.",
    SearchLogsSchema.shape,
    async (params) => {
      try {
        const requested: string[] = params.containers || [];
        let targets: { id: string; name: string }[];

        if (requested.length > 0) {
          targets = await Promise.all(
            requested.map(async (id: string) => {
              const info = await docker.getContainer(id).inspect();
              return { id, name: info.Name.replace(/^\//, "") };
            })
          );
        } else {
          const running = await docker.listContainers({ all: false });
          targets = running.map((c) => ({ id: c.Id, name: displayName(c) }));
        }

        // NOTE(review): the pattern is caller-supplied; a pathological regex can be slow
        // (ReDoS). Consider bounding pattern length or line length if callers are untrusted.
        const matcher = new RegExp(params.pattern, params.ignore_case ? "i" : "");
        const sinceSeconds = params.since ? toUnixSeconds(params.since, "since") : undefined;
        const matches: { container: string; line: string }[] = [];

        for (const target of targets) {
          try {
            const raw = await docker.getContainer(target.id).logs({
              stdout: true,
              stderr: true,
              tail: params.tail || 500,
              ...(sinceSeconds !== undefined ? { since: sinceSeconds } : {}),
            });
            for (const line of decodeDockerLogs(raw).split("\n")) {
              if (matcher.test(line)) {
                matches.push({ container: target.name, line: line.trim() });
              }
            }
          } catch {
            // Best effort: a container whose logs cannot be read is skipped so the
            // remaining containers are still searched.
          }
        }

        return textResult(matches.length ? JSON.stringify(matches, null, 2) : "No matches found.");
      } catch (err: unknown) {
        return errorResult(err);
      }
    }
  );

  // 5. check_thresholds — check all containers against thresholds
  server.tool(
    "check_thresholds",
    "Check all containers against defined thresholds (CPU > X%, memory > Y%, restarts > Z). Returns violations.",
    CheckThresholdsSchema.shape,
    async (params) => {
      try {
        const cpuThreshold = params.cpu_percent ?? 80;
        const memThreshold = params.memory_percent ?? 80;
        const restartThreshold = params.restart_count ?? 5;
        const containers = await docker.listContainers({ all: false });

        // Containers are checked in parallel rather than one after another.
        const checked = await Promise.all(
          containers.map(async (c) => {
            const container = docker.getContainer(c.Id);
            const info = await container.inspect();
            const issues: string[] = [];

            if (info.RestartCount > restartThreshold) {
              issues.push(`restarts: ${info.RestartCount} > ${restartThreshold}`);
            }

            try {
              const usage = summarizeUsage(await container.stats({ stream: false }));
              if (usage.cpuPercent > cpuThreshold) {
                issues.push(`cpu: ${Math.round(usage.cpuPercent)}% > ${cpuThreshold}%`);
              }
              if (usage.memoryPercent > memThreshold) {
                issues.push(`memory: ${Math.round(usage.memoryPercent)}% > ${memThreshold}%`);
              }
            } catch {
              // Best effort: if stats are unavailable only the restart check applies.
            }

            return issues.length > 0 ? { container: displayName(c), id: c.Id.slice(0, 12), issues } : null;
          })
        );
        const violations = checked.filter((v) => v !== null);

        return textResult(
          violations.length
            ? JSON.stringify({ violations, checked: containers.length }, null, 2)
            : JSON.stringify({ message: "All containers within thresholds.", checked: containers.length })
        );
      } catch (err: unknown) {
        return errorResult(err);
      }
    }
  );

  // 6. monitor_dashboard — single-call fleet summary
  server.tool(
    "monitor_dashboard",
    "Single-call fleet summary: health status, top resource consumers, recent events, threshold violations. Designed for agent quick-assessment.",
    MonitorDashboardSchema.shape,
    async () => {
      try {
        const containers = await docker.listContainers({ all: false });

        // Fleet health
        const health = await Promise.all(
          containers.map(async (c) => {
            const info = await docker.getContainer(c.Id).inspect();
            return {
              name: displayName(c),
              state: c.State,
              health: info.State.Health?.Status || "no-healthcheck",
              restartCount: info.RestartCount,
            };
          })
        );

        // Resource usage; containers whose stats cannot be read are left out.
        const sampled = await Promise.all(
          containers.map(async (c) => {
            try {
              const usage = summarizeUsage(await docker.getContainer(c.Id).stats({ stream: false }));
              return {
                name: displayName(c),
                cpu_percent: round2(usage.cpuPercent),
                memory_percent: round2(usage.memoryPercent),
              };
            } catch {
              return null;
            }
          })
        );
        const measured = sampled.flatMap((row) => (row === null ? [] : [row]));
        const topConsumers = [...measured].sort((a, b) => b.cpu_percent - a.cpu_percent).slice(0, 5);

        // Recent events (last 5 minutes). `until` makes the daemon close the stream once
        // the history has been replayed, so the wait is only a safety net.
        let recentEvents: { action?: string; container?: string; time?: string }[] = [];
        try {
          const nowSeconds = Math.floor(Date.now() / 1000);
          const history = await collectEvents(
            docker,
            { since: nowSeconds - 5 * 60, until: nowSeconds },
            DASHBOARD_EVENT_WAIT_MS
          );
          recentEvents = history.map(({ action, container, time }) => ({ action, container, time }));
        } catch {
          // Best effort: the dashboard is still useful without the event section.
        }

        // Threshold violations
        const violations = measured.filter(
          (row) => row.cpu_percent > DASHBOARD_USAGE_THRESHOLD || row.memory_percent > DASHBOARD_USAGE_THRESHOLD
        );

        const dashboard = {
          summary: {
            total_containers: containers.length,
            running: containers.filter((c) => c.State === "running").length,
            unhealthy: health.filter((h) => h.health === "unhealthy").length,
          },
          health,
          top_cpu_consumers: topConsumers,
          // Events arrive oldest-first; keep the ten most recent ones.
          recent_events: recentEvents.slice(-10),
          threshold_violations: violations,
        };

        return textResult(JSON.stringify(dashboard, null, 2));
      } catch (err: unknown) {
        return errorResult(err);
      }
    }
  );
}
package/src/types.ts CHANGED
@@ -150,3 +150,34 @@ export const ListNetworksSchema = z.object({
150
150
  export const ListVolumesSchema = z.object({
151
151
  filter: z.string().optional().describe("Filter by name or driver"),
152
152
  });
153
+
154
+
155
// Monitoring schemas (v0.2.0)

/** Input for `fleet_status`: the tool takes no parameters. */
export const FleetStatusSchema = z.object({});

/** Input for `fleet_stats`: optional metric used to order the results. */
export const FleetStatsSchema = z.object({
  sort_by: z
    .enum(["cpu", "memory", "network"])
    .optional()
    .describe("Sort results by metric (default: cpu)"),
});

/** Input for `watch_events`: optional container/event filters and the listening window. */
export const WatchEventsSchema = z.object({
  container: z
    .string()
    .optional()
    .describe("Filter by container name or ID"),
  event_type: z
    .enum(["start", "stop", "die", "restart", "health_status", "oom", "all"])
    .optional()
    .describe("Filter by event type (default: all)"),
  since: z
    .string()
    .optional()
    .describe("Show events since timestamp (e.g., '2026-01-01T00:00:00Z')"),
  duration: z
    .number()
    .optional()
    .describe("Max seconds to listen (default: 30)"),
});

/** Input for `search_logs`: required pattern plus optional scope and matching options. */
export const SearchLogsSchema = z.object({
  pattern: z
    .string()
    .describe("Regex or grep pattern to search for"),
  containers: z
    .array(z.string())
    .optional()
    .describe("Specific containers to search (default: all running)"),
  tail: z
    .number()
    .optional()
    .describe("Max lines to scan per container (default: 500)"),
  since: z
    .string()
    .optional()
    .describe("Only search logs since timestamp"),
  ignore_case: z
    .boolean()
    .optional()
    .describe("Case-insensitive search (default: false)"),
});

/** Input for `check_thresholds`: optional per-metric alert limits. */
export const CheckThresholdsSchema = z.object({
  cpu_percent: z
    .number()
    .optional()
    .describe("Alert if CPU usage exceeds this % (default: 80)"),
  memory_percent: z
    .number()
    .optional()
    .describe("Alert if memory usage exceeds this % (default: 80)"),
  restart_count: z
    .number()
    .optional()
    .describe("Alert if restart count exceeds this (default: 5)"),
});

/** Input for `monitor_dashboard`: the tool takes no parameters. */
export const MonitorDashboardSchema = z.object({});