@sleep2agi/commhub-server 0.8.1 → 0.8.3-preview.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sleep2agi/commhub-server",
3
- "version": "0.8.1",
3
+ "version": "0.8.3-preview.0",
4
4
  "description": "CommHub Server — AI Agent communication hub with MCP protocol, multi-network isolation, user auth, and 17 MCP tools.",
5
5
  "type": "module",
6
6
  "main": "src/index.ts",
package/src/db.ts CHANGED
@@ -24,6 +24,14 @@ db.exec(`
24
24
  mem_total_gb REAL,
25
25
  mem_used_gb REAL,
26
26
  mem_avail_gb REAL,
27
+ disk_total_gb REAL,
28
+ disk_used_gb REAL,
29
+ disk_avail_gb REAL,
30
+ process_rss_bytes INTEGER,
31
+ process_rss_mb REAL,
32
+ process_cpu_pct REAL,
33
+ process_uptime_seconds REAL,
34
+ process_in_flight_count INTEGER,
27
35
  network_id TEXT NOT NULL DEFAULT 'default',
28
36
  registered_at TEXT NOT NULL DEFAULT (datetime('now')),
29
37
  updated_at TEXT NOT NULL DEFAULT (datetime('now')),
@@ -73,10 +81,48 @@ for (const col of [
73
81
  { name: "mem_total_gb", def: "REAL" },
74
82
  { name: "mem_used_gb", def: "REAL" },
75
83
  { name: "mem_avail_gb", def: "REAL" },
84
+ { name: "disk_total_gb", def: "REAL" },
85
+ { name: "disk_used_gb", def: "REAL" },
86
+ { name: "disk_avail_gb", def: "REAL" },
87
+ { name: "process_rss_bytes", def: "INTEGER" },
88
+ { name: "process_rss_mb", def: "REAL" },
89
+ { name: "process_cpu_pct", def: "REAL" },
90
+ { name: "process_uptime_seconds", def: "REAL" },
91
+ { name: "process_in_flight_count", def: "INTEGER" },
76
92
  ]) {
77
93
  try { db.exec(`ALTER TABLE sessions ADD COLUMN ${col.name} ${col.def}`); } catch {}
78
94
  }
79
95
 
96
+ db.exec(`
97
+ CREATE TABLE IF NOT EXISTS agent_telemetry (
98
+ id TEXT PRIMARY KEY,
99
+ network_id TEXT NOT NULL DEFAULT 'default',
100
+ resume_id TEXT,
101
+ alias TEXT,
102
+ hostname TEXT,
103
+ ip TEXT,
104
+ cpu_load_1min REAL,
105
+ cpu_cores INTEGER,
106
+ mem_total_gb REAL,
107
+ mem_used_gb REAL,
108
+ mem_avail_gb REAL,
109
+ disk_total_gb REAL,
110
+ disk_used_gb REAL,
111
+ disk_avail_gb REAL,
112
+ process_rss_bytes INTEGER,
113
+ process_rss_mb REAL,
114
+ process_cpu_pct REAL,
115
+ process_uptime_seconds REAL,
116
+ process_in_flight_count INTEGER,
117
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
118
+ );
119
+
120
+ CREATE INDEX IF NOT EXISTS idx_agent_telemetry_host_time
121
+ ON agent_telemetry(network_id, hostname, ip, created_at);
122
+ CREATE INDEX IF NOT EXISTS idx_agent_telemetry_alias_time
123
+ ON agent_telemetry(network_id, alias, created_at);
124
+ `);
125
+
80
126
  // inbox: add in_reply_to, requires_response, expires_at, scope
81
127
  for (const col of [
82
128
  { name: "in_reply_to", def: "TEXT" },
@@ -392,6 +438,14 @@ function migrateSessionsNetworkAliasUnique() {
392
438
  mem_total_gb REAL,
393
439
  mem_used_gb REAL,
394
440
  mem_avail_gb REAL,
441
+ disk_total_gb REAL,
442
+ disk_used_gb REAL,
443
+ disk_avail_gb REAL,
444
+ process_rss_bytes INTEGER,
445
+ process_rss_mb REAL,
446
+ process_cpu_pct REAL,
447
+ process_uptime_seconds REAL,
448
+ process_in_flight_count INTEGER,
395
449
  network_id TEXT NOT NULL DEFAULT 'default',
396
450
  UNIQUE (network_id, alias)
397
451
  )
@@ -401,13 +455,17 @@ function migrateSessionsNetworkAliasUnique() {
401
455
  resume_id, alias, tmux_name, server, ip, hostname, agent, project_dir, version,
402
456
  status, task, output, progress, score, registered_at, updated_at, node_id,
403
457
  session_id, config_path, channels, last_seen_at, model, cpu_load_1min,
404
- cpu_cores, mem_total_gb, mem_used_gb, mem_avail_gb, network_id
458
+ cpu_cores, mem_total_gb, mem_used_gb, mem_avail_gb, disk_total_gb,
459
+ disk_used_gb, disk_avail_gb, process_rss_bytes, process_rss_mb, process_cpu_pct,
460
+ process_uptime_seconds, process_in_flight_count, network_id
405
461
  )
406
462
  SELECT
407
463
  resume_id, alias, tmux_name, server, ip, hostname, agent, project_dir, version,
408
464
  status, task, output, progress, score, registered_at, updated_at, node_id,
409
465
  session_id, config_path, channels, last_seen_at, model, cpu_load_1min,
410
- cpu_cores, mem_total_gb, mem_used_gb, mem_avail_gb,
466
+ cpu_cores, mem_total_gb, mem_used_gb, mem_avail_gb, disk_total_gb,
467
+ disk_used_gb, disk_avail_gb, process_rss_bytes, process_rss_mb, process_cpu_pct,
468
+ process_uptime_seconds, process_in_flight_count,
411
469
  COALESCE(NULLIF(network_id, ''), 'default')
412
470
  FROM sessions
413
471
  ORDER BY updated_at
package/src/index.ts CHANGED
@@ -229,6 +229,104 @@ function singleNetworkId(scope: RestNetworkScope): string | null {
229
229
  return null;
230
230
  }
231
231
 
232
/**
 * Formats a Date as a `YYYY-MM-DD HH:MM:SS` string in UTC, the same layout
 * SQLite's `datetime('now')` produces, so the result can be compared against
 * stored timestamp columns.
 *
 * @param date - Instant to format.
 * @returns The first 19 characters of the ISO string with "T" replaced by a space.
 * @throws RangeError when `date` is an invalid Date (from `toISOString`).
 */
function sqliteTime(date: Date): string {
  const iso = date.toISOString();
  const separatorAt = iso.indexOf("T");
  // Swap only the first "T" for a space, then drop milliseconds and the "Z".
  const spaced = separatorAt === -1
    ? iso
    : `${iso.slice(0, separatorAt)} ${iso.slice(separatorAt + 1)}`;
  return spaced.slice(0, 19);
}
235
+
236
/**
 * Converts a stored timestamp string into epoch milliseconds.
 *
 * Accepts both the space-separated form (`YYYY-MM-DD HH:MM:SS`) and ISO 8601
 * strings. A value that has a time part but no zone designator is treated as
 * UTC. Without this, `Date.parse` would read an offset-less "T" string as
 * server-local time while the space-separated form was read as UTC, so the
 * same instant could parse to two different values.
 *
 * @param value - Timestamp text, or null/undefined/empty.
 * @returns Epoch milliseconds, or 0 when the value is missing or unparseable.
 */
function parseSqliteTime(value: string | null | undefined): number {
  if (!value) return 0;
  const text = value.trim();
  // ISO 8601 needs a "T" between date and time; swap in the first space if absent.
  let normalized = text.includes("T") ? text : text.replace(" ", "T");
  const hasTimePart = normalized.includes("T");
  // Trailing "Z" or a numeric offset such as +02:00 / -0530.
  const hasZone = /(?:[zZ]|[+-]\d{2}:?\d{2})$/.test(normalized);
  // Date-only strings are already parsed as UTC, so only date-times need the suffix.
  if (hasTimePart && !hasZone) normalized += "Z";
  const ts = Date.parse(normalized);
  return Number.isFinite(ts) ? ts : 0;
}
242
+
243
/**
 * Expresses a 1-minute load average as a percentage of available cores,
 * rounded to one decimal place (load 2 on 4 cores gives 50).
 *
 * @param load - 1-minute load average, or null/undefined when not reported.
 * @param cores - Number of CPU cores, or null/undefined when not reported.
 * @returns The percentage, or null when either input is missing, non-finite,
 *   or `cores` is not positive.
 */
function cpuPct(load: number | null | undefined, cores: number | null | undefined): number | null {
  if (typeof load !== "number" || typeof cores !== "number") return null;
  // typeof accepts NaN and Infinity; reject them so callers never see NaN/Infinity percentages.
  if (!Number.isFinite(load) || !Number.isFinite(cores) || cores <= 0) return null;
  return Math.round((load / cores) * 1000) / 10;
}
247
+
248
/**
 * Classifies a host telemetry row as green, yellow or red.
 *
 * Red thresholds are checked first: CPU at or above 80%, less than 0.5 GB of
 * memory available, or less than 1 GB of disk available. If none trips, the
 * yellow thresholds are checked: CPU at or above 60%, memory below 1 GB,
 * disk below 5 GB. Only the messages for the level that is returned are
 * included in `alerts`.
 *
 * @param row - Row exposing `cpu_load_1min`, `cpu_cores`, `mem_avail_gb` and
 *   `disk_avail_gb`; may be null/undefined, and non-numeric fields are ignored.
 * @returns The level and the human-readable messages that caused it.
 */
function serverAlertLevel(row: any): { level: "green" | "yellow" | "red"; alerts: string[] } {
  const cpuPercent = cpuPct(row?.cpu_load_1min, row?.cpu_cores);
  const memAvail = row?.mem_avail_gb;
  const diskAvail = row?.disk_avail_gb;

  // Gathers the messages for a single set of thresholds.
  const collect = (cpuAtLeast: number, memBelow: number, diskBelow: number): string[] => {
    const found: string[] = [];
    if (cpuPercent !== null && cpuPercent >= cpuAtLeast) {
      found.push(`cpu ${cpuPercent}%`);
    }
    if (typeof memAvail === "number" && memAvail < memBelow) {
      found.push(`memory ${memAvail}GB available`);
    }
    if (typeof diskAvail === "number" && diskAvail < diskBelow) {
      found.push(`disk ${diskAvail}GB available`);
    }
    return found;
  };

  const critical = collect(80, 0.5, 1);
  if (critical.length > 0) {
    return { level: "red", alerts: critical };
  }

  const warnings = collect(60, 1, 5);
  if (warnings.length > 0) {
    return { level: "yellow", alerts: warnings };
  }
  return { level: "green", alerts: warnings };
}
261
+
262
/**
 * Derives the health chip shown for an agent from its status and last-seen time.
 *
 * @param status - Reported status; compared case-insensitively against "offline".
 * @param lastSeen - Timestamp text handed to `parseSqliteTime`.
 * @returns "offline" when the status says so; otherwise "online" when the
 *   last-seen time parses to a non-zero value no more than five minutes before
 *   now, and "stale" in every other case.
 */
function agentHealthChip(status: unknown, lastSeen: string | null | undefined): "online" | "offline" | "stale" {
  const STALE_AFTER_MS = 5 * 60 * 1000;

  const normalizedStatus = String(status || "").toLowerCase();
  if (normalizedStatus === "offline") {
    return "offline";
  }

  const seenAtMs = parseSqliteTime(lastSeen);
  // A falsy parse result means the timestamp is missing or unparseable.
  const isFresh = Boolean(seenAtMs) && Date.now() - seenAtMs <= STALE_AFTER_MS;
  return isFresh ? "online" : "stale";
}
268
+
269
/**
 * Groups telemetry rows into fixed-width time buckets and summarises each one.
 *
 * Rows whose `created_at` cannot be parsed, or that fall before `fromMs`, are
 * ignored. Per bucket the result carries the sample count, the mean CPU
 * percentage (one decimal) and mean 1-minute load (two decimals), the minimum
 * available memory/disk and the maximum used memory/disk. A metric for which
 * no row supplied a number is reported as null.
 *
 * @param rows - Rows exposing `created_at`, `cpu_load_1min`, `cpu_cores`,
 *   `mem_avail_gb`, `mem_used_gb`, `disk_avail_gb` and `disk_used_gb`.
 * @param fromMs - Inclusive lower bound, in epoch milliseconds.
 * @param bucketMs - Bucket width in milliseconds.
 * @returns Bucket summaries ordered by ascending bucket start; `ts` is the
 *   bucket start as an ISO string.
 */
function bucketTelemetry(rows: any[], fromMs: number, bucketMs: number) {
  type Accumulator = {
    ts: number;
    count: number;
    cpu_pct_sum: number;
    cpu_pct_count: number;
    cpu_load_sum: number;
    cpu_load_count: number;
    mem_avail_min: number | null;
    mem_used_max: number | null;
    disk_avail_min: number | null;
    disk_used_max: number | null;
  };

  // Running minimum/maximum that leaves the current value alone for non-numeric samples.
  const lowest = (current: number | null, sample: unknown): number | null => {
    if (typeof sample !== "number") return current;
    return current === null ? sample : Math.min(current, sample);
  };
  const highest = (current: number | null, sample: unknown): number | null => {
    if (typeof sample !== "number") return current;
    return current === null ? sample : Math.max(current, sample);
  };
  // Mean rounded to 1/scale, or null when nothing was accumulated.
  const roundedMean = (sum: number, samples: number, scale: number): number | null =>
    samples ? Math.round((sum / samples) * scale) / scale : null;

  const byStart = new Map<number, Accumulator>();

  for (const sample of rows) {
    const createdMs = parseSqliteTime(sample.created_at);
    if (!createdMs || createdMs < fromMs) continue;

    // Snap the timestamp down to the start of its bucket.
    const startMs = Math.floor(createdMs / bucketMs) * bucketMs;
    let acc = byStart.get(startMs);
    if (acc === undefined) {
      acc = {
        ts: startMs,
        count: 0,
        cpu_pct_sum: 0,
        cpu_pct_count: 0,
        cpu_load_sum: 0,
        cpu_load_count: 0,
        mem_avail_min: null,
        mem_used_max: null,
        disk_avail_min: null,
        disk_used_max: null,
      };
      byStart.set(startMs, acc);
    }

    acc.count += 1;

    const percent = cpuPct(sample.cpu_load_1min, sample.cpu_cores);
    if (percent !== null) {
      acc.cpu_pct_sum += percent;
      acc.cpu_pct_count += 1;
    }
    if (typeof sample.cpu_load_1min === "number") {
      acc.cpu_load_sum += sample.cpu_load_1min;
      acc.cpu_load_count += 1;
    }

    acc.mem_avail_min = lowest(acc.mem_avail_min, sample.mem_avail_gb);
    acc.mem_used_max = highest(acc.mem_used_max, sample.mem_used_gb);
    acc.disk_avail_min = lowest(acc.disk_avail_min, sample.disk_avail_gb);
    acc.disk_used_max = highest(acc.disk_used_max, sample.disk_used_gb);
  }

  const ordered = [...byStart.values()];
  ordered.sort((left, right) => left.ts - right.ts);

  return ordered.map((acc) => ({
    ts: new Date(acc.ts).toISOString(),
    count: acc.count,
    cpu_pct: roundedMean(acc.cpu_pct_sum, acc.cpu_pct_count, 10),
    cpu_load_1min: roundedMean(acc.cpu_load_sum, acc.cpu_load_count, 100),
    mem_avail_gb: acc.mem_avail_min,
    mem_used_gb: acc.mem_used_max,
    disk_avail_gb: acc.disk_avail_min,
    disk_used_gb: acc.disk_used_max,
  }));
}
329
+
232
330
  function canRestWriteNetwork(authCtx: { userId: string; networkId: string | null } | null, networkId: string | null, isAdmin: boolean): boolean {
233
331
  if (!authCtx) return true; // legacy global token or open dev mode
234
332
  if (isAdmin) return true;
@@ -244,6 +342,7 @@ const TaskSchema = z.object({
244
342
  priority: z.enum(["high", "normal", "low"]).default("normal"),
245
343
  from: z.string().max(200).optional(),
246
344
  network_id: z.string().max(200).optional(),
345
+ parent_task_id: z.string().max(200).optional(),
247
346
  });
248
347
 
249
348
  const BroadcastSchema = z.object({
@@ -830,6 +929,25 @@ Bun.serve({
830
929
  ...s,
831
930
  model: s.model ?? null,
832
931
  runtime: normalizeRuntime(s.agent),
932
+ host: {
933
+ hostname: s.hostname ?? null,
934
+ ip: s.ip ?? null,
935
+ cpu_load_1min: s.cpu_load_1min ?? null,
936
+ cpu_cores: s.cpu_cores ?? null,
937
+ mem_total_gb: s.mem_total_gb ?? null,
938
+ mem_used_gb: s.mem_used_gb ?? null,
939
+ mem_avail_gb: s.mem_avail_gb ?? null,
940
+ disk_total_gb: s.disk_total_gb ?? null,
941
+ disk_used_gb: s.disk_used_gb ?? null,
942
+ disk_avail_gb: s.disk_avail_gb ?? null,
943
+ },
944
+ process_telemetry: {
945
+ rss_bytes: s.process_rss_bytes ?? null,
946
+ rss_mb: s.process_rss_mb ?? null,
947
+ cpu_pct: s.process_cpu_pct ?? null,
948
+ uptime_seconds: s.process_uptime_seconds ?? null,
949
+ in_flight_count: s.process_in_flight_count ?? null,
950
+ },
833
951
  }));
834
952
  const summary = sessions.reduce((acc: any, session: any) => {
835
953
  const raw = String(session.status || "").toLowerCase();
@@ -894,6 +1012,127 @@ Bun.serve({
894
1012
  return withCors(req, Response.json(Array.from(grouped.values())));
895
1013
  }
896
1014
 
1015
+ const serverDetailMatch = url.pathname.match(/^\/api\/server\/([^/]+)\/(health|agents)$/);
1016
+ if (serverDetailMatch && req.method === "GET") {
1017
+ const host = decodeURIComponent(serverDetailMatch[1]);
1018
+ const detailKind = serverDetailMatch[2];
1019
+ if (!host) return withCors(req, Response.json({ ok: false, error: "host required" }, { status: 400 }));
1020
+
1021
+ const cutoff = sqliteTime(new Date(Date.now() - 10 * 60 * 1000));
1022
+ const staleParams: any[] = [cutoff];
1023
+ let staleSql = "UPDATE sessions SET status = 'offline' WHERE updated_at < ?1 AND status != 'offline'";
1024
+ staleSql = addNetworkScope(staleSql, staleParams, restScope);
1025
+ db.run(staleSql, staleParams);
1026
+
1027
+ if (detailKind === "agents") {
1028
+ const params: any[] = [host, host];
1029
+ let sql = `
1030
+ SELECT alias, agent, status, task, progress, model, hostname, ip,
1031
+ cpu_load_1min, cpu_cores, mem_avail_gb, mem_used_gb, mem_total_gb,
1032
+ disk_avail_gb, disk_used_gb, disk_total_gb,
1033
+ process_rss_bytes, process_rss_mb, process_cpu_pct, process_uptime_seconds, process_in_flight_count,
1034
+ COALESCE(last_seen_at, updated_at) AS last_seen
1035
+ FROM sessions
1036
+ WHERE (hostname = ?1 OR ip = ?2)
1037
+ `;
1038
+ sql = addNetworkScope(sql, params, restScope);
1039
+ sql += " ORDER BY alias";
1040
+ const agents = db.all<any>(sql, ...params).map((s) => ({
1041
+ alias: s.alias,
1042
+ runtime: normalizeRuntime(s.agent),
1043
+ raw_agent: s.agent ?? null,
1044
+ model: s.model ?? null,
1045
+ status: s.status ?? "offline",
1046
+ task: s.task ?? null,
1047
+ progress: s.progress ?? 0,
1048
+ last_seen: s.last_seen ?? null,
1049
+ health: agentHealthChip(s.status, s.last_seen),
1050
+ hostname: s.hostname ?? null,
1051
+ ip: s.ip ?? null,
1052
+ telemetry: {
1053
+ cpu_load_1min: s.cpu_load_1min ?? null,
1054
+ cpu_cores: s.cpu_cores ?? null,
1055
+ cpu_pct: cpuPct(s.cpu_load_1min, s.cpu_cores),
1056
+ mem_total_gb: s.mem_total_gb ?? null,
1057
+ mem_used_gb: s.mem_used_gb ?? null,
1058
+ mem_avail_gb: s.mem_avail_gb ?? null,
1059
+ disk_total_gb: s.disk_total_gb ?? null,
1060
+ disk_used_gb: s.disk_used_gb ?? null,
1061
+ disk_avail_gb: s.disk_avail_gb ?? null,
1062
+ process_rss_bytes: s.process_rss_bytes ?? null,
1063
+ process_rss_mb: s.process_rss_mb ?? null,
1064
+ process_cpu_pct: s.process_cpu_pct ?? null,
1065
+ process_uptime_seconds: s.process_uptime_seconds ?? null,
1066
+ process_in_flight_count: s.process_in_flight_count ?? null,
1067
+ },
1068
+ process_telemetry: {
1069
+ rss_bytes: s.process_rss_bytes ?? null,
1070
+ rss_mb: s.process_rss_mb ?? null,
1071
+ cpu_pct: s.process_cpu_pct ?? null,
1072
+ uptime_seconds: s.process_uptime_seconds ?? null,
1073
+ in_flight_count: s.process_in_flight_count ?? null,
1074
+ },
1075
+ }));
1076
+ if (agents.length === 0) return withCors(req, Response.json({ ok: false, error: "server not found" }, { status: 404 }));
1077
+ return withCors(req, Response.json({ ok: true, host, agent_count: agents.length, agents }));
1078
+ }
1079
+
1080
+ const latestParams: any[] = [host, host];
1081
+ let latestSql = `
1082
+ SELECT hostname, ip, COUNT(*) OVER () AS agent_count,
1083
+ cpu_load_1min, cpu_cores, mem_total_gb, mem_used_gb, mem_avail_gb,
1084
+ disk_total_gb, disk_used_gb, disk_avail_gb,
1085
+ COALESCE(last_seen_at, updated_at) AS last_seen
1086
+ FROM sessions
1087
+ WHERE (hostname = ?1 OR ip = ?2)
1088
+ `;
1089
+ latestSql = addNetworkScope(latestSql, latestParams, restScope);
1090
+ latestSql += " ORDER BY COALESCE(last_seen_at, updated_at) DESC LIMIT 1";
1091
+ const latest = db.get<any>(latestSql, ...latestParams);
1092
+ if (!latest) return withCors(req, Response.json({ ok: false, error: "server not found" }, { status: 404 }));
1093
+
1094
+ const since24h = sqliteTime(new Date(Date.now() - 24 * 60 * 60 * 1000));
1095
+ const histParams: any[] = [host, host, since24h];
1096
+ let histSql = `
1097
+ SELECT created_at, cpu_load_1min, cpu_cores, mem_total_gb, mem_used_gb, mem_avail_gb,
1098
+ disk_total_gb, disk_used_gb, disk_avail_gb
1099
+ FROM agent_telemetry
1100
+ WHERE (hostname = ?1 OR ip = ?2) AND created_at >= ?3
1101
+ `;
1102
+ histSql = addNetworkScope(histSql, histParams, restScope);
1103
+ histSql += " ORDER BY created_at ASC";
1104
+ const historyRows = db.all<any>(histSql, ...histParams);
1105
+ const now = Date.now();
1106
+ const alert = serverAlertLevel(latest);
1107
+
1108
+ return withCors(req, Response.json({
1109
+ ok: true,
1110
+ host,
1111
+ hostname: latest.hostname ?? null,
1112
+ ip: latest.ip ?? null,
1113
+ agent_count: latest.agent_count ?? 0,
1114
+ alert_level: alert.level,
1115
+ alerts: alert.alerts,
1116
+ latest: {
1117
+ cpu_load_1min: latest.cpu_load_1min ?? null,
1118
+ cpu_cores: latest.cpu_cores ?? null,
1119
+ cpu_pct: cpuPct(latest.cpu_load_1min, latest.cpu_cores),
1120
+ mem_total_gb: latest.mem_total_gb ?? null,
1121
+ mem_used_gb: latest.mem_used_gb ?? null,
1122
+ mem_avail_gb: latest.mem_avail_gb ?? null,
1123
+ disk_total_gb: latest.disk_total_gb ?? null,
1124
+ disk_used_gb: latest.disk_used_gb ?? null,
1125
+ disk_avail_gb: latest.disk_avail_gb ?? null,
1126
+ last_seen: latest.last_seen ?? null,
1127
+ },
1128
+ history: {
1129
+ "5m": bucketTelemetry(historyRows, now - 5 * 60 * 1000, 60 * 1000),
1130
+ "1h": bucketTelemetry(historyRows, now - 60 * 60 * 1000, 5 * 60 * 1000),
1131
+ "24h": bucketTelemetry(historyRows, now - 24 * 60 * 60 * 1000, 60 * 60 * 1000),
1132
+ },
1133
+ }));
1134
+ }
1135
+
897
1136
  // ── REST: send task ──
898
1137
  if (url.pathname === "/api/task" && req.method === "POST") {
899
1138
  let raw: unknown;
@@ -939,9 +1178,9 @@ Bun.serve({
939
1178
  [id, body.alias, body.priority, body.task, fromSession, taskNetId]
940
1179
  );
941
1180
  db.run(
942
- `INSERT INTO tasks (task_id, from_name, to_name, priority, status, content, requires_response, created_at, delivered_at, expires_at, network_id)
943
- VALUES (?1, ?2, ?3, ?4, 'delivered', ?5, 'reply', datetime('now'), datetime('now'), datetime('now', ?6), ?7)`,
944
- [id, fromSession, body.alias, body.priority, body.task, `+${ttlSeconds} seconds`, taskNetId]
1181
+ `INSERT INTO tasks (task_id, from_name, to_name, priority, status, content, requires_response, created_at, delivered_at, expires_at, network_id, parent_task_id)
1182
+ VALUES (?1, ?2, ?3, ?4, 'delivered', ?5, 'reply', datetime('now'), datetime('now'), datetime('now', ?6), ?7, ?8)`,
1183
+ [id, fromSession, body.alias, body.priority, body.task, `+${ttlSeconds} seconds`, taskNetId, body.parent_task_id ?? null]
945
1184
  );
946
1185
  // Touch session row so the dashboard reflects "task in flight"
947
1186
  // immediately, without waiting for the agent's report_status to
@@ -962,7 +1201,7 @@ Bun.serve({
962
1201
  if (taskNetId) { sessionSql += " AND network_id = ?2"; sessionParams.push(taskNetId); }
963
1202
  const targetSession = db.get<any>(sessionSql, ...sessionParams);
964
1203
  if (targetSession) pushEvent(body.alias, { type: "new_task", inbox_count: pending?.cnt ?? 1, priority: body.priority, from: fromSession }, taskNetId);
965
- return withCors(req, Response.json({ ok: true, message_id: id }));
1204
+ return withCors(req, Response.json({ ok: true, task_id: id, message_id: id }));
966
1205
  }
967
1206
 
968
1207
  // ── REST: broadcast ──
@@ -1222,6 +1461,21 @@ Bun.serve({
1222
1461
  return withCors(req, Response.json({ ok: true, nodes: rows, count: rows.length }));
1223
1462
  }
1224
1463
 
1464
+ // ── REST: single task lookup (V2) ──
1465
+ const taskPathMatch = url.pathname.match(/^\/api\/tasks?\/([^/]+)$/);
1466
+ if (taskPathMatch && req.method === "GET") {
1467
+ const taskId = decodeURIComponent(taskPathMatch[1] ?? "");
1468
+ const params: any[] = [taskId];
1469
+ let sql = "SELECT * FROM tasks WHERE task_id = ?1";
1470
+ sql = addNetworkScope(sql, params, restScope);
1471
+ sql += " LIMIT 1";
1472
+ const task = db.get(sql, ...params);
1473
+ if (!task) {
1474
+ return withCors(req, Response.json({ ok: false, error: "task_not_found", task_id: taskId }, { status: 404 }));
1475
+ }
1476
+ return withCors(req, Response.json({ ok: true, task }));
1477
+ }
1478
+
1225
1479
  // ── REST: tasks table (V2) ──
1226
1480
  if (url.pathname === "/api/tasks") {
1227
1481
  const taskId = url.searchParams.get("task_id");
package/src/tools.ts CHANGED
@@ -127,9 +127,20 @@ export function registerTools(server: McpServer, clientIP?: string, enforceNetwo
127
127
  mem_total_gb: z.number().nullable().optional(),
128
128
  mem_used_gb: z.number().nullable().optional(),
129
129
  mem_avail_gb: z.number().nullable().optional(),
130
+ disk_total_gb: z.number().nullable().optional(),
131
+ disk_used_gb: z.number().nullable().optional(),
132
+ disk_avail_gb: z.number().nullable().optional(),
130
133
  }).optional().describe("Host telemetry reported by agent-node"),
134
+ process_telemetry: z.object({
135
+ rss_bytes: z.number().nullable().optional(),
136
+ rss_mb: z.number().nullable().optional(),
137
+ rss: z.number().nullable().optional(),
138
+ cpu_pct: z.number().nullable().optional(),
139
+ uptime_seconds: z.number().nullable().optional(),
140
+ in_flight_count: z.number().nullable().optional(),
141
+ }).optional().describe("Per-agent process telemetry reported by agent-node"),
131
142
  },
132
- async ({ resume_id, alias, status, task, output, score, progress, server: srv, hostname: hn, agent: ag, project_dir: pd, version: ver, tmux_name: tmux, node_id, session_id, config_path, channels, model: mdl, node_name: nn, network_id: netId, host }) => {
143
+ async ({ resume_id, alias, status, task, output, score, progress, server: srv, hostname: hn, agent: ag, project_dir: pd, version: ver, tmux_name: tmux, node_id, session_id, config_path, channels, model: mdl, node_name: nn, network_id: netId, host, process_telemetry: proc }) => {
133
144
  const effectiveNetId = getNetworkId(netId);
134
145
  const sessionNetId = effectiveNetId ?? "default";
135
146
  if (!callerTokenIsNetwork || !enforceNetworkId) {
@@ -147,13 +158,42 @@ export function registerTools(server: McpServer, clientIP?: string, enforceNetwo
147
158
  const memTotalGb = typeof host?.mem_total_gb === "number" ? host.mem_total_gb : null;
148
159
  const memUsedGb = typeof host?.mem_used_gb === "number" ? host.mem_used_gb : null;
149
160
  const memAvailGb = typeof host?.mem_avail_gb === "number" ? host.mem_avail_gb : null;
161
+ const diskTotalGb = typeof host?.disk_total_gb === "number" ? host.disk_total_gb : null;
162
+ const diskUsedGb = typeof host?.disk_used_gb === "number" ? host.disk_used_gb : null;
163
+ const diskAvailGb = typeof host?.disk_avail_gb === "number" ? host.disk_avail_gb : null;
164
+ const processRssBytes = typeof proc?.rss_bytes === "number" ? proc.rss_bytes : (typeof proc?.rss === "number" ? proc.rss : null);
165
+ const processRssMb = typeof proc?.rss_mb === "number"
166
+ ? proc.rss_mb
167
+ : (typeof processRssBytes === "number" ? Math.round((processRssBytes / 1024 / 1024) * 10) / 10 : null);
168
+ const processCpuPct = typeof proc?.cpu_pct === "number" ? proc.cpu_pct : null;
169
+ const processUptimeSeconds = typeof proc?.uptime_seconds === "number" ? proc.uptime_seconds : null;
170
+ const processInFlightCount = typeof proc?.in_flight_count === "number" ? proc.in_flight_count : null;
171
+ const statusHostTelemetry = host ? {
172
+ hostname: hostHostname,
173
+ ip: hostIp,
174
+ cpu_load_1min: cpuLoad1m,
175
+ cpu_cores: cpuCores,
176
+ mem_total_gb: memTotalGb,
177
+ mem_used_gb: memUsedGb,
178
+ mem_avail_gb: memAvailGb,
179
+ disk_total_gb: diskTotalGb,
180
+ disk_used_gb: diskUsedGb,
181
+ disk_avail_gb: diskAvailGb,
182
+ } : null;
183
+ const statusProcessTelemetry = proc ? {
184
+ rss_bytes: processRssBytes,
185
+ rss_mb: processRssMb,
186
+ cpu_pct: processCpuPct,
187
+ uptime_seconds: processUptimeSeconds,
188
+ in_flight_count: processInFlightCount,
189
+ } : null;
150
190
 
151
191
  db.transaction(() => {
152
192
  // Only delete same-alias sessions within the same network
153
193
  db.run("DELETE FROM sessions WHERE alias = ?1 AND resume_id != ?2 AND network_id = ?3", [alias, resume_id, sessionNetId]);
154
194
  db.run(
155
- `INSERT INTO sessions (resume_id, alias, tmux_name, server, ip, hostname, agent, project_dir, version, status, task, output, progress, score, node_id, session_id, config_path, channels, network_id, model, cpu_load_1min, cpu_cores, mem_total_gb, mem_used_gb, mem_avail_gb, last_seen_at, updated_at)
156
- VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, datetime('now'), datetime('now'))
195
+ `INSERT INTO sessions (resume_id, alias, tmux_name, server, ip, hostname, agent, project_dir, version, status, task, output, progress, score, node_id, session_id, config_path, channels, network_id, model, cpu_load_1min, cpu_cores, mem_total_gb, mem_used_gb, mem_avail_gb, disk_total_gb, disk_used_gb, disk_avail_gb, process_rss_bytes, process_rss_mb, process_cpu_pct, process_uptime_seconds, process_in_flight_count, last_seen_at, updated_at)
196
+ VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26, ?27, ?28, ?29, ?30, ?31, ?32, ?33, datetime('now'), datetime('now'))
157
197
  ON CONFLICT(resume_id) DO UPDATE SET
158
198
  alias = COALESCE(?2, sessions.alias), tmux_name = COALESCE(?3, sessions.tmux_name),
159
199
  server = COALESCE(?4, sessions.server), ip = COALESCE(?5, sessions.ip),
@@ -170,10 +210,33 @@ export function registerTools(server: McpServer, clientIP?: string, enforceNetwo
170
210
  mem_total_gb = COALESCE(?23, sessions.mem_total_gb),
171
211
  mem_used_gb = COALESCE(?24, sessions.mem_used_gb),
172
212
  mem_avail_gb = COALESCE(?25, sessions.mem_avail_gb),
213
+ disk_total_gb = COALESCE(?26, sessions.disk_total_gb),
214
+ disk_used_gb = COALESCE(?27, sessions.disk_used_gb),
215
+ disk_avail_gb = COALESCE(?28, sessions.disk_avail_gb),
216
+ process_rss_bytes = COALESCE(?29, sessions.process_rss_bytes),
217
+ process_rss_mb = COALESCE(?30, sessions.process_rss_mb),
218
+ process_cpu_pct = COALESCE(?31, sessions.process_cpu_pct),
219
+ process_uptime_seconds = COALESCE(?32, sessions.process_uptime_seconds),
220
+ process_in_flight_count = COALESCE(?33, sessions.process_in_flight_count),
173
221
  last_seen_at = datetime('now'), updated_at = datetime('now')`,
174
- [resume_id, alias, tmux ?? null, srv ?? null, hostIp, hostHostname, ag ?? null, pd ?? null, ver ?? null, status, task ?? null, trimmedOutput ?? null, progress ?? null, score ?? null, node_id ?? null, session_id ?? null, config_path ?? null, channels ?? null, sessionNetId, mdl ?? null, cpuLoad1m, cpuCores, memTotalGb, memUsedGb, memAvailGb]
222
+ [resume_id, alias, tmux ?? null, srv ?? null, hostIp, hostHostname, ag ?? null, pd ?? null, ver ?? null, status, task ?? null, trimmedOutput ?? null, progress ?? null, score ?? null, node_id ?? null, session_id ?? null, config_path ?? null, channels ?? null, sessionNetId, mdl ?? null, cpuLoad1m, cpuCores, memTotalGb, memUsedGb, memAvailGb, diskTotalGb, diskUsedGb, diskAvailGb, processRssBytes, processRssMb, processCpuPct, processUptimeSeconds, processInFlightCount]
175
223
  );
224
+ if (host || proc) {
225
+ db.run(
226
+ `INSERT INTO agent_telemetry (id, network_id, resume_id, alias, hostname, ip, cpu_load_1min, cpu_cores, mem_total_gb, mem_used_gb, mem_avail_gb, disk_total_gb, disk_used_gb, disk_avail_gb, process_rss_bytes, process_rss_mb, process_cpu_pct, process_uptime_seconds, process_in_flight_count, created_at)
227
+ VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, datetime('now'))`,
228
+ [uuidv4(), sessionNetId, resume_id, alias, hostHostname, hostIp, cpuLoad1m, cpuCores, memTotalGb, memUsedGb, memAvailGb, diskTotalGb, diskUsedGb, diskAvailGb, processRssBytes, processRssMb, processCpuPct, processUptimeSeconds, processInFlightCount]
229
+ );
230
+ }
176
231
  });
232
+ pushEvent(alias, {
233
+ type: "status_update",
234
+ alias,
235
+ status,
236
+ progress: progress ?? null,
237
+ host: statusHostTelemetry,
238
+ process_telemetry: statusProcessTelemetry,
239
+ }, sessionNetId);
177
240
 
178
241
  // V2: sync tasks table — report_status(working) → tasks.running
179
242
  if (status === "working" && task) {