@agent-relay/dashboard-server 2.0.80 → 2.0.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/dist/server.js +271 -61
  2. package/dist/server.js.map +1 -1
  3. package/out/404.html +1 -1
  4. package/out/about.html +1 -1
  5. package/out/about.txt +1 -1
  6. package/out/app/onboarding.html +1 -1
  7. package/out/app/onboarding.txt +1 -1
  8. package/out/app.html +1 -1
  9. package/out/app.txt +1 -1
  10. package/out/blog/go-to-bed-wake-up-to-a-finished-product.html +1 -1
  11. package/out/blog/go-to-bed-wake-up-to-a-finished-product.txt +1 -1
  12. package/out/blog/let-them-cook-multi-agent-orchestration.html +1 -1
  13. package/out/blog/let-them-cook-multi-agent-orchestration.txt +1 -1
  14. package/out/blog.html +1 -1
  15. package/out/blog.txt +1 -1
  16. package/out/careers.html +1 -1
  17. package/out/careers.txt +1 -1
  18. package/out/changelog.html +1 -1
  19. package/out/changelog.txt +1 -1
  20. package/out/cloud/link.html +1 -1
  21. package/out/cloud/link.txt +1 -1
  22. package/out/complete-profile.html +1 -1
  23. package/out/complete-profile.txt +1 -1
  24. package/out/connect-repos.html +1 -1
  25. package/out/connect-repos.txt +1 -1
  26. package/out/contact.html +1 -1
  27. package/out/contact.txt +1 -1
  28. package/out/docs.html +1 -1
  29. package/out/docs.txt +1 -1
  30. package/out/history.html +1 -1
  31. package/out/history.txt +1 -1
  32. package/out/index.html +1 -1
  33. package/out/index.txt +1 -1
  34. package/out/login.html +1 -1
  35. package/out/login.txt +1 -1
  36. package/out/metrics.html +1 -1
  37. package/out/metrics.txt +1 -1
  38. package/out/pricing.html +1 -1
  39. package/out/pricing.txt +1 -1
  40. package/out/privacy.html +1 -1
  41. package/out/privacy.txt +1 -1
  42. package/out/providers/setup/claude.html +1 -1
  43. package/out/providers/setup/claude.txt +1 -1
  44. package/out/providers/setup/codex.html +1 -1
  45. package/out/providers/setup/codex.txt +1 -1
  46. package/out/providers/setup/cursor.html +1 -1
  47. package/out/providers/setup/cursor.txt +1 -1
  48. package/out/providers.html +1 -1
  49. package/out/providers.txt +1 -1
  50. package/out/security.html +1 -1
  51. package/out/security.txt +1 -1
  52. package/out/signup.html +1 -1
  53. package/out/signup.txt +1 -1
  54. package/out/terms.html +1 -1
  55. package/out/terms.txt +1 -1
  56. package/package.json +1 -1
  57. /package/out/_next/static/{AqelRhy1vr2nBUcU0Iqcp → dYlczDQI12PIQ3tqq3N4Y}/_buildManifest.js +0 -0
  58. /package/out/_next/static/{AqelRhy1vr2nBUcU0Iqcp → dYlczDQI12PIQ3tqq3N4Y}/_ssgManifest.js +0 -0
package/dist/server.js CHANGED
@@ -5,7 +5,7 @@ import path from 'path';
5
5
  import fs from 'fs';
6
6
  import os from 'os';
7
7
  import crypto from 'crypto';
8
- import { exec, execFile } from 'child_process';
8
+ import { exec, execFile, execSync } from 'child_process';
9
9
  import { fileURLToPath } from 'url';
10
10
  import { createStorageAdapter } from '@agent-relay/storage/adapter';
11
11
  import { RelayClient } from '@agent-relay/sdk';
@@ -4183,6 +4183,250 @@ export async function startDashboard(portOrOptions, dataDirArg, teamDirArg, dbPa
4183
4183
  res.status(500).send('# Error computing metrics\n');
4184
4184
  }
4185
4185
  });
4186
// ===== Process resource sampling state =====
// Tree-wide usage sampling is opt-in: it is only used when RELAY_CLOUD_ENABLED is exactly 'true'.
const isCloudMetricsEnabled = process.env.RELAY_CLOUD_ENABLED === 'true';
const isLinux = process.platform === 'linux';
/**
 * Kernel clock ticks per second, used to turn /proc/[pid]/stat jiffies into seconds.
 * Resolved once via `getconf CLK_TCK`; falls back to 100 when the command is
 * unavailable, times out, or prints something that is not a positive integer.
 */
const clockTicksPerSecond = (() => {
    const fallbackTicks = 100;
    // The value is only meaningful for the /proc based sampler, so do not spawn
    // a shell on platforms where `getconf` may not even exist.
    if (!isLinux) {
        return fallbackTicks;
    }
    try {
        const output = execSync('getconf CLK_TCK', {
            encoding: 'utf8',
            // Never let a stuck child process block startup indefinitely.
            timeout: 3000,
            // Keep shell error output out of the server's own stderr.
            stdio: ['ignore', 'pipe', 'ignore'],
        });
        const ticks = parseInt(output.trim(), 10);
        return Number.isFinite(ticks) && ticks > 0 ? ticks : fallbackTicks;
    }
    catch {
        return fallbackTicks;
    }
})();
// rootPid -> last CPU sample taken for that process tree.
const procTreeCpuSamples = new Map();
// Most recent parsed `ps` process table, shared by all lookups within the TTL below.
const psTreeSnapshotCache = {
    timestampMs: 0,
    processByPid: new Map(),
    childrenByPid: new Map(),
};
const psTreeCacheTtlMs = 1000;
4205
/**
 * Reads the resident set size of a single process from /proc/<pid>/status.
 *
 * @param {number} pid - Process id to inspect.
 * @returns {number} The VmRSS value converted from kB to bytes, or 0 when the
 *   status file cannot be read or contains no VmRSS line.
 */
const getProcStatusRssBytes = (pid) => {
    let statusText;
    try {
        statusText = fs.readFileSync(`/proc/${pid}/status`, 'utf8');
    }
    catch {
        return 0;
    }
    const vmRss = statusText.match(/VmRSS:\s+(\d+)\s+kB/);
    return vmRss ? parseInt(vmRss[1], 10) * 1024 : 0;
};
4218
/**
 * Returns the CPU time consumed by one process, in clock ticks, by adding the
 * utime and stime fields of /proc/<pid>/stat.
 *
 * @param {number} pid - Process id to inspect.
 * @returns {number} utime + stime, or 0 when the stat file is unreadable or malformed.
 */
const getProcStatJiffies = (pid) => {
    const toTicks = (field) => {
        const value = parseInt(field ?? '0', 10);
        return Number.isFinite(value) ? value : 0;
    };
    try {
        const raw = fs.readFileSync(`/proc/${pid}/stat`, 'utf8');
        // Fields are counted from after the last ')' so that a command name
        // containing spaces or parentheses cannot shift the indexes.
        const commEnd = raw.lastIndexOf(')');
        if (commEnd < 0) {
            return 0;
        }
        const fields = raw.slice(commEnd + 1).trim().split(/\s+/);
        return toTicks(fields[11]) + toTicks(fields[12]);
    }
    catch {
        return 0;
    }
};
4235
/**
 * Collects the pid of a process and of all of its descendants by walking
 * /proc/<pid>/task/<pid>/children breadth-first.
 *
 * @param {number} rootPid - Pid of the tree root.
 * @returns {number[]} Pids in breadth-first order starting with rootPid; empty
 *   when /proc/<rootPid> does not exist.
 */
const getProcTreePids = (rootPid) => {
    const pending = [rootPid];
    const seen = new Set();
    const result = [];
    // Index cursor instead of shift(): same visiting order without re-packing the array.
    for (let cursor = 0; cursor < pending.length; cursor++) {
        const pid = pending[cursor];
        if (pid === undefined || seen.has(pid)) {
            continue;
        }
        if (!fs.existsSync(`/proc/${pid}`)) {
            continue;
        }
        seen.add(pid);
        result.push(pid);
        let childrenText;
        try {
            childrenText = fs.readFileSync(`/proc/${pid}/task/${pid}/children`, 'utf8').trim();
        }
        catch {
            // The children file is missing or the process exited mid-walk.
            // Keep what was collected so far instead of failing the whole tree.
            continue;
        }
        if (!childrenText) {
            continue;
        }
        for (const child of childrenText.split(/\s+/)) {
            const childPid = parseInt(child, 10);
            if (Number.isFinite(childPid) && childPid > 0) {
                pending.push(childPid);
            }
        }
    }
    return result;
};
4263
/**
 * Returns a parsed snapshot of the system process table taken with
 * `ps -axo pid=,ppid=,rss=,pcpu=`, reusing the cached snapshot while it is
 * younger than psTreeCacheTtlMs.
 *
 * @returns {{ processByPid: Map<number, { ppid: number, rssBytes: number, cpuPercent: number }>,
 *            childrenByPid: Map<number, number[]> }}
 *   processByPid maps each pid to its parent pid, RSS (ps value * 1024) and
 *   pcpu; childrenByPid maps a parent pid to the pids that list it as ppid.
 *   Both maps are empty when `ps` fails or prints nothing.
 */
const getPsTreeSnapshot = () => {
    const nowMs = Date.now();
    const cacheAgeMs = nowMs - psTreeSnapshotCache.timestampMs;
    // A negative age means the wall clock stepped backwards; treat the cache as stale.
    if (cacheAgeMs >= 0 && cacheAgeMs <= psTreeCacheTtlMs) {
        return {
            processByPid: psTreeSnapshotCache.processByPid,
            childrenByPid: psTreeSnapshotCache.childrenByPid,
        };
    }
    const processByPid = new Map();
    const childrenByPid = new Map();
    try {
        const output = execSync('ps -axo pid=,ppid=,rss=,pcpu=', {
            encoding: 'utf8',
            timeout: 3000,
            // Keep ps diagnostics out of the server's own stderr.
            stdio: ['ignore', 'pipe', 'ignore'],
        }).trim();
        const lines = output ? output.split('\n') : [];
        for (const line of lines) {
            const parts = line.trim().split(/\s+/);
            if (parts.length < 4) {
                continue;
            }
            const pid = parseInt(parts[0], 10);
            if (!Number.isFinite(pid) || pid <= 0) {
                continue;
            }
            const parsedPpid = parseInt(parts[1], 10);
            const ppid = Number.isFinite(parsedPpid) ? parsedPpid : 0;
            const rssBytes = parseInt(parts[2], 10) * 1024;
            const cpuPercent = parseFloat(parts[3]);
            processByPid.set(pid, {
                ppid,
                rssBytes: Number.isFinite(rssBytes) ? rssBytes : 0,
                cpuPercent: Number.isFinite(cpuPercent) ? cpuPercent : 0,
            });
            const siblings = childrenByPid.get(ppid);
            if (siblings) {
                siblings.push(pid);
            }
            else {
                childrenByPid.set(ppid, [pid]);
            }
        }
    }
    catch {
        // ps is missing, failed or timed out: report an empty table.
        processByPid.clear();
        childrenByPid.clear();
    }
    // Cache failures and empty results too. Otherwise every lookup within one
    // request would re-run a synchronous `ps` that can block for up to 3 seconds.
    psTreeSnapshotCache.timestampMs = nowMs;
    psTreeSnapshotCache.processByPid = processByPid;
    psTreeSnapshotCache.childrenByPid = childrenByPid;
    return { processByPid, childrenByPid };
};
4315
/**
 * Sums RSS and CPU percentage over a process and all of its descendants using
 * the process table returned by getPsTreeSnapshot().
 *
 * @param {number} rootPid - Pid of the tree root.
 * @returns {{ rssBytes: number, cpuPercent: number }} Totals for the tree;
 *   both are 0 when the snapshot is empty, the root is not in it, or anything throws.
 */
const getPsTreeUsage = (rootPid) => {
    try {
        const snapshot = getPsTreeSnapshot();
        const totals = { rssBytes: 0, cpuPercent: 0 };
        if (snapshot.processByPid.size === 0) {
            return totals;
        }
        const visited = new Set();
        const pending = [rootPid];
        while (pending.length > 0) {
            const pid = pending.shift();
            if (pid === undefined || visited.has(pid)) {
                continue;
            }
            const entry = snapshot.processByPid.get(pid);
            if (!entry) {
                continue;
            }
            visited.add(pid);
            totals.rssBytes += entry.rssBytes;
            totals.cpuPercent += entry.cpuPercent;
            // Enqueue every process that names this pid as its parent.
            for (const childPid of snapshot.childrenByPid.get(pid) ?? []) {
                pending.push(childPid);
            }
        }
        return totals;
    }
    catch {
        return { rssBytes: 0, cpuPercent: 0 };
    }
};
4346
/**
 * Measures a single process (no descendants).
 *
 * When /proc/<pid>/status exists, RSS is read from its VmRSS line and CPU is
 * left at 0. Otherwise, on macOS only, RSS and CPU come from `ps`.
 *
 * @param {number} rootPid - Pid of the process to measure.
 * @returns {{ rssBytes: number, cpuPercent: number }} Both values are 0 when
 *   the pid is not a positive integer, the process is gone, or the lookup fails.
 */
const getLegacyProcUsage = (rootPid) => {
    const usage = { rssBytes: 0, cpuPercent: 0 };
    // The pid ends up inside a shell command line, so only a plain positive
    // integer is accepted (numeric strings still work after coercion).
    const pid = Number(rootPid);
    if (!Number.isInteger(pid) || pid <= 0) {
        return usage;
    }
    try {
        const statusPath = `/proc/${pid}/status`;
        if (fs.existsSync(statusPath)) {
            const status = fs.readFileSync(statusPath, 'utf8');
            // Parse VmRSS (Resident Set Size) from /proc/[pid]/status
            const rssMatch = status.match(/VmRSS:\s+(\d+)\s+kB/);
            if (rssMatch) {
                usage.rssBytes = parseInt(rssMatch[1], 10) * 1024;
            }
        }
        else if (process.platform === 'darwin') {
            // macOS: Use ps command to get RSS and CPU
            const psOutput = execSync(`ps -o rss=,pcpu= -p ${pid}`, { encoding: 'utf8', timeout: 3000 }).trim();
            if (psOutput) {
                const [rssStr, cpuStr] = psOutput.split(/\s+/);
                const rssKb = parseInt(rssStr, 10);
                const cpu = parseFloat(cpuStr);
                // Ignore unparsable columns rather than reporting NaN.
                if (Number.isFinite(rssKb)) {
                    usage.rssBytes = rssKb * 1024;
                }
                if (Number.isFinite(cpu)) {
                    usage.cpuPercent = cpu;
                }
            }
        }
    }
    catch {
        // Process may have exited or command failed
    }
    return usage;
};
4379
/**
 * Measures a whole process tree (root plus descendants) through /proc.
 *
 * RSS is the sum of VmRSS over the tree. CPU is derived from the growth of the
 * tree's summed utime+stime between this call and the sample stored for the
 * same rootPid in procTreeCpuSamples, so the first call for a pid reports 0.
 * When /proc/<rootPid>/status does not exist the stored sample is dropped and
 * the `ps` based tree lookup is used instead.
 *
 * @param {number} rootPid - Pid of the tree root.
 * @returns {{ rssBytes: number, cpuPercent: number }} Tree totals; zeros for an
 *   invalid pid, an empty tree, or any unexpected failure.
 */
const getCloudProcTreeUsage = (rootPid) => {
    // Deltas taken over a shorter window are dominated by clock-tick rounding,
    // so callers arriving sooner reuse the last computed CPU value instead.
    const minCpuSampleIntervalMs = 250;
    if (!Number.isFinite(rootPid) || rootPid <= 0) {
        return { rssBytes: 0, cpuPercent: 0 };
    }
    try {
        const statusPath = `/proc/${rootPid}/status`;
        if (!fs.existsSync(statusPath)) {
            procTreeCpuSamples.delete(rootPid);
            return getPsTreeUsage(rootPid);
        }
        const pids = getProcTreePids(rootPid);
        if (pids.length === 0) {
            procTreeCpuSamples.delete(rootPid);
            return { rssBytes: 0, cpuPercent: 0 };
        }
        let totalRssBytes = 0;
        let totalJiffies = 0;
        for (const pid of pids) {
            totalRssBytes += getProcStatusRssBytes(pid);
            totalJiffies += getProcStatJiffies(pid);
        }
        const nowMs = Date.now();
        const previous = procTreeCpuSamples.get(rootPid);
        if (previous) {
            const sinceLastSampleMs = nowMs - previous.timestampMs;
            if (sinceLastSampleMs >= 0 && sinceLastSampleMs < minCpuSampleIntervalMs) {
                // Keep the stored baseline untouched so the next spaced-out
                // call still measures over a meaningful window.
                return { rssBytes: totalRssBytes, cpuPercent: previous.cpuPercent ?? 0 };
            }
        }
        let cpuPercent = 0;
        if (previous) {
            const elapsedMs = nowMs - previous.timestampMs;
            const elapsedJiffies = totalJiffies - previous.totalJiffies;
            // A non-positive delta (clock step, or a descendant exited and took
            // its counters with it) is reported as 0 rather than a negative load.
            if (elapsedMs > 0 && elapsedJiffies > 0) {
                cpuPercent = Math.max(0, (elapsedJiffies / clockTicksPerSecond / (elapsedMs / 1000)) * 100);
            }
        }
        procTreeCpuSamples.set(rootPid, { timestampMs: nowMs, totalJiffies, cpuPercent });
        return {
            rssBytes: totalRssBytes,
            cpuPercent,
        };
    }
    catch {
        return { rssBytes: 0, cpuPercent: 0 };
    }
};
4421
/**
 * Entry point for per-worker resource usage; picks the sampling strategy.
 *
 * - cloud metrics disabled: single-process legacy lookup
 * - cloud metrics enabled, not Linux: `ps` based process-tree lookup
 * - cloud metrics enabled, Linux: /proc based process-tree lookup
 *
 * @param {number} rootPid - Pid of the worker process.
 * @returns {{ rssBytes: number, cpuPercent: number }} Usage from the selected strategy.
 */
const getProcTreeUsage = (rootPid) => {
    if (isCloudMetricsEnabled) {
        return isLinux ? getCloudProcTreeUsage(rootPid) : getPsTreeUsage(rootPid);
    }
    return getLegacyProcUsage(rootPid);
};
4186
4430
  // ===== Agent Memory Metrics API =====
4187
4431
  /**
4188
4432
  * GET /api/metrics/agents - Detailed agent memory and resource metrics
@@ -4198,33 +4442,9 @@ export async function startDashboard(portOrOptions, dataDirArg, teamDirArg, dbPa
4198
4442
  let rssBytes = 0;
4199
4443
  let cpuPercent = 0;
4200
4444
  if (worker.pid) {
4201
- try {
4202
- // Try /proc filesystem first (Linux)
4203
- const statusPath = `/proc/${worker.pid}/status`;
4204
- if (fs.existsSync(statusPath)) {
4205
- const status = fs.readFileSync(statusPath, 'utf8');
4206
- // Parse VmRSS (Resident Set Size) from /proc/[pid]/status
4207
- const rssMatch = status.match(/VmRSS:\s+(\d+)\s+kB/);
4208
- if (rssMatch) {
4209
- rssBytes = parseInt(rssMatch[1], 10) * 1024; // Convert kB to bytes
4210
- }
4211
- }
4212
- else if (process.platform === 'darwin') {
4213
- // macOS: Use ps command to get RSS and CPU
4214
- const { execSync } = await import('child_process');
4215
- const psOutput = execSync(`ps -o rss=,pcpu= -p ${worker.pid}`, { encoding: 'utf8' }).trim();
4216
- if (psOutput) {
4217
- const [rssStr, cpuStr] = psOutput.split(/\s+/);
4218
- if (rssStr)
4219
- rssBytes = parseInt(rssStr, 10) * 1024; // ps reports RSS in KB
4220
- if (cpuStr)
4221
- cpuPercent = parseFloat(cpuStr);
4222
- }
4223
- }
4224
- }
4225
- catch {
4226
- // Process may have exited or command failed
4227
- }
4445
+ const processUsage = getProcTreeUsage(worker.pid);
4446
+ rssBytes = processUsage.rssBytes;
4447
+ cpuPercent = processUsage.cpuPercent;
4228
4448
  }
4229
4449
  agents.push({
4230
4450
  name: worker.name,
@@ -4279,40 +4499,30 @@ export async function startDashboard(portOrOptions, dataDirArg, teamDirArg, dbPa
4279
4499
  // Check for high memory usage
4280
4500
  for (const worker of workers) {
4281
4501
  if (worker.pid) {
4282
- try {
4283
- const { execSync } = await import('child_process');
4284
- const output = execSync(`ps -o rss= -p ${worker.pid}`, {
4285
- encoding: 'utf8',
4286
- timeout: 3000,
4287
- }).trim();
4288
- const rssBytes = parseInt(output, 10) * 1024;
4289
- if (rssBytes > 1.5 * 1024 * 1024 * 1024) {
4290
- // > 1.5GB
4291
- healthScore -= 20;
4292
- issues.push({
4293
- severity: 'critical',
4294
- message: `Agent "${worker.name}" is using ${Math.round(rssBytes / 1024 / 1024)}MB of memory`,
4295
- });
4296
- totalAlerts24h++;
4297
- alerts.push({
4298
- id: `alert-${Date.now()}-${worker.name}`,
4299
- agentName: worker.name,
4300
- alertType: 'oom_imminent',
4301
- message: `Memory usage critical: ${Math.round(rssBytes / 1024 / 1024)}MB`,
4302
- createdAt: new Date().toISOString(),
4303
- });
4304
- }
4305
- else if (rssBytes > 1024 * 1024 * 1024) {
4306
- // > 1GB
4307
- healthScore -= 10;
4308
- issues.push({
4309
- severity: 'high',
4310
- message: `Agent "${worker.name}" memory usage is elevated (${Math.round(rssBytes / 1024 / 1024)}MB)`,
4311
- });
4312
- }
4502
+ const { rssBytes } = getProcTreeUsage(worker.pid);
4503
+ if (rssBytes > 1.5 * 1024 * 1024 * 1024) {
4504
+ // > 1.5GB
4505
+ healthScore -= 20;
4506
+ issues.push({
4507
+ severity: 'critical',
4508
+ message: `Agent "${worker.name}" is using ${Math.round(rssBytes / 1024 / 1024)}MB of memory`,
4509
+ });
4510
+ totalAlerts24h++;
4511
+ alerts.push({
4512
+ id: `alert-${Date.now()}-${worker.name}`,
4513
+ agentName: worker.name,
4514
+ alertType: 'oom_imminent',
4515
+ message: `Memory usage critical: ${Math.round(rssBytes / 1024 / 1024)}MB`,
4516
+ createdAt: new Date().toISOString(),
4517
+ });
4313
4518
  }
4314
- catch {
4315
- // Process may have exited
4519
+ else if (rssBytes > 1024 * 1024 * 1024) {
4520
+ // > 1GB
4521
+ healthScore -= 10;
4522
+ issues.push({
4523
+ severity: 'high',
4524
+ message: `Agent "${worker.name}" memory usage is elevated (${Math.round(rssBytes / 1024 / 1024)}MB)`,
4525
+ });
4316
4526
  }
4317
4527
  }
4318
4528
  }