@xenon-device-management/xenon 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/README.md +74 -0
  2. package/lib/package.json +1 -1
  3. package/lib/public/assets/{Layouts-D0WSzKOh.js → Layouts-D6IPfwoe.js} +1 -1
  4. package/lib/public/assets/{ai-settings-DQWDdNd7.js → ai-settings-CflyFKan.js} +1 -1
  5. package/lib/public/assets/{apps-1sLWHOGO.js → apps-Da4dvQ1J.js} +1 -1
  6. package/lib/public/assets/{badge-BiR1gmMm.js → badge-BNR9umdu.js} +1 -1
  7. package/lib/public/assets/{button-BVazt4Z1.js → button-hZFV1ypT.js} +1 -1
  8. package/lib/public/assets/{calendar-yMyP2_Nc.js → calendar-fehdBtun.js} +1 -1
  9. package/lib/public/assets/{clock-CsVplnJ2.js → clock-DrpxSvCL.js} +1 -1
  10. package/lib/public/assets/{cpu-DNC8n7kK.js → cpu-tuyMVZ4I.js} +1 -1
  11. package/lib/public/assets/{device-explorer-DFu8Gxj4.js → device-explorer-DOfRH3zm.js} +1 -1
  12. package/lib/public/assets/{index-S71J2rWg.js → index-BaTiUCeH.js} +18 -18
  13. package/lib/public/assets/{lock-BstCxnX6.js → lock-C6CoqSr2.js} +1 -1
  14. package/lib/public/assets/{maintenance-settings-BwfG9cu2.js → maintenance-settings-CM2oC7-i.js} +1 -1
  15. package/lib/public/assets/{mouse-pointer-2-CSn_Wnc9.js → mouse-pointer-2-CXdnjXIg.js} +1 -1
  16. package/lib/public/assets/{plus-DfjM7G6e.js → plus-B4B1Hukt.js} +1 -1
  17. package/lib/public/assets/{session-dashboard-C6ek4z65.js → session-dashboard-B5OPMTz5.js} +1 -1
  18. package/lib/public/assets/{settings-BDYP8ULf.js → settings-BTHP7fj3.js} +1 -1
  19. package/lib/public/assets/{trash-2-CZWUMK5b.js → trash-2-NJMZJ2Ol.js} +1 -1
  20. package/lib/public/assets/{useSocket-CliVeWS3.js → useSocket-Ct2wo7P2.js} +2 -2
  21. package/lib/public/assets/{webhook-settings-tPiwWf8y.js → webhook-settings-Cz35-QJ7.js} +1 -1
  22. package/lib/public/assets/{zap-ZrK5B58i.js → zap-CssSMAN5.js} +1 -1
  23. package/lib/public/index.html +1 -1
  24. package/lib/schema.json +85 -38
  25. package/lib/src/InternalHttpClient.js +69 -14
  26. package/lib/src/app/index.js +92 -24
  27. package/lib/src/app/routers/apikeys.js +33 -0
  28. package/lib/src/app/routers/apps.js +4 -0
  29. package/lib/src/app/routers/auth.js +36 -0
  30. package/lib/src/app/routers/config.js +4 -0
  31. package/lib/src/app/routers/control.js +61 -10
  32. package/lib/src/app/routers/dashboard.js +5 -6
  33. package/lib/src/app/routers/grid.js +30 -12
  34. package/lib/src/app/routers/processes.js +24 -0
  35. package/lib/src/app/routers/reservation.js +15 -0
  36. package/lib/src/app/routers/webhook.js +6 -3
  37. package/lib/src/auth/nodeSecret.js +33 -0
  38. package/lib/src/config.js +5 -0
  39. package/lib/src/data-service/prisma-store.js +17 -1
  40. package/lib/src/device-managers/AndroidDeviceManager.js +2 -2
  41. package/lib/src/device-managers/NodeDevices.js +8 -1
  42. package/lib/src/device-managers/ios/IOSDiscoveryService.js +7 -4
  43. package/lib/src/device-managers/ios/IOSStreamService.js +7 -0
  44. package/lib/src/device-managers/ios/WDAClient.js +2 -0
  45. package/lib/src/device-utils.js +29 -4
  46. package/lib/src/generated/client/edge.js +2 -2
  47. package/lib/src/generated/client/index.js +2 -2
  48. package/lib/src/generated/client/package.json +1 -1
  49. package/lib/src/generated/client/schema.prisma +3 -0
  50. package/lib/src/helpers/UniversalMjpegProxy.js +23 -0
  51. package/lib/src/index.js +10 -2
  52. package/lib/src/interceptors/CommandInterceptor.js +29 -0
  53. package/lib/src/interfaces/IPluginArgs.js +0 -1
  54. package/lib/src/logger.js +30 -2
  55. package/lib/src/logging/sessionContext.js +28 -0
  56. package/lib/src/middleware/apiKeyMiddleware.js +49 -0
  57. package/lib/src/middleware/csrfMiddleware.js +73 -0
  58. package/lib/src/middleware/nodeSecretMiddleware.js +38 -0
  59. package/lib/src/middleware/rateLimitMiddleware.js +68 -0
  60. package/lib/src/middleware/scopeGuard.js +41 -0
  61. package/lib/src/plugin.js +1 -1
  62. package/lib/src/services/AIService.js +43 -8
  63. package/lib/src/services/ApiKeyService.js +102 -0
  64. package/lib/src/services/CircuitBreaker.js +158 -0
  65. package/lib/src/services/CleanupService.js +137 -39
  66. package/lib/src/services/DeviceReconciler.js +102 -0
  67. package/lib/src/services/MetricsService.js +78 -0
  68. package/lib/src/services/PortAllocator.js +13 -0
  69. package/lib/src/services/ProcessMetricsService.js +99 -0
  70. package/lib/src/services/ProcessRegistry.js +123 -0
  71. package/lib/src/services/ServerManager.js +14 -2
  72. package/lib/src/services/SessionLifecycleService.js +80 -23
  73. package/lib/src/services/ShutdownCoordinator.js +89 -0
  74. package/lib/src/services/SocketClient.js +11 -0
  75. package/lib/src/services/SocketServer.js +109 -6
  76. package/lib/src/services/VideoPipelineService.js +2 -0
  77. package/lib/src/services/healing/HealingMetrics.js +63 -0
  78. package/lib/src/services/healing/HealingOrchestrator.js +32 -4
  79. package/lib/src/services/healing/OcrHealingProvider.js +7 -0
  80. package/lib/test/unit/ApiKeyService.test.js +101 -0
  81. package/lib/test/unit/PortAllocator.test.js +14 -0
  82. package/lib/test/unit/ProcessRegistry.test.js +70 -0
  83. package/lib/test/unit/apiKeyMiddleware.test.js +58 -0
  84. package/lib/test/unit/nodeSecretMiddleware.test.js +38 -0
  85. package/lib/test/unit/rateLimitMiddleware.test.js +37 -0
  86. package/lib/tsconfig.tsbuildinfo +1 -1
  87. package/package.json +2 -2
  88. package/prisma/migrations/20260423081701_add_session_indexes/migration.sql +8 -0
  89. package/prisma/schema.prisma +3 -0
  90. package/schema.json +85 -38
@@ -0,0 +1,102 @@
1
+ "use strict";
2
+ var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
3
+ var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
4
+ if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
5
+ else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
6
+ return c > 3 && r && Object.defineProperty(target, key, r), r;
7
+ };
8
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
9
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
10
+ return new (P || (P = Promise))(function (resolve, reject) {
11
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
12
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
13
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
14
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
15
+ });
16
+ };
17
+ var __importDefault = (this && this.__importDefault) || function (mod) {
18
+ return (mod && mod.__esModule) ? mod : { "default": mod };
19
+ };
20
+ var DeviceReconciler_1;
21
+ Object.defineProperty(exports, "__esModule", { value: true });
22
+ exports.DeviceReconciler = void 0;
23
+ const typedi_1 = require("typedi");
24
+ const logger_1 = __importDefault(require("../logger"));
25
+ const device_store_1 = require("../data-service/device-store");
26
+ const SessionManager_1 = require("../sessions/SessionManager");
27
+ const device_service_1 = require("../data-service/device-service");
28
+ // Closes a gap left by OrphanSweeper (which operates on prisma.session + the
29
+ // Prisma device table via a heartbeat cutoff) and releaseBlockedDevices
30
+ // (which only triggers on newCommandTimeout after a session has actually run
31
+ // a command). Neither covers the case where:
32
+ // - blockDevice() marks a device busy with session_id = X
33
+ // - but X never makes it into SESSION_MANAGER (driver crash during startup,
34
+ // allocation failed after the lock but before registration, Loki store
35
+ // drift, etc.)
36
+ // - the session also never reaches Prisma with a heartbeat, so OrphanSweeper
37
+ // doesn't see it; lastCmdExecutedAt stays undefined, so
38
+ // releaseBlockedDevices skips it too.
39
+ // Result: a ghost "busy" device that blocks allocation forever. This sweep
40
+ // cross-references the device store against SESSION_MANAGER and unblocks
41
+ // those ghosts.
42
/**
 * Periodic sweep that releases "ghost" devices: entries in the device store
 * that are flagged busy for a session id SESSION_MANAGER does not know about.
 *
 * A device is only considered a ghost when ALL of the following hold:
 *   - it is busy and carries a session_id,
 *   - SESSION_MANAGER.isValidSession(session_id) is false,
 *   - it has a positive sessionStartTime that is at least
 *     DeviceReconciler.MIN_BUSY_AGE_MS in the past.
 */
let DeviceReconciler = DeviceReconciler_1 = class DeviceReconciler {
    constructor() {
        this.logger = logger_1.default.scope('DeviceReconciler');
        // Running total of devices this instance has successfully unblocked.
        this.orphansFreed = 0;
    }
    /**
     * Runs one reconciliation pass. Never rejects: store read failures and
     * per-device unblock failures are logged and the pass carries on.
     *
     * @returns {Promise<void>}
     */
    reconcile() {
        return __awaiter(this, void 0, void 0, function* () {
            const store = device_store_1.DeviceStoreFactory.getStore();
            let devices;
            try {
                devices = yield store.getAllDevices();
            }
            catch (err) {
                this.logger.error(`getAllDevices failed: ${err.message}`);
                return;
            }
            // A store that yields nothing usable means there is nothing to
            // reconcile; bail out instead of throwing from the cron tick.
            if (!Array.isArray(devices))
                return;
            const now = Date.now();
            const orphans = devices.filter((d) => {
                if (!d.busy || !d.session_id)
                    return false;
                // Device is busy but isn't tied to an active session at the driver
                // layer. A known session in SESSION_MANAGER means a real driver is
                // holding it — leave those alone.
                if (SessionManager_1.SESSION_MANAGER.isValidSession(d.session_id))
                    return false;
                // Respect the bootstrap grace window: if sessionStartTime is 0 or
                // null we don't know when allocation happened, so fall back to
                // requiring SESSION_MANAGER knowledge (i.e. don't touch it).
                if (!d.sessionStartTime || d.sessionStartTime <= 0)
                    return false;
                return now - d.sessionStartTime >= DeviceReconciler_1.MIN_BUSY_AGE_MS;
            });
            if (orphans.length === 0)
                return;
            this.logger.warn(`Found ${orphans.length} ghost device(s); releasing: ${orphans
                .map((d) => `${d.udid}@${d.host}(session=${d.session_id})`)
                .join(', ')}`);
            for (const device of orphans) {
                // The orphan list is a snapshot and every unblock below is awaited,
                // so a slow-starting session may have registered in the meantime.
                // Re-check right before releasing so a live session never loses
                // its device.
                if (SessionManager_1.SESSION_MANAGER.isValidSession(device.session_id)) {
                    this.logger.warn(`Skipping ${device.udid}: session ${device.session_id} registered during reconciliation`);
                    continue;
                }
                try {
                    yield (0, device_service_1.unblockDevice)(device.udid, device.host);
                    this.orphansFreed++;
                }
                catch (err) {
                    this.logger.error(`Failed to unblock ghost device ${device.udid}: ${err.message}`);
                }
            }
        });
    }
    /**
     * @returns {number} Count of devices successfully released since this
     *   instance was constructed.
     */
    getOrphansFreedCount() {
        return this.orphansFreed;
    }
};
94
+ exports.DeviceReconciler = DeviceReconciler;
95
+ // Grace window after sessionStartTime within which we trust the session
96
+ // is still bootstrapping (driver.createSession can legitimately take
97
+ // 30s+ for simulator boot). Below this age we skip the device even if
98
+ // SESSION_MANAGER doesn't know it yet.
99
+ DeviceReconciler.MIN_BUSY_AGE_MS = 60000;
100
+ exports.DeviceReconciler = DeviceReconciler = DeviceReconciler_1 = __decorate([
101
+ (0, typedi_1.Service)()
102
+ ], DeviceReconciler);
@@ -24,6 +24,20 @@ const SessionManager_1 = require("../sessions/SessionManager");
24
24
  const device_store_1 = require("../data-service/device-store");
25
25
  const prisma_1 = require("../prisma");
26
26
  const logger_1 = __importDefault(require("../logger"));
27
+ const HealingMetrics_1 = require("./healing/HealingMetrics");
28
+ const CircuitBreaker_1 = require("./CircuitBreaker");
29
+ const DeviceReconciler_1 = require("./DeviceReconciler");
30
+ const ProcessMetricsService_1 = require("./ProcessMetricsService");
31
+ // Label values must escape backslash, double-quote, and newline per the
32
+ // Prometheus exposition format. Breaker keys contain colons which are fine.
33
/**
 * Escapes a value for use inside a double-quoted Prometheus label.
 *
 * Non-string inputs are coerced instead of throwing, so one malformed tier
 * name or breaker key cannot fail the whole /metrics response. null and
 * undefined become the empty string.
 *
 * @param {*} value Raw label value.
 * @returns {string} Value with backslash, double-quote and newline escaped.
 */
function escapeLabel(value) {
    const text = value === null || value === undefined ? '' : String(value);
    // Backslash must be escaped first so the escapes added below survive.
    return text.replace(/\\/g, '\\\\').replace(/"/g, '\\"').replace(/\n/g, '\\n');
}
36
// Numeric encoding of circuit-breaker states for the exported gauge.
// Frozen so the shared lookup table cannot be mutated at runtime.
const BREAKER_STATE_CODE = Object.freeze({
    closed: 0,
    half_open: 1,
    open: 2,
});
27
41
  let MetricsService = class MetricsService {
28
42
  constructor() {
29
43
  this.CONFIG_ID = 'metrics';
@@ -88,6 +102,7 @@ let MetricsService = class MetricsService {
88
102
  }
89
103
  getMetrics() {
90
104
  return __awaiter(this, void 0, void 0, function* () {
105
+ var _a;
91
106
  const sessions = SessionManager_1.SESSION_MANAGER.getStats();
92
107
  const devices = yield device_store_1.DeviceStoreFactory.getStore().getAllDevices();
93
108
  const totalDevices = devices.length;
@@ -124,6 +139,69 @@ let MetricsService = class MetricsService {
124
139
  `xenon_healing_total{status="attempt"} ${healingAttempts}`,
125
140
  `xenon_healing_total{status="success"} ${healingSuccesses}`,
126
141
  ];
142
+ // Per-tier healing metrics (in-process; resets on restart — fine for Prom
143
+ // scrape semantics). Tells us which tier is actually earning its compute.
144
+ const tiers = HealingMetrics_1.HEALING_METRICS.snapshot();
145
+ if (tiers.length > 0) {
146
+ lines.push('# HELP xenon_heal_tier_attempts_total Healing attempts per tier', '# TYPE xenon_heal_tier_attempts_total counter');
147
+ for (const t of tiers) {
148
+ const labels = `tier="${t.tier}",name="${escapeLabel(t.name)}"`;
149
+ lines.push(`xenon_heal_tier_attempts_total{${labels}} ${t.attempts}`);
150
+ }
151
+ lines.push('# HELP xenon_heal_tier_successes_total Healing successes per tier', '# TYPE xenon_heal_tier_successes_total counter');
152
+ for (const t of tiers) {
153
+ const labels = `tier="${t.tier}",name="${escapeLabel(t.name)}"`;
154
+ lines.push(`xenon_heal_tier_successes_total{${labels}} ${t.successes}`);
155
+ }
156
+ lines.push('# HELP xenon_heal_tier_failures_total Healing failures per tier', '# TYPE xenon_heal_tier_failures_total counter');
157
+ for (const t of tiers) {
158
+ const labels = `tier="${t.tier}",name="${escapeLabel(t.name)}"`;
159
+ lines.push(`xenon_heal_tier_failures_total{${labels}} ${t.failures}`);
160
+ }
161
+ lines.push('# HELP xenon_heal_tier_duration_seconds_sum Cumulative time spent in each tier', '# TYPE xenon_heal_tier_duration_seconds_sum counter');
162
+ for (const t of tiers) {
163
+ const labels = `tier="${t.tier}",name="${escapeLabel(t.name)}"`;
164
+ const seconds = (t.durationMsSum / 1000).toFixed(3);
165
+ lines.push(`xenon_heal_tier_duration_seconds_sum{${labels}} ${seconds}`);
166
+ }
167
+ }
168
+ lines.push('# HELP xenon_heal_all_tiers_failed_total Healing calls where no tier matched', '# TYPE xenon_heal_all_tiers_failed_total counter', `xenon_heal_all_tiers_failed_total ${HealingMetrics_1.HEALING_METRICS.getAllTiersFailedCount()}`);
169
+ // Tier-advised skips. A rising trend on tier 3 usually means context
170
+ // collection (page source + screenshot) is failing upstream — the
171
+ // remaining tiers are being shed correctly but the root cause lives in
172
+ // the driver, not the heal pipeline.
173
+ const skips = HealingMetrics_1.HEALING_METRICS.skipSnapshot();
174
+ if (skips.length > 0) {
175
+ lines.push('# HELP xenon_heal_tier_skipped_remaining_total Times a tier short-circuited the remaining tiers', '# TYPE xenon_heal_tier_skipped_remaining_total counter');
176
+ for (const s of skips) {
177
+ const labels = `tier="${s.tier}",name="${escapeLabel(s.name)}"`;
178
+ lines.push(`xenon_heal_tier_skipped_remaining_total{${labels}} ${s.count}`);
179
+ }
180
+ }
181
+ // Device reconciliation: non-zero = ghost devices were being leaked and
182
+ // the reconciler caught them. A steady climb usually points at a bug in
183
+ // session allocation or shutdown.
184
+ lines.push('# HELP xenon_device_reconciler_orphans_freed_total Ghost devices released by the reconciler', '# TYPE xenon_device_reconciler_orphans_freed_total counter', `xenon_device_reconciler_orphans_freed_total ${typedi_1.Container.get(DeviceReconciler_1.DeviceReconciler).getOrphansFreedCount()}`);
185
+ // Hub process health. Heap growing while xenon_sessions_active is flat
186
+ // means a leak; event-loop lag climbing while CPU is idle means some
187
+ // handler is doing sync work on the hot path.
188
+ const proc = typedi_1.Container.get(ProcessMetricsService_1.ProcessMetricsService).snapshot();
189
+ lines.push('# HELP xenon_process_memory_bytes Node process memoryUsage breakdown', '# TYPE xenon_process_memory_bytes gauge', `xenon_process_memory_bytes{type="rss"} ${proc.memory.rss}`, `xenon_process_memory_bytes{type="heap_used"} ${proc.memory.heapUsed}`, `xenon_process_memory_bytes{type="heap_total"} ${proc.memory.heapTotal}`, `xenon_process_memory_bytes{type="external"} ${proc.memory.external}`, `xenon_process_memory_bytes{type="array_buffers"} ${proc.memory.arrayBuffers}`, '# HELP xenon_process_event_loop_lag_ms Most recent observed event-loop lag sample', '# TYPE xenon_process_event_loop_lag_ms gauge', `xenon_process_event_loop_lag_ms ${proc.eventLoop.lagMs}`, '# HELP xenon_process_event_loop_lag_max_ms Max event-loop lag since last scrape', '# TYPE xenon_process_event_loop_lag_max_ms gauge', `xenon_process_event_loop_lag_max_ms ${proc.eventLoop.maxLagSinceScrapeMs}`, '# HELP xenon_session_commands_processed_total All Appium commands intercepted (includes errors)', '# TYPE xenon_session_commands_processed_total counter', `xenon_session_commands_processed_total ${proc.commands.processed}`, '# HELP xenon_session_command_duration_ms_sum Cumulative command wall time', '# TYPE xenon_session_command_duration_ms_sum counter', `xenon_session_command_duration_ms_sum ${proc.commands.durationMsSum}`);
190
+ // Circuit breaker state — makes it obvious from a dashboard alert when
191
+ // an AI provider or any future wrapped dependency is shedding traffic.
192
+ // State encoded as int so Grafana can threshold on it easily:
193
+ // 0=closed (healthy), 1=half_open (probing), 2=open (shedding).
194
+ const breakers = CircuitBreaker_1.CIRCUIT_BREAKERS.snapshot();
195
+ if (breakers.length > 0) {
196
+ lines.push('# HELP xenon_circuit_breaker_state 0=closed, 1=half_open, 2=open', '# TYPE xenon_circuit_breaker_state gauge');
197
+ for (const b of breakers) {
198
+ lines.push(`xenon_circuit_breaker_state{key="${escapeLabel(b.key)}"} ${(_a = BREAKER_STATE_CODE[b.state]) !== null && _a !== void 0 ? _a : 0}`);
199
+ }
200
+ lines.push('# HELP xenon_circuit_breaker_consecutive_failures Current failure streak', '# TYPE xenon_circuit_breaker_consecutive_failures gauge');
201
+ for (const b of breakers) {
202
+ lines.push(`xenon_circuit_breaker_consecutive_failures{key="${escapeLabel(b.key)}"} ${b.consecutiveFailures}`);
203
+ }
204
+ }
127
205
  return lines.join('\n') + '\n';
128
206
  });
129
207
  }
@@ -54,6 +54,19 @@ let PortAllocator = class PortAllocator {
54
54
  const ttlMs = (_a = opts.ttlMs) !== null && _a !== void 0 ? _a : 60 * 60 * 1000;
55
55
  const now = Date.now();
56
56
  yield prisma_1.prisma.portLease.deleteMany({ where: { expiresAt: { lt: now } } });
57
+ const existing = yield prisma_1.prisma.portLease.findFirst({
58
+ where: { purpose, leasedToUdid: udid, port: { gte: start, lte: end } },
59
+ select: { port: true },
60
+ });
61
+ if (existing) {
62
+ yield prisma_1.prisma.portLease
63
+ .update({
64
+ where: { port: existing.port },
65
+ data: { leasedAt: now, expiresAt: now + ttlMs, leasedToPid: opts.pid },
66
+ })
67
+ .catch(() => undefined);
68
+ return existing.port;
69
+ }
57
70
  const active = yield prisma_1.prisma.portLease.findMany({
58
71
  where: { purpose, port: { gte: start, lte: end } },
59
72
  select: { port: true },
@@ -0,0 +1,99 @@
1
+ "use strict";
2
+ var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
3
+ var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
4
+ if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
5
+ else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
6
+ return c > 3 && r && Object.defineProperty(target, key, r), r;
7
+ };
8
+ var __importDefault = (this && this.__importDefault) || function (mod) {
9
+ return (mod && mod.__esModule) ? mod : { "default": mod };
10
+ };
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.ProcessMetricsService = void 0;
13
+ const typedi_1 = require("typedi");
14
+ const logger_1 = __importDefault(require("../logger"));
15
+ // Hub-process health metrics. Individual sessions don't spawn child
16
+ // processes (Appium is one Node process, sessions are logical constructs
17
+ // inside it), so true per-session RSS doesn't make sense. What DOES matter
18
+ // is whether the hub process itself is getting starved:
19
+ //
20
+ // - heap growing while session count is flat = memory leak
21
+ // - event-loop lag climbing while CPU is unsaturated = blocking work
22
+ // sneaking into the hot path (sync fs, bad JSON, etc)
23
+ //
24
+ // Memory is cheap to read on demand in getMetrics(). Event-loop lag needs
25
+ // a periodic sampler because by the time /metrics is scraped, the blocking
26
+ // moment is gone — we report the max lag observed since the last scrape.
27
/**
 * Collects process-level health numbers: memory usage (read on demand),
 * event-loop lag (sampled by a self-rescheduling timer) and running totals
 * of recorded commands.
 */
let ProcessMetricsService = class ProcessMetricsService {
    constructor() {
        this.logger = logger_1.default.scope('ProcessMetrics');
        this.sampleIntervalMs = 1000;
        // Largest lag sample seen since the previous snapshot() call.
        this.maxLagSinceScrapeMs = 0;
        // Most recent lag sample.
        this.lastLagMs = 0;
        this.timer = null;
        this.commandsProcessed = 0;
        this.commandDurationMsSum = 0;
    }
    /**
     * Starts the lag sampler. Calling it while a sampler is already running
     * is a no-op.
     *
     * @param {number} [intervalMs=1000] Sampling period in milliseconds.
     *   Non-finite or non-positive values fall back to 1000, because
     *   setTimeout would otherwise coerce them to ~1ms and spin the sampler.
     */
    start(intervalMs = 1000) {
        if (this.timer)
            return;
        this.sampleIntervalMs = Number.isFinite(intervalMs) && intervalMs > 0 ? intervalMs : 1000;
        this.scheduleNextSample();
        this.logger.info(`Event-loop lag sampling every ${this.sampleIntervalMs}ms`);
    }
    /** Stops the lag sampler if it is running. */
    stop() {
        if (this.timer) {
            clearTimeout(this.timer);
            this.timer = null;
        }
    }
    /**
     * Arms a single timer and, when it fires, records how late it fired and
     * re-arms itself.
     */
    scheduleNextSample() {
        var _a, _b;
        const scheduledAt = Date.now();
        this.timer = setTimeout(() => {
            // Lag = actual delay beyond the scheduled interval. A healthy loop
            // wakes up within a few ms of the deadline; sustained double-digit
            // ms means something is blocking.
            const actualDelay = Date.now() - scheduledAt;
            const lag = Math.max(0, actualDelay - this.sampleIntervalMs);
            this.lastLagMs = lag;
            if (lag > this.maxLagSinceScrapeMs)
                this.maxLagSinceScrapeMs = lag;
            this.scheduleNextSample();
        }, this.sampleIntervalMs);
        // Don't keep the process alive just for this sampler.
        (_b = (_a = this.timer).unref) === null || _b === void 0 ? void 0 : _b.call(_a);
    }
    /**
     * Records one processed command.
     *
     * The command is always counted. Its duration is added to the running
     * sum only when it is a finite, positive number: a single NaN/undefined
     * would otherwise turn the sum into NaN for good, and a negative value
     * would make a monotonic total go backwards.
     *
     * @param {number} durationMs Wall time of the command in milliseconds.
     */
    recordCommand(durationMs) {
        this.commandsProcessed++;
        if (Number.isFinite(durationMs) && durationMs > 0)
            this.commandDurationMsSum += durationMs;
    }
    /**
     * Returns the current readings and resets the max-lag window.
     *
     * @returns {{memory: object, eventLoop: {lagMs: number, maxLagSinceScrapeMs: number}, commands: {processed: number, durationMsSum: number}}}
     */
    snapshot() {
        const mem = process.memoryUsage();
        // Each call hands back the peak lag since the previous call and then
        // clears it, so consecutive readers see per-interval peaks rather
        // than an all-time high.
        const maxLag = this.maxLagSinceScrapeMs;
        this.maxLagSinceScrapeMs = 0;
        return {
            memory: {
                rss: mem.rss,
                heapUsed: mem.heapUsed,
                heapTotal: mem.heapTotal,
                external: mem.external,
                arrayBuffers: mem.arrayBuffers,
            },
            eventLoop: {
                lagMs: this.lastLagMs,
                maxLagSinceScrapeMs: maxLag,
            },
            commands: {
                processed: this.commandsProcessed,
                durationMsSum: this.commandDurationMsSum,
            },
        };
    }
};
96
+ exports.ProcessMetricsService = ProcessMetricsService;
97
+ exports.ProcessMetricsService = ProcessMetricsService = __decorate([
98
+ (0, typedi_1.Service)()
99
+ ], ProcessMetricsService);
@@ -0,0 +1,123 @@
1
+ "use strict";
2
+ var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
3
+ var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
4
+ if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
5
+ else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
6
+ return c > 3 && r && Object.defineProperty(target, key, r), r;
7
+ };
8
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
9
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
10
+ return new (P || (P = Promise))(function (resolve, reject) {
11
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
12
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
13
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
14
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
15
+ });
16
+ };
17
+ var __importDefault = (this && this.__importDefault) || function (mod) {
18
+ return (mod && mod.__esModule) ? mod : { "default": mod };
19
+ };
20
+ Object.defineProperty(exports, "__esModule", { value: true });
21
+ exports.ProcessRegistry = void 0;
22
+ const typedi_1 = require("typedi");
23
+ const crypto_1 = require("crypto");
24
+ const logger_1 = __importDefault(require("../logger"));
25
/**
 * In-memory registry of child processes, keyed by a generated id, with
 * helpers to terminate them individually or by session / udid.
 */
let ProcessRegistry = class ProcessRegistry {
    constructor() {
        this.log = logger_1.default.scope('ProcessRegistry');
        this.processes = new Map();
    }
    /**
     * Registers a child process. The entry removes itself when the child
     * emits 'exit'.
     *
     * @param {{sessionId: *, udid: *, kind: *, process: object}} opts
     * @returns {string} Registry id for the tracked process.
     */
    track(opts) {
        const id = (0, crypto_1.randomUUID)();
        const entry = {
            id,
            sessionId: opts.sessionId,
            udid: opts.udid,
            kind: opts.kind,
            // -1 marks "no pid" (e.g. the spawn failed).
            pid: opts.process.pid || -1,
            process: opts.process,
            startedAt: Date.now(),
        };
        this.processes.set(id, entry);
        opts.process.once('exit', () => this.processes.delete(id));
        return id;
    }
    /** Removes an entry without signalling the process. */
    untrack(id) {
        this.processes.delete(id);
    }
    /** @returns {Array<object>} Copy of the currently tracked entries. */
    snapshot() {
        return Array.from(this.processes.values());
    }
    /**
     * Delivers `signal` to the child. On non-Windows platforms the process
     * group (-pid) is tried first, falling back to the child itself.
     *
     * The group kill is skipped unless pid is a positive integer: with the
     * -1 "no pid" sentinel, `process.kill(-pid, …)` would be
     * `process.kill(1, …)` and signal PID 1 instead of the child.
     */
    sendSignal(child, pid, signal) {
        if (process.platform !== 'win32' && Number.isInteger(pid) && pid > 0) {
            try {
                process.kill(-pid, signal);
                return;
            }
            catch (_a) {
                // Group kill failed; fall through to the direct kill.
            }
        }
        child.kill(signal);
    }
    /**
     * Sends SIGTERM, waits up to `gracefulMs` for the child to exit, then
     * sends SIGKILL if it has not. The entry is removed afterwards. Signal
     * failures are logged, never thrown. Unknown ids are ignored.
     *
     * @param {string} id Registry id returned by track().
     * @param {{gracefulMs?: number}} [options] Grace period, default 5000ms.
     * @returns {Promise<void>}
     */
    terminate(id_1) {
        return __awaiter(this, arguments, void 0, function* (id, { gracefulMs = 5000 } = {}) {
            const entry = this.processes.get(id);
            if (!entry)
                return;
            const { process: child, pid, kind } = entry;
            // A child that has already exited will never emit 'exit' again;
            // drop it now rather than waiting out the whole grace period.
            if (typeof child.exitCode === 'number' || typeof child.signalCode === 'string') {
                this.processes.delete(id);
                return;
            }
            const exited = new Promise((resolve) => {
                child.once('exit', () => resolve());
            });
            try {
                this.sendSignal(child, pid, 'SIGTERM');
            }
            catch (err) {
                this.log.debug(`SIGTERM failed for ${kind}/${pid}: ${err.message}`);
            }
            let graceTimer;
            const graceElapsed = new Promise((resolve) => {
                graceTimer = setTimeout(() => resolve(true), gracefulMs);
            });
            const timedOut = yield Promise.race([exited.then(() => false), graceElapsed]);
            // Without this, every terminate() leaves a live timer behind for
            // gracefulMs even when the child exited immediately.
            clearTimeout(graceTimer);
            if (timedOut) {
                try {
                    this.sendSignal(child, pid, 'SIGKILL');
                }
                catch (err) {
                    this.log.warn(`SIGKILL failed for ${kind}/${pid}: ${err.message}`);
                }
            }
            this.processes.delete(id);
        });
    }
    /** Terminates, in parallel, every tracked process for `sessionId`. */
    terminateForSession(sessionId, opts) {
        return __awaiter(this, void 0, void 0, function* () {
            const targets = this.snapshot().filter((p) => p.sessionId === sessionId);
            yield Promise.all(targets.map((t) => this.terminate(t.id, opts)));
        });
    }
    /** Terminates, in parallel, every tracked process for `udid`. */
    terminateForUdid(udid, opts) {
        return __awaiter(this, void 0, void 0, function* () {
            const targets = this.snapshot().filter((p) => p.udid === udid);
            yield Promise.all(targets.map((t) => this.terminate(t.id, opts)));
        });
    }
    /** Terminates, in parallel, every tracked process. */
    terminateAll(opts) {
        return __awaiter(this, void 0, void 0, function* () {
            const targets = this.snapshot();
            yield Promise.all(targets.map((t) => this.terminate(t.id, opts)));
        });
    }
};
120
+ exports.ProcessRegistry = ProcessRegistry;
121
+ exports.ProcessRegistry = ProcessRegistry = __decorate([
122
+ (0, typedi_1.Service)()
123
+ ], ProcessRegistry);
@@ -200,6 +200,9 @@ let ServerManager = ServerManager_1 = class ServerManager {
200
200
  const { runMigrations } = yield Promise.resolve().then(() => __importStar(require('../scripts/run-migrations')));
201
201
  yield runMigrations();
202
202
  yield device_store_1.DeviceStoreFactory.getStore().clearStorage();
203
+ const { ApiKeyService } = yield Promise.resolve().then(() => __importStar(require('./ApiKeyService')));
204
+ const { config: xenonConfig } = yield Promise.resolve().then(() => __importStar(require('../config')));
205
+ yield typedi_1.Container.get(ApiKeyService).bootstrapIfEmpty(xenonConfig.bootstrapKeyPath);
203
206
  });
204
207
  }
205
208
  registerRoutes(expressApp, cliArgs, pluginArgs) {
@@ -240,13 +243,13 @@ let ServerManager = ServerManager_1 = class ServerManager {
240
243
  const hubArgument = pluginArgs.hub;
241
244
  if (hubArgument !== undefined) {
242
245
  this.logger.info(`📡 I'm a node and my hub is ${hubArgument}`);
243
- yield (0, device_utils_1.setupCronUpdateDeviceList)(pluginArgs.bindHostOrIp, hubArgument, pluginArgs.sendNodeDevicesToHubIntervalMs, pluginArgs.tlsRejectUnauthorized);
246
+ yield (0, device_utils_1.setupCronUpdateDeviceList)(pluginArgs.bindHostOrIp, hubArgument, pluginArgs.sendNodeDevicesToHubIntervalMs, pluginArgs.tlsRejectUnauthorized, pluginArgs.nodeSecret);
244
247
  // Handle graceful shutdown
245
248
  ['SIGINT', 'SIGTERM'].forEach((signal) => {
246
249
  process.once(signal, () => __awaiter(this, void 0, void 0, function* () {
247
250
  logger_1.default.info(`Received ${signal}, unregistering node from hub...`);
248
251
  try {
249
- yield new NodeDevices_1.default(hubArgument, pluginArgs.tlsRejectUnauthorized).unRegisterNode(pluginArgs.bindHostOrIp);
252
+ yield new NodeDevices_1.default(hubArgument, pluginArgs.tlsRejectUnauthorized, pluginArgs.nodeSecret).unRegisterNode(pluginArgs.bindHostOrIp);
250
253
  }
251
254
  catch (err) {
252
255
  logger_1.default.error(`Error during node unregistration: ${err}`);
@@ -295,6 +298,15 @@ let ServerManager = ServerManager_1 = class ServerManager {
295
298
  typedi_1.Container.get(SessionHeartbeatService).start(pluginArgs);
296
299
  // 8. Sweep orphaned sessions on a 30s cron
297
300
  (0, device_utils_1.setupCronSweepOrphanSessions)(pluginArgs.sessionHeartbeatIntervalMs || 30000);
301
+ // 9. Reconcile device-store busy flags against SESSION_MANAGER every
302
+ // 60s to catch devices orphaned mid-allocation (driver crash before
303
+ // session registration, etc.)
304
+ (0, device_utils_1.setupCronReconcileDevices)(60000);
305
+ // 10. Start event-loop lag sampler so xenon_process_event_loop_lag_ms
306
+ // has real values by first scrape. Memory gauges are read on
307
+ // demand; lag needs a running sampler.
308
+ const { ProcessMetricsService } = yield Promise.resolve().then(() => __importStar(require('./ProcessMetricsService')));
309
+ typedi_1.Container.get(ProcessMetricsService).start(1000);
298
310
  }
299
311
  });
300
312
  }
@@ -78,15 +78,27 @@ const RemoteSession_1 = require("../sessions/RemoteSession");
78
78
  const SessionManager_1 = require("../sessions/SessionManager");
79
79
  const event_manager_1 = require("../dashboard/event-manager");
80
80
  const session_service_1 = require("../dashboard/services/session-service");
81
+ const SessionStatus_1 = require("../types/SessionStatus");
81
82
  const SessionType_1 = __importDefault(require("../enums/SessionType"));
82
83
  const async_lock_1 = __importDefault(require("async-lock"));
83
84
  const commandsQueueGuard = new async_lock_1.default();
85
+ // Serializes concurrent deleteSession cleanup for the same sessionId so
86
+ // that onSessionStopped events don't fire twice if two clients race.
87
+ const sessionCleanupLock = new async_lock_1.default();
84
88
  let SessionLifecycleService = class SessionLifecycleService {
85
89
  constructor() {
86
90
  this.logger = logger_1.default.scope('SessionLifecycleService');
87
91
  }
88
92
  createSession(next, driver, caps) {
89
93
  return __awaiter(this, void 0, void 0, function* () {
94
+ // Fail fast during graceful shutdown so clients get a clear error instead
95
+ // of their request hanging until the process dies. Lazy import to avoid
96
+ // a cycle (ShutdownCoordinator -> SessionLifecycleService -> this file).
97
+ const { ShutdownCoordinator } = yield Promise.resolve().then(() => __importStar(require('./ShutdownCoordinator')));
98
+ if (typedi_1.Container.get(ShutdownCoordinator).isDraining) {
99
+ this.logger.warn('Rejecting new session: hub is draining for shutdown');
100
+ throw new Error('Hub is shutting down; please retry against a different node');
101
+ }
90
102
  const context = typedi_1.Container.get(PluginContext_1.PluginContext);
91
103
  const pluginArgs = context.pluginArgs;
92
104
  this.logger.debug(`📱 pluginArgs: ${JSON.stringify(pluginArgs)}`);
@@ -457,15 +469,20 @@ let SessionLifecycleService = class SessionLifecycleService {
457
469
  }
458
470
  deleteSession(next, sessionId, status, reason) {
459
471
  return __awaiter(this, void 0, void 0, function* () {
472
+ // Phase 1: device unblock + finalizeCleanup must be atomic. Without the lock,
473
+ // two racing deletes both observe isStopping=false and both run finalizeCleanup,
474
+ // which releases ports and archives video twice.
460
475
  if (sessionId) {
461
- yield (0, device_service_1.unblockDeviceMatchingFilter)({ session_id: sessionId });
462
- this.logger.info(`📱 Unblocking the device that is blocked for session ${sessionId}`);
463
- }
464
- const session = sessionId ? SessionManager_1.SESSION_MANAGER.getSession(sessionId) : undefined;
465
- if (session) {
466
- session.isStopping = true;
467
- session.stoppedAt = Date.now();
468
- yield this.finalizeCleanup(session, status, reason);
476
+ yield sessionCleanupLock.acquire(sessionId, () => __awaiter(this, void 0, void 0, function* () {
477
+ yield (0, device_service_1.unblockDeviceMatchingFilter)({ session_id: sessionId });
478
+ this.logger.info(`📱 Unblocking the device that is blocked for session ${sessionId}`);
479
+ const session = SessionManager_1.SESSION_MANAGER.getSession(sessionId);
480
+ if (session && !session.isStopping) {
481
+ session.isStopping = true;
482
+ session.stoppedAt = Date.now();
483
+ yield this.finalizeCleanup(session, status, reason);
484
+ }
485
+ }));
469
486
  }
470
487
  let timeoutId;
471
488
  try {
@@ -486,8 +503,12 @@ let SessionLifecycleService = class SessionLifecycleService {
486
503
  }
487
504
  finally {
488
505
  if (sessionId) {
489
- const session = SessionManager_1.SESSION_MANAGER.getSession(sessionId);
490
- if (session) {
506
+ yield sessionCleanupLock.acquire(sessionId, () => __awaiter(this, void 0, void 0, function* () {
507
+ const session = SessionManager_1.SESSION_MANAGER.getSession(sessionId);
508
+ if (!session) {
509
+ // Another concurrent deleteSession already cleaned up.
510
+ return;
511
+ }
491
512
  const device = session.getDevice();
492
513
  try {
493
514
  const { NetworkConditioningService } = yield Promise.resolve().then(() => __importStar(require('./NetworkConditioningService')));
@@ -496,20 +517,20 @@ let SessionLifecycleService = class SessionLifecycleService {
496
517
  catch (resetErr) {
497
518
  this.logger.warn(`⚠️ NetworkConditioningService.reset failed for session ${sessionId}: ${resetErr.message}`);
498
519
  }
499
- }
500
- yield event_manager_1.DASHBORD_EVENT_MANAGER.onSessionStopped(sessionId, status, reason);
501
- SessionManager_1.SESSION_MANAGER.removeSession(sessionId);
502
- try {
503
- const { getSessionById } = yield Promise.resolve().then(() => __importStar(require('../dashboard/services/session-service')));
504
- const sessionData = yield getSessionById(sessionId);
505
- if (sessionData && (sessionData.status === 'failed' || sessionData.failure_reason)) {
506
- const { NotificationService } = yield Promise.resolve().then(() => __importStar(require('./NotificationService')));
507
- yield typedi_1.Container.get(NotificationService).dispatchEvent('session_failed', sessionData);
520
+ yield event_manager_1.DASHBORD_EVENT_MANAGER.onSessionStopped(sessionId, status, reason);
521
+ SessionManager_1.SESSION_MANAGER.removeSession(sessionId);
522
+ try {
523
+ const { getSessionById } = yield Promise.resolve().then(() => __importStar(require('../dashboard/services/session-service')));
524
+ const sessionData = yield getSessionById(sessionId);
525
+ if (sessionData && (sessionData.status === 'failed' || sessionData.failure_reason)) {
526
+ const { NotificationService } = yield Promise.resolve().then(() => __importStar(require('./NotificationService')));
527
+ yield typedi_1.Container.get(NotificationService).dispatchEvent('session_failed', sessionData);
528
+ }
508
529
  }
509
- }
510
- catch (err) {
511
- /* ignore notification errors */
512
- }
530
+ catch (err) {
531
+ /* ignore notification errors */
532
+ }
533
+ }));
513
534
  }
514
535
  }
515
536
  });
@@ -572,6 +593,42 @@ let SessionLifecycleService = class SessionLifecycleService {
572
593
  }
573
594
  });
574
595
  }
596
+ // Shutdown-path counterpart to deleteSession. Runs the same cleanup (unblock
597
+ // device, archive video, release ports, mark failed, emit stopped) WITHOUT
598
+ // needing Appium's `next()` driver shutdown — during process shutdown we
599
+ // don't have it, and any leftover driver state dies with the process
600
+ // anyway. Shares sessionCleanupLock with deleteSession so a racing client
601
+ // delete can't double-archive video.
602
+ stopSessionForShutdown(sessionId, reason) {
603
+ return __awaiter(this, void 0, void 0, function* () {
604
+ yield sessionCleanupLock.acquire(sessionId, () => __awaiter(this, void 0, void 0, function* () {
605
+ try {
606
+ yield (0, device_service_1.unblockDeviceMatchingFilter)({ session_id: sessionId });
607
+ }
608
+ catch (err) {
609
+ this.logger.warn(`[shutdown] unblock failed for ${sessionId}: ${err.message}`);
610
+ }
611
+ const session = SessionManager_1.SESSION_MANAGER.getSession(sessionId);
612
+ if (session && !session.isStopping) {
613
+ session.isStopping = true;
614
+ session.stoppedAt = Date.now();
615
+ try {
616
+ yield this.finalizeCleanup(session, SessionStatus_1.SessionStatus.FAILED, reason);
617
+ }
618
+ catch (err) {
619
+ this.logger.warn(`[shutdown] finalizeCleanup failed for ${sessionId}: ${err.message}`);
620
+ }
621
+ }
622
+ try {
623
+ yield event_manager_1.DASHBORD_EVENT_MANAGER.onSessionStopped(sessionId, SessionStatus_1.SessionStatus.FAILED, reason);
624
+ }
625
+ catch (err) {
626
+ this.logger.warn(`[shutdown] onSessionStopped failed for ${sessionId}: ${err.message}`);
627
+ }
628
+ SessionManager_1.SESSION_MANAGER.removeSession(sessionId);
629
+ }));
630
+ });
631
+ }
575
632
  isHub(args) {
576
633
  return !args.hub;
577
634
  }