plugin-cluster-manager 1.1.10 → 1.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. package/client-v2.d.ts +2 -0
  2. package/client-v2.js +1 -0
  3. package/client.js +1 -0
  4. package/dist/client/index.js +1 -1
  5. package/dist/client-v2/914.5dc1105cf3ada6a6.js +10 -0
  6. package/dist/client-v2/index.js +10 -0
  7. package/dist/externalVersion.js +6 -5
  8. package/dist/locale/en-US.json +138 -28
  9. package/dist/locale/vi-VN.json +139 -28
  10. package/dist/locale/zh-CN.json +140 -28
  11. package/dist/server/actions/cache-monitor.js +301 -0
  12. package/dist/server/actions/cluster-nodes.js +391 -11
  13. package/dist/server/actions/doctor.js +1246 -0
  14. package/dist/server/actions/orchestrator.js +37 -0
  15. package/dist/server/actions/queue-mappings.js +107 -0
  16. package/dist/server/collections/cluster-manager-doctor-runs.js +52 -0
  17. package/dist/server/collections/cluster-manager-doctor.js +44 -0
  18. package/dist/server/collections/worker-queue-mappings.js +106 -0
  19. package/dist/server/hooks/cacheInvalidationHooks.js +81 -0
  20. package/dist/server/middlewares/listMetaCacheMiddleware.js +79 -0
  21. package/dist/server/orchestrator/PackageManager.js +21 -24
  22. package/dist/server/orchestrator/docker-adapter.js +49 -27
  23. package/dist/server/plugin.js +71 -16
  24. package/dist/server/queue-scanner.js +141 -0
  25. package/dist/server/utils/node.js +30 -2
  26. package/dist/server/utils/versionManager.js +91 -0
  27. package/package.json +9 -5
  28. package/server.js +1 -0
  29. package/src/client/AclCacheManager.tsx +292 -287
  30. package/src/client/CacheMonitor.tsx +166 -179
  31. package/src/client/ClusterManagerLayout.tsx +54 -42
  32. package/src/client/ClusterNodes.tsx +698 -418
  33. package/src/client/ContainerOrchestrator.tsx +184 -102
  34. package/src/client/Doctor.tsx +559 -0
  35. package/src/client/NginxCacheManager.tsx +415 -0
  36. package/src/client/PluginOperations.tsx +234 -234
  37. package/src/client/QueueAssignment.tsx +355 -0
  38. package/src/client/TaskManager.tsx +194 -187
  39. package/src/client/WorkflowExecutions.tsx +243 -238
  40. package/src/client/index.tsx +22 -14
  41. package/src/client/utils/clientSafeCache.ts +41 -0
  42. package/src/client/utils/requestDedupInterceptor.ts +213 -0
  43. package/src/client-v2/plugin.tsx +24 -0
  44. package/src/locale/en-US.json +138 -28
  45. package/src/locale/vi-VN.json +139 -28
  46. package/src/locale/zh-CN.json +140 -28
  47. package/src/server/__tests__/doctor.test.ts +53 -0
  48. package/src/server/actions/acl-cache.ts +272 -272
  49. package/src/server/actions/cache-monitor.ts +453 -116
  50. package/src/server/actions/cluster-nodes.ts +878 -378
  51. package/src/server/actions/doctor.ts +1536 -0
  52. package/src/server/actions/orchestrator.ts +54 -2
  53. package/src/server/actions/queue-mappings.ts +94 -0
  54. package/src/server/collections/cluster-manager-doctor-runs.ts +23 -0
  55. package/src/server/collections/cluster-manager-doctor.ts +19 -0
  56. package/src/server/collections/worker-queue-mappings.ts +85 -0
  57. package/src/server/hooks/cacheInvalidationHooks.ts +58 -0
  58. package/src/server/middlewares/listMetaCacheMiddleware.ts +55 -0
  59. package/src/server/orchestrator/PackageManager.ts +20 -24
  60. package/src/server/orchestrator/docker-adapter.ts +74 -37
  61. package/src/server/plugin.ts +347 -270
  62. package/src/server/queue-scanner.ts +154 -0
  63. package/src/server/utils/node.ts +48 -0
  64. package/src/server/utils/versionManager.ts +69 -0
  65. package/dist/client/AclCacheManager.d.ts +0 -2
  66. package/dist/client/CacheMonitor.d.ts +0 -2
  67. package/dist/client/ClusterManagerLayout.d.ts +0 -2
  68. package/dist/client/ClusterNodes.d.ts +0 -2
  69. package/dist/client/ContainerOrchestrator.d.ts +0 -2
  70. package/dist/client/EventQueueMonitor.d.ts +0 -2
  71. package/dist/client/LockMonitor.d.ts +0 -2
  72. package/dist/client/PackageInstaller.d.ts +0 -2
  73. package/dist/client/PluginOperations.d.ts +0 -2
  74. package/dist/client/RedisMonitor.d.ts +0 -2
  75. package/dist/client/TaskManager.d.ts +0 -2
  76. package/dist/client/WorkflowExecutions.d.ts +0 -2
  77. package/dist/client/index.d.ts +0 -5
  78. package/dist/client/utils.d.ts +0 -12
  79. package/dist/index.d.ts +0 -2
  80. package/dist/server/actions/acl-cache.d.ts +0 -53
  81. package/dist/server/actions/cache-monitor.d.ts +0 -23
  82. package/dist/server/actions/cluster-nodes.d.ts +0 -49
  83. package/dist/server/actions/event-queue-monitor.d.ts +0 -13
  84. package/dist/server/actions/lock-monitor.d.ts +0 -19
  85. package/dist/server/actions/orchestrator.d.ts +0 -58
  86. package/dist/server/actions/package-manager.d.ts +0 -6
  87. package/dist/server/actions/plugin-operations.d.ts +0 -6
  88. package/dist/server/actions/redis-monitor.d.ts +0 -12
  89. package/dist/server/actions/tasks.d.ts +0 -7
  90. package/dist/server/actions/workflow-executions.d.ts +0 -7
  91. package/dist/server/adapters/redis-lock-adapter.d.ts +0 -15
  92. package/dist/server/adapters/redis-node-registry.d.ts +0 -12
  93. package/dist/server/adapters/redis-pubsub-adapter.d.ts +0 -16
  94. package/dist/server/collections/app.d.ts +0 -8
  95. package/dist/server/collections/cluster-manager-acl-cache.d.ts +0 -22
  96. package/dist/server/collections/cluster-manager-cache-mgr.d.ts +0 -22
  97. package/dist/server/collections/cluster-manager-cluster.d.ts +0 -22
  98. package/dist/server/collections/cluster-manager-lock.d.ts +0 -22
  99. package/dist/server/collections/cluster-manager-plugins.d.ts +0 -18
  100. package/dist/server/collections/cluster-manager-queue.d.ts +0 -22
  101. package/dist/server/collections/cluster-manager-redis.d.ts +0 -22
  102. package/dist/server/collections/cluster-manager-workflow.d.ts +0 -22
  103. package/dist/server/collections/cluster-manager.d.ts +0 -22
  104. package/dist/server/collections/orchestrator-settings.d.ts +0 -59
  105. package/dist/server/collections/orchestrator-stacks.d.ts +0 -102
  106. package/dist/server/collections/worker-orchestrator.d.ts +0 -22
  107. package/dist/server/collections/worker-packages-configs.d.ts +0 -3
  108. package/dist/server/collections/worker-packages.d.ts +0 -22
  109. package/dist/server/orchestrator/PackageManager.d.ts +0 -39
  110. package/dist/server/orchestrator/docker-adapter.d.ts +0 -41
  111. package/dist/server/orchestrator/index.d.ts +0 -4
  112. package/dist/server/orchestrator/k8s-adapter.d.ts +0 -50
  113. package/dist/server/orchestrator/leader-election.d.ts +0 -48
  114. package/dist/server/orchestrator/types.d.ts +0 -84
  115. package/dist/server/plugin.d.ts +0 -26
  116. package/dist/server/utils/node.d.ts +0 -6
  117. package/dist/server/utils/redis.d.ts +0 -29
  118. package/dist/shared/packages.d.ts +0 -23
  119. /package/{dist/server/index.d.ts → src/client-v2/index.tsx} +0 -0
@@ -1,378 +1,878 @@
1
- import { Context } from '@nocobase/actions';
2
- import { AppSupervisor } from '@nocobase/server';
3
- import os from 'os';
4
- import { promises as fsp } from 'fs';
5
- import path from 'path';
6
- import crypto from 'crypto';
7
- import { RedisNodeRegistry } from '../adapters/redis-node-registry';
8
- import { getRedis } from '../utils/redis';
9
- import { getLocalNodeId } from '../utils/node';
10
-
11
- const LOG_RESPONSE_KEY_PREFIX = 'cluster-manager:log-response:';
12
- const LOG_RESPONSE_TTL = 30; // seconds
13
-
14
- function sleep(ms: number) {
15
- return new Promise((resolve) => setTimeout(resolve, ms));
16
- }
17
-
18
- /**
19
- * Read the last N lines from the local system log file.
20
- * Extracted so it can be called from both the HTTP action and the PubSub subscriber.
21
- */
22
- export async function readLocalLogs(app: any, maxLines: number) {
23
- const logBasePath = process.env.LOGGER_BASE_PATH || path.resolve(process.cwd(), 'storage', 'logs');
24
- const appName = process.env.APP_NAME || app.name || 'main';
25
- const logDir = path.resolve(logBasePath, appName);
26
-
27
- let logFiles: string[] = [];
28
- try {
29
- const files = await fsp.readdir(logDir);
30
- logFiles = files
31
- .filter((f) => f.startsWith('system') && f.endsWith('.log') && !f.includes('error'))
32
- .sort()
33
- .reverse();
34
- } catch {
35
- // logDir doesn't exist or not readable
36
- }
37
-
38
- const nodeInfo = {
39
- hostname: os.hostname(),
40
- pid: process.pid,
41
- workerMode: process.env.WORKER_MODE || 'main',
42
- };
43
-
44
- if (logFiles.length === 0) {
45
- return { node: nodeInfo, lines: [] as string[], file: null };
46
- }
47
-
48
- const logFilePath = path.resolve(logDir, logFiles[0]);
49
- const result: string[] = [];
50
- try {
51
- const stat = await fsp.stat(logFilePath);
52
- const bufferSize = Math.min(stat.size, maxLines * 2048);
53
- const buffer = Buffer.alloc(bufferSize);
54
- const fh = await fsp.open(logFilePath, 'r');
55
- await fh.read(buffer, 0, bufferSize, Math.max(0, stat.size - bufferSize));
56
- await fh.close();
57
-
58
- const content = buffer.toString('utf8');
59
- const allLines = content.split('\n').filter((l) => l.trim());
60
- result.push(...allLines.slice(-maxLines));
61
- } catch {
62
- // File read error
63
- }
64
-
65
- return { node: nodeInfo, lines: result, file: logFiles[0] };
66
- }
67
-
68
-
69
- export const clusterActions = {
70
- /**
71
- * GET /clusterManagerCluster:current
72
- * Always returns info about the APP node (not workers).
73
- * If this request is handled by a worker, we look up the APP node from Redis.
74
- */
75
- async current(ctx: Context, next: () => Promise<void>) {
76
- const currentMode = process.env.WORKER_MODE || 'main';
77
- const isApp = currentMode === 'main' || currentMode === '' || currentMode === 'app';
78
-
79
- if (isApp) {
80
- // This process IS the APP node — return local data directly
81
- const mem = process.memoryUsage();
82
- ctx.body = {
83
- node: {
84
- hostname: os.hostname(),
85
- pid: process.pid,
86
- nodeVersion: process.version,
87
- platform: process.platform,
88
- arch: process.arch,
89
- uptime: process.uptime(),
90
- workerMode: currentMode,
91
- appPort: process.env.APP_PORT || '',
92
- clusterMode: process.env.CLUSTER_MODE || '',
93
- },
94
- memory: {
95
- rss: mem.rss,
96
- heapUsed: mem.heapUsed,
97
- heapTotal: mem.heapTotal,
98
- external: mem.external,
99
- arrayBuffers: mem.arrayBuffers || 0,
100
- },
101
- os: {
102
- totalMemory: os.totalmem(),
103
- freeMemory: os.freemem(),
104
- cpuCount: os.cpus().length,
105
- loadAvg: os.loadavg(),
106
- },
107
- };
108
- } else {
109
- // This process is a WORKER — find the APP node from Redis heartbeat data
110
- const plugin = (ctx.app as any).pm?.get?.('plugin-cluster-manager') as any;
111
- const registry = plugin?.nodeRegistry ?? new RedisNodeRegistry(ctx.app);
112
- const nodes = await registry.getNodes();
113
- const appNode = nodes.find(
114
- (n: any) => n.workerMode === 'main' || n.workerMode === '' || n.workerMode === 'app',
115
- );
116
-
117
- if (appNode?.nodeDetails) {
118
- ctx.body = appNode.nodeDetails;
119
- } else {
120
- // Fallback: return local data with a flag so the UI knows
121
- const mem = process.memoryUsage();
122
- ctx.body = {
123
- node: {
124
- hostname: os.hostname(),
125
- pid: process.pid,
126
- nodeVersion: process.version,
127
- platform: process.platform,
128
- arch: process.arch,
129
- uptime: process.uptime(),
130
- workerMode: currentMode,
131
- appPort: process.env.APP_PORT || '',
132
- clusterMode: process.env.CLUSTER_MODE || '',
133
- },
134
- memory: {
135
- rss: mem.rss,
136
- heapUsed: mem.heapUsed,
137
- heapTotal: mem.heapTotal,
138
- external: mem.external,
139
- arrayBuffers: mem.arrayBuffers || 0,
140
- },
141
- os: {
142
- totalMemory: os.totalmem(),
143
- freeMemory: os.freemem(),
144
- cpuCount: os.cpus().length,
145
- loadAvg: os.loadavg(),
146
- },
147
- _fallback: true,
148
- _note: 'APP node not found in Redis; showing responding worker data',
149
- };
150
- }
151
- }
152
-
153
- await next();
154
- },
155
-
156
- /**
157
- * GET /clusterManagerCluster:list
158
- * Returns all known cluster environments/nodes (if discovery adapter supports it)
159
- */
160
- async list(ctx: Context, next: () => Promise<void>) {
161
- const supervisor = AppSupervisor.getInstance();
162
- const environments: any[] = [];
163
-
164
- const plugin = (ctx.app as any).pm?.get?.('plugin-cluster-manager') as any;
165
- const registry = plugin?.nodeRegistry ?? new RedisNodeRegistry(ctx.app);
166
- const nodes = await registry.getNodes();
167
-
168
- if (nodes && nodes.length > 0) {
169
- for (const env of nodes) {
170
- environments.push({
171
- id: env.id || env.name,
172
- name: env.name,
173
- hostname: env.hostname || os.hostname(),
174
- url: env.url,
175
- available: env.available,
176
- appVersion: env.appVersion,
177
- lastHeartbeatAt: env.lastHeartbeatAt ? new Date(env.lastHeartbeatAt).toISOString() : null,
178
- status: env.status || 'online',
179
- workerMode: env.workerMode,
180
- isSandbox: env.isSandbox,
181
- pid: env.pid
182
- });
183
- }
184
- }
185
-
186
- // If no discovery adapter or empty, at least return current node
187
- if (environments.length === 0) {
188
- environments.push({
189
- name: os.hostname(),
190
- hostname: os.hostname(),
191
- url: null,
192
- available: true,
193
- appVersion: null,
194
- lastHeartbeatAt: new Date().toISOString(),
195
- status: 'online',
196
- });
197
- }
198
-
199
- ctx.body = { data: environments, meta: { count: environments.length } };
200
- await next();
201
- },
202
-
203
- /**
204
- * GET /clusterManagerCluster:health
205
- * Health check for all subsystems
206
- */
207
- async health(ctx: Context, next: () => Promise<void>) {
208
- const checks: Record<string, { status: string; latency?: number; detail?: string }> = {};
209
-
210
- // Redis check
211
- try {
212
- const redis = getRedis(ctx);
213
- if (redis) {
214
- const start = Date.now();
215
- await redis.ping();
216
- checks.redis = { status: 'ok', latency: Date.now() - start };
217
- } else {
218
- checks.redis = { status: 'not_configured' };
219
- }
220
- } catch (e: any) {
221
- checks.redis = { status: 'error', detail: e.message };
222
- }
223
-
224
- // Database check
225
- try {
226
- const start = Date.now();
227
- await ctx.db.sequelize.query('SELECT 1');
228
- checks.database = { status: 'ok', latency: Date.now() - start };
229
- } catch (e: any) {
230
- checks.database = { status: 'error', detail: e.message };
231
- }
232
-
233
- // PubSub check
234
- try {
235
- const connected = await ctx.app.pubSubManager?.isConnected();
236
- const pubSubAdapter = (ctx.app.pubSubManager as any)?.adapter;
237
- checks.pubsub = {
238
- status: connected ? 'connected' : 'disconnected',
239
- detail: pubSubAdapter?.constructor?.name || 'no adapter',
240
- };
241
- } catch (e: any) {
242
- checks.pubsub = { status: 'error', detail: e.message };
243
- }
244
-
245
- // Event Queue check
246
- try {
247
- const connected = ctx.app.eventQueue?.isConnected();
248
- const adapterType = (ctx.app.eventQueue as any)?.adapter?.constructor?.name || 'unknown';
249
- checks.eventQueue = {
250
- status: connected ? 'connected' : 'disconnected',
251
- detail: adapterType,
252
- };
253
- } catch (e: any) {
254
- checks.eventQueue = { status: 'error', detail: e.message };
255
- }
256
-
257
- // Lock Manager check
258
- try {
259
- const lockOptions = (ctx.app.lockManager as any)?.options;
260
- const adapterType = lockOptions?.defaultAdapter || 'local';
261
- checks.lockManager = { status: 'ok', detail: `adapter: ${adapterType}` };
262
- } catch (e: any) {
263
- checks.lockManager = { status: 'error', detail: e.message };
264
- }
265
-
266
- // Cache check
267
- try {
268
- const defaultStore = ctx.app.cacheManager?.defaultStore || 'memory';
269
- checks.cache = { status: 'ok', detail: `store: ${defaultStore}` };
270
- } catch (e: any) {
271
- checks.cache = { status: 'error', detail: e.message };
272
- }
273
-
274
- const allOk = Object.values(checks).every(
275
- (c) => c.status === 'ok' || c.status === 'connected' || c.status === 'not_configured',
276
- );
277
-
278
- ctx.body = { healthy: allOk, checks };
279
- await next();
280
- },
281
-
282
- /**
283
- * POST /clusterManagerCluster:restart
284
- * Publishes a restart signal to target nodes orchestrating a soft NocoBase restart or a hard docker daemon rebirth
285
- */
286
- async restart(ctx: Context, next: () => Promise<void>) {
287
- const { hostname, mode = 'hard' } = ctx.action.params.values || ctx.action.params;
288
- if (!hostname) ctx.throw(400, 'Hostname required');
289
-
290
- // NocoBase initializes pubSubManager ONLY IF PUBSUB_ADAPTER_REDIS_URL is provided natively.
291
- if ((ctx.app as any).pubSubManager) {
292
- await (ctx.app as any).pubSubManager.publish('cluster-manager:restart', JSON.stringify({ hostname, mode }));
293
- ctx.body = { success: true, target: hostname, mode };
294
- } else {
295
- ctx.throw(500, 'PubSub manager is not initialized. HA requires PUBSUB_ADAPTER_REDIS_URL to be set.');
296
- }
297
- await next();
298
- },
299
-
300
- /**
301
- * GET /clusterManagerCluster:logs?targetNodeId=xxx&lines=200
302
- *
303
- * HA-aware log viewer. Reads logs from a specific node in the cluster.
304
- *
305
- * Flow:
306
- * 1. If targetNodeId matches current node (or is empty) → read local FS directly
307
- * 2. Otherwise publish a log request via PubSub → target node reads its local FS
308
- * and writes the result to a Redis key → this handler polls Redis until the
309
- * response arrives (max 10s) → returns it to the client
310
- */
311
- async logs(ctx: Context, next: () => Promise<void>) {
312
- const { lines = 200, targetNodeId } = ctx.action.params;
313
- const maxLines = Math.min(Number(lines) || 200, 1000);
314
- const myNodeId = getLocalNodeId(ctx.app);
315
-
316
- // ── Case 1: Local read (no target specified, or target is this node) ──
317
- if (!targetNodeId || targetNodeId === myNodeId) {
318
- ctx.body = await readLocalLogs(ctx.app, maxLines);
319
- await next();
320
- return;
321
- }
322
-
323
- // ── Case 2: Remote read via PubSub → Redis response pattern ──
324
- const redis = getRedis(ctx);
325
- const pubSub = (ctx.app as any).pubSubManager;
326
-
327
- if (!redis || !pubSub) {
328
- // No HA infrastructure — fall back to local logs with a warning
329
- const localResult = await readLocalLogs(ctx.app, maxLines);
330
- (localResult as any)._fallback = true;
331
- (localResult as any)._note = `PubSub/Redis not available; showing logs from local node instead of ${targetNodeId}`;
332
- ctx.body = localResult;
333
- await next();
334
- return;
335
- }
336
-
337
- // Generate a unique request ID for the response channel
338
- const requestId = crypto.randomBytes(8).toString('hex');
339
- const responseKey = `${LOG_RESPONSE_KEY_PREFIX}${requestId}`;
340
-
341
- // Publish the log request — ONLY the target node is subscribed to this specific channel
342
- await pubSub.publish(
343
- `cluster-manager:log-request:${targetNodeId}`,
344
- JSON.stringify({ requestId, targetNodeId, lines: maxLines }),
345
- );
346
-
347
- // Poll Redis for the response (200ms interval, max 10s = 50 iterations)
348
- let responseData: any = null;
349
- for (let i = 0; i < 50; i++) {
350
- await sleep(200);
351
- try {
352
- const raw = await redis.sendCommand(['GET', responseKey]);
353
- if (raw) {
354
- responseData = JSON.parse(raw);
355
- // Clean up the response key immediately
356
- redis.sendCommand(['DEL', responseKey]).catch(() => {});
357
- break;
358
- }
359
- } catch {
360
- // Parse error or Redis error — continue polling
361
- }
362
- }
363
-
364
- if (responseData) {
365
- ctx.body = responseData;
366
- } else {
367
- // Timeout — target node may be unreachable
368
- ctx.body = {
369
- node: { hostname: 'unknown', pid: null, workerMode: 'unknown', id: targetNodeId },
370
- lines: [],
371
- file: null,
372
- _error: `Timeout waiting for logs from ${targetNodeId}. Node may be offline or PubSub is not connected.`,
373
- };
374
- }
375
-
376
- await next();
377
- },
378
- };
1
+ import { Context } from '@nocobase/actions';
2
+ import os from 'os';
3
+ import { promises as fsp } from 'fs';
4
+ import path from 'path';
5
+ import crypto from 'crypto';
6
+ import { RedisNodeRegistry } from '../adapters/redis-node-registry';
7
+ import { getRedis } from '../utils/redis';
8
+ import { getLocalNodeId, getNodeRoleFrom, isWorkerMode } from '../utils/node';
9
+ import { packagesFromConfig, type CustomPackageMap, type WorkerPackageMap } from '../../shared/packages';
10
+
11
+ const LOG_RESPONSE_KEY_PREFIX = 'cluster-manager:log-response:';
12
+ const LEGACY_MULTI_APP_PLUGINS = ['multi-app-manager', 'multi-app-share-collection'];
13
+
14
/**
 * A node entry as returned by the node registry's `getNodes()` and consumed in
 * this module. Every field is optional, so readers must tolerate partially
 * populated records.
 */
interface ClusterNodeRecord {
  // --- identity ---
  id?: string;
  name?: string;
  hostname?: string;
  pid?: number;
  url?: string | null;

  // --- role ---
  workerMode?: string;
  isSandbox?: boolean;

  // --- liveness ---
  status?: string;
  available?: boolean;
  /** Fed to `new Date(...)` by the list action; presumably an epoch timestamp — confirm against the registry writer. */
  lastHeartbeatAt?: number;

  // --- versions / runtime ---
  appVersion?: string;
  nodeDetails?: {
    node?: {
      nodeVersion?: string;
      platform?: string;
      arch?: string;
    };
  };
}
34
+
35
/**
 * Package-initialisation status for a node, as decoded from the JSON value
 * that `readPackageStatus` fetches from Redis.
 */
interface PackageStatus {
  initStatus?: string;
  initProgressPercent?: number;
  initProgressLog?: string;
  lastInitAt?: string | Date;
  lastInitLog?: string;
  /**
   * Either a JSON-encoded string or an already-decoded object;
   * `parsePackageWhitelist` accepts both forms and merges `node` into `npm`.
   */
  packageWhitelist?: string | { apt?: string[]; npm?: string[]; node?: string[]; python?: string[] };
}
50
+
51
/**
 * Package lists grouped by installer. Unlike the raw config shapes, all three
 * lists are always present (possibly empty).
 */
interface NormalizedPackages {
  /** System packages. */
  apt: string[];
  /** Node packages; the parsers in this file fold `node` entries into this list. */
  npm: string[];
  /** Python packages. */
  python: string[];
}
56
+
57
/** Returns a promise that resolves once a `setTimeout` of `ms` milliseconds fires. */
function sleep(ms: number) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
60
+
61
/**
 * Coerces an arbitrary value into a clean list of package names.
 *
 * Non-arrays yield `[]`. Within an array, non-string entries are ignored,
 * strings are trimmed, blank results are dropped and duplicates are removed
 * while keeping first-seen order.
 */
function normalizeList(value: unknown): string[] {
  if (!Array.isArray(value)) return [];

  const unique = new Set<string>();
  for (const entry of value) {
    if (typeof entry !== 'string') continue;
    const trimmed = entry.trim();
    if (trimmed) unique.add(trimmed);
  }
  return [...unique];
}
72
+
73
/**
 * Builds a fully populated {@link NormalizedPackages} from an optional package
 * map by running each of its `apt` / `npm` / `python` lists through
 * `normalizeList`.
 */
function normalizePackageMap(packages?: WorkerPackageMap): NormalizedPackages {
  const clean = (kind: 'apt' | 'npm' | 'python'): string[] => normalizeList(packages?.[kind]);

  const apt = clean('apt');
  const npm = clean('npm');
  const python = clean('python');
  return { apt, npm, python };
}
80
+
81
/**
 * Decodes the `customPackages` config value into a map with `python`, `node`
 * and `npm` lists.
 *
 * Accepts either an object or a JSON string holding one. Falsy input,
 * unparsable JSON, arrays and non-objects all produce a map of empty lists.
 */
function parseCustomPackages(value: unknown): CustomPackageMap {
  const emptyMap = (): CustomPackageMap => ({ python: [], node: [], npm: [] });

  let decoded: unknown = value;
  // An empty string is falsy and is rejected below without attempting a parse.
  if (typeof value === 'string' && value) {
    try {
      decoded = JSON.parse(value);
    } catch {
      return emptyMap();
    }
  }

  const isPlainRecord = Boolean(decoded) && typeof decoded === 'object' && !Array.isArray(decoded);
  if (!isPlainRecord) {
    return emptyMap();
  }

  const { python, node, npm } = decoded as Record<'python' | 'node' | 'npm', unknown>;
  return {
    python: normalizeList(python),
    node: normalizeList(node),
    npm: normalizeList(npm),
  };
}
111
+
112
/**
 * Extracts the installed-package whitelist from a node's package status.
 *
 * `packageWhitelist` may be an object or a JSON string holding one. The `npm`
 * and `node` lists are concatenated (npm first) into the resulting `npm` list.
 * A missing status, missing whitelist, unparsable JSON, array or non-object
 * all produce empty lists.
 */
function parsePackageWhitelist(status?: PackageStatus | null): NormalizedPackages {
  const nothing = (): NormalizedPackages => ({ apt: [], npm: [], python: [] });

  let decoded: unknown = status?.packageWhitelist;
  if (!decoded) {
    return nothing();
  }

  if (typeof decoded === 'string') {
    try {
      decoded = JSON.parse(decoded);
    } catch {
      return nothing();
    }
  }

  if (!decoded || typeof decoded !== 'object' || Array.isArray(decoded)) {
    return nothing();
  }

  const { apt, npm, node, python } = decoded as Record<'apt' | 'npm' | 'node' | 'python', unknown>;
  const arrayOrEmpty = (candidate: unknown): unknown[] => (Array.isArray(candidate) ? candidate : []);

  return {
    apt: normalizeList(apt),
    npm: normalizeList([...arrayOrEmpty(npm), ...arrayOrEmpty(node)]),
    python: normalizeList(python),
  };
}
147
+
148
/**
 * Returns, per installer, the packages listed in `expected` that do not appear
 * in `installed`. Order follows `expected`.
 *
 * Membership is checked through a Set built once per installer, so the cost is
 * linear in the list sizes rather than quadratic. `Set.has` and
 * `Array.includes` both use SameValueZero, so results are unchanged.
 */
function diffPackages(expected: NormalizedPackages, installed: NormalizedPackages): NormalizedPackages {
  const missingFrom = (wanted: string[], present: string[]): string[] => {
    const presentSet = new Set(present);
    return wanted.filter((pkg) => !presentSet.has(pkg));
  };

  return {
    apt: missingFrom(expected.apt, installed.apt),
    npm: missingFrom(expected.npm, installed.npm),
    python: missingFrom(expected.python, installed.python),
  };
}
155
+
156
/** True when at least one of the `apt`, `npm` or `python` lists is non-empty. */
function hasMissingPackages(packages: NormalizedPackages): boolean {
  const { apt, npm, python } = packages;
  return [apt, npm, python].some((list) => list.length > 0);
}
159
+
160
/**
 * Turns a caught value into display text: the `message` of an `Error`
 * instance, otherwise the `String(...)` conversion of the value.
 */
function getErrorMessage(error: unknown) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
163
+
164
/**
 * Classifies a registry record as `app`, `worker` or `sandbox` by handing its
 * `workerMode` and `isSandbox` fields (and nothing else) to `getNodeRoleFrom`.
 */
function getNodeRole(node: ClusterNodeRecord): 'app' | 'worker' | 'sandbox' {
  const { workerMode, isSandbox } = node;
  return getNodeRoleFrom({ workerMode, isSandbox });
}
167
+
168
/**
 * Picks the app version that other nodes are compared against.
 *
 * The first node whose role is `app` and that reports an `appVersion` wins.
 * Failing that, the most frequently reported version is used; on a tie the
 * version seen first in `nodes` wins. Returns `null` when no node reports a
 * version.
 */
function getReferenceVersion(nodes: ClusterNodeRecord[]) {
  for (const candidate of nodes) {
    if (getNodeRole(candidate) === 'app' && candidate.appVersion) {
      return candidate.appVersion;
    }
  }

  const tally = new Map<string, number>();
  for (const { appVersion } of nodes) {
    if (!appVersion) continue;
    tally.set(appVersion, (tally.get(appVersion) ?? 0) + 1);
  }

  // Map iteration follows insertion order, so a strict `>` keeps the earliest
  // version among equally frequent ones.
  let winner: string | null = null;
  let winnerCount = 0;
  for (const [version, count] of tally) {
    if (count > winnerCount) {
      winner = version;
      winnerCount = count;
    }
  }
  return winner;
}
182
+
183
/**
 * Fetches the current node list. Uses the `nodeRegistry` exposed by the
 * `plugin-cluster-manager` plugin instance when one is available, otherwise a
 * freshly constructed `RedisNodeRegistry` for the app.
 */
async function getClusterNodes(ctx: Context): Promise<ClusterNodeRecord[]> {
  const app = ctx.app as any;
  const pluginRegistry = (app.pm?.get?.('plugin-cluster-manager') as any)?.nodeRegistry;
  const registry = pluginRegistry ?? new RedisNodeRegistry(ctx.app);
  return registry.getNodes();
}
188
+
189
/**
 * Computes the package set that nodes are expected to have installed.
 *
 * Reads one record from the `workerPackagesConfigs` repository. Without a
 * record, the result is whatever `packagesFromConfig({})` yields, normalised.
 * With a record, the configured apt/python/npm values are combined with the
 * parsed `customPackages` value; custom `node` and `npm` entries both land in
 * the `npm` list.
 */
async function getExpectedPackages(ctx: Context): Promise<NormalizedPackages> {
  const configRepo = ctx.db.getRepository('workerPackagesConfigs');
  const record = await configRepo?.findOne?.();
  if (!record) {
    return normalizePackageMap(packagesFromConfig({}));
  }

  const aptPackages = record.get('aptPackages');
  const pythonPackages = record.get('pythonPackages');
  const npmPackages = record.get('npmPackages');
  const base = packagesFromConfig({ aptPackages, pythonPackages, npmPackages });
  const extra = parseCustomPackages(record.get('customPackages'));

  const npm = (base.npm || []).concat(extra.node || [], extra.npm || []);
  const python = (base.python || []).concat(extra.python || []);
  return normalizePackageMap({ apt: base.apt, npm, python });
}
209
+
210
/**
 * Loads a node's package status from Redis.
 *
 * Keys are tried in this order, skipping any whose identifying field is
 * missing on the node:
 *   1. `cluster-manager:pkg-status:<id>`
 *   2. `orchestrator:pkg-status:<hostname>`
 *   3. `orchestrator:pkg-status:<name>`
 *
 * A key is accepted only if it holds JSON that decodes to a plain object.
 * Anything else (missing key, Redis error, malformed JSON, or a JSON scalar,
 * `null` or array) makes the lookup move on to the next key instead of
 * returning a value that is not a status object.
 *
 * @returns the decoded status, or `null` when Redis is unavailable or no key
 *          holds a usable value.
 */
async function readPackageStatus(ctx: Context, node: ClusterNodeRecord): Promise<PackageStatus | null> {
  const redis = getRedis(ctx);
  if (!redis) return null;

  const candidates = [
    node.id ? `cluster-manager:pkg-status:${node.id}` : null,
    node.hostname ? `orchestrator:pkg-status:${node.hostname}` : null,
    node.name ? `orchestrator:pkg-status:${node.name}` : null,
  ].filter((key): key is string => key !== null);
  // hostname and name can coincide; avoid issuing the same GET twice.
  const keys = [...new Set(candidates)];

  for (const key of keys) {
    try {
      const raw: unknown = await redis.sendCommand(['GET', key]);
      if (typeof raw !== 'string' || !raw) continue;

      const parsed: unknown = JSON.parse(raw);
      if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
        return parsed as PackageStatus;
      }
    } catch {
      // Redis error or malformed JSON — try the next key.
    }
  }

  return null;
}
233
+
234
/**
 * Lists every record of the `applicationPlugins` repository, sorted by
 * `name`, as plain objects (via each record's `toJSON()`). Returns `[]` when
 * the repository is not available.
 */
async function getApplicationPluginRows(ctx: Context) {
  const pluginRepo = ctx.db.getRepository('applicationPlugins');
  if (!pluginRepo) {
    return [];
  }

  const records = await pluginRepo.find({ sort: ['name'] });
  const plainRows: any[] = [];
  for (const record of records) {
    plainRows.push(record.toJSON());
  }
  return plainRows;
}
240
+
241
/**
 * Resolves the request payload from the first truthy source, in order:
 * `ctx.action.params.values`, `ctx.request.body.values`, `ctx.request.body`,
 * and finally an empty object.
 */
function getPayload(ctx: Context) {
  const actionValues = ctx.action.params.values;
  if (actionValues) {
    return actionValues as any;
  }

  const body = (ctx as any).request?.body;
  return (body?.values || body || {}) as any;
}
244
+
245
/**
 * Read the last N lines from the newest local system log file.
 * Extracted so it can be called from both the HTTP action and the PubSub subscriber.
 *
 * The log directory is `<LOGGER_BASE_PATH or ./storage/logs>/<APP_NAME or
 * app.name or "main">`. Candidate files start with `system`, end with `.log`
 * and do not contain `error`; the lexicographically last name is read.
 *
 * Only the tail of the file is read, budgeted at 2048 bytes per requested
 * line. Blank lines are dropped before the last `maxLines` are taken.
 *
 * @param app      Application instance; only `app.name` is read.
 * @param maxLines Maximum number of lines to return.
 * @returns `{ node, lines, file }`. `file` is `null` and `lines` is empty
 *          when no log file is found. Read errors are swallowed on purpose
 *          and produce an empty `lines` array.
 */
export async function readLocalLogs(app: any, maxLines: number) {
  const logBasePath = process.env.LOGGER_BASE_PATH || path.resolve(process.cwd(), 'storage', 'logs');
  const appName = process.env.APP_NAME || app.name || 'main';
  const logDir = path.resolve(logBasePath, appName);

  let logFiles: string[] = [];
  try {
    const files = await fsp.readdir(logDir);
    logFiles = files
      .filter((f) => f.startsWith('system') && f.endsWith('.log') && !f.includes('error'))
      .sort()
      .reverse();
  } catch {
    // logDir doesn't exist or not readable
  }

  const nodeInfo = {
    hostname: os.hostname(),
    pid: process.pid,
    workerMode: process.env.WORKER_MODE || 'main',
  };

  if (logFiles.length === 0) {
    return { node: nodeInfo, lines: [] as string[], file: null };
  }

  const logFilePath = path.resolve(logDir, logFiles[0]);
  const result: string[] = [];
  try {
    const fh = await fsp.open(logFilePath, 'r');
    try {
      // Stat through the handle so size and contents refer to the same file.
      const { size } = await fh.stat();
      const tailBytes = Math.min(size, maxLines * 2048);
      const tailStart = Math.max(0, size - tailBytes);
      // When starting mid-file, include the byte just before the window. If it
      // is a newline the window begins on a line boundary and nothing is lost;
      // otherwise the leading fragment is a truncated line and is discarded.
      const readStart = tailStart > 0 ? tailStart - 1 : 0;
      const readLength = size - readStart;

      const buffer = Buffer.alloc(readLength);
      const { bytesRead } = await fh.read(buffer, 0, readLength, readStart);
      // Decode only what was actually read; a short read must not surface the
      // zero padding of the buffer as text.
      let content = buffer.toString('utf8', 0, bytesRead);

      if (readStart > 0) {
        const firstBreak = content.indexOf('\n');
        if (firstBreak !== -1) {
          content = content.slice(firstBreak + 1);
        }
      }

      const allLines = content.split('\n').filter((l) => l.trim());
      result.push(...allLines.slice(-maxLines));
    } finally {
      // Always release the descriptor, including when stat/read throws.
      await fh.close();
    }
  } catch {
    // File read error
  }

  return { node: nodeInfo, lines: result, file: logFiles[0] };
}
294
+
295
+ export const clusterActions = {
296
/**
 * GET /clusterManagerCluster:current
 * Always returns info about the APP node (not workers).
 * If this request is handled by a worker, we look up the APP node from Redis.
 * When no APP node with details is registered, the worker's own data is
 * returned, flagged with `_fallback: true` and an explanatory `_note`.
 */
async current(ctx: Context, next: () => Promise<void>) {
  const currentMode = process.env.WORKER_MODE || 'main';

  // Snapshot of the process handling this request. Built in one place so the
  // APP branch and the worker fallback cannot drift apart.
  const describeLocalProcess = () => {
    const mem = process.memoryUsage();
    return {
      node: {
        hostname: os.hostname(),
        pid: process.pid,
        nodeVersion: process.version,
        platform: process.platform,
        arch: process.arch,
        uptime: process.uptime(),
        workerMode: currentMode,
        appPort: process.env.APP_PORT || '',
        clusterMode: process.env.CLUSTER_MODE || '',
      },
      memory: {
        rss: mem.rss,
        heapUsed: mem.heapUsed,
        heapTotal: mem.heapTotal,
        external: mem.external,
        arrayBuffers: mem.arrayBuffers || 0,
      },
      os: {
        totalMemory: os.totalmem(),
        freeMemory: os.freemem(),
        cpuCount: os.cpus().length,
        loadAvg: os.loadavg(),
      },
    };
  };

  const isApp = !isWorkerMode(process.env.WORKER_MODE);

  if (isApp) {
    // This process IS the APP node — return local data directly
    ctx.body = describeLocalProcess();
  } else {
    // This process is a WORKER — find the APP node from Redis heartbeat data
    const nodes = await getClusterNodes(ctx);
    const appNode = nodes.find((n) => n.workerMode === 'main' || n.workerMode === '' || n.workerMode === 'app');

    if (appNode?.nodeDetails) {
      ctx.body = appNode.nodeDetails;
    } else {
      // Fallback: return local data with a flag so the UI knows
      ctx.body = {
        ...describeLocalProcess(),
        _fallback: true,
        _note: 'APP node not found in Redis; showing responding worker data',
      };
    }
  }

  await next();
},
379
+
380
/**
 * GET /clusterManagerCluster:list
 *
 * Responds with `{ data, meta: { count } }`, where `data` holds one entry per node
 * returned by `getClusterNodes(ctx)`. When that lookup yields nothing, a single
 * entry describing the local host is returned instead so the list is never empty.
 */
async list(ctx: Context, next: () => Promise<void>) {
  const discovered = await getClusterNodes(ctx);

  const environments: any[] =
    discovered && discovered.length > 0
      ? discovered.map((env) => ({
          id: env.id || env.name,
          name: env.name,
          // Nodes that did not report a hostname are shown with the local one.
          hostname: env.hostname || os.hostname(),
          url: env.url,
          available: env.available,
          appVersion: env.appVersion,
          lastHeartbeatAt: env.lastHeartbeatAt ? new Date(env.lastHeartbeatAt).toISOString() : null,
          status: env.status || 'online',
          workerMode: env.workerMode,
          isSandbox: env.isSandbox,
          pid: env.pid,
        }))
      : [];

  // Nothing discovered: fall back to a minimal record for the current host.
  if (environments.length === 0) {
    environments.push({
      name: os.hostname(),
      hostname: os.hostname(),
      url: null,
      available: true,
      appVersion: null,
      lastHeartbeatAt: new Date().toISOString(),
      status: 'online',
    });
  }

  ctx.body = { data: environments, meta: { count: environments.length } };
  await next();
},
423
+
424
/**
 * GET /clusterManagerCluster:drift
 *
 * Compares the non-offline cluster nodes against three references and reports every mismatch:
 *  - version drift: `appVersion` differs from `getReferenceVersion(nodes)`;
 *  - runtime drift: Node.js version, platform or arch differs from the first node whose role is `app`;
 *  - package drift: a non-app node has no package status, an init status other than
 *    `succeeded`, or packages missing relative to `getExpectedPackages(ctx)`.
 *
 * `healthy` is true only when all three drift lists are empty.
 */
async drift(ctx: Context, next: () => Promise<void>) {
  const nodes = await getClusterNodes(ctx);
  const referenceVersion = getReferenceVersion(nodes);
  const expectedPackages = await getExpectedPackages(ctx);

  // Offline nodes are excluded from every drift list (but still counted in summary.nodes).
  const liveNodes = nodes.filter((candidate) => candidate.status !== 'offline');

  // Identity fields shared by every drift record.
  const identify = (node: (typeof nodes)[number]) => ({
    id: node.id,
    name: node.name,
    hostname: node.hostname,
    role: getNodeRole(node),
  });

  const versionDrifts = liveNodes
    .filter((node) => referenceVersion && node.appVersion && node.appVersion !== referenceVersion)
    .map((node) => ({
      ...identify(node),
      expectedVersion: referenceVersion,
      actualVersion: node.appVersion,
    }));

  // The runtime baseline is taken from the first node (offline or not) whose role is 'app'.
  const runtimeReference = nodes.find((node) => getNodeRole(node) === 'app')?.nodeDetails?.node;
  const runtimeDrifts = !runtimeReference
    ? []
    : liveNodes
        .filter((node) => {
          const runtime = node.nodeDetails?.node;
          if (!runtime) return false;
          const sameRuntime =
            runtime.nodeVersion === runtimeReference.nodeVersion &&
            runtime.platform === runtimeReference.platform &&
            runtime.arch === runtimeReference.arch;
          return !sameRuntime;
        })
        .map((node) => ({
          ...identify(node),
          expected: {
            nodeVersion: runtimeReference.nodeVersion,
            platform: runtimeReference.platform,
            arch: runtimeReference.arch,
          },
          actual: {
            nodeVersion: node.nodeDetails?.node?.nodeVersion,
            platform: node.nodeDetails?.node?.platform,
            arch: node.nodeDetails?.node?.arch,
          },
        }));

  const packageDrifts: Array<Record<string, unknown>> = [];
  for (const node of liveNodes) {
    if (getNodeRole(node) === 'app') continue;

    // Package status is read one node at a time.
    const status = await readPackageStatus(ctx, node);
    const installedPackages = parsePackageWhitelist(status);
    const missingPackages = diffPackages(expectedPackages, installedPackages);

    // Only a node with a status, a succeeded init and nothing missing is considered in sync.
    if (status && status?.initStatus === 'succeeded' && !hasMissingPackages(missingPackages)) {
      continue;
    }

    packageDrifts.push({
      ...identify(node),
      status: status?.initStatus || 'unknown',
      lastInitAt: status?.lastInitAt || null,
      missingPackages,
      installedPackages,
      initProgressLog: status?.initProgressLog || '',
    });
  }

  const driftTotal = versionDrifts.length + runtimeDrifts.length + packageDrifts.length;
  ctx.body = {
    healthy: driftTotal === 0,
    referenceVersion,
    expectedPackages,
    versionDrifts,
    runtimeDrifts,
    packageDrifts,
    checkedAt: new Date().toISOString(),
    summary: {
      nodes: nodes.length,
      versionDrifts: versionDrifts.length,
      runtimeDrifts: runtimeDrifts.length,
      packageDrifts: packageDrifts.length,
    },
  };
  await next();
},
515
+
516
/**
 * GET /clusterManagerCluster:legacyDiagnostics
 *
 * Reports, for each name in LEGACY_MULTI_APP_PLUGINS, whether a plugin row exists, whether it
 * is enabled, and whether the plugin manager currently has it loaded. Also counts records in
 * the `applications` collection (when it exists) and checks whether App Supervisor is enabled.
 *
 * `healthy` is false as soon as any finding has level `warning`; `info` findings do not affect it.
 */
async legacyDiagnostics(ctx: Context, next: () => Promise<void>) {
  const rows = await getApplicationPluginRows(ctx);
  const pm = (ctx.app as any).pm;

  // A plugin row matches by short name or by its scoped package name.
  const scopedName = (shortName: string) => `@nocobase/plugin-${shortName}`;
  const rowFor = (shortName: string) =>
    rows.find((item: any) => item.name === shortName || item.packageName === scopedName(shortName));

  const plugins = LEGACY_MULTI_APP_PLUGINS.map((name) => {
    const row = rowFor(name);
    const loaded = Boolean(pm?.get?.(name) || pm?.get?.(scopedName(name)));
    return {
      name,
      packageName: scopedName(name),
      installed: Boolean(row),
      enabled: Boolean(row?.enabled),
      loaded,
      version: row?.version,
    };
  });

  // Best effort: a failing count is treated the same as "no legacy records".
  let legacyApplicationCount = 0;
  if (ctx.db.hasCollection?.('applications')) {
    try {
      legacyApplicationCount = await ctx.db.getRepository('applications').count();
    } catch {
      legacyApplicationCount = 0;
    }
  }

  const findings: Array<Record<string, unknown>> = [];
  // For static messages the translation key and the English text are the same string.
  const report = (level: string, code: string, text: string) => {
    findings.push({ level, code, messageKey: text, message: text });
  };

  const manager = plugins.find((plugin) => plugin.name === 'multi-app-manager');
  const shareCollection = plugins.find((plugin) => plugin.name === 'multi-app-share-collection');
  const appSupervisor = rowFor('app-supervisor');

  if (manager?.enabled || manager?.loaded) {
    report(
      'warning',
      'legacy_multi_app_manager_active',
      'Deprecated multi-app manager is active. It runs apps in shared process memory and should not be used for production cluster isolation.',
    );
  }

  if (shareCollection?.enabled || shareCollection?.loaded) {
    report(
      'warning',
      'legacy_share_collection_active',
      'Deprecated multi-app share collection is active. Avoid schema/table sharing for new cluster deployments.',
    );
  }

  if (legacyApplicationCount > 0) {
    findings.push({
      level: 'warning',
      code: 'legacy_app_records_found',
      messageKey: '{count} legacy application record(s) were found in the applications collection.',
      messageArgs: { count: legacyApplicationCount },
      message: `${legacyApplicationCount} legacy application record(s) were found in the applications collection.`,
    });
  }

  if (!appSupervisor?.enabled) {
    report(
      'info',
      'app_supervisor_not_enabled',
      'App Supervisor is not enabled. Use it for new multi-application management instead of deprecated multi-app plugins.',
    );
  }

  const supervisorSummary = appSupervisor
    ? {
        installed: true,
        enabled: Boolean(appSupervisor.enabled),
        version: appSupervisor.version,
      }
    : { installed: false, enabled: false };

  ctx.body = {
    healthy: findings.every((finding) => finding.level !== 'warning'),
    plugins,
    appSupervisor: supervisorSummary,
    legacyApplicationCount,
    findings,
  };
  await next();
},
611
+
612
/**
 * GET /clusterManagerCluster:health
 *
 * Probes Redis, the database, PubSub, the event queue, the lock manager and the cache, in
 * that order, and responds with `{ healthy, checks }`. A probe that throws is recorded as
 * `{ status: 'error', detail: <error message> }` rather than failing the request.
 * `healthy` is true when every check ended as `ok`, `connected` or `not_configured`.
 */
async health(ctx: Context, next: () => Promise<void>) {
  type CheckResult = { status: string; latency?: number; detail?: string };
  const checks: Record<string, CheckResult> = {};

  // Runs one probe and stores its outcome under `name`; a thrown error becomes an 'error' entry.
  const probe = async (name: string, run: () => CheckResult | Promise<CheckResult>) => {
    try {
      checks[name] = await run();
    } catch (e: any) {
      checks[name] = { status: 'error', detail: e.message };
    }
  };

  // Redis: round-trip time of a PING, or 'not_configured' when no client is available.
  await probe('redis', async () => {
    const redis = getRedis(ctx);
    if (!redis) {
      return { status: 'not_configured' };
    }
    const startedAt = Date.now();
    await redis.ping();
    return { status: 'ok', latency: Date.now() - startedAt };
  });

  // Database: round-trip time of a trivial query.
  await probe('database', async () => {
    const startedAt = Date.now();
    await ctx.db.sequelize.query('SELECT 1');
    return { status: 'ok', latency: Date.now() - startedAt };
  });

  // PubSub: connection state plus the adapter's class name.
  await probe('pubsub', async () => {
    const connected = await ctx.app.pubSubManager?.isConnected();
    const pubSubAdapter = (ctx.app.pubSubManager as any)?.adapter;
    return {
      status: connected ? 'connected' : 'disconnected',
      detail: pubSubAdapter?.constructor?.name || 'no adapter',
    };
  });

  // Event queue: connection state plus the adapter's class name.
  await probe('eventQueue', () => {
    const connected = ctx.app.eventQueue?.isConnected();
    const adapterType = (ctx.app.eventQueue as any)?.adapter?.constructor?.name || 'unknown';
    return {
      status: connected ? 'connected' : 'disconnected',
      detail: adapterType,
    };
  });

  // Lock manager: only reports which adapter is configured.
  await probe('lockManager', () => {
    const lockOptions = (ctx.app.lockManager as any)?.options;
    const adapterType = lockOptions?.defaultAdapter || 'local';
    return { status: 'ok', detail: `adapter: ${adapterType}` };
  });

  // Cache: only reports which default store is configured.
  await probe('cache', () => {
    const defaultStore = ctx.app.cacheManager?.defaultStore || 'memory';
    return { status: 'ok', detail: `store: ${defaultStore}` };
  });

  const passingStatuses = new Set(['ok', 'connected', 'not_configured']);
  const allOk = Object.values(checks).every((check) => passingStatuses.has(check.status));

  ctx.body = { healthy: allOk, checks };
  await next();
},
690
+
691
/**
 * POST /clusterManagerCluster:restart
 *
 * Publishes a `{ hostname, mode }` message on the `cluster-manager:restart` PubSub channel.
 * `mode` defaults to 'hard' when the request does not supply one.
 *
 * Fails with 400 when no hostname is given and with 500 when the app has no PubSub manager.
 */
async restart(ctx: Context, next: () => Promise<void>) {
  // Parameters may arrive under `values` or directly on the action params.
  const input = ctx.action.params.values || ctx.action.params;
  const { hostname, mode = 'hard' } = input;

  if (!hostname) {
    ctx.throw(400, 'Hostname required');
  }

  // NocoBase initializes pubSubManager ONLY IF PUBSUB_ADAPTER_REDIS_URL is provided natively.
  const pubSub = (ctx.app as any).pubSubManager;
  if (!pubSub) {
    ctx.throw(500, 'PubSub manager is not initialized. HA requires PUBSUB_ADAPTER_REDIS_URL to be set.');
  }

  await pubSub.publish('cluster-manager:restart', JSON.stringify({ hostname, mode }));
  ctx.body = { success: true, target: hostname, mode };
  await next();
},
708
+
709
/**
 * POST /clusterManagerCluster:rollingRestart
 *
 * Schedules staggered restart messages for the selected non-offline nodes and responds
 * immediately with the schedule; the messages themselves are published later by timers.
 *
 * Payload fields:
 *  - `mode`: 'soft' is honoured, anything else becomes 'hard';
 *  - `role`: role to target (default 'worker'), or 'all' for every node;
 *  - `delayMs`: gap between consecutive restarts, clamped to 1000..60000 (default 5000);
 *  - `nodeIds`: when non-empty, selects nodes by id and `role` is ignored.
 *
 * Fails with 500 when there is no PubSub manager and 404 when no node matches.
 */
async rollingRestart(ctx: Context, next: () => Promise<void>) {
  const payload = getPayload(ctx);
  const mode = payload.mode === 'soft' ? 'soft' : 'hard';
  const role = payload.role || 'worker';
  const requestedDelay = Number(payload.delayMs) || 5000;
  const delayMs = Math.min(60000, Math.max(1000, requestedDelay));
  const requestedNodeIds = Array.isArray(payload.nodeIds) ? payload.nodeIds.map(String) : [];

  const pubSub = (ctx.app as any).pubSubManager;
  if (!pubSub) {
    ctx.throw(500, 'PubSub manager is not initialized. HA requires PUBSUB_ADAPTER_REDIS_URL to be set.');
  }

  const candidates = await getClusterNodes(ctx);
  const targets = candidates.filter((node) => {
    if (node.status === 'offline') return false;
    // An explicit id list takes precedence over the role filter.
    if (requestedNodeIds.length > 0) return node.id && requestedNodeIds.includes(node.id);
    return role === 'all' || getNodeRole(node) === role;
  });

  if (targets.length === 0) {
    ctx.throw(404, 'No online nodes match the rolling restart target.');
  }

  // The node handling this request goes last; the others are ordered by name (or id).
  const myNodeId = getLocalNodeId(ctx.app);
  const sortedNodes = [...targets].sort((left, right) => {
    if (left.id === myNodeId) return 1;
    if (right.id === myNodeId) return -1;
    return String(left.name || left.id).localeCompare(String(right.name || right.id));
  });

  const restartId = crypto.randomBytes(8).toString('hex');
  const startedAt = Date.now();
  const logger = ctx.app.logger;

  // Schedule returned to the caller: node N is restarted N * delayMs after the request.
  const published = sortedNodes.map((node, position) => {
    const offsetMs = position * delayMs;
    return {
      id: node.id,
      name: node.name,
      hostname: node.hostname,
      role: getNodeRole(node),
      mode,
      order: position + 1,
      scheduledDelayMs: offsetMs,
      scheduledAt: new Date(startedAt + offsetMs).toISOString(),
    };
  });

  for (const [position, node] of sortedNodes.entries()) {
    setTimeout(() => {
      try {
        const message = JSON.stringify({
          restartId,
          targetNodeId: node.id,
          hostname: node.hostname,
          mode,
        });
        const publishResult = pubSub.publish('cluster-manager:restart', message);
        // publish() may or may not return a promise; normalise so a rejection is logged.
        Promise.resolve(publishResult).catch((error) => {
          const label = node.id || node.hostname;
          logger.error(
            `[ClusterManager] Failed to publish rolling restart ${restartId} for ${label}: ${getErrorMessage(error)}`,
          );
        });
      } catch (error) {
        // Synchronous failure while building or sending the message.
        const label = node.id || node.hostname;
        logger.error(
          `[ClusterManager] Failed to schedule rolling restart ${restartId} for ${label}: ${getErrorMessage(error)}`,
        );
      }
    }, position * delayMs);
  }

  ctx.body = {
    success: true,
    restartId,
    mode,
    role,
    delayMs,
    scheduled: true,
    estimatedDurationMs: Math.max(0, (sortedNodes.length - 1) * delayMs),
    published,
  };
  await next();
},
798
+
799
/**
 * GET /clusterManagerCluster:logs?targetNodeId=xxx&lines=200
 *
 * HA-aware log viewer. Reads logs from a specific node in the cluster.
 *
 * `lines` is coerced to an integer and clamped to 1..1000 (default 200), so negative,
 * fractional or oversized values from the query string never reach the log reader.
 *
 * Flow:
 *  1. If targetNodeId is empty or equals the local node id → read the local logs directly.
 *  2. If Redis or PubSub is unavailable → return local logs flagged with `_fallback`/`_note`.
 *  3. Otherwise → publish a request on the target's PubSub channel, then poll a Redis key
 *     for the response (50 polls, 200ms apart). If nothing arrives, respond with an
 *     empty result carrying `_error`.
 */
async logs(ctx: Context, next: () => Promise<void>) {
  const { lines = 200, targetNodeId } = ctx.action.params;
  // Truncate before the `|| 200` default so NaN, 0 and values below 1 in magnitude fall back to 200;
  // then bound both ends (the original only capped the upper end).
  const requestedLines = Math.trunc(Number(lines)) || 200;
  const maxLines = Math.min(Math.max(requestedLines, 1), 1000);
  const myNodeId = getLocalNodeId(ctx.app);

  // ── Case 1: Local read (no target specified, or target is this node) ──
  if (!targetNodeId || targetNodeId === myNodeId) {
    ctx.body = await readLocalLogs(ctx.app, maxLines);
    await next();
    return;
  }

  // ── Case 2: Remote read via PubSub → Redis response pattern ──
  const redis = getRedis(ctx);
  const pubSub = (ctx.app as any).pubSubManager;

  if (!redis || !pubSub) {
    // No HA infrastructure — fall back to local logs with a warning
    const localResult = await readLocalLogs(ctx.app, maxLines);
    (localResult as any)._fallback = true;
    (localResult as any)._note =
      `PubSub/Redis not available; showing logs from local node instead of ${targetNodeId}`;
    ctx.body = localResult;
    await next();
    return;
  }

  // A random request id names the Redis key this handler polls for the response.
  const requestId = crypto.randomBytes(8).toString('hex');
  const responseKey = `${LOG_RESPONSE_KEY_PREFIX}${requestId}`;

  // The channel name embeds the target node id, so the request is addressed to that node.
  await pubSub.publish(
    `cluster-manager:log-request:${targetNodeId}`,
    JSON.stringify({ requestId, targetNodeId, lines: maxLines }),
  );

  // Poll Redis for the response (200ms interval, 50 iterations)
  let responseData: any = null;
  for (let attempt = 0; attempt < 50; attempt++) {
    await sleep(200);
    try {
      const raw = await redis.sendCommand(['GET', responseKey]);
      if (raw) {
        responseData = JSON.parse(raw);
        // Best-effort cleanup of the response key; a failed DEL is deliberately ignored.
        redis.sendCommand(['DEL', responseKey]).catch(() => {});
        break;
      }
    } catch {
      // Parse error or Redis error — continue polling
    }
  }

  if (responseData) {
    ctx.body = responseData;
  } else {
    // Timeout — target node may be unreachable
    ctx.body = {
      node: { hostname: 'unknown', pid: null, workerMode: 'unknown', id: targetNodeId },
      lines: [],
      file: null,
      _error: `Timeout waiting for logs from ${targetNodeId}. Node may be offline or PubSub is not connected.`,
    };
  }

  await next();
},
878
+ };