@abtnode/core 1.17.4-beta-20251204-080001-08643fbe → 1.17.4-beta-20251205-104405-28838df1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/api/team.js CHANGED
@@ -3062,7 +3062,11 @@ class TeamAPI extends EventEmitter {
3062
3062
 
3063
3063
  return result;
3064
3064
  } catch (err) {
3065
- logger.error('Failed to create org', { err, teamDid });
3065
+ logger.error('Failed to create org', err, {
3066
+ teamDid,
3067
+ name: rest.name,
3068
+ userDid: rest.ownerDid || context.user.did || '',
3069
+ });
3066
3070
  throw err;
3067
3071
  }
3068
3072
  }
@@ -45,7 +45,7 @@ const {
45
45
  EVENTS,
46
46
  USER_PROFILE_SYNC_FIELDS,
47
47
  } = require('@abtnode/constant');
48
-
48
+ const { BLOCKLET_SITE_GROUP_SUFFIX } = require('@abtnode/constant');
49
49
  const { getBlockletEngine } = require('@blocklet/meta/lib/engine');
50
50
  const {
51
51
  isDeletableBlocklet,
@@ -245,7 +245,7 @@ const { installExternalDependencies } = require('../../util/install-external-dep
245
245
  const { dockerExecChown } = require('../../util/docker/docker-exec-chown');
246
246
  const checkDockerRunHistory = require('../../util/docker/check-docker-run-history');
247
247
  const { shouldJobBackoff } = require('../../util/env');
248
- const ensureBlockletRunning = require('./ensure-blocklet-running');
248
+ const { ensureBlockletRunning } = require('./ensure-blocklet-running');
249
249
 
250
250
  const { transformNotification } = require('../../util/notification');
251
251
  const { generateUserUpdateData } = require('../../util/user');
@@ -1916,7 +1916,7 @@ class DiskBlockletManager extends BaseBlockletManager {
1916
1916
  if (!aliasDomainSite) {
1917
1917
  return null;
1918
1918
  }
1919
- targetDid = (aliasDomainSite.domain || '').replace('.blocklet-domain-group', '');
1919
+ targetDid = (aliasDomainSite.domain || '').replace(BLOCKLET_SITE_GROUP_SUFFIX, '');
1920
1920
  }
1921
1921
 
1922
1922
  if (!targetDid) {
@@ -1,14 +1,22 @@
1
+ /* eslint-disable no-await-in-loop */
1
2
  const logger = require('@abtnode/logger')('@abtnode/core:blocklet-status-checker');
2
3
  const pAll = require('p-all');
3
4
  const { BlockletStatus } = require('@blocklet/constant');
4
5
  const sleep = require('@abtnode/util/lib/sleep');
5
6
  const { getDisplayName } = require('@blocklet/meta/lib/util');
6
- const { isValid } = require('@arcblock/did');
7
-
8
7
  const states = require('../../states');
9
8
  const { isBlockletPortHealthy, shouldCheckHealthy } = require('../../util/blocklet');
10
9
 
11
- const inProgressStatuses = [BlockletStatus.stopping, BlockletStatus.restarting, BlockletStatus.waiting];
10
+ const inProgressStatuses = [
11
+ BlockletStatus.stopping,
12
+ BlockletStatus.restarting,
13
+ BlockletStatus.waiting,
14
+ BlockletStatus.starting,
15
+ BlockletStatus.downloading,
16
+ ];
17
+
18
+ // Restart queue concurrency, 这个改大,容易 blocklet 超时导致启动失败
19
+ const RESTART_CONCURRENCY = 2;
12
20
 
13
21
  class EnsureBlockletRunning {
14
22
  canRunEnsureBlockletRunning = false;
@@ -17,16 +25,16 @@ class EnsureBlockletRunning {
17
25
 
18
26
  whenCycleCheck = false;
19
27
 
28
+ cycleCheckCount = 0;
29
+
20
30
  // 每次任务的最小间隔时间
21
- checkInterval = +process.env.ABT_NODE_ENSURE_RUNNING_CHECK_INTERVAL || 180 * 1000;
31
+ checkInterval = +process.env.ABT_NODE_ENSURE_RUNNING_CHECK_INTERVAL || 120 * 1000;
22
32
 
23
33
  minCheckInterval = 30_000;
24
34
 
25
- everyBlockletCheckInterval = 2000;
35
+ preCheckInterval = 1000;
26
36
 
27
- everyBlockletDoingInterval = 5000;
28
-
29
- fakeRunningToWaitingOnce = false;
37
+ everyBlockletCheckInterval = 2000;
30
38
 
31
39
  highLoadCpu = +process.env.ABT_NODE_ENSURE_RUNNING_HIGH_LOAD_CPU || 0.85;
32
40
 
@@ -34,27 +42,41 @@ class EnsureBlockletRunning {
34
42
 
35
43
  highLoadDisk = +process.env.ABT_NODE_ENSURE_RUNNING_HIGH_LOAD_DISK || 0.95;
36
44
 
37
- // 进行中状态超时时间(毫秒)
38
- inProgressTimeout = +process.env.ABT_NODE_ENSURE_RUNNING_IN_PROGRESS_TIMEOUT || 5 * 60 * 1000;
45
+ // 各个状态的超时阈值(毫秒)
46
+ // 如果是首次调用(whenCycleCheck false),这些值应该是 0
47
+ stoppingTimeout = +process.env.ABT_NODE_ENSURE_RUNNING_STOPPING_TIMEOUT || 60 * 1000;
39
48
 
40
- runningBlocklets = {};
49
+ restartingTimeout = +process.env.ABT_NODE_ENSURE_RUNNING_RESTARTING_TIMEOUT || 6 * 60 * 1000;
41
50
 
42
- runningRootBlocklets = {};
51
+ waitingTimeout = +process.env.ABT_NODE_ENSURE_RUNNING_WAITING_TIMEOUT || 60 * 1000;
43
52
 
44
- fakeRunningBlocklets = {};
53
+ downloadingTimeout = +process.env.ABT_NODE_ENSURE_RUNNING_DOWNLOADING_TIMEOUT || 3 * 60 * 1000;
45
54
 
46
- needRestartBlocklets = {};
55
+ startingTimeout = +process.env.ABT_NODE_ENSURE_RUNNING_STARTING_TIMEOUT || 6 * 60 * 1000;
47
56
 
48
- restartingBlocklets = {};
57
+ runningBlocklets = {};
58
+
59
+ rootBlockletsInfo = {};
49
60
 
50
- errorStartBlocklets = {};
61
+ progressBlockletsTime = {};
51
62
 
52
63
  stopped = false;
53
64
 
65
+ // Queue for restarting fake running blocklets
66
+ restartQueue = [];
67
+
68
+ // Set to track queue keys for fast lookup
69
+ restartQueueKeys = new Set();
70
+
71
+ restartQueueProcessing = false;
72
+
73
+ // Track pending jobs by componentDid to prevent duplicate processing
74
+ pendingJobs = {};
75
+
54
76
  // Ease to mock
55
77
  isBlockletPortHealthy = isBlockletPortHealthy;
56
78
 
57
- isBlockletPortHealthyWithRetries = async (blocklet, fastCheck = false) => {
79
+ isBlockletPortHealthyWithRetries = async (blocklet) => {
58
80
  let error;
59
81
  if (!this.whenCycleCheck) {
60
82
  try {
@@ -81,9 +103,7 @@ class EnsureBlockletRunning {
81
103
  } catch (e) {
82
104
  error = e;
83
105
  // eslint-disable-next-line no-await-in-loop
84
- await sleep(
85
- fastCheck && this.whenCycleCheck ? this.everyBlockletDoingInterval : this.everyBlockletCheckInterval
86
- );
106
+ await sleep(this.everyBlockletCheckInterval);
87
107
  }
88
108
  }
89
109
  logger.error('blocklet port is not healthy', error);
@@ -106,27 +126,44 @@ class EnsureBlockletRunning {
106
126
  this.checkSystemHighLoad = checkSystemHighLoad;
107
127
  logger.info('check and fix blocklet status interval', this.checkInterval);
108
128
  const task = async () => {
129
+ await sleep(this.preCheckInterval);
130
+
131
+ // 完全停止,后续也不再继续检查
132
+ if (this.stopped) {
133
+ logger.info('blocklet status checker stopped');
134
+ return;
135
+ }
109
136
  // 如果还没进入到需要检查的阶段,则等待 1 秒后继续检查
110
137
  if (!this.canRunEnsureBlockletRunning) {
111
- await sleep(1000);
112
138
  task();
113
139
  return;
114
140
  }
115
- if (this.whenCycleCheck) {
116
- await sleep(Math.max(this.checkInterval, this.minCheckInterval));
117
- }
141
+
142
+ // 首次检查前不等待(whenCycleCheck 为 false)
118
143
  try {
119
144
  await this.checkAndFix();
145
+
146
+ // 每次检查完之后查看消耗的时间
147
+ await sleep(Math.max(this.checkInterval, this.minCheckInterval));
148
+ this.cycleCheckCount++;
149
+ if (this.cycleCheckCount >= 3) {
150
+ this.cycleCheckCount = 0;
151
+ this.whenCycleCheck = true;
152
+ }
120
153
  } catch (e) {
121
154
  logger.error('check and fix blocklet status failed', e);
155
+ // 出错时也要等待,避免频繁重试
156
+ if (this.whenCycleCheck) {
157
+ await sleep(Math.max(this.checkInterval, this.minCheckInterval));
158
+ }
122
159
  }
123
160
  task();
124
161
  };
125
162
  task();
126
163
  };
127
164
 
128
- getDisplayNameByRootDid = (rootDid) => {
129
- const rootBlocklet = this.runningRootBlocklets[rootDid];
165
+ getDisplayNameByRootDid = async (rootDid) => {
166
+ const rootBlocklet = this.rootBlockletsInfo[rootDid] || (await this.states.blocklet.getBlocklet(rootDid));
130
167
  if (rootBlocklet) {
131
168
  return getDisplayName(rootBlocklet);
132
169
  }
@@ -137,6 +174,42 @@ class EnsureBlockletRunning {
137
174
  return blocklet.meta.title || blocklet.meta.name || blocklet.meta.did;
138
175
  };
139
176
 
177
+ /**
178
+ * Get timeout threshold for a specific status
179
+ * @param {string} status - Blocklet status
180
+ * @returns {number} Timeout threshold in milliseconds
181
+ */
182
+ getStatusTimeout = (status) => {
183
+ // 如果是首次调用,所有阈值都是 0
184
+ if (!this.whenCycleCheck) {
185
+ return 0;
186
+ }
187
+
188
+ let timeout = 0;
189
+ switch (status) {
190
+ case BlockletStatus.stopping:
191
+ timeout = this.stoppingTimeout;
192
+ break;
193
+ case BlockletStatus.restarting:
194
+ timeout = this.restartingTimeout;
195
+ break;
196
+ case BlockletStatus.waiting:
197
+ timeout = this.waitingTimeout;
198
+ break;
199
+ case BlockletStatus.downloading:
200
+ timeout = this.downloadingTimeout;
201
+ break;
202
+ case BlockletStatus.starting:
203
+ timeout = this.startingTimeout;
204
+ break;
205
+ default:
206
+ timeout = this.downloadingTimeout;
207
+ break;
208
+ }
209
+ // 需要减去检查间隔时间,因为每次检查都会在第二次检查之后才比对时间,最少间隔时间不能小于 waitingTimeout
210
+ return Math.max(timeout - this.checkInterval, this.waitingTimeout);
211
+ };
212
+
140
213
  checkAndFix = async () => {
141
214
  logger.info('check and fix blocklet status');
142
215
  const systemHighLoad = this.checkSystemHighLoad({
@@ -147,50 +220,51 @@ class EnsureBlockletRunning {
147
220
 
148
221
  if (this.whenCycleCheck && systemHighLoad.isHighLoad) {
149
222
  logger.warn('Skip once ensure blocklet running because system high load', systemHighLoad);
150
- return;
223
+ return 0;
151
224
  }
152
225
 
153
226
  this.runningBlocklets = {};
154
- this.fakeRunningBlocklets = {};
155
- this.needRestartBlocklets = {};
156
-
157
227
  const startTime = Date.now();
158
228
  try {
229
+ this.startRestartQueueProcessor();
159
230
  await this.getRunningBlocklets();
160
231
  await this.getFakeRunningBlocklets();
161
- await this.restartFakeRunningBlocklets();
162
232
  } catch (e) {
163
233
  logger.error('ensure blocklet status failed', e);
164
234
  }
235
+ const elapsedTime = Date.now() - startTime;
165
236
  logger.info(
166
- `ensure blocklet status finished in ${Date.now() - startTime}ms. It's server first start: ${!this.whenCycleCheck}`
237
+ `ensure blocklet status finished in ${elapsedTime}ms. It's server first start: ${!this.whenCycleCheck}`
167
238
  );
168
- this.runningRootBlocklets = {};
169
- this.whenCycleCheck = true;
239
+ return elapsedTime;
170
240
  };
171
241
 
172
242
  getRunningBlocklets = async () => {
173
- const runningStatuses = this.whenCycleCheck
174
- ? [BlockletStatus.running, BlockletStatus.waiting]
175
- : [BlockletStatus.running, BlockletStatus.waiting, BlockletStatus.error];
176
-
177
- const blocklets = await this.states.blocklet.getBlocklets();
178
- for (const rootBlocklet of blocklets) {
179
- const { did } = rootBlocklet.meta;
243
+ const rootBlocklets = await this.states.blocklet.getBlocklets();
244
+ for (const rootBlocklet of rootBlocklets) {
245
+ const rootDid = rootBlocklet.appPid || rootBlocklet.meta.did;
180
246
  if (rootBlocklet.children) {
181
247
  for (const childBlocklet of rootBlocklet.children) {
182
248
  const isRunning =
183
- runningStatuses.includes(childBlocklet.status) || childBlocklet.greenStatus === BlockletStatus.running;
184
- const isInProgress = inProgressStatuses.includes(childBlocklet.status);
249
+ childBlocklet.status === BlockletStatus.running || childBlocklet.greenStatus === BlockletStatus.running;
250
+ const isInProgress =
251
+ inProgressStatuses.includes(childBlocklet.status) || inProgressStatuses.includes(childBlocklet.greenStatus);
252
+ const isStopped =
253
+ childBlocklet.status === BlockletStatus.stopped && childBlocklet.greenStatus === BlockletStatus.stopped;
254
+
255
+ // 如果处于过 running, 或 stopped,则删除 progressBlockletsTime
256
+ if (isRunning || isStopped) {
257
+ delete this.progressBlockletsTime[`${rootDid}-${childBlocklet.meta.did}`];
258
+ }
185
259
  if (isRunning || isInProgress) {
186
- if (!this.runningBlocklets[did]) {
187
- this.runningBlocklets[did] = [];
260
+ if (!this.runningBlocklets[rootDid]) {
261
+ this.runningBlocklets[rootDid] = [];
188
262
  }
189
- if (this.runningBlocklets[did].find((b) => b.meta.did === childBlocklet.meta.did)) {
263
+ if (this.runningBlocklets[rootDid].find((child) => child.meta.did === childBlocklet.meta.did)) {
190
264
  continue;
191
265
  }
192
- this.runningBlocklets[did].push(childBlocklet);
193
- this.runningRootBlocklets[did] = rootBlocklet;
266
+ this.runningBlocklets[rootDid].push(childBlocklet);
267
+ this.rootBlockletsInfo[rootDid] = rootBlocklet;
194
268
  }
195
269
  }
196
270
  }
@@ -201,195 +275,318 @@ class EnsureBlockletRunning {
201
275
  getFakeRunningBlocklets = async () => {
202
276
  const rootDids = Object.keys(this.runningBlocklets);
203
277
  await pAll(
204
- rootDids.map((did) => {
278
+ rootDids.map((rootDid) => {
205
279
  return async () => {
206
- const blocklets = this.runningBlocklets[did];
280
+ // runningBlocklets[rootDid] 存储的是该根 blocklet 下的所有子组件(childBlocklets)
281
+ const childBlocklets = this.runningBlocklets[rootDid];
207
282
  // eslint-disable-next-line
283
+ const fakeDids = [];
208
284
  await pAll(
209
- blocklets.map((blocklet) => {
285
+ childBlocklets.map((childBlocklet) => {
210
286
  return async () => {
211
- if (!shouldCheckHealthy(blocklet)) {
212
- // 如果 blocklet 是不需要启动的,并且不是 running,则设置为 running 状态
213
- if (blocklet.status !== BlockletStatus.running && blocklet.greenStatus !== BlockletStatus.running) {
214
- await this.states.blocklet.setBlockletStatus(did, BlockletStatus.running, {
215
- componentDids: [blocklet.meta.did],
287
+ if (!shouldCheckHealthy(childBlocklet)) {
288
+ // 如果 childBlocklet 是不需要启动的,并且不是 running,则设置为 running 状态
289
+ if (
290
+ childBlocklet.status !== BlockletStatus.running &&
291
+ childBlocklet.greenStatus !== BlockletStatus.running
292
+ ) {
293
+ await this.states.blocklet.setBlockletStatus(rootDid, BlockletStatus.running, {
294
+ componentDids: [childBlocklet.meta.did],
216
295
  });
217
296
  }
218
297
  return;
219
298
  }
220
299
 
221
- // Skip health check if blocklet is in progress status
222
- if (inProgressStatuses.includes(blocklet.status)) {
223
- // Check if it's user-initiated operation
224
- // 如果 operator 是 z 开头的字符串,表示是 did, 跳过健康检查
225
- if (blocklet.operator && isValid(blocklet.operator)) {
226
- logger.info('Skip ensure running check for user-initiated operation', {
227
- did,
228
- componentDid: blocklet.meta.did,
229
- status: blocklet.status,
230
- operator: blocklet.operator,
231
- });
232
- return;
233
- }
300
+ const isInProgress =
301
+ inProgressStatuses.includes(childBlocklet.status) ||
302
+ inProgressStatuses.includes(childBlocklet.greenStatus);
234
303
 
235
- // Check timeout for daemon-initiated or no operator recorded operations
236
- if (blocklet.inProgressStart) {
237
- const elapsedTime = Date.now() - new Date(blocklet.inProgressStart).getTime();
238
- if (elapsedTime < this.inProgressTimeout) {
239
- logger.info('Skip ensure running check due to timeout not reached', {
240
- did,
241
- componentDid: blocklet.meta.did,
242
- status: blocklet.status,
243
- elapsedTime,
244
- timeout: this.inProgressTimeout,
245
- });
246
- return;
247
- }
304
+ // 如果处于进行中状态,则记录上次检查时间
305
+ if (isInProgress) {
306
+ const key = `${rootDid}-${childBlocklet.meta.did}`;
307
+ if (!this.progressBlockletsTime[key]) {
308
+ this.progressBlockletsTime[key] = Date.now();
309
+ }
310
+ const lastProgressTime = this.progressBlockletsTime[key];
311
+ // 首次调用或者超过阈值时间,则认为是 fake running
312
+ if (
313
+ !this.whenCycleCheck ||
314
+ Date.now() - lastProgressTime > this.getStatusTimeout(childBlocklet.status)
315
+ ) {
248
316
  logger.warn('InProgress timeout reached, proceeding with health check', {
249
- did,
250
- componentDid: blocklet.meta.did,
251
- status: blocklet.status,
252
- elapsedTime,
253
- timeout: this.inProgressTimeout,
317
+ did: rootDid,
318
+ componentDid: childBlocklet.meta.did,
319
+ status: childBlocklet.status,
254
320
  });
321
+ } else if (this.whenCycleCheck) {
322
+ // 如果没有 inProgressStart 时间戳,且非首次调用,跳过检查
323
+ logger.info('Skip ensure running check: no inProgressStart timestamp', {
324
+ did: rootDid,
325
+ componentDid: childBlocklet.meta.did,
326
+ status: childBlocklet.status,
327
+ });
328
+ return;
255
329
  }
256
330
  }
257
331
 
258
- const health = await this.isBlockletPortHealthyWithRetries(
259
- blocklet,
260
- inProgressStatuses.includes(blocklet.status)
261
- );
262
-
263
- if (health) {
264
- return;
332
+ if (!isInProgress) {
333
+ const health = await this.isBlockletPortHealthyWithRetries(childBlocklet);
334
+ if (health) {
335
+ return;
336
+ }
265
337
  }
266
338
 
267
- logger.warn('check blocklet port healthy', did, blocklet.meta.did, 'no healthy');
339
+ logger.warn('check blocklet port healthy', rootDid, childBlocklet.meta.did, 'no healthy');
268
340
 
269
- if (!this.fakeRunningBlocklets[did]) {
270
- this.fakeRunningBlocklets[did] = [];
271
- }
272
- if (this.fakeRunningBlocklets[did].find((b) => b.meta.did === blocklet.meta.did)) {
273
- return;
274
- }
275
- this.fakeRunningBlocklets[did].push(blocklet);
341
+ // Add to restart queue immediately
342
+ fakeDids.push(childBlocklet.meta.did);
276
343
  };
277
344
  }),
278
345
  { concurrency: 10 }
279
346
  );
347
+ if (fakeDids.length > 0) {
348
+ this.addToRestartQueue(rootDid, fakeDids);
349
+ }
280
350
  };
281
351
  }),
282
352
  { concurrency: 8 }
283
353
  );
354
+ };
355
+
356
+ /**
357
+ * Add a childBlocklet to the restart queue
358
+ * @param {string} rootDid - Root blocklet DID
359
+ * @param {Object} childBlocklet - Child blocklet (component) to restart
360
+ */
361
+ addToRestartQueue = (rootDid, dids) => {
362
+ // Check if job is pending (being processed)
363
+ if (this.restartQueueKeys.has(rootDid) || this.pendingJobs[rootDid]) {
364
+ return;
365
+ }
284
366
 
285
- logger.info('get fake running blocklets', Object.keys(this.fakeRunningBlocklets).length);
367
+ this.restartQueue.push({
368
+ rootDid,
369
+ componentDids: dids,
370
+ firstCycle: !this.whenCycleCheck,
371
+ });
372
+ this.restartQueueKeys.add(rootDid);
286
373
  };
287
374
 
288
- restartFakeRunningBlocklets = async () => {
289
- // blocklet 一组组重启
290
- const blockletDids = Object.keys(this.fakeRunningBlocklets);
291
- await pAll(
292
- blockletDids.map((did) => {
293
- return async () => {
294
- const blocklets = this.fakeRunningBlocklets[did];
295
- const componentDids = blocklets.map((b) => b.meta.did);
296
- if (componentDids.length > 0) {
297
- const key = `${did}-${componentDids.join('-')}`;
298
- this.needRestartBlocklets[key] = true;
299
- if (this.restartingBlocklets[key] && this.restartingBlocklets[key] + this.checkInterval < Date.now()) {
300
- return;
301
- }
302
- this.restartingBlocklets[key] = Date.now();
375
+ // 启动重启队列,保持 4 worker 并发处理,如果有一个完成了,则会补充到队列中
376
+ startRestartQueueProcessor = () => {
377
+ if (this.restartQueueProcessing) {
378
+ return;
379
+ }
380
+ this.restartQueueProcessing = true;
303
381
 
304
- const blockletDisplayName = this.getDisplayNameByRootDid(did);
305
- const restartTitle = 'Blocklet health check failed';
306
- const restartDescription = `Blocklet ${blockletDisplayName} with components ${componentDids.map((v) => this.getDisplayName(blocklets.find((b) => b.meta.did === v))).join(', ')} health check failed, restarting...`;
307
- if (this.whenCycleCheck) {
308
- this.notification(did, restartTitle, restartDescription, 'warning');
309
- }
382
+ const runWorker = async () => {
383
+ while (!this.stopped) {
384
+ const item = this.restartQueue.shift();
385
+ if (!item) {
386
+ return;
387
+ }
310
388
 
311
- try {
312
- logger.info('restart blocklet:', did, componentDids);
313
- await this.start({
314
- did,
315
- componentDids,
316
- checkHealthImmediately: true,
317
- atomic: true,
318
- operator: 'ensure-blocklet-running',
319
- });
320
- if (this.whenCycleCheck) {
321
- this.createAuditLog({
322
- action: 'ensureBlockletRunning',
323
- args: {
324
- teamDid: did,
325
- componentDids,
326
- },
327
- context: {
328
- user: {
329
- did,
330
- role: 'daemon',
331
- blockletDid: did,
332
- fullName: blockletDisplayName,
333
- elevated: false,
334
- },
335
- },
336
- result: {
337
- title: restartTitle,
338
- description: restartDescription,
339
- },
340
- });
341
- }
342
-
343
- delete this.restartingBlocklets[key];
344
- } catch (e) {
345
- logger.error('restart blocklet failed', did, componentDids, e);
346
- if (!this.errorStartBlocklets[key]) {
347
- this.errorStartBlocklets[key] = 0;
348
- }
349
- this.errorStartBlocklets[key] += 1;
350
-
351
- // 如果重启失败次数超过 3 次,则发送通知, 如果 server 是第一次启动遇到失败,则立刻发送通知
352
- if (this.errorStartBlocklets[key] >= 3 || !this.whenCycleCheck) {
353
- const title = 'Restart blocklet failed when health check failed';
354
- const description = `Restart blocklet ${blockletDisplayName} with components ${componentDids.map((v) => this.getDisplayName(blocklets.find((b) => b.meta.did === v))).join(', ')} failed`;
355
- this.notification(did, title, description, 'error');
356
- delete this.errorStartBlocklets[key];
357
- logger.error('restart many times blocklet failed', did, componentDids, e);
358
- try {
359
- // 失败了应该保持 error 状态
360
- await this.states.blocklet.setBlockletStatus(did, BlockletStatus.error, { componentDids });
361
- this.createAuditLog({
362
- action: 'ensureBlockletRunning',
363
- args: {
364
- blockletDisplayName,
365
- teamDid: did,
366
- componentDids,
367
- },
368
- context: {
369
- user: {
370
- did,
371
- role: 'daemon',
372
- blockletDid: did,
373
- fullName: blockletDisplayName,
374
- elevated: false,
375
- },
376
- },
377
- result: {
378
- title,
379
- description,
380
- },
381
- });
382
- } catch (err) {
383
- logger.error('ensure blocklet running, create audit log failed', did, componentDids, err);
384
- }
385
- }
386
- }
389
+ this.restartQueueKeys.delete(item.rootDid);
390
+
391
+ try {
392
+ await this.restartBlockletFromQueue(item);
393
+ } catch (err) {
394
+ logger.error('restart blocklet failed', err);
395
+ }
396
+ }
397
+ };
398
+
399
+ const processQueue = async () => {
400
+ while (!this.stopped) {
401
+ // 防止没有重启队列时,快速空转
402
+ await sleep(this.preCheckInterval);
403
+
404
+ if (this.restartQueue.length === 0) {
405
+ continue; // 等下一轮
406
+ }
407
+
408
+ // 创建固定数量 worker
409
+ const workers = [];
410
+ for (let i = 0; i < RESTART_CONCURRENCY; i++) {
411
+ workers.push(runWorker());
412
+ }
413
+
414
+ try {
415
+ await Promise.all(workers);
416
+ } catch (err) {
417
+ logger.error('restart queue processor batch failed', err);
418
+ }
419
+ }
420
+ };
421
+
422
+ processQueue().catch((err) => {
423
+ logger.error('restart queue processor failed', err);
424
+ this.restartQueueProcessing = false;
425
+ });
426
+ };
427
+
428
+ /**
429
+ * Create restart notification and audit log context
430
+ * @param {string} rootDid - Root blocklet DID
431
+ * @param {Object} childBlocklet - Child blocklet object
432
+ * @param {string[]} componentDids - Component DIDs
433
+ * @returns {Object} Context object with displayName, title, description
434
+ */
435
+ createRestartContext = async (rootDid, componentDids) => {
436
+ const blockletDisplayName = await this.getDisplayNameByRootDid(rootDid);
437
+ const componentNames = componentDids.map((componentDid) => {
438
+ const child = this.rootBlockletsInfo[rootDid]?.children?.find((bl) => bl.meta.did === componentDid);
439
+ return child ? this.getDisplayName(child) : componentDid;
440
+ });
441
+ const componentNamesStr = componentNames.length === 1 ? componentNames[0] : componentNames.join(', ');
442
+
443
+ return {
444
+ blockletDisplayName,
445
+ title: 'Blocklet health check failed',
446
+ description: `Blocklet ${blockletDisplayName} with component${componentNames.length > 1 ? 's' : ''} ${componentNamesStr} health check failed, restarting...`,
447
+ };
448
+ };
449
+
450
+ /**
451
+ * Handle restart success
452
+ * @param {string} key - Queue item key
453
+ * @param {string} rootDid - Root blocklet DID
454
+ * @param {string} componentDid - Component DID
455
+ * @param {Object} context - Restart context
456
+ */
457
+ handleRestartSuccess = (rootDid, componentDids, firstCycle, context) => {
458
+ if (firstCycle) {
459
+ return;
460
+ }
461
+ this.createAuditLog({
462
+ action: 'ensureBlockletRunning',
463
+ args: {
464
+ teamDid: rootDid,
465
+ componentDids,
466
+ },
467
+ context: {
468
+ user: {
469
+ did: rootDid,
470
+ role: 'daemon',
471
+ blockletDid: rootDid,
472
+ fullName: context.blockletDisplayName,
473
+ elevated: false,
474
+ },
475
+ },
476
+ result: {
477
+ title: context.title,
478
+ description: context.description,
479
+ },
480
+ });
481
+ };
482
+
483
+ /**
484
+ * Handle restart failure
485
+ * @param {string} key - Queue item key
486
+ * @param {string} rootDid - Root blocklet DID
487
+ * @param {string[]} componentDids - Component DIDs
488
+ * @param {Object} context - Restart context
489
+ * @param {Error} error - Error object
490
+ */
491
+ handleRestartFailure = (rootDid, componentDids, context, error) => {
492
+ const title = 'Restart blocklet failed when health check failed';
493
+ const description = `Ensure blocklet running failed, restart blocklet ${context.blockletDisplayName} with component${componentDids.length > 1 ? 's' : ''} ${componentDids.join(', ')} failed`;
494
+ this.notification(rootDid, title, description, 'error');
495
+ logger.error('restart many times blocklet failed', rootDid, componentDids, error);
496
+ try {
497
+ this.createAuditLog({
498
+ action: 'ensureBlockletRunning',
499
+ args: {
500
+ blockletDisplayName: context.blockletDisplayName,
501
+ teamDid: rootDid,
502
+ componentDids,
503
+ },
504
+ context: {
505
+ user: {
506
+ did: rootDid,
507
+ role: 'daemon',
508
+ blockletDid: rootDid,
509
+ fullName: context.blockletDisplayName,
510
+ elevated: false,
511
+ },
512
+ },
513
+ result: {
514
+ title,
515
+ description,
516
+ },
517
+ });
518
+ } catch (err) {
519
+ logger.error('ensure blocklet running, create audit log failed', rootDid, componentDids, err);
520
+ }
521
+ };
522
+
523
+ /**
524
+ * Restart a childBlocklet from the queue
525
+ * @param {Object} item - Queue item with rootDid, componentDids, firstCycle
526
+ */
527
+ restartBlockletFromQueue = async ({ rootDid, componentDids, firstCycle }) => {
528
+ // Set pending status to prevent duplicate processing
529
+ if (this.pendingJobs[rootDid]) {
530
+ logger.warn('Skip restart: job is already pending', { rootDid });
531
+ return;
532
+ }
533
+ this.pendingJobs[rootDid] = true;
534
+
535
+ try {
536
+ const context = await this.createRestartContext(rootDid, componentDids);
537
+ if (!firstCycle) {
538
+ this.notification(rootDid, context.title, context.description, 'warning');
539
+ }
540
+
541
+ logger.info('restart blocklet:', rootDid, componentDids);
542
+ try {
543
+ await this.start({
544
+ did: rootDid,
545
+ componentDids,
546
+ checkHealthImmediately: true,
547
+ atomic: true,
548
+ operator: 'ensure-blocklet-running',
549
+ });
550
+ } catch (e) {
551
+ // 如果启动失败,则尝试启动一次 error 状态的组件
552
+ const blocklet = await this.states.blocklet.getBlocklet(rootDid);
553
+ if (blocklet) {
554
+ const errorComponentDids = blocklet.children
555
+ .filter((child) => child.status === BlockletStatus.error || child.greenStatus === BlockletStatus.error)
556
+ .map((child) => child.meta.did);
557
+ if (errorComponentDids.length) {
558
+ logger.error('restart blocklet failed, retry with once, error:', e);
559
+ await this.start({
560
+ did: rootDid,
561
+ componentDids: errorComponentDids,
562
+ checkHealthImmediately: true,
563
+ atomic: true,
564
+ operator: 'ensure-blocklet-running',
565
+ });
566
+ } else {
567
+ throw e;
387
568
  }
388
- };
389
- }),
390
- { concurrency: 8 }
391
- );
569
+ } else {
570
+ throw e;
571
+ }
572
+ }
573
+ this.handleRestartSuccess(rootDid, componentDids, firstCycle, context);
574
+ } catch (e) {
575
+ await this.handleRestartFailure(rootDid, componentDids, context, e);
576
+ } finally {
577
+ // Clear pending status after processing
578
+ delete this.pendingJobs[rootDid];
579
+ // Clear progress blocklets time
580
+ for (const componentDid of componentDids) {
581
+ delete this.progressBlockletsTime[`${rootDid}-${componentDid}`];
582
+ }
583
+ }
392
584
  };
393
585
  }
394
586
 
395
- module.exports = new EnsureBlockletRunning();
587
+ const ensureBlockletRunning = new EnsureBlockletRunning();
588
+
589
+ module.exports = {
590
+ ensureBlockletRunning,
591
+ EnsureBlockletRunning,
592
+ };
@@ -1,5 +1,6 @@
1
1
  const { outputJson } = require('fs-extra');
2
2
  const { join } = require('path');
3
+ const { BLOCKLET_SITE_GROUP_SUFFIX } = require('@abtnode/constant');
3
4
  const states = require('../../../states');
4
5
  const { BaseBackup } = require('./base');
5
6
  const { getFileObject } = require('../utils/disk');
@@ -24,7 +25,7 @@ class RoutingRuleBackup extends BaseBackup {
24
25
  */
25
26
  async export() {
26
27
  const routingRule = await states.site.findOne({
27
- domain: `${this.blocklet.meta.did}.blocklet-domain-group`,
28
+ domain: `${this.blocklet.meta.did}${BLOCKLET_SITE_GROUP_SUFFIX}`,
28
29
  });
29
30
 
30
31
  await outputJson(this.routingRuleExportPath, routingRule);
@@ -1,6 +1,7 @@
1
1
  /* eslint-disable no-continue */
2
2
  /* eslint-disable no-await-in-loop */
3
3
  const normalizePathPrefix = require('@abtnode/util/lib/normalize-path-prefix');
4
+ const { BLOCKLET_SITE_GROUP_SUFFIX } = require('@abtnode/constant');
4
5
 
5
6
  const findBlocklet = (site, blocklets) => {
6
7
  // prefix = /
@@ -101,7 +102,7 @@ module.exports = async ({ states, node, printInfo }) => {
101
102
  // generate new blocklet site for every installed blocklet
102
103
  const newBlockletSites = {}; // <blockletDid>: <site>
103
104
  for (const blocklet of blocklets) {
104
- const domain = `${blocklet.meta.did}.blocklet-domain-group`;
105
+ const domain = `${blocklet.meta.did}${BLOCKLET_SITE_GROUP_SUFFIX}`;
105
106
  newBlockletSites[blocklet.meta.did] = {
106
107
  domain,
107
108
  domainAliases: [],
@@ -980,7 +980,7 @@ const startBlockletProcess = async (
980
980
  try {
981
981
  await promiseSpawn(nextOptions.env.connectInternalDockerNetwork, { mute: true });
982
982
  } catch (err) {
983
- logger.error('blocklet connect internal docker network failed', { processId: processIdName, error: err });
983
+ logger.warn('blocklet connect internal docker network failed', { processId: processIdName, error: err });
984
984
  }
985
985
  }
986
986
 
@@ -1,7 +1,7 @@
1
1
  const { Joi } = require('@arcblock/validator');
2
2
 
3
3
  const createOrgInputSchema = Joi.object({
4
- name: Joi.string().required().trim().min(1).max(20),
4
+ name: Joi.string().required().trim().min(1).max(64),
5
5
  description: Joi.string().optional().allow('').trim().min(1).max(255),
6
6
  ownerDid: Joi.DID().optional().allow('').allow(null),
7
7
  });
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "publishConfig": {
4
4
  "access": "public"
5
5
  },
6
- "version": "1.17.4-beta-20251204-080001-08643fbe",
6
+ "version": "1.17.4-beta-20251205-104405-28838df1",
7
7
  "description": "",
8
8
  "main": "lib/index.js",
9
9
  "files": [
@@ -17,21 +17,21 @@
17
17
  "author": "wangshijun <wangshijun2010@gmail.com> (http://github.com/wangshijun)",
18
18
  "license": "Apache-2.0",
19
19
  "dependencies": {
20
- "@abtnode/analytics": "1.17.4-beta-20251204-080001-08643fbe",
21
- "@abtnode/auth": "1.17.4-beta-20251204-080001-08643fbe",
22
- "@abtnode/certificate-manager": "1.17.4-beta-20251204-080001-08643fbe",
23
- "@abtnode/constant": "1.17.4-beta-20251204-080001-08643fbe",
24
- "@abtnode/cron": "1.17.4-beta-20251204-080001-08643fbe",
25
- "@abtnode/db-cache": "1.17.4-beta-20251204-080001-08643fbe",
26
- "@abtnode/docker-utils": "1.17.4-beta-20251204-080001-08643fbe",
27
- "@abtnode/logger": "1.17.4-beta-20251204-080001-08643fbe",
28
- "@abtnode/models": "1.17.4-beta-20251204-080001-08643fbe",
29
- "@abtnode/queue": "1.17.4-beta-20251204-080001-08643fbe",
30
- "@abtnode/rbac": "1.17.4-beta-20251204-080001-08643fbe",
31
- "@abtnode/router-provider": "1.17.4-beta-20251204-080001-08643fbe",
32
- "@abtnode/static-server": "1.17.4-beta-20251204-080001-08643fbe",
33
- "@abtnode/timemachine": "1.17.4-beta-20251204-080001-08643fbe",
34
- "@abtnode/util": "1.17.4-beta-20251204-080001-08643fbe",
20
+ "@abtnode/analytics": "1.17.4-beta-20251205-104405-28838df1",
21
+ "@abtnode/auth": "1.17.4-beta-20251205-104405-28838df1",
22
+ "@abtnode/certificate-manager": "1.17.4-beta-20251205-104405-28838df1",
23
+ "@abtnode/constant": "1.17.4-beta-20251205-104405-28838df1",
24
+ "@abtnode/cron": "1.17.4-beta-20251205-104405-28838df1",
25
+ "@abtnode/db-cache": "1.17.4-beta-20251205-104405-28838df1",
26
+ "@abtnode/docker-utils": "1.17.4-beta-20251205-104405-28838df1",
27
+ "@abtnode/logger": "1.17.4-beta-20251205-104405-28838df1",
28
+ "@abtnode/models": "1.17.4-beta-20251205-104405-28838df1",
29
+ "@abtnode/queue": "1.17.4-beta-20251205-104405-28838df1",
30
+ "@abtnode/rbac": "1.17.4-beta-20251205-104405-28838df1",
31
+ "@abtnode/router-provider": "1.17.4-beta-20251205-104405-28838df1",
32
+ "@abtnode/static-server": "1.17.4-beta-20251205-104405-28838df1",
33
+ "@abtnode/timemachine": "1.17.4-beta-20251205-104405-28838df1",
34
+ "@abtnode/util": "1.17.4-beta-20251205-104405-28838df1",
35
35
  "@aigne/aigne-hub": "^0.10.10",
36
36
  "@arcblock/did": "^1.27.12",
37
37
  "@arcblock/did-connect-js": "^1.27.12",
@@ -43,15 +43,15 @@
43
43
  "@arcblock/pm2-events": "^0.0.5",
44
44
  "@arcblock/validator": "^1.27.12",
45
45
  "@arcblock/vc": "^1.27.12",
46
- "@blocklet/constant": "1.17.4-beta-20251204-080001-08643fbe",
46
+ "@blocklet/constant": "1.17.4-beta-20251205-104405-28838df1",
47
47
  "@blocklet/did-space-js": "^1.2.6",
48
- "@blocklet/env": "1.17.4-beta-20251204-080001-08643fbe",
48
+ "@blocklet/env": "1.17.4-beta-20251205-104405-28838df1",
49
49
  "@blocklet/error": "^0.3.3",
50
- "@blocklet/meta": "1.17.4-beta-20251204-080001-08643fbe",
51
- "@blocklet/resolver": "1.17.4-beta-20251204-080001-08643fbe",
52
- "@blocklet/sdk": "1.17.4-beta-20251204-080001-08643fbe",
53
- "@blocklet/server-js": "1.17.4-beta-20251204-080001-08643fbe",
54
- "@blocklet/store": "1.17.4-beta-20251204-080001-08643fbe",
50
+ "@blocklet/meta": "1.17.4-beta-20251205-104405-28838df1",
51
+ "@blocklet/resolver": "1.17.4-beta-20251205-104405-28838df1",
52
+ "@blocklet/sdk": "1.17.4-beta-20251205-104405-28838df1",
53
+ "@blocklet/server-js": "1.17.4-beta-20251205-104405-28838df1",
54
+ "@blocklet/store": "1.17.4-beta-20251205-104405-28838df1",
55
55
  "@blocklet/theme": "^3.2.11",
56
56
  "@fidm/x509": "^1.2.1",
57
57
  "@ocap/mcrypto": "^1.27.12",
@@ -116,5 +116,5 @@
116
116
  "express": "^4.18.2",
117
117
  "unzipper": "^0.10.11"
118
118
  },
119
- "gitHead": "740b2884f1f0cede865a6e7df2db394f3cef1997"
119
+ "gitHead": "f8e18c8b32b19fd98de3d292a0036e628e59d474"
120
120
  }