@platformatic/runtime 3.13.0 → 3.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/runtime.js CHANGED
@@ -16,7 +16,6 @@ import { existsSync } from 'node:fs'
16
16
  import { readFile } from 'node:fs/promises'
17
17
  import { STATUS_CODES } from 'node:http'
18
18
  import { createRequire } from 'node:module'
19
- import os from 'node:os'
20
19
  import { dirname, isAbsolute, join } from 'node:path'
21
20
  import { setImmediate as immediate, setTimeout as sleep } from 'node:timers/promises'
22
21
  import { pathToFileURL } from 'node:url'
@@ -25,11 +24,14 @@ import SonicBoom from 'sonic-boom'
25
24
  import { Agent, request, interceptors as undiciInterceptors } from 'undici'
26
25
  import { createThreadInterceptor } from 'undici-thread-interceptor'
27
26
  import { pprofCapturePreloadPath } from './config.js'
27
+ import { DynamicWorkersScaler } from './dynamic-workers-scaler.js'
28
28
  import {
29
29
  ApplicationAlreadyStartedError,
30
30
  ApplicationNotFoundError,
31
31
  ApplicationNotStartedError,
32
32
  ApplicationStartTimeoutError,
33
+ CannotRemoveEntrypointError,
34
+ GetHeapStatisticUnavailable,
33
35
  InvalidArgumentError,
34
36
  MessagingError,
35
37
  MissingEntrypointError,
@@ -40,28 +42,29 @@ import {
40
42
  } from './errors.js'
41
43
  import { abstractLogger, createLogger } from './logger.js'
42
44
  import { startManagementApi } from './management-api.js'
43
- import { getMemoryInfo } from './metrics.js'
44
45
  import { createChannelCreationHook } from './policies.js'
45
46
  import { startPrometheusServer } from './prom-server.js'
46
- import ScalingAlgorithm from './scaling-algorithm.js'
47
47
  import { startScheduler } from './scheduler.js'
48
48
  import { createSharedStore } from './shared-http-cache.js'
49
49
  import { version } from './version.js'
50
50
  import { sendViaITC, waitEventFromITC } from './worker/itc.js'
51
51
  import { RoundRobinMap } from './worker/round-robin-map.js'
52
+ import { HealthSignalsQueue } from './worker/health-signals.js'
52
53
  import {
53
54
  kApplicationId,
54
55
  kConfig,
55
56
  kFullId,
56
57
  kHealthCheckTimer,
58
+ kHealthMetricsTimer,
57
59
  kId,
58
60
  kITC,
59
- kLastELU,
61
+ kLastHealthCheckELU,
60
62
  kStderrMarker,
61
63
  kWorkerId,
62
64
  kWorkersBroadcast,
63
65
  kWorkerStartTime,
64
- kWorkerStatus
66
+ kWorkerStatus,
67
+ kWorkerHealthSignals
65
68
  } from './worker/symbols.js'
66
69
 
67
70
  const kWorkerFile = join(import.meta.dirname, 'worker/main.js')
@@ -106,10 +109,13 @@ export class Runtime extends EventEmitter {
106
109
  #metricsLabelName
107
110
 
108
111
  #applicationsConfigsPatches
112
+ #applications
109
113
  #workers
110
114
  #workersBroadcastChannel
111
115
  #workerITCHandlers
116
+ #restartingApplications
112
117
  #restartingWorkers
118
+ #dynamicWorkersScaler
113
119
 
114
120
  #sharedHttpCache
115
121
  #scheduler
@@ -126,6 +132,7 @@ export class Runtime extends EventEmitter {
126
132
  this.#context = context ?? {}
127
133
  this.#isProduction = this.#context.isProduction ?? this.#context.production ?? false
128
134
  this.#concurrency = this.#context.concurrency ?? MAX_CONCURRENCY
135
+ this.#applications = new Map()
129
136
  this.#workers = new RoundRobinMap()
130
137
  this.#url = undefined
131
138
  this.#channelCreationHook = createChannelCreationHook(this.#config)
@@ -136,6 +143,7 @@ export class Runtime extends EventEmitter {
136
143
  })
137
144
  this.logger = abstractLogger // This is replaced by the real logger in init() and eventually removed in close()
138
145
  this.#status = undefined
146
+ this.#restartingApplications = new Set()
139
147
  this.#restartingWorkers = new Map()
140
148
  this.#sharedHttpCache = null
141
149
  this.#applicationsConfigsPatches = new Map()
@@ -158,9 +166,18 @@ export class Runtime extends EventEmitter {
158
166
  deleteHttpCacheValue: this.#deleteHttpCacheValue.bind(this),
159
167
  invalidateHttpCache: this.invalidateHttpCache.bind(this),
160
168
  updateSharedContext: this.updateSharedContext.bind(this),
161
- getSharedContext: this.getSharedContext.bind(this)
169
+ getSharedContext: this.getSharedContext.bind(this),
170
+ sendHealthSignals: this.#processHealthSignals.bind(this)
162
171
  }
163
172
  this.#sharedContext = {}
173
+
174
+ if (this.#isProduction) {
175
+ this.#env.PLT_DEV = 'false'
176
+ this.#env.PLT_ENVIRONMENT = 'production'
177
+ } else {
178
+ this.#env.PLT_DEV = 'true'
179
+ this.#env.PLT_ENVIRONMENT = 'development'
180
+ }
164
181
  }
165
182
 
166
183
  async init () {
@@ -190,31 +207,17 @@ export class Runtime extends EventEmitter {
190
207
 
191
208
  this.#createWorkersBroadcastChannel()
192
209
 
193
- const workersConfig = []
194
- for (const application of config.applications) {
195
- const count = application.workers ?? this.#config.workers ?? 1
196
- if (count > 1 && application.entrypoint && !features.node.reusePort) {
210
+ if (this.#config.workers.dynamic) {
211
+ if (this.#config.workers.dynamic === false) {
197
212
  this.logger.warn(
198
- `"${application.id}" is set as the entrypoint, but reusePort is not available in your OS; setting workers to 1 instead of ${count}`
213
+ `Vertical scaler disabled because the "workers" configuration is set to ${this.#config.workers.static}.`
199
214
  )
200
- workersConfig.push({ id: application.id, workers: 1 })
201
215
  } else {
202
- workersConfig.push({ id: application.id, workers: count })
216
+ this.#dynamicWorkersScaler = new DynamicWorkersScaler(this, this.#config.workers)
203
217
  }
204
218
  }
205
219
 
206
- this.#workers.configure(workersConfig)
207
-
208
- if (this.#isProduction) {
209
- this.#env['PLT_DEV'] = 'false'
210
- this.#env['PLT_ENVIRONMENT'] = 'production'
211
- } else {
212
- this.#env['PLT_DEV'] = 'true'
213
- this.#env['PLT_ENVIRONMENT'] = 'development'
214
- }
215
-
216
- await this.#setupApplications()
217
-
220
+ await this.addApplications(this.#config.applications)
218
221
  await this.#setDispatcher(config.undici)
219
222
 
220
223
  if (config.scheduler) {
@@ -236,12 +239,7 @@ export class Runtime extends EventEmitter {
236
239
  this.#createWorkersBroadcastChannel()
237
240
 
238
241
  try {
239
- const startInvocations = []
240
- for (const application of this.getApplicationsIds()) {
241
- startInvocations.push([application, silent])
242
- }
243
-
244
- await executeInParallel(this.startApplication.bind(this), startInvocations, this.#concurrency)
242
+ await this.startApplications(this.getApplicationsIds(), silent)
245
243
 
246
244
  if (this.#config.inspectorOptions) {
247
245
  const { port } = this.#config.inspectorOptions
@@ -287,66 +285,34 @@ export class Runtime extends EventEmitter {
287
285
  this.startCollectingMetrics()
288
286
  }
289
287
 
290
- if (this.#config.verticalScaler?.enabled) {
291
- await this.#setupVerticalScaler()
292
- }
293
-
288
+ await this.#dynamicWorkersScaler?.start()
294
289
  this.#showUrl()
295
290
  return this.#url
296
291
  }
297
292
 
298
293
  async stop (silent = false) {
299
- if (this.#scheduler) {
300
- await this.#scheduler.stop()
301
- }
302
-
303
294
  if (this.#status === 'starting') {
304
295
  await once(this, 'started')
305
296
  }
306
297
 
307
298
  this.#updateStatus('stopping')
308
299
 
300
+ if (this.#scheduler) {
301
+ await this.#scheduler.stop()
302
+ }
303
+
309
304
  if (this.#inspectorServer) {
310
305
  await this.#inspectorServer.close()
311
306
  }
312
307
 
308
+ await this.#dynamicWorkersScaler?.stop()
309
+
313
310
  // Stop the entrypoint first so that no new requests are accepted
314
311
  if (this.#entrypointId) {
315
312
  await this.stopApplication(this.#entrypointId, silent)
316
313
  }
317
314
 
318
- const stopInvocations = []
319
-
320
- // Construct the reverse dependency graph
321
- const dependents = {}
322
-
323
- try {
324
- const allApplications = await this.getApplications(true)
325
- for (const application of allApplications.applications) {
326
- for (const dependency of application.dependencies ?? []) {
327
- let applicationDependents = dependents[dependency]
328
- if (!applicationDependents) {
329
- applicationDependents = new Set()
330
- dependents[dependency] = applicationDependents
331
- }
332
-
333
- applicationDependents.add(application.id)
334
- }
335
- }
336
- } catch (e) {
337
- // Noop - This only happens if stop is invoked after a failed start, in which case we don't care about deps
338
- }
339
-
340
- for (const application of this.getApplicationsIds()) {
341
- // The entrypoint has been stopped above
342
- if (application === this.#entrypointId) {
343
- continue
344
- }
345
-
346
- stopInvocations.push([application, silent, Array.from(dependents[application] ?? [])])
347
- }
348
-
349
- await executeInParallel(this.stopApplication.bind(this), stopInvocations, this.#concurrency)
315
+ await this.stopApplications(this.getApplicationsIds(), silent)
350
316
 
351
317
  await this.#meshInterceptor.close()
352
318
  this.#workersBroadcastChannel?.close()
@@ -357,14 +323,13 @@ export class Runtime extends EventEmitter {
357
323
  async restart (applications = []) {
358
324
  this.emitAndNotify('restarting')
359
325
 
360
- const restartInvocations = []
326
+ const toRestart = []
361
327
  for (const application of this.getApplicationsIds()) {
362
328
  if (applications.length === 0 || applications.includes(application)) {
363
- restartInvocations.push([application])
329
+ toRestart.push(application)
364
330
  }
365
331
  }
366
-
367
- await executeInParallel(this.restartApplication.bind(this), restartInvocations, this.#concurrency)
332
+ await this.restartApplications(toRestart)
368
333
 
369
334
  this.emitAndNotify('restarted')
370
335
 
@@ -478,47 +443,151 @@ export class Runtime extends EventEmitter {
478
443
  }
479
444
  }
480
445
 
481
- async startApplication (id, silent = false) {
482
- // Since when an application is stopped the worker is deleted, we consider an application start if its first application
483
- // is no longer in the init phase
484
- const firstWorker = this.#workers.get(`${id}:0`)
485
- if (firstWorker && firstWorker[kWorkerStatus] !== 'boot' && firstWorker[kWorkerStatus] !== 'init') {
486
- throw new ApplicationAlreadyStartedError()
446
+ async addApplications (applications, start = false) {
447
+ const setupInvocations = []
448
+
449
+ const toStart = []
450
+ for (const application of applications) {
451
+ const workers = application.workers
452
+
453
+ if ((workers.static > 1 || workers.minimum > 1) && application.entrypoint && !features.node.reusePort) {
454
+ this.logger.warn(
455
+ `"${application.id}" is set as the entrypoint, but reusePort is not available in your OS; setting workers to 1 instead of ${workers.static}`
456
+ )
457
+ workers.static = 1
458
+ workers.minimum = 1
459
+ }
460
+
461
+ this.#applications.set(application.id, application)
462
+ setupInvocations.push([application])
463
+ toStart.push(application.id)
464
+ }
465
+
466
+ await executeInParallel(this.#setupApplication.bind(this), setupInvocations, this.#concurrency)
467
+
468
+ for (const application of applications) {
469
+ this.emitAndNotify('application:added', application)
470
+ }
471
+
472
+ if (start) {
473
+ await this.startApplications(toStart)
474
+ }
475
+ }
476
+
477
+ async removeApplications (applications, silent = false) {
478
+ if (applications.includes(this.#entrypointId)) {
479
+ throw new CannotRemoveEntrypointError()
480
+ }
481
+
482
+ await this.stopApplications(applications, silent, true)
483
+
484
+ for (const application of applications) {
485
+ this.#dynamicWorkersScaler?.remove(application)
486
+ this.#applications.delete(application)
487
+ }
488
+
489
+ for (const application of applications) {
490
+ this.emitAndNotify('application:removed', application)
491
+ }
492
+ }
493
+
494
+ async startApplications (applicationsToStart, silent = false) {
495
+ const startInvocations = []
496
+ for (const application of applicationsToStart) {
497
+ startInvocations.push([application, silent])
498
+ }
499
+
500
+ return executeInParallel(this.startApplication.bind(this), startInvocations, this.#concurrency)
501
+ }
502
+
503
+ async stopApplications (applicationsToStop, silent = false, skipDependencies = false) {
504
+ const stopInvocations = []
505
+
506
+ // Construct the reverse dependency graph
507
+ const dependents = {}
508
+
509
+ if (!skipDependencies) {
510
+ try {
511
+ const { applications } = await this.getApplications(true)
512
+ for (const application of applications) {
513
+ for (const dependency of application.dependencies ?? []) {
514
+ let applicationDependents = dependents[dependency]
515
+ if (!applicationDependents) {
516
+ applicationDependents = new Set()
517
+ dependents[dependency] = applicationDependents
518
+ }
519
+
520
+ applicationDependents.add(application.id)
521
+ }
522
+ }
523
+ } catch (e) {
524
+ // Noop - This only happens if stop is invoked after a failed start, in which case we don't care about deps
525
+ }
526
+ }
527
+
528
+ for (const application of applicationsToStop) {
529
+ // The entrypoint has been stopped above
530
+ if (application === this.#entrypointId) {
531
+ continue
532
+ }
533
+
534
+ stopInvocations.push([application, silent, Array.from(dependents[application] ?? [])])
535
+ }
536
+
537
+ return executeInParallel(this.stopApplication.bind(this), stopInvocations, this.#concurrency)
538
+ }
539
+
540
+ async restartApplications (applicationsToRestart) {
541
+ const restartInvocations = []
542
+
543
+ for (const application of applicationsToRestart) {
544
+ restartInvocations.push([application])
487
545
  }
488
546
 
547
+ return executeInParallel(this.restartApplication.bind(this), restartInvocations, this.#concurrency)
548
+ }
549
+
550
+ async startApplication (id, silent = false) {
489
551
  const config = this.#config
490
- const applicationConfig = config.applications.find(s => s.id === id)
552
+ const applicationConfig = this.#applications.get(id)
491
553
 
492
554
  if (!applicationConfig) {
493
555
  throw new ApplicationNotFoundError(id, this.getApplicationsIds().join(', '))
494
556
  }
495
557
 
496
- const workersCount = await this.#workers.getCount(applicationConfig.id)
558
+ const workers = applicationConfig.workers.static
559
+ for (let i = 0; i < workers; i++) {
560
+ const worker = this.#workers.get(`${id}:${i}`)
561
+ const status = worker?.[kWorkerStatus]
562
+
563
+ if (status && status !== 'boot' && status !== 'init') {
564
+ throw new ApplicationAlreadyStartedError()
565
+ }
566
+ }
497
567
 
498
568
  this.emitAndNotify('application:starting', id)
499
569
 
500
- for (let i = 0; i < workersCount; i++) {
501
- await this.#startWorker(config, applicationConfig, workersCount, id, i, silent)
570
+ for (let i = 0; i < workers; i++) {
571
+ await this.#startWorker(config, applicationConfig, workers, id, i, silent)
502
572
  }
503
573
 
504
574
  this.emitAndNotify('application:started', id)
505
575
  }
506
576
 
507
577
  async stopApplication (id, silent = false, dependents = []) {
508
- const config = this.#config
509
- const applicationConfig = config.applications.find(s => s.id === id)
510
-
511
- if (!applicationConfig) {
578
+ if (!this.#applications.has(id)) {
512
579
  throw new ApplicationNotFoundError(id, this.getApplicationsIds().join(', '))
513
580
  }
514
581
 
515
- const workersCount = await this.#workers.getCount(applicationConfig.id)
582
+ const workersIds = this.#workers.getKeys(id)
583
+ const workersCount = workersIds.length
516
584
 
517
585
  this.emitAndNotify('application:stopping', id)
518
586
 
519
587
  if (typeof workersCount === 'number') {
520
588
  const stopInvocations = []
521
- for (let i = 0; i < workersCount; i++) {
589
+ for (const workerId of workersIds) {
590
+ const i = parseInt(workerId.split(':')[1])
522
591
  stopInvocations.push([workersCount, id, i, silent, undefined, dependents])
523
592
  }
524
593
 
@@ -529,24 +598,39 @@ export class Runtime extends EventEmitter {
529
598
  }
530
599
 
531
600
  async restartApplication (id) {
532
- const config = this.#config
533
- const applicationConfig = this.#config.applications.find(s => s.id === id)
534
- const workersCount = await this.#workers.getCount(id)
601
+ const applicationConfig = this.#applications.get(id)
535
602
 
536
- this.emitAndNotify('application:restarting', id)
603
+ if (!applicationConfig) {
604
+ throw new ApplicationNotFoundError(id, this.getApplicationsIds().join(', '))
605
+ }
537
606
 
538
- for (let i = 0; i < workersCount; i++) {
539
- const label = `${id}:${i}`
540
- const worker = this.#workers.get(label)
607
+ if (this.#restartingApplications.has(id)) {
608
+ return
609
+ }
610
+ this.#restartingApplications.add(id)
541
611
 
542
- if (i > 0 && config.workersRestartDelay > 0) {
543
- await sleep(config.workersRestartDelay)
612
+ try {
613
+ const config = this.#config
614
+ const workersIds = await this.#workers.getKeys(id)
615
+ const workersCount = workersIds.length
616
+
617
+ this.emitAndNotify('application:restarting', id)
618
+
619
+ for (let i = 0; i < workersCount; i++) {
620
+ const workerId = workersIds[i]
621
+ const worker = this.#workers.get(workerId)
622
+
623
+ if (i > 0 && config.workersRestartDelay > 0) {
624
+ await sleep(config.workersRestartDelay)
625
+ }
626
+
627
+ await this.#replaceWorker(config, applicationConfig, workersCount, id, i, worker, true)
544
628
  }
545
629
 
546
- await this.#replaceWorker(config, applicationConfig, workersCount, id, i, worker, true)
630
+ this.emitAndNotify('application:restarted', id)
631
+ } finally {
632
+ this.#restartingApplications.delete(id)
547
633
  }
548
-
549
- this.emitAndNotify('application:restarted', id)
550
634
  }
551
635
 
552
636
  async buildApplication (id) {
@@ -798,7 +882,7 @@ export class Runtime extends EventEmitter {
798
882
  this.#concurrency = concurrency
799
883
  }
800
884
 
801
- async getUrl () {
885
+ getUrl () {
802
886
  return this.#url
803
887
  }
804
888
 
@@ -861,14 +945,11 @@ export class Runtime extends EventEmitter {
861
945
  async getCustomHealthChecks () {
862
946
  const status = {}
863
947
 
864
- for (const [application, { count }] of Object.entries(this.#workers.configuration)) {
865
- for (let i = 0; i < count; i++) {
866
- const label = `${application}:${i}`
867
- const worker = this.#workers.get(label)
868
-
869
- if (worker) {
870
- status[label] = await sendViaITC(worker, 'getCustomHealthCheck')
871
- }
948
+ for (const id of this.#applications.keys()) {
949
+ const workersIds = this.#workers.getKeys(id)
950
+ for (const workerId of workersIds) {
951
+ const worker = this.#workers.get(workerId)
952
+ status[workerId] = await sendViaITC(worker, 'getCustomHealthCheck')
872
953
  }
873
954
  }
874
955
 
@@ -878,14 +959,11 @@ export class Runtime extends EventEmitter {
878
959
  async getCustomReadinessChecks () {
879
960
  const status = {}
880
961
 
881
- for (const [application, { count }] of Object.entries(this.#workers.configuration)) {
882
- for (let i = 0; i < count; i++) {
883
- const label = `${application}:${i}`
884
- const worker = this.#workers.get(label)
885
-
886
- if (worker) {
887
- status[label] = await sendViaITC(worker, 'getCustomReadinessCheck')
888
- }
962
+ for (const id of this.#applications.keys()) {
963
+ const workersIds = this.#workers.getKeys(id)
964
+ for (const workerId of workersIds) {
965
+ const worker = this.#workers.get(workerId)
966
+ status[workerId] = await sendViaITC(worker, 'getCustomReadinessCheck')
889
967
  }
890
968
  }
891
969
 
@@ -1055,16 +1133,15 @@ export class Runtime extends EventEmitter {
1055
1133
  }
1056
1134
 
1057
1135
  async getApplicationResourcesInfo (id) {
1058
- const workers = this.#workers.getCount(id)
1059
-
1060
- const worker = await this.#getWorkerById(id, 0, false, false)
1136
+ const workersCount = this.#workers.getKeys(id).length
1137
+ const worker = await this.#getWorkerByIdOrNext(id, 0, false, false)
1061
1138
  const health = worker[kConfig].health
1062
1139
 
1063
- return { workers, health }
1140
+ return { workers: workersCount, health }
1064
1141
  }
1065
1142
 
1066
1143
  getApplicationsIds () {
1067
- return this.#config.applications.map(application => application.id)
1144
+ return Array.from(this.#applications.keys())
1068
1145
  }
1069
1146
 
1070
1147
  async getApplications (allowUnloaded = false) {
@@ -1077,26 +1154,6 @@ export class Runtime extends EventEmitter {
1077
1154
  }
1078
1155
  }
1079
1156
 
1080
- async getWorkers () {
1081
- const status = {}
1082
-
1083
- for (const [application, { count }] of Object.entries(this.#workers.configuration)) {
1084
- for (let i = 0; i < count; i++) {
1085
- const label = `${application}:${i}`
1086
- const worker = this.#workers.get(label)
1087
-
1088
- status[label] = {
1089
- application,
1090
- worker: i,
1091
- status: worker?.[kWorkerStatus] ?? 'exited',
1092
- thread: worker?.threadId
1093
- }
1094
- }
1095
- }
1096
-
1097
- return status
1098
- }
1099
-
1100
1157
  async getApplicationMeta (id) {
1101
1158
  const application = await this.#getApplicationById(id)
1102
1159
 
@@ -1141,7 +1198,7 @@ export class Runtime extends EventEmitter {
1141
1198
  }
1142
1199
 
1143
1200
  if (this.#isProduction) {
1144
- applicationDetails.workers = this.#workers.getCount(id)
1201
+ applicationDetails.workers = this.#workers.getKeys(id).length
1145
1202
  }
1146
1203
 
1147
1204
  if (entrypoint) {
@@ -1179,6 +1236,45 @@ export class Runtime extends EventEmitter {
1179
1236
  return sendViaITC(application, 'getApplicationGraphQLSchema')
1180
1237
  }
1181
1238
 
1239
+ async getWorkers (includeRaw = false) {
1240
+ const status = {}
1241
+
1242
+ for (const [key, worker] of this.#workers.entries()) {
1243
+ const [application, index] = key.split(':')
1244
+
1245
+ status[key] = {
1246
+ application,
1247
+ worker: index,
1248
+ status: worker[kWorkerStatus],
1249
+ thread: worker.threadId,
1250
+ raw: includeRaw ? worker : undefined
1251
+ }
1252
+ }
1253
+
1254
+ return status
1255
+ }
1256
+
1257
+ async getWorkerHealth (worker, options = {}) {
1258
+ if (!features.node.worker.getHeapStatistics) {
1259
+ throw new GetHeapStatisticUnavailable()
1260
+ }
1261
+
1262
+ const currentELU = worker.performance.eventLoopUtilization()
1263
+ const previousELU = options.previousELU
1264
+
1265
+ let elu = currentELU
1266
+ if (previousELU) {
1267
+ elu = worker.performance.eventLoopUtilization(elu, previousELU)
1268
+ }
1269
+
1270
+ const { used_heap_size: heapUsed, total_heap_size: heapTotal } = await worker.getHeapStatistics()
1271
+ return { elu: elu.utilization, heapUsed, heapTotal, currentELU }
1272
+ }
1273
+
1274
+ getDynamicWorkersScaler () {
1275
+ return this.#dynamicWorkersScaler
1276
+ }
1277
+
1182
1278
  #getHttpCacheValue ({ request }) {
1183
1279
  if (!this.#sharedHttpCache) {
1184
1280
  return
@@ -1230,59 +1326,49 @@ export class Runtime extends EventEmitter {
1230
1326
  this.logger.info(`Platformatic is now listening at ${this.#url}`)
1231
1327
  }
1232
1328
 
1233
- async #setupApplications () {
1329
+ async #setupApplication (applicationConfig) {
1330
+ if (this.#status === 'stopping' || this.#status === 'closed') {
1331
+ return
1332
+ }
1333
+
1334
+ const id = applicationConfig.id
1234
1335
  const config = this.#config
1235
- const setupInvocations = []
1236
1336
 
1237
- // Parse all applications and verify we're not missing any path or resolved application
1238
- for (const applicationConfig of config.applications) {
1337
+ if (!applicationConfig.path) {
1239
1338
  // If there is no application path, check if the application was resolved
1240
- if (!applicationConfig.path) {
1241
- if (applicationConfig.url) {
1242
- // Try to backfill the path for external applications
1243
- applicationConfig.path = join(this.#root, config.resolvedApplicationsBasePath, applicationConfig.id)
1244
-
1245
- if (!existsSync(applicationConfig.path)) {
1246
- const executable = globalThis.platformatic?.executable ?? 'platformatic'
1247
- this.logger.error(
1248
- `The path for application "%s" does not exist. Please run "${executable} resolve" and try again.`,
1249
- applicationConfig.id
1250
- )
1339
+ if (applicationConfig.url) {
1340
+ // Try to backfill the path for external applications
1341
+ applicationConfig.path = join(this.#root, config.resolvedApplicationsBasePath, id)
1251
1342
 
1252
- await this.closeAndThrow(new RuntimeAbortedError())
1253
- }
1254
- } else {
1343
+ if (!existsSync(applicationConfig.path)) {
1344
+ const executable = globalThis.platformatic?.executable ?? 'platformatic'
1255
1345
  this.logger.error(
1256
- 'The application "%s" has no path defined. Please check your configuration and try again.',
1257
- applicationConfig.id
1346
+ `The path for application "%s" does not exist. Please run "${executable} resolve" and try again.`,
1347
+ id
1258
1348
  )
1259
1349
 
1260
1350
  await this.closeAndThrow(new RuntimeAbortedError())
1261
1351
  }
1262
- }
1263
-
1264
- setupInvocations.push([applicationConfig])
1265
- }
1266
-
1267
- await executeInParallel(this.#setupApplication.bind(this), setupInvocations, this.#concurrency)
1268
- }
1352
+ } else {
1353
+ this.logger.error(
1354
+ 'The application "%s" has no path defined. Please check your configuration and try again.',
1355
+ id
1356
+ )
1269
1357
 
1270
- async #setupApplication (applicationConfig) {
1271
- if (this.#status === 'stopping' || this.#status === 'closed') {
1272
- return
1358
+ await this.closeAndThrow(new RuntimeAbortedError())
1359
+ }
1273
1360
  }
1274
1361
 
1275
- const config = this.#config
1276
- const workersCount = await this.#workers.getCount(applicationConfig.id)
1277
- const id = applicationConfig.id
1362
+ const workers = applicationConfig.workers.static
1278
1363
  const setupInvocations = []
1279
1364
 
1280
- for (let i = 0; i < workersCount; i++) {
1281
- setupInvocations.push([config, applicationConfig, workersCount, id, i])
1365
+ for (let i = 0; i < workers; i++) {
1366
+ setupInvocations.push([config, applicationConfig, workers, id, i])
1282
1367
  }
1283
1368
 
1284
1369
  await executeInParallel(this.#setupWorker.bind(this), setupInvocations, this.#concurrency)
1285
1370
 
1371
+ await this.#dynamicWorkersScaler?.add(applicationConfig)
1286
1372
  this.emitAndNotify('application:init', id)
1287
1373
  }
1288
1374
 
@@ -1344,9 +1430,9 @@ export class Runtime extends EventEmitter {
1344
1430
  const workerEnv = structuredClone(this.#env)
1345
1431
 
1346
1432
  if (applicationConfig.nodeOptions?.trim().length > 0) {
1347
- const originalNodeOptions = workerEnv['NODE_OPTIONS'] ?? ''
1433
+ const originalNodeOptions = workerEnv.NODE_OPTIONS ?? ''
1348
1434
 
1349
- workerEnv['NODE_OPTIONS'] = `${originalNodeOptions} ${applicationConfig.nodeOptions}`.trim()
1435
+ workerEnv.NODE_OPTIONS = `${originalNodeOptions} ${applicationConfig.nodeOptions}`.trim()
1350
1436
  }
1351
1437
 
1352
1438
  const maxHeapTotal =
@@ -1391,7 +1477,7 @@ export class Runtime extends EventEmitter {
1391
1477
  stderr: true
1392
1478
  })
1393
1479
 
1394
- this.#handleWorkerStandardStreams(worker, applicationId, workersCount > 1 ? index : undefined)
1480
+ this.#handleWorkerStandardStreams(worker, applicationId, index)
1395
1481
 
1396
1482
  // Make sure the listener can handle a lot of API requests at once before raising a warning
1397
1483
  worker.setMaxListeners(1e3)
@@ -1445,10 +1531,10 @@ export class Runtime extends EventEmitter {
1445
1531
  })
1446
1532
  })
1447
1533
 
1448
- worker[kId] = workersCount > 1 ? workerId : applicationId
1534
+ worker[kId] = workerId
1449
1535
  worker[kFullId] = workerId
1450
1536
  worker[kApplicationId] = applicationId
1451
- worker[kWorkerId] = workersCount > 1 ? index : undefined
1537
+ worker[kWorkerId] = index
1452
1538
  worker[kWorkerStatus] = 'boot'
1453
1539
 
1454
1540
  if (inspectorOptions) {
@@ -1476,6 +1562,14 @@ export class Runtime extends EventEmitter {
1476
1562
  this.logger.trace({ event, payload }, 'Runtime event')
1477
1563
  })
1478
1564
 
1565
+ worker[kITC].on('request:restart', async () => {
1566
+ try {
1567
+ await this.restartApplication(applicationId)
1568
+ } catch (e) {
1569
+ this.logger.error(e)
1570
+ }
1571
+ })
1572
+
1479
1573
  // Only activate watch for the first instance
1480
1574
  if (index === 0) {
1481
1575
  // Handle applications changes
@@ -1527,92 +1621,141 @@ export class Runtime extends EventEmitter {
1527
1621
  return worker
1528
1622
  }
1529
1623
 
1530
- async #getHealth (worker) {
1531
- if (features.node.worker.getHeapStatistics) {
1532
- const { used_heap_size: heapUsed, total_heap_size: heapTotal } = await worker.getHeapStatistics()
1533
- const currentELU = worker.performance.eventLoopUtilization()
1534
- const elu = worker[kLastELU] ? worker.performance.eventLoopUtilization(currentELU, worker[kLastELU]) : currentELU
1535
- worker[kLastELU] = currentELU
1536
- return { elu: elu.utilization, heapUsed, heapTotal }
1537
- }
1538
-
1539
- const health = await worker[kITC].send('getHealth')
1540
- return health
1541
- }
1542
-
1543
- #setupHealthCheck (config, applicationConfig, workersCount, id, index, worker, errorLabel) {
1624
+ #setupHealthMetrics (id, index, worker, errorLabel) {
1544
1625
  // Clear the timeout when exiting
1545
- worker.on('exit', () => clearTimeout(worker[kHealthCheckTimer]))
1626
+ worker.on('exit', () => clearTimeout(worker[kHealthMetricsTimer]))
1546
1627
 
1547
- const { maxELU, maxHeapUsed, maxHeapTotal, maxUnhealthyChecks, interval } = worker[kConfig].health
1548
- const maxHeapTotalNumber = typeof maxHeapTotal === 'string' ? parseMemorySize(maxHeapTotal) : maxHeapTotal
1628
+ worker[kHealthMetricsTimer] = setTimeout(async () => {
1629
+ if (worker[kWorkerStatus] !== 'started') return
1549
1630
 
1550
- let unhealthyChecks = 0
1551
-
1552
- worker[kHealthCheckTimer] = setTimeout(async () => {
1553
- if (worker[kWorkerStatus] !== 'started') {
1554
- return
1555
- }
1556
-
1557
- let health, unhealthy, memoryUsage
1631
+ let health = null
1558
1632
  try {
1559
- health = await this.#getHealth(worker)
1560
- memoryUsage = health.heapUsed / maxHeapTotalNumber
1561
- unhealthy = health.elu > maxELU || memoryUsage > maxHeapUsed
1633
+ health = await this.getWorkerHealth(worker, {
1634
+ previousELU: worker[kLastHealthCheckELU]
1635
+ })
1562
1636
  } catch (err) {
1563
1637
  this.logger.error({ err }, `Failed to get health for ${errorLabel}.`)
1564
- unhealthy = true
1565
- memoryUsage = -1
1566
- health = { elu: -1, heapUsed: -1, heapTotal: -1 }
1638
+ } finally {
1639
+ worker[kLastHealthCheckELU] = health?.currentELU ?? null
1567
1640
  }
1568
1641
 
1569
- this.emitAndNotify('application:worker:health', {
1642
+ const healthSignals = worker[kWorkerHealthSignals]?.getAll() ?? []
1643
+
1644
+ this.emitAndNotify('application:worker:health:metrics', {
1570
1645
  id: worker[kId],
1571
1646
  application: id,
1572
1647
  worker: index,
1573
1648
  currentHealth: health,
1574
- unhealthy,
1575
- healthConfig: worker[kConfig].health
1649
+ healthSignals
1576
1650
  })
1577
1651
 
1578
- if (unhealthy) {
1652
+ worker[kHealthMetricsTimer].refresh()
1653
+ }, 1000).unref()
1654
+ }
1655
+
1656
+ #setupHealthCheck (config, applicationConfig, workersCount, id, index, worker, errorLabel) {
1657
+ let healthMetricsListener = null
1658
+
1659
+ // Clear the timeout and listener when exiting
1660
+ worker.on('exit', () => {
1661
+ clearTimeout(worker[kHealthCheckTimer])
1662
+ if (healthMetricsListener) {
1663
+ this.removeListener('application:worker:health:metrics', healthMetricsListener)
1664
+ }
1665
+ })
1666
+
1667
+ const healthConfig = worker[kConfig].health
1668
+
1669
+ let {
1670
+ maxELU,
1671
+ maxHeapUsed,
1672
+ maxHeapTotal,
1673
+ maxUnhealthyChecks,
1674
+ interval
1675
+ } = worker[kConfig].health
1676
+
1677
+ if (typeof maxHeapTotal === 'string') {
1678
+ maxHeapTotal = parseMemorySize(maxHeapTotal)
1679
+ }
1680
+
1681
+ if (interval < 1000) {
1682
+ interval = 1000
1683
+ this.logger.warn(
1684
+ `The health check interval for the "${errorLabel}" is set to ${healthConfig.interval}ms. ` +
1685
+ 'The minimum health check interval is 1s. It will be set to 1000ms.'
1686
+ )
1687
+ }
1688
+
1689
+ let lastHealthMetrics = null
1690
+
1691
+ healthMetricsListener = healthCheck => {
1692
+ if (healthCheck.id === worker[kId]) {
1693
+ lastHealthMetrics = healthCheck
1694
+ }
1695
+ }
1696
+
1697
+ this.on('application:worker:health:metrics', healthMetricsListener)
1698
+
1699
+ let unhealthyChecks = 0
1700
+
1701
+ worker[kHealthCheckTimer] = setTimeout(async () => {
1702
+ if (worker[kWorkerStatus] !== 'started') return
1703
+
1704
+ if (lastHealthMetrics) {
1705
+ const health = lastHealthMetrics.currentHealth
1706
+ const memoryUsage = health.heapUsed / maxHeapTotal
1707
+ const unhealthy = health.elu > maxELU || memoryUsage > maxHeapUsed
1708
+
1709
+ this.emitAndNotify('application:worker:health', {
1710
+ id: worker[kId],
1711
+ application: id,
1712
+ worker: index,
1713
+ currentHealth: health,
1714
+ unhealthy,
1715
+ healthConfig
1716
+ })
1717
+
1579
1718
  if (health.elu > maxELU) {
1580
1719
  this.logger.error(
1581
- `The ${errorLabel} has an ELU of ${(health.elu * 100).toFixed(2)} %, above the maximum allowed usage of ${(maxELU * 100).toFixed(2)} %.`
1720
+ `The ${errorLabel} has an ELU of ${(health.elu * 100).toFixed(2)} %, ` +
1721
+ `above the maximum allowed usage of ${(maxELU * 100).toFixed(2)} %.`
1582
1722
  )
1583
1723
  }
1584
1724
 
1585
1725
  if (memoryUsage > maxHeapUsed) {
1586
1726
  this.logger.error(
1587
- `The ${errorLabel} is using ${(memoryUsage * 100).toFixed(2)} % of the memory, above the maximum allowed usage of ${(maxHeapUsed * 100).toFixed(2)} %.`
1727
+ `The ${errorLabel} is using ${(memoryUsage * 100).toFixed(2)} % of the memory, ` +
1728
+ `above the maximum allowed usage of ${(maxHeapUsed * 100).toFixed(2)} %.`
1588
1729
  )
1589
1730
  }
1590
1731
 
1591
- unhealthyChecks++
1592
- } else {
1593
- unhealthyChecks = 0
1594
- }
1732
+ if (unhealthy) {
1733
+ unhealthyChecks++
1734
+ } else {
1735
+ unhealthyChecks = 0
1736
+ }
1595
1737
 
1596
- if (unhealthyChecks === maxUnhealthyChecks) {
1597
- try {
1598
- this.emitAndNotify('application:worker:unhealthy', { application: id, worker: index })
1738
+ if (unhealthyChecks === maxUnhealthyChecks) {
1739
+ try {
1740
+ this.emitAndNotify('application:worker:unhealthy', { application: id, worker: index })
1599
1741
 
1600
- this.logger.error(
1601
- { elu: health.elu, maxELU, memoryUsage: health.heapUsed, maxMemoryUsage: maxHeapUsed },
1742
+ this.logger.error(
1743
+ { elu: health.elu, maxELU, memoryUsage: health.heapUsed, maxMemoryUsage: maxHeapUsed },
1602
1744
  `The ${errorLabel} is unhealthy. Replacing it ...`
1603
- )
1745
+ )
1604
1746
 
1605
- await this.#replaceWorker(config, applicationConfig, workersCount, id, index, worker)
1606
- } catch (e) {
1607
- this.logger.error(
1608
- { elu: health.elu, maxELU, memoryUsage: health.heapUsed, maxMemoryUsage: maxHeapUsed },
1747
+ await this.#replaceWorker(config, applicationConfig, workersCount, id, index, worker)
1748
+ } catch (e) {
1749
+ this.logger.error(
1750
+ { elu: health.elu, maxELU, memoryUsage: health.heapUsed, maxMemoryUsage: maxHeapUsed },
1609
1751
  `Cannot replace the ${errorLabel}. Forcefully terminating it ...`
1610
- )
1752
+ )
1611
1753
 
1612
- worker.terminate()
1754
+ worker.terminate()
1755
+ }
1756
+ } else {
1757
+ worker[kHealthCheckTimer].refresh()
1613
1758
  }
1614
- } else {
1615
- worker[kHealthCheckTimer].refresh()
1616
1759
  }
1617
1760
  }, interval).unref()
1618
1761
  }
@@ -1635,7 +1778,7 @@ export class Runtime extends EventEmitter {
1635
1778
  }
1636
1779
 
1637
1780
  if (!worker) {
1638
- worker = await this.#getWorkerById(id, index, false, false)
1781
+ worker = await this.#getWorkerByIdOrNext(id, index, false, false)
1639
1782
  }
1640
1783
 
1641
1784
  const eventPayload = { application: id, worker: index, workersCount }
@@ -1643,7 +1786,7 @@ export class Runtime extends EventEmitter {
1643
1786
  // The application was stopped, recreate the thread
1644
1787
  if (!worker) {
1645
1788
  await this.#setupApplication(applicationConfig, index)
1646
- worker = await this.#getWorkerById(id, index)
1789
+ worker = await this.#getWorkerByIdOrNext(id, index)
1647
1790
  }
1648
1791
 
1649
1792
  worker[kWorkerStatus] = 'starting'
@@ -1680,6 +1823,8 @@ export class Runtime extends EventEmitter {
1680
1823
  this.logger.info(`Started the ${label}...`)
1681
1824
  }
1682
1825
 
1826
+ this.#setupHealthMetrics(id, index, worker, label)
1827
+
1683
1828
  const { enabled, gracePeriod } = worker[kConfig].health
1684
1829
  if (enabled && config.restartOnError > 0) {
1685
1830
  // if gracePeriod is 0, it will be set to 1 to start health checks immediately
@@ -1746,7 +1891,7 @@ export class Runtime extends EventEmitter {
1746
1891
 
1747
1892
  async #stopWorker (workersCount, id, index, silent, worker, dependents) {
1748
1893
  if (!worker) {
1749
- worker = await this.#getWorkerById(id, index, false, false)
1894
+ worker = await this.#getWorkerByIdOrNext(id, index, false, false)
1750
1895
  }
1751
1896
 
1752
1897
  if (!worker) {
@@ -1827,10 +1972,8 @@ export class Runtime extends EventEmitter {
1827
1972
  return this.#cleanupWorker(worker)
1828
1973
  }
1829
1974
 
1830
- #workerExtendedLabel (applicationId, workerId, workersCount) {
1831
- return workersCount > 1
1832
- ? `worker ${workerId} of the application "${applicationId}"`
1833
- : `application "${applicationId}"`
1975
+ #workerExtendedLabel (applicationId, workerId, _workersCount) {
1976
+ return `worker ${workerId} of the application "${applicationId}"`
1834
1977
  }
1835
1978
 
1836
1979
  async #restartCrashedWorker (config, applicationConfig, workersCount, id, index, silent, bootstrapAttempt) {
@@ -1924,7 +2067,6 @@ export class Runtime extends EventEmitter {
1924
2067
  }
1925
2068
 
1926
2069
  async #getApplicationById (applicationId, ensureStarted = false, mustExist = true) {
1927
- // If the applicationId includes the worker, properly split
1928
2070
  let workerId
1929
2071
  const matched = applicationId.match(/^(.+):(\d+)$/)
1930
2072
 
@@ -1933,16 +2075,23 @@ export class Runtime extends EventEmitter {
1933
2075
  workerId = matched[2]
1934
2076
  }
1935
2077
 
1936
- return this.#getWorkerById(applicationId, workerId, ensureStarted, mustExist)
2078
+ if (!this.#applications.has(applicationId)) {
2079
+ throw new ApplicationNotFoundError(applicationId, this.getApplicationsIds().join(', '))
2080
+ }
2081
+
2082
+ return this.#getWorkerByIdOrNext(applicationId, workerId, ensureStarted, mustExist)
1937
2083
  }
1938
2084
 
1939
- async #getWorkerById (applicationId, workerId, ensureStarted = false, mustExist = true) {
2085
+ // This method can work in two modes: when workerId is provided, it will return the specific worker
2086
+ // otherwise it will return the next available worker for the application.
2087
+ async #getWorkerByIdOrNext (applicationId, workerId, ensureStarted = false, mustExist = true) {
1940
2088
  let worker
1941
2089
 
1942
- if (typeof workerId !== 'undefined') {
1943
- worker = this.#workers.get(`${applicationId}:${workerId}`)
1944
- } else {
2090
+ // Note that in this class "== null" is purposely used instead of "===" to check for both null and undefined
2091
+ if (workerId == null) {
1945
2092
  worker = this.#workers.next(applicationId)
2093
+ } else {
2094
+ worker = this.#workers.get(`${applicationId}:${workerId}`)
1946
2095
  }
1947
2096
 
1948
2097
  const applicationsIds = this.getApplicationsIds()
@@ -1953,8 +2102,8 @@ export class Runtime extends EventEmitter {
1953
2102
  }
1954
2103
 
1955
2104
  if (applicationsIds.includes(applicationId)) {
1956
- const availableWorkers = Array.from(this.#workers.keys())
1957
- .filter(key => key.startsWith(applicationId + ':'))
2105
+ const availableWorkers = this.#workers
2106
+ .getKeys(applicationId)
1958
2107
  .map(key => key.split(':')[1])
1959
2108
  .join(', ')
1960
2109
  throw new WorkerNotFoundError(workerId, applicationId, availableWorkers)
@@ -2019,7 +2168,7 @@ export class Runtime extends EventEmitter {
2019
2168
  )
2020
2169
  }
2021
2170
 
2022
- const target = await this.#getWorkerById(application, worker, true, true)
2171
+ const target = await this.#getWorkerByIdOrNext(application, worker, true, true)
2023
2172
 
2024
2173
  const { port1, port2 } = new MessageChannel()
2025
2174
 
@@ -2154,16 +2303,14 @@ export class Runtime extends EventEmitter {
2154
2303
  async #updateApplicationConfigWorkers (applicationId, workers) {
2155
2304
  this.logger.info(`Updating application "${applicationId}" config workers to ${workers}`)
2156
2305
 
2157
- this.#config.applications.find(s => s.id === applicationId).workers = workers
2158
- const application = await this.#getApplicationById(applicationId)
2159
- this.#workers.setCount(applicationId, workers)
2160
- application[kConfig].workers = workers
2306
+ this.#applications.get(applicationId).workers.static = workers
2161
2307
 
2308
+ const workersIds = this.#workers.getKeys(applicationId)
2162
2309
  const promises = []
2163
- for (const [workerId, worker] of this.#workers.entries()) {
2164
- if (workerId.startsWith(`${applicationId}:`)) {
2165
- promises.push(sendViaITC(worker, 'updateWorkersCount', { applicationId, workers }))
2166
- }
2310
+
2311
+ for (const workerId of workersIds) {
2312
+ const worker = this.#workers.get(workerId)
2313
+ promises.push(sendViaITC(worker, 'updateWorkersCount', { applicationId, workers }))
2167
2314
  }
2168
2315
 
2169
2316
  const results = await Promise.allSettled(promises)
@@ -2179,7 +2326,7 @@ export class Runtime extends EventEmitter {
2179
2326
  this.logger.info(`Updating application "${applicationId}" config health heap to ${JSON.stringify(health)}`)
2180
2327
  const { maxHeapTotal, maxYoungGeneration } = health
2181
2328
 
2182
- const application = this.#config.applications.find(s => s.id === applicationId)
2329
+ const application = this.#applications.get(applicationId)
2183
2330
  if (maxHeapTotal) {
2184
2331
  application.health.maxHeapTotal = maxHeapTotal
2185
2332
  }
@@ -2196,7 +2343,6 @@ export class Runtime extends EventEmitter {
2196
2343
  throw new InvalidArgumentError('updates', 'must have at least one element')
2197
2344
  }
2198
2345
 
2199
- const config = this.#config
2200
2346
  const validatedUpdates = []
2201
2347
  for (const update of updates) {
2202
2348
  const { application: applicationId } = update
@@ -2204,7 +2350,7 @@ export class Runtime extends EventEmitter {
2204
2350
  if (!applicationId) {
2205
2351
  throw new InvalidArgumentError('application', 'must be a string')
2206
2352
  }
2207
- const applicationConfig = config.applications.find(s => s.id === applicationId)
2353
+ const applicationConfig = this.#applications.get(applicationId)
2208
2354
  if (!applicationConfig) {
2209
2355
  throw new ApplicationNotFoundError(applicationId, Array.from(this.getApplicationsIds()).join(', '))
2210
2356
  }
@@ -2391,7 +2537,7 @@ export class Runtime extends EventEmitter {
2391
2537
  `Restarting application "${applicationId}" worker ${i} to update config health heap...`
2392
2538
  )
2393
2539
 
2394
- const worker = await this.#getWorkerById(applicationId, i)
2540
+ const worker = await this.#getWorkerByIdOrNext(applicationId, i)
2395
2541
  if (health.maxHeapTotal) {
2396
2542
  worker[kConfig].health.maxHeapTotal = health.maxHeapTotal
2397
2543
  }
@@ -2407,6 +2553,10 @@ export class Runtime extends EventEmitter {
2407
2553
  )
2408
2554
  }
2409
2555
  report.success = true
2556
+
2557
+ if (report.success) {
2558
+ this.emitAndNotify('application:resources:health:updated', { application: applicationId, health })
2559
+ }
2410
2560
  } catch (err) {
2411
2561
  if (report.updated.length < 1) {
2412
2562
  this.logger.error({ err }, 'Cannot update application health heap, no worker updated')
@@ -2423,30 +2573,29 @@ export class Runtime extends EventEmitter {
2423
2573
  }
2424
2574
 
2425
2575
  async #updateApplicationWorkers (applicationId, config, applicationConfig, workers, currentWorkers) {
2426
- const report = {
2427
- current: currentWorkers,
2428
- new: workers
2429
- }
2576
+ const report = { current: currentWorkers, new: workers }
2577
+
2578
+ let startedWorkersCount = 0
2579
+ let stoppedWorkersCount = 0
2580
+
2430
2581
  if (currentWorkers < workers) {
2431
2582
  report.started = []
2432
2583
  try {
2433
- await this.#updateApplicationConfigWorkers(applicationId, workers)
2434
2584
  for (let i = currentWorkers; i < workers; i++) {
2435
2585
  await this.#setupWorker(config, applicationConfig, workers, applicationId, i)
2436
2586
  await this.#startWorker(config, applicationConfig, workers, applicationId, i, false, 0)
2437
2587
  report.started.push(i)
2588
+ startedWorkersCount++
2438
2589
  }
2439
2590
  report.success = true
2440
2591
  } catch (err) {
2441
- if (report.started.length < 1) {
2592
+ if (startedWorkersCount < 1) {
2442
2593
  this.logger.error({ err }, 'Cannot start application workers, no worker started')
2443
- await this.#updateApplicationConfigWorkers(applicationId, currentWorkers)
2444
2594
  } else {
2445
2595
  this.logger.error(
2446
2596
  { err },
2447
- `Cannot start application workers, started workers: ${report.started.length} out of ${workers}`
2597
+ `Cannot start application workers, started workers: ${startedWorkersCount} out of ${workers}`
2448
2598
  )
2449
- await this.#updateApplicationConfigWorkers(applicationId, currentWorkers + report.started.length)
2450
2599
  }
2451
2600
  report.success = false
2452
2601
  }
@@ -2455,26 +2604,35 @@ export class Runtime extends EventEmitter {
2455
2604
  report.stopped = []
2456
2605
  try {
2457
2606
  for (let i = currentWorkers - 1; i >= workers; i--) {
2458
- const worker = await this.#getWorkerById(applicationId, i, false, false)
2607
+ const worker = await this.#getWorkerByIdOrNext(applicationId, i, false, false)
2459
2608
  await sendViaITC(worker, 'removeFromMesh')
2460
2609
  await this.#stopWorker(currentWorkers, applicationId, i, false, worker, [])
2461
2610
  report.stopped.push(i)
2611
+ stoppedWorkersCount++
2462
2612
  }
2463
- await this.#updateApplicationConfigWorkers(applicationId, workers)
2464
2613
  report.success = true
2465
2614
  } catch (err) {
2466
- if (report.stopped.length < 1) {
2615
+ if (stoppedWorkersCount < 1) {
2467
2616
  this.logger.error({ err }, 'Cannot stop application workers, no worker stopped')
2468
2617
  } else {
2469
2618
  this.logger.error(
2470
2619
  { err },
2471
- `Cannot stop application workers, stopped workers: ${report.stopped.length} out of ${workers}`
2620
+ `Cannot stop application workers, stopped workers: ${stoppedWorkersCount} out of ${workers}`
2472
2621
  )
2473
- await this.#updateApplicationConfigWorkers(applicationId, currentWorkers - report.stopped)
2474
2622
  }
2475
2623
  report.success = false
2476
2624
  }
2477
2625
  }
2626
+
2627
+ const newWorkersCount = currentWorkers + startedWorkersCount - stoppedWorkersCount
2628
+ if (newWorkersCount !== currentWorkers) {
2629
+ await this.#updateApplicationConfigWorkers(applicationId, newWorkersCount)
2630
+ }
2631
+
2632
+ if (report.success) {
2633
+ this.emitAndNotify('application:resources:workers:updated', { application: applicationId, workers })
2634
+ }
2635
+
2478
2636
  return report
2479
2637
  }
2480
2638
 
@@ -2486,199 +2644,6 @@ export class Runtime extends EventEmitter {
2486
2644
  }
2487
2645
  }
2488
2646
 
2489
- async #setupVerticalScaler () {
2490
- const fixedWorkersCount = this.#config.workers
2491
- if (fixedWorkersCount !== undefined) {
2492
- this.logger.warn(`Vertical scaler disabled because the "workers" configuration is set to ${fixedWorkersCount}`)
2493
- return
2494
- }
2495
-
2496
- const scalerConfig = this.#config.verticalScaler
2497
- const memInfo = await getMemoryInfo()
2498
- const memScope = memInfo.scope
2499
-
2500
- scalerConfig.maxTotalWorkers ??= os.availableParallelism()
2501
- scalerConfig.maxTotalMemory ??= memInfo.total * 0.9
2502
- scalerConfig.maxWorkers ??= scalerConfig.maxTotalWorkers
2503
- scalerConfig.minWorkers ??= 1
2504
- scalerConfig.cooldownSec ??= 60
2505
- scalerConfig.scaleUpELU ??= 0.8
2506
- scalerConfig.scaleDownELU ??= 0.2
2507
- scalerConfig.scaleIntervalSec ??= 60
2508
- scalerConfig.timeWindowSec ??= 10
2509
- scalerConfig.scaleDownTimeWindowSec ??= 60
2510
- scalerConfig.gracePeriod ??= 30 * 1000
2511
- scalerConfig.applications ??= {}
2512
-
2513
- const maxTotalWorkers = scalerConfig.maxTotalWorkers
2514
- const maxTotalMemory = scalerConfig.maxTotalMemory
2515
- const maxWorkers = scalerConfig.maxWorkers
2516
- const minWorkers = scalerConfig.minWorkers
2517
- const cooldown = scalerConfig.cooldownSec
2518
- const scaleUpELU = scalerConfig.scaleUpELU
2519
- const scaleDownELU = scalerConfig.scaleDownELU
2520
- const scaleIntervalSec = scalerConfig.scaleIntervalSec
2521
- const timeWindowSec = scalerConfig.timeWindowSec
2522
- const scaleDownTimeWindowSec = scalerConfig.scaleDownTimeWindowSec
2523
- const applicationsConfigs = scalerConfig.applications
2524
- const gracePeriod = scalerConfig.gracePeriod
2525
- const healthCheckInterval = 1000
2526
-
2527
- const initialResourcesUpdates = []
2528
-
2529
- for (const application of this.#config.applications) {
2530
- if (application.entrypoint && !features.node.reusePort) {
2531
- this.logger.warn(
2532
- `The "${application.id}" application cannot be scaled because it is an entrypoint` +
2533
- ' and the "reusePort" feature is not available in your OS.'
2534
- )
2535
-
2536
- applicationsConfigs[application.id] = {
2537
- minWorkers: 1,
2538
- maxWorkers: 1
2539
- }
2540
- continue
2541
- }
2542
- if (application.workers !== undefined) {
2543
- this.logger.warn(
2544
- `The "${application.id}" application cannot be scaled because` +
2545
- ` it has a fixed number of workers (${application.workers}).`
2546
- )
2547
- applicationsConfigs[application.id] = {
2548
- minWorkers: application.workers,
2549
- maxWorkers: application.workers
2550
- }
2551
- continue
2552
- }
2553
-
2554
- applicationsConfigs[application.id] ??= {}
2555
- applicationsConfigs[application.id].minWorkers ??= minWorkers
2556
- applicationsConfigs[application.id].maxWorkers ??= maxWorkers
2557
-
2558
- const appMinWorkers = applicationsConfigs[application.id].minWorkers
2559
- if (appMinWorkers > 1) {
2560
- initialResourcesUpdates.push({
2561
- application: application.id,
2562
- workers: minWorkers
2563
- })
2564
- }
2565
- }
2566
-
2567
- if (initialResourcesUpdates.length > 0) {
2568
- await this.updateApplicationsResources(initialResourcesUpdates)
2569
- }
2570
-
2571
- for (const applicationId in applicationsConfigs) {
2572
- const application = this.#config.applications.find(app => app.id === applicationId)
2573
- if (!application) {
2574
- delete applicationsConfigs[applicationId]
2575
-
2576
- this.logger.warn(
2577
- `Vertical scaler configuration has a configuration for non-existing application "${applicationId}"`
2578
- )
2579
- }
2580
- }
2581
-
2582
- const scalingAlgorithm = new ScalingAlgorithm({
2583
- maxTotalWorkers,
2584
- scaleUpELU,
2585
- scaleDownELU,
2586
- scaleUpTimeWindowSec: timeWindowSec,
2587
- scaleDownTimeWindowSec,
2588
- applications: applicationsConfigs
2589
- })
2590
-
2591
- const healthCheckTimeout = setTimeout(async () => {
2592
- let shouldCheckForScaling = false
2593
-
2594
- const now = Date.now()
2595
-
2596
- for (const worker of this.#workers.values()) {
2597
- if (worker[kWorkerStatus] !== 'started' || worker[kWorkerStartTime] + gracePeriod > now) {
2598
- continue
2599
- }
2600
-
2601
- try {
2602
- const health = await this.#getHealth(worker)
2603
- if (!health) continue
2604
-
2605
- scalingAlgorithm.addWorkerHealthInfo({
2606
- workerId: worker[kId],
2607
- applicationId: worker[kApplicationId],
2608
- elu: health.elu,
2609
- heapUsed: health.heapUsed,
2610
- heapTotal: health.heapTotal
2611
- })
2612
-
2613
- if (health.elu > scaleUpELU) {
2614
- shouldCheckForScaling = true
2615
- }
2616
- } catch (err) {
2617
- this.logger.error({ err }, 'Failed to get health for worker')
2618
- }
2619
- }
2620
-
2621
- if (shouldCheckForScaling) {
2622
- await checkForScaling()
2623
- }
2624
-
2625
- healthCheckTimeout.refresh()
2626
- }, healthCheckInterval).unref()
2627
-
2628
- let isScaling = false
2629
- let lastScaling = 0
2630
-
2631
- const checkForScaling = async () => {
2632
- const isInCooldown = Date.now() < lastScaling + cooldown * 1000
2633
- if (isScaling || isInCooldown) return
2634
- isScaling = true
2635
-
2636
- try {
2637
- const workersInfo = await this.getWorkers()
2638
- const mem = await getMemoryInfo({ scope: memScope })
2639
-
2640
- const appsWorkersInfo = {}
2641
- for (const worker of Object.values(workersInfo)) {
2642
- if (worker.status === 'exited') continue
2643
-
2644
- const applicationId = worker.application
2645
- appsWorkersInfo[applicationId] ??= 0
2646
- appsWorkersInfo[applicationId]++
2647
- }
2648
-
2649
- const availableMemory = maxTotalMemory - mem.used
2650
- const recommendations = scalingAlgorithm.getRecommendations(appsWorkersInfo, {
2651
- availableMemory
2652
- })
2653
- if (recommendations.length > 0) {
2654
- await applyRecommendations(recommendations)
2655
- lastScaling = Date.now()
2656
- }
2657
- } catch (err) {
2658
- this.logger.error({ err }, 'Failed to scale applications')
2659
- } finally {
2660
- isScaling = false
2661
- }
2662
- }
2663
-
2664
- const applyRecommendations = async recommendations => {
2665
- const resourcesUpdates = []
2666
- for (const recommendation of recommendations) {
2667
- const { applicationId, workersCount, direction } = recommendation
2668
- this.logger.info(`Scaling ${direction} the "${applicationId}" app to ${workersCount} workers`)
2669
-
2670
- resourcesUpdates.push({
2671
- application: applicationId,
2672
- workers: workersCount
2673
- })
2674
- }
2675
- await this.updateApplicationsResources(resourcesUpdates)
2676
- }
2677
-
2678
- // Interval for periodic scaling checks
2679
- setInterval(checkForScaling, scaleIntervalSec * 1000).unref()
2680
- }
2681
-
2682
2647
  #setupPermissions (applicationConfig) {
2683
2648
  const argv = []
2684
2649
  const allows = new Set()
@@ -2722,4 +2687,11 @@ export class Runtime extends EventEmitter {
2722
2687
  argv.push('--permission', ...allows)
2723
2688
  return argv
2724
2689
  }
2690
+
2691
+ #processHealthSignals ({ workerId, signals }) {
2692
+ const worker = this.#workers.get(workerId)
2693
+
2694
+ worker[kWorkerHealthSignals] ??= new HealthSignalsQueue()
2695
+ worker[kWorkerHealthSignals].add(signals)
2696
+ }
2725
2697
  }