@platformatic/runtime 3.13.1 → 3.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/runtime.js CHANGED
@@ -16,7 +16,6 @@ import { existsSync } from 'node:fs'
16
16
  import { readFile } from 'node:fs/promises'
17
17
  import { STATUS_CODES } from 'node:http'
18
18
  import { createRequire } from 'node:module'
19
- import os from 'node:os'
20
19
  import { dirname, isAbsolute, join } from 'node:path'
21
20
  import { setImmediate as immediate, setTimeout as sleep } from 'node:timers/promises'
22
21
  import { pathToFileURL } from 'node:url'
@@ -25,45 +24,47 @@ import SonicBoom from 'sonic-boom'
25
24
  import { Agent, request, interceptors as undiciInterceptors } from 'undici'
26
25
  import { createThreadInterceptor } from 'undici-thread-interceptor'
27
26
  import { pprofCapturePreloadPath } from './config.js'
27
+ import { DynamicWorkersScaler } from './dynamic-workers-scaler.js'
28
28
  import {
29
29
  ApplicationAlreadyStartedError,
30
30
  ApplicationNotFoundError,
31
31
  ApplicationNotStartedError,
32
32
  ApplicationStartTimeoutError,
33
+ CannotRemoveEntrypointError,
34
+ GetHeapStatisticUnavailable,
33
35
  InvalidArgumentError,
34
36
  MessagingError,
35
37
  MissingEntrypointError,
36
38
  MissingPprofCapture,
37
39
  RuntimeAbortedError,
38
40
  RuntimeExitedError,
39
- WorkerNotFoundError,
40
- GetHeapStatisticUnavailable
41
+ WorkerNotFoundError
41
42
  } from './errors.js'
42
43
  import { abstractLogger, createLogger } from './logger.js'
43
44
  import { startManagementApi } from './management-api.js'
44
- import { getMemoryInfo } from './metrics.js'
45
45
  import { createChannelCreationHook } from './policies.js'
46
46
  import { startPrometheusServer } from './prom-server.js'
47
- import ScalingAlgorithm from './scaling-algorithm.js'
48
47
  import { startScheduler } from './scheduler.js'
49
48
  import { createSharedStore } from './shared-http-cache.js'
50
49
  import { version } from './version.js'
51
50
  import { sendViaITC, waitEventFromITC } from './worker/itc.js'
52
51
  import { RoundRobinMap } from './worker/round-robin-map.js'
52
+ import { HealthSignalsQueue } from './worker/health-signals.js'
53
53
  import {
54
54
  kApplicationId,
55
55
  kConfig,
56
56
  kFullId,
57
57
  kHealthCheckTimer,
58
+ kHealthMetricsTimer,
58
59
  kId,
59
60
  kITC,
60
61
  kLastHealthCheckELU,
61
- kLastVerticalScalerELU,
62
62
  kStderrMarker,
63
63
  kWorkerId,
64
64
  kWorkersBroadcast,
65
65
  kWorkerStartTime,
66
- kWorkerStatus
66
+ kWorkerStatus,
67
+ kWorkerHealthSignals
67
68
  } from './worker/symbols.js'
68
69
 
69
70
  const kWorkerFile = join(import.meta.dirname, 'worker/main.js')
@@ -108,11 +109,13 @@ export class Runtime extends EventEmitter {
108
109
  #metricsLabelName
109
110
 
110
111
  #applicationsConfigsPatches
112
+ #applications
111
113
  #workers
112
- #workersConfigs
113
114
  #workersBroadcastChannel
114
115
  #workerITCHandlers
116
+ #restartingApplications
115
117
  #restartingWorkers
118
+ #dynamicWorkersScaler
116
119
 
117
120
  #sharedHttpCache
118
121
  #scheduler
@@ -129,6 +132,7 @@ export class Runtime extends EventEmitter {
129
132
  this.#context = context ?? {}
130
133
  this.#isProduction = this.#context.isProduction ?? this.#context.production ?? false
131
134
  this.#concurrency = this.#context.concurrency ?? MAX_CONCURRENCY
135
+ this.#applications = new Map()
132
136
  this.#workers = new RoundRobinMap()
133
137
  this.#url = undefined
134
138
  this.#channelCreationHook = createChannelCreationHook(this.#config)
@@ -139,6 +143,7 @@ export class Runtime extends EventEmitter {
139
143
  })
140
144
  this.logger = abstractLogger // This is replaced by the real logger in init() and eventually removed in close()
141
145
  this.#status = undefined
146
+ this.#restartingApplications = new Set()
142
147
  this.#restartingWorkers = new Map()
143
148
  this.#sharedHttpCache = null
144
149
  this.#applicationsConfigsPatches = new Map()
@@ -161,9 +166,18 @@ export class Runtime extends EventEmitter {
161
166
  deleteHttpCacheValue: this.#deleteHttpCacheValue.bind(this),
162
167
  invalidateHttpCache: this.invalidateHttpCache.bind(this),
163
168
  updateSharedContext: this.updateSharedContext.bind(this),
164
- getSharedContext: this.getSharedContext.bind(this)
169
+ getSharedContext: this.getSharedContext.bind(this),
170
+ sendHealthSignals: this.#processHealthSignals.bind(this)
165
171
  }
166
172
  this.#sharedContext = {}
173
+
174
+ if (this.#isProduction) {
175
+ this.#env.PLT_DEV = 'false'
176
+ this.#env.PLT_ENVIRONMENT = 'production'
177
+ } else {
178
+ this.#env.PLT_DEV = 'true'
179
+ this.#env.PLT_ENVIRONMENT = 'development'
180
+ }
167
181
  }
168
182
 
169
183
  async init () {
@@ -193,28 +207,17 @@ export class Runtime extends EventEmitter {
193
207
 
194
208
  this.#createWorkersBroadcastChannel()
195
209
 
196
- this.#workersConfigs = {}
197
- for (const application of this.#config.applications) {
198
- let count = application.workers ?? this.#config.workers ?? 1
199
- if (count > 1 && application.entrypoint && !features.node.reusePort) {
210
+ if (this.#config.workers.dynamic) {
211
+ if (this.#config.workers.dynamic === false) {
200
212
  this.logger.warn(
201
- `"${application.id}" is set as the entrypoint, but reusePort is not available in your OS; setting workers to 1 instead of ${count}`
213
+ `Vertical scaler disabled because the "workers" configuration is set to ${this.#config.workers.static}.`
202
214
  )
203
- count = 1
215
+ } else {
216
+ this.#dynamicWorkersScaler = new DynamicWorkersScaler(this, this.#config.workers)
204
217
  }
205
- this.#workersConfigs[application.id] = { count }
206
218
  }
207
219
 
208
- if (this.#isProduction) {
209
- this.#env.PLT_DEV = 'false'
210
- this.#env.PLT_ENVIRONMENT = 'production'
211
- } else {
212
- this.#env.PLT_DEV = 'true'
213
- this.#env.PLT_ENVIRONMENT = 'development'
214
- }
215
-
216
- await this.#setupApplications()
217
-
220
+ await this.addApplications(this.#config.applications)
218
221
  await this.#setDispatcher(config.undici)
219
222
 
220
223
  if (config.scheduler) {
@@ -236,12 +239,7 @@ export class Runtime extends EventEmitter {
236
239
  this.#createWorkersBroadcastChannel()
237
240
 
238
241
  try {
239
- const startInvocations = []
240
- for (const application of this.getApplicationsIds()) {
241
- startInvocations.push([application, silent])
242
- }
243
-
244
- await executeInParallel(this.startApplication.bind(this), startInvocations, this.#concurrency)
242
+ await this.startApplications(this.getApplicationsIds(), silent)
245
243
 
246
244
  if (this.#config.inspectorOptions) {
247
245
  const { port } = this.#config.inspectorOptions
@@ -287,66 +285,34 @@ export class Runtime extends EventEmitter {
287
285
  this.startCollectingMetrics()
288
286
  }
289
287
 
290
- if (this.#config.verticalScaler?.enabled) {
291
- await this.#setupVerticalScaler()
292
- }
293
-
288
+ await this.#dynamicWorkersScaler?.start()
294
289
  this.#showUrl()
295
290
  return this.#url
296
291
  }
297
292
 
298
293
  async stop (silent = false) {
299
- if (this.#scheduler) {
300
- await this.#scheduler.stop()
301
- }
302
-
303
294
  if (this.#status === 'starting') {
304
295
  await once(this, 'started')
305
296
  }
306
297
 
307
298
  this.#updateStatus('stopping')
308
299
 
300
+ if (this.#scheduler) {
301
+ await this.#scheduler.stop()
302
+ }
303
+
309
304
  if (this.#inspectorServer) {
310
305
  await this.#inspectorServer.close()
311
306
  }
312
307
 
308
+ await this.#dynamicWorkersScaler?.stop()
309
+
313
310
  // Stop the entrypoint first so that no new requests are accepted
314
311
  if (this.#entrypointId) {
315
312
  await this.stopApplication(this.#entrypointId, silent)
316
313
  }
317
314
 
318
- const stopInvocations = []
319
-
320
- // Construct the reverse dependency graph
321
- const dependents = {}
322
-
323
- try {
324
- const allApplications = await this.getApplications(true)
325
- for (const application of allApplications.applications) {
326
- for (const dependency of application.dependencies ?? []) {
327
- let applicationDependents = dependents[dependency]
328
- if (!applicationDependents) {
329
- applicationDependents = new Set()
330
- dependents[dependency] = applicationDependents
331
- }
332
-
333
- applicationDependents.add(application.id)
334
- }
335
- }
336
- } catch (e) {
337
- // Noop - This only happens if stop is invoked after a failed start, in which case we don't care about deps
338
- }
339
-
340
- for (const application of this.getApplicationsIds()) {
341
- // The entrypoint has been stopped above
342
- if (application === this.#entrypointId) {
343
- continue
344
- }
345
-
346
- stopInvocations.push([application, silent, Array.from(dependents[application] ?? [])])
347
- }
348
-
349
- await executeInParallel(this.stopApplication.bind(this), stopInvocations, this.#concurrency)
315
+ await this.stopApplications(this.getApplicationsIds(), silent)
350
316
 
351
317
  await this.#meshInterceptor.close()
352
318
  this.#workersBroadcastChannel?.close()
@@ -357,14 +323,13 @@ export class Runtime extends EventEmitter {
357
323
  async restart (applications = []) {
358
324
  this.emitAndNotify('restarting')
359
325
 
360
- const restartInvocations = []
326
+ const toRestart = []
361
327
  for (const application of this.getApplicationsIds()) {
362
328
  if (applications.length === 0 || applications.includes(application)) {
363
- restartInvocations.push([application])
329
+ toRestart.push(application)
364
330
  }
365
331
  }
366
-
367
- await executeInParallel(this.restartApplication.bind(this), restartInvocations, this.#concurrency)
332
+ await this.restartApplications(toRestart)
368
333
 
369
334
  this.emitAndNotify('restarted')
370
335
 
@@ -478,17 +443,120 @@ export class Runtime extends EventEmitter {
478
443
  }
479
444
  }
480
445
 
446
+ async addApplications (applications, start = false) {
447
+ const setupInvocations = []
448
+
449
+ const toStart = []
450
+ for (const application of applications) {
451
+ const workers = application.workers
452
+
453
+ if ((workers.static > 1 || workers.minimum > 1) && application.entrypoint && !features.node.reusePort) {
454
+ this.logger.warn(
455
+ `"${application.id}" is set as the entrypoint, but reusePort is not available in your OS; setting workers to 1 instead of ${workers.static}`
456
+ )
457
+ workers.static = 1
458
+ workers.minimum = 1
459
+ }
460
+
461
+ this.#applications.set(application.id, application)
462
+ setupInvocations.push([application])
463
+ toStart.push(application.id)
464
+ }
465
+
466
+ await executeInParallel(this.#setupApplication.bind(this), setupInvocations, this.#concurrency)
467
+
468
+ for (const application of applications) {
469
+ this.emitAndNotify('application:added', application)
470
+ }
471
+
472
+ if (start) {
473
+ await this.startApplications(toStart)
474
+ }
475
+ }
476
+
477
+ async removeApplications (applications, silent = false) {
478
+ if (applications.includes(this.#entrypointId)) {
479
+ throw new CannotRemoveEntrypointError()
480
+ }
481
+
482
+ await this.stopApplications(applications, silent, true)
483
+
484
+ for (const application of applications) {
485
+ this.#dynamicWorkersScaler?.remove(application)
486
+ this.#applications.delete(application)
487
+ }
488
+
489
+ for (const application of applications) {
490
+ this.emitAndNotify('application:removed', application)
491
+ }
492
+ }
493
+
494
+ async startApplications (applicationsToStart, silent = false) {
495
+ const startInvocations = []
496
+ for (const application of applicationsToStart) {
497
+ startInvocations.push([application, silent])
498
+ }
499
+
500
+ return executeInParallel(this.startApplication.bind(this), startInvocations, this.#concurrency)
501
+ }
502
+
503
+ async stopApplications (applicationsToStop, silent = false, skipDependencies = false) {
504
+ const stopInvocations = []
505
+
506
+ // Construct the reverse dependency graph
507
+ const dependents = {}
508
+
509
+ if (!skipDependencies) {
510
+ try {
511
+ const { applications } = await this.getApplications(true)
512
+ for (const application of applications) {
513
+ for (const dependency of application.dependencies ?? []) {
514
+ let applicationDependents = dependents[dependency]
515
+ if (!applicationDependents) {
516
+ applicationDependents = new Set()
517
+ dependents[dependency] = applicationDependents
518
+ }
519
+
520
+ applicationDependents.add(application.id)
521
+ }
522
+ }
523
+ } catch (e) {
524
+ // Noop - This only happens if stop is invoked after a failed start, in which case we don't care about deps
525
+ }
526
+ }
527
+
528
+ for (const application of applicationsToStop) {
529
+ // The entrypoint has been stopped above
530
+ if (application === this.#entrypointId) {
531
+ continue
532
+ }
533
+
534
+ stopInvocations.push([application, silent, Array.from(dependents[application] ?? [])])
535
+ }
536
+
537
+ return executeInParallel(this.stopApplication.bind(this), stopInvocations, this.#concurrency)
538
+ }
539
+
540
+ async restartApplications (applicationsToRestart) {
541
+ const restartInvocations = []
542
+
543
+ for (const application of applicationsToRestart) {
544
+ restartInvocations.push([application])
545
+ }
546
+
547
+ return executeInParallel(this.restartApplication.bind(this), restartInvocations, this.#concurrency)
548
+ }
549
+
481
550
  async startApplication (id, silent = false) {
482
551
  const config = this.#config
483
- const applicationConfig = config.applications.find(s => s.id === id)
552
+ const applicationConfig = this.#applications.get(id)
484
553
 
485
554
  if (!applicationConfig) {
486
555
  throw new ApplicationNotFoundError(id, this.getApplicationsIds().join(', '))
487
556
  }
488
557
 
489
- const workersConfigs = this.#workersConfigs[id]
490
-
491
- for (let i = 0; i < workersConfigs.count; i++) {
558
+ const workers = applicationConfig.workers.static
559
+ for (let i = 0; i < workers; i++) {
492
560
  const worker = this.#workers.get(`${id}:${i}`)
493
561
  const status = worker?.[kWorkerStatus]
494
562
 
@@ -499,18 +567,15 @@ export class Runtime extends EventEmitter {
499
567
 
500
568
  this.emitAndNotify('application:starting', id)
501
569
 
502
- for (let i = 0; i < workersConfigs.count; i++) {
503
- await this.#startWorker(config, applicationConfig, workersConfigs.count, id, i, silent)
570
+ for (let i = 0; i < workers; i++) {
571
+ await this.#startWorker(config, applicationConfig, workers, id, i, silent)
504
572
  }
505
573
 
506
574
  this.emitAndNotify('application:started', id)
507
575
  }
508
576
 
509
577
  async stopApplication (id, silent = false, dependents = []) {
510
- const config = this.#config
511
- const applicationConfig = config.applications.find(s => s.id === id)
512
-
513
- if (!applicationConfig) {
578
+ if (!this.#applications.has(id)) {
514
579
  throw new ApplicationNotFoundError(id, this.getApplicationsIds().join(', '))
515
580
  }
516
581
 
@@ -533,26 +598,39 @@ export class Runtime extends EventEmitter {
533
598
  }
534
599
 
535
600
  async restartApplication (id) {
536
- const config = this.#config
537
- const applicationConfig = this.#config.applications.find(s => s.id === id)
601
+ const applicationConfig = this.#applications.get(id)
538
602
 
539
- const workersIds = await this.#workers.getKeys(id)
540
- const workersCount = workersIds.length
603
+ if (!applicationConfig) {
604
+ throw new ApplicationNotFoundError(id, this.getApplicationsIds().join(', '))
605
+ }
541
606
 
542
- this.emitAndNotify('application:restarting', id)
607
+ if (this.#restartingApplications.has(id)) {
608
+ return
609
+ }
610
+ this.#restartingApplications.add(id)
543
611
 
544
- for (let i = 0; i < workersCount; i++) {
545
- const workerId = workersIds[i]
546
- const worker = this.#workers.get(workerId)
612
+ try {
613
+ const config = this.#config
614
+ const workersIds = await this.#workers.getKeys(id)
615
+ const workersCount = workersIds.length
547
616
 
548
- if (i > 0 && config.workersRestartDelay > 0) {
549
- await sleep(config.workersRestartDelay)
617
+ this.emitAndNotify('application:restarting', id)
618
+
619
+ for (let i = 0; i < workersCount; i++) {
620
+ const workerId = workersIds[i]
621
+ const worker = this.#workers.get(workerId)
622
+
623
+ if (i > 0 && config.workersRestartDelay > 0) {
624
+ await sleep(config.workersRestartDelay)
625
+ }
626
+
627
+ await this.#replaceWorker(config, applicationConfig, workersCount, id, i, worker, true)
550
628
  }
551
629
 
552
- await this.#replaceWorker(config, applicationConfig, workersCount, id, i, worker, true)
630
+ this.emitAndNotify('application:restarted', id)
631
+ } finally {
632
+ this.#restartingApplications.delete(id)
553
633
  }
554
-
555
- this.emitAndNotify('application:restarted', id)
556
634
  }
557
635
 
558
636
  async buildApplication (id) {
@@ -804,7 +882,7 @@ export class Runtime extends EventEmitter {
804
882
  this.#concurrency = concurrency
805
883
  }
806
884
 
807
- async getUrl () {
885
+ getUrl () {
808
886
  return this.#url
809
887
  }
810
888
 
@@ -867,8 +945,8 @@ export class Runtime extends EventEmitter {
867
945
  async getCustomHealthChecks () {
868
946
  const status = {}
869
947
 
870
- for (const application of this.#config.applications) {
871
- const workersIds = this.#workers.getKeys(application.id)
948
+ for (const id of this.#applications.keys()) {
949
+ const workersIds = this.#workers.getKeys(id)
872
950
  for (const workerId of workersIds) {
873
951
  const worker = this.#workers.get(workerId)
874
952
  status[workerId] = await sendViaITC(worker, 'getCustomHealthCheck')
@@ -881,8 +959,8 @@ export class Runtime extends EventEmitter {
881
959
  async getCustomReadinessChecks () {
882
960
  const status = {}
883
961
 
884
- for (const application of this.#config.applications) {
885
- const workersIds = this.#workers.getKeys(application.id)
962
+ for (const id of this.#applications.keys()) {
963
+ const workersIds = this.#workers.getKeys(id)
886
964
  for (const workerId of workersIds) {
887
965
  const worker = this.#workers.get(workerId)
888
966
  status[workerId] = await sendViaITC(worker, 'getCustomReadinessCheck')
@@ -1063,7 +1141,7 @@ export class Runtime extends EventEmitter {
1063
1141
  }
1064
1142
 
1065
1143
  getApplicationsIds () {
1066
- return this.#config.applications.map(application => application.id)
1144
+ return Array.from(this.#applications.keys())
1067
1145
  }
1068
1146
 
1069
1147
  async getApplications (allowUnloaded = false) {
@@ -1076,22 +1154,6 @@ export class Runtime extends EventEmitter {
1076
1154
  }
1077
1155
  }
1078
1156
 
1079
- async getWorkers () {
1080
- const status = {}
1081
-
1082
- for (const [key, worker] of this.#workers.entries()) {
1083
- const [application, index] = key.split(':')
1084
- status[key] = {
1085
- application,
1086
- worker: index,
1087
- status: worker[kWorkerStatus],
1088
- thread: worker.threadId
1089
- }
1090
- }
1091
-
1092
- return status
1093
- }
1094
-
1095
1157
  async getApplicationMeta (id) {
1096
1158
  const application = await this.#getApplicationById(id)
1097
1159
 
@@ -1174,6 +1236,45 @@ export class Runtime extends EventEmitter {
1174
1236
  return sendViaITC(application, 'getApplicationGraphQLSchema')
1175
1237
  }
1176
1238
 
1239
+ async getWorkers (includeRaw = false) {
1240
+ const status = {}
1241
+
1242
+ for (const [key, worker] of this.#workers.entries()) {
1243
+ const [application, index] = key.split(':')
1244
+
1245
+ status[key] = {
1246
+ application,
1247
+ worker: index,
1248
+ status: worker[kWorkerStatus],
1249
+ thread: worker.threadId,
1250
+ raw: includeRaw ? worker : undefined
1251
+ }
1252
+ }
1253
+
1254
+ return status
1255
+ }
1256
+
1257
+ async getWorkerHealth (worker, options = {}) {
1258
+ if (!features.node.worker.getHeapStatistics) {
1259
+ throw new GetHeapStatisticUnavailable()
1260
+ }
1261
+
1262
+ const currentELU = worker.performance.eventLoopUtilization()
1263
+ const previousELU = options.previousELU
1264
+
1265
+ let elu = currentELU
1266
+ if (previousELU) {
1267
+ elu = worker.performance.eventLoopUtilization(elu, previousELU)
1268
+ }
1269
+
1270
+ const { used_heap_size: heapUsed, total_heap_size: heapTotal } = await worker.getHeapStatistics()
1271
+ return { elu: elu.utilization, heapUsed, heapTotal, currentELU }
1272
+ }
1273
+
1274
+ getDynamicWorkersScaler () {
1275
+ return this.#dynamicWorkersScaler
1276
+ }
1277
+
1177
1278
  #getHttpCacheValue ({ request }) {
1178
1279
  if (!this.#sharedHttpCache) {
1179
1280
  return
@@ -1225,60 +1326,49 @@ export class Runtime extends EventEmitter {
1225
1326
  this.logger.info(`Platformatic is now listening at ${this.#url}`)
1226
1327
  }
1227
1328
 
1228
- async #setupApplications () {
1329
+ async #setupApplication (applicationConfig) {
1330
+ if (this.#status === 'stopping' || this.#status === 'closed') {
1331
+ return
1332
+ }
1333
+
1334
+ const id = applicationConfig.id
1229
1335
  const config = this.#config
1230
- const setupInvocations = []
1231
1336
 
1232
- // Parse all applications and verify we're not missing any path or resolved application
1233
- for (const applicationConfig of config.applications) {
1337
+ if (!applicationConfig.path) {
1234
1338
  // If there is no application path, check if the application was resolved
1235
- if (!applicationConfig.path) {
1236
- if (applicationConfig.url) {
1237
- // Try to backfill the path for external applications
1238
- applicationConfig.path = join(this.#root, config.resolvedApplicationsBasePath, applicationConfig.id)
1339
+ if (applicationConfig.url) {
1340
+ // Try to backfill the path for external applications
1341
+ applicationConfig.path = join(this.#root, config.resolvedApplicationsBasePath, id)
1239
1342
 
1240
- if (!existsSync(applicationConfig.path)) {
1241
- const executable = globalThis.platformatic?.executable ?? 'platformatic'
1242
- this.logger.error(
1243
- `The path for application "%s" does not exist. Please run "${executable} resolve" and try again.`,
1244
- applicationConfig.id
1245
- )
1246
-
1247
- await this.closeAndThrow(new RuntimeAbortedError())
1248
- }
1249
- } else {
1343
+ if (!existsSync(applicationConfig.path)) {
1344
+ const executable = globalThis.platformatic?.executable ?? 'platformatic'
1250
1345
  this.logger.error(
1251
- 'The application "%s" has no path defined. Please check your configuration and try again.',
1252
- applicationConfig.id
1346
+ `The path for application "%s" does not exist. Please run "${executable} resolve" and try again.`,
1347
+ id
1253
1348
  )
1254
1349
 
1255
1350
  await this.closeAndThrow(new RuntimeAbortedError())
1256
1351
  }
1257
- }
1258
-
1259
- setupInvocations.push([applicationConfig])
1260
- }
1261
-
1262
- await executeInParallel(this.#setupApplication.bind(this), setupInvocations, this.#concurrency)
1263
- }
1352
+ } else {
1353
+ this.logger.error(
1354
+ 'The application "%s" has no path defined. Please check your configuration and try again.',
1355
+ id
1356
+ )
1264
1357
 
1265
- async #setupApplication (applicationConfig) {
1266
- if (this.#status === 'stopping' || this.#status === 'closed') {
1267
- return
1358
+ await this.closeAndThrow(new RuntimeAbortedError())
1359
+ }
1268
1360
  }
1269
1361
 
1270
- const config = this.#config
1271
-
1272
- const workersConfigs = this.#workersConfigs[applicationConfig.id]
1273
- const id = applicationConfig.id
1362
+ const workers = applicationConfig.workers.static
1274
1363
  const setupInvocations = []
1275
1364
 
1276
- for (let i = 0; i < workersConfigs.count; i++) {
1277
- setupInvocations.push([config, applicationConfig, workersConfigs.count, id, i])
1365
+ for (let i = 0; i < workers; i++) {
1366
+ setupInvocations.push([config, applicationConfig, workers, id, i])
1278
1367
  }
1279
1368
 
1280
1369
  await executeInParallel(this.#setupWorker.bind(this), setupInvocations, this.#concurrency)
1281
1370
 
1371
+ await this.#dynamicWorkersScaler?.add(applicationConfig)
1282
1372
  this.emitAndNotify('application:init', id)
1283
1373
  }
1284
1374
 
@@ -1472,6 +1562,14 @@ export class Runtime extends EventEmitter {
1472
1562
  this.logger.trace({ event, payload }, 'Runtime event')
1473
1563
  })
1474
1564
 
1565
+ worker[kITC].on('request:restart', async () => {
1566
+ try {
1567
+ await this.restartApplication(applicationId)
1568
+ } catch (e) {
1569
+ this.logger.error(e)
1570
+ }
1571
+ })
1572
+
1475
1573
  // Only activate watch for the first instance
1476
1574
  if (index === 0) {
1477
1575
  // Handle applications changes
@@ -1523,100 +1621,141 @@ export class Runtime extends EventEmitter {
1523
1621
  return worker
1524
1622
  }
1525
1623
 
1526
- async #getHealth (worker, options = {}) {
1527
- if (!features.node.worker.getHeapStatistics) {
1528
- throw new GetHeapStatisticUnavailable()
1529
- }
1530
-
1531
- const currentELU = worker.performance.eventLoopUtilization()
1532
- const previousELU = options.previousELU
1533
-
1534
- let elu = currentELU
1535
- if (previousELU) {
1536
- elu = worker.performance.eventLoopUtilization(elu, previousELU)
1537
- }
1538
-
1539
- const { used_heap_size: heapUsed, total_heap_size: heapTotal } = await worker.getHeapStatistics()
1540
- return { elu: elu.utilization, heapUsed, heapTotal, currentELU }
1541
- }
1542
-
1543
- #setupHealthCheck (config, applicationConfig, workersCount, id, index, worker, errorLabel) {
1624
+ #setupHealthMetrics (id, index, worker, errorLabel) {
1544
1625
  // Clear the timeout when exiting
1545
- worker.on('exit', () => clearTimeout(worker[kHealthCheckTimer]))
1546
-
1547
- const { maxELU, maxHeapUsed, maxHeapTotal, maxUnhealthyChecks, interval } = worker[kConfig].health
1548
- const maxHeapTotalNumber = typeof maxHeapTotal === 'string' ? parseMemorySize(maxHeapTotal) : maxHeapTotal
1549
-
1550
- let unhealthyChecks = 0
1626
+ worker.on('exit', () => clearTimeout(worker[kHealthMetricsTimer]))
1551
1627
 
1552
- worker[kHealthCheckTimer] = setTimeout(async () => {
1553
- if (worker[kWorkerStatus] !== 'started') {
1554
- return
1555
- }
1628
+ worker[kHealthMetricsTimer] = setTimeout(async () => {
1629
+ if (worker[kWorkerStatus] !== 'started') return
1556
1630
 
1557
- let health, unhealthy, memoryUsage
1631
+ let health = null
1558
1632
  try {
1559
- health = await this.#getHealth(worker, {
1633
+ health = await this.getWorkerHealth(worker, {
1560
1634
  previousELU: worker[kLastHealthCheckELU]
1561
1635
  })
1562
- worker[kLastHealthCheckELU] = health.currentELU
1563
- memoryUsage = health.heapUsed / maxHeapTotalNumber
1564
- unhealthy = health.elu > maxELU || memoryUsage > maxHeapUsed
1565
1636
  } catch (err) {
1566
1637
  this.logger.error({ err }, `Failed to get health for ${errorLabel}.`)
1567
- worker[kLastHealthCheckELU] = null
1568
- unhealthy = true
1569
- memoryUsage = -1
1570
- health = { elu: -1, heapUsed: -1, heapTotal: -1 }
1638
+ } finally {
1639
+ worker[kLastHealthCheckELU] = health?.currentELU ?? null
1571
1640
  }
1572
1641
 
1573
- this.emitAndNotify('application:worker:health', {
1642
+ const healthSignals = worker[kWorkerHealthSignals]?.getAll() ?? []
1643
+
1644
+ this.emitAndNotify('application:worker:health:metrics', {
1574
1645
  id: worker[kId],
1575
1646
  application: id,
1576
1647
  worker: index,
1577
1648
  currentHealth: health,
1578
- unhealthy,
1579
- healthConfig: worker[kConfig].health
1649
+ healthSignals
1580
1650
  })
1581
1651
 
1582
- if (unhealthy) {
1652
+ worker[kHealthMetricsTimer].refresh()
1653
+ }, 1000).unref()
1654
+ }
1655
+
1656
+ #setupHealthCheck (config, applicationConfig, workersCount, id, index, worker, errorLabel) {
1657
+ let healthMetricsListener = null
1658
+
1659
+ // Clear the timeout and listener when exiting
1660
+ worker.on('exit', () => {
1661
+ clearTimeout(worker[kHealthCheckTimer])
1662
+ if (healthMetricsListener) {
1663
+ this.removeListener('application:worker:health:metrics', healthMetricsListener)
1664
+ }
1665
+ })
1666
+
1667
+ const healthConfig = worker[kConfig].health
1668
+
1669
+ let {
1670
+ maxELU,
1671
+ maxHeapUsed,
1672
+ maxHeapTotal,
1673
+ maxUnhealthyChecks,
1674
+ interval
1675
+ } = worker[kConfig].health
1676
+
1677
+ if (typeof maxHeapTotal === 'string') {
1678
+ maxHeapTotal = parseMemorySize(maxHeapTotal)
1679
+ }
1680
+
1681
+ if (interval < 1000) {
1682
+ interval = 1000
1683
+ this.logger.warn(
1684
+ `The health check interval for the "${errorLabel}" is set to ${healthConfig.interval}ms. ` +
1685
+ 'The minimum health check interval is 1s. It will be set to 1000ms.'
1686
+ )
1687
+ }
1688
+
1689
+ let lastHealthMetrics = null
1690
+
1691
+ healthMetricsListener = healthCheck => {
1692
+ if (healthCheck.id === worker[kId]) {
1693
+ lastHealthMetrics = healthCheck
1694
+ }
1695
+ }
1696
+
1697
+ this.on('application:worker:health:metrics', healthMetricsListener)
1698
+
1699
+ let unhealthyChecks = 0
1700
+
1701
+ worker[kHealthCheckTimer] = setTimeout(async () => {
1702
+ if (worker[kWorkerStatus] !== 'started') return
1703
+
1704
+ if (lastHealthMetrics) {
1705
+ const health = lastHealthMetrics.currentHealth
1706
+ const memoryUsage = health.heapUsed / maxHeapTotal
1707
+ const unhealthy = health.elu > maxELU || memoryUsage > maxHeapUsed
1708
+
1709
+ this.emitAndNotify('application:worker:health', {
1710
+ id: worker[kId],
1711
+ application: id,
1712
+ worker: index,
1713
+ currentHealth: health,
1714
+ unhealthy,
1715
+ healthConfig
1716
+ })
1717
+
1583
1718
  if (health.elu > maxELU) {
1584
1719
  this.logger.error(
1585
- `The ${errorLabel} has an ELU of ${(health.elu * 100).toFixed(2)} %, above the maximum allowed usage of ${(maxELU * 100).toFixed(2)} %.`
1720
+ `The ${errorLabel} has an ELU of ${(health.elu * 100).toFixed(2)} %, ` +
1721
+ `above the maximum allowed usage of ${(maxELU * 100).toFixed(2)} %.`
1586
1722
  )
1587
1723
  }
1588
1724
 
1589
1725
  if (memoryUsage > maxHeapUsed) {
1590
1726
  this.logger.error(
1591
- `The ${errorLabel} is using ${(memoryUsage * 100).toFixed(2)} % of the memory, above the maximum allowed usage of ${(maxHeapUsed * 100).toFixed(2)} %.`
1727
+ `The ${errorLabel} is using ${(memoryUsage * 100).toFixed(2)} % of the memory, ` +
1728
+ `above the maximum allowed usage of ${(maxHeapUsed * 100).toFixed(2)} %.`
1592
1729
  )
1593
1730
  }
1594
1731
 
1595
- unhealthyChecks++
1596
- } else {
1597
- unhealthyChecks = 0
1598
- }
1732
+ if (unhealthy) {
1733
+ unhealthyChecks++
1734
+ } else {
1735
+ unhealthyChecks = 0
1736
+ }
1599
1737
 
1600
- if (unhealthyChecks === maxUnhealthyChecks) {
1601
- try {
1602
- this.emitAndNotify('application:worker:unhealthy', { application: id, worker: index })
1738
+ if (unhealthyChecks === maxUnhealthyChecks) {
1739
+ try {
1740
+ this.emitAndNotify('application:worker:unhealthy', { application: id, worker: index })
1603
1741
 
1604
- this.logger.error(
1605
- { elu: health.elu, maxELU, memoryUsage: health.heapUsed, maxMemoryUsage: maxHeapUsed },
1742
+ this.logger.error(
1743
+ { elu: health.elu, maxELU, memoryUsage: health.heapUsed, maxMemoryUsage: maxHeapUsed },
1606
1744
  `The ${errorLabel} is unhealthy. Replacing it ...`
1607
- )
1745
+ )
1608
1746
 
1609
- await this.#replaceWorker(config, applicationConfig, workersCount, id, index, worker)
1610
- } catch (e) {
1611
- this.logger.error(
1612
- { elu: health.elu, maxELU, memoryUsage: health.heapUsed, maxMemoryUsage: maxHeapUsed },
1747
+ await this.#replaceWorker(config, applicationConfig, workersCount, id, index, worker)
1748
+ } catch (e) {
1749
+ this.logger.error(
1750
+ { elu: health.elu, maxELU, memoryUsage: health.heapUsed, maxMemoryUsage: maxHeapUsed },
1613
1751
  `Cannot replace the ${errorLabel}. Forcefully terminating it ...`
1614
- )
1752
+ )
1615
1753
 
1616
- worker.terminate()
1754
+ worker.terminate()
1755
+ }
1756
+ } else {
1757
+ worker[kHealthCheckTimer].refresh()
1617
1758
  }
1618
- } else {
1619
- worker[kHealthCheckTimer].refresh()
1620
1759
  }
1621
1760
  }, interval).unref()
1622
1761
  }
@@ -1684,6 +1823,8 @@ export class Runtime extends EventEmitter {
1684
1823
  this.logger.info(`Started the ${label}...`)
1685
1824
  }
1686
1825
 
1826
+ this.#setupHealthMetrics(id, index, worker, label)
1827
+
1687
1828
  const { enabled, gracePeriod } = worker[kConfig].health
1688
1829
  if (enabled && config.restartOnError > 0) {
1689
1830
  // if gracePeriod is 0, it will be set to 1 to start health checks immediately
@@ -1934,6 +2075,10 @@ export class Runtime extends EventEmitter {
1934
2075
  workerId = matched[2]
1935
2076
  }
1936
2077
 
2078
+ if (!this.#applications.has(applicationId)) {
2079
+ throw new ApplicationNotFoundError(applicationId, this.getApplicationsIds().join(', '))
2080
+ }
2081
+
1937
2082
  return this.#getWorkerByIdOrNext(applicationId, workerId, ensureStarted, mustExist)
1938
2083
  }
1939
2084
 
@@ -2158,9 +2303,7 @@ export class Runtime extends EventEmitter {
2158
2303
  async #updateApplicationConfigWorkers (applicationId, workers) {
2159
2304
  this.logger.info(`Updating application "${applicationId}" config workers to ${workers}`)
2160
2305
 
2161
- this.#config.applications.find(s => s.id === applicationId).workers = workers
2162
- const application = await this.#getApplicationById(applicationId)
2163
- application[kConfig].workers = workers
2306
+ this.#applications.get(applicationId).workers.static = workers
2164
2307
 
2165
2308
  const workersIds = this.#workers.getKeys(applicationId)
2166
2309
  const promises = []
@@ -2183,7 +2326,7 @@ export class Runtime extends EventEmitter {
2183
2326
  this.logger.info(`Updating application "${applicationId}" config health heap to ${JSON.stringify(health)}`)
2184
2327
  const { maxHeapTotal, maxYoungGeneration } = health
2185
2328
 
2186
- const application = this.#config.applications.find(s => s.id === applicationId)
2329
+ const application = this.#applications.get(applicationId)
2187
2330
  if (maxHeapTotal) {
2188
2331
  application.health.maxHeapTotal = maxHeapTotal
2189
2332
  }
@@ -2200,7 +2343,6 @@ export class Runtime extends EventEmitter {
2200
2343
  throw new InvalidArgumentError('updates', 'must have at least one element')
2201
2344
  }
2202
2345
 
2203
- const config = this.#config
2204
2346
  const validatedUpdates = []
2205
2347
  for (const update of updates) {
2206
2348
  const { application: applicationId } = update
@@ -2208,7 +2350,7 @@ export class Runtime extends EventEmitter {
2208
2350
  if (!applicationId) {
2209
2351
  throw new InvalidArgumentError('application', 'must be a string')
2210
2352
  }
2211
- const applicationConfig = config.applications.find(s => s.id === applicationId)
2353
+ const applicationConfig = this.#applications.get(applicationId)
2212
2354
  if (!applicationConfig) {
2213
2355
  throw new ApplicationNotFoundError(applicationId, Array.from(this.getApplicationsIds()).join(', '))
2214
2356
  }
@@ -2411,6 +2553,10 @@ export class Runtime extends EventEmitter {
2411
2553
  )
2412
2554
  }
2413
2555
  report.success = true
2556
+
2557
+ if (report.success) {
2558
+ this.emitAndNotify('application:resources:health:updated', { application: applicationId, health })
2559
+ }
2414
2560
  } catch (err) {
2415
2561
  if (report.updated.length < 1) {
2416
2562
  this.logger.error({ err }, 'Cannot update application health heap, no worker updated')
@@ -2483,6 +2629,10 @@ export class Runtime extends EventEmitter {
2483
2629
  await this.#updateApplicationConfigWorkers(applicationId, newWorkersCount)
2484
2630
  }
2485
2631
 
2632
+ if (report.success) {
2633
+ this.emitAndNotify('application:resources:workers:updated', { application: applicationId, workers })
2634
+ }
2635
+
2486
2636
  return report
2487
2637
  }
2488
2638
 
@@ -2494,203 +2644,6 @@ export class Runtime extends EventEmitter {
2494
2644
  }
2495
2645
  }
2496
2646
 
2497
- async #setupVerticalScaler () {
2498
- const fixedWorkersCount = this.#config.workers
2499
- if (fixedWorkersCount !== undefined) {
2500
- this.logger.warn(`Vertical scaler disabled because the "workers" configuration is set to ${fixedWorkersCount}`)
2501
- return
2502
- }
2503
-
2504
- const scalerConfig = this.#config.verticalScaler
2505
- const memInfo = await getMemoryInfo()
2506
- const memScope = memInfo.scope
2507
-
2508
- scalerConfig.maxTotalWorkers ??= os.availableParallelism()
2509
- scalerConfig.maxTotalMemory ??= memInfo.total * 0.9
2510
- scalerConfig.maxWorkers ??= scalerConfig.maxTotalWorkers
2511
- scalerConfig.minWorkers ??= 1
2512
- scalerConfig.cooldownSec ??= 60
2513
- scalerConfig.scaleUpELU ??= 0.8
2514
- scalerConfig.scaleDownELU ??= 0.2
2515
- scalerConfig.scaleIntervalSec ??= 60
2516
- scalerConfig.timeWindowSec ??= 10
2517
- scalerConfig.scaleDownTimeWindowSec ??= 60
2518
- scalerConfig.gracePeriod ??= 30 * 1000
2519
- scalerConfig.applications ??= {}
2520
-
2521
- const maxTotalWorkers = scalerConfig.maxTotalWorkers
2522
- const maxTotalMemory = scalerConfig.maxTotalMemory
2523
- const maxWorkers = scalerConfig.maxWorkers
2524
- const minWorkers = scalerConfig.minWorkers
2525
- const cooldown = scalerConfig.cooldownSec
2526
- const scaleUpELU = scalerConfig.scaleUpELU
2527
- const scaleDownELU = scalerConfig.scaleDownELU
2528
- const scaleIntervalSec = scalerConfig.scaleIntervalSec
2529
- const timeWindowSec = scalerConfig.timeWindowSec
2530
- const scaleDownTimeWindowSec = scalerConfig.scaleDownTimeWindowSec
2531
- const applicationsConfigs = scalerConfig.applications
2532
- const gracePeriod = scalerConfig.gracePeriod
2533
- const healthCheckInterval = 1000
2534
-
2535
- const initialResourcesUpdates = []
2536
-
2537
- for (const application of this.#config.applications) {
2538
- if (application.entrypoint && !features.node.reusePort) {
2539
- this.logger.warn(
2540
- `The "${application.id}" application cannot be scaled because it is an entrypoint` +
2541
- ' and the "reusePort" feature is not available in your OS.'
2542
- )
2543
-
2544
- applicationsConfigs[application.id] = {
2545
- minWorkers: 1,
2546
- maxWorkers: 1
2547
- }
2548
- continue
2549
- }
2550
- if (application.workers !== undefined) {
2551
- this.logger.warn(
2552
- `The "${application.id}" application cannot be scaled because` +
2553
- ` it has a fixed number of workers (${application.workers}).`
2554
- )
2555
- applicationsConfigs[application.id] = {
2556
- minWorkers: application.workers,
2557
- maxWorkers: application.workers
2558
- }
2559
- continue
2560
- }
2561
-
2562
- applicationsConfigs[application.id] ??= {}
2563
- applicationsConfigs[application.id].minWorkers ??= minWorkers
2564
- applicationsConfigs[application.id].maxWorkers ??= maxWorkers
2565
-
2566
- const appMinWorkers = applicationsConfigs[application.id].minWorkers
2567
- if (appMinWorkers > 1) {
2568
- initialResourcesUpdates.push({
2569
- application: application.id,
2570
- workers: minWorkers
2571
- })
2572
- }
2573
- }
2574
-
2575
- if (initialResourcesUpdates.length > 0) {
2576
- await this.updateApplicationsResources(initialResourcesUpdates)
2577
- }
2578
-
2579
- for (const applicationId in applicationsConfigs) {
2580
- const application = this.#config.applications.find(app => app.id === applicationId)
2581
- if (!application) {
2582
- delete applicationsConfigs[applicationId]
2583
-
2584
- this.logger.warn(
2585
- `Vertical scaler configuration has a configuration for non-existing application "${applicationId}"`
2586
- )
2587
- }
2588
- }
2589
-
2590
- const scalingAlgorithm = new ScalingAlgorithm({
2591
- maxTotalWorkers,
2592
- scaleUpELU,
2593
- scaleDownELU,
2594
- scaleUpTimeWindowSec: timeWindowSec,
2595
- scaleDownTimeWindowSec,
2596
- applications: applicationsConfigs
2597
- })
2598
-
2599
- const healthCheckTimeout = setTimeout(async () => {
2600
- let shouldCheckForScaling = false
2601
-
2602
- const now = Date.now()
2603
-
2604
- for (const worker of this.#workers.values()) {
2605
- if (worker[kWorkerStatus] !== 'started' || worker[kWorkerStartTime] + gracePeriod > now) {
2606
- continue
2607
- }
2608
-
2609
- try {
2610
- const health = await this.#getHealth(worker, {
2611
- previousELU: worker[kLastVerticalScalerELU]
2612
- })
2613
- worker[kLastVerticalScalerELU] = health.currentELU
2614
-
2615
- if (!health) continue
2616
-
2617
- scalingAlgorithm.addWorkerHealthInfo({
2618
- workerId: worker[kId],
2619
- applicationId: worker[kApplicationId],
2620
- elu: health.elu,
2621
- heapUsed: health.heapUsed,
2622
- heapTotal: health.heapTotal
2623
- })
2624
-
2625
- if (health.elu > scaleUpELU) {
2626
- shouldCheckForScaling = true
2627
- }
2628
- } catch (err) {
2629
- this.logger.error({ err }, 'Failed to get health for worker')
2630
- }
2631
- }
2632
-
2633
- if (shouldCheckForScaling) {
2634
- await checkForScaling()
2635
- }
2636
-
2637
- healthCheckTimeout.refresh()
2638
- }, healthCheckInterval).unref()
2639
-
2640
- let isScaling = false
2641
- let lastScaling = 0
2642
-
2643
- const checkForScaling = async () => {
2644
- const isInCooldown = Date.now() < lastScaling + cooldown * 1000
2645
- if (isScaling || isInCooldown) return
2646
- isScaling = true
2647
-
2648
- try {
2649
- const workersInfo = await this.getWorkers()
2650
- const mem = await getMemoryInfo({ scope: memScope })
2651
-
2652
- const appsWorkersInfo = {}
2653
- for (const worker of Object.values(workersInfo)) {
2654
- if (worker.status === 'exited') continue
2655
-
2656
- const applicationId = worker.application
2657
- appsWorkersInfo[applicationId] ??= 0
2658
- appsWorkersInfo[applicationId]++
2659
- }
2660
-
2661
- const availableMemory = maxTotalMemory - mem.used
2662
- const recommendations = scalingAlgorithm.getRecommendations(appsWorkersInfo, {
2663
- availableMemory
2664
- })
2665
- if (recommendations.length > 0) {
2666
- await applyRecommendations(recommendations)
2667
- lastScaling = Date.now()
2668
- }
2669
- } catch (err) {
2670
- this.logger.error({ err }, 'Failed to scale applications')
2671
- } finally {
2672
- isScaling = false
2673
- }
2674
- }
2675
-
2676
- const applyRecommendations = async recommendations => {
2677
- const resourcesUpdates = []
2678
- for (const recommendation of recommendations) {
2679
- const { applicationId, workersCount, direction } = recommendation
2680
- this.logger.info(`Scaling ${direction} the "${applicationId}" app to ${workersCount} workers`)
2681
-
2682
- resourcesUpdates.push({
2683
- application: applicationId,
2684
- workers: workersCount
2685
- })
2686
- }
2687
- await this.updateApplicationsResources(resourcesUpdates)
2688
- }
2689
-
2690
- // Interval for periodic scaling checks
2691
- setInterval(checkForScaling, scaleIntervalSec * 1000).unref()
2692
- }
2693
-
2694
2647
  #setupPermissions (applicationConfig) {
2695
2648
  const argv = []
2696
2649
  const allows = new Set()
@@ -2734,4 +2687,11 @@ export class Runtime extends EventEmitter {
2734
2687
  argv.push('--permission', ...allows)
2735
2688
  return argv
2736
2689
  }
2690
+
2691
+ #processHealthSignals ({ workerId, signals }) {
2692
+ const worker = this.#workers.get(workerId)
2693
+
2694
+ worker[kWorkerHealthSignals] ??= new HealthSignalsQueue()
2695
+ worker[kWorkerHealthSignals].add(signals)
2696
+ }
2737
2697
  }