@platformatic/runtime 3.4.1 → 3.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +1 -1
  2. package/config.d.ts +224 -77
  3. package/eslint.config.js +3 -5
  4. package/index.d.ts +73 -24
  5. package/index.js +173 -29
  6. package/lib/config.js +279 -197
  7. package/lib/errors.js +126 -34
  8. package/lib/generator.js +640 -0
  9. package/lib/logger.js +43 -41
  10. package/lib/management-api.js +109 -118
  11. package/lib/prom-server.js +202 -16
  12. package/lib/runtime.js +1963 -585
  13. package/lib/scheduler.js +119 -0
  14. package/lib/schema.js +22 -234
  15. package/lib/shared-http-cache.js +43 -0
  16. package/lib/upgrade.js +6 -8
  17. package/lib/utils.js +6 -61
  18. package/lib/version.js +7 -0
  19. package/lib/versions/v1.36.0.js +2 -4
  20. package/lib/versions/v1.5.0.js +2 -4
  21. package/lib/versions/v2.0.0.js +3 -5
  22. package/lib/versions/v3.0.0.js +16 -0
  23. package/lib/worker/controller.js +302 -0
  24. package/lib/worker/http-cache.js +171 -0
  25. package/lib/worker/interceptors.js +190 -10
  26. package/lib/worker/itc.js +146 -59
  27. package/lib/worker/main.js +220 -81
  28. package/lib/worker/messaging.js +182 -0
  29. package/lib/worker/round-robin-map.js +62 -0
  30. package/lib/worker/shared-context.js +22 -0
  31. package/lib/worker/symbols.js +14 -5
  32. package/package.json +47 -38
  33. package/schema.json +1383 -55
  34. package/help/compile.txt +0 -8
  35. package/help/help.txt +0 -5
  36. package/help/start.txt +0 -21
  37. package/index.test-d.ts +0 -41
  38. package/lib/build-server.js +0 -69
  39. package/lib/compile.js +0 -98
  40. package/lib/dependencies.js +0 -59
  41. package/lib/generator/README.md +0 -32
  42. package/lib/generator/errors.js +0 -10
  43. package/lib/generator/runtime-generator.d.ts +0 -37
  44. package/lib/generator/runtime-generator.js +0 -498
  45. package/lib/start.js +0 -190
  46. package/lib/worker/app.js +0 -278
  47. package/lib/worker/default-stackable.js +0 -33
  48. package/lib/worker/metrics.js +0 -122
  49. package/runtime.mjs +0 -54
package/lib/runtime.js CHANGED
@@ -1,144 +1,271 @@
1
- 'use strict'
2
-
3
- const { once, EventEmitter } = require('node:events')
4
- const { createReadStream, watch } = require('node:fs')
5
- const { readdir, readFile, stat, access } = require('node:fs/promises')
6
- const inspector = require('node:inspector')
7
- const { join } = require('node:path')
8
- const { setTimeout: sleep } = require('node:timers/promises')
9
- const { Worker } = require('node:worker_threads')
10
- const { ITC } = require('@platformatic/itc')
11
- const { ensureLoggableError, executeWithTimeout } = require('@platformatic/utils')
12
- const ts = require('tail-file-stream')
13
- const { createThreadInterceptor } = require('undici-thread-interceptor')
14
-
15
- const { checkDependencies, topologicalSort } = require('./dependencies')
16
- const errors = require('./errors')
17
- const { createLogger } = require('./logger')
18
- const { startManagementApi } = require('./management-api')
19
- const { startPrometheusServer } = require('./prom-server')
20
- const { getRuntimeTmpDir } = require('./utils')
21
- const { sendViaITC, waitEventFromITC } = require('./worker/itc')
22
- const { kId, kITC, kConfig } = require('./worker/symbols')
23
-
24
- const platformaticVersion = require('../package.json').version
25
- const kWorkerFile = join(__dirname, 'worker/main.js')
1
+ import {
2
+ deepmerge,
3
+ ensureError,
4
+ ensureLoggableError,
5
+ executeInParallel,
6
+ executeWithTimeout,
7
+ features,
8
+ kMetadata,
9
+ kTimeout,
10
+ parseMemorySize
11
+ } from '@platformatic/foundation'
12
+ import { ITC } from '@platformatic/itc'
13
+ import fastify from 'fastify'
14
+ import { EventEmitter, once } from 'node:events'
15
+ import { existsSync } from 'node:fs'
16
+ import { readFile } from 'node:fs/promises'
17
+ import { STATUS_CODES } from 'node:http'
18
+ import { createRequire } from 'node:module'
19
+ import { join } from 'node:path'
20
+ import { setImmediate as immediate, setTimeout as sleep } from 'node:timers/promises'
21
+ import { pathToFileURL } from 'node:url'
22
+ import { Worker } from 'node:worker_threads'
23
+ import SonicBoom from 'sonic-boom'
24
+ import { Agent, request, interceptors as undiciInterceptors } from 'undici'
25
+ import { createThreadInterceptor } from 'undici-thread-interceptor'
26
+ import {
27
+ ApplicationAlreadyStartedError,
28
+ ApplicationNotFoundError,
29
+ ApplicationNotStartedError,
30
+ ApplicationStartTimeoutError,
31
+ InvalidArgumentError,
32
+ MessagingError,
33
+ MissingEntrypointError,
34
+ MissingPprofCapture,
35
+ RuntimeAbortedError,
36
+ RuntimeExitedError,
37
+ WorkerNotFoundError
38
+ } from './errors.js'
39
+ import { abstractLogger, createLogger } from './logger.js'
40
+ import { startManagementApi } from './management-api.js'
41
+ import { startPrometheusServer } from './prom-server.js'
42
+ import { startScheduler } from './scheduler.js'
43
+ import { createSharedStore } from './shared-http-cache.js'
44
+ import { version } from './version.js'
45
+ import { sendViaITC, waitEventFromITC } from './worker/itc.js'
46
+ import { RoundRobinMap } from './worker/round-robin-map.js'
47
+ import {
48
+ kApplicationId,
49
+ kConfig,
50
+ kFullId,
51
+ kHealthCheckTimer,
52
+ kId,
53
+ kITC,
54
+ kLastELU,
55
+ kStderrMarker,
56
+ kWorkerId,
57
+ kWorkersBroadcast,
58
+ kWorkerStatus
59
+ } from './worker/symbols.js'
60
+
61
+ const kWorkerFile = join(import.meta.dirname, 'worker/main.js')
62
+ const kInspectorOptions = Symbol('plt.runtime.worker.inspectorOptions')
26
63
 
27
64
  const MAX_LISTENERS_COUNT = 100
28
65
  const MAX_METRICS_QUEUE_LENGTH = 5 * 60 // 5 minutes in seconds
29
66
  const COLLECT_METRICS_TIMEOUT = 1000
30
67
 
68
+ const MAX_CONCURRENCY = 5
31
69
  const MAX_BOOTSTRAP_ATTEMPTS = 5
70
+ const IMMEDIATE_RESTART_MAX_THRESHOLD = 10
71
+ const MAX_WORKERS = 100
32
72
 
33
- class Runtime extends EventEmitter {
34
- #configManager
35
- #runtimeTmpDir
36
- #runtimeLogsDir
73
+ export class Runtime extends EventEmitter {
74
+ logger
75
+ error
76
+
77
+ #loggerDestination
78
+ #stdio
79
+
80
+ #status // starting, started, stopping, stopped, closed
81
+ #root
82
+ #config
37
83
  #env
38
- #services
39
- #servicesIds
40
- #entrypoint
84
+ #context
85
+ #sharedContext
86
+ #isProduction
87
+ #concurrency
41
88
  #entrypointId
42
89
  #url
43
- #loggerDestination
90
+
44
91
  #metrics
45
92
  #metricsTimeout
46
- #status
47
- #interceptor
93
+
94
+ #meshInterceptor
95
+ #dispatcher
96
+
48
97
  #managementApi
49
98
  #prometheusServer
50
- #startedServices
51
- #restartPromises
52
- #bootstrapAttempts
99
+ #inspectorServer
100
+ #metricsLabelName
101
+
102
+ #applicationsConfigsPatches
103
+ #workers
104
+ #workersBroadcastChannel
105
+ #workerITCHandlers
106
+ #restartingWorkers
53
107
 
54
- constructor (configManager, runtimeLogsDir, env) {
108
+ #sharedHttpCache
109
+ #scheduler
110
+
111
+ constructor (config, context) {
55
112
  super()
56
113
  this.setMaxListeners(MAX_LISTENERS_COUNT)
57
114
 
58
- this.#configManager = configManager
59
- this.#runtimeTmpDir = getRuntimeTmpDir(configManager.dirname)
60
- this.#runtimeLogsDir = runtimeLogsDir
61
- this.#env = env
62
- this.#services = new Map()
63
- this.#servicesIds = []
115
+ this.#config = config
116
+ this.#root = config[kMetadata].root
117
+ this.#env = config[kMetadata].env
118
+ this.#context = context ?? {}
119
+ this.#isProduction = this.#context.isProduction ?? this.#context.production ?? false
120
+ this.#concurrency = this.#context.concurrency ?? MAX_CONCURRENCY
121
+ this.#workers = new RoundRobinMap()
64
122
  this.#url = undefined
65
- // Note: nothing hits the main thread so there is no reason to set the globalDispatcher here
66
- this.#interceptor = createThreadInterceptor({ domain: '.plt.local', timeout: true })
123
+ this.#meshInterceptor = createThreadInterceptor({ domain: '.plt.local', timeout: this.#config.applicationTimeout })
124
+ this.logger = abstractLogger // This is replaced by the real logger in init() and eventually removed in close()
67
125
  this.#status = undefined
68
- this.#startedServices = new Map()
69
- this.#restartPromises = new Map()
70
- this.#bootstrapAttempts = new Map()
126
+ this.#restartingWorkers = new Map()
127
+ this.#sharedHttpCache = null
128
+ this.#applicationsConfigsPatches = new Map()
129
+
130
+ if (!this.#config.logger.captureStdio) {
131
+ this.#stdio = {
132
+ stdout: new SonicBoom({ fd: process.stdout.fd }),
133
+ stderr: new SonicBoom({ fd: process.stderr.fd })
134
+ }
135
+ }
136
+
137
+ this.#workerITCHandlers = {
138
+ getApplicationMeta: this.getApplicationMeta.bind(this),
139
+ listApplications: this.getApplicationsIds.bind(this),
140
+ getApplications: this.getApplications.bind(this),
141
+ getWorkers: this.getWorkers.bind(this),
142
+ getWorkerMessagingChannel: this.#getWorkerMessagingChannel.bind(this),
143
+ getHttpCacheValue: this.#getHttpCacheValue.bind(this),
144
+ setHttpCacheValue: this.#setHttpCacheValue.bind(this),
145
+ deleteHttpCacheValue: this.#deleteHttpCacheValue.bind(this),
146
+ invalidateHttpCache: this.invalidateHttpCache.bind(this),
147
+ updateSharedContext: this.updateSharedContext.bind(this),
148
+ getSharedContext: this.getSharedContext.bind(this)
149
+ }
150
+ this.#sharedContext = {}
71
151
  }
72
152
 
73
153
  async init () {
74
- const config = this.#configManager.current
75
- const autoloadEnabled = config.autoload
154
+ if (typeof this.#status !== 'undefined') {
155
+ return
156
+ }
76
157
 
77
- // This cannot be transferred to worker threads
78
- delete config.configManager
158
+ const config = this.#config
79
159
 
80
160
  if (config.managementApi) {
81
- this.#managementApi = await startManagementApi(this, this.#configManager)
161
+ this.#managementApi = await startManagementApi(this, this.#root)
82
162
  }
83
163
 
84
164
  if (config.metrics) {
165
+ // Use the configured application label name for metrics (defaults to 'applicationId')
166
+ this.#metricsLabelName = config.metrics.applicationLabel || 'applicationId'
85
167
  this.#prometheusServer = await startPrometheusServer(this, config.metrics)
168
+ } else {
169
+ // Default to applicationId if metrics are not configured
170
+ this.#metricsLabelName = 'applicationId'
86
171
  }
87
172
 
88
173
  // Create the logger
89
- const [logger, destination] = createLogger(config, this.#runtimeLogsDir)
174
+ const [logger, destination] = await createLogger(config)
90
175
  this.logger = logger
91
176
  this.#loggerDestination = destination
92
177
 
93
- // Handle inspector
94
- const inspectorOptions = config.inspectorOptions
95
- if (inspectorOptions) {
96
- /* c8 ignore next 6 */
97
- if (inspectorOptions.watchDisabled) {
98
- logger.info('debugging flags were detected. hot reloading has been disabled')
99
- }
178
+ this.#createWorkersBroadcastChannel()
100
179
 
101
- inspector.open(inspectorOptions.port, inspectorOptions.host, inspectorOptions.breakFirstLine)
180
+ const workersConfig = []
181
+ for (const application of config.applications) {
182
+ const count = application.workers ?? this.#config.workers
183
+ if (count > 1 && application.entrypoint && !features.node.reusePort) {
184
+ this.logger.warn(
185
+ `"${application.id}" is set as the entrypoint, but reusePort is not available in your OS; setting workers to 1 instead of ${count}`
186
+ )
187
+ workersConfig.push({ id: application.id, workers: 1 })
188
+ } else {
189
+ workersConfig.push({ id: application.id, workers: count })
190
+ }
102
191
  }
103
192
 
104
- // Create all services, each in is own worker thread
105
- for (const serviceConfig of config.services) {
106
- // Setup forwarding of logs from the worker threads to the main thread
107
- await this.#setupService(serviceConfig)
193
+ this.#workers.configure(workersConfig)
194
+
195
+ if (this.#isProduction) {
196
+ this.#env['PLT_DEV'] = 'false'
197
+ this.#env['PLT_ENVIRONMENT'] = 'production'
198
+ } else {
199
+ this.#env['PLT_DEV'] = 'true'
200
+ this.#env['PLT_ENVIRONMENT'] = 'development'
108
201
  }
109
202
 
110
- try {
111
- // Make sure the list exists before computing the dependencies, otherwise some services might not be stopped
112
- this.#servicesIds = config.services.map(service => service.id)
203
+ await this.#setupApplications()
113
204
 
114
- if (autoloadEnabled) {
115
- checkDependencies(config.services)
116
- this.#services = topologicalSort(this.#services, config)
117
- }
205
+ await this.#setDispatcher(config.undici)
118
206
 
119
- // Recompute the list of services after sorting
120
- this.#servicesIds = config.services.map(service => service.id)
121
- } catch (e) {
122
- await this.close()
123
- throw e
207
+ if (config.scheduler) {
208
+ this.#scheduler = startScheduler(config.scheduler, this.#dispatcher, logger)
124
209
  }
125
210
 
126
211
  this.#updateStatus('init')
127
212
  }
128
213
 
129
- async start () {
214
+ async start (silent = false) {
215
+ if (typeof this.#status === 'undefined') {
216
+ await this.init()
217
+ }
218
+
219
+ if (typeof this.#config.entrypoint === 'undefined') {
220
+ throw new MissingEntrypointError()
221
+ }
130
222
  this.#updateStatus('starting')
223
+ this.#createWorkersBroadcastChannel()
131
224
 
132
- // Important: do not use Promise.all here since it won't properly manage dependencies
133
225
  try {
134
- for (const service of this.#servicesIds) {
135
- await this.startService(service)
226
+ const startInvocations = []
227
+ for (const application of this.getApplicationsIds()) {
228
+ startInvocations.push([application, silent])
229
+ }
230
+
231
+ await executeInParallel(this.startApplication.bind(this), startInvocations, this.#concurrency)
232
+
233
+ if (this.#config.inspectorOptions) {
234
+ const { port } = this.#config.inspectorOptions
235
+
236
+ const server = fastify({
237
+ loggerInstance: this.logger.child({ name: 'inspector' }, { level: 'warn' })
238
+ })
239
+
240
+ const version = await fetch(`http://127.0.0.1:${this.#config.inspectorOptions.port + 1}/json/version`).then(
241
+ res => res.json()
242
+ )
243
+
244
+ const data = await Promise.all(
245
+ Array.from(this.#workers.values()).map(async worker => {
246
+ const data = worker[kInspectorOptions]
247
+
248
+ const res = await fetch(`http://127.0.0.1:${data.port}/json/list`)
249
+ const details = await res.json()
250
+ return {
251
+ ...details[0],
252
+ title: data.id
253
+ }
254
+ })
255
+ )
256
+
257
+ server.get('/json/list', () => data)
258
+ server.get('/json', () => data)
259
+ server.get('/json/version', () => version)
260
+
261
+ await server.listen({ port })
262
+ this.logger.info(
263
+ 'The inspector server is now listening for all applications. Open `chrome://inspect` in Google Chrome to connect.'
264
+ )
265
+ this.#inspectorServer = server
136
266
  }
137
267
  } catch (error) {
138
- // Wait for the next tick so that the error is logged first
139
- await sleep(1)
140
- await this.close()
141
- throw error
268
+ await this.closeAndThrow(error)
142
269
  }
143
270
 
144
271
  this.#updateStatus('started')
@@ -152,14 +279,60 @@ class Runtime extends EventEmitter {
152
279
  }
153
280
 
154
281
  async stop (silent = false) {
282
+ if (this.#scheduler) {
283
+ await this.#scheduler.stop()
284
+ }
285
+
155
286
  if (this.#status === 'starting') {
156
287
  await once(this, 'started')
157
288
  }
158
289
 
159
290
  this.#updateStatus('stopping')
160
- this.#startedServices.clear()
161
291
 
162
- await Promise.all(this.#servicesIds.map(service => this._stopService(service, silent)))
292
+ if (this.#inspectorServer) {
293
+ await this.#inspectorServer.close()
294
+ }
295
+
296
+ // Stop the entrypoint first so that no new requests are accepted
297
+ if (this.#entrypointId) {
298
+ await this.stopApplication(this.#entrypointId, silent)
299
+ }
300
+
301
+ const stopInvocations = []
302
+
303
+ // Construct the reverse dependency graph
304
+ const dependents = {}
305
+
306
+ try {
307
+ const allApplications = await this.getApplications(true)
308
+ for (const application of allApplications.applications) {
309
+ for (const dependency of application.dependencies ?? []) {
310
+ let applicationDependents = dependents[dependency]
311
+ if (!applicationDependents) {
312
+ applicationDependents = new Set()
313
+ dependents[dependency] = applicationDependents
314
+ }
315
+
316
+ applicationDependents.add(application.id)
317
+ }
318
+ }
319
+ } catch (e) {
320
+ // Noop - This only happens if stop is invoked after a failed start, in which case we don't care about deps
321
+ }
322
+
323
+ for (const application of this.getApplicationsIds()) {
324
+ // The entrypoint has been stopped above
325
+ if (application === this.#entrypointId) {
326
+ continue
327
+ }
328
+
329
+ stopInvocations.push([application, silent, Array.from(dependents[application] ?? [])])
330
+ }
331
+
332
+ await executeInParallel(this.stopApplication.bind(this), stopInvocations, this.#concurrency)
333
+
334
+ await this.#meshInterceptor.close()
335
+ this.#workersBroadcastChannel?.close()
163
336
 
164
337
  this.#updateStatus('stopped')
165
338
  }
@@ -168,6 +341,7 @@ class Runtime extends EventEmitter {
168
341
  this.emit('restarting')
169
342
 
170
343
  await this.stop()
344
+ this.#meshInterceptor.restart()
171
345
  await this.start()
172
346
 
173
347
  this.emit('restarted')
@@ -175,139 +349,172 @@ class Runtime extends EventEmitter {
175
349
  return this.#url
176
350
  }
177
351
 
178
- async close (fromManagementApi = false, silent = false) {
179
- this.#updateStatus('closing')
180
-
352
+ async close (silent = false) {
181
353
  clearInterval(this.#metricsTimeout)
182
354
 
183
355
  await this.stop(silent)
356
+ this.#updateStatus('closing')
184
357
 
185
- if (this.#managementApi) {
186
- if (fromManagementApi) {
187
- // This allow a close request coming from the management API to correctly be handled
188
- setImmediate(() => {
189
- this.#managementApi.close()
190
- })
191
- } else {
192
- await this.#managementApi.close()
193
- }
194
- }
358
+ // The management API closes itself via an event in management-api.js.
359
+ // This is needed to let the management API stop endpoint reply.
195
360
 
196
361
  if (this.#prometheusServer) {
197
362
  await this.#prometheusServer.close()
198
363
  }
199
364
 
200
365
  if (this.logger) {
201
- this.#loggerDestination.end()
366
+ this.#loggerDestination?.end()
202
367
 
203
- this.logger = null
368
+ this.logger = abstractLogger
204
369
  this.#loggerDestination = null
205
370
  }
206
371
 
372
+ if (this.#sharedHttpCache?.close) {
373
+ await this.#sharedHttpCache.close()
374
+ }
375
+
207
376
  this.#updateStatus('closed')
208
377
  }
209
378
 
210
- async startService (id) {
211
- if (this.#startedServices.get(id)) {
212
- throw new errors.ApplicationAlreadyStartedError()
213
- }
379
+ async closeAndThrow (error) {
380
+ this.#updateStatus('errored', error)
381
+ this.error = error
214
382
 
215
- // This is set here so that if the service fails while starting we track the status
216
- this.#startedServices.set(id, true)
383
+ // Wait for the next tick so that any pending logging is properly flushed
384
+ await sleep(1)
385
+ await this.close()
217
386
 
218
- let service = await this.#getServiceById(id, false, false)
387
+ throw error
388
+ }
219
389
 
220
- // The service was stopped, recreate the thread
221
- if (!service) {
222
- const config = this.#configManager.current
223
- const serviceConfig = config.services.find(s => s.id === id)
390
+ async inject (id, injectParams) {
391
+ // Make sure the application exists
392
+ await this.#getApplicationById(id, true)
224
393
 
225
- await this.#setupService(serviceConfig)
226
- service = await this.#getServiceById(id)
394
+ if (typeof injectParams === 'string') {
395
+ injectParams = { url: injectParams }
227
396
  }
228
397
 
229
- try {
230
- const serviceUrl = await sendViaITC(service, 'start')
231
- if (serviceUrl) {
232
- this.#url = serviceUrl
398
+ let { method, headers, body } = injectParams
399
+ const url = new URL(injectParams.url, `http://${id}.plt.local`)
400
+
401
+ if (injectParams.query) {
402
+ for (const [k, v] of Object.entries(injectParams.query)) {
403
+ url.searchParams.append(k, v)
233
404
  }
234
- this.#bootstrapAttempts.set(id, 0)
235
- } catch (error) {
236
- // TODO: handle port allocation error here
237
- if (error.code === 'EADDRINUSE') throw error
405
+ }
238
406
 
239
- this.logger.error({ err: ensureLoggableError(error) }, `Failed to start service "${id}".`)
407
+ // Stringify the body as JSON if needed
408
+ if (
409
+ body &&
410
+ typeof body === 'object' &&
411
+ headers &&
412
+ Object.entries(headers).some(([k, v]) => k.toLowerCase() === 'content-type' && v.includes('application/json'))
413
+ ) {
414
+ body = JSON.stringify(body)
415
+ }
240
416
 
241
- const config = this.#configManager.current
242
- const restartOnError = config.restartOnError
417
+ const {
418
+ statusCode: responseStatus,
419
+ headers: responseHeaders,
420
+ body: responseRawBody
421
+ } = await request(url.toString(), { method, headers, body, dispatcher: this.#dispatcher })
422
+ const responsePayload = await responseRawBody.arrayBuffer()
423
+ const responseBody = Buffer.from(responsePayload).toString('utf-8')
243
424
 
244
- if (!restartOnError) {
245
- this.logger.error(`Failed to start service "${id}".`)
246
- throw error
247
- }
425
+ return {
426
+ statusCode: responseStatus,
427
+ statusMessage: STATUS_CODES[responseStatus] || 'unknown',
428
+ headers: responseHeaders,
429
+ body: responseBody,
430
+ payload: responseBody,
431
+ rawPayload: responsePayload
432
+ }
433
+ }
248
434
 
249
- let bootstrapAttempt = this.#bootstrapAttempts.get(id)
250
- if (bootstrapAttempt++ >= MAX_BOOTSTRAP_ATTEMPTS || restartOnError === 0) {
251
- this.logger.error(`Failed to start service "${id}" after ${MAX_BOOTSTRAP_ATTEMPTS} attempts.`)
252
- throw error
253
- }
435
+ emit (event, payload) {
436
+ for (const worker of this.#workers.values()) {
437
+ worker[kITC].notify('runtime:event', { event, payload })
438
+ }
254
439
 
255
- this.logger.warn(
256
- `Starting a service "${id}" in ${restartOnError}ms. ` +
257
- `Attempt ${bootstrapAttempt} of ${MAX_BOOTSTRAP_ATTEMPTS}...`
258
- )
440
+ this.logger.trace({ event, payload }, 'Runtime event')
441
+ return super.emit(event, payload)
442
+ }
443
+
444
+ async sendCommandToApplication (id, name, message) {
445
+ const application = await this.#getApplicationById(id)
446
+
447
+ try {
448
+ return await sendViaITC(application, name, message)
449
+ } catch (e) {
450
+ // The application exports no meta, return an empty object
451
+ if (e.code === 'PLT_ITC_HANDLER_NOT_FOUND') {
452
+ return {}
453
+ }
259
454
 
260
- this.#bootstrapAttempts.set(id, bootstrapAttempt)
261
- await this.#restartCrashedService(id)
455
+ throw e
262
456
  }
263
457
  }
264
458
 
265
- // Do not rename to #stopService as this is used in tests
266
- async _stopService (id, silent) {
267
- const service = await this.#getServiceById(id, false, false)
268
-
269
- if (!service) {
270
- return
459
+ async startApplication (id, silent = false) {
460
+ // Since when an application is stopped the worker is deleted, we consider an application start if its first application
461
+ // is no longer in the init phase
462
+ const firstWorker = this.#workers.get(`${id}:0`)
463
+ if (firstWorker && firstWorker[kWorkerStatus] !== 'boot' && firstWorker[kWorkerStatus] !== 'init') {
464
+ throw new ApplicationAlreadyStartedError()
271
465
  }
272
466
 
273
- this.#startedServices.set(id, false)
467
+ const config = this.#config
468
+ const applicationConfig = config.applications.find(s => s.id === id)
274
469
 
275
- if (!silent) {
276
- this.logger?.info(`Stopping service "${id}"...`)
470
+ if (!applicationConfig) {
471
+ throw new ApplicationNotFoundError(id, this.getApplicationsIds().join(', '))
277
472
  }
278
473
 
279
- // Always send the stop message, it will shut down workers that only had ITC and interceptors setup
280
- try {
281
- await executeWithTimeout(sendViaITC(service, 'stop'), 10000)
282
- } catch (error) {
283
- this.logger?.info(
284
- { error: ensureLoggableError(error) },
285
- `Failed to stop service "${id}". Killing a worker thread.`
286
- )
287
- } finally {
288
- service[kITC].close()
289
- }
474
+ const workersCount = await this.#workers.getCount(applicationConfig.id)
290
475
 
291
- // Wait for the worker thread to finish, we're going to create a new one if the service is ever restarted
292
- const res = await executeWithTimeout(once(service, 'exit'), 10000)
476
+ this.emit('application:starting', id)
293
477
 
294
- // If the worker didn't exit in time, kill it
295
- if (res === 'timeout') {
296
- await service.terminate()
478
+ for (let i = 0; i < workersCount; i++) {
479
+ await this.#startWorker(config, applicationConfig, workersCount, id, i, silent)
297
480
  }
481
+
482
+ this.emit('application:started', id)
298
483
  }
299
484
 
300
- async buildService (id) {
301
- const service = this.#services.get(id)
485
+ async stopApplication (id, silent = false, dependents = []) {
486
+ const config = this.#config
487
+ const applicationConfig = config.applications.find(s => s.id === id)
302
488
 
303
- if (!service) {
304
- throw new errors.ServiceNotFoundError(id, Array.from(this.#services.keys()).join(', '))
489
+ if (!applicationConfig) {
490
+ throw new ApplicationNotFoundError(id, this.getApplicationsIds().join(', '))
305
491
  }
306
492
 
493
+ const workersCount = await this.#workers.getCount(applicationConfig.id)
494
+
495
+ this.emit('application:stopping', id)
496
+
497
+ if (typeof workersCount === 'number') {
498
+ const stopInvocations = []
499
+ for (let i = 0; i < workersCount; i++) {
500
+ stopInvocations.push([workersCount, id, i, silent, undefined, dependents])
501
+ }
502
+
503
+ await executeInParallel(this.#stopWorker.bind(this), stopInvocations, this.#concurrency)
504
+ }
505
+
506
+ this.emit('application:stopped', id)
507
+ }
508
+
509
+ async buildApplication (id) {
510
+ const application = await this.#getApplicationById(id)
511
+
512
+ this.emit('application:building', id)
307
513
  try {
308
- return await sendViaITC(service, 'build')
514
+ await sendViaITC(application, 'build')
515
+ this.emit('application:built', id)
309
516
  } catch (e) {
310
- // The service exports no meta, return an empty object
517
+ // The application exports no meta, return an empty object
311
518
  if (e.code === 'PLT_ITC_HANDLER_NOT_FOUND') {
312
519
  return {}
313
520
  }
@@ -316,9 +523,34 @@ class Runtime extends EventEmitter {
316
523
  }
317
524
  }
318
525
 
319
- async inject (id, injectParams) {
320
- const service = await this.#getServiceById(id, true)
321
- return sendViaITC(service, 'inject', injectParams)
526
+ async startApplicationProfiling (id, options = {}, ensureStarted = true) {
527
+ const service = await this.#getApplicationById(id, ensureStarted)
528
+ this.#validatePprofCapturePreload()
529
+
530
+ return sendViaITC(service, 'startProfiling', options)
531
+ }
532
+
533
+ async stopApplicationProfiling (id, ensureStarted = true) {
534
+ const service = await this.#getApplicationById(id, ensureStarted)
535
+ this.#validatePprofCapturePreload()
536
+
537
+ return sendViaITC(service, 'stopProfiling')
538
+ }
539
+
540
+ async updateUndiciInterceptors (undiciConfig) {
541
+ this.#config.undici = undiciConfig
542
+
543
+ const promises = []
544
+ for (const worker of this.#workers.values()) {
545
+ promises.push(sendViaITC(worker, 'updateUndiciInterceptors', undiciConfig))
546
+ }
547
+
548
+ const results = await Promise.allSettled(promises)
549
+ for (const result of results) {
550
+ if (result.status === 'rejected') {
551
+ throw result.reason
552
+ }
553
+ }
322
554
  }
323
555
 
324
556
  startCollectingMetrics () {
@@ -332,9 +564,8 @@ class Runtime extends EventEmitter {
332
564
  try {
333
565
  metrics = await this.getFormattedMetrics()
334
566
  } catch (error) {
335
- if (!(error instanceof errors.RuntimeExitedError)) {
336
- // TODO(mcollina): use the logger
337
- console.error('Error collecting metrics', error)
567
+ if (!(error instanceof RuntimeExitedError)) {
568
+ this.logger.error({ err: ensureLoggableError(error) }, 'Error collecting metrics')
338
569
  }
339
570
  return
340
571
  }
@@ -347,91 +578,189 @@ class Runtime extends EventEmitter {
347
578
  }, COLLECT_METRICS_TIMEOUT).unref()
348
579
  }
349
580
 
350
- async pipeLogsStream (writableStream, logger, startLogId, endLogId, runtimePID) {
351
- endLogId = endLogId || Infinity
352
- runtimePID = runtimePID ?? process.pid
581
+ invalidateHttpCache (options = {}) {
582
+ const { keys, tags } = options
353
583
 
354
- const runtimeLogFiles = await this.#getRuntimeLogFiles(runtimePID)
355
- if (runtimeLogFiles.length === 0) {
356
- writableStream.end()
584
+ if (!this.#sharedHttpCache) {
357
585
  return
358
586
  }
359
587
 
360
- let latestFileId = parseInt(runtimeLogFiles.at(-1).slice('logs.'.length))
588
+ const promises = []
589
+ if (keys && keys.length > 0) {
590
+ promises.push(this.#sharedHttpCache.deleteKeys(keys))
591
+ }
592
+
593
+ if (tags && tags.length > 0) {
594
+ promises.push(this.#sharedHttpCache.deleteTags(tags))
595
+ }
596
+
597
+ return Promise.all(promises)
598
+ }
361
599
 
362
- let fileStream = null
363
- let fileId = startLogId ?? latestFileId
364
- let isClosed = false
600
+ async addLoggerDestination (writableStream) {
601
+ // Add the stream - We output everything we get
602
+ this.#loggerDestination.add({ stream: writableStream, level: 1 })
365
603
 
366
- const runtimeLogsDir = this.#getRuntimeLogsDir(runtimePID)
604
+ // Immediately get the lastId counter so we can use it later to remove the stream
605
+ const id = this.#loggerDestination.lastId
367
606
 
368
- const watcher = watch(runtimeLogsDir, async (event, filename) => {
369
- if (event === 'rename' && filename.startsWith('logs')) {
370
- const logFileId = parseInt(filename.slice('logs.'.length))
371
- if (logFileId > latestFileId) {
372
- latestFileId = logFileId
373
- fileStream.unwatch()
374
- }
375
- }
376
- }).unref()
607
+ const onClose = () => {
608
+ writableStream.removeListener('close', onClose)
609
+ writableStream.removeListener('error', onClose)
610
+ this.removeListener('closed', onClose)
611
+ this.#loggerDestination.remove(id)
612
+ }
377
613
 
378
- const streamLogFile = () => {
379
- if (fileId > endLogId) {
380
- writableStream.end()
381
- return
382
- }
614
+ writableStream.on('close', onClose)
615
+ writableStream.on('error', onClose)
616
+ this.on('closed', onClose)
617
+ }
383
618
 
384
- const fileName = 'logs.' + fileId
385
- const filePath = join(runtimeLogsDir, fileName)
619
+ async updateSharedContext (options = {}) {
620
+ const { context, overwrite = false } = options
386
621
 
387
- const prevFileStream = fileStream
622
+ const sharedContext = overwrite ? {} : this.#sharedContext
623
+ Object.assign(sharedContext, context)
388
624
 
389
- fileStream = ts.createReadStream(filePath)
390
- fileStream.pipe(writableStream, { end: false, persistent: false })
625
+ this.#sharedContext = sharedContext
391
626
 
392
- if (prevFileStream) {
393
- prevFileStream.unpipe(writableStream)
394
- prevFileStream.destroy()
627
+ const promises = []
628
+ for (const worker of this.#workers.values()) {
629
+ promises.push(sendViaITC(worker, 'setSharedContext', sharedContext))
630
+ }
631
+
632
+ const results = await Promise.allSettled(promises)
633
+ for (const result of results) {
634
+ if (result.status === 'rejected') {
635
+ this.logger.error({ err: result.reason }, 'Cannot update shared context')
395
636
  }
637
+ }
396
638
 
397
- fileStream.on('close', () => {
398
- if (latestFileId > fileId && !isClosed) {
399
- streamLogFile(++fileId)
400
- }
401
- })
639
+ return sharedContext
640
+ }
402
641
 
403
- fileStream.on('error', err => {
404
- isClosed = true
405
- logger.error(err, 'Error streaming log file')
406
- fileStream.destroy()
407
- watcher.close()
408
- writableStream.end()
409
- })
642
+ setApplicationConfigPatch (id, patch) {
643
+ this.#applicationsConfigsPatches.set(id, patch)
644
+ }
410
645
 
411
- fileStream.on('eof', () => {
412
- if (fileId >= endLogId) {
413
- writableStream.end()
414
- return
415
- }
416
- if (latestFileId > fileId) {
417
- fileStream.unwatch()
418
- }
419
- })
646
+ removeApplicationConfigPatch (id) {
647
+ this.#applicationsConfigsPatches.delete(id)
648
+ }
420
649
 
421
- return fileStream
650
+ /**
651
+ * Updates the resources of the applications, such as the number of workers and health configurations (e.g., heap memory settings).
652
+ *
653
+ * This function handles three update scenarios for each application:
654
+ * 1. **Updating workers only**: Adjusts the number of workers for the application.
655
+ * 2. **Updating health configurations only**: Updates health parameters like `maxHeapTotal` or `maxYoungGeneration`.
656
+ * 3. **Updating both workers and health configurations**: Scales the workers and also applies health settings.
657
+ *
658
+ * When updating both workers and health:
659
+ * - **Scaling down workers**: Stops extra workers, then restarts the remaining workers with the previous settings.
660
+ * - **Scaling up workers**: Starts new workers with the updated heap settings, then restarts the old workers with the updated settings.
661
+ *
662
+ * Scaling up new resources (workers and/or heap memory) may fail due to insufficient memory; in this case the operation may fail partially or entirely.
663
+ * Scaling down is expected to succeed without issues.
664
+ *
665
+ * @param {Array<Object>} updates - An array of objects that define the updates for each application.
666
+ * @param {string} updates[].application - The ID of the application to update.
667
+ * @param {number} [updates[].workers] - The desired number of workers for the application. If omitted, workers will not be updated.
668
+ * @param {Object} [updates[].health] - The health configuration to update for the application, which may include:
669
+ * @param {string|number} [updates[].health.maxHeapTotal] - The maximum heap memory for the application. Can be a valid memory string (e.g., '1G', '512MB') or a number representing bytes.
670
+ * @param {string|number} [updates[].health.maxYoungGeneration] - The maximum young generation memory for the application. Can be a valid memory string (e.g., '128MB') or a number representing bytes.
671
+ *
672
+ * @returns {Promise<Array<Object>>} - A promise that resolves to an array of reports for each application, detailing the success or failure of the operations:
673
+ * - `application`: The application ID.
674
+ * - `workers`: The workers update report, including the current and new number of workers, the started workers, and the success status.
675
+ * - `health`: The health update report, showing the current and new heap settings, updated workers, and success status.
676
+ *
677
+ * @example
678
+ * await runtime.updateApplicationsResources([
679
+ * { application: 'application-1', workers: 2, health: { maxHeapTotal: '1G', maxYoungGeneration: '128 MB' } },
680
+ * { application: 'application-2', health: { maxHeapTotal: '1G' } },
681
+ * { application: 'application-3', workers: 2 },
682
+ * ])
683
+ *
684
+ * In this example:
685
+ * - `application-1` will have 2 workers and updated heap memory configurations.
686
+ * - `application-2` will have updated heap memory settings (without changing workers).
687
+ * - `application-3` will have its workers set to 2 but no change in memory settings.
688
+ *
689
+ * @throws {InvalidArgumentError} - Throws if any update parameter is invalid, such as:
690
+ * - Missing application ID.
691
+ * - Invalid worker count (not a positive integer).
692
+ * - Invalid memory size format for `maxHeapTotal` or `maxYoungGeneration`.
693
+ * @throws {ApplicationNotFoundError} - Throws if the specified application ID does not exist in the current application configuration.
694
+ */
695
+ async updateApplicationsResources (updates) {
696
+ if (this.#status === 'stopping' || this.#status === 'closed') {
697
+ this.logger.warn('Cannot update application resources when the runtime is stopping or closed')
698
+ return
422
699
  }
423
700
 
424
- streamLogFile(fileId)
425
-
426
- const onClose = () => {
427
- isClosed = true
428
- watcher.close()
429
- fileStream.destroy()
701
+ const ups = await this.#validateUpdateApplicationResources(updates)
702
+ const config = this.#config
703
+
704
+ const report = []
705
+ for (const update of ups) {
706
+ const { applicationId, config: applicationConfig, workers, health, currentWorkers, currentHealth } = update
707
+
708
+ if (workers && health) {
709
+ const r = await this.#updateApplicationWorkersAndHealth(
710
+ applicationId,
711
+ config,
712
+ applicationConfig,
713
+ workers,
714
+ health,
715
+ currentWorkers,
716
+ currentHealth
717
+ )
718
+ report.push({
719
+ application: applicationId,
720
+ workers: r.workers,
721
+ health: r.health
722
+ })
723
+ } else if (health) {
724
+ const r = await this.#updateApplicationHealth(
725
+ applicationId,
726
+ config,
727
+ applicationConfig,
728
+ currentWorkers,
729
+ currentHealth,
730
+ health
731
+ )
732
+ report.push({
733
+ application: applicationId,
734
+ health: r.health
735
+ })
736
+ } else if (workers) {
737
+ const r = await this.#updateApplicationWorkers(
738
+ applicationId,
739
+ config,
740
+ applicationConfig,
741
+ workers,
742
+ currentWorkers
743
+ )
744
+ report.push({
745
+ application: applicationId,
746
+ workers: r.workers
747
+ })
748
+ }
430
749
  }
431
750
 
432
- writableStream.on('close', onClose)
433
- writableStream.on('error', onClose)
434
- this.on('closed', onClose)
751
+ return report
752
+ }
753
+
754
+ setConcurrency (concurrency) {
755
+ this.#concurrency = concurrency
756
+ }
757
+
758
+ async getUrl () {
759
+ return this.#url
760
+ }
761
+
762
+ getRuntimeStatus () {
763
+ return this.#status
435
764
  }
436
765
 
437
766
  async getRuntimeMetadata () {
@@ -445,24 +774,33 @@ class Runtime extends EventEmitter {
445
774
  uptimeSeconds: Math.floor(process.uptime()),
446
775
  execPath: process.execPath,
447
776
  nodeVersion: process.version,
448
- projectDir: this.#configManager.dirname,
777
+ projectDir: this.#root,
449
778
  packageName: packageJson.name ?? null,
450
779
  packageVersion: packageJson.version ?? null,
451
780
  url: entrypointDetails?.url ?? null,
452
- platformaticVersion
781
+ platformaticVersion: version
453
782
  }
454
783
  }
455
784
 
456
785
  getRuntimeEnv () {
457
- return this.#configManager.env
786
+ return this.#env
458
787
  }
459
788
 
460
- getRuntimeConfig () {
461
- return this.#configManager.current
789
+ getRuntimeConfig (includeMeta = false) {
790
+ if (includeMeta) {
791
+ return this.#config
792
+ }
793
+
794
+ const { [kMetadata]: _, ...config } = this.#config
795
+ return config
462
796
  }
463
797
 
464
798
  getInterceptor () {
465
- return this.#interceptor
799
+ return this.#meshInterceptor
800
+ }
801
+
802
+ getDispatcher () {
803
+ return this.#dispatcher
466
804
  }
467
805
 
468
806
  getManagementApi () {
@@ -470,110 +808,76 @@ class Runtime extends EventEmitter {
470
808
  }
471
809
 
472
810
  getManagementApiUrl () {
473
- return this.#managementApi?.server.address()
811
+ return this.#managementApi?.server.address() ?? null
474
812
  }
475
813
 
476
814
  async getEntrypointDetails () {
477
- return this.getServiceDetails(this.#entrypointId)
815
+ return this.getApplicationDetails(this.#entrypointId)
478
816
  }
479
817
 
480
- async getServices () {
481
- return {
482
- entrypoint: this.#entrypointId,
483
- services: await Promise.all(this.#servicesIds.map(id => this.getServiceDetails(id)))
484
- }
485
- }
818
+ async getCustomHealthChecks () {
819
+ const status = {}
486
820
 
487
- async getServiceDetails (id, allowUnloaded = false) {
488
- let service
821
+ for (const [application, { count }] of Object.entries(this.#workers.configuration)) {
822
+ for (let i = 0; i < count; i++) {
823
+ const label = `${application}:${i}`
824
+ const worker = this.#workers.get(label)
489
825
 
490
- try {
491
- service = await this.#getServiceById(id)
492
- } catch (e) {
493
- if (allowUnloaded) {
494
- return { id, status: 'stopped' }
826
+ if (worker) {
827
+ status[label] = await sendViaITC(worker, 'getCustomHealthCheck')
828
+ }
495
829
  }
496
-
497
- throw e
498
- }
499
-
500
- const { entrypoint, dependencies, localUrl } = service[kConfig]
501
-
502
- const status = await sendViaITC(service, 'getStatus')
503
- const { type, version } = await sendViaITC(service, 'getServiceInfo')
504
-
505
- const serviceDetails = {
506
- id,
507
- type,
508
- status,
509
- version,
510
- localUrl,
511
- entrypoint,
512
- dependencies
513
- }
514
-
515
- if (entrypoint) {
516
- serviceDetails.url = status === 'started' ? this.#url : null
517
830
  }
518
831
 
519
- return serviceDetails
832
+ return status
520
833
  }
521
834
 
522
- async getService (id) {
523
- return this.#getServiceById(id, true)
524
- }
525
-
526
- async getServiceConfig (id) {
527
- const service = await this.#getServiceById(id, true)
528
-
529
- return sendViaITC(service, 'getServiceConfig')
530
- }
531
-
532
- async getServiceEnv (id) {
533
- const service = await this.#getServiceById(id, true)
534
-
535
- return sendViaITC(service, 'getServiceEnv')
536
- }
537
-
538
- async getServiceOpenapiSchema (id) {
539
- const service = await this.#getServiceById(id, true)
835
+ async getCustomReadinessChecks () {
836
+ const status = {}
540
837
 
541
- return sendViaITC(service, 'getServiceOpenAPISchema')
542
- }
838
+ for (const [application, { count }] of Object.entries(this.#workers.configuration)) {
839
+ for (let i = 0; i < count; i++) {
840
+ const label = `${application}:${i}`
841
+ const worker = this.#workers.get(label)
543
842
 
544
- async getServiceGraphqlSchema (id) {
545
- const service = await this.#getServiceById(id, true)
843
+ if (worker) {
844
+ status[label] = await sendViaITC(worker, 'getCustomReadinessCheck')
845
+ }
846
+ }
847
+ }
546
848
 
547
- return sendViaITC(service, 'getServiceGraphQLSchema')
849
+ return status
548
850
  }
549
851
 
550
852
  async getMetrics (format = 'json') {
551
853
  let metrics = null
552
854
 
553
- for (const id of this.#servicesIds) {
855
+ for (const worker of this.#workers.values()) {
554
856
  try {
555
- const service = await this.#getServiceById(id, true, false)
556
-
557
- // The service might be temporarily unavailable
558
- if (!service) {
857
+ // The application might be temporarily unavailable
858
+ if (worker[kWorkerStatus] !== 'started') {
559
859
  continue
560
860
  }
561
861
 
562
- const serviceMetrics = await sendViaITC(service, 'getMetrics', format)
563
- if (serviceMetrics) {
862
+ const applicationMetrics = await sendViaITC(worker, 'getMetrics', format)
863
+ if (applicationMetrics) {
564
864
  if (metrics === null) {
565
865
  metrics = format === 'json' ? [] : ''
566
866
  }
567
867
 
568
868
  if (format === 'json') {
569
- metrics.push(...serviceMetrics)
869
+ metrics.push(...applicationMetrics)
570
870
  } else {
571
- metrics += serviceMetrics
871
+ metrics += applicationMetrics
572
872
  }
573
873
  }
574
874
  } catch (e) {
575
- // The service exited while we were sending the ITC, skip it
576
- if (e.code === 'PLT_RUNTIME_SERVICE_NOT_STARTED' || e.code === 'PLT_RUNTIME_SERVICE_EXIT') {
875
+ // The application exited while we were sending the ITC, skip it
876
+ if (
877
+ e.code === 'PLT_RUNTIME_APPLICATION_NOT_STARTED' ||
878
+ e.code === 'PLT_RUNTIME_APPLICATION_EXIT' ||
879
+ e.code === 'PLT_RUNTIME_APPLICATION_WORKER_EXIT'
880
+ ) {
577
881
  continue
578
882
  }
579
883
 
@@ -592,73 +896,105 @@ class Runtime extends EventEmitter {
592
896
  try {
593
897
  const { metrics } = await this.getMetrics()
594
898
 
595
- if (metrics === null) {
899
+ if (metrics === null || metrics.length === 0) {
596
900
  return null
597
901
  }
598
902
 
599
- const cpuMetric = metrics.find(metric => metric.name === 'process_cpu_percent_usage')
600
- const rssMetric = metrics.find(metric => metric.name === 'process_resident_memory_bytes')
601
- const totalHeapSizeMetric = metrics.find(metric => metric.name === 'nodejs_heap_size_total_bytes')
602
- const usedHeapSizeMetric = metrics.find(metric => metric.name === 'nodejs_heap_size_used_bytes')
603
- const heapSpaceSizeTotalMetric = metrics.find(metric => metric.name === 'nodejs_heap_space_size_total_bytes')
604
- const newSpaceSizeTotalMetric = heapSpaceSizeTotalMetric.values.find(value => value.labels.space === 'new')
605
- const oldSpaceSizeTotalMetric = heapSpaceSizeTotalMetric.values.find(value => value.labels.space === 'old')
606
- const eventLoopUtilizationMetric = metrics.find(metric => metric.name === 'nodejs_eventloop_utilization')
903
+ const metricsNames = [
904
+ 'process_cpu_percent_usage',
905
+ 'process_resident_memory_bytes',
906
+ 'nodejs_heap_size_total_bytes',
907
+ 'nodejs_heap_size_used_bytes',
908
+ 'nodejs_heap_space_size_total_bytes',
909
+ 'nodejs_eventloop_utilization',
910
+ 'http_request_all_summary_seconds'
911
+ ]
607
912
 
608
- let p50Value = 0
609
- let p90Value = 0
610
- let p95Value = 0
611
- let p99Value = 0
913
+ const applicationsMetrics = {}
612
914
 
613
- const metricName = 'http_request_all_summary_seconds'
614
- const httpLatencyMetrics = metrics.filter(metric => metric.name === metricName)
915
+ for (const metric of metrics) {
916
+ const { name, values } = metric
615
917
 
616
- if (httpLatencyMetrics) {
617
- const entrypointMetrics = httpLatencyMetrics.find(
618
- metric => metric.values?.[0]?.labels?.serviceId === this.#entrypointId
619
- )
620
- if (entrypointMetrics) {
621
- p50Value = entrypointMetrics.values.find(value => value.labels.quantile === 0.5)?.value || 0
622
- p90Value = entrypointMetrics.values.find(value => value.labels.quantile === 0.9)?.value || 0
623
- p95Value = entrypointMetrics.values.find(value => value.labels.quantile === 0.95)?.value || 0
624
- p99Value = entrypointMetrics.values.find(value => value.labels.quantile === 0.99)?.value || 0
918
+ if (!metricsNames.includes(name)) continue
919
+ if (!values || values.length === 0) continue
920
+
921
+ const labels = values[0].labels
922
+ // Use the configured label name (serviceId for v2 compatibility, applicationId for v3+)
923
+ const applicationId = labels?.[this.#metricsLabelName]
924
+
925
+ if (!applicationId) {
926
+ throw new Error(`Missing ${this.#metricsLabelName} label in metrics`)
927
+ }
625
928
 
626
- p50Value = Math.round(p50Value * 1000)
627
- p90Value = Math.round(p90Value * 1000)
628
- p95Value = Math.round(p95Value * 1000)
629
- p99Value = Math.round(p99Value * 1000)
929
+ let applicationMetrics = applicationsMetrics[applicationId]
930
+ if (!applicationMetrics) {
931
+ applicationMetrics = {
932
+ cpu: 0,
933
+ rss: 0,
934
+ totalHeapSize: 0,
935
+ usedHeapSize: 0,
936
+ newSpaceSize: 0,
937
+ oldSpaceSize: 0,
938
+ elu: 0,
939
+ latency: {
940
+ p50: 0,
941
+ p90: 0,
942
+ p95: 0,
943
+ p99: 0
944
+ }
945
+ }
946
+ applicationsMetrics[applicationId] = applicationMetrics
630
947
  }
948
+
949
+ parsePromMetric(applicationMetrics, metric)
631
950
  }
632
951
 
633
- const cpu = cpuMetric.values[0].value
634
- const rss = rssMetric.values[0].value
635
- const elu = eventLoopUtilizationMetric.values[0].value
636
- const totalHeapSize = totalHeapSizeMetric.values[0].value
637
- const usedHeapSize = usedHeapSizeMetric.values[0].value
638
- const newSpaceSize = newSpaceSizeTotalMetric.value
639
- const oldSpaceSize = oldSpaceSizeTotalMetric.value
952
+ function parsePromMetric (applicationMetrics, promMetric) {
953
+ const { name } = promMetric
640
954
 
641
- const formattedMetrics = {
642
- version: 1,
643
- date: new Date().toISOString(),
644
- cpu,
645
- elu,
646
- rss,
647
- totalHeapSize,
648
- usedHeapSize,
649
- newSpaceSize,
650
- oldSpaceSize,
651
- entrypoint: {
652
- latency: {
653
- p50: p50Value,
654
- p90: p90Value,
655
- p95: p95Value,
656
- p99: p99Value
955
+ if (name === 'process_cpu_percent_usage') {
956
+ applicationMetrics.cpu = promMetric.values[0].value
957
+ return
958
+ }
959
+ if (name === 'process_resident_memory_bytes') {
960
+ applicationMetrics.rss = promMetric.values[0].value
961
+ return
962
+ }
963
+ if (name === 'nodejs_heap_size_total_bytes') {
964
+ applicationMetrics.totalHeapSize = promMetric.values[0].value
965
+ return
966
+ }
967
+ if (name === 'nodejs_heap_size_used_bytes') {
968
+ applicationMetrics.usedHeapSize = promMetric.values[0].value
969
+ return
970
+ }
971
+ if (name === 'nodejs_heap_space_size_total_bytes') {
972
+ const newSpaceSize = promMetric.values.find(value => value.labels.space === 'new')
973
+ const oldSpaceSize = promMetric.values.find(value => value.labels.space === 'old')
974
+
975
+ applicationMetrics.newSpaceSize = newSpaceSize.value
976
+ applicationMetrics.oldSpaceSize = oldSpaceSize.value
977
+ return
978
+ }
979
+ if (name === 'nodejs_eventloop_utilization') {
980
+ applicationMetrics.elu = promMetric.values[0].value
981
+ return
982
+ }
983
+ if (name === 'http_request_all_summary_seconds') {
984
+ applicationMetrics.latency = {
985
+ p50: promMetric.values.find(value => value.labels.quantile === 0.5)?.value || 0,
986
+ p90: promMetric.values.find(value => value.labels.quantile === 0.9)?.value || 0,
987
+ p95: promMetric.values.find(value => value.labels.quantile === 0.95)?.value || 0,
988
+ p99: promMetric.values.find(value => value.labels.quantile === 0.99)?.value || 0
657
989
  }
658
990
  }
659
991
  }
660
992
 
661
- return formattedMetrics
993
+ return {
994
+ version: 1,
995
+ date: new Date().toISOString(),
996
+ applications: applicationsMetrics
997
+ }
662
998
  } catch (err) {
663
999
  // If any metric is missing, return nothing
664
1000
  this.logger.warn({ err }, 'Cannot fetch metrics')
@@ -667,355 +1003,1397 @@ class Runtime extends EventEmitter {
667
1003
  }
668
1004
  }
669
1005
 
670
- async getServiceMeta (id) {
671
- const service = this.#services.get(id)
1006
+ getSharedContext () {
1007
+ return this.#sharedContext
1008
+ }
672
1009
 
673
- if (!service) {
674
- throw new errors.ServiceNotFoundError(id, Array.from(this.#services.keys()).join(', '))
675
- }
1010
+ async getApplicationResourcesInfo (id) {
1011
+ const workers = this.#workers.getCount(id)
676
1012
 
677
- try {
678
- return await sendViaITC(service, 'getServiceMeta')
679
- } catch (e) {
680
- // The service exports no meta, return an empty object
681
- if (e.code === 'PLT_ITC_HANDLER_NOT_FOUND') {
682
- return {}
683
- }
1013
+ const worker = await this.#getWorkerById(id, 0, false, false)
1014
+ const health = worker[kConfig].health
684
1015
 
685
- throw e
686
- }
1016
+ return { workers, health }
687
1017
  }
688
1018
 
689
- async getLogIds (runtimePID) {
690
- runtimePID = runtimePID ?? process.pid
691
-
692
- const runtimeLogFiles = await this.#getRuntimeLogFiles(runtimePID)
693
- const runtimeLogIds = []
1019
+ getApplicationsIds () {
1020
+ return this.#config.applications.map(application => application.id)
1021
+ }
694
1022
 
695
- for (const logFile of runtimeLogFiles) {
696
- const logId = parseInt(logFile.slice('logs.'.length))
697
- runtimeLogIds.push(logId)
1023
+ async getApplications (allowUnloaded = false) {
1024
+ return {
1025
+ entrypoint: this.#entrypointId,
1026
+ production: this.#isProduction,
1027
+ applications: await Promise.all(
1028
+ this.getApplicationsIds().map(id => this.getApplicationDetails(id, allowUnloaded))
1029
+ )
698
1030
  }
699
- return runtimeLogIds
700
1031
  }
701
1032
 
702
- async getAllLogIds () {
703
- const runtimesLogFiles = await this.#getAllLogsFiles()
704
- const runtimesLogsIds = []
1033
+ async getWorkers () {
1034
+ const status = {}
1035
+
1036
+ for (const [application, { count }] of Object.entries(this.#workers.configuration)) {
1037
+ for (let i = 0; i < count; i++) {
1038
+ const label = `${application}:${i}`
1039
+ const worker = this.#workers.get(label)
705
1040
 
706
- for (const runtime of runtimesLogFiles) {
707
- const runtimeLogIds = []
708
- for (const logFile of runtime.runtimeLogFiles) {
709
- const logId = parseInt(logFile.slice('logs.'.length))
710
- runtimeLogIds.push(logId)
1041
+ status[label] = {
1042
+ application,
1043
+ worker: i,
1044
+ status: worker?.[kWorkerStatus] ?? 'exited',
1045
+ thread: worker?.threadId
1046
+ }
711
1047
  }
712
- runtimesLogsIds.push({
713
- pid: runtime.runtimePID,
714
- indexes: runtimeLogIds
715
- })
716
1048
  }
717
1049
 
718
- return runtimesLogsIds
1050
+ return status
1051
+ }
1052
+
1053
+ async getApplicationMeta (id) {
1054
+ const application = await this.#getApplicationById(id)
1055
+
1056
+ try {
1057
+ return await sendViaITC(application, 'getApplicationMeta')
1058
+ } catch (e) {
1059
+ // The application exports no meta, return an empty object
1060
+ if (e.code === 'PLT_ITC_HANDLER_NOT_FOUND') {
1061
+ return {}
1062
+ }
1063
+
1064
+ throw e
1065
+ }
1066
+ }
1067
+
1068
+ async getApplicationDetails (id, allowUnloaded = false) {
1069
+ let application
1070
+
1071
+ try {
1072
+ application = await this.#getApplicationById(id)
1073
+ } catch (e) {
1074
+ if (allowUnloaded) {
1075
+ return { id, status: 'stopped' }
1076
+ }
1077
+
1078
+ throw e
1079
+ }
1080
+
1081
+ const { entrypoint, localUrl } = application[kConfig]
1082
+
1083
+ const status = await sendViaITC(application, 'getStatus')
1084
+ const { type, version, dependencies } = await sendViaITC(application, 'getApplicationInfo')
1085
+
1086
+ const applicationDetails = {
1087
+ id,
1088
+ type,
1089
+ status,
1090
+ dependencies,
1091
+ version,
1092
+ localUrl,
1093
+ entrypoint
1094
+ }
1095
+
1096
+ if (this.#isProduction) {
1097
+ applicationDetails.workers = this.#workers.getCount(id)
1098
+ }
1099
+
1100
+ if (entrypoint) {
1101
+ applicationDetails.url = status === 'started' ? this.#url : null
1102
+ }
1103
+
1104
+ return applicationDetails
1105
+ }
1106
+
1107
+ async getApplication (id, ensureStarted = true) {
1108
+ return this.#getApplicationById(id, ensureStarted)
1109
+ }
1110
+
1111
+ async getApplicationConfig (id, ensureStarted = true) {
1112
+ const application = await this.#getApplicationById(id, ensureStarted)
1113
+
1114
+ return sendViaITC(application, 'getApplicationConfig')
1115
+ }
1116
+
1117
+ async getApplicationEnv (id, ensureStarted = true) {
1118
+ const application = await this.#getApplicationById(id, ensureStarted)
1119
+
1120
+ return sendViaITC(application, 'getApplicationEnv')
719
1121
  }
720
1122
 
721
- async getLogFileStream (logFileId, runtimePID) {
722
- const runtimeLogsDir = this.#getRuntimeLogsDir(runtimePID)
723
- const filePath = join(runtimeLogsDir, `logs.${logFileId}`)
724
- return createReadStream(filePath)
1123
+ async getApplicationOpenapiSchema (id) {
1124
+ const application = await this.#getApplicationById(id, true)
1125
+
1126
+ return sendViaITC(application, 'getApplicationOpenAPISchema')
725
1127
  }
726
1128
 
727
- #updateStatus (status) {
1129
+ async getApplicationGraphqlSchema (id) {
1130
+ const application = await this.#getApplicationById(id, true)
1131
+
1132
+ return sendViaITC(application, 'getApplicationGraphQLSchema')
1133
+ }
1134
+
1135
+ #getHttpCacheValue ({ request }) {
1136
+ if (!this.#sharedHttpCache) {
1137
+ return
1138
+ }
1139
+
1140
+ return this.#sharedHttpCache.getValue(request)
1141
+ }
1142
+
1143
+ #setHttpCacheValue ({ request, response, payload }) {
1144
+ if (!this.#sharedHttpCache) {
1145
+ return
1146
+ }
1147
+
1148
+ return this.#sharedHttpCache.setValue(request, response, payload)
1149
+ }
1150
+
1151
+ #deleteHttpCacheValue ({ request }) {
1152
+ if (!this.#sharedHttpCache) {
1153
+ return
1154
+ }
1155
+
1156
+ return this.#sharedHttpCache.delete(request)
1157
+ }
1158
+
1159
+ async #setDispatcher (undiciConfig) {
1160
+ const config = this.#config
1161
+
1162
+ const dispatcherOpts = { ...undiciConfig }
1163
+ const interceptors = [this.#meshInterceptor]
1164
+
1165
+ if (config.httpCache) {
1166
+ this.#sharedHttpCache = await createSharedStore(this.#root, config.httpCache)
1167
+ interceptors.push(
1168
+ undiciInterceptors.cache({
1169
+ store: this.#sharedHttpCache,
1170
+ methods: config.httpCache.methods ?? ['GET', 'HEAD']
1171
+ })
1172
+ )
1173
+ }
1174
+ this.#dispatcher = new Agent(dispatcherOpts).compose(interceptors)
1175
+ }
1176
+
1177
+ #updateStatus (status, args) {
728
1178
  this.#status = status
729
- this.emit(status)
1179
+ this.emit(status, args)
730
1180
  }
731
1181
 
732
1182
  #showUrl () {
733
1183
  this.logger.info(`Platformatic is now listening at ${this.#url}`)
734
1184
  }
735
1185
 
736
- async #setupService (serviceConfig) {
737
- if (this.#status === 'stopping' || this.#status === 'closed') return
1186
+ async #setupApplications () {
1187
+ const config = this.#config
1188
+ const setupInvocations = []
1189
+
1190
+ // Parse all applications and verify we're not missing any path or resolved application
1191
+ for (const applicationConfig of config.applications) {
1192
+ // If there is no application path, check if the application was resolved
1193
+ if (!applicationConfig.path) {
1194
+ if (applicationConfig.url) {
1195
+ // Try to backfill the path for external applications
1196
+ applicationConfig.path = join(this.#root, config.resolvedApplicationsBasePath, applicationConfig.id)
1197
+
1198
+ if (!existsSync(applicationConfig.path)) {
1199
+ const executable = globalThis.platformatic?.executable ?? 'platformatic'
1200
+ this.logger.error(
1201
+ `The path for application "%s" does not exist. Please run "${executable} resolve" and try again.`,
1202
+ applicationConfig.id
1203
+ )
1204
+
1205
+ await this.closeAndThrow(new RuntimeAbortedError())
1206
+ }
1207
+ } else {
1208
+ this.logger.error(
1209
+ 'The application "%s" has no path defined. Please check your configuration and try again.',
1210
+ applicationConfig.id
1211
+ )
738
1212
 
739
- const config = this.#configManager.current
740
- const { autoload, restartOnError } = config
1213
+ await this.closeAndThrow(new RuntimeAbortedError())
1214
+ }
1215
+ }
741
1216
 
742
- const id = serviceConfig.id
743
- const { port1: loggerDestination, port2: loggingPort } = new MessageChannel()
744
- loggerDestination.on('message', this.#forwardThreadLog.bind(this))
1217
+ setupInvocations.push([applicationConfig])
1218
+ }
1219
+
1220
+ await executeInParallel(this.#setupApplication.bind(this), setupInvocations, this.#concurrency)
1221
+ }
745
1222
 
746
- if (!this.#bootstrapAttempts.has(id)) {
747
- this.#bootstrapAttempts.set(id, 0)
1223
+ async #setupApplication (applicationConfig) {
1224
+ if (this.#status === 'stopping' || this.#status === 'closed') {
1225
+ return
1226
+ }
1227
+
1228
+ const config = this.#config
1229
+ const workersCount = await this.#workers.getCount(applicationConfig.id)
1230
+ const id = applicationConfig.id
1231
+ const setupInvocations = []
1232
+
1233
+ for (let i = 0; i < workersCount; i++) {
1234
+ setupInvocations.push([config, applicationConfig, workersCount, id, i])
1235
+ }
1236
+
1237
+ await executeInParallel(this.#setupWorker.bind(this), setupInvocations, this.#concurrency)
1238
+
1239
+ this.emit('application:init', id)
1240
+ }
1241
+
1242
+ async #setupWorker (config, applicationConfig, workersCount, applicationId, index, enabled = true) {
1243
+ const { restartOnError } = config
1244
+ const workerId = `${applicationId}:${index}`
1245
+
1246
+ // Handle inspector
1247
+ let inspectorOptions
1248
+
1249
+ if (this.#config.inspectorOptions) {
1250
+ inspectorOptions = {
1251
+ ...this.#config.inspectorOptions
1252
+ }
1253
+
1254
+ inspectorOptions.port = inspectorOptions.port + this.#workers.size + 1
748
1255
  }
749
1256
 
750
- const service = new Worker(kWorkerFile, {
1257
+ if (config.telemetry) {
1258
+ applicationConfig.telemetry = {
1259
+ ...config.telemetry,
1260
+ ...applicationConfig.telemetry,
1261
+ applicationName: `${config.telemetry.applicationName}-${applicationConfig.id}`
1262
+ }
1263
+ }
1264
+
1265
+ const errorLabel = this.#workerExtendedLabel(applicationId, index, workersCount)
1266
+ const health = deepmerge(config.health ?? {}, applicationConfig.health ?? {})
1267
+
1268
+ const execArgv = []
1269
+
1270
+ if (!applicationConfig.skipTelemetryHooks && config.telemetry && config.telemetry.enabled !== false) {
1271
+ const require = createRequire(import.meta.url)
1272
+ const telemetryPath = require.resolve('@platformatic/telemetry')
1273
+ const openTelemetrySetupPath = join(telemetryPath, '..', 'lib', 'node-telemetry.js')
1274
+ const hookUrl = pathToFileURL(require.resolve('@opentelemetry/instrumentation/hook.mjs'))
1275
+
1276
+ // We need the following because otherwise some open telemetry instrumentations won't work with ESM (like express)
1277
+ // see: https://github.com/open-telemetry/opentelemetry-js/blob/main/doc/esm-support.md#instrumentation-hook-required-for-esm
1278
+ execArgv.push('--import', `data:text/javascript, import { register } from 'node:module'; register('${hookUrl}')`)
1279
+ execArgv.push('--import', pathToFileURL(openTelemetrySetupPath))
1280
+ }
1281
+
1282
+ if ((applicationConfig.sourceMaps ?? config.sourceMaps) === true) {
1283
+ execArgv.push('--enable-source-maps')
1284
+ }
1285
+
1286
+ const workerEnv = structuredClone(this.#env)
1287
+
1288
+ if (applicationConfig.nodeOptions?.trim().length > 0) {
1289
+ const originalNodeOptions = workerEnv['NODE_OPTIONS'] ?? ''
1290
+
1291
+ workerEnv['NODE_OPTIONS'] = `${originalNodeOptions} ${applicationConfig.nodeOptions}`.trim()
1292
+ }
1293
+
1294
+ const maxHeapTotal =
1295
+ typeof health.maxHeapTotal === 'string' ? parseMemorySize(health.maxHeapTotal) : health.maxHeapTotal
1296
+ const maxYoungGeneration =
1297
+ typeof health.maxYoungGeneration === 'string'
1298
+ ? parseMemorySize(health.maxYoungGeneration)
1299
+ : health.maxYoungGeneration
1300
+
1301
+ const maxOldGenerationSizeMb = Math.floor(
1302
+ (maxYoungGeneration > 0 ? maxHeapTotal - maxYoungGeneration : maxHeapTotal) / (1024 * 1024)
1303
+ )
1304
+ const maxYoungGenerationSizeMb = maxYoungGeneration ? Math.floor(maxYoungGeneration / (1024 * 1024)) : undefined
1305
+
1306
+ const worker = new Worker(kWorkerFile, {
751
1307
  workerData: {
752
1308
  config,
753
- serviceConfig: {
754
- ...serviceConfig,
755
- isProduction: this.#configManager.args?.production ?? false
1309
+ applicationConfig: {
1310
+ ...applicationConfig,
1311
+ isProduction: this.#isProduction,
1312
+ configPatch: this.#applicationsConfigsPatches.get(applicationId)
1313
+ },
1314
+ worker: {
1315
+ id: workerId,
1316
+ index,
1317
+ count: workersCount
756
1318
  },
757
- dirname: this.#configManager.dirname,
758
- runtimeLogsDir: this.#runtimeLogsDir,
759
- loggingPort
1319
+ inspectorOptions,
1320
+ dirname: this.#root
1321
+ },
1322
+ argv: applicationConfig.arguments,
1323
+ execArgv,
1324
+ env: workerEnv,
1325
+ resourceLimits: {
1326
+ maxOldGenerationSizeMb,
1327
+ maxYoungGenerationSizeMb
760
1328
  },
761
- execArgv: [], // Avoid side effects
762
- env: this.#env,
763
- transferList: [loggingPort],
764
- /*
765
- Important: always set stdout and stderr to true, so that worker's output is not automatically
766
- piped to the parent thread. We actually never output the thread output since we replace it
767
- with PinoWritable, and disabling the piping avoids us to redeclare some internal Node.js methods.
768
-
769
- The author of this (Paolo and Matteo) are not proud of the solution. Forgive us.
770
- */
771
1329
  stdout: true,
772
1330
  stderr: true
773
1331
  })
774
1332
 
1333
+ this.#handleWorkerStandardStreams(worker, applicationId, workersCount > 1 ? index : undefined)
1334
+
775
1335
  // Make sure the listener can handle a lot of API requests at once before raising a warning
776
- service.setMaxListeners(1e3)
1336
+ worker.setMaxListeners(1e3)
1337
+
1338
+ // Track application exiting
1339
+ const eventPayload = { application: applicationId, worker: index, workersCount }
777
1340
 
778
- // Track service exiting
779
- service.once('exit', code => {
780
- const started = this.#startedServices.get(id)
781
- this.#services.delete(id)
782
- loggerDestination.close()
783
- service[kITC].close()
784
- loggingPort.close()
1341
+ worker.once('exit', code => {
1342
+ if (worker[kWorkerStatus] === 'exited') {
1343
+ return
1344
+ }
785
1345
 
786
- if (this.#status === 'stopping') return
1346
+ const started = worker[kWorkerStatus] === 'started'
1347
+ worker[kWorkerStatus] = 'exited'
1348
+ this.emit('application:worker:exited', eventPayload)
1349
+
1350
+ this.#cleanupWorker(worker)
1351
+
1352
+ if (this.#status === 'stopping') {
1353
+ return
1354
+ }
787
1355
 
788
1356
  // Wait for the next tick so that crashed from the thread are logged first
789
1357
  setImmediate(() => {
790
- if (!config.watch || code !== 0) {
791
- this.logger.warn(`Service "${id}" unexpectedly exited with code ${code}.`)
1358
+ if (started && (!config.watch || code !== 0)) {
1359
+ this.emit('application:worker:error', { ...eventPayload, code })
1360
+ this.#broadcastWorkers()
1361
+
1362
+ this.logger.warn(`The ${errorLabel} unexpectedly exited with code ${code}.`)
792
1363
  }
793
1364
 
794
- // Restart the service if it was started
1365
+ // Restart the application if it was started
795
1366
  if (started && this.#status === 'started') {
796
1367
  if (restartOnError > 0) {
797
- this.logger.warn(`Restarting a service "${id}" in ${restartOnError}ms...`)
798
- this.#restartCrashedService(id).catch(err => {
799
- this.logger.error({ err: ensureLoggableError(err) }, `Failed to restart service "${id}".`)
800
- })
1368
+ if (restartOnError < IMMEDIATE_RESTART_MAX_THRESHOLD) {
1369
+ this.logger.warn(`The ${errorLabel} is being restarted ...`)
1370
+ } else {
1371
+ this.logger.warn(`The ${errorLabel} will be restarted in ${restartOnError}ms ...`)
1372
+ }
1373
+
1374
+ this.#restartCrashedWorker(config, applicationConfig, workersCount, applicationId, index, false, 0).catch(
1375
+ err => {
1376
+ this.logger.error({ err: ensureLoggableError(err) }, `${errorLabel} could not be restarted.`)
1377
+ }
1378
+ )
801
1379
  } else {
802
- this.logger.warn(`The "${id}" service is no longer available.`)
1380
+ this.emit('application:worker:unvailable', eventPayload)
1381
+ this.logger.warn(`The ${errorLabel} is no longer available.`)
803
1382
  }
804
1383
  }
805
1384
  })
806
1385
  })
807
1386
 
808
- service[kId] = id
809
- service[kConfig] = serviceConfig
1387
+ worker[kId] = workersCount > 1 ? workerId : applicationId
1388
+ worker[kFullId] = workerId
1389
+ worker[kApplicationId] = applicationId
1390
+ worker[kWorkerId] = workersCount > 1 ? index : undefined
1391
+ worker[kWorkerStatus] = 'boot'
810
1392
 
811
- // Setup ITC
812
- service[kITC] = new ITC({
813
- name: id + '-runtime',
814
- port: service,
815
- handlers: {
816
- getServiceMeta: this.getServiceMeta.bind(this),
817
- getServices: this.getServices.bind(this)
1393
+ if (inspectorOptions) {
1394
+ worker[kInspectorOptions] = {
1395
+ port: inspectorOptions.port,
1396
+ id: applicationId,
1397
+ dirname: this.#root
818
1398
  }
1399
+ }
1400
+
1401
+ // Setup ITC
1402
+ worker[kITC] = new ITC({
1403
+ name: workerId + '-runtime',
1404
+ port: worker,
1405
+ handlers: this.#workerITCHandlers
1406
+ })
1407
+ worker[kITC].listen()
1408
+
1409
+ // Forward events from the worker
1410
+ worker[kITC].on('event', ({ event, payload }) => {
1411
+ this.emit(`application:worker:event:${event}`, { ...eventPayload, payload })
819
1412
  })
820
- service[kITC].listen()
821
1413
 
822
- // Handle services changes
823
- // This is not purposely activated on when this.#configManager.current.watch === true
824
- // so that services can eventually manually trigger a restart. This mechanism is current
825
- // used by the composer
826
- service[kITC].on('changed', async () => {
1414
+ // Only activate watch for the first instance
1415
+ if (index === 0) {
1416
+ // Handle applications changes
1417
+ // This is purposely not gated on this.#config.watch === true,
1418
+ // so that applications can manually trigger a restart at any time. This mechanism is currently
1419
+ // used by the gateway.
1420
+ worker[kITC].on('changed', async () => {
1421
+ this.emit('application:worker:changed', eventPayload)
1422
+
1423
+ try {
1424
+ const wasStarted = worker[kWorkerStatus].startsWith('start')
1425
+ await this.stopApplication(applicationId)
1426
+
1427
+ if (wasStarted) {
1428
+ await this.startApplication(applicationId)
1429
+ }
1430
+
1431
+ this.logger.info(`The application "${applicationId}" has been successfully reloaded ...`)
1432
+ this.emit('application:worker:reloaded', eventPayload)
1433
+
1434
+ if (applicationConfig.entrypoint) {
1435
+ this.#showUrl()
1436
+ }
1437
+ } catch (e) {
1438
+ this.logger.error(e)
1439
+ }
1440
+ })
1441
+ }
1442
+
1443
+ if (enabled) {
1444
+ // Store locally
1445
+ this.#workers.set(workerId, worker)
1446
+
1447
+ // Setup the interceptor
1448
+ this.#meshInterceptor.route(applicationId, worker)
1449
+ }
1450
+
1451
+ // Wait for initialization
1452
+ await waitEventFromITC(worker, 'init')
1453
+
1454
+ if (applicationConfig.entrypoint) {
1455
+ this.#entrypointId = applicationId
1456
+ }
1457
+
1458
+ worker[kConfig] = { ...applicationConfig, health, workers: workersCount }
1459
+ worker[kWorkerStatus] = 'init'
1460
+ this.emit('application:worker:init', eventPayload)
1461
+
1462
+ return worker
1463
+ }
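A quick worked example of the heap arithmetic used when building the worker's `resourceLimits` in `#setupWorker` above (plain byte values stand in for `parseMemorySize`, which is assumed here to return bytes):

```js
// 512 MiB total heap budget, 64 MiB reserved for the young generation.
const maxHeapTotal = 512 * 1024 * 1024
const maxYoungGeneration = 64 * 1024 * 1024

// The old generation gets whatever is left of the total budget, expressed in MiB.
const maxOldGenerationSizeMb = Math.floor(
  (maxYoungGeneration > 0 ? maxHeapTotal - maxYoungGeneration : maxHeapTotal) / (1024 * 1024)
)
// The young generation is converted to MiB only when it was configured at all.
const maxYoungGenerationSizeMb = maxYoungGeneration
  ? Math.floor(maxYoungGeneration / (1024 * 1024))
  : undefined

console.log(maxOldGenerationSizeMb, maxYoungGenerationSizeMb) // 448 64
```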
1464
+
1465
+ async #getHealth (worker) {
1466
+ if (features.node.worker.getHeapStatistics) {
1467
+ const { used_heap_size: heapUsed, total_heap_size: heapTotal } = await worker.getHeapStatistics()
1468
+ const currentELU = worker.performance.eventLoopUtilization()
1469
+ const elu = worker[kLastELU] ? worker.performance.eventLoopUtilization(currentELU, worker[kLastELU]) : currentELU
1470
+ worker[kLastELU] = currentELU
1471
+ return { elu: elu.utilization, heapUsed, heapTotal }
1472
+ }
1473
+
1474
+ const health = await worker[kITC].send('getHealth')
1475
+ return health
1476
+ }
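The health probe above prefers sampling the worker directly from the parent thread when the platform supports it, falling back to ITC otherwise. A minimal sketch of the event-loop-utilization delta it computes, using only Node.js worker_threads APIs (the inline worker script is illustrative):

```js
// Sketch: sample a worker's event loop utilization over successive intervals.
import { Worker } from 'node:worker_threads'

const worker = new Worker('setInterval(() => {}, 100)', { eval: true })

let lastELU
function sampleELU () {
  const current = worker.performance.eventLoopUtilization()
  // Passing the previous sample yields the utilization for the interval only.
  const delta = lastELU ? worker.performance.eventLoopUtilization(current, lastELU) : current
  lastELU = current
  return delta.utilization // 0 = idle, 1 = fully busy
}

setInterval(() => console.log('ELU', sampleELU().toFixed(3)), 1000).unref()
```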
1477
+
1478
+ #setupHealthCheck (config, applicationConfig, workersCount, id, index, worker, errorLabel) {
1479
+ // Clear the timeout when exiting
1480
+ worker.on('exit', () => clearTimeout(worker[kHealthCheckTimer]))
1481
+
1482
+ const { maxELU, maxHeapUsed, maxHeapTotal, maxUnhealthyChecks, interval } = worker[kConfig].health
1483
+ const maxHeapTotalNumber = typeof maxHeapTotal === 'string' ? parseMemorySize(maxHeapTotal) : maxHeapTotal
1484
+
1485
+ let unhealthyChecks = 0
1486
+
1487
+ worker[kHealthCheckTimer] = setTimeout(async () => {
1488
+ if (worker[kWorkerStatus] !== 'started') {
1489
+ return
1490
+ }
1491
+
1492
+ let health, unhealthy, memoryUsage
827
1493
  try {
828
- const wasStarted = this.#startedServices.get(id)
1494
+ health = await this.#getHealth(worker)
1495
+ memoryUsage = health.heapUsed / maxHeapTotalNumber
1496
+ unhealthy = health.elu > maxELU || memoryUsage > maxHeapUsed
1497
+ } catch (err) {
1498
+ this.logger.error({ err }, `Failed to get health for ${errorLabel}.`)
1499
+ unhealthy = true
1500
+ memoryUsage = -1
1501
+ health = { elu: -1, heapUsed: -1, heapTotal: -1 }
1502
+ }
829
1503
 
830
- await this._stopService(id)
1504
+ this.emit('application:worker:health', {
1505
+ id: worker[kId],
1506
+ application: id,
1507
+ worker: index,
1508
+ currentHealth: health,
1509
+ unhealthy,
1510
+ healthConfig: worker[kConfig].health
1511
+ })
1512
+
1513
+ if (unhealthy) {
1514
+ if (health.elu > maxELU) {
1515
+ this.logger.error(
1516
+ `The ${errorLabel} has an ELU of ${(health.elu * 100).toFixed(2)} %, above the maximum allowed usage of ${(maxELU * 100).toFixed(2)} %.`
1517
+ )
1518
+ }
831
1519
 
832
- if (wasStarted) {
833
- await this.startService(id)
1520
+ if (memoryUsage > maxHeapUsed) {
1521
+ this.logger.error(
1522
+ `The ${errorLabel} is using ${(memoryUsage * 100).toFixed(2)} % of the memory, above the maximum allowed usage of ${(maxHeapUsed * 100).toFixed(2)} %.`
1523
+ )
834
1524
  }
835
1525
 
836
- this.logger?.info(`Service ${id} has been successfully reloaded ...`)
1526
+ unhealthyChecks++
1527
+ } else {
1528
+ unhealthyChecks = 0
1529
+ }
1530
+
1531
+ if (unhealthyChecks === maxUnhealthyChecks) {
1532
+ try {
1533
+ this.emit('application:worker:unhealthy', { application: id, worker: index })
837
1534
 
838
- if (serviceConfig.entrypoint) {
839
- this.#showUrl()
1535
+ this.logger.error(
1536
+ { elu: health.elu, maxELU, memoryUsage: health.heapUsed, maxMemoryUsage: maxHeapUsed },
1537
+ `The ${errorLabel} is unhealthy. Replacing it ...`
1538
+ )
1539
+
1540
+ await this.#replaceWorker(config, applicationConfig, workersCount, id, index, worker)
1541
+ } catch (e) {
1542
+ this.logger.error(
1543
+ { elu: health.elu, maxELU, memoryUsage: health.heapUsed, maxMemoryUsage: maxHeapUsed },
1544
+ `Cannot replace the ${errorLabel}. Forcefully terminating it ...`
1545
+ )
1546
+
1547
+ worker.terminate()
840
1548
  }
841
- } catch (e) {
842
- this.logger?.error(e)
1549
+ } else {
1550
+ worker[kHealthCheckTimer].refresh()
843
1551
  }
844
- })
1552
+ }, interval)
1553
+ }
845
1554
 
846
- // Store locally
847
- this.#services.set(id, service)
1555
+ async #startWorker (
1556
+ config,
1557
+ applicationConfig,
1558
+ workersCount,
1559
+ id,
1560
+ index,
1561
+ silent,
1562
+ bootstrapAttempt = 0,
1563
+ worker = undefined,
1564
+ disableRestartAttempts = false
1565
+ ) {
1566
+ const label = this.#workerExtendedLabel(id, index, workersCount)
848
1567
 
849
- if (serviceConfig.entrypoint) {
850
- this.#entrypoint = service
851
- this.#entrypointId = id
1568
+ if (!silent) {
1569
+ this.logger.info(`Starting the ${label}...`)
852
1570
  }
853
1571
 
854
- // Setup the interceptor
855
- this.#interceptor.route(id, service)
1572
+ if (!worker) {
1573
+ worker = await this.#getWorkerById(id, index, false, false)
1574
+ }
1575
+
1576
+ const eventPayload = { application: id, worker: index, workersCount }
1577
+
1578
+ // The application was stopped, recreate the thread
1579
+ if (!worker) {
1580
+ await this.#setupApplication(applicationConfig, index)
1581
+ worker = await this.#getWorkerById(id, index)
1582
+ }
856
1583
 
857
- // Store dependencies
858
- const [{ dependencies }] = await waitEventFromITC(service, 'init')
1584
+ worker[kWorkerStatus] = 'starting'
1585
+ this.emit('application:worker:starting', eventPayload)
859
1586
 
860
- if (autoload) {
861
- serviceConfig.dependencies = dependencies
862
- for (const { envVar, url } of dependencies) {
863
- if (envVar) {
864
- serviceConfig.localServiceEnvVars.set(envVar, url)
1587
+ try {
1588
+ let workerUrl
1589
+ if (config.startTimeout > 0) {
1590
+ workerUrl = await executeWithTimeout(sendViaITC(worker, 'start'), config.startTimeout)
1591
+
1592
+ if (workerUrl === kTimeout) {
1593
+ this.emit('application:worker:startTimeout', eventPayload)
1594
+ this.logger.info(`The ${label} failed to start in ${config.startTimeout}ms. Forcefully killing the thread.`)
1595
+ worker.terminate()
1596
+ throw new ApplicationStartTimeoutError(id, config.startTimeout)
865
1597
  }
1598
+ } else {
1599
+ workerUrl = await sendViaITC(worker, 'start')
1600
+ }
1601
+
1602
+ await this.#avoidOutOfOrderThreadLogs()
1603
+
1604
+ if (workerUrl) {
1605
+ this.#url = workerUrl
1606
+ }
1607
+
1608
+ worker[kWorkerStatus] = 'started'
1609
+ this.emit('application:worker:started', eventPayload)
1610
+ this.#broadcastWorkers()
1611
+
1612
+ if (!silent) {
1613
+ this.logger.info(`Started the ${label}...`)
1614
+ }
1615
+
1616
+ const { enabled, gracePeriod } = worker[kConfig].health
1617
+ if (enabled && config.restartOnError > 0) {
1618
+ // If gracePeriod is 0, it is set to 1 so that health checks start immediately;
1619
+ // health events, however, only start once the worker has started.
1620
+ setTimeout(
1621
+ () => {
1622
+ this.#setupHealthCheck(config, applicationConfig, workersCount, id, index, worker, label)
1623
+ },
1624
+ gracePeriod > 0 ? gracePeriod : 1
1625
+ ).unref()
866
1626
  }
1627
+ } catch (err) {
1628
+ const error = ensureError(err)
1629
+ worker[kITC].notify('application:worker:start:processed')
1630
+
1631
+ // TODO: handle port allocation error here
1632
+ if (error.code === 'EADDRINUSE' || error.code === 'EACCES') throw error
1633
+
1634
+ this.#cleanupWorker(worker)
1635
+
1636
+ if (worker[kWorkerStatus] !== 'exited') {
1637
+ // This prevents the exit handler from restarting the application
1638
+ worker[kWorkerStatus] = 'exited'
1639
+
1640
+ // Wait for the worker to exit gracefully, otherwise we terminate it
1641
+ const waitTimeout = await executeWithTimeout(once(worker, 'exit'), config.gracefulShutdown.application)
1642
+
1643
+ if (waitTimeout === kTimeout) {
1644
+ await worker.terminate()
1645
+ }
1646
+ }
1647
+
1648
+ this.emit('application:worker:start:error', { ...eventPayload, error })
1649
+
1650
+ if (error.code !== 'PLT_RUNTIME_APPLICATION_START_TIMEOUT') {
1651
+ this.logger.error({ err: ensureLoggableError(error) }, `Failed to start ${label}: ${error.message}`)
1652
+ }
1653
+
1654
+ const restartOnError = config.restartOnError
1655
+
1656
+ if (disableRestartAttempts || !restartOnError) {
1657
+ throw error
1658
+ }
1659
+
1660
+ if (bootstrapAttempt++ >= MAX_BOOTSTRAP_ATTEMPTS || restartOnError === 0) {
1661
+ this.logger.error(`Failed to start ${label} after ${MAX_BOOTSTRAP_ATTEMPTS} attempts.`)
1662
+ this.emit('application:worker:start:failed', { ...eventPayload, error })
1663
+ throw error
1664
+ }
1665
+
1666
+ if (restartOnError < IMMEDIATE_RESTART_MAX_THRESHOLD) {
1667
+ this.logger.warn(
1668
+ `Performing attempt ${bootstrapAttempt} of ${MAX_BOOTSTRAP_ATTEMPTS} to start the ${label} again ...`
1669
+ )
1670
+ } else {
1671
+ this.logger.warn(
1672
+ `Attempt ${bootstrapAttempt} of ${MAX_BOOTSTRAP_ATTEMPTS} to start the ${label} again will be performed in ${restartOnError}ms ...`
1673
+ )
1674
+ }
1675
+
1676
+ await this.#restartCrashedWorker(config, applicationConfig, workersCount, id, index, silent, bootstrapAttempt)
867
1677
  }
868
1678
  }
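The start path above relies on an `executeWithTimeout` helper and a `kTimeout` sentinel to bound how long a worker may take to start. As a rough, generic equivalent (the sentinel and helper below are local to this sketch, not the package's actual implementation):

```js
// Sketch: race an operation against a timer and detect the timeout case.
import { setTimeout as sleep } from 'node:timers/promises'

const kTimeout = Symbol('timeout')

function executeWithTimeout (promise, timeout) {
  // The timer is unref'd so it never keeps the process alive on its own.
  return Promise.race([promise, sleep(timeout, kTimeout, { ref: false })])
}

// Simulate an operation that takes longer than the allowed start timeout.
const result = await executeWithTimeout(sleep(10_000, 'started'), 5_000)

if (result === kTimeout) {
  console.log('timed out, the worker would be terminated here')
} else {
  console.log('worker reported:', result)
}
```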
869
1679
 
870
- async #restartCrashedService (id) {
871
- const config = this.#configManager.current
872
- const serviceConfig = config.services.find(s => s.id === id)
1680
+ async #stopWorker (workersCount, id, index, silent, worker, dependents) {
1681
+ if (!worker) {
1682
+ worker = await this.#getWorkerById(id, index, false, false)
1683
+ }
873
1684
 
874
- let restartPromise = this.#restartPromises.get(id)
1685
+ if (!worker) {
1686
+ return
1687
+ }
1688
+
1689
+ // Boot should be aborted, discard the worker
1690
+ if (worker[kWorkerStatus] === 'boot') {
1691
+ return this.#discardWorker(worker)
1692
+ }
1693
+
1694
+ const eventPayload = { application: id, worker: index, workersCount }
1695
+
1696
+ worker[kWorkerStatus] = 'stopping'
1697
+ worker[kITC].removeAllListeners('changed')
1698
+ this.emit('application:worker:stopping', eventPayload)
1699
+
1700
+ const label = this.#workerExtendedLabel(id, index, workersCount)
1701
+
1702
+ if (!silent) {
1703
+ this.logger.info(`Stopping the ${label}...`)
1704
+ }
1705
+
1706
+ const exitTimeout = this.#config.gracefulShutdown.application
1707
+ const exitPromise = once(worker, 'exit')
1708
+
1709
+ // Always send the stop message, it will shut down workers that only had ITC and interceptors setup
1710
+ try {
1711
+ await executeWithTimeout(sendViaITC(worker, 'stop', { force: !!this.error, dependents }), exitTimeout)
1712
+ } catch (error) {
1713
+ this.emit('application:worker:stop:error', eventPayload)
1714
+ this.logger.info({ error: ensureLoggableError(error) }, `Failed to stop ${label}. Killing a worker thread.`)
1715
+ } finally {
1716
+ worker[kITC].notify('application:worker:stop:processed')
1717
+ // Wait for the processed message to be received
1718
+ await sleep(1)
1719
+
1720
+ worker[kITC].close()
1721
+ }
1722
+
1723
+ if (!silent) {
1724
+ this.logger.info(`Stopped the ${label}...`)
1725
+ }
1726
+
1727
+ // Wait for the worker thread to finish, we're going to create a new one if the application is ever restarted
1728
+ const res = await executeWithTimeout(exitPromise, exitTimeout)
1729
+
1730
+ // If the worker didn't exit in time, kill it
1731
+ if (res === kTimeout) {
1732
+ this.emit('application:worker:exit:timeout', eventPayload)
1733
+ await worker.terminate()
1734
+ }
1735
+
1736
+ await this.#avoidOutOfOrderThreadLogs()
1737
+
1738
+ worker[kWorkerStatus] = 'stopped'
1739
+ this.emit('application:worker:stopped', eventPayload)
1740
+ this.#broadcastWorkers()
1741
+ }
1742
+
1743
+ #cleanupWorker (worker) {
1744
+ clearTimeout(worker[kHealthCheckTimer])
1745
+
1746
+ const currentWorker = this.#workers.get(worker[kFullId])
1747
+
1748
+ if (currentWorker === worker) {
1749
+ this.#workers.delete(worker[kFullId])
1750
+ }
1751
+
1752
+ worker[kITC].close()
1753
+ }
1754
+
1755
+ async #discardWorker (worker) {
1756
+ this.#meshInterceptor.unroute(worker[kApplicationId], worker, true)
1757
+ worker.removeAllListeners('exit')
1758
+ await worker.terminate()
1759
+
1760
+ return this.#cleanupWorker(worker)
1761
+ }
1762
+
1763
+ #workerExtendedLabel (applicationId, workerId, workersCount) {
1764
+ return workersCount > 1
1765
+ ? `worker ${workerId} of the application "${applicationId}"`
1766
+ : `application "${applicationId}"`
1767
+ }
1768
+
1769
+ async #restartCrashedWorker (config, applicationConfig, workersCount, id, index, silent, bootstrapAttempt) {
1770
+ const workerId = `${id}:${index}`
1771
+
1772
+ let restartPromise = this.#restartingWorkers.get(workerId)
875
1773
  if (restartPromise) {
876
1774
  await restartPromise
877
1775
  return
878
1776
  }
879
1777
 
880
1778
  restartPromise = new Promise((resolve, reject) => {
881
- setTimeout(async () => {
882
- this.#restartPromises.delete(id)
1779
+ async function restart () {
1780
+ this.#restartingWorkers.delete(workerId)
883
1781
 
884
- try {
885
- await this.#setupService(serviceConfig)
1782
+ // If some processes were scheduled to restart
1783
+ // but the runtime is stopped, ignore it
1784
+ if (!this.#status.startsWith('start')) {
1785
+ return
1786
+ }
886
1787
 
887
- const started = this.#startedServices.get(id)
888
- if (started) {
889
- this.#startedServices.set(id, false)
890
- await this.startService(id)
891
- }
1788
+ try {
1789
+ await this.#setupWorker(config, applicationConfig, workersCount, id, index)
1790
+ await this.#startWorker(config, applicationConfig, workersCount, id, index, silent, bootstrapAttempt)
892
1791
 
1792
+ this.logger.info(
1793
+ `The ${this.#workerExtendedLabel(id, index, workersCount)} has been successfully restarted ...`
1794
+ )
893
1795
  resolve()
894
1796
  } catch (err) {
1797
+ // The runtime was stopped while the restart was happening, ignore any error.
1798
+ if (!this.#status.startsWith('start')) {
1799
+ resolve()
1800
+ }
1801
+
895
1802
  reject(err)
896
1803
  }
897
- }, config.restartOnError)
1804
+ }
1805
+
1806
+ if (config.restartOnError < IMMEDIATE_RESTART_MAX_THRESHOLD) {
1807
+ process.nextTick(restart.bind(this))
1808
+ } else {
1809
+ setTimeout(restart.bind(this), config.restartOnError)
1810
+ }
898
1811
  })
899
1812
 
900
- this.#restartPromises.set(id, restartPromise)
1813
+ this.#restartingWorkers.set(workerId, restartPromise)
901
1814
  await restartPromise
902
1815
  }
903
1816
 
904
- async #getServiceById (id, ensureStarted = false, mustExist = true) {
905
- const service = this.#services.get(id)
1817
+ async #replaceWorker (config, applicationConfig, workersCount, applicationId, index, worker) {
1818
+ const workerId = `${applicationId}:${index}`
1819
+ let newWorker
1820
+
1821
+ try {
1822
+ // Create a new worker
1823
+ newWorker = await this.#setupWorker(config, applicationConfig, workersCount, applicationId, index, false)
1824
+
1825
+ // Make sure the runtime hasn't been stopped in the meanwhile
1826
+ if (this.#status !== 'started') {
1827
+ return this.#discardWorker(newWorker)
1828
+ }
1829
+
1830
+ // Add the worker to the mesh
1831
+ await this.#startWorker(config, applicationConfig, workersCount, applicationId, index, false, 0, newWorker, true)
1832
+
1833
+ // Make sure the runtime hasn't been stopped in the meanwhile
1834
+ if (this.#status !== 'started') {
1835
+ return this.#discardWorker(newWorker)
1836
+ }
1837
+
1838
+ this.#workers.set(workerId, newWorker)
1839
+ this.#meshInterceptor.route(applicationId, newWorker)
1840
+
1841
+ // Remove the old worker and then kill it
1842
+ await sendViaITC(worker, 'removeFromMesh')
1843
+ } catch (e) {
1844
+ newWorker?.terminate?.()
1845
+ throw e
1846
+ }
1847
+
1848
+ await this.#stopWorker(workersCount, applicationId, index, false, worker, [])
1849
+ }
1850
+
1851
+ async #getApplicationById (applicationId, ensureStarted = false, mustExist = true) {
1852
+ // If the applicationId includes the worker, properly split
1853
+ let workerId
1854
+ const matched = applicationId.match(/^(.+):(\d+)$/)
1855
+
1856
+ if (matched) {
1857
+ applicationId = matched[1]
1858
+ workerId = matched[2]
1859
+ }
1860
+
1861
+ return this.#getWorkerById(applicationId, workerId, ensureStarted, mustExist)
1862
+ }
1863
+
1864
+ async #getWorkerById (applicationId, workerId, ensureStarted = false, mustExist = true) {
1865
+ let worker
1866
+
1867
+ if (typeof workerId !== 'undefined') {
1868
+ worker = this.#workers.get(`${applicationId}:${workerId}`)
1869
+ } else {
1870
+ worker = this.#workers.next(applicationId)
1871
+ }
1872
+
1873
+ const applicationsIds = this.getApplicationsIds()
906
1874
 
907
- if (!service) {
908
- if (!mustExist && this.#servicesIds.includes(id)) {
1875
+ if (!worker) {
1876
+ if (!mustExist && applicationsIds.includes(applicationId)) {
909
1877
  return null
910
1878
  }
911
1879
 
912
- throw new errors.ServiceNotFoundError(id, Array.from(this.#services.keys()).join(', '))
1880
+ if (applicationsIds.includes(applicationId)) {
1881
+ const availableWorkers = Array.from(this.#workers.keys())
1882
+ .filter(key => key.startsWith(applicationId + ':'))
1883
+ .map(key => key.split(':')[1])
1884
+ .join(', ')
1885
+ throw new WorkerNotFoundError(workerId, applicationId, availableWorkers)
1886
+ } else {
1887
+ throw new ApplicationNotFoundError(applicationId, applicationsIds.join(', '))
1888
+ }
913
1889
  }
914
1890
 
915
1891
  if (ensureStarted) {
916
- const serviceStatus = await sendViaITC(service, 'getStatus')
1892
+ const applicationStatus = await sendViaITC(worker, 'getStatus')
917
1893
 
918
- if (serviceStatus !== 'started') {
919
- throw new errors.ServiceNotStartedError(id)
1894
+ if (applicationStatus !== 'started') {
1895
+ throw new ApplicationNotStartedError(applicationId)
920
1896
  }
921
1897
  }
922
1898
 
923
- return service
1899
+ return worker
1900
+ }
1901
+
1902
+ async #createWorkersBroadcastChannel () {
1903
+ this.#workersBroadcastChannel?.close()
1904
+ this.#workersBroadcastChannel = new BroadcastChannel(kWorkersBroadcast)
1905
+ }
1906
+
1907
+ async #broadcastWorkers () {
1908
+ const workers = new Map()
1909
+
1910
+ // Create the list of workers
1911
+ for (const worker of this.#workers.values()) {
1912
+ if (worker[kWorkerStatus] !== 'started') {
1913
+ continue
1914
+ }
1915
+
1916
+ const application = worker[kApplicationId]
1917
+ let applicationWorkers = workers.get(application)
1918
+
1919
+ if (!applicationWorkers) {
1920
+ applicationWorkers = []
1921
+ workers.set(application, applicationWorkers)
1922
+ }
1923
+
1924
+ applicationWorkers.push({
1925
+ id: worker[kId],
1926
+ application: worker[kApplicationId],
1927
+ worker: worker[kWorkerId],
1928
+ thread: worker.threadId
1929
+ })
1930
+ }
1931
+
1932
+ try {
1933
+ this.#workersBroadcastChannel.postMessage(workers)
1934
+ } catch (err) {
1935
+ this.logger?.error({ err }, 'Error when broadcasting workers')
1936
+ }
1937
+ }
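`#broadcastWorkers` above relies on `BroadcastChannel` (exposed by `node:worker_threads` and, in recent Node.js versions, as a global), which delivers a message to every thread that opened a channel with the same name. A minimal sketch with an illustrative channel name and payload:

```js
// Sketch: publish the current worker topology to all interested threads.
import { BroadcastChannel } from 'node:worker_threads'

const channel = new BroadcastChannel('runtime-workers')

// Main thread: post the mapping of application id -> running workers.
channel.postMessage({
  api: [{ worker: 0, thread: 1 }],
  backend: [{ worker: 0, thread: 2 }, { worker: 1, thread: 3 }]
})

// Any worker thread that created `new BroadcastChannel('runtime-workers')`
// receives the same payload:
channel.onmessage = event => {
  console.log('workers changed', event.data)
}

channel.unref() // do not keep the process alive just for the channel
```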
1938
+
1939
+ async #getWorkerMessagingChannel ({ application, worker }, context) {
1940
+ const target = await this.#getWorkerById(application, worker, true, true)
1941
+
1942
+ const { port1, port2 } = new MessageChannel()
1943
+
1944
+ // Send the first port to the target
1945
+ const response = await executeWithTimeout(
1946
+ sendViaITC(target, 'saveMessagingChannel', port1, [port1]),
1947
+ this.#config.messagingTimeout
1948
+ )
1949
+
1950
+ if (response === kTimeout) {
1951
+ throw new MessagingError(application, 'Timeout while establishing a communication channel.')
1952
+ }
1953
+
1954
+ context.transferList = [port2]
1955
+ this.emit('application:worker:messagingChannel', { application, worker })
1956
+ return port2
924
1957
  }
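The messaging helper above wires two threads together by transferring one end of a `MessageChannel` to the target worker and handing the other end back to the caller. A condensed, self-contained sketch of that handshake (the inline worker code is illustrative):

```js
// Sketch: give a worker one MessagePort and keep the other for direct messaging.
import { Worker, MessageChannel } from 'node:worker_threads'

const target = new Worker(
  `const { parentPort } = require('node:worker_threads')
   parentPort.on('message', ({ port }) => {
     // The transferred port is now owned by this worker.
     port.on('message', msg => port.postMessage(msg + ' pong'))
   })`,
  { eval: true }
)

const { port1, port2 } = new MessageChannel()

// port1 must be listed in the transfer list, otherwise postMessage throws.
target.postMessage({ port: port1 }, [port1])

port2.on('message', async msg => {
  console.log('from worker:', msg) // "ping pong"
  port2.close()
  await target.terminate()
})
port2.postMessage('ping')
```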
925
1958
 
926
1959
  async #getRuntimePackageJson () {
927
- const runtimeDir = this.#configManager.dirname
1960
+ const runtimeDir = this.#root
928
1961
  const packageJsonPath = join(runtimeDir, 'package.json')
929
1962
  const packageJsonFile = await readFile(packageJsonPath, 'utf8')
930
1963
  const packageJson = JSON.parse(packageJsonFile)
931
1964
  return packageJson
932
1965
  }
933
1966
 
934
- #getRuntimeLogsDir (runtimePID) {
935
- return join(this.#runtimeTmpDir, runtimePID.toString(), 'logs')
1967
+ #handleWorkerStandardStreams (worker, applicationId, workerId) {
1968
+ const binding = { name: applicationId }
1969
+
1970
+ if (typeof workerId !== 'undefined') {
1971
+ binding.worker = workerId
1972
+ }
1973
+
1974
+ const logger = this.logger.child(binding, { level: 'trace' })
1975
+
1976
+ const selectors = {
1977
+ stdout: { level: 'info', caller: 'STDOUT' },
1978
+ stderr: { level: 'error', caller: 'STDERR' }
1979
+ }
1980
+
1981
+ worker.stdout.setEncoding('utf8')
1982
+ worker.stdout.on('data', raw => {
1983
+ if (raw.includes(kStderrMarker)) {
1984
+ this.#forwardThreadLog(logger, selectors.stderr, raw.replaceAll(kStderrMarker, ''), 'stderr')
1985
+ } else {
1986
+ this.#forwardThreadLog(logger, selectors.stdout, raw, 'stdout')
1987
+ }
1988
+ })
1989
+
1990
+ // Whatever is output here comes from a direct process.stderr.write in the thread.
1991
+ // There's nothing we can do about out-of-order logs in that case, due to a Node.js bug.
1992
+ worker.stderr.setEncoding('utf8')
1993
+ worker.stderr.on('data', raw => {
1994
+ this.#forwardThreadLog(logger, selectors.stderr, raw, 'stderr')
1995
+ })
936
1996
  }
937
1997
 
938
- async #getRuntimeLogFiles (runtimePID) {
939
- const runtimeLogsDir = this.#getRuntimeLogsDir(runtimePID)
940
- const runtimeLogsFiles = await readdir(runtimeLogsDir)
941
- return runtimeLogsFiles
942
- .filter(file => file.startsWith('logs'))
943
- .sort((log1, log2) => {
944
- const index1 = parseInt(log1.slice('logs.'.length))
945
- const index2 = parseInt(log2.slice('logs.'.length))
946
- return index1 - index2
947
- })
1998
+ // label is the key in the logger object, either 'stdout' or 'stderr'
1999
+ #forwardThreadLog (logger, { level, caller }, data, label) {
2000
+ // When captureStdio is false, write directly to the logger destination
2001
+ if (!this.#config.logger.captureStdio) {
2002
+ this.#stdio[label].write(data)
2003
+ return
2004
+ }
2005
+
2006
+ let plainMessages = ''
2007
+ for (const raw of data.split('\n')) {
2008
+ // First of all, try to parse the message as JSON
2009
+ let message
2010
+ let json
2011
+ // The message can only be a JSON object if it has at least 2 bytes
2012
+ if (raw.length >= 2) {
2013
+ try {
2014
+ message = JSON.parse(raw)
2015
+ json = true
2016
+ } catch {
2017
+ // No-op, we assume the message is raw
2018
+ }
2019
+ }
2020
+
2021
+ const pinoLog =
2022
+ typeof message?.level === 'number' && typeof message?.time === 'number' && typeof message?.msg === 'string'
2023
+
2024
+ // Directly write to the Pino destination
2025
+ if (pinoLog) {
2026
+ if (!this.#loggerDestination) {
2027
+ continue
2028
+ }
2029
+
2030
+ this.#loggerDestination.lastLevel = message.level
2031
+ this.#loggerDestination.lastTime = message.time
2032
+ this.#loggerDestination.lastMsg = message.msg
2033
+ this.#loggerDestination.lastObj = message
2034
+ this.#loggerDestination.lastLogger = logger
2035
+ this.#loggerDestination.write(raw + '\n')
2036
+ continue
2037
+ }
2038
+
2039
+ if (json) {
2040
+ logger[level]({ caller, [label]: message })
2041
+ continue
2042
+ }
2043
+
2044
+ // Not a Pino JSON nor a JSON object, accumulate the message
2045
+ if (!pinoLog && !json) {
2046
+ plainMessages += (plainMessages.length ? '\n' : '') + raw
2047
+ }
2048
+ }
2049
+
2050
+ // Write whatever is left
2051
+ if (plainMessages.length > 0) {
2052
+ logger[level]({ caller }, plainMessages.replace(/\n$/, ''))
2053
+ }
948
2054
  }
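`#forwardThreadLog` above distinguishes three cases per line: a Pino record (forwarded verbatim to the shared destination), other JSON (logged as a structured payload), and plain text (accumulated and logged once). A small sketch of the Pino-record detection it performs (a standalone helper, not the package's API):

```js
// Sketch: a line counts as a Pino log record only when it is valid JSON with
// a numeric `level`, a numeric `time` and a string `msg`.
function isPinoLogLine (raw) {
  if (raw.length < 2) {
    return false
  }

  let message
  try {
    message = JSON.parse(raw)
  } catch {
    return false
  }

  return (
    typeof message?.level === 'number' &&
    typeof message?.time === 'number' &&
    typeof message?.msg === 'string'
  )
}

console.log(isPinoLogLine('{"level":30,"time":1700000000000,"msg":"hello"}')) // true
console.log(isPinoLogLine('{"foo":"bar"}')) // false, generic JSON
console.log(isPinoLogLine('plain text from console.log')) // false
```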
949
2055
 
950
- async #getAllLogsFiles () {
951
- try {
952
- await access(this.#runtimeTmpDir)
953
- } catch (err) {
954
- this.logger.error({ err: ensureLoggableError(err) }, 'Cannot access temporary folder.')
955
- return []
2056
+ // Due to the Worker Threads implementation via MessagePort, when two messages are printed almost
2057
+ // at the same time from a worker and the main thread, the latter always arrives first.
2058
+ // Let's wait a few more ticks to ensure the right order.
2059
+ async #avoidOutOfOrderThreadLogs () {
2060
+ for (let i = 0; i < 2; i++) {
2061
+ await immediate()
956
2062
  }
2063
+ }
957
2064
 
958
- const runtimePIDs = await readdir(this.#runtimeTmpDir)
959
- const runtimesLogFiles = []
2065
+ async #updateApplicationConfigWorkers (applicationId, workers) {
2066
+ this.logger.info(`Updating application "${applicationId}" config workers to ${workers}`)
960
2067
 
961
- for (const runtimePID of runtimePIDs) {
962
- const runtimeLogsDir = this.#getRuntimeLogsDir(runtimePID)
963
- const runtimeLogsDirStat = await stat(runtimeLogsDir)
964
- const runtimeLogFiles = await this.#getRuntimeLogFiles(runtimePID)
965
- const lastModified = runtimeLogsDirStat.mtime
2068
+ this.#config.applications.find(s => s.id === applicationId).workers = workers
2069
+ const application = await this.#getApplicationById(applicationId)
2070
+ this.#workers.setCount(applicationId, workers)
2071
+ application[kConfig].workers = workers
966
2072
 
967
- runtimesLogFiles.push({
968
- runtimePID: parseInt(runtimePID),
969
- runtimeLogFiles,
970
- lastModified
971
- })
2073
+ const promises = []
2074
+ for (const [workerId, worker] of this.#workers.entries()) {
2075
+ if (workerId.startsWith(`${applicationId}:`)) {
2076
+ promises.push(sendViaITC(worker, 'updateWorkersCount', { applicationId, workers }))
2077
+ }
972
2078
  }
973
2079
 
974
- return runtimesLogFiles.sort((runtime1, runtime2) => runtime1.lastModified - runtime2.lastModified)
2080
+ const results = await Promise.allSettled(promises)
2081
+ for (const result of results) {
2082
+ if (result.status === 'rejected') {
2083
+ this.logger.error({ err: result.reason }, `Cannot update application "${applicationId}" workers`)
2084
+ throw result.reason
2085
+ }
2086
+ }
975
2087
  }
976
2088
 
977
- #forwardThreadLog (message) {
978
- if (!this.#loggerDestination) {
979
- return
2089
+ async #updateApplicationConfigHealth (applicationId, health) {
2090
+ this.logger.info(`Updating application "${applicationId}" config health heap to ${JSON.stringify(health)}`)
2091
+ const { maxHeapTotal, maxYoungGeneration } = health
2092
+
2093
+ const application = this.#config.applications.find(s => s.id === applicationId)
2094
+ if (maxHeapTotal) {
2095
+ application.health.maxHeapTotal = maxHeapTotal
2096
+ }
2097
+ if (maxYoungGeneration) {
2098
+ application.health.maxYoungGeneration = maxYoungGeneration
2099
+ }
2100
+ }
2101
+
2102
+ async #validateUpdateApplicationResources (updates) {
2103
+ if (!Array.isArray(updates)) {
2104
+ throw new InvalidArgumentError('updates', 'must be an array')
2105
+ }
2106
+ if (updates.length === 0) {
2107
+ throw new InvalidArgumentError('updates', 'must have at least one element')
980
2108
  }
981
2109
 
982
- for (const log of message.logs) {
983
- // In order to being able to forward messages serialized in the
984
- // worker threads by directly writing to the destinations using multistream
985
- // we unfortunately need to reparse the message to set some internal flags
986
- // of the destination which are never set since we bypass pino.
987
- let message = JSON.parse(log)
988
- let { level, time, msg, raw } = message
2110
+ const config = this.#config
2111
+ const validatedUpdates = []
2112
+ for (const update of updates) {
2113
+ const { application: applicationId } = update
989
2114
 
990
- try {
991
- const parsed = JSON.parse(raw.trimEnd())
2115
+ if (!applicationId) {
2116
+ throw new InvalidArgumentError('application', 'must be a string')
2117
+ }
2118
+ const applicationConfig = config.applications.find(s => s.id === applicationId)
2119
+ if (!applicationConfig) {
2120
+ throw new ApplicationNotFoundError(applicationId, Array.from(this.getApplicationsIds()).join(', '))
2121
+ }
992
2122
 
993
- if (typeof parsed.level === 'number' && typeof parsed.time === 'number') {
994
- level = parsed.level
995
- time = parsed.time
996
- message = parsed
2123
+ const { workers: currentWorkers, health: currentHealth } = await this.getApplicationResourcesInfo(applicationId)
2124
+
2125
+ let workers
2126
+ if (update.workers !== undefined) {
2127
+ if (typeof update.workers !== 'number') {
2128
+ throw new InvalidArgumentError('workers', 'must be a number')
2129
+ }
2130
+ if (update.workers <= 0) {
2131
+ throw new InvalidArgumentError('workers', 'must be greater than 0')
2132
+ }
2133
+ if (update.workers > MAX_WORKERS) {
2134
+ throw new InvalidArgumentError('workers', `must be less than ${MAX_WORKERS}`)
2135
+ }
2136
+
2137
+ if (currentWorkers === update.workers) {
2138
+ this.logger.warn(
2139
+ { applicationId, workers: update.workers },
2140
+ 'No change in the number of workers for application'
2141
+ )
997
2142
  } else {
998
- message.raw = undefined
999
- message.payload = parsed
2143
+ workers = update.workers
2144
+ }
2145
+ }
2146
+
2147
+ let maxHeapTotal, maxYoungGeneration
2148
+ if (update.health) {
2149
+ if (update.health.maxHeapTotal !== undefined) {
2150
+ if (typeof update.health.maxHeapTotal === 'string') {
2151
+ try {
2152
+ maxHeapTotal = parseMemorySize(update.health.maxHeapTotal)
2153
+ } catch {
2154
+ throw new InvalidArgumentError('maxHeapTotal', 'must be a valid memory size')
2155
+ }
2156
+ } else if (typeof update.health.maxHeapTotal === 'number') {
2157
+ maxHeapTotal = update.health.maxHeapTotal
2158
+ if (update.health.maxHeapTotal <= 0) {
2159
+ throw new InvalidArgumentError('maxHeapTotal', 'must be greater than 0')
2160
+ }
2161
+ } else {
2162
+ throw new InvalidArgumentError('maxHeapTotal', 'must be a number or a string representing a memory size')
2163
+ }
2164
+
2165
+ if (currentHealth.maxHeapTotal === maxHeapTotal) {
2166
+ this.logger.warn({ applicationId, maxHeapTotal }, 'No change in the max heap total for application')
2167
+ maxHeapTotal = undefined
2168
+ }
2169
+ }
2170
+
2171
+ if (update.health.maxYoungGeneration !== undefined) {
2172
+ if (typeof update.health.maxYoungGeneration === 'string') {
2173
+ try {
2174
+ maxYoungGeneration = parseMemorySize(update.health.maxYoungGeneration)
2175
+ } catch {
2176
+ throw new InvalidArgumentError('maxYoungGeneration', 'must be a valid memory size')
2177
+ }
2178
+ } else if (typeof update.health.maxYoungGeneration === 'number') {
2179
+ maxYoungGeneration = update.health.maxYoungGeneration
2180
+ if (update.health.maxYoungGeneration <= 0) {
2181
+ throw new InvalidArgumentError('maxYoungGeneration', 'must be greater than 0')
2182
+ }
2183
+ } else {
2184
+ throw new InvalidArgumentError(
2185
+ 'maxYoungGeneration',
2186
+ 'must be a number or a string representing a memory size'
2187
+ )
2188
+ }
2189
+
2190
+ if (currentHealth.maxYoungGeneration && currentHealth.maxYoungGeneration === maxYoungGeneration) {
2191
+ this.logger.warn(
2192
+ { applicationId, maxYoungGeneration },
2193
+ 'No change in the max young generation for application'
2194
+ )
2195
+ maxYoungGeneration = undefined
2196
+ }
1000
2197
  }
1001
- } catch {
1002
- if (typeof message.raw === 'string') {
1003
- message.msg = message.raw.replace(/\n$/, '')
2198
+ }
2199
+
2200
+ if (workers || maxHeapTotal || maxYoungGeneration) {
2201
+ let health
2202
+ if (maxHeapTotal || maxYoungGeneration) {
2203
+ health = {}
2204
+ if (maxHeapTotal) {
2205
+ health.maxHeapTotal = maxHeapTotal
2206
+ }
2207
+ if (maxYoungGeneration) {
2208
+ health.maxYoungGeneration = maxYoungGeneration
2209
+ }
1004
2210
  }
2211
+ validatedUpdates.push({
2212
+ applicationId,
2213
+ config: applicationConfig,
2214
+ workers,
2215
+ health,
2216
+ currentWorkers,
2217
+ currentHealth
2218
+ })
2219
+ }
2220
+ }
2221
+
2222
+ return validatedUpdates
2223
+ }
2224
+
2225
+ async #updateApplicationWorkersAndHealth (
2226
+ applicationId,
2227
+ config,
2228
+ applicationConfig,
2229
+ workers,
2230
+ health,
2231
+ currentWorkers,
2232
+ currentHealth
2233
+ ) {
2234
+ if (currentWorkers > workers) {
2235
+ // stop workers
2236
+ const reportWorkers = await this.#updateApplicationWorkers(
2237
+ applicationId,
2238
+ config,
2239
+ applicationConfig,
2240
+ workers,
2241
+ currentWorkers
2242
+ )
2243
+ // update heap for current workers
2244
+ const reportHealth = await this.#updateApplicationHealth(
2245
+ applicationId,
2246
+ config,
2247
+ applicationConfig,
2248
+ workers,
2249
+ currentHealth,
2250
+ health
2251
+ )
2252
+
2253
+ return { workers: reportWorkers, health: reportHealth }
2254
+ } else {
2255
+ // update application heap
2256
+ await this.#updateApplicationConfigHealth(applicationId, health)
2257
+ // start new workers with new heap
2258
+ const reportWorkers = await this.#updateApplicationWorkers(
2259
+ applicationId,
2260
+ config,
2261
+ applicationConfig,
2262
+ workers,
2263
+ currentWorkers
2264
+ )
2265
+ // update heap for current workers
2266
+ const reportHealth = await this.#updateApplicationHealth(
2267
+ applicationId,
2268
+ config,
2269
+ applicationConfig,
2270
+ currentWorkers,
2271
+ currentHealth,
2272
+ health,
2273
+ false
2274
+ )
1005
2275
 
1006
- message.raw = undefined
2276
+ return { workers: reportWorkers, health: reportHealth }
2277
+ }
2278
+ }
2279
+
2280
+ async #updateApplicationHealth (
2281
+ applicationId,
2282
+ config,
2283
+ applicationConfig,
2284
+ currentWorkers,
2285
+ currentHealth,
2286
+ health,
2287
+ updateConfig = true
2288
+ ) {
2289
+ const report = {
2290
+ current: currentHealth,
2291
+ new: health,
2292
+ updated: []
2293
+ }
2294
+ try {
2295
+ if (updateConfig) {
2296
+ await this.#updateApplicationConfigHealth(applicationId, health)
1007
2297
  }
1008
2298
 
1009
- this.#loggerDestination.lastLevel = level
1010
- this.#loggerDestination.lastTime = time
1011
- this.#loggerDestination.lastMsg = msg
1012
- this.#loggerDestination.lastObj = message
1013
- this.#loggerDestination.lastLogger = this.logger
2299
+ for (let i = 0; i < currentWorkers; i++) {
2300
+ this.logger.info(
2301
+ { health: { current: currentHealth, new: health } },
2302
+ `Restarting application "${applicationId}" worker ${i} to update config health heap...`
2303
+ )
1014
2304
 
1015
- // Never drop the `\n` as the worker thread trimmed the message
1016
- this.#loggerDestination.write(JSON.stringify(message) + '\n')
2305
+ const worker = await this.#getWorkerById(applicationId, i)
2306
+ if (health.maxHeapTotal) {
2307
+ worker[kConfig].health.maxHeapTotal = health.maxHeapTotal
2308
+ }
2309
+ if (health.maxYoungGeneration) {
2310
+ worker[kConfig].health.maxYoungGeneration = health.maxYoungGeneration
2311
+ }
2312
+
2313
+ await this.#replaceWorker(config, applicationConfig, currentWorkers, applicationId, i, worker)
2314
+ report.updated.push(i)
2315
+ this.logger.info(
2316
+ { health: { current: currentHealth, new: health } },
2317
+ `Restarted application "${applicationId}" worker ${i}`
2318
+ )
2319
+ }
2320
+ report.success = true
2321
+ } catch (err) {
2322
+ if (report.updated.length < 1) {
2323
+ this.logger.error({ err }, 'Cannot update application health heap, no worker updated')
2324
+ await this.#updateApplicationConfigHealth(applicationId, currentHealth)
2325
+ } else {
2326
+ this.logger.error(
2327
+ { err },
2328
+ `Cannot update application health heap, updated workers: ${report.updated.length} out of ${currentWorkers}`
2329
+ )
2330
+ }
2331
+ report.success = false
1017
2332
  }
2333
+ return report
1018
2334
  }
1019
- }
1020
2335
 
1021
- module.exports = { Runtime }
2336
+ async #updateApplicationWorkers (applicationId, config, applicationConfig, workers, currentWorkers) {
2337
+ const report = {
2338
+ current: currentWorkers,
2339
+ new: workers
2340
+ }
2341
+ if (currentWorkers < workers) {
2342
+ report.started = []
2343
+ try {
2344
+ await this.#updateApplicationConfigWorkers(applicationId, workers)
2345
+ for (let i = currentWorkers; i < workers; i++) {
2346
+ await this.#setupWorker(config, applicationConfig, workers, applicationId, i)
2347
+ await this.#startWorker(config, applicationConfig, workers, applicationId, i, false, 0)
2348
+ report.started.push(i)
2349
+ }
2350
+ report.success = true
2351
+ } catch (err) {
2352
+ if (report.started.length < 1) {
2353
+ this.logger.error({ err }, 'Cannot start application workers, no worker started')
2354
+ await this.#updateApplicationConfigWorkers(applicationId, currentWorkers)
2355
+ } else {
2356
+ this.logger.error(
2357
+ { err },
2358
+ `Cannot start application workers, started workers: ${report.started.length} out of ${workers}`
2359
+ )
2360
+ await this.#updateApplicationConfigWorkers(applicationId, currentWorkers + report.started.length)
2361
+ }
2362
+ report.success = false
2363
+ }
2364
+ } else {
2365
+ // keep the current workers count until all the application workers are stopped
2366
+ report.stopped = []
2367
+ try {
2368
+ for (let i = currentWorkers - 1; i >= workers; i--) {
2369
+ const worker = await this.#getWorkerById(applicationId, i, false, false)
2370
+ await sendViaITC(worker, 'removeFromMesh')
2371
+ await this.#stopWorker(currentWorkers, applicationId, i, false, worker, [])
2372
+ report.stopped.push(i)
2373
+ }
2374
+ await this.#updateApplicationConfigWorkers(applicationId, workers)
2375
+ report.success = true
2376
+ } catch (err) {
2377
+ if (report.stopped.length < 1) {
2378
+ this.logger.error({ err }, 'Cannot stop application workers, no worker stopped')
2379
+ } else {
2380
+ this.logger.error(
2381
+ { err },
2382
+ `Cannot stop application workers, stopped workers: ${report.stopped.length} out of ${workers}`
2383
+ )
2384
+ await this.#updateApplicationConfigWorkers(applicationId, currentWorkers - report.stopped.length)
2385
+ }
2386
+ report.success = false
2387
+ }
2388
+ }
2389
+ return report
2390
+ }
2391
+
2392
+ #validatePprofCapturePreload () {
2393
+ const found = this.#config.preload?.some(p => p.includes('wattpm-pprof-capture'))
2394
+
2395
+ if (!found) {
2396
+ throw new MissingPprofCapture()
2397
+ }
2398
+ }
2399
+ }