@platformatic/runtime 2.7.1-alpha.2 → 2.8.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/runtime.js CHANGED
@@ -16,16 +16,27 @@ const errors = require('./errors')
16
16
  const { createLogger } = require('./logger')
17
17
  const { startManagementApi } = require('./management-api')
18
18
  const { startPrometheusServer } = require('./prom-server')
19
- const { createSharedStore } = require('./shared-http-cache')
20
19
  const { getRuntimeTmpDir } = require('./utils')
21
20
  const { sendViaITC, waitEventFromITC } = require('./worker/itc')
22
- const { kId, kITC, kConfig } = require('./worker/symbols')
23
-
24
- const Fastify = require('fastify')
21
+ const { RoundRobinMap } = require('./worker/round-robin-map.js')
22
+ const {
23
+ kId,
24
+ kServiceId,
25
+ kWorkerId,
26
+ kITC,
27
+ kConfig,
28
+ kLoggerDestination,
29
+ kLoggingPort,
30
+ kWorkerStatus
31
+ } = require('./worker/symbols')
32
+
33
+ const fastify = require('fastify')
25
34
 
26
35
  const platformaticVersion = require('../package.json').version
27
36
  const kWorkerFile = join(__dirname, 'worker/main.js')
28
37
 
38
+ const kInspectorOptions = Symbol('plt.runtime.worker.inspectorOptions')
39
+
29
40
  const MAX_LISTENERS_COUNT = 100
30
41
  const MAX_METRICS_QUEUE_LENGTH = 5 * 60 // 5 minutes in seconds
31
42
  const COLLECT_METRICS_TIMEOUT = 1000
@@ -34,12 +45,11 @@ const MAX_BOOTSTRAP_ATTEMPTS = 5
34
45
 
35
46
  class Runtime extends EventEmitter {
36
47
  #configManager
48
+ #isProduction
37
49
  #runtimeTmpDir
38
50
  #runtimeLogsDir
39
51
  #env
40
- #services
41
52
  #servicesIds
42
- #entrypoint
43
53
  #entrypointId
44
54
  #url
45
55
  #loggerDestination
@@ -49,12 +59,9 @@ class Runtime extends EventEmitter {
49
59
  #interceptor
50
60
  #managementApi
51
61
  #prometheusServer
52
- #startedServices
53
- #restartPromises
54
- #bootstrapAttempts
55
- #inspectors
56
62
  #inspectorServer
57
- #sharedHttpCache
63
+ #workers
64
+ #restartingWorkers
58
65
 
59
66
  constructor (configManager, runtimeLogsDir, env) {
60
67
  super()
@@ -64,17 +71,13 @@ class Runtime extends EventEmitter {
64
71
  this.#runtimeTmpDir = getRuntimeTmpDir(configManager.dirname)
65
72
  this.#runtimeLogsDir = runtimeLogsDir
66
73
  this.#env = env
67
- this.#services = new Map()
74
+ this.#workers = new RoundRobinMap()
68
75
  this.#servicesIds = []
69
76
  this.#url = undefined
70
77
  // Note: nothing hits the main thread so there is no reason to set the globalDispatcher here
71
78
  this.#interceptor = createThreadInterceptor({ domain: '.plt.local', timeout: true })
72
79
  this.#status = undefined
73
- this.#startedServices = new Map()
74
- this.#restartPromises = new Map()
75
- this.#bootstrapAttempts = new Map()
76
- this.#inspectors = []
77
- this.#sharedHttpCache = null
80
+ this.#restartingWorkers = new Map()
78
81
  }
79
82
 
80
83
  async init () {
@@ -97,19 +100,21 @@ class Runtime extends EventEmitter {
97
100
  this.logger = logger
98
101
  this.#loggerDestination = destination
99
102
 
103
+ this.#isProduction = this.#configManager.args?.production ?? false
104
+ this.#servicesIds = config.services.map(service => service.id)
105
+ this.#workers.configure(config.services, this.#configManager.current.workers, this.#isProduction)
106
+
100
107
  // Create all services, each in is own worker thread
101
108
  for (const serviceConfig of config.services) {
102
- // Setup forwarding of logs from the worker threads to the main thread
103
109
  await this.#setupService(serviceConfig)
104
110
  }
105
111
 
106
112
  try {
107
113
  // Make sure the list exists before computing the dependencies, otherwise some services might not be stopped
108
- this.#servicesIds = config.services.map(service => service.id)
109
114
 
110
115
  if (autoloadEnabled) {
111
116
  checkDependencies(config.services)
112
- this.#services = topologicalSort(this.#services, config)
117
+ this.#workers = topologicalSort(this.#workers, config)
113
118
  }
114
119
 
115
120
  // Recompute the list of services after sorting
@@ -119,15 +124,10 @@ class Runtime extends EventEmitter {
119
124
  throw e
120
125
  }
121
126
 
122
- this.#sharedHttpCache = createSharedStore(
123
- this.#configManager.dirname,
124
- config.httpCache
125
- )
126
-
127
127
  this.#updateStatus('init')
128
128
  }
129
129
 
130
- async start () {
130
+ async start (silent = false) {
131
131
  if (typeof this.#configManager.current.entrypoint === 'undefined') {
132
132
  throw new errors.MissingEntrypointError()
133
133
  }
@@ -136,33 +136,41 @@ class Runtime extends EventEmitter {
136
136
  // Important: do not use Promise.all here since it won't properly manage dependencies
137
137
  try {
138
138
  for (const service of this.#servicesIds) {
139
- await this.startService(service)
139
+ await this.startService(service, silent)
140
140
  }
141
141
 
142
142
  if (this.#configManager.current.inspectorOptions) {
143
143
  const { port } = this.#configManager.current.inspectorOptions
144
144
 
145
- const server = Fastify({
145
+ const server = fastify({
146
146
  loggerInstance: this.logger.child({ name: 'inspector' }, { level: 'warn' })
147
147
  })
148
148
 
149
- const version = await fetch(`http://127.0.0.1:${this.#configManager.current.inspectorOptions.port + 1}/json/version`).then((res) => res.json())
150
-
151
- const data = (await Promise.all(this.#inspectors.map(async (data) => {
152
- const res = await fetch(`http://127.0.0.1:${data.port}/json/list`)
153
- const details = await res.json()
154
- return {
155
- ...details[0],
156
- title: data.id
157
- }
158
- })))
149
+ const version = await fetch(
150
+ `http://127.0.0.1:${this.#configManager.current.inspectorOptions.port + 1}/json/version`
151
+ ).then(res => res.json())
152
+
153
+ const data = await Promise.all(
154
+ Array.from(this.#workers.values()).map(async worker => {
155
+ const data = worker[kInspectorOptions]
156
+
157
+ const res = await fetch(`http://127.0.0.1:${data.port}/json/list`)
158
+ const details = await res.json()
159
+ return {
160
+ ...details[0],
161
+ title: data.id
162
+ }
163
+ })
164
+ )
159
165
 
160
166
  server.get('/json/list', () => data)
161
167
  server.get('/json', () => data)
162
168
  server.get('/json/version', () => version)
163
169
 
164
170
  await server.listen({ port })
165
- this.logger.info('The inspector server is now listening for all services. Open `chrome://inspect` in Google Chrome to connect.')
171
+ this.logger.info(
172
+ 'The inspector server is now listening for all services. Open `chrome://inspect` in Google Chrome to connect.'
173
+ )
166
174
  this.#inspectorServer = server
167
175
  }
168
176
  } catch (error) {
@@ -188,13 +196,14 @@ class Runtime extends EventEmitter {
188
196
  }
189
197
 
190
198
  this.#updateStatus('stopping')
191
- this.#startedServices.clear()
192
199
 
193
200
  if (this.#inspectorServer) {
194
201
  await this.#inspectorServer.close()
195
202
  }
196
203
 
197
- await Promise.all(this.#servicesIds.map(service => this._stopService(service, silent)))
204
+ for (const service of this.#servicesIds) {
205
+ await this.stopService(service, silent)
206
+ }
198
207
 
199
208
  this.#updateStatus('stopped')
200
209
  }
@@ -243,109 +252,48 @@ class Runtime extends EventEmitter {
243
252
  this.#loggerDestination = null
244
253
  }
245
254
 
246
- if (this.#sharedHttpCache?.close) {
247
- await this.#sharedHttpCache.close()
248
- }
249
-
250
255
  this.#updateStatus('closed')
251
256
  }
252
257
 
253
- async startService (id) {
254
- if (this.#startedServices.get(id)) {
258
+ async startService (id, silent) {
259
+ // Since when a service is stopped the worker is deleted, we consider a service start if its first service
260
+ // is no longer in the init phase
261
+ const firstWorker = this.#workers.get(`${id}:0`)
262
+ if (firstWorker && firstWorker[kWorkerStatus] !== 'boot' && firstWorker[kWorkerStatus] !== 'init') {
255
263
  throw new errors.ApplicationAlreadyStartedError()
256
264
  }
257
265
 
258
- // This is set here so that if the service fails while starting we track the status
259
- this.#startedServices.set(id, true)
260
-
261
- let service = await this.#getServiceById(id, false, false)
262
-
263
- // The service was stopped, recreate the thread
264
- if (!service) {
265
- const config = this.#configManager.current
266
- const serviceConfig = config.services.find(s => s.id === id)
266
+ const config = this.#configManager.current
267
+ const serviceConfig = config.services.find(s => s.id === id)
267
268
 
268
- await this.#setupService(serviceConfig)
269
- service = await this.#getServiceById(id)
269
+ if (!serviceConfig) {
270
+ throw new errors.ServiceNotFoundError(id, Array.from(this.#servicesIds).join(', '))
270
271
  }
271
272
 
272
- try {
273
- const serviceUrl = await sendViaITC(service, 'start')
274
- if (serviceUrl) {
275
- this.#url = serviceUrl
276
- }
277
- this.#bootstrapAttempts.set(id, 0)
278
- } catch (error) {
279
- // TODO: handle port allocation error here
280
- if (error.code === 'EADDRINUSE') throw error
281
-
282
- this.logger.error({ err: ensureLoggableError(error) }, `Failed to start service "${id}".`)
283
-
284
- const config = this.#configManager.current
285
- const restartOnError = config.restartOnError
286
-
287
- if (!restartOnError) {
288
- this.logger.error(`Failed to start service "${id}".`)
289
- throw error
290
- }
291
-
292
- let bootstrapAttempt = this.#bootstrapAttempts.get(id)
293
- if (bootstrapAttempt++ >= MAX_BOOTSTRAP_ATTEMPTS || restartOnError === 0) {
294
- this.logger.error(`Failed to start service "${id}" after ${MAX_BOOTSTRAP_ATTEMPTS} attempts.`)
295
- throw error
296
- }
297
-
298
- this.logger.warn(
299
- `Starting a service "${id}" in ${restartOnError}ms. ` +
300
- `Attempt ${bootstrapAttempt} of ${MAX_BOOTSTRAP_ATTEMPTS}...`
301
- )
273
+ const workersCount = await this.#workers.getCount(serviceConfig.id)
302
274
 
303
- this.#bootstrapAttempts.set(id, bootstrapAttempt)
304
- await this.#restartCrashedService(id)
275
+ for (let i = 0; i < workersCount; i++) {
276
+ await this.#startWorker(config, serviceConfig, workersCount, id, i, silent)
305
277
  }
306
278
  }
307
279
 
308
- // Do not rename to #stopService as this is used in tests
309
- async _stopService (id, silent) {
310
- const service = await this.#getServiceById(id, false, false)
311
-
312
- if (!service) {
313
- return
314
- }
315
-
316
- this.#startedServices.set(id, false)
317
-
318
- if (!silent) {
319
- this.logger?.info(`Stopping service "${id}"...`)
320
- }
280
+ async stopService (id, silent) {
281
+ const config = this.#configManager.current
282
+ const serviceConfig = config.services.find(s => s.id === id)
321
283
 
322
- // Always send the stop message, it will shut down workers that only had ITC and interceptors setup
323
- try {
324
- await executeWithTimeout(sendViaITC(service, 'stop'), 10000)
325
- } catch (error) {
326
- this.logger?.info(
327
- { error: ensureLoggableError(error) },
328
- `Failed to stop service "${id}". Killing a worker thread.`
329
- )
330
- } finally {
331
- service[kITC].close()
284
+ if (!serviceConfig) {
285
+ throw new errors.ServiceNotFoundError(id, Array.from(this.#servicesIds).join(', '))
332
286
  }
333
287
 
334
- // Wait for the worker thread to finish, we're going to create a new one if the service is ever restarted
335
- const res = await executeWithTimeout(once(service, 'exit'), 10000)
288
+ const workersCount = await this.#workers.getCount(serviceConfig.id)
336
289
 
337
- // If the worker didn't exit in time, kill it
338
- if (res === 'timeout') {
339
- await service.terminate()
290
+ for (let i = 0; i < workersCount; i++) {
291
+ await this.#stopWorker(workersCount, id, i, silent)
340
292
  }
341
293
  }
342
294
 
343
295
  async buildService (id) {
344
- const service = this.#services.get(id)
345
-
346
- if (!service) {
347
- throw new errors.ServiceNotFoundError(id, Array.from(this.#services.keys()).join(', '))
348
- }
296
+ const service = await this.#getServiceById(id)
349
297
 
350
298
  try {
351
299
  return await sendViaITC(service, 'build')
@@ -523,6 +471,7 @@ class Runtime extends EventEmitter {
523
471
  async getServices () {
524
472
  return {
525
473
  entrypoint: this.#entrypointId,
474
+ production: this.#isProduction,
526
475
  services: await Promise.all(this.#servicesIds.map(id => this.getServiceDetails(id)))
527
476
  }
528
477
  }
@@ -555,6 +504,10 @@ class Runtime extends EventEmitter {
555
504
  dependencies
556
505
  }
557
506
 
507
+ if (this.#isProduction) {
508
+ serviceDetails.workers = this.#workers.getCount(id)
509
+ }
510
+
558
511
  if (entrypoint) {
559
512
  serviceDetails.url = status === 'started' ? this.#url : null
560
513
  }
@@ -593,16 +546,14 @@ class Runtime extends EventEmitter {
593
546
  async getMetrics (format = 'json') {
594
547
  let metrics = null
595
548
 
596
- for (const id of this.#servicesIds) {
549
+ for (const worker of this.#workers.values()) {
597
550
  try {
598
- const service = await this.#getServiceById(id, true, false)
599
-
600
551
  // The service might be temporarily unavailable
601
- if (!service) {
552
+ if (worker[kWorkerStatus] !== 'started') {
602
553
  continue
603
554
  }
604
555
 
605
- const serviceMetrics = await sendViaITC(service, 'getMetrics', format)
556
+ const serviceMetrics = await sendViaITC(worker, 'getMetrics', format)
606
557
  if (serviceMetrics) {
607
558
  if (metrics === null) {
608
559
  metrics = format === 'json' ? [] : ''
@@ -711,11 +662,7 @@ class Runtime extends EventEmitter {
711
662
  }
712
663
 
713
664
  async getServiceMeta (id) {
714
- const service = this.#services.get(id)
715
-
716
- if (!service) {
717
- throw new errors.ServiceNotFoundError(id, Array.from(this.#services.keys()).join(', '))
718
- }
665
+ const service = await this.#getServiceById(id)
719
666
 
720
667
  try {
721
668
  return await sendViaITC(service, 'getServiceMeta')
@@ -767,24 +714,6 @@ class Runtime extends EventEmitter {
767
714
  return createReadStream(filePath)
768
715
  }
769
716
 
770
- async getCachedHttpRequests () {
771
- return this.#sharedHttpCache.getRoutes()
772
- }
773
-
774
- async invalidateHttpCache (options = {}) {
775
- const { origin, routes, tags } = options
776
-
777
- if (!this.#sharedHttpCache) return
778
-
779
- if (routes && routes.length > 0) {
780
- await this.#sharedHttpCache.deleteRoutes(routes)
781
- }
782
-
783
- if (tags && tags.length > 0) {
784
- await this.#sharedHttpCache.deleteByCacheTags(origin, tags)
785
- }
786
- }
787
-
788
717
  #updateStatus (status) {
789
718
  this.#status = status
790
719
  this.emit(status)
@@ -798,16 +727,21 @@ class Runtime extends EventEmitter {
798
727
  if (this.#status === 'stopping' || this.#status === 'closed') return
799
728
 
800
729
  const config = this.#configManager.current
730
+ const workersCount = await this.#workers.getCount(serviceConfig.id)
731
+ const id = serviceConfig.id
732
+
733
+ for (let i = 0; i < workersCount; i++) {
734
+ await this.#setupWorker(config, serviceConfig, workersCount, id, i)
735
+ }
736
+ }
737
+
738
+ async #setupWorker (config, serviceConfig, workersCount, serviceId, index) {
801
739
  const { autoload, restartOnError } = config
740
+ const workerId = `${serviceId}:${index}`
802
741
 
803
- const id = serviceConfig.id
804
742
  const { port1: loggerDestination, port2: loggingPort } = new MessageChannel()
805
743
  loggerDestination.on('message', this.#forwardThreadLog.bind(this))
806
744
 
807
- if (!this.#bootstrapAttempts.has(id)) {
808
- this.#bootstrapAttempts.set(id, 0)
809
- }
810
-
811
745
  // Handle inspector
812
746
  let inspectorOptions
813
747
 
@@ -816,23 +750,20 @@ class Runtime extends EventEmitter {
816
750
  ...this.#configManager.current.inspectorOptions
817
751
  }
818
752
 
819
- inspectorOptions.port = inspectorOptions.port + this.#inspectors.length + 1
820
-
821
- const inspectorData = {
822
- port: inspectorOptions.port,
823
- id,
824
- dirname: this.#configManager.dirname
825
- }
826
-
827
- this.#inspectors.push(inspectorData)
753
+ inspectorOptions.port = inspectorOptions.port + this.#workers.size + 1
828
754
  }
829
755
 
830
- const service = new Worker(kWorkerFile, {
756
+ const worker = new Worker(kWorkerFile, {
831
757
  workerData: {
832
758
  config,
833
759
  serviceConfig: {
834
760
  ...serviceConfig,
835
- isProduction: this.#configManager.args?.production ?? false
761
+ isProduction: this.#isProduction
762
+ },
763
+ worker: {
764
+ id: workerId,
765
+ index,
766
+ count: workersCount
836
767
  },
837
768
  inspectorOptions,
838
769
  dirname: this.#configManager.dirname,
@@ -854,100 +785,112 @@ class Runtime extends EventEmitter {
854
785
  })
855
786
 
856
787
  // Make sure the listener can handle a lot of API requests at once before raising a warning
857
- service.setMaxListeners(1e3)
788
+ worker.setMaxListeners(1e3)
858
789
 
859
790
  // Track service exiting
860
- service.once('exit', code => {
861
- const started = this.#startedServices.get(id)
862
- this.#services.delete(id)
863
- loggerDestination.close()
864
- service[kITC].close()
865
- loggingPort.close()
791
+ worker.once('exit', code => {
792
+ if (worker[kWorkerStatus] === 'exited') {
793
+ return
794
+ }
866
795
 
867
- if (this.#status === 'stopping') return
796
+ const started = worker[kWorkerStatus] === 'started'
797
+ worker[kWorkerStatus] = 'exited'
798
+
799
+ this.#cleanupWorker(workerId, worker)
800
+
801
+ if (this.#status === 'stopping') {
802
+ return
803
+ }
868
804
 
869
805
  // Wait for the next tick so that crashed from the thread are logged first
870
806
  setImmediate(() => {
871
- if (!config.watch || code !== 0) {
872
- this.logger.warn(`Service "${id}" unexpectedly exited with code ${code}.`)
807
+ const errorLabel = workersCount > 1 ? `worker ${index} of the service "${serviceId}"` : `service "${serviceId}"`
808
+
809
+ if (started && (!config.watch || code !== 0)) {
810
+ this.logger.warn(`The ${errorLabel} unexpectedly exited with code ${code}.`)
873
811
  }
874
812
 
875
813
  // Restart the service if it was started
876
814
  if (started && this.#status === 'started') {
877
815
  if (restartOnError > 0) {
878
- this.logger.warn(`Restarting a service "${id}" in ${restartOnError}ms...`)
879
- this.#restartCrashedService(id).catch(err => {
880
- this.logger.error({ err: ensureLoggableError(err) }, `Failed to restart service "${id}".`)
816
+ this.logger.warn(`The ${errorLabel} will be restarted in ${restartOnError}ms...`)
817
+ this.#restartCrashedWorker(config, serviceConfig, workersCount, serviceId, index, false, 0).catch(err => {
818
+ this.logger.error({ err: ensureLoggableError(err) }, `${errorLabel} could not be restarted.`)
881
819
  })
882
820
  } else {
883
- this.logger.warn(`The "${id}" service is no longer available.`)
821
+ this.logger.warn(`The ${errorLabel} is no longer available.`)
884
822
  }
885
823
  }
886
824
  })
887
825
  })
888
826
 
889
- service[kId] = id
890
- service[kConfig] = serviceConfig
827
+ worker[kId] = workersCount > 1 ? workerId : serviceId
828
+ worker[kServiceId] = serviceId
829
+ worker[kWorkerId] = workersCount > 1 ? index : undefined
830
+ worker[kConfig] = serviceConfig
831
+ worker[kLoggerDestination] = loggerDestination
832
+ worker[kLoggingPort] = loggingPort
833
+
834
+ if (inspectorOptions) {
835
+ worker[kInspectorOptions] = {
836
+ port: inspectorOptions.port,
837
+ id: serviceId,
838
+ dirname: this.#configManager.dirname
839
+ }
840
+ }
891
841
 
892
842
  // Setup ITC
893
- service[kITC] = new ITC({
894
- name: id + '-runtime',
895
- port: service,
843
+ worker[kITC] = new ITC({
844
+ name: workerId + '-runtime',
845
+ port: worker,
896
846
  handlers: {
897
847
  getServiceMeta: this.getServiceMeta.bind(this),
898
- getServices: this.getServices.bind(this),
899
- isHttpCacheFull: () => this.#sharedHttpCache.isFull(),
900
- getHttpCacheValue: opts => this.#sharedHttpCache.getValue(opts.request),
901
- setHttpCacheValue: opts => this.#sharedHttpCache.setValue(
902
- opts.request,
903
- opts.response,
904
- opts.payload
905
- ),
906
- deleteHttpCacheValue: opts => this.#sharedHttpCache.deleteByOrigin(
907
- opts.origin
908
- ),
909
- invalidateHttpCache: opts => this.invalidateHttpCache(opts),
848
+ listServices: () => {
849
+ return this.#servicesIds
850
+ }
910
851
  }
911
852
  })
912
- service[kITC].listen()
913
-
914
- // Handle services changes
915
- // This is not purposely activated on when this.#configManager.current.watch === true
916
- // so that services can eventually manually trigger a restart. This mechanism is current
917
- // used by the composer
918
- service[kITC].on('changed', async () => {
919
- try {
920
- const wasStarted = this.#startedServices.get(id)
921
-
922
- await this._stopService(id)
853
+ worker[kITC].listen()
854
+
855
+ // Only activate watch for the first instance
856
+ if (index === 0) {
857
+ // Handle services changes
858
+ // This is not purposely activated on when this.#configManager.current.watch === true
859
+ // so that services can eventually manually trigger a restart. This mechanism is current
860
+ // used by the composer.
861
+ worker[kITC].on('changed', async () => {
862
+ try {
863
+ const wasStarted = worker[kWorkerStatus].startsWith('start')
864
+ await this.stopService(serviceId)
923
865
 
924
- if (wasStarted) {
925
- await this.startService(id)
926
- }
866
+ if (wasStarted) {
867
+ await this.startService(serviceId)
868
+ }
927
869
 
928
- this.logger?.info(`Service ${id} has been successfully reloaded ...`)
870
+ this.logger?.info(`Service "${serviceId}" has been successfully reloaded ...`)
929
871
 
930
- if (serviceConfig.entrypoint) {
931
- this.#showUrl()
872
+ if (serviceConfig.entrypoint) {
873
+ this.#showUrl()
874
+ }
875
+ } catch (e) {
876
+ this.logger?.error(e)
932
877
  }
933
- } catch (e) {
934
- this.logger?.error(e)
935
- }
936
- })
878
+ })
879
+ }
937
880
 
938
881
  // Store locally
939
- this.#services.set(id, service)
882
+ this.#workers.set(workerId, worker)
940
883
 
941
884
  if (serviceConfig.entrypoint) {
942
- this.#entrypoint = service
943
- this.#entrypointId = id
885
+ this.#entrypointId = serviceId
944
886
  }
945
887
 
946
888
  // Setup the interceptor
947
- this.#interceptor.route(id, service)
889
+ this.#interceptor.route(serviceId, worker)
948
890
 
949
891
  // Store dependencies
950
- const [{ dependencies }] = await waitEventFromITC(service, 'init')
892
+ const [{ dependencies }] = await waitEventFromITC(worker, 'init')
893
+ worker[kWorkerStatus] = 'boot'
951
894
 
952
895
  if (autoload) {
953
896
  serviceConfig.dependencies = dependencies
@@ -959,11 +902,122 @@ class Runtime extends EventEmitter {
959
902
  }
960
903
  }
961
904
 
962
- async #restartCrashedService (id) {
963
- const config = this.#configManager.current
964
- const serviceConfig = config.services.find(s => s.id === id)
905
+ async #startWorker (config, serviceConfig, workersCount, id, index, silent, bootstrapAttempt = 0) {
906
+ const workerId = `${id}:${index}`
907
+ const label = workersCount > 1 ? `worker ${index} of the service "${id}"` : `service "${id}"`
908
+
909
+ if (!silent) {
910
+ this.logger?.info(`Starting the ${label}...`)
911
+ }
912
+
913
+ let worker = await this.#getWorkerById(id, index, false, false)
914
+
915
+ // The service was stopped, recreate the thread
916
+ if (!worker) {
917
+ await this.#setupService(serviceConfig, index)
918
+ worker = await this.#getWorkerById(id, index)
919
+ }
920
+
921
+ worker[kWorkerStatus] = 'starting'
965
922
 
966
- let restartPromise = this.#restartPromises.get(id)
923
+ try {
924
+ const workerUrl = await sendViaITC(worker, 'start')
925
+ if (workerUrl) {
926
+ this.#url = workerUrl
927
+ }
928
+
929
+ worker[kWorkerStatus] = 'started'
930
+
931
+ if (!silent) {
932
+ this.logger?.info(`Started the ${label}...`)
933
+ }
934
+ } catch (error) {
935
+ // TODO: handle port allocation error here
936
+ if (error.code === 'EADDRINUSE') throw error
937
+
938
+ this.#cleanupWorker(workerId, worker)
939
+
940
+ if (worker[kWorkerStatus] !== 'exited') {
941
+ // This prevent the exit handler to restart service
942
+ worker[kWorkerStatus] = 'exited'
943
+ await worker.terminate()
944
+ }
945
+
946
+ this.logger.error({ err: ensureLoggableError(error) }, `Failed to start ${label}.`)
947
+
948
+ const restartOnError = config.restartOnError
949
+
950
+ if (!restartOnError) {
951
+ throw error
952
+ }
953
+
954
+ if (bootstrapAttempt++ >= MAX_BOOTSTRAP_ATTEMPTS || restartOnError === 0) {
955
+ this.logger.error(`Failed to start ${label} after ${MAX_BOOTSTRAP_ATTEMPTS} attempts.`)
956
+ throw error
957
+ }
958
+
959
+ this.logger.warn(
960
+ `Attempt ${bootstrapAttempt} of ${MAX_BOOTSTRAP_ATTEMPTS} to start the ${label} again will be performed in ${restartOnError}ms ...`
961
+ )
962
+
963
+ await this.#restartCrashedWorker(config, serviceConfig, workersCount, id, index, silent, bootstrapAttempt)
964
+ }
965
+ }
966
+
967
+ async #stopWorker (workersCount, id, index, silent) {
968
+ const worker = await this.#getWorkerById(id, index, false, false)
969
+
970
+ if (!worker) {
971
+ return
972
+ }
973
+
974
+ worker[kWorkerStatus] = 'stopping'
975
+
976
+ const label = workersCount > 1 ? `worker ${index} of the service "${id}"` : `service "${id}"`
977
+
978
+ if (!silent) {
979
+ this.logger?.info(`Stopping the ${label}...`)
980
+ }
981
+
982
+ const exitTimeout = this.#configManager.current.gracefulShutdown.runtime
983
+ const exitPromise = once(worker, 'exit')
984
+
985
+ // Always send the stop message, it will shut down workers that only had ITC and interceptors setup
986
+ try {
987
+ await executeWithTimeout(sendViaITC(worker, 'stop'), exitTimeout)
988
+ } catch (error) {
989
+ this.logger?.info({ error: ensureLoggableError(error) }, `Failed to stop ${label}. Killing a worker thread.`)
990
+ } finally {
991
+ worker[kITC].close()
992
+ }
993
+
994
+ if (!silent) {
995
+ this.logger?.info(`Stopped the ${label}...`)
996
+ }
997
+
998
+ // Wait for the worker thread to finish, we're going to create a new one if the service is ever restarted
999
+ const res = await executeWithTimeout(exitPromise, exitTimeout)
1000
+
1001
+ // If the worker didn't exit in time, kill it
1002
+ if (res === 'timeout') {
1003
+ await worker.terminate()
1004
+ }
1005
+
1006
+ worker[kWorkerStatus] = 'stopped'
1007
+ }
1008
+
1009
+ #cleanupWorker (workerId, worker) {
1010
+ this.#workers.delete(workerId)
1011
+
1012
+ worker[kITC].close()
1013
+ worker[kLoggerDestination].close()
1014
+ worker[kLoggingPort].close()
1015
+ }
1016
+
1017
+ async #restartCrashedWorker (config, serviceConfig, workersCount, id, index, silent, bootstrapAttempt) {
1018
+ const workerId = `${id}:${index}`
1019
+
1020
+ let restartPromise = this.#restartingWorkers.get(workerId)
967
1021
  if (restartPromise) {
968
1022
  await restartPromise
969
1023
  return
@@ -971,48 +1025,67 @@ class Runtime extends EventEmitter {
971
1025
 
972
1026
  restartPromise = new Promise((resolve, reject) => {
973
1027
  setTimeout(async () => {
974
- this.#restartPromises.delete(id)
1028
+ this.#restartingWorkers.delete(workerId)
975
1029
 
976
1030
  try {
977
- await this.#setupService(serviceConfig)
978
-
979
- const started = this.#startedServices.get(id)
980
- if (started) {
981
- this.#startedServices.set(id, false)
982
- await this.startService(id)
983
- }
1031
+ await this.#setupWorker(config, serviceConfig, workersCount, id, index)
1032
+ await this.#startWorker(config, serviceConfig, workersCount, id, index, silent, bootstrapAttempt)
984
1033
 
985
1034
  resolve()
986
1035
  } catch (err) {
1036
+ // The runtime was stopped while the restart was happening, ignore any error.
1037
+ if (!this.#status.startsWith('start')) {
1038
+ resolve()
1039
+ }
1040
+
987
1041
  reject(err)
988
1042
  }
989
1043
  }, config.restartOnError)
990
1044
  })
991
1045
 
992
- this.#restartPromises.set(id, restartPromise)
1046
+ this.#restartingWorkers.set(workerId, restartPromise)
993
1047
  await restartPromise
994
1048
  }
995
1049
 
996
- async #getServiceById (id, ensureStarted = false, mustExist = true) {
997
- const service = this.#services.get(id)
1050
+ async #getServiceById (serviceId, ensureStarted = false, mustExist = true) {
1051
+ // If the serviceId includes the worker, properly split
1052
+ let workerId
1053
+ const matched = serviceId.match(/^(.+):(\d+)$/)
1054
+
1055
+ if (matched) {
1056
+ serviceId = matched[1]
1057
+ workerId = matched[2]
1058
+ }
1059
+
1060
+ return this.#getWorkerById(serviceId, workerId, ensureStarted, mustExist)
1061
+ }
1062
+
1063
+ async #getWorkerById (serviceId, workerId, ensureStarted = false, mustExist = true) {
1064
+ let worker
1065
+
1066
+ if (typeof workerId !== 'undefined') {
1067
+ worker = this.#workers.get(`${serviceId}:${workerId}`)
1068
+ } else {
1069
+ worker = this.#workers.next(serviceId)
1070
+ }
998
1071
 
999
- if (!service) {
1000
- if (!mustExist && this.#servicesIds.includes(id)) {
1072
+ if (!worker) {
1073
+ if (!mustExist && this.#servicesIds.includes(serviceId)) {
1001
1074
  return null
1002
1075
  }
1003
1076
 
1004
- throw new errors.ServiceNotFoundError(id, Array.from(this.#services.keys()).join(', '))
1077
+ throw new errors.ServiceNotFoundError(serviceId, Array.from(this.#servicesIds).join(', '))
1005
1078
  }
1006
1079
 
1007
1080
  if (ensureStarted) {
1008
- const serviceStatus = await sendViaITC(service, 'getStatus')
1081
+ const serviceStatus = await sendViaITC(worker, 'getStatus')
1009
1082
 
1010
1083
  if (serviceStatus !== 'started') {
1011
- throw new errors.ServiceNotStartedError(id)
1084
+ throw new errors.ServiceNotStartedError(serviceId)
1012
1085
  }
1013
1086
  }
1014
1087
 
1015
- return service
1088
+ return worker
1016
1089
  }
1017
1090
 
1018
1091
  async #getRuntimePackageJson () {