@platformatic/watt-extra 1.7.1-alpha.7 → 1.8.1-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,20 @@
1
1
  {
2
2
  "permissions": {
3
3
  "allow": [
4
- "Bash(node --test-only:*)",
4
+ "Read(//work/workspaces/workspace-platformatic/platformatic/**)",
5
+ "Bash(npx borp:*)",
6
+ "Bash(timeout 30 npx borp -c 1 --timeout=20000 ./test/trigger-flamegraphs.test.js)",
7
+ "Bash(xargs cat:*)",
8
+ "Bash(pnpm install)",
9
+ "Bash(find:*)",
10
+ "Bash(cat:*)",
11
+ "WebFetch(domain:github.com)",
5
12
  "Bash(node --test:*)",
6
- "Bash(for i in {1..3})",
7
- "Bash(do echo \"=== Run $i ===\")",
8
- "Bash(done)"
13
+ "Bash(for i in 1 2 3)",
14
+ "Bash(do echo \"Run $i:\")",
15
+ "Bash(done)",
16
+ "Bash(git stash:*)",
17
+ "Bash(echo:*)"
9
18
  ],
10
19
  "deny": [],
11
20
  "ask": []
@@ -35,7 +35,7 @@ jobs:
35
35
  node-version: [22.x, 24.x]
36
36
 
37
37
  steps:
38
- - uses: actions/checkout@v5
38
+ - uses: actions/checkout@v6
39
39
  - uses: pnpm/action-setup@v4.2.0
40
40
  with:
41
41
  version: 10
package/lib/watt.js CHANGED
@@ -94,6 +94,49 @@ class Watt {
94
94
  }
95
95
  }
96
96
 
97
+ async updateInstanceConfig (instanceConfig) {
98
+ this.#logger.info({ applicationId: instanceConfig?.applicationId }, 'Updating instance config after ICC recovery')
99
+
100
+ const previousConfig = this.#instanceConfig
101
+ this.#instanceConfig = instanceConfig
102
+
103
+ // If we didn't have a config before and now we do, apply runtime updates
104
+ if (!previousConfig && instanceConfig && this.runtime) {
105
+ // Update undici interceptors
106
+ try {
107
+ const undiciConfig = this.#getUndiciConfig()
108
+ await this.runtime.updateUndiciInterceptors?.(undiciConfig)
109
+ this.#logger.info('Updated undici interceptors after ICC recovery')
110
+ } catch (err) {
111
+ this.#logger.error({ err }, 'Failed to update undici interceptors after ICC recovery')
112
+ }
113
+
114
+ // Update metrics config if runtime supports it
115
+ if (typeof this.runtime.updateMetricsConfig === 'function') {
116
+ try {
117
+ // Get current metrics config set by #configureRuntime
118
+ const runtimeConfig = this.runtime.getRuntimeConfig(true)
119
+ const currentMetrics = runtimeConfig.metrics || {}
120
+
121
+ // Merge with ICC updates
122
+ const updatedMetrics = {
123
+ ...currentMetrics,
124
+ labels: {
125
+ ...currentMetrics.labels,
126
+ applicationId: instanceConfig.applicationId
127
+ },
128
+ applicationLabel: instanceConfig.applicationMetricsLabel ?? currentMetrics.applicationLabel
129
+ }
130
+
131
+ await this.runtime.updateMetricsConfig(updatedMetrics)
132
+ this.#logger.info('Updated metrics config after ICC recovery')
133
+ } catch (err) {
134
+ this.#logger.error({ err }, 'Failed to update metrics config after ICC recovery')
135
+ }
136
+ }
137
+ }
138
+ }
139
+
97
140
  async updateSharedContext (context) {
98
141
  this.#sharedContext = context
99
142
  await this.runtime?.updateSharedContext?.({ context })
@@ -351,30 +394,61 @@ class Watt {
351
394
  !!this.#instanceConfig?.enableOpenTelemetry &&
352
395
  !!this.#instanceConfig?.iccServices?.riskEngine?.url
353
396
 
354
- // We need to always set an opentelemetry config to pass a telemetry
355
- // applicationName to render a taxonomy diagram
356
- config.telemetry = config.telemetry ?? {
357
- enabled: enableOpenTelemetry,
358
- applicationName: `${this.#applicationName}`,
359
- skip: [
360
- { method: 'GET', path: '/documentation' },
361
- { method: 'GET', path: '/documentation/json' }
362
- ],
363
- exporter: {
364
- type: 'otlp',
365
- options: {
366
- url:
367
- this.#instanceConfig?.iccServices?.riskEngine?.url + '/v1/traces',
368
- headers: {
369
- 'x-platformatic-application-id': this.#instanceConfig?.applicationId
370
- },
371
- keepAlive: true,
372
- httpAgentOptions: {
373
- rejectUnauthorized: false
374
- }
397
+ const iccExporter = {
398
+ type: 'otlp',
399
+ options: {
400
+ url: this.#instanceConfig?.iccServices?.riskEngine?.url + '/v1/traces',
401
+ headers: {
402
+ 'x-platformatic-application-id': this.#instanceConfig?.applicationId
403
+ },
404
+ keepAlive: true,
405
+ httpAgentOptions: {
406
+ rejectUnauthorized: false
375
407
  }
376
408
  }
377
409
  }
410
+
411
+ const defaultSkip = [
412
+ { method: 'GET', path: '/documentation' },
413
+ { method: 'GET', path: '/documentation/json' }
414
+ ]
415
+
416
+ // If user has no telemetry config, create default
417
+ if (!config.telemetry) {
418
+ config.telemetry = {
419
+ enabled: enableOpenTelemetry,
420
+ applicationName: `${this.#applicationName}`,
421
+ skip: defaultSkip,
422
+ exporter: iccExporter
423
+ }
424
+ return
425
+ }
426
+
427
+ // Merge with existing telemetry config
428
+ // Always set applicationName for taxonomy diagrams (overrides user's value)
429
+ config.telemetry.applicationName = `${this.#applicationName}`
430
+
431
+ // If ICC telemetry is enabled, add ICC exporter to user's exporters
432
+ if (enableOpenTelemetry) {
433
+ const userExporter = config.telemetry.exporter
434
+ if (!userExporter) {
435
+ // No user exporter, just use ICC
436
+ config.telemetry.exporter = iccExporter
437
+ } else if (Array.isArray(userExporter)) {
438
+ // User has array of exporters, add ICC to the list
439
+ config.telemetry.exporter = [...userExporter, iccExporter]
440
+ } else {
441
+ // User has single exporter, convert to array with both
442
+ config.telemetry.exporter = [userExporter, iccExporter]
443
+ }
444
+ }
445
+
446
+ // Merge skip patterns
447
+ if (config.telemetry.skip) {
448
+ config.telemetry.skip = [...config.telemetry.skip, ...defaultSkip]
449
+ } else {
450
+ config.telemetry.skip = defaultSkip
451
+ }
378
452
  }
379
453
 
380
454
  #configureHttpCaching (config) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@platformatic/watt-extra",
3
- "version": "1.7.1-alpha.7",
3
+ "version": "1.8.1-alpha.1",
4
4
  "description": "The Platformatic runtime manager",
5
5
  "type": "module",
6
6
  "scripts": {
package/plugins/alerts.js CHANGED
@@ -1,9 +1,6 @@
1
1
  import { request } from 'undici'
2
2
 
3
3
  async function alerts (app, _opts) {
4
- const pauseEluThreshold = app.env.PLT_FLAMEGRAPHS_PAUSE_ELU_TRESHOLD
5
- const pauseTimeout = app.env.PLT_FLAMEGRAPHS_PAUSE_TIMEOUT
6
-
7
4
  const healthCache = [] // It's OK to have this in memory, this is per-pod.
8
5
  const podHealthWindow =
9
6
  app.instanceConfig?.scaler?.podHealthWindow || 60 * 1000
@@ -13,6 +10,10 @@ async function alerts (app, _opts) {
13
10
  const lastServicesAlertTime = {}
14
11
  const workerStartTimes = new Map() // Track per-worker start times for grace period
15
12
 
13
+ // Store listener references for cleanup
14
+ let workerStartedListener = null
15
+ let healthListener = null
16
+
16
17
  async function setupAlerts () {
17
18
  const scalerAlgorithmVersion = app.instanceConfig?.scaler?.version ?? 'v1'
18
19
  if (scalerAlgorithmVersion !== 'v1') {
@@ -40,17 +41,30 @@ async function alerts (app, _opts) {
40
41
  return
41
42
  }
42
43
 
44
+ const healthEventName = app.watt.runtimeSupportsNewHealthMetrics()
45
+ ? 'application:worker:health:metrics'
46
+ : 'application:worker:health'
47
+
48
+ // Remove old listeners if they exist (for ICC recovery scenario)
49
+ if (workerStartedListener) {
50
+ runtime.removeListener('application:worker:started', workerStartedListener)
51
+ }
52
+ if (healthListener) {
53
+ runtime.removeListener(healthEventName, healthListener)
54
+ }
55
+
43
56
  // Default start time for workers that started before the listener was registered
44
57
  const pluginStartTime = Date.now()
45
58
 
46
59
  // Listen for worker start events to track start times
47
- runtime.on('application:worker:started', (workerInfo) => {
60
+ workerStartedListener = (workerInfo) => {
48
61
  const workerId = workerInfo?.id
49
62
  if (workerId) {
50
63
  workerStartTimes.set(workerId, Date.now())
51
64
  app.log.debug({ workerId }, 'Worker started, tracking for grace period')
52
65
  }
53
- })
66
+ }
67
+ runtime.on('application:worker:started', workerStartedListener)
54
68
 
55
69
  const processHealthInfo = async (healthInfo) => {
56
70
  if (!healthInfo) {
@@ -64,11 +78,6 @@ async function alerts (app, _opts) {
64
78
  const healthWithTimestamp = { ...healthInfo, timestamp, service: serviceId }
65
79
  delete healthWithTimestamp.healthConfig // we don't need to store this
66
80
 
67
- const elu = healthInfo.currentHealth.elu
68
- if (elu >= pauseEluThreshold) {
69
- app.pauseProfiling({ serviceId, timeout: pauseTimeout })
70
- }
71
-
72
81
  healthCache.push(healthWithTimestamp)
73
82
 
74
83
  const cutoffTime = timestamp - podHealthWindow
@@ -144,14 +153,18 @@ async function alerts (app, _opts) {
144
153
 
145
154
  const alert = await body.json()
146
155
 
147
- app.requestFlamegraphs({ serviceIds: [serviceId], alertId: alert.id })
148
- .catch(err => app.log.error({ err }, 'Failed to send a flamegraph'))
156
+ app.sendFlamegraphs({
157
+ workerIds: [workerId],
158
+ alertId: alert.id
159
+ }).catch(err => {
160
+ app.log.error({ err }, 'Failed to send a flamegraph')
161
+ })
149
162
  }
150
163
  }
151
164
 
152
165
  if (app.watt.runtimeSupportsNewHealthMetrics()) {
153
166
  // Runtime >= 3.18.0: Listen to health:metrics
154
- runtime.on('application:worker:health:metrics', async (health) => {
167
+ healthListener = async (health) => {
155
168
  if (!health) {
156
169
  app.log.error('No health info received')
157
170
  return
@@ -181,11 +194,12 @@ async function alerts (app, _opts) {
181
194
  }
182
195
 
183
196
  await processHealthInfo(healthInfo)
184
- })
197
+ }
185
198
  } else {
186
199
  // Runtime < 3.18.0:
187
- runtime.on('application:worker:health', processHealthInfo)
200
+ healthListener = processHealthInfo
188
201
  }
202
+ runtime.on(healthEventName, healthListener)
189
203
  }
190
204
  app.setupAlerts = setupAlerts
191
205
  }
package/plugins/env.js CHANGED
@@ -19,10 +19,10 @@ const schema = {
19
19
  PLT_CACHE_CONFIG: { type: 'string' },
20
20
  PLT_DISABLE_FLAMEGRAPHS: { type: 'boolean', default: false },
21
21
  PLT_FLAMEGRAPHS_INTERVAL_SEC: { type: 'number', default: 60 },
22
+ PLT_FLAMEGRAPHS_ELU_THRESHOLD: { type: 'number', default: 0.4 },
22
23
  PLT_FLAMEGRAPHS_GRACE_PERIOD: { type: 'number', default: 3000 },
23
- PLT_FLAMEGRAPHS_PAUSE_ELU_TRESHOLD: { type: 'number', default: 0.95 },
24
- PLT_FLAMEGRAPHS_PAUSE_TIMEOUT: { type: 'number', default: 5 * 60 * 1000 },
25
- PLT_FLAMEGRAPHS_STATES_REFRESH_INTERVAL: { type: 'number', default: 10 * 1000 },
24
+ PLT_FLAMEGRAPHS_ATTEMPT_TIMEOUT: { type: 'number', default: 10000 },
25
+ PLT_FLAMEGRAPHS_CACHE_CLEANUP_INTERVAL: { type: 'number', default: 120000 },
26
26
  PLT_JWT_EXPIRATION_OFFSET_SEC: { type: 'number', default: 60 },
27
27
  PLT_UPDATES_RECONNECT_INTERVAL_SEC: { type: 'number', default: 1 },
28
28
  PLT_ELU_HEALTH_SIGNAL_THRESHOLD: { type: 'number', default: 0.8 },