npm - dd-trace - Versions diffs - 5.64.0 → 5.65.0 - Mend

dd-trace 5.64.0 → 5.65.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/package.json +1 -1
package/packages/dd-trace/src/azure_metadata.js +5 -4
package/packages/dd-trace/src/datastreams/pathway.js +1 -1
package/packages/dd-trace/src/dogstatsd.js +17 -20
package/packages/dd-trace/src/runtime_metrics/runtime_metrics.js +175 -126
package/packages/dd-trace/src/supported-configurations.json +0 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "dd-trace",
-  "version": "5.64.0",
+  "version": "5.65.0",
   "description": "Datadog APM tracing client for JavaScript",
   "main": "index.js",
   "typings": "index.d.ts",

package/packages/dd-trace/src/azure_metadata.js CHANGED Viewed

@@ -76,10 +76,11 @@ function buildMetadata () {
 }
 function getAzureAppMetadata () {
-  // DD_AZURE_APP_SERVICES is an environment variable introduced by the .NET APM team and is set automatically for
-  // anyone using the Datadog APM Extensions (.NET, Java, or Node) for Windows Azure App Services
-  // See: https://github.com/DataDog/datadog-aas-extension/blob/01f94b5c28b7fa7a9ab264ca28bd4e03be603900/node/src/applicationHost.xdt#L20-L21
-  if (getEnvironmentVariable('DD_AZURE_APP_SERVICES') !== undefined) {
+  // WEBSITE_SITE_NAME is the unique name of the website instance within Azure App Services. Its
+  // presence is used to determine if we are running in Azure App Service
+  // See equivalent in dd-trace-dotnet:
+  // https://github.com/DataDog/dd-trace-dotnet/blob/37030168b2996e549ba23231ae732874b53a37e6/tracer/src/Datadog.Trace/Util/EnvironmentHelpers.cs#L99-L155
+  if (getEnvironmentVariable('WEBSITE_SITE_NAME') !== undefined) {
     return buildMetadata()
   }
 }

package/packages/dd-trace/src/datastreams/pathway.js CHANGED Viewed

@@ -17,7 +17,7 @@ const CONTEXT_PROPAGATION_KEY_BASE64 = 'dd-pathway-ctx-base64'
 const logKeys = [CONTEXT_PROPAGATION_KEY, CONTEXT_PROPAGATION_KEY_BASE64]
 function shaHash (checkpointString) {
-  const hash = crypto.createHash('md5').update(checkpointString).digest('hex').slice(0, 16)
+  const hash = crypto.createHash('sha256').update(checkpointString).digest('hex').slice(0, 16)
   return Buffer.from(hash, 'hex')
 }

package/packages/dd-trace/src/dogstatsd.js CHANGED Viewed

@@ -156,23 +156,19 @@ class DogStatsDClient {
     return socket
   }
-  static generateClientConfig (config = {}) {
+  static generateClientConfig (config) {
     const tags = []
     if (config.tags) {
-      Object.keys(config.tags)
-        .filter(key => typeof config.tags[key] === 'string')
-        .filter(key => {
-          // Skip runtime-id unless enabled as cardinality may be too high
-          if (key !== 'runtime-id') return true
-          return config.runtimeMetricsRuntimeId
-        })
-        .forEach(key => {
+      for (const [key, value] of Object.entries(config.tags)) {
+        // Skip runtime-id unless enabled as cardinality may be too high
+        if (typeof value === 'string' && (key !== 'runtime-id' || config.runtimeMetricsRuntimeId)) {
           // https://docs.datadoghq.com/tagging/#defining-tags
-          const value = config.tags[key].replaceAll(/[^a-z0-9_:./-]/ig, '_')
+          const valueStripped = value.replaceAll(/[^a-z0-9_:./-]/ig, '_')
-          tags.push(`${key}:${value}`)
-        })
+          tags.push(`${key}:${valueStripped}`)
+        }
+      }
     }
     const clientConfig = {
@@ -216,7 +212,7 @@ class MetricsAggregationClient {
     this._histograms = new Map()
   }
-  // TODO: Aggerate with a histogram and send the buckets to the client.
+  // TODO: Aggregate with a histogram and send the buckets to the client.
   distribution (name, value, tags) {
     this._client.distribution(name, value, tags)
   }
@@ -352,9 +348,10 @@ class MetricsAggregationClient {
  * @implements {DogStatsD}
  */
 class CustomMetrics {
+  #client
   constructor (config) {
     const clientConfig = DogStatsDClient.generateClientConfig(config)
-    this._client = new MetricsAggregationClient(new DogStatsDClient(clientConfig))
+    this.#client = new MetricsAggregationClient(new DogStatsDClient(clientConfig))
     const flush = this.flush.bind(this)
@@ -365,27 +362,27 @@ class CustomMetrics {
   }
   increment (stat, value = 1, tags) {
-    this._client.increment(stat, value, CustomMetrics.tagTranslator(tags))
+    this.#client.increment(stat, value, CustomMetrics.tagTranslator(tags))
   }
   decrement (stat, value = 1, tags) {
-    this._client.decrement(stat, value, CustomMetrics.tagTranslator(tags))
+    this.#client.decrement(stat, value, CustomMetrics.tagTranslator(tags))
   }
   gauge (stat, value, tags) {
-    this._client.gauge(stat, value, CustomMetrics.tagTranslator(tags))
+    this.#client.gauge(stat, value, CustomMetrics.tagTranslator(tags))
   }
   distribution (stat, value, tags) {
-    this._client.distribution(stat, value, CustomMetrics.tagTranslator(tags))
+    this.#client.distribution(stat, value, CustomMetrics.tagTranslator(tags))
   }
   histogram (stat, value, tags) {
-    this._client.histogram(stat, value, CustomMetrics.tagTranslator(tags))
+    this.#client.histogram(stat, value, CustomMetrics.tagTranslator(tags))
   }
   flush () {
-    return this._client.flush()
+    return this.#client.flush()
   }
   /**

package/packages/dd-trace/src/runtime_metrics/runtime_metrics.js CHANGED Viewed

@@ -4,72 +4,90 @@
 const v8 = require('v8')
 const os = require('os')
+const process = require('process')
 const { DogStatsDClient, MetricsAggregationClient } = require('../dogstatsd')
 const log = require('../log')
-const { performance, PerformanceObserver } = require('perf_hooks')
+const { performance, PerformanceObserver, monitorEventLoopDelay } = require('perf_hooks')
 const { getEnvironmentVariable } = require('../config-helper')
-const { NODE_MAJOR, NODE_MINOR } = require('../../../../version')
+const { NODE_MAJOR } = require('../../../../version')
+// TODO: This environment variable may not be changed, since the agent expects a flush every ten seconds.
+// It is only a variable for testing. Think about alternatives.
 const DD_RUNTIME_METRICS_FLUSH_INTERVAL = getEnvironmentVariable('DD_RUNTIME_METRICS_FLUSH_INTERVAL') ?? '10000'
 const INTERVAL = Number.parseInt(DD_RUNTIME_METRICS_FLUSH_INTERVAL, 10)
-// Node >=16 has PerformanceObserver with `gc` type, but <16.7 had a critical bug.
-// See: https://github.com/nodejs/node/issues/39548
-const hasGCObserver = NODE_MAJOR >= 18 || (NODE_MAJOR === 16 && NODE_MINOR >= 7)
+const eventLoopDelayResolution = 4
 let nativeMetrics = null
 let gcObserver = null
-let interval
-let client
-let time
-let cpuUsage
-let elu
-reset()
-const runtimeMetrics = module.exports = {
+let interval = null
+let client = null
+let lastTime = 0n
+let lastCpuUsage = null
+let eventLoopDelayObserver = null
+// !!!!!!!!!!!
+//  IMPORTANT
+// !!!!!!!!!!!
+//
+// ALL metrics that relate to time are handled in nanoseconds in the backend.
+// https://github.com/DataDog/dogweb/blob/prod/integration/node/node_metadata.csv
+module.exports = {
   start (config) {
+    this.stop()
     const clientConfig = DogStatsDClient.generateClientConfig(config)
-    const watchers = []
-    if (config.runtimeMetrics.gc !== false) {
-      if (hasGCObserver) {
-        startGCObserver()
-      } else {
-        watchers.push('gc')
-      }
-    }
+    const trackEventLoop = config.runtimeMetrics.eventLoop !== false
+    const trackGc = config.runtimeMetrics.gc !== false
-    if (config.runtimeMetrics.eventLoop !== false) {
-      watchers.push('loop')
+    if (trackGc) {
+      startGCObserver()
     }
+    // Using no-gc prevents the native gc metrics from being tracked. Not
+    // passing any options means all metrics are tracked.
+    // TODO: This is a workaround. We should find a better solution.
+    const watchers = trackEventLoop ? ['loop'] : ['no-gc']
     try {
       nativeMetrics = require('@datadog/native-metrics')
       nativeMetrics.start(...watchers)
-    } catch (e) {
-      log.error('Error starting native metrics', e)
+    } catch (error) {
+      log.error('Error starting native metrics', error)
       nativeMetrics = null
     }
     client = new MetricsAggregationClient(new DogStatsDClient(clientConfig))
-    time = process.hrtime()
+    lastTime = performance.now()
     if (nativeMetrics) {
       interval = setInterval(() => {
-        captureCommonMetrics()
-        captureNativeMetrics()
+        captureNativeMetrics(trackEventLoop, trackGc)
+        captureCommonMetrics(trackEventLoop)
         client.flush()
       }, INTERVAL)
     } else {
-      cpuUsage = process.cpuUsage()
+      lastCpuUsage = process.cpuUsage()
+      if (trackEventLoop) {
+        eventLoopDelayObserver = monitorEventLoopDelay({ resolution: eventLoopDelayResolution })
+        eventLoopDelayObserver.enable()
+      }
       interval = setInterval(() => {
-        captureCommonMetrics()
         captureCpuUsage()
+        captureCommonMetrics(trackEventLoop)
         captureHeapSpace()
+        if (trackEventLoop) {
+          // Experimental: The Node.js implementation deviates from the native metrics.
+          // We normalize the metrics to the same format but the Node.js values
+          // are that way lower than they should be, while they are still nearer
+          // to the native ones that way.
+          // We use these only as fallback values.
+          captureEventLoopDelay()
+        }
         client.flush()
       }, INTERVAL)
     }
@@ -78,12 +96,21 @@ const runtimeMetrics = module.exports = {
   },
   stop () {
-    if (nativeMetrics) {
-      nativeMetrics.stop()
-    }
+    nativeMetrics?.stop()
+    nativeMetrics = null
     clearInterval(interval)
-    reset()
+    interval = null
+    client = null
+    lastTime = 0n
+    lastCpuUsage = null
+    gcObserver?.disconnect()
+    gcObserver = null
+    eventLoopDelayObserver?.disable()
+    eventLoopDelayObserver = null
   },
   track (span) {
@@ -99,19 +126,19 @@ const runtimeMetrics = module.exports = {
   },
   boolean (name, value, tag) {
-    client && client.boolean(name, value, tag)
+    client?.boolean(name, value, tag)
   },
   histogram (name, value, tag) {
-    client && client.histogram(name, value, tag)
+    client?.histogram(name, value, tag)
   },
   count (name, count, tag, monotonic = false) {
-    client && client.count(name, count, tag, monotonic)
+    client?.count(name, count, tag, monotonic)
   },
   gauge (name, value, tag) {
-    client && client.gauge(name, value, tag)
+    client?.gauge(name, value, tag)
   },
   increment (name, tag, monotonic) {
@@ -123,30 +150,20 @@ const runtimeMetrics = module.exports = {
   }
 }
-function reset () {
-  interval = null
-  client = null
-  time = null
-  cpuUsage = null
-  nativeMetrics = null
-  gcObserver && gcObserver.disconnect()
-  gcObserver = null
-}
 function captureCpuUsage () {
-  if (!process.cpuUsage) return
-  const elapsedTime = process.hrtime(time)
-  const elapsedUsage = process.cpuUsage(cpuUsage)
-  time = process.hrtime()
-  cpuUsage = process.cpuUsage()
-  const elapsedMs = elapsedTime[0] * 1000 + elapsedTime[1] / 1_000_000
-  const userPercent = 100 * elapsedUsage.user / 1000 / elapsedMs
-  const systemPercent = 100 * elapsedUsage.system / 1000 / elapsedMs
+  const currentCpuUsage = process.cpuUsage()
+  const elapsedUsageUser = currentCpuUsage.user - lastCpuUsage.user
+  const elapsedUsageSystem = currentCpuUsage.system - lastCpuUsage.system
+  const currentTime = performance.now() // Milliseconds with decimal places
+  const elapsedUsDividedBy100 = (currentTime - lastTime) * 10
+  const userPercent = elapsedUsageUser / elapsedUsDividedBy100
+  const systemPercent = elapsedUsageSystem / elapsedUsDividedBy100
   const totalPercent = userPercent + systemPercent
+  lastTime = currentTime
+  lastCpuUsage = currentCpuUsage
   client.gauge('runtime.node.cpu.system', systemPercent.toFixed(2))
   client.gauge('runtime.node.cpu.user', userPercent.toFixed(2))
   client.gauge('runtime.node.cpu.total', totalPercent.toFixed(2))
@@ -160,12 +177,44 @@ function captureMemoryUsage () {
   client.gauge('runtime.node.mem.rss', stats.rss)
   client.gauge('runtime.node.mem.total', os.totalmem())
   client.gauge('runtime.node.mem.free', os.freemem())
+  client.gauge('runtime.node.mem.external', stats.external)
+  // TODO: Add arrayBuffers to the metrics. That also requires the
+  // node_metadata.csv to be updated for the website.
+  //
+  // client.gauge('runtime.node.mem.arrayBuffers', stats.arrayBuffers)
+}
-  stats.external && client.gauge('runtime.node.mem.external', stats.external)
+function captureUptime () {
+  // WARNING: lastTime must be updated in the same interval before this function is called!
+  // This is a faster `process.uptime()`.
+  client.gauge('runtime.node.process.uptime', Math.round((lastTime + 499) / 1000))
 }
-function captureProcess () {
-  client.gauge('runtime.node.process.uptime', Math.round(process.uptime()))
+function captureEventLoopDelay () {
+  eventLoopDelayObserver.disable()
+  if (eventLoopDelayObserver.count !== 0) {
+    const minimum = eventLoopDelayResolution * 1e6
+    const avg = Math.max(eventLoopDelayObserver.mean - minimum, 0)
+    const sum = Math.round(avg * eventLoopDelayObserver.count)
+    if (sum !== 0) {
+      // Normalize the metrics to the same format as the native metrics.
+      const stats = {
+        min: Math.max(eventLoopDelayObserver.min - minimum, 0),
+        max: Math.max(eventLoopDelayObserver.max - minimum, 0),
+        sum,
+        total: sum,
+        avg,
+        count: eventLoopDelayObserver.count,
+        p95: Math.max(eventLoopDelayObserver.percentile(95) - minimum, 0)
+      }
+      histogram('runtime.node.event_loop.delay', stats)
+    }
+  }
+  eventLoopDelayObserver = monitorEventLoopDelay({ resolution: eventLoopDelayResolution })
+  eventLoopDelayObserver.enable()
 }
 function captureHeapStats () {
@@ -176,14 +225,17 @@ function captureHeapStats () {
   client.gauge('runtime.node.heap.total_physical_size', stats.total_physical_size)
   client.gauge('runtime.node.heap.total_available_size', stats.total_available_size)
   client.gauge('runtime.node.heap.heap_size_limit', stats.heap_size_limit)
-  stats.malloced_memory && client.gauge('runtime.node.heap.malloced_memory', stats.malloced_memory)
-  stats.peak_malloced_memory && client.gauge('runtime.node.heap.peak_malloced_memory', stats.peak_malloced_memory)
+  client.gauge('runtime.node.heap.malloced_memory', stats.malloced_memory)
+  client.gauge('runtime.node.heap.peak_malloced_memory', stats.peak_malloced_memory)
+  // TODO: Add number_of_native_contexts and number_of_detached_contexts to the
+  // metrics. Those metrics allow to identify memory leaks. Adding them also
+  // requires the node_metadata.csv to be updated for the website.
+  //
+  // client.gauge('runtime.node.heap.number_of_native_contexts', stats.number_of_native_contexts)
+  // client.gauge('runtime.node.heap.number_of_detached_contexts', stats.number_of_detached_contexts)
 }
 function captureHeapSpace () {
-  if (!v8.getHeapSpaceStatistics) return
   const stats = v8.getHeapSpaceStatistics()
   for (let i = 0, l = stats.length; i < l; i++) {
@@ -197,55 +249,63 @@ function captureHeapSpace () {
 }
 /**
- * Gathers and reports Event Loop Utilization (ELU) since last run
+ * Gathers and reports Event Loop Utilization (ELU) since last run, or from the
+ * start of the process on first run.
  *
  * ELU is a measure of how busy the event loop is, like running JavaScript or
  * waiting on *Sync functions. The value is between 0 (idle) and 1 (exhausted).
- *
- * performance.eventLoopUtilization available in Node.js >= v14.10, >= v12.19, >= v16
  */
-let captureELU = () => {}
-if ('eventLoopUtilization' in performance) {
-  captureELU = () => {
-    // if elu is undefined (first run) the measurement is from start of process
-    elu = performance.eventLoopUtilization(elu)
+let lastElu = { idle: 0, active: 0 }
+function captureELU () {
+  const elu = performance.eventLoopUtilization()
-    client.gauge('runtime.node.event_loop.utilization', elu.utilization)
-  }
+  const idle = elu.idle - lastElu.idle
+  const active = elu.active - lastElu.active
+  const utilization = active / (idle + active)
+  lastElu = elu
+  client.gauge('runtime.node.event_loop.utilization', utilization)
 }
-function captureCommonMetrics () {
+function captureCommonMetrics (trackEventLoop) {
   captureMemoryUsage()
-  captureProcess()
+  captureUptime()
   captureHeapStats()
-  captureELU()
+  if (trackEventLoop) {
+    captureELU()
+  }
 }
-function captureNativeMetrics () {
+function captureNativeMetrics (trackEventLoop, trackGc) {
   const stats = nativeMetrics.stats()
   const spaces = stats.heap.spaces
-  const elapsedTime = process.hrtime(time)
-  time = process.hrtime()
+  const currentTime = performance.now() // Milliseconds with decimal places
+  const elapsedUsDividedBy100 = (currentTime - lastTime) * 10
+  lastTime = currentTime
-  const elapsedUs = elapsedTime[0] * 1e6 + elapsedTime[1] / 1e3
-  const userPercent = 100 * stats.cpu.user / elapsedUs
-  const systemPercent = 100 * stats.cpu.system / elapsedUs
+  const userPercent = stats.cpu.user / elapsedUsDividedBy100
+  const systemPercent = stats.cpu.system / elapsedUsDividedBy100
   const totalPercent = userPercent + systemPercent
   client.gauge('runtime.node.cpu.system', systemPercent.toFixed(2))
   client.gauge('runtime.node.cpu.user', userPercent.toFixed(2))
   client.gauge('runtime.node.cpu.total', totalPercent.toFixed(2))
-  histogram('runtime.node.event_loop.delay', stats.eventLoop)
+  if (trackEventLoop) {
+    histogram('runtime.node.event_loop.delay', stats.eventLoop)
+  }
-  Object.keys(stats.gc).forEach(type => {
-    if (type === 'all') {
-      histogram('runtime.node.gc.pause', stats.gc[type])
-    } else {
-      histogram('runtime.node.gc.pause.by.type', stats.gc[type], `gc_type:${type}`)
+  if (trackGc) {
+    for (const [type, value] of Object.entries(stats.gc)) {
+      if (type === 'all') {
+        histogram('runtime.node.gc.pause', value)
+      } else {
+        histogram('runtime.node.gc.pause.by.type', value, `gc_type:${type}`)
+      }
     }
-  })
+  }
   for (let i = 0, l = spaces.length; i < l; i++) {
     const tag = `heap_space:${spaces[i].space_name}`
@@ -258,13 +318,19 @@ function captureNativeMetrics () {
 }
 function histogram (name, stats, tag) {
+  if (stats.count === 0) {
+    return
+  }
   client.gauge(`${name}.min`, stats.min, tag)
   client.gauge(`${name}.max`, stats.max, tag)
   client.increment(`${name}.sum`, stats.sum, tag)
   client.increment(`${name}.total`, stats.sum, tag)
   client.gauge(`${name}.avg`, stats.avg, tag)
   client.increment(`${name}.count`, stats.count, tag)
-  client.gauge(`${name}.median`, stats.median, tag)
+  if (stats.median !== undefined) {
+    // TODO: Consider adding the median to the Node.js histogram/adding stddev to native metrics.
+    client.gauge(`${name}.median`, stats.median, tag)
+  }
   client.gauge(`${name}.95percentile`, stats.p95, tag)
 }
@@ -276,42 +342,25 @@ function startGCObserver () {
       const type = gcType(entry.detail?.kind || entry.kind)
       const duration = entry.duration * 1_000_000
-      runtimeMetrics.histogram('runtime.node.gc.pause.by.type', duration, `gc_type:${type}`)
-      runtimeMetrics.histogram('runtime.node.gc.pause', duration)
+      // These are individual metrics for each type of GC.
+      client.histogram('runtime.node.gc.pause.by.type', duration, `gc_type:${type}`)
+      client.histogram('runtime.node.gc.pause', duration)
     }
   })
   gcObserver.observe({ type: 'gc' })
 }
+const minorGCType = NODE_MAJOR >= 22 ? 'minor_mark_sweep' : 'minor_mark_compact'
 function gcType (kind) {
-  if (NODE_MAJOR >= 22) {
-    switch (kind) {
-      case 1: return 'scavenge'
-      case 2: return 'minor_mark_sweep'
-      case 4: return 'mark_sweep_compact' // Deprecated, might be removed soon.
-      case 8: return 'incremental_marking'
-      case 16: return 'process_weak_callbacks'
-      case 31: return 'all'
-    }
-  } else if (NODE_MAJOR >= 18) {
-    switch (kind) {
-      case 1: return 'scavenge'
-      case 2: return 'minor_mark_compact'
-      case 4: return 'mark_sweep_compact'
-      case 8: return 'incremental_marking'
-      case 16: return 'process_weak_callbacks'
-      case 31: return 'all'
-    }
-  } else {
-    switch (kind) {
-      case 1: return 'scavenge'
-      case 2: return 'mark_sweep_compact'
-      case 4: return 'incremental_marking'
-      case 8: return 'process_weak_callbacks'
-      case 15: return 'all'
-    }
+  switch (kind) {
+    case 1: return 'scavenge'
+    case 2: return minorGCType
+    case 4: return 'mark_sweep_compact' // Deprecated, might be removed soon.
+    case 8: return 'incremental_marking'
+    case 16: return 'process_weak_callbacks'
+    case 31: return 'all'
+    default: return 'unknown'
   }
-  return 'unknown'
 }

package/packages/dd-trace/src/supported-configurations.json CHANGED Viewed

@@ -31,7 +31,6 @@
     "DD_APPSEC_STACK_TRACE_ENABLED": ["A"],
     "DD_APPSEC_TRACE_RATE_LIMIT": ["A"],
     "DD_APPSEC_WAF_TIMEOUT": ["A"],
-    "DD_AZURE_APP_SERVICES": ["A"],
     "DD_CIVISIBILITY_AGENTLESS_ENABLED": ["A"],
     "DD_CIVISIBILITY_AGENTLESS_URL": ["A"],
     "DD_CIVISIBILITY_AUTO_INSTRUMENTATION_PROVIDER": ["A"],