@sentienguard/apm 1.0.11 → 1.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.d.ts +7 -0
- package/src/index.js +9 -0
- package/src/traceSpanExporter.js +25 -1
- package/src/traceTransport.js +51 -0
- package/src/tracing.js +80 -4
package/package.json
CHANGED
package/src/index.d.ts
CHANGED
|
@@ -8,6 +8,7 @@ export interface ApmConfig {
|
|
|
8
8
|
service?: string;
|
|
9
9
|
environment?: string;
|
|
10
10
|
endpoint?: string;
|
|
11
|
+
tracesEndpoint?: string;
|
|
11
12
|
flushInterval?: number;
|
|
12
13
|
maxRoutes?: number;
|
|
13
14
|
maxPayloadSize?: number;
|
|
@@ -17,6 +18,12 @@ export interface ApmConfig {
|
|
|
17
18
|
enabled?: boolean;
|
|
18
19
|
traceLocalHttp?: boolean;
|
|
19
20
|
peerServiceMap?: Record<string, string>;
|
|
21
|
+
/** Export sampling for raw traces only (0..1). Metrics are not sampled. */
|
|
22
|
+
sampleRate?: number;
|
|
23
|
+
/** Drop-on-pressure queue size for raw span export */
|
|
24
|
+
maxQueueSize?: number;
|
|
25
|
+
/** Batch size for raw span export */
|
|
26
|
+
maxBatchSize?: number;
|
|
20
27
|
};
|
|
21
28
|
}
|
|
22
29
|
|
package/src/index.js
CHANGED
|
@@ -14,8 +14,12 @@
|
|
|
14
14
|
* SENTIENGUARD_SERVICE=my-api (required)
|
|
15
15
|
* SENTIENGUARD_ENV=production (optional, default: production)
|
|
16
16
|
* SENTIENGUARD_ENDPOINT=https://... (optional)
|
|
17
|
+
* SENTIENGUARD_TRACES_ENDPOINT=https://... (optional, raw span ingest; default derived from SENTIENGUARD_ENDPOINT)
|
|
17
18
|
* SENTIENGUARD_FLUSH_INTERVAL=10 (optional, seconds)
|
|
18
19
|
* SENTIENGUARD_TRACING=false (optional, disable OpenTelemetry / W3C propagation; use legacy HTTP patches)
|
|
20
|
+
* SENTIENGUARD_TRACE_SAMPLE_RATE=0.05 (optional, export sampling for raw traces only; metrics are not sampled)
|
|
21
|
+
* SENTIENGUARD_TRACE_MAX_QUEUE_SIZE=2048 (optional, drop-on-pressure queue size for raw spans)
|
|
22
|
+
* SENTIENGUARD_TRACE_MAX_BATCH_SIZE=256 (optional, batch size for raw span export)
|
|
19
23
|
* SENTIENGUARD_TRACE_LOCAL_HTTP=true (optional, record outgoing HTTP to localhost as dependencies; use with SENTIENGUARD_PEER_SERVICE_MAP)
|
|
20
24
|
* SENTIENGUARD_PEER_SERVICE_MAP=3001:service-b,3002:other (optional, port -> callee name for local peers)
|
|
21
25
|
*
|
|
@@ -33,6 +37,7 @@ import { instrumentMongoDB, autoInstrumentMongoDB, stopMongoDBInstrumentation }
|
|
|
33
37
|
import { instrumentOpenAI, stopOpenAIInstrumentation } from './openai.js';
|
|
34
38
|
import { createBreaker, wrapMongoOperation, getBreakerStats, shutdownBreakers } from './circuitBreaker.js';
|
|
35
39
|
import { startTracing, shutdownTracing, getActiveTraceId, isTracingActive } from './tracing.js';
|
|
40
|
+
import { flushTraceQueue } from './traceTransport.js';
|
|
36
41
|
|
|
37
42
|
let isInitialized = false;
|
|
38
43
|
|
|
@@ -100,6 +105,8 @@ function setupGracefulShutdown() {
|
|
|
100
105
|
|
|
101
106
|
// Final flush
|
|
102
107
|
await finalFlush();
|
|
108
|
+
// Best-effort flush of queued raw spans
|
|
109
|
+
await flushTraceQueue();
|
|
103
110
|
|
|
104
111
|
debug('Shutdown complete');
|
|
105
112
|
};
|
|
@@ -122,6 +129,8 @@ async function shutdown() {
|
|
|
122
129
|
debug('Shutting down SDK');
|
|
123
130
|
|
|
124
131
|
await shutdownTracing();
|
|
132
|
+
// After OTel shutdown, best-effort drain any serialized spans still queued in transport.
|
|
133
|
+
await flushTraceQueue();
|
|
125
134
|
|
|
126
135
|
// Stop MongoDB instrumentation
|
|
127
136
|
stopMongoDBInstrumentation();
|
package/src/traceSpanExporter.js
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
import { ExportResultCode, hrTimeToMilliseconds } from '@opentelemetry/core';
|
|
9
9
|
import { SpanStatusCode } from '@opentelemetry/api';
|
|
10
10
|
import { enqueueSpans } from './traceTransport.js';
|
|
11
|
+
import { getConfig } from './config.js';
|
|
11
12
|
|
|
12
13
|
function hrTimeToUnixNanoString(hrTime) {
|
|
13
14
|
// hrTime is [seconds, nanoseconds]
|
|
@@ -35,6 +36,26 @@ function safeAttrs(attrs) {
|
|
|
35
36
|
return {};
|
|
36
37
|
}
|
|
37
38
|
|
|
39
|
+
/**
 * Decide whether spans of the given trace should be exported as raw traces.
 *
 * The decision is deterministic per trace id: the first 8 hex characters are
 * read as an unsigned 32-bit integer, scaled onto [0, 1), and the trace is kept
 * when that value is strictly below `sampleRate`. Trace ids that do not start
 * with 8 hex characters fall back to a random draw.
 *
 * An unset or unparseable rate (null, undefined, blank string, NaN, Infinity
 * after coercion) means "do not sample": every trace is kept.
 *
 * @param {string} traceId - Trace id whose leading 8 hex chars drive the decision.
 * @param {number|string|null|undefined} sampleRate - Fraction of traces to keep (0..1).
 * @returns {boolean} true when the trace should be exported.
 */
function shouldSampleTraceId(traceId, sampleRate) {
  if (sampleRate == null) return true;

  // Number('') and Number('   ') are 0, which would silently drop every trace.
  // A blank value is "not configured", so treat it like the other invalid inputs.
  if (typeof sampleRate === 'string' && sampleRate.trim() === '') return true;

  const rate = Number(sampleRate);
  if (!Number.isFinite(rate)) return true;
  if (rate <= 0) return false;
  if (rate >= 1) return true;

  // Deterministic sampling based on trace_id (stable across services).
  // Use the first 8 hex chars (32 bits) -> [0,1).
  try {
    const prefix = String(traceId).slice(0, 8);
    if (!/^[0-9a-f]{8}$/i.test(prefix)) return Math.random() < rate;
    // 8 hex digits always parse to a value in [0, 2^32), so no unsigned shift is needed.
    const bucket = Number.parseInt(prefix, 16) / 0x100000000; // 2^32
    return bucket < rate;
  } catch {
    // String(traceId) can throw for exotic inputs; fall back to a random draw.
    return Math.random() < rate;
  }
}
|
|
58
|
+
|
|
38
59
|
function serializeSpan(span) {
|
|
39
60
|
const ctx = span?.spanContext?.();
|
|
40
61
|
if (!ctx?.traceId || !ctx?.spanId) return null;
|
|
@@ -70,11 +91,14 @@ function serializeSpan(span) {
|
|
|
70
91
|
export class SentienGuardTraceSpanExporter {
|
|
71
92
|
export(spans, resultCallback) {
|
|
72
93
|
try {
|
|
94
|
+
const cfg = getConfig();
|
|
95
|
+
const rate = cfg?.tracing?.sampleRate;
|
|
96
|
+
|
|
73
97
|
const serialized = [];
|
|
74
98
|
for (const span of spans) {
|
|
75
99
|
try {
|
|
76
100
|
const s = serializeSpan(span);
|
|
77
|
-
if (s) serialized.push(s);
|
|
101
|
+
if (s && shouldSampleTraceId(s.trace_id, rate)) serialized.push(s);
|
|
78
102
|
} catch {
|
|
79
103
|
// ignore
|
|
80
104
|
}
|
package/src/traceTransport.js
CHANGED
|
@@ -14,8 +14,11 @@ import { debug, warn, getConfig, isEnabled } from './config.js';
|
|
|
14
14
|
let queue = [];
|
|
15
15
|
let scheduled = false;
|
|
16
16
|
let consecutiveFailures = 0;
|
|
17
|
+
let lastFailureAtMs = 0;
|
|
18
|
+
let recoveryTimer = null;
|
|
17
19
|
|
|
18
20
|
const MAX_CONSECUTIVE_FAILURES = 5;
|
|
21
|
+
const RECOVERY_BACKOFF_MS = 30_000;
|
|
19
22
|
|
|
20
23
|
function sendToBackend(payload) {
|
|
21
24
|
return new Promise((resolve, reject) => {
|
|
@@ -96,13 +99,29 @@ async function flushOnce(batch) {
|
|
|
96
99
|
try {
|
|
97
100
|
await sendToBackend(payload);
|
|
98
101
|
consecutiveFailures = 0;
|
|
102
|
+
lastFailureAtMs = 0;
|
|
99
103
|
debug(`Trace flush ok: spans=${batch.length}`);
|
|
100
104
|
} catch (err) {
|
|
101
105
|
consecutiveFailures++;
|
|
106
|
+
lastFailureAtMs = Date.now();
|
|
102
107
|
warn(`Trace flush failed (attempt ${consecutiveFailures}): ${err.message}`);
|
|
103
108
|
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
|
104
109
|
// Stop retrying aggressively; drop future spans until backend recovers.
|
|
105
110
|
warn('Trace flush: max failures reached; dropping spans under backpressure');
|
|
111
|
+
if (!recoveryTimer) {
|
|
112
|
+
recoveryTimer = setTimeout(() => {
|
|
113
|
+
recoveryTimer = null;
|
|
114
|
+
consecutiveFailures = 0;
|
|
115
|
+
lastFailureAtMs = 0;
|
|
116
|
+
debug('Trace flush: recovery window elapsed; retrying delivery');
|
|
117
|
+
if (queue.length && !scheduled) {
|
|
118
|
+
scheduled = true;
|
|
119
|
+
setImmediate(drainQueue);
|
|
120
|
+
}
|
|
121
|
+
}, RECOVERY_BACKOFF_MS);
|
|
122
|
+
// Allow process to exit naturally.
|
|
123
|
+
if (typeof recoveryTimer.unref === 'function') recoveryTimer.unref();
|
|
124
|
+
}
|
|
106
125
|
}
|
|
107
126
|
}
|
|
108
127
|
}
|
|
@@ -114,6 +133,8 @@ function drainQueue() {
|
|
|
114
133
|
|
|
115
134
|
// If backend is unhealthy, drop to protect app memory.
|
|
116
135
|
if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
|
136
|
+
// During the recovery backoff window, keep dropping to protect memory.
|
|
137
|
+
// After backoff, recoveryTimer resets failures and normal flushing resumes.
|
|
117
138
|
queue = [];
|
|
118
139
|
return;
|
|
119
140
|
}
|
|
@@ -155,5 +176,35 @@ export function resetTraceQueueForTests() {
|
|
|
155
176
|
queue = [];
|
|
156
177
|
scheduled = false;
|
|
157
178
|
consecutiveFailures = 0;
|
|
179
|
+
lastFailureAtMs = 0;
|
|
180
|
+
if (recoveryTimer) {
|
|
181
|
+
clearTimeout(recoveryTimer);
|
|
182
|
+
recoveryTimer = null;
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
/**
 * Best-effort flush of queued spans (e.g. during shutdown).
 * Never throws; safe to call even if tracing is disabled.
 *
 * Spans are taken from the front of the module-level queue in batches and
 * handed to flushOnce() one batch at a time. Each batch is removed from the
 * queue before it is sent. The deadline is only checked between batches, so a
 * batch that is already in flight is awaited to completion.
 *
 * @param {object} [options]
 * @param {number} [options.timeoutMs=1500] - Time budget for starting new batches;
 *   non-numeric or negative values are treated as 0.
 * @returns {Promise<void>}
 */
export async function flushTraceQueue({ timeoutMs = 1500 } = {}) {
  try {
    if (!isEnabled()) return;
    const deadline = Date.now() + Math.max(0, Number(timeoutMs) || 0);

    // Read the config defensively: a missing config must not abort the flush.
    // Batch size must be a positive integer; a negative or fractional (<1) value
    // would yield empty batches and spin this loop until the deadline.
    const configuredBatch = Math.floor(Number(getConfig()?.tracing?.maxBatchSize));
    const maxBatch = configuredBatch >= 1 ? configuredBatch : 256;

    while (queue.length && Date.now() < deadline) {
      const batch = queue.slice(0, maxBatch);
      queue = queue.slice(batch.length);
      // Attempt flush even if failures happened earlier; this is a single best-effort drain.
      consecutiveFailures = 0;
      lastFailureAtMs = 0;
      // eslint-disable-next-line no-await-in-loop
      await flushOnce(batch);
    }
  } catch {
    // Deliberately swallowed: this runs on shutdown paths and must never throw.
  }
}
|
|
159
210
|
|
package/src/tracing.js
CHANGED
|
@@ -10,7 +10,7 @@ import { SEMRESATTRS_SERVICE_NAME, SEMRESATTRS_DEPLOYMENT_ENVIRONMENT } from '@o
|
|
|
10
10
|
import { W3CTraceContextPropagator } from '@opentelemetry/core';
|
|
11
11
|
import { HttpInstrumentation } from '@opentelemetry/instrumentation-http';
|
|
12
12
|
import { ExpressInstrumentation } from '@opentelemetry/instrumentation-express';
|
|
13
|
-
import {
|
|
13
|
+
import { AlwaysOnSampler, BatchSpanProcessor } from '@opentelemetry/sdk-trace-base';
|
|
14
14
|
import { getConfig, debug } from './config.js';
|
|
15
15
|
import { SentienGuardSpanExporter } from './spanExporter.js';
|
|
16
16
|
import { SentienGuardTraceSpanExporter } from './traceSpanExporter.js';
|
|
@@ -18,6 +18,54 @@ import { SentienGuardTraceSpanExporter } from './traceSpanExporter.js';
|
|
|
18
18
|
let sdk = null;
|
|
19
19
|
let tracingActive = false;
|
|
20
20
|
|
|
21
|
+
/**
 * Set a string attribute on a span, ignoring anything that cannot be recorded.
 *
 * Does nothing when the span has no setAttribute method, when the value is
 * null/undefined, or when its string form is empty. Non-string values are
 * converted with String(). Any error raised along the way is suppressed.
 *
 * @param {object} span - Object expected to expose setAttribute(key, value).
 * @param {string} key - Attribute name.
 * @param {*} value - Attribute value; stringified before being set.
 * @returns {void}
 */
function setAttr(span, key, value) {
  try {
    const canSet = Boolean(span) && typeof span.setAttribute === 'function';
    if (!canSet || value == null) return;

    const text = String(value);
    if (text.length === 0) return;

    span.setAttribute(key, text);
  } catch {
    // Attribute enrichment is best-effort; never let it break the caller.
  }
}
|
|
32
|
+
|
|
33
|
+
/**
 * Set a numeric attribute on a span, ignoring anything that is not a usable number.
 *
 * Does nothing when the span has no setAttribute method, when the value is
 * null/undefined or a blank string, or when it does not coerce to a finite
 * number. Any error raised along the way is suppressed.
 *
 * @param {object} span - Object expected to expose setAttribute(key, value).
 * @param {string} key - Attribute name.
 * @param {*} value - Value to coerce with Number() before being set.
 * @returns {void}
 */
function setAttrNumber(span, key, value) {
  try {
    if (!span || typeof span.setAttribute !== 'function') return;

    // Number(null) and Number('') are both 0; without these guards a missing
    // value would be recorded as a real 0 (e.g. a bogus status code or port).
    if (value == null) return;
    if (typeof value === 'string' && value.trim() === '') return;

    const numeric = Number(value);
    if (!Number.isFinite(numeric)) return;

    span.setAttribute(key, numeric);
  } catch {
    // Attribute enrichment is best-effort; never let it break the caller.
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Remove `user:password@` userinfo from a URL string.
 * Returns the input unchanged when it has no credentials or cannot be parsed.
 *
 * @param {string} rawUrl - URL text to sanitize.
 * @returns {string} The URL without userinfo, or the original text.
 */
function stripUrlCredentials(rawUrl) {
  try {
    const parsed = new URL(rawUrl);
    if (!parsed.username && !parsed.password) return rawUrl;
    parsed.username = '';
    parsed.password = '';
    return parsed.toString();
  } catch {
    return rawUrl;
  }
}

/**
 * Build a printable URL for an outgoing HTTP request.
 *
 * Accepts a URL string, a URL instance, or a request-options-like object with
 * protocol / hostname / host / port / path / pathname fields. Returns '' when
 * no host can be determined or anything throws.
 *
 * - Credentials embedded in string/URL inputs are removed so they are not
 *   recorded in span attributes.
 * - A non-default `port` is appended unless the host text already carries one.
 * - A bare IPv6 literal is wrapped in brackets so the result stays a valid URL.
 *
 * @param {string|URL|object|null|undefined} requestOptions - Outgoing request description.
 * @returns {string} Absolute URL, or '' when it cannot be derived.
 */
function safeUrlFromOutgoingRequest(requestOptions) {
  try {
    if (!requestOptions) return '';
    if (typeof requestOptions === 'string') return stripUrlCredentials(requestOptions);
    if (requestOptions instanceof URL) return stripUrlCredentials(requestOptions.toString());

    const protocol = requestOptions.protocol || 'http:';
    const host = String(requestOptions.hostname || requestOptions.host || '').trim();
    if (!host) return '';
    const path = requestOptions.path || requestOptions.pathname || '/';

    // More than one ':' without brackets means a bare IPv6 literal such as "::1".
    const isBareIpv6 = !host.startsWith('[') && host.split(':').length > 2;
    let authority = isBareIpv6 ? `[${host}]` : host;

    // Keep the port: local peers are often distinguished only by port.
    // The scheme's default port is omitted so common URLs stay unchanged.
    const port = Number(requestOptions.port);
    const defaultPort = protocol === 'https:' ? 443 : 80;
    const hostAlreadyHasPort = /:\d+$/.test(authority);
    const isUsablePort = Number.isInteger(port) && port > 0 && port <= 65535;
    if (!hostAlreadyHasPort && isUsablePort && port !== defaultPort) {
      authority = `${authority}:${port}`;
    }

    return `${protocol}//${authority}${path}`;
  } catch {
    return '';
  }
}
|
|
59
|
+
|
|
60
|
+
/**
 * Read the Host header of an incoming request.
 *
 * When the header text contains commas, only the part before the first comma
 * is used, with surrounding whitespace trimmed. Returns '' when the header is
 * missing or anything throws.
 *
 * @param {object|null|undefined} req - Request-like object with a `headers` map.
 * @returns {string} Host header value, or ''.
 */
function safeHostFromIncoming(req) {
  try {
    const hostHeader = req?.headers?.host;
    if (!hostHeader) return '';

    const [firstEntry] = String(hostHeader).split(',');
    return firstEntry.trim();
  } catch {
    return '';
  }
}
|
|
68
|
+
|
|
21
69
|
function shouldIgnoreOutgoingHost(hostname) {
|
|
22
70
|
if (!hostname) return false;
|
|
23
71
|
const host = hostname.split(':')[0];
|
|
@@ -64,6 +112,34 @@ export function startTracing() {
|
|
|
64
112
|
ignoreOutgoingRequestHook: (requestOptions) => {
|
|
65
113
|
const host = requestOptions.hostname || (requestOptions.host ? String(requestOptions.host).split(':')[0] : '');
|
|
66
114
|
return shouldIgnoreOutgoingHost(host);
|
|
115
|
+
},
|
|
116
|
+
/**
|
|
117
|
+
* Ensure we always attach basic HTTP attributes.
|
|
118
|
+
* This protects us from cases where the underlying instrumentation
|
|
119
|
+
* doesn't populate attributes (common in some Node/Jest/undici combos).
|
|
120
|
+
*/
|
|
121
|
+
requestHook: (span, request) => {
|
|
122
|
+
// Incoming request (SERVER)
|
|
123
|
+
if (request && typeof request === 'object' && 'headers' in request && 'method' in request) {
|
|
124
|
+
setAttr(span, 'http.method', request.method);
|
|
125
|
+
setAttr(span, 'http.target', request.url);
|
|
126
|
+
setAttr(span, 'http.host', safeHostFromIncoming(request));
|
|
127
|
+
return;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// Outgoing request (CLIENT) — requestOptions
|
|
131
|
+
setAttr(span, 'http.method', request?.method);
|
|
132
|
+
const url = safeUrlFromOutgoingRequest(request);
|
|
133
|
+
if (url) setAttr(span, 'http.url', url);
|
|
134
|
+
const host = request?.hostname || (request?.host ? String(request.host).split(':')[0] : '');
|
|
135
|
+
if (host) setAttr(span, 'net.peer.name', host);
|
|
136
|
+
if (request?.port) setAttrNumber(span, 'net.peer.port', request.port);
|
|
137
|
+
},
|
|
138
|
+
responseHook: (span, response) => {
|
|
139
|
+
// Incoming response (SERVER): ServerResponse
|
|
140
|
+
if (response && typeof response === 'object' && 'statusCode' in response) {
|
|
141
|
+
setAttrNumber(span, 'http.status_code', response.statusCode);
|
|
142
|
+
}
|
|
67
143
|
}
|
|
68
144
|
});
|
|
69
145
|
|
|
@@ -71,9 +147,9 @@ export function startTracing() {
|
|
|
71
147
|
|
|
72
148
|
sdk = new NodeSDK({
|
|
73
149
|
resource,
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
150
|
+
// Important: keep span recording ON so APM metrics derived from spans are not undersampled.
|
|
151
|
+
// Raw trace export sampling is handled inside SentienGuardTraceSpanExporter instead.
|
|
152
|
+
sampler: new AlwaysOnSampler(),
|
|
77
153
|
textMapPropagator: new W3CTraceContextPropagator(),
|
|
78
154
|
instrumentations: [httpInstrumentation, expressInstrumentation],
|
|
79
155
|
spanProcessors: [
|