@decocms/start 6.0.0 → 6.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/MIGRATION_TOOLING_PLAN.md +9 -0
- package/docs/observability.md +20 -10
- package/package.json +1 -1
- package/scripts/generate-invoke.test.ts +83 -56
- package/scripts/generate-invoke.ts +26 -10
- package/src/middleware/observability.test.ts +237 -0
- package/src/middleware/observability.ts +165 -8
- package/src/sdk/cachedLoader.ts +10 -7
- package/src/sdk/logger.test.ts +99 -0
- package/src/sdk/logger.ts +18 -7
- package/src/sdk/observability.ts +18 -0
- package/src/sdk/otel.ts +228 -38
- package/src/sdk/otelHttpTracer.test.ts +422 -0
- package/src/sdk/otelHttpTracer.ts +489 -0
- package/src/sdk/requestContext.ts +46 -0
- package/src/sdk/workerEntry.ts +138 -17
|
@@ -0,0 +1,489 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OTLP/HTTP JSON trace exporter — direct POST from a Cloudflare Worker
|
|
3
|
+
* to `deco-otel-ingest` `/v1/traces`.
|
|
4
|
+
*
|
|
5
|
+
* Mirrors `otelHttpMeter.ts` in shape: per-isolate buffer, flush via
|
|
6
|
+
* `ctx.waitUntil` at request end, cooldown + buffer cap.
|
|
7
|
+
*
|
|
8
|
+
* **Why direct POST instead of CF Destinations + `@opentelemetry/api` bridge.**
|
|
9
|
+
* The bridge tracer in `otel.ts` delegates to `trace.getTracer(...)`. Without
|
|
10
|
+
* a registered `TracerProvider`, that's a no-op proxy and every framework
|
|
11
|
+
* `deco.*` span we create silently disappears. Empirical verification (May
|
|
12
|
+
* 2026) on prod sites confirmed: zero `deco.*` rows in `otel_traces` despite
|
|
13
|
+
* `withTracing` being called on every request. This adapter ships those
|
|
14
|
+
* spans direct-POST, same channel as metrics + error logs.
|
|
15
|
+
*
|
|
16
|
+
* **Sampling.** Consistent per-trace decision via FNV-1a hash of the
|
|
17
|
+
* trace-id. Caller passes `headSamplingRate` (default 0.01 = 1%) to match
|
|
18
|
+
* the CF Destinations `traces.head_sampling_rate` default and keep volume
|
|
19
|
+
* within budget. Parent-based override: if an incoming W3C `traceparent`
|
|
20
|
+
* header carried `flags=01` (sampled), every span in that trace is
|
|
21
|
+
* exported regardless of the rate.
|
|
22
|
+
*
|
|
23
|
+
* **Buffer.** Spans are buffered per-isolate until `flush()` ships them.
|
|
24
|
+
* Unlike metrics (CUMULATIVE temporality, lifelong buffer), traces are
|
|
25
|
+
* one-shot per span: buffer drains on flush and starts empty for the next
|
|
26
|
+
* window. Cap (`maxBufferSpans`) protects against runaway trace volume.
|
|
27
|
+
*
|
|
28
|
+
* **Parent linkage.** `startSpan` reads the active span from a caller-
|
|
29
|
+
* supplied accessor (`getActiveSpanForParent`) — the framework wires this
|
|
30
|
+
* to `getActiveSpan` from `middleware/observability.ts` so child spans
|
|
31
|
+
* inherit `trace_id` + record `parent_span_id` automatically. Root spans
|
|
32
|
+
* (no active parent) consult `getRequestTraceContext` to pick up the
|
|
33
|
+
* incoming W3C traceparent, or generate a fresh trace.
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
import type { Span, TracerAdapter } from "../middleware/observability";
|
|
37
|
+
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
// W3C traceparent parsing
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
/**
 * Trace context extracted from an inbound W3C `traceparent` header
 * (`version-traceId-parentId-flags`).
 */
export interface TraceContext {
  /** Trace identifier taken from the header's trace-id field. */
  traceId: string;
  /** Span identifier taken from the header's parent-id field. */
  parentSpanId: string;
  /** The W3C "sampled" bit of the parent: `traceFlags & 0x01`. */
  sampled: boolean;
  /**
   * True when the context was lifted from an inbound header, i.e. the
   * parent span lives in another service.
   */
  remoteParent: boolean;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Parse a W3C `traceparent` header value
 * (`version-traceId-parentId-flags`).
 *
 * Returns `null` on any structural violation:
 *  - missing/empty value or fewer than four dash-separated fields;
 *  - a version that is not two lowercase hex digits, or the reserved `ff`;
 *  - a version-`00` header carrying anything beyond the four fields;
 *  - trace-id / parent-id that are not 32 / 16 lowercase hex digits, or
 *    are all zeros (invalid per W3C tracecontext);
 *  - flags that are not two lowercase hex digits.
 *
 * Versions above `00` are parsed by their first four fields, following
 * the forward-compatibility rule of the W3C spec (future versions are
 * expected to only append fields).
 *
 * @param value Raw header value, or `null`/`undefined` when absent.
 * @returns The parsed context with `remoteParent: true`, or `null`.
 */
export function parseTraceparent(value: string | null | undefined): TraceContext | null {
  if (!value) return null;

  // Tolerate optional whitespace around the header value.
  const parts = value.trim().split("-");
  if (parts.length < 4) return null;
  const [version = "", traceId = "", parentId = "", flags = ""] = parts;

  if (!/^[0-9a-f]{2}$/.test(version) || version === "ff") return null;
  // Version 00 defines exactly four fields; only later versions may
  // append extra dash-separated data after the flags.
  if (version === "00" && parts.length !== 4) return null;

  if (!/^[0-9a-f]{32}$/.test(traceId)) return null;
  if (traceId === "0".repeat(32)) return null;
  if (!/^[0-9a-f]{16}$/.test(parentId)) return null;
  if (parentId === "0".repeat(16)) return null;
  if (!/^[0-9a-f]{2}$/.test(flags)) return null;

  const sampled = (Number.parseInt(flags, 16) & 0x01) === 0x01;
  return { traceId, parentSpanId: parentId, sampled, remoteParent: true };
}
|
|
75
|
+
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
// ID generation + sampling
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
/**
 * Return `bytes` random bytes rendered as lowercase hex (`bytes * 2`
 * characters). Uses `crypto.getRandomValues` when available and falls
 * back to `Math.random` otherwise.
 */
function randomHex(bytes: number): string {
  if (typeof crypto !== "undefined" && crypto.getRandomValues) {
    const buf = new Uint8Array(bytes);
    crypto.getRandomValues(buf);
    let out = "";
    for (const byte of buf) {
      out += byte.toString(16).padStart(2, "0");
    }
    return out;
  }
  // Fallback for runtimes without crypto.getRandomValues (tests, some
  // sandboxes). Lower-entropy, but the length contract still holds.
  let out = "";
  while (out.length < bytes * 2) {
    // Scale by 0x10000 (not 0xffff) so each 16-bit chunk covers the whole
    // 0000..ffff range; with 0xffff the value `ffff` is unreachable.
    out += Math.floor(Math.random() * 0x10000)
      .toString(16)
      .padStart(4, "0");
  }
  return out.slice(0, bytes * 2);
}

/**
 * Generate a random hex ID of `bytes` bytes that is not all zeros.
 * All-zero trace/span IDs are invalid in W3C trace context (and are
 * rejected by this module's own `traceparent` parser), so the
 * astronomically unlikely all-zero draw is simply re-rolled.
 * Only call with `bytes > 0`.
 */
function randomNonZeroHex(bytes: number): string {
  const allZero = "0".repeat(bytes * 2);
  let id = randomHex(bytes);
  while (id === allZero) {
    id = randomHex(bytes);
  }
  return id;
}

/** New 16-byte trace ID as 32 lowercase hex characters (never all zeros). */
export function newTraceId(): string {
  return randomNonZeroHex(16);
}

/** New 8-byte span ID as 16 lowercase hex characters (never all zeros). */
export function newSpanId(): string {
  return randomNonZeroHex(8);
}
|
|
110
|
+
|
|
111
|
+
/**
 * 32-bit FNV-1a hash of the trace ID string, one UTF-16 code unit per
 * round. Dependency-free and cheap; the result is an unsigned 32-bit
 * integer used as the input to head sampling.
 */
function hashTraceId(traceId: string): number {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;

  let acc = FNV_OFFSET_BASIS;
  let pos = 0;
  while (pos < traceId.length) {
    // XOR the next code unit in, then multiply by the prime in 32-bit space.
    acc = Math.imul(acc ^ traceId.charCodeAt(pos), FNV_PRIME);
    pos += 1;
  }
  // Reinterpret the signed 32-bit accumulator as unsigned.
  return acc >>> 0;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Head-sampling decision keyed on the trace ID, so every span of one
 * trace receives the same answer. Parent-based overrides are the
 * caller's responsibility.
 *
 * @param traceId Trace ID to hash.
 * @param rate Sampling rate. Values `>= 1` and non-finite values keep
 *   everything; values `<= 0` keep nothing.
 * @returns `true` when the trace should be kept.
 */
export function shouldSampleTrace(traceId: string, rate: number): boolean {
  const keepEverything = rate >= 1 || !Number.isFinite(rate);
  if (keepEverything) return true;
  if (rate <= 0) return false;

  // Map the 32-bit hash onto [0, 1] and compare against the rate.
  const bucket = hashTraceId(traceId) / 0xffffffff;
  return bucket < rate;
}
|
|
135
|
+
|
|
136
|
+
// ---------------------------------------------------------------------------
|
|
137
|
+
// Types
|
|
138
|
+
// ---------------------------------------------------------------------------
|
|
139
|
+
|
|
140
|
+
/** Flat attribute bag used on spans and span events: primitive values only. */
type Labels = Record<string, string | number | boolean>;

/** OTel `SpanKind.INTERNAL`; the only span kind this module emits. */
const SPAN_KIND_INTERNAL = 1;

/** OTel `StatusCode` values in use here: `OK` = 1 and `ERROR` = 2 (`UNSET` = 0 is written inline). */
const STATUS_OK = 1;
const STATUS_ERROR = 2;
|
|
147
|
+
|
|
148
|
+
/** A timestamped event attached to a span (e.g. the `exception` event). */
interface SpanEvent {
  /** Event name. */
  name: string;
  /** Event time as a decimal string of nanoseconds since the Unix epoch. */
  timeUnixNano: string;
  /** Attributes recorded with the event. */
  attributes: Labels;
}
|
|
153
|
+
|
|
154
|
+
/** In-memory representation of one span, buffered until it is serialized. */
interface SpanRecord {
  /** Span name. */
  name: string;
  /** Trace this span belongs to. */
  traceId: string;
  /** This span's own ID. */
  spanId: string;
  /** Parent span ID, or an empty string when the span has no parent. */
  parentSpanId: string;
  /** Start time as a decimal string of nanoseconds since the Unix epoch. */
  startTimeUnixNano: string;
  /** End time in the same format as `startTimeUnixNano`. */
  endTimeUnixNano: string;
  /** Span attributes. */
  attributes: Labels;
  /** Events recorded on the span. */
  events: SpanEvent[];
  /** OTel status: numeric code plus an optional human-readable message. */
  status: { code: number; message: string };
}
|
|
165
|
+
|
|
166
|
+
/** Configuration accepted by `createOtlpHttpTracerAdapter`. */
export interface OtlpHttpTracerOptions {
  /** Full OTLP/HTTP JSON traces endpoint, e.g. `https://.../v1/traces`. */
  endpoint: string;
  /** Resource attributes stamped on every OTLP payload (`service.name` etc.). */
  resourceAttributes: Record<string, string>;
  /** Scope name advertised in `scopeSpans[].scope.name`. Default: `"@decocms/start"`. */
  scopeName?: string;
  /** Scope version advertised alongside the scope name. Default: empty string. */
  scopeVersion?: string;
  /**
   * Head sampling rate in the range 0.0..1.0. Default: 0.01, chosen to
   * match the CF Destinations `traces.head_sampling_rate` recommendation.
   * Use 1 to keep every trace and 0 to keep none.
   *
   * The decision is made per trace (FNV-1a hash of the trace ID), so a
   * `deco.cms.resolvePage` child span and the `deco.http.request` root
   * of the same trace share one outcome.
   */
  headSamplingRate?: number;
  /** Hard cap on buffered spans; spans ending while at the cap are dropped. Default: 2000. */
  maxBufferSpans?: number;
  /**
   * Minimum interval (ms) between flush attempts. The window restarts
   * after every attempt, whether or not the POST succeeded. Default: 5000.
   */
  minFlushIntervalMs?: number;
  /** HTTP timeout (ms) applied to each flush POST. Default: 5000. */
  flushTimeoutMs?: number;
  /**
   * Test seam: replacement for `fetch` on the flush path. Same role as
   * in `otelHttpMeter.ts`.
   */
  fetchImpl?: typeof fetch;
  /** Test seam: replacement for `Date.now()` to get deterministic timestamps. */
  nowMs?: () => number;
  /**
   * Optional error sink. Called with `"flush"` for transport failures and
   * non-2xx responses, and with `"overflow"` when a span is dropped
   * because the buffer is at its cap.
   */
  onError?: (kind: "flush" | "overflow", err: unknown) => void;
  /**
   * Returns the span that is active when `startSpan` is called, or `null`
   * for root spans. The framework wires this to `getActiveSpan()` from
   * `middleware/observability.ts` so that child spans inherit `trace_id`
   * and record `parent_span_id`.
   */
  getActiveSpanForParent: () => Span | null;
  /**
   * Returns the per-request trace context parsed from an inbound
   * `traceparent`. Consulted when a root span is created so that remote
   * parents and their `sampled` flag are honored.
   */
  getRequestTraceContext?: () => TraceContext | null;
}
|
|
214
|
+
|
|
215
|
+
/** Tracer adapter with the extra buffer controls of the OTLP/HTTP exporter. */
export interface OtlpHttpTracer extends TracerAdapter {
  /** Ship the buffered spans, subject to the flush cooldown. */
  flush(): Promise<void>;
  /** Number of spans currently buffered; used by tests and the audit channel. */
  pendingSpanCount(): number;
}
|
|
221
|
+
|
|
222
|
+
// ---------------------------------------------------------------------------
|
|
223
|
+
// Factory
|
|
224
|
+
// ---------------------------------------------------------------------------
|
|
225
|
+
|
|
226
|
+
/**
 * Create the OTLP/HTTP JSON tracer adapter.
 *
 * Spans are created with `startSpan`. When a span ends and passes the
 * sampling decision it is appended to an in-memory buffer; `flush()`
 * drains that buffer with a single POST to `options.endpoint`.
 *
 * Sampling, decided when the span ends:
 *  1. If the trace carries the W3C "sampled" bit — set on a root span by a
 *     sampled inbound `traceparent` and inherited by every descendant via
 *     `traceFlags` — the span is exported regardless of the rate.
 *  2. Otherwise the trace-ID hash is compared against `headSamplingRate`.
 *
 * @param options Endpoint, resource attributes, sampling / buffer / flush
 *   tuning, test seams and the accessors used for parent linkage.
 * @returns An adapter exposing `startSpan`, `flush` and `pendingSpanCount`.
 */
export function createOtlpHttpTracerAdapter(options: OtlpHttpTracerOptions): OtlpHttpTracer {
  const endpoint = options.endpoint;
  const resourceAttributes = options.resourceAttributes;
  const scopeName = options.scopeName ?? "@decocms/start";
  const scopeVersion = options.scopeVersion ?? "";
  const headSamplingRate = options.headSamplingRate ?? 0.01;
  const maxBuffer = options.maxBufferSpans ?? 2000;
  const minFlushIntervalMs = options.minFlushIntervalMs ?? 5000;
  const flushTimeoutMs = options.flushTimeoutMs ?? 5000;
  const fetchImpl = options.fetchImpl ?? fetch;
  const now = options.nowMs ?? (() => Date.now());
  const onError = options.onError;
  const getActiveSpanForParent = options.getActiveSpanForParent;
  const getRequestTraceContext = options.getRequestTraceContext;

  // Buffer of completed spans waiting to ship. The sampling decision is
  // taken at span-end (not span-start) so attribute mutations made during
  // the span's lifetime are part of the record we keep or drop.
  const spans: SpanRecord[] = [];
  // Timestamp (from `now()`) of the last flush attempt; drives the cooldown.
  let lastFlushAt = 0;
  // The flush currently in progress, shared by concurrent `flush()` callers.
  let inflight: Promise<void> | null = null;

  /** Number of spans currently buffered. */
  function pendingSpanCount(): number {
    return spans.length;
  }

  /**
   * Decide whether to export a finished span. Honors:
   *   1. The trace's inherited "sampled" flag (always wins — joins
   *      external traces).
   *   2. Trace-ID hash vs `headSamplingRate` (consistent per trace).
   */
  function shouldExportSpan(traceId: string, remoteSampled: boolean | null): boolean {
    if (remoteSampled === true) return true;
    return shouldSampleTrace(traceId, headSamplingRate);
  }

  /**
   * Start a span. The parent is the currently-active span if there is
   * one; otherwise the inbound request trace context (if any); otherwise
   * the span starts a fresh trace.
   */
  function startSpan(name: string, attributes?: Labels): Span {
    const parent = getActiveSpanForParent();
    const parentCtx = parent?.spanContext?.();

    // Inherit trace ID from the parent (in-process or remote). Only root
    // spans with no parent context generate a fresh trace ID.
    const remoteCtx = parentCtx ? null : (getRequestTraceContext?.() ?? null);
    const traceId = parentCtx?.traceId ?? remoteCtx?.traceId ?? newTraceId();
    const parentSpanId = parentCtx?.spanId ?? remoteCtx?.parentSpanId ?? "";
    // traceFlags are propagated unchanged: a root span takes the sampled
    // bit from the inbound traceparent, and every in-process descendant
    // copies its parent's flags.
    const traceFlags = remoteCtx?.sampled ? 0x01 : (parentCtx?.traceFlags ?? 0x00);
    // Derive the export override from the propagated flags rather than
    // from `remoteCtx` alone. `remoteCtx` is only read for root spans, so
    // keying on it would leave child spans of a remote-sampled trace to
    // the hash-based rate and export the root without its children.
    const remoteSampled = (traceFlags & 0x01) === 0x01;

    const spanId = newSpanId();
    const startTimeNs = msToNs(now());
    const record: SpanRecord = {
      name,
      traceId,
      spanId,
      parentSpanId,
      startTimeUnixNano: startTimeNs,
      endTimeUnixNano: startTimeNs, // overwritten on end()
      attributes: { ...(attributes ?? {}) },
      events: [],
      status: { code: 0, message: "" },
    };

    let ended = false;

    return {
      /** Finish the span; idempotent. Buffers the record if it is sampled. */
      end(): void {
        if (ended) return;
        ended = true;
        record.endTimeUnixNano = msToNs(now());

        // Sample at the END so attribute mutations during the span are
        // captured in the kept record.
        if (!shouldExportSpan(traceId, remoteSampled)) return;

        if (spans.length >= maxBuffer) {
          onError?.("overflow", new Error(`trace buffer at cap (${maxBuffer}) — dropping span`));
          return;
        }
        spans.push(record);
      },
      /**
       * Mark the span as failed: sets ERROR status, stamps the
       * `exception.*` attributes and records an `exception` event.
       */
      setError(error: unknown): void {
        const message = error instanceof Error ? error.message : String(error);
        const exceptionAttrs: Labels = {
          "exception.type": error instanceof Error ? error.constructor.name : "unknown",
          "exception.message": message,
        };
        if (error instanceof Error && error.stack) {
          exceptionAttrs["exception.stacktrace"] = error.stack;
        }

        record.status = { code: STATUS_ERROR, message };
        Object.assign(record.attributes, exceptionAttrs);
        record.events.push({
          name: "exception",
          timeUnixNano: msToNs(now()),
          attributes: { ...exceptionAttrs },
        });
      },
      /** Set (or overwrite) one attribute on the span. */
      setAttribute(key: string, value: string | number | boolean): void {
        record.attributes[key] = value;
        // Status promotion: setting an OK-ish HTTP status_code transitions
        // an UNSET span to OK so dashboards see "succeeded" explicitly.
        if (
          key === "http.status_code" &&
          typeof value === "number" &&
          value < 400 &&
          record.status.code === 0
        ) {
          record.status = { code: STATUS_OK, message: "" };
        }
      },
      /** Identity of this span, used by children and header injection. */
      spanContext(): { traceId: string; spanId: string; traceFlags: number } {
        return { traceId, spanId, traceFlags };
      },
    };
  }

  /**
   * POST the buffered spans. Never rejects: transport errors, timeouts
   * and non-2xx responses are reported through `onError("flush", …)` and
   * the batch is not re-queued.
   */
  async function doFlush(): Promise<void> {
    if (spans.length === 0) return;

    // Snapshot + reset buffer before the network call so concurrent
    // span ends during the POST land in the next window.
    const batch = spans.splice(0, spans.length);

    const payload = serializeOtlpTraces(batch, {
      resourceAttributes,
      scopeName,
      scopeVersion,
    });

    // Abort the request if it outlives the per-flush timeout.
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), flushTimeoutMs);
    try {
      const res = await fetchImpl(endpoint, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(payload),
        signal: controller.signal,
      });
      if (!res.ok) {
        // Read and discard the error body; failures while reading are ignored.
        try {
          await res.text();
        } catch {
          /* swallow */
        }
        onError?.("flush", new Error(`POST ${endpoint} → ${res.status}`));
      }
    } catch (err) {
      onError?.("flush", err);
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * Drain the buffer, subject to the cooldown. Concurrent callers share
   * the in-progress flush. The cooldown is bypassed when the buffer has
   * reached its cap.
   */
  async function flush(): Promise<void> {
    if (inflight) return inflight;
    // Nothing to send: return without starting a cooldown window, so an
    // empty flush cannot delay the spans that arrive right after it.
    if (spans.length === 0) return;
    const elapsed = now() - lastFlushAt;
    const overCap = spans.length >= maxBuffer;
    if (!overCap && elapsed < minFlushIntervalMs) return;
    inflight = doFlush().finally(() => {
      lastFlushAt = now();
      inflight = null;
    });
    return inflight;
  }

  return {
    startSpan,
    flush,
    pendingSpanCount,
  };
}
|
|
401
|
+
|
|
402
|
+
// ---------------------------------------------------------------------------
|
|
403
|
+
// OTLP/HTTP JSON serialization for traces
|
|
404
|
+
// ---------------------------------------------------------------------------
|
|
405
|
+
|
|
406
|
+
/**
 * Convert epoch milliseconds to a nanosecond decimal string by flooring
 * to whole milliseconds and appending six zeros. Working on the string
 * avoids multiplying into a range a JS number cannot represent exactly.
 */
function msToNs(ms: number): string {
  const wholeMs = Math.floor(ms);
  return String(wholeMs) + "000000";
}
|
|
409
|
+
|
|
410
|
+
/**
 * Convert an attribute bag to the OTLP JSON `KeyValue[]` form, with keys
 * in sorted order.
 *
 * Value mapping:
 *  - string  → `stringValue`
 *  - boolean → `boolValue`
 *  - safe integer → `intValue` (decimal string)
 *  - any other finite number → `doubleValue`
 *  - `NaN` / `±Infinity` → `stringValue` (`"NaN"`, `"Infinity"`,
 *    `"-Infinity"`), because `JSON.stringify` would turn them into `null`
 *  - `undefined` / `null` → skipped
 */
function attrsToOtlp(attrs: Labels): Array<{
  key: string;
  value:
    | { stringValue: string }
    | { intValue: string }
    | { doubleValue: number }
    | { boolValue: boolean };
}> {
  const out: ReturnType<typeof attrsToOtlp> = [];
  for (const k of Object.keys(attrs).sort()) {
    const v = attrs[k];
    if (v === undefined || v === null) continue;
    if (typeof v === "string") {
      out.push({ key: k, value: { stringValue: v } });
    } else if (typeof v === "boolean") {
      out.push({ key: k, value: { boolValue: v } });
    } else if (!Number.isFinite(v)) {
      // Non-finite numbers have no JSON number form; keep them readable
      // as strings instead of emitting `doubleValue: null`.
      out.push({ key: k, value: { stringValue: String(v) } });
    } else if (Number.isSafeInteger(v)) {
      out.push({ key: k, value: { intValue: String(v) } });
    } else {
      // Fractions, and integers too large to print exactly as a decimal
      // string (`String(1e21)` is `"1e+21"`, not a valid integer).
      out.push({ key: k, value: { doubleValue: v } });
    }
  }
  return out;
}
|
|
429
|
+
|
|
430
|
+
/** Payload-level metadata needed to wrap a batch of spans as OTLP JSON. */
interface SerializeOpts {
  /** Attributes emitted under `resource.attributes`. */
  resourceAttributes: Record<string, string>;
  /** Value for `scope.name`. */
  scopeName: string;
  /** Value for `scope.version`. */
  scopeVersion: string;
}
|
|
435
|
+
|
|
436
|
+
/**
 * Build the OTLP/HTTP JSON body for a batch of spans: one `resourceSpans`
 * entry holding one `scopeSpans` entry with every span of the batch.
 *
 * Resource attributes are emitted as string values in sorted key order.
 * A span's status `message` is included only when it is non-empty.
 */
function serializeOtlpTraces(
  batch: SpanRecord[],
  opts: SerializeOpts,
): { resourceSpans: unknown[] } {
  const { resourceAttributes, scopeName, scopeVersion } = opts;

  const spansOut: unknown[] = [];
  for (const span of batch) {
    const status: { code: number; message?: string } = { code: span.status.code };
    if (span.status.message) {
      status.message = span.status.message;
    }

    spansOut.push({
      traceId: span.traceId,
      spanId: span.spanId,
      parentSpanId: span.parentSpanId,
      name: span.name,
      kind: SPAN_KIND_INTERNAL,
      startTimeUnixNano: span.startTimeUnixNano,
      endTimeUnixNano: span.endTimeUnixNano,
      attributes: attrsToOtlp(span.attributes),
      status,
      events: span.events.map((event) => ({
        timeUnixNano: event.timeUnixNano,
        name: event.name,
        attributes: attrsToOtlp(event.attributes),
      })),
    });
  }

  const resourceAttrs = Object.keys(resourceAttributes)
    .sort()
    .map((key) => ({ key, value: { stringValue: resourceAttributes[key] } }));

  const scopeEntry = {
    scope: { name: scopeName, version: scopeVersion },
    spans: spansOut,
  };
  const resourceEntry = {
    resource: { attributes: resourceAttrs },
    scopeSpans: [scopeEntry],
  };
  return { resourceSpans: [resourceEntry] };
}
|
|
479
|
+
|
|
480
|
+
/**
 * Test seam: bundles helpers for unit tests. Not intended for
 * application code — this surface is unstable and may change between
 * minor releases.
 */
export const _internals = {
  parseTraceparent,
  shouldSampleTrace,
  newTraceId,
  newSpanId,
};
|
|
@@ -38,6 +38,22 @@ export interface RequestContextData {
|
|
|
38
38
|
request: Request;
|
|
39
39
|
signal: AbortSignal;
|
|
40
40
|
startedAt: number;
|
|
41
|
+
/**
|
|
42
|
+
* Stable per-request identifier. Generated by `workerEntry.ts` from
|
|
43
|
+
* an inbound `x-request-id`, else `cf-ray`, else `crypto.randomUUID()`. Echoed
|
|
44
|
+
* back to the client as the `X-Request-Id` response header so:
|
|
45
|
+
*
|
|
46
|
+
* - The producer Worker can stamp it on every log / span / metric
|
|
47
|
+
* attribute (`request.id`) for join queries.
|
|
48
|
+
* - The CF tail worker reads it from the response header and stamps
|
|
49
|
+
* the same `request.id` on tail rows — restores the join key on a
|
|
50
|
+
* channel that has no other shared identity with direct-POST.
|
|
51
|
+
* - Clients can include it in support requests so we can pull the
|
|
52
|
+
* full trace + logs from one ID.
|
|
53
|
+
*
|
|
54
|
+
* See `MIGRATION_TOOLING_PLAN.md` D-9 for the propagation policy.
|
|
55
|
+
*/
|
|
56
|
+
requestId: string;
|
|
41
57
|
/** Lazily computed device type. */
|
|
42
58
|
_device?: "mobile" | "desktop";
|
|
43
59
|
/** Lazily computed bot detection result. */
|
|
@@ -93,10 +109,30 @@ export const RequestContext = {
|
|
|
93
109
|
}
|
|
94
110
|
}
|
|
95
111
|
|
|
112
|
+
// Resolve request.id with explicit precedence so the choice is
|
|
113
|
+
// auditable from a single read:
|
|
114
|
+
// (1) inbound `x-request-id` — caller-supplied (e.g. a load balancer
|
|
115
|
+
// already tagged the request, or a synthetic test passed one in)
|
|
116
|
+
// (2) `cf-ray` — Cloudflare's own per-request identifier, present
|
|
117
|
+
// on every real Workers request and globally unique
|
|
118
|
+
// (3) `crypto.randomUUID()` — last resort for tests and locally-run
|
|
119
|
+
// dev without `cf-ray`
|
|
120
|
+
const incomingReqId = request.headers.get("x-request-id");
|
|
121
|
+
const cfRay = request.headers.get("cf-ray");
|
|
122
|
+
const requestId =
|
|
123
|
+
incomingReqId && incomingReqId.length > 0
|
|
124
|
+
? incomingReqId
|
|
125
|
+
: cfRay && cfRay.length > 0
|
|
126
|
+
? cfRay
|
|
127
|
+
: (typeof crypto !== "undefined" && typeof crypto.randomUUID === "function"
|
|
128
|
+
? crypto.randomUUID()
|
|
129
|
+
: `${Date.now()}-${Math.random().toString(16).slice(2)}`);
|
|
130
|
+
|
|
96
131
|
const ctx: RequestContextData = {
|
|
97
132
|
request,
|
|
98
133
|
signal: controller.signal,
|
|
99
134
|
startedAt: Date.now(),
|
|
135
|
+
requestId,
|
|
100
136
|
bag: new Map(),
|
|
101
137
|
responseHeaders: new Headers(),
|
|
102
138
|
};
|
|
@@ -131,6 +167,16 @@ export const RequestContext = {
|
|
|
131
167
|
return ctx.signal;
|
|
132
168
|
},
|
|
133
169
|
|
|
170
|
+
/**
|
|
171
|
+
* The stable per-request identifier — see `RequestContextData.requestId`
|
|
172
|
+
* for the full propagation policy. Returns `null` when called outside a
|
|
173
|
+
* request scope (vs throwing, because observability helpers in the
|
|
174
|
+
* shutdown path of a Worker may pull it lazily).
|
|
175
|
+
*/
|
|
176
|
+
get requestId(): string | null {
|
|
177
|
+
return storage.getStore()?.requestId ?? null;
|
|
178
|
+
},
|
|
179
|
+
|
|
134
180
|
/**
|
|
135
181
|
* Detected device type based on User-Agent.
|
|
136
182
|
*/
|