@decocms/start 6.0.1 → 6.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -239,25 +239,121 @@ export const MetricNames = {
239
239
  CACHE_MISS: "cache_miss_total",
240
240
  RESOLVE_DURATION_MS: "resolve_duration_ms",
241
241
  FETCH_DURATION_MS: "fetch_duration_ms",
242
+ /**
243
+ * Per-provider outbound commerce fetch duration. Owned by
244
+ * `@decocms/start` (not `@decocms/apps`) so every site emits this
245
+ * histogram unconditionally as soon as it bumps the framework,
246
+ * regardless of apps-start version. Apps register operation strings
247
+ * (`vtex.intelligent-search.product_search`,
248
+ * `shopify.graphql.cart_create`, ...) via `recordCommerceMetric`
249
+ * below; the framework owns the cardinality contract.
250
+ *
251
+ * Canonical labels: `provider`, `operation`, `status_class`, `cached`.
252
+ * See `recordCommerceMetric` for the full label set and Phase 2 in
253
+ * `MIGRATION_TOOLING_PLAN.md` for the rationale.
254
+ */
255
+ COMMERCE_REQUEST_DURATION_MS: "commerce_request_duration_ms",
242
256
  } as const;
243
257
 
258
+ /**
259
+ * Map an HTTP status code to its canonical class label (`1xx` / ... /
260
+ * `5xx`). Out-of-range numbers (e.g. -1 from a thrown fetch) fall back
261
+ * to `"unknown"` so dashboards don't break on edge cases.
262
+ *
263
+ * Exported because callers occasionally need the same mapping for
264
+ * non-metric purposes (logging, tail enrichment).
265
+ */
266
+ export function statusClassFor(status: number): string {
267
+ if (typeof status !== "number" || !Number.isFinite(status)) return "unknown";
268
+ if (status < 100 || status >= 600) return "unknown";
269
+ return `${Math.floor(status / 100)}xx`;
270
+ }
271
+
272
+ /**
273
+ * Optional dimensions stamped on `http_requests_total` /
274
+ * `http_request_duration_ms` / `http_request_errors_total`. All fields
275
+ * are optional — callers pass what they have, the framework fills in
276
+ * the rest from defaults.
277
+ *
278
+ * Cardinality discipline: every field here is bounded. `route_pattern`
279
+ * comes from the TanStack router (a closed set), `outcome` is the CF
280
+ * Workers Observability enum, `cache_decision` / `cache_layer` are
281
+ * union types declared in this module, `region` is a small set of CF
282
+ * colo codes. Status is unbounded by spec but bounded in practice; the
283
+ * `status_class` label bounds the cardinality further for dashboards
284
+ * that don't need the raw value.
285
+ */
286
+ export interface RequestMetricLabels {
287
+ /** TanStack route pattern (`/_products/$slug/p`) — closed set. */
288
+ route_pattern?: string;
289
+ /** Cloudflare Workers Observability `outcome` (`ok`, `exception`, ...). */
290
+ outcome?: string;
291
+ /** Cache layer + decision when known. */
292
+ cache_decision?: CacheDecision;
293
+ cache_layer?: CacheLayer;
294
+ /** Cloudflare colo (`GRU`, `IAD`, ...). */
295
+ region?: string;
296
+ /**
297
+ * Arbitrary extra labels — callers should avoid this and add fields
298
+ * to the typed surface above instead. Kept as an escape hatch so
299
+ * non-canonical experiments don't require a framework release.
300
+ */
301
+ extra?: Record<string, string | number | boolean>;
302
+ }
303
+
244
304
  /**
245
305
  * Record an HTTP request metric.
246
- * Call in middleware after the response is produced.
306
+ *
307
+ * Call in middleware after the response is produced. Two-call surface
308
+ * for backward compat:
309
+ *
310
+ * recordRequestMetric(method, path, status, durationMs)
311
+ * recordRequestMetric(method, path, status, durationMs, labels)
312
+ *
313
+ * The labels argument is optional — callers that omit it still emit
314
+ * `method`, `route_pattern` (the normalized path, previously emitted
315
+ * as `path`), `status` and the derived `status_class`. Typed labels only
316
+ * add keys; `route_pattern` and `extra` can override emitted values.
247
317
  */
248
318
  export function recordRequestMetric(
249
319
  method: string,
250
320
  path: string,
251
321
  status: number,
252
322
  durationMs: number,
323
+ labels?: RequestMetricLabels,
253
324
  ) {
254
325
  const m = getState().meter;
255
326
  if (!m) return;
256
- const labels: Labels = { method, path: normalizePath(path), status };
257
- m.counterInc(MetricNames.HTTP_REQUESTS_TOTAL, 1, labels);
258
- m.histogramRecord?.(MetricNames.HTTP_REQUEST_DURATION_MS, durationMs, labels);
327
+ // Cardinality discipline:
328
+ // - `method`: small (GET, POST, ...).
329
+ // - `route_pattern`: closed set (caller-supplied) OR normalized path
330
+ // (fallback). Either way bounded.
331
+ // - `status`: full HTTP code (bounded ~50 values in practice).
332
+ // - `status_class`: 6-element enum (1xx / 2xx / 3xx / 4xx / 5xx / unknown).
333
+ // - `outcome`: CF outcome enum (~7 values).
334
+ // - `cache_decision`: 5-element enum.
335
+ // - `cache_layer`: 3-element enum (edge / cachedLoader / vtex-swr).
336
+ // - `region`: ~250 CF colo codes worldwide.
337
+ // Total combinations are bounded — safe for unbounded series on
338
+ // ClickHouse but operators should still avoid grouping by `region`
339
+ // unless explicitly needed.
340
+ const merged: Labels = {
341
+ method,
342
+ route_pattern: labels?.route_pattern ?? normalizePath(path),
343
+ status,
344
+ status_class: statusClassFor(status),
345
+ };
346
+ if (labels?.outcome) merged.outcome = labels.outcome;
347
+ if (labels?.cache_decision) merged.cache_decision = labels.cache_decision;
348
+ if (labels?.cache_layer) merged.cache_layer = labels.cache_layer;
349
+ if (labels?.region) merged.region = labels.region;
350
+ if (labels?.extra) {
351
+ for (const [k, v] of Object.entries(labels.extra)) merged[k] = v;
352
+ }
353
+ m.counterInc(MetricNames.HTTP_REQUESTS_TOTAL, 1, merged);
354
+ m.histogramRecord?.(MetricNames.HTTP_REQUEST_DURATION_MS, durationMs, merged);
259
355
  if (status >= 500) {
260
- m.counterInc(MetricNames.HTTP_REQUEST_ERRORS, 1, labels);
356
+ m.counterInc(MetricNames.HTTP_REQUEST_ERRORS, 1, merged);
261
357
  }
262
358
  }
263
359
 
@@ -272,22 +368,45 @@ export function recordRequestMetric(
272
368
  */
273
369
  export type CacheDecision = "HIT" | "STALE-HIT" | "STALE-ERROR" | "MISS" | "BYPASS";
274
370
 
371
+ /**
372
+ * Where the cache lives. Phase 2 label expansion (D-11).
373
+ * - `edge` — Cloudflare Cache API (HTML pages, server-fn responses)
374
+ * - `cachedLoader` — In-memory per-isolate via `sdk/cachedLoader.ts`
375
+ * (loader-level SWR, dedup, in-flight)
376
+ * - `vtex-swr` — Apps-side in-memory cache shared by VTEX clients
377
+ * (intelligent-search, cross-selling, etc.)
378
+ */
379
+ export type CacheLayer = "edge" | "cachedLoader" | "vtex-swr";
380
+
275
381
  /**
276
382
  * Record a cache hit/miss metric. Also stamps the decision on the active
277
383
  * trace span (when one exists) as `deco.cache.decision` / `deco.cache.profile`
278
384
  * so operators can filter ClickStack traces by cache decision directly,
279
385
  * without joining to metrics.
280
386
  *
281
- * `decision` is optional — when omitted, the metric still records HIT vs MISS
282
- * but dashboards can't distinguish SWR/SIE paths. Pass it whenever known.
387
+ * Backward-compatible signature:
388
+ * recordCacheMetric(hit, profile?, decision?)
389
+ * recordCacheMetric(hit, profile?, decision?, layer?)
390
+ *
391
+ * `decision` is optional — when omitted, the metric still records HIT
392
+ * vs MISS but dashboards can't distinguish SWR/SIE paths. Pass it
393
+ * whenever known. `layer` is not defaulted here: when omitted, no
394
+ * `layer` label / `deco.cache.layer` attribute is emitted, so every
395
+ * call site (edge, cachedLoader, vtex-swr) should pass it explicitly.
283
396
  */
284
- export function recordCacheMetric(hit: boolean, profile?: string, decision?: CacheDecision) {
397
+ export function recordCacheMetric(
398
+ hit: boolean,
399
+ profile?: string,
400
+ decision?: CacheDecision,
401
+ layer?: CacheLayer,
402
+ ) {
285
403
  // Stamp on the active span FIRST so the attribute survives even if the
286
404
  // meter is a no-op (e.g. on tests, or in dev without DECO_METRICS).
287
405
  const active = getActiveSpan();
288
406
  if (active) {
289
407
  if (decision) active.setAttribute?.("deco.cache.decision", decision);
290
408
  if (profile) active.setAttribute?.("deco.cache.profile", profile);
409
+ if (layer) active.setAttribute?.("deco.cache.layer", layer);
291
410
  }
292
411
 
293
412
  const m = getState().meter;
@@ -295,9 +414,47 @@ export function recordCacheMetric(hit: boolean, profile?: string, decision?: Cac
295
414
  const labels: Labels = {};
296
415
  if (profile) labels.profile = profile;
297
416
  if (decision) labels.decision = decision;
417
+ if (layer) labels.layer = layer;
298
418
  m.counterInc(hit ? MetricNames.CACHE_HIT : MetricNames.CACHE_MISS, 1, labels);
299
419
  }
300
420
 
421
+ /**
422
+ * Labels for `commerce_request_duration_ms`. Owned by the framework so
423
+ * apps-start (and any future provider package) can register operation
424
+ * strings without owning the histogram declaration. Phase 2 (D-11).
425
+ */
426
+ export interface CommerceMetricLabels {
427
+ /** `vtex`, `shopify`, `wake`, ... — small closed set. */
428
+ provider: string;
429
+ /** Per-provider operation, e.g. `intelligent-search.product_search`. */
430
+ operation: string;
431
+ /** Set when known (e.g. from the HTTP response). Bounded enum. */
432
+ status_class?: string;
433
+ /** Whether the underlying fetch was served from a cache. */
434
+ cached?: boolean;
435
+ }
436
+
437
+ /**
438
+ * Record a commerce / outbound-fetch duration sample. No-op when no
439
+ * meter is configured. The metric name is constant
440
+ * (`commerce_request_duration_ms`) — providers vary by the `provider`
441
+ * label, not by name, so dashboards aggregate cleanly across the fleet.
442
+ */
443
+ export function recordCommerceMetric(
444
+ durationMs: number,
445
+ labels: CommerceMetricLabels,
446
+ ) {
447
+ const m = getState().meter;
448
+ if (!m) return;
449
+ const merged: Labels = {
450
+ provider: labels.provider,
451
+ operation: labels.operation,
452
+ };
453
+ if (labels.status_class) merged.status_class = labels.status_class;
454
+ if (typeof labels.cached === "boolean") merged.cached = labels.cached;
455
+ m.histogramRecord?.(MetricNames.COMMERCE_REQUEST_DURATION_MS, durationMs, merged);
456
+ }
457
+
301
458
  function normalizePath(path: string): string {
302
459
  // Collapse dynamic segments to reduce cardinality
303
460
  return path
@@ -99,13 +99,13 @@ export function createCachedLoader<TProps, TResult>(
99
99
  const inflight = inflightRequests.get(cacheKey);
100
100
  if (inflight) {
101
101
  // Treat in-flight dedup as a cache hit — avoided the origin call.
102
- recordCacheMetric(true, name);
102
+ recordCacheMetric(true, name, undefined, "cachedLoader");
103
103
  return inflight as Promise<TResult>;
104
104
  }
105
105
 
106
106
  if (isDev) {
107
107
  // Dev mode: no caching, but still useful to count attempts.
108
- recordCacheMetric(false, name);
108
+ recordCacheMetric(false, name, undefined, "cachedLoader");
109
109
  const promise = withTracing(
110
110
  "deco.cachedLoader",
111
111
  () => loaderFn(props).finally(() => inflightRequests.delete(cacheKey)),
@@ -121,20 +121,20 @@ export function createCachedLoader<TProps, TResult>(
121
121
 
122
122
  if (policy === "no-cache") {
123
123
  if (entry && !isStale) {
124
- recordCacheMetric(true, name);
124
+ recordCacheMetric(true, name, "HIT", "cachedLoader");
125
125
  return entry.value;
126
126
  }
127
127
  }
128
128
 
129
129
  if (policy === "stale-while-revalidate") {
130
130
  if (entry && !isStale) {
131
- recordCacheMetric(true, name);
131
+ recordCacheMetric(true, name, "HIT", "cachedLoader");
132
132
  return entry.value;
133
133
  }
134
134
 
135
135
  if (entry && isStale && !entry.refreshing) {
136
136
  // Stale-while-revalidate hit: serve stale, refresh in background.
137
- recordCacheMetric(true, name);
137
+ recordCacheMetric(true, name, "STALE-HIT", "cachedLoader");
138
138
  entry.refreshing = true;
139
139
  loaderFn(props)
140
140
  .then((result) => {
@@ -156,14 +156,17 @@ export function createCachedLoader<TProps, TResult>(
156
156
  }
157
157
 
158
158
  if (entry) {
159
- recordCacheMetric(true, name);
159
+ // Past SIE window — still serve the stale value once but mark
160
+ // the decision as STALE-ERROR so dashboards can distinguish
161
+ // this from healthy SWR.
162
+ recordCacheMetric(true, name, "STALE-ERROR", "cachedLoader");
160
163
  return entry.value;
161
164
  }
162
165
  }
163
166
 
164
167
  // Cache miss — emit metric, then run loader inside a span so individual
165
168
  // slow loaders are visible in traces.
166
- recordCacheMetric(false, name);
169
+ recordCacheMetric(false, name, "MISS", "cachedLoader");
167
170
  const promise = withTracing("deco.cachedLoader", () => loaderFn(props), {
168
171
  "deco.loader": name,
169
172
  "deco.cache.policy": policy,
@@ -266,6 +266,105 @@ describe("serializeError", () => {
266
266
  });
267
267
  });
268
268
 
269
+ describe("request.id stamping (Phase 1, D-9)", () => {
270
+ afterEach(() => {
271
+ configureLogger(defaultLoggerAdapter);
272
+ setLogLevel("info");
273
+ });
274
+
275
+ it("stamps request.id from RequestContext on every log line emitted inside a request scope", async () => {
276
+ const { RequestContext } = await import("./requestContext");
277
+
278
+ const seen: Array<Record<string, unknown> | undefined> = [];
279
+ configureLogger({
280
+ log(_l, _m, attrs) {
281
+ seen.push(attrs);
282
+ },
283
+ });
284
+
285
+ const reqWithId = new Request("https://example.com/", {
286
+ headers: { "x-request-id": "client-supplied-uuid" },
287
+ });
288
+ await RequestContext.run(reqWithId, async () => {
289
+ logger.info("inside-scope", { custom: "yes" });
290
+ });
291
+ // Outside the scope, no request.id is stamped — the fast path stays
292
+ // fast (no allocation, no key) when RequestContext.requestId is null.
293
+ logger.info("outside-scope", { custom: "no" });
294
+
295
+ expect(seen[0]).toMatchObject({
296
+ "request.id": "client-supplied-uuid",
297
+ custom: "yes",
298
+ });
299
+ expect(seen[1]).toEqual({ custom: "no" });
300
+ expect(seen[1]).not.toHaveProperty("request.id");
301
+ });
302
+
303
+ it("prefers caller-supplied request.id over the auto-generated one", async () => {
304
+ const { RequestContext } = await import("./requestContext");
305
+
306
+ const seen: Array<Record<string, unknown> | undefined> = [];
307
+ configureLogger({
308
+ log(_l, _m, attrs) {
309
+ seen.push(attrs);
310
+ },
311
+ });
312
+
313
+ const req = new Request("https://example.com/", {
314
+ headers: { "x-request-id": "from-headers" },
315
+ });
316
+
317
+ await RequestContext.run(req, async () => {
318
+ // Caller can still override by passing the key directly in attrs.
319
+ logger.info("override", { "request.id": "explicit-from-caller" });
320
+ });
321
+
322
+ expect(seen[0]?.["request.id"]).toBe("explicit-from-caller");
323
+ });
324
+
325
+ it("falls back to cf-ray when x-request-id is absent", async () => {
326
+ const { RequestContext } = await import("./requestContext");
327
+
328
+ const seen: Array<Record<string, unknown> | undefined> = [];
329
+ configureLogger({
330
+ log(_l, _m, attrs) {
331
+ seen.push(attrs);
332
+ },
333
+ });
334
+
335
+ const req = new Request("https://example.com/", {
336
+ headers: { "cf-ray": "8a1b2c3d4e5f6a7b" },
337
+ });
338
+
339
+ await RequestContext.run(req, async () => {
340
+ logger.info("cf-ray-stamped");
341
+ });
342
+
343
+ expect(seen[0]?.["request.id"]).toBe("8a1b2c3d4e5f6a7b");
344
+ });
345
+
346
+ it("generates a fresh UUID when neither header is set", async () => {
347
+ const { RequestContext } = await import("./requestContext");
348
+
349
+ const seen: Array<Record<string, unknown> | undefined> = [];
350
+ configureLogger({
351
+ log(_l, _m, attrs) {
352
+ seen.push(attrs);
353
+ },
354
+ });
355
+
356
+ const req = new Request("https://example.com/");
357
+
358
+ await RequestContext.run(req, async () => {
359
+ logger.info("uuid-stamped");
360
+ });
361
+
362
+ const stamped = seen[0]?.["request.id"];
363
+ expect(typeof stamped).toBe("string");
364
+ expect((stamped as string).length).toBeGreaterThan(8);
365
+ });
366
+ });
367
+
269
368
  describe("trace correlation", () => {
270
369
  afterEach(() => {
271
370
  configureLogger(defaultLoggerAdapter);
package/src/sdk/logger.ts CHANGED
@@ -25,6 +25,7 @@
25
25
  */
26
26
 
27
27
  import { getActiveSpan } from "./observability";
28
+ import { RequestContext } from "./requestContext";
28
29
 
29
30
  export type LogLevel = "debug" | "info" | "warn" | "error";
30
31
 
@@ -252,20 +253,30 @@ function emit(level: LogLevel, msg: string, attrs?: Record<string, unknown>): vo
252
253
  // to its trace in ClickStack/HyperDX. No active span → no-op; caller
253
254
  // attrs always win so explicit `trace_id` overrides keep working.
254
255
  const ctx = getActiveSpan()?.spanContext?.();
255
- const traceAttrs: Record<string, unknown> | undefined = ctx
256
- ? { trace_id: ctx.traceId, span_id: ctx.spanId }
257
- : undefined;
258
- // Merge order: floor trace context → caller attrs. Caller wins; trace
259
- // context only overrides floor (which never sets trace_id anyway).
256
+ // Pull request.id from the AsyncLocalStorage-backed RequestContext so
257
+ // every log line in the request also carries the join key used by
258
+ // direct-POST metrics + tail-worker rows. Single read; nothing is
259
+ // allocated when there is neither an active span nor a request scope.
260
+ const requestId = RequestContext.requestId;
261
+ const requestAttrs: Record<string, unknown> | undefined =
262
+ ctx || requestId
263
+ ? {
264
+ ...(ctx ? { trace_id: ctx.traceId, span_id: ctx.spanId } : {}),
265
+ ...(requestId ? { "request.id": requestId } : {}),
266
+ }
267
+ : undefined;
268
+ // Merge order: floor → trace / request context → caller attrs. Caller
269
+ // wins; the request-scoped context only overrides floor keys (which
270
+ // never set `trace_id` / `request.id` anyway).
260
271
  const s = getState();
261
272
  const hasFloor = Object.keys(s.attributeFloor).length > 0;
262
273
  let merged: Record<string, unknown> | undefined;
263
- if (!hasFloor && !traceAttrs && !attrs) {
274
+ if (!hasFloor && !requestAttrs && !attrs) {
264
275
  merged = undefined;
265
276
  } else {
266
277
  merged = {
267
278
  ...(hasFloor ? s.attributeFloor : {}),
268
- ...(traceAttrs ?? {}),
279
+ ...(requestAttrs ?? {}),
269
280
  ...(attrs ?? {}),
270
281
  };
271
282
  }
@@ -52,6 +52,8 @@ export {
52
52
  // Tracer / meter / request log primitives (re-exported from the middleware)
53
53
  export {
54
54
  type CacheDecision,
55
+ type CacheLayer,
56
+ type CommerceMetricLabels,
55
57
  configureMeter,
56
58
  configureTracer,
57
59
  getActiveSpan,
@@ -62,16 +64,32 @@ export {
62
64
  type MeterAdapter,
63
65
  MetricNames,
64
66
  recordCacheMetric,
67
+ recordCommerceMetric,
65
68
  recordRequestMetric,
69
+ type RequestMetricLabels,
66
70
  type RequestStore,
67
71
  type Span,
68
72
  setObservabilitySpanStore,
69
73
  setSpanAttribute,
74
+ statusClassFor,
70
75
  type TracerAdapter,
71
76
  withTracing,
72
77
  } from "../middleware/observability";
73
78
  // Worker-entry wrapper + adapter wiring
74
79
  export { instrumentWorker, type OtelOptions } from "./otel";
80
+ // Direct-POST OTLP trace exporter (Phase 3 / D-12). Exported for sites
81
+ // that need to wire a custom traces endpoint outside `instrumentWorker`,
82
+ // and for the audit tooling that asserts framework spans are flowing.
83
+ export {
84
+ createOtlpHttpTracerAdapter,
85
+ newSpanId,
86
+ newTraceId,
87
+ type OtlpHttpTracer,
88
+ type OtlpHttpTracerOptions,
89
+ parseTraceparent,
90
+ shouldSampleTrace,
91
+ type TraceContext,
92
+ } from "./otelHttpTracer";
75
93
  // AE meter adapter + runtime env helpers (for tests / custom wiring)
76
94
  export {
77
95
  type AnalyticsEngineDataset,