@decocms/start 4.5.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,6 +45,17 @@ import { getAppMiddleware } from "./setupApps";
45
45
  import { cleanPathForCacheKey } from "./urlUtils";
46
46
  import { type Device, isMobileUA } from "./useDevice";
47
47
 
48
+ /**
49
+ * Build-time identifier injected by `decoVitePlugin()` (see
50
+ * `src/vite/plugin.js`). If the consuming site isn't using the plugin (or the
51
+ * symbol wasn't `define`d at bundle time) the identifier is undeclared at runtime — only read it behind a `typeof` guard.
52
+ *
53
+ * The runtime `env.BUILD_HASH` (when explicitly set, e.g. via
54
+ * `wrangler deploy --var BUILD_HASH:foo`) takes precedence — see
55
+ * `getBuildHash()` below.
56
+ */
57
+ declare const __DECO_BUILD_HASH__: string | undefined;
58
+
48
59
  /**
49
60
  * Append Link preload headers for CSS and fonts so the browser starts
50
61
  * fetching them before parsing HTML. Only applied to HTML responses.
@@ -673,6 +684,23 @@ export function createDecoWorkerEntry(
673
684
  return parts.join("|");
674
685
  }
675
686
 
687
/**
 * Resolve the per-deploy cache-key version.
 *
 * Priority:
 *   1. `env[cacheVersionEnv]` — explicit runtime override (e.g.
 *      `wrangler deploy --var BUILD_HASH:foo`). Wins so callers can always
 *      force a specific value.
 *   2. `__DECO_BUILD_HASH__` — build-time constant injected by
 *      `decoVitePlugin()` from WORKERS_CI_COMMIT_SHA / git rev-parse.
 *      This is the production path on Cloudflare Workers Builds.
 *   3. Empty string — versioning disabled (legacy pre-plugin sites).
 *
 * When `cacheVersionEnv === false` versioning is switched off entirely and
 * the empty string is returned without consulting either source.
 *
 * @param env - Worker environment bindings; values are untyped (`unknown`).
 * @returns The version to use for `__v` / `X-Cache-Version`, or `""` when
 *   no version applies.
 */
function getBuildHash(env: Record<string, unknown>): string {
  if (cacheVersionEnv === false) return "";

  // `env` values are `unknown`, so narrow instead of casting. A truthy
  // non-string binding (object, boolean, function) used to be coerced into
  // the cache key (e.g. "[object Object]") and also masked the build-time
  // hash; such values are now ignored.
  const override = env[cacheVersionEnv];
  if (typeof override === "string" && override !== "") return override;
  // Truthy numbers are stringified, matching the coercion the previous
  // implementation relied on; 0 / NaN keep falling through as before.
  if (typeof override === "number" && override) return String(override);

  // `typeof` guard: the identifier does not exist at all when the bundle
  // was built without the plugin's `define`.
  return typeof __DECO_BUILD_HASH__ !== "undefined" ? __DECO_BUILD_HASH__ : "";
}
703
+
676
704
  function buildCacheKey(
677
705
  request: Request,
678
706
  env: Record<string, unknown>,
@@ -685,11 +713,9 @@ export function createDecoWorkerEntry(
685
713
  url.search = cleanUrl.search;
686
714
  }
687
715
 
688
- if (cacheVersionEnv !== false) {
689
- const version = (env[cacheVersionEnv] as string) || "";
690
- if (version) {
691
- url.searchParams.set("__v", version);
692
- }
716
+ const version = getBuildHash(env);
717
+ if (version) {
718
+ url.searchParams.set("__v", version);
693
719
  }
694
720
 
695
721
  // Include CF geo data in cache key so location matcher results don't leak
@@ -791,10 +817,8 @@ export function createDecoWorkerEntry(
791
817
  for (const seg of segments) {
792
818
  for (const cc of geoKeys) {
793
819
  const url = new URL(p, baseUrl);
794
- if (cacheVersionEnv !== false) {
795
- const version = (env[cacheVersionEnv] as string) || "";
796
- if (version) url.searchParams.set("__v", version);
797
- }
820
+ const purgeVersion = getBuildHash(env);
821
+ if (purgeVersion) url.searchParams.set("__v", purgeVersion);
798
822
  url.searchParams.set("__seg", hashSegment(seg));
799
823
  if (cc) url.searchParams.set("__cf_geo", cc);
800
824
  const key = new Request(url.toString(), { method: "GET" });
@@ -816,10 +840,8 @@ export function createDecoWorkerEntry(
816
840
  for (const device of devices) {
817
841
  for (const cc of geoKeys) {
818
842
  const url = new URL(p, baseUrl);
819
- if (cacheVersionEnv !== false) {
820
- const version = (env[cacheVersionEnv] as string) || "";
821
- if (version) url.searchParams.set("__v", version);
822
- }
843
+ const purgeVersion = getBuildHash(env);
844
+ if (purgeVersion) url.searchParams.set("__v", purgeVersion);
823
845
  if (device) url.searchParams.set("__cf_device", device);
824
846
  if (cc) url.searchParams.set("__cf_geo", cc);
825
847
  const key = new Request(url.toString(), { method: "GET" });
@@ -1190,10 +1212,8 @@ export function createDecoWorkerEntry(
1190
1212
  // different regions or channels get separate cache entries.
1191
1213
  const cacheKeyUrl = new URL(request.url);
1192
1214
  cacheKeyUrl.searchParams.set("__body", bodyHash);
1193
- if (cacheVersionEnv !== false) {
1194
- const version = (env[cacheVersionEnv] as string) || "";
1195
- if (version) cacheKeyUrl.searchParams.set("__v", version);
1196
- }
1215
+ const sfnVersion = getBuildHash(env);
1216
+ if (sfnVersion) cacheKeyUrl.searchParams.set("__v", sfnVersion);
1197
1217
  if (sfnSegment) {
1198
1218
  cacheKeyUrl.searchParams.set("__seg", hashSegment(sfnSegment));
1199
1219
  } else if (deviceSpecificKeys) {
@@ -1409,10 +1429,8 @@ export function createDecoWorkerEntry(
1409
1429
  out.headers.set("X-Cache", xCache);
1410
1430
  out.headers.set("X-Cache-Profile", profile);
1411
1431
  if (segment) out.headers.set("X-Cache-Segment", hashSegment(segment));
1412
- if (cacheVersionEnv !== false) {
1413
- const v = (env[cacheVersionEnv] as string) || "";
1414
- if (v) out.headers.set("X-Cache-Version", v);
1415
- }
1432
+ const headerVersion = getBuildHash(env);
1433
+ if (headerVersion) out.headers.set("X-Cache-Version", headerVersion);
1416
1434
  if (extra) for (const [k, v] of Object.entries(extra)) out.headers.set(k, v);
1417
1435
  appendResourceHints(out);
1418
1436
  return out;
@@ -31,8 +31,49 @@
31
31
  * export default defineConfig({ plugins: [decoVitePlugin(), ...] });
32
32
  * ```
33
33
  */
34
+ import { execFileSync } from "node:child_process";
34
35
  import { existsSync, readFileSync } from "node:fs";
35
36
 
37
/**
 * Run `git` with the given arguments in the current working directory.
 *
 * stderr is discarded so a missing repository doesn't spam the build log.
 *
 * @param {string[]} args
 * @returns {string | null} Trimmed stdout, or `null` when git could not be
 *   spawned or exited with a non-zero status.
 */
function readGitOutput(args) {
  try {
    return execFileSync("git", args, {
      encoding: "utf-8",
      stdio: ["ignore", "pipe", "ignore"],
    }).trim();
  } catch {
    return null;
  }
}

/**
 * Resolve a per-build identifier for cache-key versioning.
 *
 * The returned string is injected into the worker bundle as the
 * `__DECO_BUILD_HASH__` global via Vite `define`. `createDecoWorkerEntry`
 * appends it (or `env.BUILD_HASH` if explicitly set) as `__v=<hash>` on
 * every Cache API key, so each new deploy gets its own cache namespace
 * — old edge-cached HTML referencing dead asset filenames stops being
 * served the moment the new worker is live.
 *
 * Resolution order:
 *   1. WORKERS_CI_COMMIT_SHA — Cloudflare Workers Builds default env var
 *      (the production deploy path-of-record). Trimmed, sliced to 12 chars.
 *   2. `git rev-parse --short=12 HEAD` — local `wrangler deploy` from a
 *      developer laptop. If the work tree has uncommitted or untracked
 *      changes (`git status --porcelain` is non-empty, or cannot be read),
 *      a base36 timestamp is appended: HEAD alone would repeat the previous
 *      deploy's hash even though the bundle changed.
 *   3. `Date.now().toString(36)` — last-resort fallback (git missing or not
 *      inside a repository) so the cache-bust invariant never silently
 *      regresses to "always the same key".
 *
 * Git failures never fail the build. This function is only meant for
 * production builds; the dev-server value is chosen by the caller.
 *
 * @returns {string} Non-empty identifier made of `[0-9a-z]` characters when
 *   derived from git or the clock.
 */
function resolveBuildHash() {
  const ciSha = process.env.WORKERS_CI_COMMIT_SHA?.trim();
  if (ciSha) return ciSha.slice(0, 12);

  const timestamp = Date.now().toString(36);

  const sha = readGitOutput(["rev-parse", "--short=12", "HEAD"]);
  if (!sha) return timestamp;

  // Empty porcelain output means a clean tree. `null` (status failed) is
  // treated like dirty: over-invalidating is safe, serving stale HTML is not.
  const status = readGitOutput(["status", "--porcelain"]);
  return status === "" ? sha : `${sha}${timestamp}`;
}
76
+
36
77
  // Bare-specifier stubs resolved by ID before Vite touches them.
37
78
  /** @type {Record<string, string>} */
38
79
  const CLIENT_STUBS = {
@@ -227,6 +268,20 @@ export function decoVitePlugin() {
227
268
  };
228
269
  }
229
270
 
271
+ // Inject a per-build identifier as `__DECO_BUILD_HASH__` so
272
+ // createDecoWorkerEntry can fall back to it when env.BUILD_HASH is
273
+ // unset (the default on Cloudflare Workers Builds, where there's
274
+ // no GH-Actions step injecting --var BUILD_HASH).
275
+ //
276
+ // Dev gets the literal "dev" so SSR doesn't crash on an undefined
277
+ // identifier; prod gets WORKERS_CI_COMMIT_SHA → git rev-parse →
278
+ // time-based fallback (see resolveBuildHash above).
279
+ const buildHash = command === "build" ? resolveBuildHash() : "dev";
280
+ cfg.define = {
281
+ ...cfg.define,
282
+ __DECO_BUILD_HASH__: JSON.stringify(buildHash),
283
+ };
284
+
230
285
  // Only split chunks for production builds — dev uses unbundled ESM.
231
286
  if (command !== "build") return cfg;
232
287
  return {
@@ -1,4 +1,4 @@
1
- import { describe, expect, it } from "vitest";
1
+ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
2
2
  import { decoVitePlugin } from "./plugin.js";
3
3
 
4
4
  /**
@@ -52,3 +52,62 @@ describe("decoVitePlugin client stubs (regression guard)", () => {
52
52
  expect(id).toBeUndefined();
53
53
  });
54
54
  });
55
+
56
describe("decoVitePlugin __DECO_BUILD_HASH__ injection", () => {
  // Snapshot of process.env taken before each test so mutations made by a
  // test (or by this suite's setup) can be rolled back.
  let originalEnv;

  beforeEach(() => {
    originalEnv = { ...process.env };
    // Start every test from the "not on Cloudflare Workers Builds" state.
    delete process.env.WORKERS_CI_COMMIT_SHA;
  });

  afterEach(() => {
    // Restore in place rather than reassigning `process.env`: reassignment
    // swaps Node's special env object for a plain copy.
    for (const key of Object.keys(process.env)) {
      if (!(key in originalEnv)) delete process.env[key];
    }
    Object.assign(process.env, originalEnv);
    vi.restoreAllMocks();
  });

  // Invoke the plugin's `config` hook with an empty user config.
  function callConfig(command) {
    const p = getPlugin();
    return p.config({}, { command });
  }

  it("injects the literal 'dev' for non-build commands", () => {
    const cfg = callConfig("serve");
    expect(cfg.define.__DECO_BUILD_HASH__).toBe(JSON.stringify("dev"));
  });

  it("uses WORKERS_CI_COMMIT_SHA (sliced to 12 chars) when set on a build", () => {
    process.env.WORKERS_CI_COMMIT_SHA = "abcdef1234567890fedcba";
    const cfg = callConfig("build");
    expect(cfg.define.__DECO_BUILD_HASH__).toBe(JSON.stringify("abcdef123456"));
  });

  it("falls back to a non-empty git/time-based id when WORKERS_CI_COMMIT_SHA is unset", () => {
    // The plugin module imports execFileSync at top-level, so it isn't
    // mocked here; the real git binary (if any) is exercised instead.
    // Depending on the environment the value comes from git (inside a
    // repo) or from the time-based fallback (no git / no repo). This test
    // cannot tell the two apart, so it only checks the shared contract.
    const cfg = callConfig("build");
    const value = JSON.parse(cfg.define.__DECO_BUILD_HASH__);
    expect(typeof value).toBe("string");
    expect(value.length).toBeGreaterThan(0);
    // Git short SHAs are hex and the time-based fallback is base36; both
    // fit in this superset regex.
    expect(value).toMatch(/^[0-9a-z]+$/);
  });

  it("preserves allowedHosts behaviour (regression: define is additive, not replacing)", () => {
    // No manual cleanup needed: afterEach rolls process.env back.
    process.env.DECO_SITE_NAME = "test-site";
    const cfg = callConfig("serve");
    expect(cfg.server?.allowedHosts).toContain(".deco.studio");
    expect(cfg.define.__DECO_BUILD_HASH__).toBeDefined();
  });
});
package/vitest.config.ts CHANGED
@@ -11,7 +11,7 @@ export default defineConfig({
11
11
  ["scripts/**", "node"],
12
12
  ],
13
13
  include: [
14
- "src/**/*.test.{ts,tsx}",
14
+ "src/**/*.test.{ts,tsx,js}",
15
15
  "scripts/**/*.test.ts",
16
16
  ],
17
17
  globals: true,
@@ -1,165 +0,0 @@
1
- import { ROOT_CONTEXT, SpanKind } from "@opentelemetry/api";
2
- import {
3
- AlwaysOnSampler,
4
- ParentBasedSampler,
5
- SamplingDecision,
6
- } from "@opentelemetry/sdk-trace-base";
7
- import { describe, expect, it } from "vitest";
8
- import {
9
- createUrlBasedHeadSampler,
10
- DEFAULT_SAMPLE_RATIO,
11
- decodeSamplingConfig,
12
- URLBasedSampler,
13
- } from "./sampler";
14
-
15
- // Two trace IDs at opposite ends of the TraceIdRatioBased accumulator.
16
- // `accumulate(traceId)` xors the trace ID in 8-hex-char chunks; threshold at
17
- // ratio R is `floor(R * 0xffffffff)`. See
18
- // `@opentelemetry/sdk-trace-base/src/sampler/TraceIdRatioBasedSampler`.
19
- //
20
- // LOW: 0x00000000 ^ 0x00000000 ^ 0xffffffff ^ 0xffffffff = 0
21
- // → 0 < threshold for any ratio > 0 → SAMPLED at any ratio > 0
22
- // HIGH: 0xffffffff ^ 0x00000000 ^ 0x00000000 ^ 0x00000000 = 0xffffffff
23
- // → never below threshold for ratio < 1.0 → DROPPED at any ratio < 1
24
- const LOW_TRACE_ID = "0000000000000000ffffffffffffffff";
25
- const HIGH_TRACE_ID = "ffffffff000000000000000000000000";
26
-
27
- function decide(sampler: URLBasedSampler, path: string, traceId = LOW_TRACE_ID) {
28
- return sampler.shouldSample(
29
- ROOT_CONTEXT,
30
- traceId,
31
- "span-name",
32
- SpanKind.SERVER,
33
- { "url.path": path },
34
- [],
35
- );
36
- }
37
-
38
- describe("URLBasedSampler", () => {
39
- it("exposes 0.1 as the framework-wide default sample ratio", () => {
40
- expect(DEFAULT_SAMPLE_RATIO).toBe(0.1);
41
- });
42
-
43
- it("defaults to DEFAULT_SAMPLE_RATIO (0.1) when config omits `default`", () => {
44
- const s = new URLBasedSampler();
45
- // LOW_TRACE_ID accumulates to 0 — sampled at any ratio > 0, including 0.1.
46
- expect(decide(s, "/anything").decision).toBe(SamplingDecision.RECORD_AND_SAMPLED);
47
- // HIGH_TRACE_ID accumulates to ~uint32 max — dropped at 0.1, would be
48
- // kept only at ratio ~= 1. This is the assertion that catches an
49
- // accidental revert to the old `?? 1.0` fallback.
50
- expect(decide(s, "/anything", HIGH_TRACE_ID).decision).toBe(SamplingDecision.NOT_RECORD);
51
- });
52
-
53
- it("explicit `default: 1` opts in to AlwaysOn (records every trace)", () => {
54
- const s = new URLBasedSampler({ default: 1 });
55
- expect(decide(s, "/anything", HIGH_TRACE_ID).decision).toBe(
56
- SamplingDecision.RECORD_AND_SAMPLED,
57
- );
58
- });
59
-
60
- it("falls back to provided default ratio when no rule matches", () => {
61
- const s = new URLBasedSampler({ default: 1.0 });
62
- expect(decide(s, "/anything").decision).toBe(SamplingDecision.RECORD_AND_SAMPLED);
63
- });
64
-
65
- it("first matching rule wins", () => {
66
- const s = new URLBasedSampler({
67
- default: 1.0,
68
- rules: [
69
- { pattern: "^/api/health", ratio: 0.0 },
70
- { pattern: "^/api/", ratio: 1.0 },
71
- ],
72
- });
73
- expect(decide(s, "/api/health").decision).toBe(SamplingDecision.NOT_RECORD);
74
- expect(decide(s, "/api/orders").decision).toBe(SamplingDecision.RECORD_AND_SAMPLED);
75
- });
76
-
77
- it("falls back to default when no path attribute is present", () => {
78
- const s = new URLBasedSampler({ default: 1.0 });
79
- const result = s.shouldSample(ROOT_CONTEXT, LOW_TRACE_ID, "noop", SpanKind.INTERNAL, {}, []);
80
- expect(result.decision).toBe(SamplingDecision.RECORD_AND_SAMPLED);
81
- });
82
-
83
- it("supports url.path, http.target, and http.url", () => {
84
- const s = new URLBasedSampler({
85
- default: 0.0,
86
- rules: [{ pattern: "^/wanted", ratio: 1.0 }],
87
- });
88
- const ok = (attrs: Record<string, string>) =>
89
- s.shouldSample(ROOT_CONTEXT, LOW_TRACE_ID, "n", SpanKind.SERVER, attrs, []);
90
-
91
- expect(ok({ "url.path": "/wanted/x" }).decision).toBe(SamplingDecision.RECORD_AND_SAMPLED);
92
- expect(ok({ "http.target": "/wanted/y" }).decision).toBe(SamplingDecision.RECORD_AND_SAMPLED);
93
- expect(ok({ "http.url": "https://h.example/wanted/z?q=1" }).decision).toBe(
94
- SamplingDecision.RECORD_AND_SAMPLED,
95
- );
96
- });
97
- });
98
-
99
- describe("decodeSamplingConfig", () => {
100
- it("returns null on missing input", () => {
101
- expect(decodeSamplingConfig(undefined)).toBeNull();
102
- expect(decodeSamplingConfig("")).toBeNull();
103
- });
104
-
105
- it("decodes valid base64 JSON", () => {
106
- const cfg = { default: 0.5, rules: [{ pattern: "^/x", ratio: 1.0 }] };
107
- const enc = btoa(JSON.stringify(cfg));
108
- const decoded = decodeSamplingConfig(enc);
109
- expect(decoded).toEqual(cfg);
110
- });
111
-
112
- it("drops invalid rules but keeps the rest", () => {
113
- const enc = btoa(
114
- JSON.stringify({
115
- default: 0.1,
116
- rules: [
117
- { pattern: "^/ok", ratio: 1.0 },
118
- { pattern: "^[", ratio: 1.0 }, // invalid regex
119
- { pattern: "^/yes", ratio: 0.5 },
120
- { pattern: 7, ratio: 0.5 }, // wrong type
121
- ],
122
- }),
123
- );
124
- const decoded = decodeSamplingConfig(enc);
125
- expect(decoded?.rules).toEqual([
126
- { pattern: "^/ok", ratio: 1.0 },
127
- { pattern: "^/yes", ratio: 0.5 },
128
- ]);
129
- });
130
-
131
- it("returns null for non-JSON input", () => {
132
- expect(decodeSamplingConfig("not-base64-not-json!!")).toBeNull();
133
- });
134
- });
135
-
136
- describe("createUrlBasedHeadSampler", () => {
137
- it("wraps the URL-based sampler in ParentBasedSampler", () => {
138
- const sampler = createUrlBasedHeadSampler(null);
139
- expect(sampler).toBeInstanceOf(ParentBasedSampler);
140
- });
141
-
142
- it("applies DEFAULT_SAMPLE_RATIO when config is null", () => {
143
- const sampler = createUrlBasedHeadSampler(null);
144
- // High-accumulating trace ID is dropped at 0.1 — proves the ParentBased
145
- // wrapper inherits the URLBasedSampler default and isn't accidentally
146
- // forcing AlwaysOn.
147
- const result = sampler.shouldSample(
148
- ROOT_CONTEXT,
149
- HIGH_TRACE_ID,
150
- "n",
151
- SpanKind.SERVER,
152
- { "url.path": "/" },
153
- [],
154
- );
155
- expect(result.decision).toBe(SamplingDecision.NOT_RECORD);
156
- });
157
- });
158
-
159
- describe("regression: AlwaysOnSampler still works", () => {
160
- it("guards against accidental import-rename breakage", () => {
161
- // If sdk-trace-base ever renames AlwaysOnSampler we want a loud failure.
162
- const s = new AlwaysOnSampler();
163
- expect(s.shouldSample().decision).toBe(SamplingDecision.RECORD_AND_SAMPLED);
164
- });
165
- });
@@ -1,213 +0,0 @@
1
- /**
2
- * URL-based head sampler — port of `deco-cx/deco/observability/otel/samplers/urlBased.ts`.
3
- *
4
- * **No longer wired into `instrumentWorker` by default.** As of 4.4.0 the
5
- * recommended path for trace sampling is Cloudflare's wrangler-level
6
- * `observability.traces.head_sampling_rate`, which is one global rate per
7
- * Worker. This module stays as an opt-in escape hatch for sites that need
8
- * URL-pattern-aware sampling (e.g. always trace `/checkout`, sample
9
- * homepages at 1%) — those sites must wire OTel themselves outside the
10
- * default `instrumentWorker` flow.
11
- *
12
- * The sampler reads `OTEL_SAMPLING_CONFIG` (base64-encoded JSON) and
13
- * decides each trace's sample rate based on the matching pattern.
14
- *
15
- * Wrapped in `ParentBasedSampler` so a span inherits its parent's sampling
16
- * decision when one exists (i.e. distributed traces stay consistent end
17
- * to end).
18
- *
19
- * **Default ratio.** When no `default` is provided in the config (or the env
20
- * var is unset entirely), the sampler keeps **10%** of traces. Production
21
- * storefront traffic at full sampling will burn HyperDX ingest quotas
22
- * quickly — for an unconfigured site we'd rather drop 90% than overspend by
23
- * default. Sites that genuinely want every trace recorded must opt-in
24
- * explicitly with `OTEL_SAMPLING_CONFIG` setting `default: 1`.
25
- *
26
- * @example
27
- * ```jsonc
28
- * // base64-encode this and set as OTEL_SAMPLING_CONFIG:
29
- * {
30
- * "default": 0.05,
31
- * "rules": [
32
- * { "pattern": "^/checkout", "ratio": 1.0 },
33
- * { "pattern": "^/api/health", "ratio": 0.0 },
34
- * { "pattern": "/p$", "ratio": 0.1 }
35
- * ]
36
- * }
37
- * ```
38
- *
39
- * @deprecated Slated for removal in 5.0.0 unless a site declares an active
40
- * need. Use Cloudflare's `head_sampling_rate` first.
41
- */
42
-
43
- import { type Attributes, type Context, type Link, type SpanKind, trace } from "@opentelemetry/api";
44
- import {
45
- AlwaysOffSampler,
46
- AlwaysOnSampler,
47
- ParentBasedSampler,
48
- type Sampler,
49
- type SamplingResult,
50
- TraceIdRatioBasedSampler,
51
- } from "@opentelemetry/sdk-trace-base";
52
-
53
- export interface SamplingRule {
54
- /** ECMA RegExp pattern matched against the URL path. */
55
- pattern: string;
56
- /** Ratio in [0, 1]. */
57
- ratio: number;
58
- }
59
-
60
- export interface SamplingConfig {
61
- /**
62
- * Default sample ratio applied when no rule matches. Defaults to **0.1**
63
- * (10% sampled) when omitted. Set to `1` to record every trace
64
- * (only do this when you need a full debug stream and accept the cost).
65
- */
66
- default?: number;
67
- /** Ordered list of rules. First match wins. */
68
- rules?: SamplingRule[];
69
- }
70
-
71
- /**
72
- * Default ratio applied by `URLBasedSampler` when neither the config nor a
73
- * matching rule specifies one. Centralised so tests + docs share a single
74
- * source of truth.
75
- */
76
- export const DEFAULT_SAMPLE_RATIO = 0.1;
77
-
78
- interface CompiledRule {
79
- re: RegExp;
80
- sampler: Sampler;
81
- }
82
-
83
- /**
84
- * URL-pattern-driven head sampler. Implements the OTel `Sampler` interface
85
- * directly so it can be plugged into `ParentBasedSampler`'s `root` slot.
86
- */
87
- export class URLBasedSampler implements Sampler {
88
- private readonly defaultSampler: Sampler;
89
- private readonly rules: CompiledRule[];
90
-
91
- constructor(config: SamplingConfig = {}) {
92
- this.defaultSampler = ratioToSampler(config.default ?? DEFAULT_SAMPLE_RATIO);
93
- this.rules = (config.rules ?? []).map((rule) => ({
94
- re: new RegExp(rule.pattern),
95
- sampler: ratioToSampler(rule.ratio),
96
- }));
97
- }
98
-
99
- shouldSample(
100
- context: Context,
101
- traceId: string,
102
- spanName: string,
103
- spanKind: SpanKind,
104
- attributes: Attributes,
105
- links: Link[],
106
- ): SamplingResult {
107
- const path = extractPath(attributes);
108
- if (path) {
109
- for (const rule of this.rules) {
110
- if (rule.re.test(path)) {
111
- return rule.sampler.shouldSample(context, traceId, spanName, spanKind, attributes, links);
112
- }
113
- }
114
- }
115
- return this.defaultSampler.shouldSample(
116
- context,
117
- traceId,
118
- spanName,
119
- spanKind,
120
- attributes,
121
- links,
122
- );
123
- }
124
-
125
- toString(): string {
126
- return `URLBasedSampler(${this.rules.length} rules)`;
127
- }
128
- }
129
-
130
- function ratioToSampler(ratio: number): Sampler {
131
- if (ratio >= 1) return new AlwaysOnSampler();
132
- if (ratio <= 0) return new AlwaysOffSampler();
133
- return new TraceIdRatioBasedSampler(ratio);
134
- }
135
-
136
- function extractPath(attrs: Attributes): string | null {
137
- // Prefer the OTel-standard `url.path` (semconv >= 1.21), fall back to
138
- // legacy `http.target` and `http.url`.
139
- const direct = attrs["url.path"] ?? attrs["http.target"];
140
- if (typeof direct === "string") return direct;
141
-
142
- const httpUrl = attrs["http.url"];
143
- if (typeof httpUrl === "string") {
144
- try {
145
- return new URL(httpUrl).pathname;
146
- } catch {
147
- return null;
148
- }
149
- }
150
- return null;
151
- }
152
-
153
- // ---------------------------------------------------------------------------
154
- // Boot helpers
155
- // ---------------------------------------------------------------------------
156
-
157
- /**
158
- * Decode a base64-encoded `OTEL_SAMPLING_CONFIG` value into a `SamplingConfig`.
159
- * Returns `null` (caller falls back to `DEFAULT_SAMPLE_RATIO`, currently 0.1) on:
160
- * - missing / empty input
161
- * - invalid base64
162
- * - JSON parse failure
163
- * - schema-mismatched payload
164
- *
165
- * Logs a warning to console when the env var is set but unparseable so the
166
- * mistake is visible in CF Logs without crashing the worker boot.
167
- */
168
- export function decodeSamplingConfig(raw: string | undefined): SamplingConfig | null {
169
- if (!raw) return null;
170
- try {
171
- const json = atob(raw);
172
- const parsed = JSON.parse(json) as unknown;
173
- if (!parsed || typeof parsed !== "object") return null;
174
- const obj = parsed as { default?: unknown; rules?: unknown };
175
-
176
- const defaultRatio = typeof obj.default === "number" ? obj.default : undefined;
177
- const rawRules = Array.isArray(obj.rules) ? obj.rules : [];
178
- const rules: SamplingRule[] = [];
179
- for (const r of rawRules) {
180
- if (!r || typeof r !== "object") continue;
181
- const rec = r as { pattern?: unknown; ratio?: unknown };
182
- if (typeof rec.pattern !== "string" || typeof rec.ratio !== "number") continue;
183
- try {
184
- // Eagerly validate the regex so a bad pattern fails at boot, not
185
- // on the first matching request.
186
- new RegExp(rec.pattern);
187
- rules.push({ pattern: rec.pattern, ratio: rec.ratio });
188
- } catch {
189
- console.warn(`[sampler] dropping invalid pattern: ${rec.pattern}`);
190
- }
191
- }
192
-
193
- return { default: defaultRatio, rules };
194
- } catch (err) {
195
- console.warn(`[sampler] failed to decode OTEL_SAMPLING_CONFIG`, String(err));
196
- return null;
197
- }
198
- }
199
-
200
- /**
201
- * Build a `ParentBasedSampler` rooted at our URL-based sampler.
202
- * Wire as the `headSampler` for any custom OTel SDK setup (e.g. a site
203
- * that opts back into `@microlabs/otel-cf-workers` outside the default
204
- * `instrumentWorker` flow).
205
- */
206
- export function createUrlBasedHeadSampler(config: SamplingConfig | null): Sampler {
207
- const root = new URLBasedSampler(config ?? {});
208
- return new ParentBasedSampler({ root });
209
- }
210
-
211
- // Re-export OTel API helper so callers can read `traceId` / build tags off
212
- // the active span without importing @opentelemetry/api directly.
213
- export { trace as _otelTrace };