@heystack/otel 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +77 -2
- package/dist/next.js +8 -2
- package/dist/node.d.ts +22 -1
- package/dist/node.js +32 -2
- package/dist/workers.d.ts +79 -8
- package/dist/workers.js +396 -84
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -13,7 +13,7 @@ Always read your key from the environment — never paste it into source:
|
|
|
13
13
|
HEYSTACK_API_KEY=sk_live_…
|
|
14
14
|
```
|
|
15
15
|
|
|
16
|
-
> **Requires `@heystack/otel` `>=0.3.0
|
|
16
|
+
> **Requires `@heystack/otel` `>=0.3.0` (prefer `>=0.3.2`).** 0.3.2 fixes the no-op `suppressTracing` (feedback-loop guard) on the Workers/Next-on-OpenNext path and adds the Workers `nodejs_compat` requirement. See [Migration](#migration--versioning) below.
|
|
17
17
|
|
|
18
18
|
## Runtime matrix
|
|
19
19
|
|
|
@@ -36,6 +36,24 @@ initHeystack({ apiKey: process.env.HEYSTACK_API_KEY, service: "my-app" });
|
|
|
36
36
|
|
|
37
37
|
This enables auto-instrumentations (HTTP, Express, etc.) so you get spans without manual wiring.
|
|
38
38
|
|
|
39
|
+
### Slimming down auto-instrumentations (cost)
|
|
40
|
+
|
|
41
|
+
The default `getNodeAutoInstrumentations()` eagerly patches **~40 libraries** (HTTP, DNS, fs, net, gRPC, and every popular DB/HTTP client). That adds startup time and per-call overhead even for libraries your app never uses. To load only what you need, pass your own `instrumentations` array:
|
|
42
|
+
|
|
43
|
+
```ts
|
|
44
|
+
import { initHeystack } from "@heystack/otel/node";
|
|
45
|
+
import { HttpInstrumentation } from "@opentelemetry/instrumentation-http";
|
|
46
|
+
import { ExpressInstrumentation } from "@opentelemetry/instrumentation-express";
|
|
47
|
+
|
|
48
|
+
initHeystack({
|
|
49
|
+
apiKey: process.env.HEYSTACK_API_KEY,
|
|
50
|
+
service: "my-app",
|
|
51
|
+
instrumentations: [new HttpInstrumentation(), new ExpressInstrumentation()],
|
|
52
|
+
});
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
`instrumentations` defaults to `[getNodeAutoInstrumentations()]`. Passing your own array replaces the default entirely (it's ignored when `autoInstrument: false`). `initHeystack` is also **idempotent** as of 0.3.2 — calling it again returns the already-started SDK (no duplicate instrumentations or signal handlers, which matters across Next dev-server reloads).
|
|
56
|
+
|
|
39
57
|
## Next.js (any deploy target, including Cloudflare/OpenNext)
|
|
40
58
|
|
|
41
59
|
In `instrumentation.ts` at the project root:
|
|
@@ -70,7 +88,17 @@ export default instrument(
|
|
|
70
88
|
);
|
|
71
89
|
```
|
|
72
90
|
|
|
73
|
-
As of **0.3.0** `instrument()` registers the **global** tracer provider and creates the per-request SERVER span via the global tracer, so nested spans created through the global `trace.getTracer()` API (framework/library/manual) also export — you get a trace tree, not a lone SERVER span.
|
|
91
|
+
As of **0.3.0** `instrument()` registers the **global** tracer provider and creates the per-request SERVER span via the global tracer, so nested spans created through the global `trace.getTracer()` API (framework/library/manual) also export — you get a trace tree, not a lone SERVER span.
|
|
92
|
+
|
|
93
|
+
> **Requires `nodejs_compat` on workerd.** As of **0.3.2** the SDK registers an OpenTelemetry **ContextManager** at init (see below), backed by `AsyncLocalStorage` from `node:async_hooks`. On Cloudflare Workers that means your `wrangler.toml` must enable the Node.js compatibility flag:
|
|
94
|
+
> ```toml
|
|
95
|
+
> compatibility_flags = ["nodejs_compat"]
|
|
96
|
+
> ```
|
|
97
|
+
> If `node:async_hooks` is unavailable, the SDK transparently falls back to a synchronous stack-based ContextManager (no extra dependency) — suppression still works, but cross-`await` parent linking and per-request context isolation degrade to best-effort.
|
|
98
|
+
|
|
99
|
+
### Why a ContextManager (0.3.2)
|
|
100
|
+
|
|
101
|
+
`context.with(...)` in OpenTelemetry is a **no-op unless a ContextManager is registered** with the global API. Before 0.3.2 the Workers path registered only a tracer provider, so `suppressTracing()` — the primary defence against the self-trace feedback loop — silently did nothing in production (the exporter's own `POST /v1/traces` could be re-traced by host fetch auto-instrumentation, looping). As of **0.3.2** the SDK registers a ContextManager exactly once at init. With `AsyncLocalStorageContextManager` (the default, on Node and on workerd under `nodejs_compat`) you also get **cross-`await` parent→child span linking** and **per-request context isolation** — concurrent requests no longer share or clobber the active span.
|
|
74
102
|
|
|
75
103
|
`instrument()` must be the **outermost** wrapper if other middleware also wraps the handler, so the request span covers everything inside:
|
|
76
104
|
|
|
@@ -80,6 +108,39 @@ export default instrument(withOtherMiddleware(worker), { service: "my-worker" })
|
|
|
80
108
|
|
|
81
109
|
Set the key as a secret: `wrangler secret put HEYSTACK_API_KEY`.
|
|
82
110
|
|
|
111
|
+
As of **0.3.1** `instrument()` **forwards every other handler your Worker exports** — `queue`, `scheduled`, `tail`, etc. — untouched, so wrapping never drops a handler Cloudflare requires for deploy (it previously returned only `{ fetch }`, which broke Queue/Cron Workers). On top of forwarding, `queue` and `scheduled` are themselves traced when present: each gets a root span via the global tracer (`queue <queueName>` as a CONSUMER span with batch attributes; `scheduled <cron>` as an INTERNAL span with the cron attribute), flushed via `ctx.waitUntil` just like `fetch`.
|
|
112
|
+
|
|
113
|
+
### Durable Objects are NOT covered by `instrument()`
|
|
114
|
+
|
|
115
|
+
`instrument()` wraps the keys of the **default-export handler object** (`fetch`/`queue`/`scheduled`/…). **Durable Objects are separate named class exports**, so spreading the handler object does not touch them — a DO's `fetch`/`alarm` methods run **untraced** even when your Worker's default export is wrapped.
|
|
116
|
+
|
|
117
|
+
To trace a Durable Object, instrument it manually with the global tracer (which `instrument()` / `initHeystackWorkers()` already registered) and flush per invocation:
|
|
118
|
+
|
|
119
|
+
```ts
|
|
120
|
+
import { trace, SpanKind, context } from "@opentelemetry/api";
|
|
121
|
+
import { flushHeystack } from "@heystack/otel/workers";
|
|
122
|
+
|
|
123
|
+
export class Counter {
|
|
124
|
+
async fetch(req: Request): Promise<Response> {
|
|
125
|
+
const tracer = trace.getTracer("heystack");
|
|
126
|
+
const span = tracer.startSpan(`DO ${new URL(req.url).pathname}`, {
|
|
127
|
+
kind: SpanKind.SERVER,
|
|
128
|
+
});
|
|
129
|
+
try {
|
|
130
|
+
return await context.with(trace.setSpan(context.active(), span), async () => {
|
|
131
|
+
// ...your DO logic...
|
|
132
|
+
return new Response("ok");
|
|
133
|
+
});
|
|
134
|
+
} finally {
|
|
135
|
+
span.end();
|
|
136
|
+
this.state.waitUntil(flushHeystack()); // `this.state` is the DurableObjectState passed to the DO constructor (store it there); ensures the export POST completes
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
The default export still needs to be wrapped with `instrument()` (or `initHeystackWorkers()` called) so the global provider + ContextManager are registered before the DO runs.
|
|
143
|
+
|
|
83
144
|
## Flushing
|
|
84
145
|
|
|
85
146
|
On Workers/edge the export is a `fetch()` POST, and the isolate can be torn down the instant your handler returns. **You must let that POST complete or the trace is silently dropped** — this is the #1 cause of flaky Workers tracing. `flushHeystack()` and `instrument()`'s built-in flush both await the in-flight fetch (not just the OTel span processor, which does *not* wait for it).
|
|
@@ -88,8 +149,22 @@ On Workers/edge the export is a `fetch()` POST, and the isolate can be torn down
|
|
|
88
149
|
- **Next on Cloudflare/OpenNext (`registerHeystack`)** — as of **0.3.0** this flushes automatically when `@opennextjs/cloudflare` is present: the export runs inside the Cloudflare request context, so the exporter borrows that request's `ctx.waitUntil` (via OpenNext's `getCloudflareContext`) to keep the isolate alive until the POST completes. **No manual hook needed.** For other workerd setups *without* `@opennextjs/cloudflare`, `import { flushHeystack } from "@heystack/otel/workers"` and call it from a response hook (or `ctx.waitUntil(flushHeystack())` if you have a ctx) — or pass an explicit `waitUntil` to `initHeystackWorkers` (highest priority). `flushHeystack()` awaits the export fetch.
|
|
89
150
|
- **Node (`initHeystack`)** — flushes on `SIGTERM`/`SIGINT` automatically.
|
|
90
151
|
|
|
152
|
+
## No feedback loop with host fetch auto-instrumentation
|
|
153
|
+
|
|
154
|
+
As of **0.3.1** the exporter **suppresses tracing for its own ingest POST**, and **0.3.2 makes that suppression actually take effect in production**. On Next/OpenNext the host auto-instruments outbound `fetch`, so without this the exporter's `POST /v1/traces` became a CLIENT span → exported → re-captured → a sustained loop (wall-to-wall identical `fetch POST .../v1/traces` spans). The POST runs inside an OpenTelemetry tracing-suppressed context (`suppressTracing`) — but `context.with()` is a no-op unless a ContextManager is registered, which 0.3.1 did not do, so suppression silently did nothing. **0.3.2 registers a ContextManager** (see [Why a ContextManager](#why-a-contextmanager-032)), so the POST is genuinely suppressed.
|
|
155
|
+
|
|
156
|
+
As belt-and-suspenders the exporter also drops any span whose HTTP target points at the configured ingest origin. As of **0.3.2** that match is **hostname-accurate**: full-URL attributes (`url.full`, `http.url`) are parsed and compared on `.hostname` (case-insensitive, port-stripped) so a sibling domain like `myingest.heystack.dev` is no longer a false positive and an explicit port like `ingest.heystack.dev:443` is correctly matched; host-only attributes (`server.address`, `net.peer.name`, `net.peer.hostname`, `http.host`, `peer.address`) are port-stripped and compared by hostname.
|
|
157
|
+
|
|
91
158
|
## Migration / versioning
|
|
92
159
|
|
|
160
|
+
- **`0.3.2`** — runtime-correctness fixes:
|
|
161
|
+
- **ContextManager registered (CRITICAL).** 0.3.1's `suppressTracing` was a no-op because no ContextManager was registered, so the exporter's ingest POST could still be re-traced into a feedback loop. 0.3.2 registers an `AsyncLocalStorageContextManager` (Node + workerd under `nodejs_compat`; sync stack-manager fallback otherwise) so suppression works — and as a bonus you get cross-`await` span parenting + per-request context isolation. **Workers now require `nodejs_compat`.** New dependency: `@opentelemetry/context-async-hooks`.
|
|
162
|
+
- **Hostname-accurate self-span filter** (no sibling-domain false positives; `host:port` now matched; more host-only attrs covered).
|
|
163
|
+
- **OpenNext accessor race fixed** — the `getCloudflareContext` accessor now loads eagerly, and early exports that fire before it resolves are still handed to `ctx.waitUntil` once it does; accessor/`waitUntil` failures are now logged (once) instead of silently swallowed.
|
|
164
|
+
- **Node SDK hardening** — `initHeystack` is idempotent (cached singleton) and registers SIGTERM/SIGINT handlers at most once (no leak across Next dev reloads); new optional `instrumentations` field to load a slimmer instrumentation set.
|
|
165
|
+
- **Drain timeout** — `instrument()`'s `ctx.waitUntil` flush is raced against an ~8s timeout so a hung ingest can't pin the isolate to its CPU limit.
|
|
166
|
+
- **Bundler hints** — `/next`'s runtime-selected dynamic imports carry `@vite-ignore` + `webpackIgnore` so a Node build doesn't bundle the workers path (and vice-versa).
|
|
167
|
+
- **`0.3.1`** — `instrument()` now **forwards `queue`/`scheduled`/`tail` (and any other handler)** instead of returning only `{ fetch }`, and traces `queue`/`scheduled`; the exporter **suppresses self-tracing** on its ingest POST (note: only effective from 0.3.2, which registers the ContextManager). Both are production-reproduced bug fixes; upgrade is recommended for any Queue/Cron Worker or Next-on-OpenNext app.
|
|
93
168
|
- **Pin `@heystack/otel` `>=0.3.0`** — 0.3.0 makes Next-on-OpenNext auto-flush via the Cloudflare request context, hardens workerd detection (uses the `WebSocketPair` global so it survives `nodejs_compat`), and has `instrument()` set the global provider so nested spans export. The workerd-aware `/next` path and `initHeystackWorkers` / `flushHeystack` exports were added in 0.2.0.
|
|
94
169
|
- The pre-0.1.0 top-level default `initHeystack({ apiKey })` is **gone**. Use the subpath entries: `@heystack/otel/node`, `@heystack/otel/next`, `@heystack/otel/workers`. The root `@heystack/otel` entry now exposes only pure helpers (`buildExporterConfig`, types).
|
|
95
170
|
|
package/dist/next.js
CHANGED
|
@@ -40,11 +40,17 @@ export async function registerHeystack(o) {
|
|
|
40
40
|
const onWorkerd = g.navigator?.userAgent === "Cloudflare-Workers" ||
|
|
41
41
|
typeof g.WebSocketPair !== "undefined";
|
|
42
42
|
if (onWorkerd) {
|
|
43
|
-
|
|
43
|
+
// Bundler-ignore hints: a Node build must not eagerly bundle the workers
|
|
44
|
+
// path (and a workers build must not bundle the node path) — the import is
|
|
45
|
+
// runtime-selected per environment. `@vite-ignore` covers Vite/Rollup;
|
|
46
|
+
// `webpackIgnore` covers Next/Turbopack/webpack.
|
|
47
|
+
const { initHeystackWorkers } = await import(
|
|
48
|
+
/* @vite-ignore */ /* webpackIgnore: true */ "./workers.js");
|
|
44
49
|
initHeystackWorkers({ apiKey, service: o.service, endpoint: o.endpoint });
|
|
45
50
|
}
|
|
46
51
|
else {
|
|
47
|
-
const { initHeystack } = await import(
|
|
52
|
+
const { initHeystack } = await import(
|
|
53
|
+
/* @vite-ignore */ /* webpackIgnore: true */ "./node.js");
|
|
48
54
|
initHeystack({
|
|
49
55
|
apiKey,
|
|
50
56
|
service: o.service,
|
package/dist/node.d.ts
CHANGED
|
@@ -1,12 +1,33 @@
|
|
|
1
1
|
import { NodeSDK } from "@opentelemetry/sdk-node";
|
|
2
|
+
import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
|
|
2
3
|
import { type HeystackOptions } from "./core.js";
|
|
4
|
+
/**
|
|
5
|
+
* The element type the NodeSDK `instrumentations` config accepts (an OTel
|
|
6
|
+
* `Instrumentation` or array thereof). Derived from `getNodeAutoInstrumentations`
|
|
7
|
+
* so we don't take a direct dependency on `@opentelemetry/instrumentation` just
|
|
8
|
+
* for a type.
|
|
9
|
+
*/
|
|
10
|
+
type InstrumentationConfigItem = ReturnType<typeof getNodeAutoInstrumentations>[number];
|
|
3
11
|
export interface NodeOptions extends HeystackOptions {
|
|
4
12
|
/** Enable OTel diagnostic logging to console to confirm export. */
|
|
5
13
|
debug?: boolean;
|
|
6
14
|
/** Set false to skip auto-instrumentations (you'll then only get framework/manual spans). Default true. */
|
|
7
15
|
autoInstrument?: boolean;
|
|
16
|
+
/**
|
|
17
|
+
* Provide your own instrumentation array instead of the default
|
|
18
|
+
* `getNodeAutoInstrumentations()`. Use this to load a slimmer set (the default
|
|
19
|
+
* eagerly patches ~40 libraries — DNS, fs, net, gRPC, every popular DB/HTTP
|
|
20
|
+
* client — which adds startup cost and overhead even for libs you don't use).
|
|
21
|
+
* Ignored when `autoInstrument === false`. Example:
|
|
22
|
+
* import { HttpInstrumentation } from "@opentelemetry/instrumentation-http";
|
|
23
|
+
* initHeystack({ ..., instrumentations: [new HttpInstrumentation()] });
|
|
24
|
+
*/
|
|
25
|
+
instrumentations?: InstrumentationConfigItem[];
|
|
8
26
|
}
|
|
9
27
|
/** Initialise Heystack tracing on a Node runtime. Call once, as early as possible. Returns the started SDK. */
|
|
10
28
|
export declare function initHeystack(o: NodeOptions): NodeSDK;
|
|
11
|
-
/** Flush + shutdown the SDK on SIGTERM/SIGINT so short-lived processes don't lose the last batch. */
|
|
29
|
+
/** Flush + shutdown the SDK on SIGTERM/SIGINT so short-lived processes don't lose the last batch. Registers handlers at most once. */
|
|
12
30
|
export declare function shutdownOnSignals(sdk: NodeSDK): void;
|
|
31
|
+
/** Reset the cached SDK + signal-handler guard. Internal/testing helper. */
|
|
32
|
+
export declare function __resetNodeSdk(): void;
|
|
33
|
+
export {};
|
package/dist/node.js
CHANGED
|
@@ -3,22 +3,47 @@ import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
|
|
|
3
3
|
import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
|
|
4
4
|
import { diag, DiagConsoleLogger, DiagLogLevel } from "@opentelemetry/api";
|
|
5
5
|
import { buildExporterConfig } from "./core.js";
|
|
6
|
+
/**
|
|
7
|
+
* Process-level guard so SIGTERM/SIGINT handlers are registered AT MOST ONCE,
|
|
8
|
+
* even if `initHeystack` is called repeatedly (e.g. Next dev server reloads).
|
|
9
|
+
* Without this, every re-init leaked another pair of signal listeners and
|
|
10
|
+
* eventually tripped Node's MaxListenersExceededWarning.
|
|
11
|
+
*/
|
|
12
|
+
let _signalHandlersRegistered = false;
|
|
13
|
+
/**
|
|
14
|
+
* Cached singleton SDK. `initHeystack` is meant to be called once; on repeat
|
|
15
|
+
* calls we return the already-started SDK instead of constructing/starting a
|
|
16
|
+
* second NodeSDK (which would double-register instrumentations and signal
|
|
17
|
+
* handlers).
|
|
18
|
+
*/
|
|
19
|
+
let _sdk = null;
|
|
6
20
|
/** Initialise Heystack tracing on a Node runtime. Call once, as early as possible. Returns the started SDK. */
|
|
7
21
|
export function initHeystack(o) {
|
|
22
|
+
// Idempotent: a second call returns the cached SDK rather than starting a new
|
|
23
|
+
// one (which would duplicate instrumentations + signal handlers).
|
|
24
|
+
if (_sdk)
|
|
25
|
+
return _sdk;
|
|
8
26
|
if (o.debug)
|
|
9
27
|
diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.DEBUG);
|
|
10
28
|
const cfg = buildExporterConfig(o);
|
|
29
|
+
const instrumentations = o.autoInstrument === false
|
|
30
|
+
? []
|
|
31
|
+
: (o.instrumentations ?? [getNodeAutoInstrumentations()]);
|
|
11
32
|
const sdk = new NodeSDK({
|
|
12
33
|
serviceName: o.service,
|
|
13
34
|
traceExporter: new OTLPTraceExporter({ url: cfg.url, headers: cfg.headers }),
|
|
14
|
-
instrumentations
|
|
35
|
+
instrumentations,
|
|
15
36
|
});
|
|
16
37
|
sdk.start();
|
|
17
38
|
shutdownOnSignals(sdk);
|
|
39
|
+
_sdk = sdk;
|
|
18
40
|
return sdk;
|
|
19
41
|
}
|
|
20
|
-
/** Flush + shutdown the SDK on SIGTERM/SIGINT so short-lived processes don't lose the last batch. */
|
|
42
|
+
/** Flush + shutdown the SDK on SIGTERM/SIGINT so short-lived processes don't lose the last batch. Registers handlers at most once. */
|
|
21
43
|
export function shutdownOnSignals(sdk) {
|
|
44
|
+
if (_signalHandlersRegistered)
|
|
45
|
+
return;
|
|
46
|
+
_signalHandlersRegistered = true;
|
|
22
47
|
const stop = () => {
|
|
23
48
|
sdk
|
|
24
49
|
.shutdown()
|
|
@@ -28,3 +53,8 @@ export function shutdownOnSignals(sdk) {
|
|
|
28
53
|
process.once("SIGTERM", stop);
|
|
29
54
|
process.once("SIGINT", stop);
|
|
30
55
|
}
|
|
56
|
+
/** Reset the cached SDK + signal-handler guard. Internal/testing helper. */
|
|
57
|
+
export function __resetNodeSdk() {
|
|
58
|
+
_sdk = null;
|
|
59
|
+
_signalHandlersRegistered = false;
|
|
60
|
+
}
|
package/dist/workers.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { type Span } from "@opentelemetry/api";
|
|
2
|
+
import { type Context, type ContextManager } from "@opentelemetry/api";
|
|
2
3
|
import { BasicTracerProvider, type ReadableSpan, type SpanExporter } from "@opentelemetry/sdk-trace-base";
|
|
3
4
|
import { type HeystackOptions } from "./core.js";
|
|
4
5
|
declare const ExportResultCode: {
|
|
@@ -53,12 +54,21 @@ interface OtlpTracesPayload {
|
|
|
53
54
|
* hence the same resource, so we emit a single resourceSpans entry.
|
|
54
55
|
*/
|
|
55
56
|
export declare function serializeSpans(spans: ReadableSpan[]): OtlpTracesPayload;
|
|
57
|
+
/**
|
|
58
|
+
* Test-only helper: run the self-span attribute check directly against a plain
|
|
59
|
+
* attribute bag + ingest hostname, without constructing a ReadableSpan. The
|
|
60
|
+
* `ingestHost` should be a bare hostname (lower-case, no port), matching what
|
|
61
|
+
* the exporter derives via `safeHostname(cfg.url)`.
|
|
62
|
+
*/
|
|
63
|
+
export declare function isSelfSpanForTest(attrs: Record<string, unknown>, ingestHost: string): boolean;
|
|
56
64
|
/**
|
|
57
65
|
* A WinterCG-compatible OTLP/JSON span exporter. POSTs ended spans to the
|
|
58
66
|
* Heystack ingest using the platform `fetch` — no Node built-ins.
|
|
59
67
|
*/
|
|
60
68
|
export declare class HeystackSpanExporter implements SpanExporter {
|
|
61
69
|
private readonly url;
|
|
70
|
+
/** Hostname (no port) of the ingest endpoint, used to drop self-trace spans. */
|
|
71
|
+
private readonly ingestHost;
|
|
62
72
|
private readonly headers;
|
|
63
73
|
private shutdownState;
|
|
64
74
|
/**
|
|
@@ -81,6 +91,15 @@ export declare class HeystackSpanExporter implements SpanExporter {
|
|
|
81
91
|
waitUntil?: (p: Promise<unknown>) => void;
|
|
82
92
|
constructor(options: HeystackOptions);
|
|
83
93
|
export(spans: ReadableSpan[], resultCallback: (result: ExportResult) => void): void;
|
|
94
|
+
/**
|
|
95
|
+
* Hand `p` to the OpenNext Cloudflare request context's `ctx.waitUntil`.
|
|
96
|
+
* Resolves the accessor first (awaiting its in-flight load if needed) so an
|
|
97
|
+
* export that fired before the dynamic import settled is still covered.
|
|
98
|
+
*/
|
|
99
|
+
private handOffToCloudflareContext;
|
|
100
|
+
private warnedWaitUntilFailure;
|
|
101
|
+
/** Log a waitUntil/accessor failure exactly once; never throw. */
|
|
102
|
+
private warnWaitUntilFailed;
|
|
84
103
|
shutdown(): Promise<void>;
|
|
85
104
|
/**
|
|
86
105
|
* Resolve only once every in-flight export fetch has settled. This is the
|
|
@@ -123,6 +142,24 @@ export type HeystackTracerProvider = BasicTracerProvider & {
|
|
|
123
142
|
* `flushHeystack()`), not just `provider.forceFlush()`.
|
|
124
143
|
*/
|
|
125
144
|
export declare function createTracerProvider(config: HeystackOptions): HeystackTracerProvider;
|
|
145
|
+
/**
|
|
146
|
+
* A minimal SYNCHRONOUS, stack-based ContextManager — the fallback manager for
|
|
147
|
+
* the /workers entry (no `node:async_hooks`, so it works on any WinterCG runtime).
|
|
148
|
+
* It makes `context.with()` propagate synchronously, which is enough for the
|
|
149
|
+
* exporter's `suppressTracing` to take effect and for the belt-and-suspenders
|
|
150
|
+
* self-span filter — but it does NOT carry context across `await` boundaries (so
|
|
151
|
+
* cross-`await` parent linking and per-request isolation are best-effort).
|
|
152
|
+
*/
|
|
153
|
+
export declare class SyncStackContextManager implements ContextManager {
|
|
154
|
+
private _stack;
|
|
155
|
+
active(): Context;
|
|
156
|
+
with<A extends unknown[], F extends (...args: A) => ReturnType<F>>(ctx: Context, fn: F, thisArg?: ThisParameterType<F>, ...args: A): ReturnType<F>;
|
|
157
|
+
bind<T>(_ctx: Context, target: T): T;
|
|
158
|
+
enable(): this;
|
|
159
|
+
disable(): this;
|
|
160
|
+
}
|
|
161
|
+
/** Reset the context-manager registration guard. Internal/testing helper. */
|
|
162
|
+
export declare function __resetContextManager(): void;
|
|
126
163
|
/**
|
|
127
164
|
* Register Heystack as the global tracer provider on a Workers/edge runtime
|
|
128
165
|
* (workerd). Spans from the host framework (e.g. Next.js) export over fetch.
|
|
@@ -152,8 +189,18 @@ export declare function flushHeystack(): Promise<void>;
|
|
|
152
189
|
export declare function __resetProvider(): void;
|
|
153
190
|
/** Reset the once-only no-key warning. Internal/testing helper. */
|
|
154
191
|
export declare function __resetWarnings(): void;
|
|
155
|
-
|
|
156
|
-
|
|
192
|
+
/**
|
|
193
|
+
* The shape of a Worker's default export. `fetch` is the common entrypoint, but
|
|
194
|
+
* a Worker may also export `queue` / `scheduled` / `tail` (and arbitrary other
|
|
195
|
+
* siblings). `instrument()` forwards ALL of these untouched except `fetch`
|
|
196
|
+
* (always traced) and `queue` / `scheduled` (traced when present) — so wrapping
|
|
197
|
+
* a Worker never drops a handler Cloudflare requires for deploy.
|
|
198
|
+
*/
|
|
199
|
+
interface WorkerHandler<E> {
|
|
200
|
+
fetch?: (req: Request, env: E, ctx: ExecutionContext) => Promise<Response> | Response;
|
|
201
|
+
queue?: (batch: MessageBatch, env: E, ctx: ExecutionContext) => Promise<void> | void;
|
|
202
|
+
scheduled?: (controller: ScheduledController, env: E, ctx: ExecutionContext) => Promise<void> | void;
|
|
203
|
+
[key: string]: unknown;
|
|
157
204
|
}
|
|
158
205
|
/**
|
|
159
206
|
* Wrap a Worker's default export so every request is auto-traced with a SERVER
|
|
@@ -172,15 +219,39 @@ interface FetchHandler<E> {
|
|
|
172
219
|
* );
|
|
173
220
|
*
|
|
174
221
|
* TRACE TREE: `instrument()` sets up the singleton GLOBAL tracer provider and
|
|
175
|
-
* creates the
|
|
176
|
-
*
|
|
177
|
-
*
|
|
178
|
-
*
|
|
222
|
+
* creates the root span via the global tracer (`trace.getTracer("heystack")`).
|
|
223
|
+
* This means nested spans created through the global `trace.getTracer()` API
|
|
224
|
+
* (framework / library / your own manual spans) also flow to the exporter — you
|
|
225
|
+
* get a trace tree, not a lone root span.
|
|
226
|
+
*
|
|
227
|
+
* ALL HANDLERS FORWARDED: the returned object spreads `handler`, so sibling
|
|
228
|
+
* handlers a Worker exports (`tail`, `email`, etc.) are preserved — Cloudflare
|
|
229
|
+
* won't reject the deploy for a missing handler. `fetch` is always replaced with
|
|
230
|
+
* a traced wrapper; `queue` and `scheduled` are wrapped (and traced) when
|
|
231
|
+
* present; everything else is forwarded untouched.
|
|
179
232
|
*
|
|
180
233
|
* If no API key is available (neither `config.apiKey` nor
|
|
181
234
|
* `env.HEYSTACK_API_KEY`), the handler runs untraced.
|
|
182
235
|
*/
|
|
183
|
-
|
|
236
|
+
/**
|
|
237
|
+
* The instrumented result: the original handler with the traced entrypoints
|
|
238
|
+
* normalised to their full Worker signatures (so callers/tests can invoke them
|
|
239
|
+
* with `(arg, env, ctx)`), and every other sibling handler forwarded as-is. A
|
|
240
|
+
* traced key is present (and non-optional) exactly when it exists on the input
|
|
241
|
+
* handler, so a `{ fetch }` Worker yields a non-optional `fetch`.
|
|
242
|
+
*/
|
|
243
|
+
type Instrumented<E, H> = Omit<H, "fetch" | "queue" | "scheduled"> & (H extends {
|
|
244
|
+
fetch: unknown;
|
|
245
|
+
} ? {
|
|
184
246
|
fetch: (req: Request, env: E, ctx: ExecutionContext) => Promise<Response>;
|
|
185
|
-
}
|
|
247
|
+
} : unknown) & (H extends {
|
|
248
|
+
queue: unknown;
|
|
249
|
+
} ? {
|
|
250
|
+
queue: (batch: MessageBatch, env: E, ctx: ExecutionContext) => Promise<void>;
|
|
251
|
+
} : unknown) & (H extends {
|
|
252
|
+
scheduled: unknown;
|
|
253
|
+
} ? {
|
|
254
|
+
scheduled: (controller: ScheduledController, env: E, ctx: ExecutionContext) => Promise<void>;
|
|
255
|
+
} : unknown);
|
|
256
|
+
export declare function instrument<E = unknown, H extends WorkerHandler<E> = WorkerHandler<E>>(handler: H, config: WorkersConfig): Instrumented<E, H>;
|
|
186
257
|
export type { Span };
|
package/dist/workers.js
CHANGED
|
@@ -8,6 +8,8 @@
|
|
|
8
8
|
// ships its own OTLP/JSON-over-fetch span exporter so it runs on Workers/Edge
|
|
9
9
|
// where the Node SDK cannot.
|
|
10
10
|
import { context, trace, SpanKind, SpanStatusCode, } from "@opentelemetry/api";
|
|
11
|
+
import { suppressTracing } from "@opentelemetry/core";
|
|
12
|
+
import { ROOT_CONTEXT } from "@opentelemetry/api";
|
|
11
13
|
import { Resource } from "@opentelemetry/resources";
|
|
12
14
|
import { BasicTracerProvider, SimpleSpanProcessor, } from "@opentelemetry/sdk-trace-base";
|
|
13
15
|
import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
|
|
@@ -18,21 +20,36 @@ import { buildExporterConfig } from "./core.js";
|
|
|
18
20
|
// out guarantees no extra (potentially node-platform) code in the bundle.
|
|
19
21
|
const ExportResultCode = { SUCCESS: 0, FAILED: 1 };
|
|
20
22
|
let _getCloudflareContext;
|
|
21
|
-
let
|
|
22
|
-
/**
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
23
|
+
let _cfAccessorLoadPromise;
|
|
24
|
+
/**
|
|
25
|
+
* Best-effort, once-only resolve of OpenNext's `getCloudflareContext`. The
|
|
26
|
+
* import is async, so the FIRST export(s) may run before it settles — see
|
|
27
|
+
* `export()`, which awaits this promise so those early exports still get handed
|
|
28
|
+
* to `ctx.waitUntil` once the accessor resolves (rather than relying solely on
|
|
29
|
+
* an explicit `flushHeystack()` that the OpenNext path doesn't call). Started
|
|
30
|
+
* eagerly at module init so the window is as small as possible.
|
|
31
|
+
*/
|
|
32
|
+
function loadCloudflareContextAccessor() {
|
|
33
|
+
if (_cfAccessorLoadPromise)
|
|
34
|
+
return _cfAccessorLoadPromise;
|
|
35
|
+
_cfAccessorLoadPromise = (async () => {
|
|
36
|
+
try {
|
|
37
|
+
const spec = "@opennextjs/cloudflare";
|
|
38
|
+
const m = (await import(/* @vite-ignore */ /* webpackIgnore: true */ spec));
|
|
39
|
+
_getCloudflareContext = m.getCloudflareContext;
|
|
40
|
+
}
|
|
41
|
+
catch {
|
|
42
|
+
// Not on OpenNext (or the package isn't installed) — fall back silently to
|
|
43
|
+
// the `pending` set + manual flush. This is the common, expected case off
|
|
44
|
+
// OpenNext, so it is intentionally NOT logged.
|
|
45
|
+
}
|
|
46
|
+
})();
|
|
47
|
+
return _cfAccessorLoadPromise;
|
|
35
48
|
}
|
|
49
|
+
// Kick off the accessor resolution eagerly at module load so it's ready (or
|
|
50
|
+
// nearly so) by the time the first export runs, minimising the race where an
|
|
51
|
+
// early export can't borrow `ctx.waitUntil`.
|
|
52
|
+
void loadCloudflareContextAccessor();
|
|
36
53
|
/** Convert an OTel HrTime `[seconds, nanos]` tuple to a nanosecond string. */
|
|
37
54
|
function hrTimeToUnixNano(time) {
|
|
38
55
|
// BigInt math keeps full nanosecond precision without float rounding.
|
|
@@ -121,6 +138,98 @@ export function serializeSpans(spans) {
|
|
|
121
138
|
};
|
|
122
139
|
}
|
|
123
140
|
// ---------------------------------------------------------------------------
|
|
141
|
+
// Self-span filtering (feedback-loop guard)
|
|
142
|
+
//
|
|
143
|
+
// On Next/OpenNext the host auto-instruments outbound `fetch`, so the exporter's
|
|
144
|
+
// own POST to `/v1/traces` becomes a CLIENT span → exported → re-captured → a
|
|
145
|
+
// sustained loop. The primary defence is exporting under a suppressed context
|
|
146
|
+
// (see `export()`), but as belt-and-suspenders we also drop any span that
|
|
147
|
+
// targets the ingest origin, so an upstream instrumentation that ignores
|
|
148
|
+
// suppression still can't feed the loop.
|
|
149
|
+
// ---------------------------------------------------------------------------
|
|
150
|
+
/**
 * Extract the lower-cased hostname (port excluded) from an absolute URL.
 *
 * Comparing on `hostname` instead of `host` lets `ingest.heystack.dev` and
 * `ingest.heystack.dev:443` compare equal.
 *
 * @param {string} url - Absolute URL to parse.
 * @returns {string} The hostname, or "" when `url` cannot be parsed.
 */
function safeHostname(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        // Unparseable input is an expected case, not an error.
        return "";
    }
    return parsed.hostname.toLowerCase();
}
|
|
164
|
+
/**
 * Normalise a host-only attribute (`host` or `host:port`) to a lower-cased
 * hostname so it can be compared with the ingest hostname.
 *
 * - `ingest.heystack.dev:443` → `ingest.heystack.dev`
 * - `[::1]:443` → `[::1]` (bracketed IPv6 keeps its brackets, port dropped)
 * - a value with several colons and no brackets is returned whole
 *
 * @param {unknown} hostAttr - Raw attribute value.
 * @returns {string} The normalised hostname, or "" unless `hostAttr` is a
 *   non-empty string.
 */
function hostnameOf(hostAttr) {
    if (typeof hostAttr !== "string" || hostAttr.length === 0) {
        return "";
    }
    const raw = hostAttr.trim();
    // `end` marks where the hostname stops; by default keep the whole value.
    let end = raw.length;
    if (raw.startsWith("[")) {
        // Bracketed IPv6: keep everything up to and including the closing `]`.
        const bracket = raw.indexOf("]");
        if (bracket !== -1) {
            end = bracket + 1;
        }
    }
    else {
        // Exactly one colon means `host:port`; cut the port off.
        const first = raw.indexOf(":");
        if (first !== -1 && first === raw.lastIndexOf(":")) {
            end = first;
        }
    }
    return raw.slice(0, end).toLowerCase();
}
|
|
188
|
+
/** Span attribute keys whose value is a full HTTP URL (scheme, host, path). */
const HTTP_URL_ATTRS = [
    "url.full",
    "http.url",
];
/** Span attribute keys whose value is only a host (`host` or `host:port`). */
const HTTP_HOST_ATTRS = ["server.address", "net.peer.name", "net.peer.hostname", "http.host", "peer.address"];
|
|
198
|
+
/**
 * Decide whether a span's attributes point at the ingest host, i.e. the span
 * is (or may be) a trace of the exporter's own request.
 *
 * Full-URL attributes are parsed and matched on hostname, so a sibling domain
 * such as `myingest.heystack.dev` does not match while an explicit port such
 * as `ingest.heystack.dev:443` does. Host-only attributes have any `:port`
 * removed before the comparison.
 *
 * @param {Record<string, unknown>} attrs - Span attribute bag.
 * @param {string} ingestHost - Ingest hostname to compare against; a falsy
 *   value disables the check.
 * @returns {boolean} True when any URL or host attribute matches `ingestHost`.
 */
function isSelfSpanAttrs(attrs, ingestHost) {
    if (!ingestHost) {
        return false;
    }
    const matchesUrlAttr = HTTP_URL_ATTRS.some((key) => {
        const value = attrs[key];
        return typeof value === "string" && safeHostname(value) === ingestHost;
    });
    if (matchesUrlAttr) {
        return true;
    }
    return HTTP_HOST_ATTRS.some((key) => hostnameOf(attrs[key]) === ingestHost);
}
|
|
220
|
+
/**
 * Span-level wrapper around `isSelfSpanAttrs`: checks `span.attributes`
 * against the ingest hostname.
 */
function isSelfSpan(span, ingestHost) {
    const { attributes } = span;
    return isSelfSpanAttrs(attributes, ingestHost);
}
|
|
223
|
+
/**
 * Test-only entry point for the self-span check. Takes a plain attribute bag
 * and an ingest hostname, so no ReadableSpan has to be built. Pass
 * `ingestHost` as a bare lower-case hostname without a port, the same shape
 * the exporter gets from `safeHostname(cfg.url)`.
 *
 * @param {Record<string, unknown>} attrs - Span attribute bag.
 * @param {string} ingestHost - Bare ingest hostname.
 * @returns {boolean} Result of `isSelfSpanAttrs(attrs, ingestHost)`.
 */
export function isSelfSpanForTest(attrs, ingestHost) {
    const matched = isSelfSpanAttrs(attrs, ingestHost);
    return matched;
}
|
|
232
|
+
// ---------------------------------------------------------------------------
|
|
124
233
|
// Exporter
|
|
125
234
|
// ---------------------------------------------------------------------------
|
|
126
235
|
/**
|
|
@@ -129,6 +238,8 @@ export function serializeSpans(spans) {
|
|
|
129
238
|
*/
|
|
130
239
|
export class HeystackSpanExporter {
|
|
131
240
|
url;
|
|
241
|
+
/** Hostname (no port) of the ingest endpoint, used to drop self-trace spans. */
|
|
242
|
+
ingestHost;
|
|
132
243
|
headers;
|
|
133
244
|
shutdownState = false;
|
|
134
245
|
/**
|
|
@@ -152,6 +263,7 @@ export class HeystackSpanExporter {
|
|
|
152
263
|
constructor(options) {
|
|
153
264
|
const cfg = buildExporterConfig(options);
|
|
154
265
|
this.url = cfg.url;
|
|
266
|
+
this.ingestHost = safeHostname(cfg.url);
|
|
155
267
|
this.headers = {
|
|
156
268
|
...cfg.headers,
|
|
157
269
|
"content-type": "application/json",
|
|
@@ -169,11 +281,25 @@ export class HeystackSpanExporter {
|
|
|
169
281
|
resultCallback({ code: ExportResultCode.SUCCESS });
|
|
170
282
|
return;
|
|
171
283
|
}
|
|
172
|
-
|
|
284
|
+
// Belt-and-suspenders: drop the exporter's own self-trace spans (any span
|
|
285
|
+
// targeting the ingest origin) so an upstream instrumentation that ignores
|
|
286
|
+
// our suppressed context still can't feed the feedback loop.
|
|
287
|
+
const exportable = spans.filter((s) => !isSelfSpan(s, this.ingestHost));
|
|
288
|
+
if (exportable.length === 0) {
|
|
289
|
+
resultCallback({ code: ExportResultCode.SUCCESS });
|
|
290
|
+
return;
|
|
291
|
+
}
|
|
292
|
+
const body = JSON.stringify(serializeSpans(exportable));
|
|
173
293
|
// Build the fetch chain as a promise we retain, so forceFlush() can await
|
|
174
294
|
// the actual network write. It resolves (never rejects) once the POST has
|
|
175
295
|
// completed (success or fail) and resultCallback has been invoked.
|
|
176
|
-
|
|
296
|
+
//
|
|
297
|
+
// The POST runs inside a tracing-suppressed context so that host fetch
|
|
298
|
+
// auto-instrumentation (e.g. Next/OpenNext) does NOT create a CLIENT span
|
|
299
|
+
// for it — which would otherwise be exported and re-captured, a sustained
|
|
300
|
+
// feedback loop.
|
|
301
|
+
const p = context
|
|
302
|
+
.with(suppressTracing(context.active()), () => fetch(this.url, { method: "POST", headers: this.headers, body }))
|
|
177
303
|
.then((res) => {
|
|
178
304
|
if (res.ok) {
|
|
179
305
|
resultCallback({ code: ExportResultCode.SUCCESS });
|
|
@@ -199,20 +325,64 @@ export class HeystackSpanExporter {
|
|
|
199
325
|
// export runs during request handling, so this is available there.
|
|
200
326
|
// Either path makes delivery reliable on workerd/OpenNext with no app hook.
|
|
201
327
|
// The `pending` set + `flushHeystack()` remain as the explicit fallback.
|
|
202
|
-
|
|
203
|
-
|
|
328
|
+
if (this.waitUntil) {
|
|
329
|
+
try {
|
|
204
330
|
this.waitUntil(p);
|
|
205
331
|
}
|
|
206
|
-
|
|
332
|
+
catch (error) {
|
|
333
|
+
this.warnWaitUntilFailed("explicit waitUntil", error);
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
else {
|
|
337
|
+
// The accessor loads asynchronously, so the first export(s) may run before
|
|
338
|
+
// it resolves. Borrow `ctx.waitUntil` synchronously if it's already
|
|
339
|
+
// available; otherwise await the in-flight accessor load so even those
|
|
340
|
+
// early exports get handed to waitUntil once it resolves. `p` is already
|
|
341
|
+
// in `pending`, so a manual `flushHeystack()` covers it regardless.
|
|
342
|
+
this.handOffToCloudflareContext(p);
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
/**
|
|
346
|
+
* Hand `p` to the OpenNext Cloudflare request context's `ctx.waitUntil`.
|
|
347
|
+
* Resolves the accessor first (awaiting its in-flight load if needed) so an
|
|
348
|
+
* export that fired before the dynamic import settled is still covered.
|
|
349
|
+
*/
|
|
350
|
+
handOffToCloudflareContext(p) {
|
|
351
|
+
const attempt = () => {
|
|
352
|
+
if (!_getCloudflareContext)
|
|
353
|
+
return;
|
|
354
|
+
try {
|
|
207
355
|
const cf = _getCloudflareContext();
|
|
208
356
|
cf?.ctx?.waitUntil?.(p);
|
|
209
357
|
}
|
|
358
|
+
catch (error) {
|
|
359
|
+
// #15: a real failure inside getCloudflareContext (e.g. called outside a
|
|
360
|
+
// request context) was previously swallowed silently. Name it once so it
|
|
361
|
+
// is diagnosable, then fall back to the `pending` set + manual flush.
|
|
362
|
+
this.warnWaitUntilFailed("getCloudflareContext", error);
|
|
363
|
+
}
|
|
364
|
+
};
|
|
365
|
+
if (_getCloudflareContext) {
|
|
366
|
+
attempt();
|
|
210
367
|
}
|
|
211
|
-
|
|
212
|
-
//
|
|
213
|
-
|
|
368
|
+
else {
|
|
369
|
+
// Accessor not resolved yet — await its load, then try once.
|
|
370
|
+
void loadCloudflareContextAccessor().then(attempt);
|
|
214
371
|
}
|
|
215
372
|
}
|
|
373
|
+
warnedWaitUntilFailure = false;
|
|
374
|
+
/** Log a waitUntil/accessor failure exactly once; never throw. */
|
|
375
|
+
warnWaitUntilFailed(where, error) {
|
|
376
|
+
if (this.warnedWaitUntilFailure)
|
|
377
|
+
return;
|
|
378
|
+
this.warnedWaitUntilFailure = true;
|
|
379
|
+
const msg = error instanceof Error ? error.message : String(error);
|
|
380
|
+
// console.debug so it's quiet by default but visible when debugging dropped
|
|
381
|
+
// traces; the export still completes via the `pending` set + manual flush.
|
|
382
|
+
console.debug(`[heystack] could not hand export to ${where} (${msg}); ` +
|
|
383
|
+
"falling back to pending-set flush. Trace delivery may be best-effort " +
|
|
384
|
+
"unless you call flushHeystack() or pass an explicit waitUntil.");
|
|
385
|
+
}
|
|
216
386
|
shutdown() {
|
|
217
387
|
return this.forceFlush().then(() => {
|
|
218
388
|
this.shutdownState = true;
|
|
@@ -251,6 +421,82 @@ export function createTracerProvider(config) {
|
|
|
251
421
|
// Global tracer provider registration (for host frameworks, e.g. Next.js)
|
|
252
422
|
// ---------------------------------------------------------------------------
|
|
253
423
|
let _provider = null;
|
|
424
|
+
// ---------------------------------------------------------------------------
|
|
425
|
+
// Context manager registration (makes suppressTracing() actually work)
|
|
426
|
+
//
|
|
427
|
+
// `context.with(...)` is a NO-OP unless a ContextManager is registered with the
|
|
428
|
+
// global OTel API. Without one, `suppressTracing(context.active())` produces a
|
|
429
|
+
// context that is never made active, so the exporter's POST is NOT suppressed
|
|
430
|
+
// in production and host fetch auto-instrumentation can re-trace it (feedback
|
|
431
|
+
// loop). We therefore register a manager exactly ONCE in `ensureGlobalProvider`.
|
|
432
|
+
//
|
|
433
|
+
// We register a dependency-free SYNCHRONOUS stack manager (below). Deliberately
|
|
434
|
+
// NOT AsyncLocalStorageContextManager: that statically imports `node:async_hooks`,
|
|
435
|
+
// which would break `import "@heystack/otel/workers"` on a bare workerd without
|
|
436
|
+
// `nodejs_compat` (and on other WinterCG runtimes) — defeating the whole point of
|
|
437
|
+
// this entry being node-builtin-free. The sync manager covers the critical path:
|
|
438
|
+
// the exporter's POST runs synchronously inside the suppressed `context.with`, so
|
|
439
|
+
// `suppressTracing` takes effect. Trade-off: no cross-`await` context propagation,
|
|
440
|
+
// so deep nested-span parenting is limited on the edge (documented).
|
|
441
|
+
// ---------------------------------------------------------------------------
|
|
442
|
+
/**
 * A minimal SYNCHRONOUS, stack-based ContextManager with no dependency on
 * `node:async_hooks`.
 *
 * `with()` makes a context active for the synchronous duration of a callback,
 * which is what lets `context.with(suppressTracing(...), ...)` take effect.
 * The context is popped as soon as the callback returns, so it is NOT carried
 * across `await` boundaries: code that runs after the first `await` inside the
 * callback sees whatever context is on top of the stack at that time.
 */
export class SyncStackContextManager {
    /** Active contexts, innermost last. */
    _stack = [];
    /**
     * @returns The innermost active context, or `ROOT_CONTEXT` when no
     *   `with()` call is currently on the stack.
     */
    active() {
        return this._stack[this._stack.length - 1] ?? ROOT_CONTEXT;
    }
    /**
     * Run `fn` with `ctx` as the active context.
     *
     * @param ctx - Context to activate while `fn` runs synchronously.
     * @param fn - Callback to invoke.
     * @param thisArg - `this` value for `fn`.
     * @param args - Arguments forwarded to `fn`.
     * @returns Whatever `fn` returns; anything `fn` throws propagates.
     */
    with(ctx, fn, thisArg, ...args) {
        this._stack.push(ctx);
        try {
            return fn.call(thisArg, ...args);
        }
        finally {
            // Always restore the previous context, even when `fn` throws.
            this._stack.pop();
        }
    }
    /**
     * Bind `ctx` to `target`.
     *
     * A function target is wrapped so that every call runs (synchronously)
     * with `ctx` active; the wrapper forwards `this` and arguments and keeps
     * the original `length`. Any other target is returned unchanged.
     *
     * @param ctx - Context to activate on each call of the bound function.
     * @param target - Function to bind, or any other value.
     * @returns The bound wrapper for functions, otherwise `target` itself.
     */
    bind(ctx, target) {
        if (typeof target !== "function") {
            return target;
        }
        const manager = this;
        const bound = function (...args) {
            return manager.with(ctx, () => target.apply(this, args));
        };
        // Mirror the target's arity so callers inspecting `length` still work.
        Object.defineProperty(bound, "length", {
            enumerable: false,
            configurable: true,
            writable: false,
            value: target.length,
        });
        return bound;
    }
    /** No setup is required; returns `this` for chaining. */
    enable() {
        return this;
    }
    /** Drop every active context and return `this` for chaining. */
    disable() {
        this._stack = [];
        return this;
    }
}
|
|
475
|
+
/** Guard so the global ContextManager registration is attempted only once. */
let _contextManagerRegistered = false;
/**
 * Register a `SyncStackContextManager` as the global OTel ContextManager, at
 * most once per guard cycle.
 *
 * A registered manager is what makes `context.with(suppressTracing(...))` in
 * the exporter actually activate the suppressed context. The synchronous
 * stack manager is used because it needs no `node:async_hooks` import. The
 * trade-off is that context does not propagate across `await` boundaries.
 */
function ensureContextManager() {
    if (!_contextManagerRegistered) {
        // Latch first so re-entrant or repeated calls are no-ops.
        _contextManagerRegistered = true;
        const manager = new SyncStackContextManager();
        context.setGlobalContextManager(manager.enable());
    }
}
|
|
496
|
+
/**
 * Internal/testing helper: clear the registration guard so the next
 * `ensureContextManager()` call attempts registration again. Only the guard
 * flag is reset here; nothing is unregistered from the global OTel API.
 */
export function __resetContextManager() {
    _contextManagerRegistered = false;
}
|
|
254
500
|
/**
|
|
255
501
|
* Build (once) and register the singleton global tracer provider. Wires the
|
|
256
502
|
* exporter's `waitUntil` (explicit override > nothing here; the auto-detected
|
|
@@ -270,9 +516,12 @@ function ensureGlobalProvider(config) {
|
|
|
270
516
|
void loadCloudflareContextAccessor();
|
|
271
517
|
// Register as the global provider so framework / global-API spans flow to our
|
|
272
518
|
// exporter. We set it directly (rather than provider.register()) so it's
|
|
273
|
-
// deterministic and
|
|
274
|
-
// workerd.
|
|
519
|
+
// deterministic and so we control exactly which ContextManager is registered.
|
|
275
520
|
trace.setGlobalTracerProvider(_provider);
|
|
521
|
+
// Register a ContextManager (once) so `context.with(suppressTracing(...))` in
|
|
522
|
+
// the exporter actually takes effect — without one, `context.with` is a no-op
|
|
523
|
+
// and suppression silently does nothing in production.
|
|
524
|
+
ensureContextManager();
|
|
276
525
|
return _provider;
|
|
277
526
|
}
|
|
278
527
|
/**
|
|
@@ -324,52 +573,60 @@ function warnOnceNoKey() {
|
|
|
324
573
|
export function __resetWarnings() {
|
|
325
574
|
warnedNoKey = false;
|
|
326
575
|
}
|
|
327
|
-
/**
|
|
328
|
-
* Wrap a Worker's default export so every request is auto-traced with a SERVER
|
|
329
|
-
* span.
|
|
330
|
-
*
|
|
331
|
-
* FLUSH (CRITICAL on Workers/edge): the export is a `fetch()` POST. After
|
|
332
|
-
* `span.end()` we `ctx.waitUntil` a promise that awaits BOTH the provider's
|
|
333
|
-
* span processor AND the exporter's in-flight fetch, so the network write
|
|
334
|
-
* completes before the isolate is torn down. Without this, fast-responding
|
|
335
|
-
* handlers return before the POST finishes and the trace is silently dropped.
|
|
336
|
-
*
|
|
337
|
-
* import { instrument } from "@heystack/otel/workers";
|
|
338
|
-
* export default instrument(
|
|
339
|
-
* { async fetch(req, env, ctx) { return new Response("ok"); } },
|
|
340
|
-
* { service: "my-worker" },
|
|
341
|
-
* );
|
|
342
|
-
*
|
|
343
|
-
* TRACE TREE: `instrument()` sets up the singleton GLOBAL tracer provider and
|
|
344
|
-
* creates the per-request SERVER span via the global tracer
|
|
345
|
-
* (`trace.getTracer("heystack")`). This means nested spans created through the
|
|
346
|
-
* global `trace.getTracer()` API (framework / library / your own manual spans)
|
|
347
|
-
* also flow to the exporter — you get a trace tree, not a lone SERVER span.
|
|
348
|
-
*
|
|
349
|
-
* If no API key is available (neither `config.apiKey` nor
|
|
350
|
-
* `env.HEYSTACK_API_KEY`), the handler runs untraced.
|
|
351
|
-
*/
|
|
352
576
|
export function instrument(handler, config) {
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
577
|
+
// Per-invocation setup: pick the API key (`config.apiKey` wins over
// `env.HEYSTACK_API_KEY`) and make sure the global provider exists. Yields
// null when there is no key, which tells the caller to run untraced.
const setup = (env) => {
    const apiKey = config.apiKey ?? env?.HEYSTACK_API_KEY;
    if (apiKey) {
        // Going through the global provider means spans created via the global
        // `trace.getTracer()` API (framework, library, manual) are exported
        // too, giving a trace tree rather than a lone root span.
        const { service, endpoint, waitUntil } = config;
        const provider = ensureGlobalProvider({ apiKey, service, endpoint, waitUntil });
        const tracer = trace.getTracer("heystack");
        return { provider, tracer };
    }
    warnOnceNoKey();
    return null;
};
|
|
596
|
+
// Drain BOTH the provider's span processor AND the exporter's in-flight fetch
// via ctx.waitUntil. Awaiting only provider.forceFlush() would return before
// the export POST completes, letting the isolate be torn down and silently
// dropping the trace.
//
// #25: the drain is raced against an ~8s timeout so a hung forceFlush (e.g. an
// ingest that never responds) can't pin the isolate until the platform CPU
// timeout kills it. On timeout we stop waiting; the POST is still in-flight and
// may yet complete, we just don't block the isolate on it indefinitely.
//
// `drain` must never throw and the promise it hands off must never reject: it
// is called from inside the handlers' try blocks, so a failure here would turn
// a successful handler run into an error (and, for queues, a retried batch).
const DRAIN_TIMEOUT_MS = 8_000;
const drain = (provider, ctx) => {
    const drained = (async () => {
        // Each flush is best-effort; try/catch also covers a flush that throws
        // synchronously instead of returning a rejected promise.
        try {
            await provider.forceFlush();
        }
        catch {
            // Ignored: a processor flush failure must not block the exporter flush.
        }
        try {
            await provider.heystackExporter.forceFlush();
        }
        catch {
            // Ignored: delivery is best-effort at this point.
        }
    })();
    let timer;
    const timeout = new Promise((resolve) => {
        timer = setTimeout(resolve, DRAIN_TIMEOUT_MS);
    });
    // Build the settled promise first so the timer is cleared even when the
    // hand-off below fails.
    const settled = Promise.race([drained, timeout]).finally(() => {
        clearTimeout(timer);
    });
    try {
        ctx.waitUntil(settled);
    }
    catch (error) {
        // No usable execution context (e.g. handler invoked without `ctx`, or
        // `waitUntil` threw). The flush is already running; report once through
        // the exporter's warning hook when it is available.
        provider.heystackExporter?.warnWaitUntilFailed?.("ctx.waitUntil", error);
    }
};
|
|
620
|
+
// Start from a shallow copy so EVERY sibling handler (tail, email, …) is
|
|
621
|
+
// forwarded untouched; we only override fetch/queue/scheduled below.
|
|
622
|
+
const wrapped = { ...handler };
|
|
623
|
+
const originalFetch = handler.fetch?.bind(handler);
|
|
624
|
+
if (originalFetch) {
|
|
625
|
+
wrapped.fetch = async (req, env, ctx) => {
|
|
626
|
+
const s = setup(env);
|
|
627
|
+
if (!s)
|
|
628
|
+
return originalFetch(req, env, ctx);
|
|
629
|
+
const { provider, tracer } = s;
|
|
373
630
|
const url = new URL(req.url);
|
|
374
631
|
const span = tracer.startSpan(`${req.method} ${url.pathname}`, {
|
|
375
632
|
kind: SpanKind.SERVER,
|
|
@@ -380,35 +637,90 @@ export function instrument(handler, config) {
|
|
|
380
637
|
"server.address": url.host,
|
|
381
638
|
},
|
|
382
639
|
});
|
|
383
|
-
// waitUntil a promise that drains BOTH the provider's span processor and
|
|
384
|
-
// the exporter's in-flight fetch. Awaiting only provider.forceFlush()
|
|
385
|
-
// would return before the export POST completes, letting the isolate be
|
|
386
|
-
// torn down and silently dropping the trace.
|
|
387
|
-
const flush = () => ctx.waitUntil((async () => {
|
|
388
|
-
await provider.forceFlush().catch(() => { });
|
|
389
|
-
await provider.heystackExporter.forceFlush().catch(() => { });
|
|
390
|
-
})());
|
|
391
640
|
try {
|
|
392
|
-
const response = await context.with(trace.setSpan(context.active(), span), () =>
|
|
641
|
+
const response = await context.with(trace.setSpan(context.active(), span), () => originalFetch(req, env, ctx));
|
|
393
642
|
span.setAttribute("http.response.status_code", response.status);
|
|
394
643
|
span.setStatus({
|
|
395
644
|
code: response.status >= 500 ? SpanStatusCode.ERROR : SpanStatusCode.UNSET,
|
|
396
645
|
});
|
|
397
646
|
span.end();
|
|
398
|
-
|
|
647
|
+
drain(provider, ctx);
|
|
399
648
|
return response;
|
|
400
649
|
}
|
|
401
650
|
catch (error) {
|
|
402
|
-
|
|
403
|
-
span.recordException(error);
|
|
651
|
+
span.recordException(error instanceof Error ? error : new Error(String(error)));
|
|
404
652
|
span.setStatus({
|
|
405
653
|
code: SpanStatusCode.ERROR,
|
|
406
654
|
message: error instanceof Error ? error.message : String(error),
|
|
407
655
|
});
|
|
408
656
|
span.end();
|
|
409
|
-
|
|
657
|
+
drain(provider, ctx);
|
|
410
658
|
throw error;
|
|
411
659
|
}
|
|
412
|
-
}
|
|
413
|
-
}
|
|
660
|
+
};
|
|
661
|
+
}
|
|
662
|
+
const originalQueue = handler.queue?.bind(handler);
|
|
663
|
+
if (originalQueue) {
|
|
664
|
+
wrapped.queue = async (batch, env, ctx) => {
|
|
665
|
+
const s = setup(env);
|
|
666
|
+
if (!s)
|
|
667
|
+
return originalQueue(batch, env, ctx);
|
|
668
|
+
const { provider, tracer } = s;
|
|
669
|
+
const span = tracer.startSpan(`queue ${batch.queue}`, {
|
|
670
|
+
kind: SpanKind.CONSUMER,
|
|
671
|
+
attributes: {
|
|
672
|
+
"messaging.batch.message_count": batch.messages.length,
|
|
673
|
+
"messaging.destination.name": batch.queue,
|
|
674
|
+
},
|
|
675
|
+
});
|
|
676
|
+
try {
|
|
677
|
+
await context.with(trace.setSpan(context.active(), span), () => originalQueue(batch, env, ctx));
|
|
678
|
+
span.setStatus({ code: SpanStatusCode.UNSET });
|
|
679
|
+
span.end();
|
|
680
|
+
drain(provider, ctx);
|
|
681
|
+
}
|
|
682
|
+
catch (error) {
|
|
683
|
+
span.recordException(error instanceof Error ? error : new Error(String(error)));
|
|
684
|
+
span.setStatus({
|
|
685
|
+
code: SpanStatusCode.ERROR,
|
|
686
|
+
message: error instanceof Error ? error.message : String(error),
|
|
687
|
+
});
|
|
688
|
+
span.end();
|
|
689
|
+
drain(provider, ctx);
|
|
690
|
+
throw error;
|
|
691
|
+
}
|
|
692
|
+
};
|
|
693
|
+
}
|
|
694
|
+
const originalScheduled = handler.scheduled?.bind(handler);
|
|
695
|
+
if (originalScheduled) {
|
|
696
|
+
wrapped.scheduled = async (controller, env, ctx) => {
|
|
697
|
+
const s = setup(env);
|
|
698
|
+
if (!s)
|
|
699
|
+
return originalScheduled(controller, env, ctx);
|
|
700
|
+
const { provider, tracer } = s;
|
|
701
|
+
const span = tracer.startSpan(`scheduled ${controller.cron}`, {
|
|
702
|
+
kind: SpanKind.INTERNAL,
|
|
703
|
+
attributes: {
|
|
704
|
+
"controller.cron": controller.cron,
|
|
705
|
+
},
|
|
706
|
+
});
|
|
707
|
+
try {
|
|
708
|
+
await context.with(trace.setSpan(context.active(), span), () => originalScheduled(controller, env, ctx));
|
|
709
|
+
span.setStatus({ code: SpanStatusCode.UNSET });
|
|
710
|
+
span.end();
|
|
711
|
+
drain(provider, ctx);
|
|
712
|
+
}
|
|
713
|
+
catch (error) {
|
|
714
|
+
span.recordException(error instanceof Error ? error : new Error(String(error)));
|
|
715
|
+
span.setStatus({
|
|
716
|
+
code: SpanStatusCode.ERROR,
|
|
717
|
+
message: error instanceof Error ? error.message : String(error),
|
|
718
|
+
});
|
|
719
|
+
span.end();
|
|
720
|
+
drain(provider, ctx);
|
|
721
|
+
throw error;
|
|
722
|
+
}
|
|
723
|
+
};
|
|
724
|
+
}
|
|
725
|
+
return wrapped;
|
|
414
726
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@heystack/otel",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.2",
|
|
4
4
|
"description": "Runtime-aware OpenTelemetry tracing that exports to Heystack (Node, Next.js, Workers).",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "module",
|
|
@@ -22,6 +22,7 @@
|
|
|
22
22
|
},
|
|
23
23
|
"dependencies": {
|
|
24
24
|
"@opentelemetry/api": "^1.9.0",
|
|
25
|
+
"@opentelemetry/core": "^1.30.0",
|
|
25
26
|
"@opentelemetry/sdk-node": "^0.57.0",
|
|
26
27
|
"@opentelemetry/exporter-trace-otlp-http": "^0.57.0",
|
|
27
28
|
"@opentelemetry/auto-instrumentations-node": "^0.55.0",
|