@heystack/otel 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -3
- package/dist/llm-enrich.d.ts +15 -0
- package/dist/llm-enrich.js +129 -0
- package/dist/workers-bindings.d.ts +26 -8
- package/dist/workers-bindings.js +167 -7
- package/dist/workers.d.ts +15 -0
- package/dist/workers.js +58 -3
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -90,7 +90,7 @@ export default instrument(
|
|
|
90
90
|
getUser: (req) => ({
|
|
91
91
|
id: req.headers.get("x-user-id") ?? undefined,
|
|
92
92
|
}),
|
|
93
|
-
instrumentBindings: true, // auto-trace D1/KV/R2/Vectorize
|
|
93
|
+
instrumentBindings: true, // auto-trace D1/KV/R2/Vectorize/AI/Queues/Service bindings
|
|
94
94
|
},
|
|
95
95
|
);
|
|
96
96
|
```
|
|
@@ -116,8 +116,9 @@ Set the key as a secret: `wrangler secret put HEYSTACK_API_KEY`.
|
|
|
116
116
|
| `service` | `string` | **Required.** Service name that appears in the Heystack console. |
|
|
117
117
|
| `apiKey` | `string?` | Defaults to `env.HEYSTACK_API_KEY`. |
|
|
118
118
|
| `getUser` | `(req: Request) => { id?, session?, requestId? } \| undefined` | Called per request. `id` → `enduser.id`, `session` → `session.id`, `requestId` → `http.request.id` (falls back to the `cf-ray` header). |
|
|
119
|
-
| `instrumentBindings` | `boolean \| string[]` | `true` = auto child spans for all detected D1/KV/R2/Vectorize bindings; `string[]` = only the named bindings. Default `false`. |
|
|
119
|
+
| `instrumentBindings` | `boolean \| string[]` | `true` = auto child spans for all detected D1 / KV / R2 / Vectorize / Workers AI / Queue producer / Service bindings; `string[]` = only the named bindings. Default `false`. |
|
|
120
120
|
| `sampling` | `{ rate?: number } \| { remote: true }` | Head-sampling configuration. `{ rate }`: keep a deterministic fraction of fresh root traces (0–1; default `1` = keep all). `{ remote: true }`: fetch the rate from the Heystack config endpoint instead — lets you change it centrally without redeploying. Cold isolates keep all traffic until the first config fetch resolves; fails open if the config can't be reached. Parent-respecting in both modes: a request arriving with a sampled `traceparent` is always recorded. See [Head sampling](#head-sampling) below. |
|
|
121
|
+
| `ai` | `{ captureContent?: boolean; redact?: (text: string) => string; maxContentChars?: number }` | LLM/gen_ai capture for outbound calls to known providers. See [AI / LLM observability](#ai--llm-observability) below. |
|
|
121
122
|
| `waitUntil` | `(p: Promise<unknown>) => void` | Override the isolate keep-alive hook; defaults to the auto-detected `ctx.waitUntil`. |
|
|
122
123
|
| `endpoint` | `string?` | Override the ingest endpoint (advanced). |
|
|
123
124
|
|
|
@@ -159,7 +160,7 @@ On startup the worker fetches its configured rate from the Heystack config endpo
|
|
|
159
160
|
- **Outbound `fetch`** — each outbound subrequest while a request span is active gets a CLIENT child span (`http.request.method`, `url.full`, `server.address`, `http.response.status_code`). A W3C `traceparent` header is injected into the subrequest so a downstream Heystack-instrumented service continues the same trace (distributed tracing across services). The exporter's own ingest POST is never traced.
|
|
160
161
|
- **Queue consumers (`queue`)** — a CONSUMER span per batch, with `messaging.destination.name` (queue name) and `messaging.batch.message_count`.
|
|
161
162
|
- **Scheduled handlers (`scheduled`)** — an INTERNAL span per invocation, with `controller.cron`.
|
|
162
|
-
- **Binding calls** (when `instrumentBindings` is set) — a child span for every D1 query (`db.statement`), KV read/write, R2 operation, and
|
|
163
|
+
- **Binding calls** (when `instrumentBindings` is set) — a child span for every D1 query (`db.statement`), KV read/write, R2 operation, Vectorize query, Workers AI inference (`gen_ai.*` attributes including model name and token usage), Queue `.send`/`.sendBatch` (PRODUCER spans with `messaging.*` attributes), and Service binding `.fetch` calls (CLIENT spans with `traceparent` injected so calls to other Workers stitch into the same distributed trace).
|
|
163
164
|
|
|
164
165
|
### Client enrichment
|
|
165
166
|
|
|
@@ -173,6 +174,64 @@ These attributes are set automatically on every SERVER span from request metadat
|
|
|
173
174
|
| `client.address` | `CF-Connecting-IP` header |
|
|
174
175
|
| `geo.country`, `geo.region`, `geo.city`, `geo.asn` | Cloudflare `req.cf` object |
|
|
175
176
|
|
|
177
|
+
### AI / LLM observability
|
|
178
|
+
|
|
179
|
+
When your Worker calls an LLM API, `instrument()` automatically attaches [OpenTelemetry gen_ai semantic conventions](https://opentelemetry.io/docs/specs/semconv/gen-ai/) to the outbound CLIENT span. No extra code is needed — detection is based on the target hostname.
|
|
180
|
+
|
|
181
|
+
**Detected providers:**
|
|
182
|
+
- **OpenAI** — `api.openai.com`
|
|
183
|
+
- **Anthropic** — `api.anthropic.com`
|
|
184
|
+
- **Cloudflare AI Gateway** — any host ending in `gateway.ai.cloudflare.com`
|
|
185
|
+
- **Google** — `generativelanguage.googleapis.com`
|
|
186
|
+
|
|
187
|
+
**Attributes captured automatically (metadata, always on):**
|
|
188
|
+
|
|
189
|
+
| Attribute | Description |
|
|
190
|
+
| --- | --- |
|
|
191
|
+
| `gen_ai.system` | Provider name (`openai`, `anthropic`, `cloudflare`, `google`) |
|
|
192
|
+
| `gen_ai.request.model` | Model from the request body (`body.model`) |
|
|
193
|
+
| `gen_ai.request.max_tokens` | Max tokens from the request (`max_tokens` or `max_completion_tokens`) |
|
|
194
|
+
| `gen_ai.request.temperature` | Temperature from the request |
|
|
195
|
+
| `gen_ai.response.model` | Model from the response body |
|
|
196
|
+
| `gen_ai.response.id` | Response ID |
|
|
197
|
+
| `gen_ai.usage.input_tokens` | Input/prompt tokens used |
|
|
198
|
+
| `gen_ai.usage.output_tokens` | Output/completion tokens used |
|
|
199
|
+
| `gen_ai.response.finish_reason` | Stop reason (`stop`, `end_turn`, etc.) |
|
|
200
|
+
|
|
201
|
+
Streaming responses (`text/event-stream`) skip response enrichment (the body is not consumed) but request-side attributes are still set.
|
|
202
|
+
|
|
203
|
+
**Content capture (opt-in, strongly recommended for AI-app root-cause analysis, RCA):**
|
|
204
|
+
|
|
205
|
+
Prompt and completion text are **not captured by default** — enable `ai.captureContent: true` to capture them. This is strongly recommended when debugging AI app issues (wrong answers, unexpected outputs, prompt regressions) since without the content you can see that an LLM call happened and how long it took but not what was said.
|
|
206
|
+
|
|
207
|
+
Use `redact` to scrub sensitive fields before they leave the Worker. Values are truncated to `maxContentChars` (default 8000) before storage.
|
|
208
|
+
|
|
209
|
+
```ts
|
|
210
|
+
export default instrument(worker, {
|
|
211
|
+
service: "my-ai-worker",
|
|
212
|
+
ai: {
|
|
213
|
+
captureContent: true, // capture prompts + completions (off by default)
|
|
214
|
+
redact: (text) => // optional: scrub sensitive text
|
|
215
|
+
text.replace(/sk-[a-zA-Z0-9]+/g, "[REDACTED]"),
|
|
216
|
+
maxContentChars: 4000, // optional: cap per-value length (default 8000)
|
|
217
|
+
},
|
|
218
|
+
});
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
When `captureContent` is enabled, two additional attributes appear on the CLIENT span:
|
|
222
|
+
|
|
223
|
+
| Attribute | Description |
|
|
224
|
+
| --- | --- |
|
|
225
|
+
| `gen_ai.prompt` | The request messages array (JSON-serialised, truncated) |
|
|
226
|
+
| `gen_ai.completion` | The first choice/content block from the response |
|
|
227
|
+
|
|
228
|
+
**Body safety guarantees:**
|
|
229
|
+
- Request body: only read when it is already a plain `string` (the common LLM SDK case). Non-string bodies (streams, FormData, etc.) are never touched.
|
|
230
|
+
- Response body: read only from `response.clone()`, so the original response is returned untouched and your handler can call `.json()` / `.text()` normally.
|
|
231
|
+
- Parse failures or unexpected shapes degrade silently to the plain CLIENT span — no errors are thrown.
|
|
232
|
+
- For a detected provider with a JSON response, the cloned body is parsed before the `fetch()` promise resolves to your handler (the span must close with usage attributes). This adds negligible latency to an already-slow LLM call.
|
|
233
|
+
- The response size guard (512 KB) only applies when the response carries a `content-length` header; a chunked JSON response without one is still read. The known providers bound this via `max_tokens`, so it is safe in practice.
|
|
234
|
+
|
|
176
235
|
### Manual spans: `withSpan` / `addEvent`
|
|
177
236
|
|
|
178
237
|
Inside a traced handler, add finer-grained spans without touching the OpenTelemetry API directly:
|
|
@@ -305,6 +364,8 @@ As belt-and-suspenders the exporter also drops any span whose HTTP target points
|
|
|
305
364
|
|
|
306
365
|
## Migration / versioning
|
|
307
366
|
|
|
367
|
+
- **`0.9.0`** — **`/workers`: automatic LLM gen_ai enrichment for outbound API calls.** Outbound `fetch` calls to known LLM providers (OpenAI, Anthropic, Cloudflare AI Gateway, Google) automatically gain `gen_ai.*` OTel semantic-convention attributes on the CLIENT span — model, token counts, finish reason, response ID — with no extra code. New optional `WorkersConfig.ai` option: `captureContent: true` also captures prompt/completion text (off by default; **strongly recommended for AI-app RCA**), with `redact` for scrubbing and `maxContentChars` for length capping. The original request/response bodies are never consumed (request read only when already a string; response via `response.clone()`). Streaming responses skip response enrichment. No breaking changes.
|
|
368
|
+
- **`0.8.0`** — **`/workers`: Workers AI, Queue producer, and Service binding instrumentation.** `instrumentBindings: true` now auto-wraps three additional binding types: `env.AI.run()` emits CLIENT spans with `gen_ai.system`, `gen_ai.request.model`, and `gen_ai.usage.input_tokens`/`output_tokens` (streaming results are never consumed); Queue `.send`/`.sendBatch` emit PRODUCER spans with `messaging.*` attributes including batch size; Service binding `.fetch` emits a CLIENT span and injects a W3C `traceparent` header into the outgoing request so calls to other Workers appear in the same distributed trace. `startSpan` factory now accepts an optional `SpanKind` for correct CLIENT/PRODUCER categorisation. No breaking changes.
|
|
308
369
|
- **`0.7.0`** — **`/workers`: remote sampling (`sampling: { remote: true }`).** New `sampling` variant that fetches the head-sampling rate from the Heystack config endpoint at runtime, so you can change it from the console without redeploying. Cold isolates keep all traffic until the first config fetch resolves (fails open). If the config endpoint is unreachable, the worker keeps everything. Same parent-respecting rule as `sampling: { rate }`. No breaking changes; existing `sampling: { rate }` configs are unchanged.
|
|
309
370
|
- **`0.6.0`** — **`/workers`: head sampling (`sampling: { rate }`).** New optional `WorkersConfig` field: `sampling.rate` (0–1, default `1`). Keeps a deterministic fraction of fresh root traces — the drop decision is made in the worker before export (no egress, no ingest cost). Parent-respecting: requests arriving with a sampled `traceparent` are always recorded. Consistent with server-side sampling (same trace-ID hash). No breaking changes; all new options are optional. See [Head sampling](#head-sampling).
|
|
310
371
|
- **`0.5.0`** — **`/workers`: identity enrichment, binding tracing, outbound-fetch tracing, manual span helpers.** New `WorkersConfig` options: `getUser` (attach `enduser.id`/`session.id`/`http.request.id` per request from a synchronous callback), `instrumentBindings` (auto child spans for D1/KV/R2/Vectorize — `true` = all detected, or a `string[]` to select). Outbound `fetch` calls made inside a traced handler automatically get CLIENT child spans with `traceparent` injection (distributed tracing across services). New ergonomic exports from `/workers`: `withSpan(name, attrs?, fn)` runs a function inside a named child span (auto-parented, exceptions recorded, `span.end()` in `finally`); `addEvent(name, attrs?)` adds an event to the active span. No breaking changes; all new options are optional.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
export type LLMProvider = "openai" | "anthropic" | "cloudflare" | "google";
|
|
2
|
+
/** Return the provider for a known LLM host, else null. Host is lower-case, no port. */
|
|
3
|
+
export declare function detectLLMProvider(host: string): LLMProvider | null;
|
|
4
|
+
export interface LLMRequestAttrs {
|
|
5
|
+
[k: string]: string | number;
|
|
6
|
+
}
|
|
7
|
+
/** Best-effort gen_ai.request.* from a parsed JSON request body. Never throws. */
|
|
8
|
+
export declare function llmRequestAttrs(provider: LLMProvider, body: unknown): LLMRequestAttrs;
|
|
9
|
+
/** Best-effort gen_ai.response.* + usage from a parsed JSON response body. Never throws. */
|
|
10
|
+
export declare function llmResponseAttrs(provider: LLMProvider, body: unknown): Record<string, string | number>;
|
|
11
|
+
/** Content attrs, ONLY when captureContent is on. Size-capped per value, redactor applied. */
|
|
12
|
+
export declare function llmContentAttrs(provider: LLMProvider, reqBody: unknown, resBody: unknown, opts: {
|
|
13
|
+
redact?: (text: string) => string;
|
|
14
|
+
maxChars?: number;
|
|
15
|
+
}): Record<string, string>;
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
// WinterCG-safe LLM enrichment helpers — pure string/object logic, no Node imports.
|
|
2
|
+
// Used by the /workers fetch wrapper to attach gen_ai.* semantic-convention
|
|
3
|
+
// attributes to outbound calls to known LLM providers.
|
|
4
|
+
/**
 * Return the provider for a known LLM host, else null.
 *
 * Matching is case-insensitive. A single trailing dot (the fully-qualified
 * form, e.g. `api.openai.com.`) is ignored so that such hosts are still
 * recognised. The host must not include a port.
 *
 * @param host - Hostname of the outbound request.
 * @returns `"openai"`, `"anthropic"`, `"google"`, `"cloudflare"`, or `null`
 *   when the host is not a known LLM endpoint.
 */
export function detectLLMProvider(host) {
    let h = host.toLowerCase();
    // Normalise the FQDN form: "api.openai.com." names the same host.
    if (h.endsWith("."))
        h = h.slice(0, -1);
    if (h === "api.openai.com")
        return "openai";
    if (h === "api.anthropic.com")
        return "anthropic";
    if (h === "generativelanguage.googleapis.com")
        return "google";
    // Require a label boundary (leading dot) so "evilgateway.ai.cloudflare.com"
    // is not mistaken for an AI Gateway host.
    if (h === "gateway.ai.cloudflare.com" || h.endsWith(".gateway.ai.cloudflare.com"))
        return "cloudflare";
    return null;
}
|
|
17
|
+
/**
 * Best-effort `gen_ai.request.*` attributes from a parsed JSON request body.
 * Never throws.
 *
 * @param provider - Detected provider; always emitted as `gen_ai.system`.
 * @param body - Parsed request body. Anything that is not a plain (non-array)
 *   object yields only `gen_ai.system`.
 * @returns Attribute map containing `gen_ai.system` plus whichever of
 *   `gen_ai.request.model`, `gen_ai.request.max_tokens` and
 *   `gen_ai.request.temperature` are present with the expected type.
 */
export function llmRequestAttrs(provider, body) {
    const out = { "gen_ai.system": provider };
    if (!body || typeof body !== "object" || Array.isArray(body))
        return out;
    const b = body;
    if (typeof b["model"] === "string")
        out["gen_ai.request.model"] = b["model"];
    // max_tokens: Anthropic uses max_tokens; OpenAI supports both max_tokens and
    // max_completion_tokens. Prefer max_tokens, but fall through to
    // max_completion_tokens whenever max_tokens is not a usable number (not just
    // when it is null/undefined), so a malformed first field cannot hide a
    // valid second one.
    const maxTokens = [b["max_tokens"], b["max_completion_tokens"]].find((v) => typeof v === "number");
    if (maxTokens !== undefined)
        out["gen_ai.request.max_tokens"] = maxTokens;
    if (typeof b["temperature"] === "number")
        out["gen_ai.request.temperature"] = b["temperature"];
    return out;
}
|
|
34
|
+
/**
 * Best-effort `gen_ai.response.*` and `gen_ai.usage.*` attributes from a
 * parsed JSON response body. Never throws.
 *
 * Recognises both the OpenAI shape (`usage.prompt_tokens` /
 * `usage.completion_tokens`, `choices[0].finish_reason`) and the Anthropic
 * shape (`usage.input_tokens` / `usage.output_tokens`, `stop_reason`).
 *
 * @param provider - Detected provider (currently unused; reserved for
 *   provider-specific logic).
 * @param body - Parsed response body. Anything that is not a plain
 *   (non-array) object yields an empty map.
 * @returns Attribute map with whichever response attributes could be read.
 */
export function llmResponseAttrs(provider, body) {
    const out = {};
    if (!body || typeof body !== "object" || Array.isArray(body))
        return out;
    const b = body;
    if (typeof b["model"] === "string")
        out["gen_ai.response.model"] = b["model"];
    if (typeof b["id"] === "string")
        out["gen_ai.response.id"] = b["id"];
    const usage = b["usage"];
    if (usage && typeof usage === "object" && !Array.isArray(usage)) {
        // OpenAI shape
        if (typeof usage["prompt_tokens"] === "number") {
            out["gen_ai.usage.input_tokens"] = usage["prompt_tokens"];
        }
        if (typeof usage["completion_tokens"] === "number") {
            out["gen_ai.usage.output_tokens"] = usage["completion_tokens"];
        }
        // Anthropic shape (assigned second so it overrides OpenAI if both are present)
        if (typeof usage["input_tokens"] === "number") {
            out["gen_ai.usage.input_tokens"] = usage["input_tokens"];
        }
        if (typeof usage["output_tokens"] === "number") {
            out["gen_ai.usage.output_tokens"] = usage["output_tokens"];
        }
    }
    // Finish reason — Anthropic: stop_reason; OpenAI: choices[0].finish_reason
    if (typeof b["stop_reason"] === "string") {
        out["gen_ai.response.finish_reason"] = b["stop_reason"];
    }
    else if (Array.isArray(b["choices"]) && b["choices"].length > 0) {
        const choice = b["choices"][0];
        // Guard against a null / non-object first choice: indexing null would
        // throw and break the "never throws" contract.
        if (choice && typeof choice === "object" && typeof choice["finish_reason"] === "string") {
            out["gen_ai.response.finish_reason"] = choice["finish_reason"];
        }
    }
    void provider; // provider available for future provider-specific logic
    return out;
}
|
|
76
|
+
/** Content attrs, ONLY when captureContent is on. Size-capped per value, redactor applied. */
|
|
77
|
+
export function llmContentAttrs(provider, reqBody, resBody, opts) {
|
|
78
|
+
const out = {};
|
|
79
|
+
const cap = opts.maxChars ?? 8000;
|
|
80
|
+
const applyRedact = (s) => {
|
|
81
|
+
const trimmed = s.slice(0, cap);
|
|
82
|
+
// Re-slice after redact so a redactor that expands text can't blow past the
|
|
83
|
+
// cap (content is the sensitive storage path — keep it hard-bounded).
|
|
84
|
+
return opts.redact ? opts.redact(trimmed).slice(0, cap) : trimmed;
|
|
85
|
+
};
|
|
86
|
+
// Prompt — serialize request messages/prompt compactly
|
|
87
|
+
try {
|
|
88
|
+
if (reqBody && typeof reqBody === "object" && !Array.isArray(reqBody)) {
|
|
89
|
+
const req = reqBody;
|
|
90
|
+
if (Array.isArray(req["messages"])) {
|
|
91
|
+
out["gen_ai.prompt"] = applyRedact(JSON.stringify(req["messages"]));
|
|
92
|
+
}
|
|
93
|
+
else if (typeof req["prompt"] === "string") {
|
|
94
|
+
out["gen_ai.prompt"] = applyRedact(req["prompt"]);
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
catch {
|
|
99
|
+
// never throw
|
|
100
|
+
}
|
|
101
|
+
// Completion — extract response text
|
|
102
|
+
try {
|
|
103
|
+
if (resBody && typeof resBody === "object" && !Array.isArray(resBody)) {
|
|
104
|
+
const res = resBody;
|
|
105
|
+
// OpenAI shape: choices[0].message.content
|
|
106
|
+
if (Array.isArray(res["choices"]) && res["choices"].length > 0) {
|
|
107
|
+
const choice = res["choices"][0];
|
|
108
|
+
if (choice["message"] && typeof choice["message"] === "object") {
|
|
109
|
+
const msg = choice["message"];
|
|
110
|
+
if (typeof msg["content"] === "string") {
|
|
111
|
+
out["gen_ai.completion"] = applyRedact(msg["content"]);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
// Anthropic shape: content[0].text
|
|
116
|
+
else if (Array.isArray(res["content"]) && res["content"].length > 0) {
|
|
117
|
+
const block = res["content"][0];
|
|
118
|
+
if (typeof block["text"] === "string") {
|
|
119
|
+
out["gen_ai.completion"] = applyRedact(block["text"]);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
catch {
|
|
125
|
+
// never throw
|
|
126
|
+
}
|
|
127
|
+
void provider; // available for future provider-specific logic
|
|
128
|
+
return out;
|
|
129
|
+
}
|
|
@@ -1,33 +1,51 @@
|
|
|
1
|
-
import { type Span } from "@opentelemetry/api";
|
|
1
|
+
import { SpanKind, type Span } from "@opentelemetry/api";
|
|
2
2
|
export interface InstrumentBindingsOpts {
|
|
3
3
|
/**
|
|
4
4
|
* Factory that creates and starts a new child span. Called at binding method
|
|
5
5
|
* invocation time (inside the traced handler scope), so `context.active()`
|
|
6
6
|
* at that moment correctly parents to the root span.
|
|
7
7
|
*
|
|
8
|
-
* For integration: pass `(name, attrs) => tracer.startSpan(name, { attributes: attrs }, context.active())`.
|
|
8
|
+
* For integration: pass `(name, attrs, kind) => tracer.startSpan(name, { attributes: attrs, kind }, context.active())`.
|
|
9
9
|
* For unit tests: inject a fake so no global provider is required.
|
|
10
10
|
*/
|
|
11
|
-
startSpan: (name: string, attrs: Record<string, unknown
|
|
11
|
+
startSpan: (name: string, attrs: Record<string, unknown>, kind?: SpanKind) => Span;
|
|
12
12
|
/**
|
|
13
|
-
* `true` → auto-detect and wrap all D1/KV/R2/Vectorize bindings.
|
|
13
|
+
* `true` → auto-detect and wrap all D1/KV/R2/Vectorize/AI/Queue/Service bindings.
|
|
14
14
|
* `string[]` → only wrap bindings whose env key is listed.
|
|
15
15
|
*/
|
|
16
16
|
select: boolean | string[];
|
|
17
17
|
}
|
|
18
|
+
/**
|
|
19
|
+
* Build a new args array for `fetch(input, init?)` with a `traceparent` header
|
|
20
|
+
* injected WITHOUT mutating the caller's original Request or init object.
|
|
21
|
+
*
|
|
22
|
+
* - `Request` first arg → rebuilt as `new Request(original, { headers })`.
|
|
23
|
+
* - String/URL first arg → spread a new init object with an augmented Headers.
|
|
24
|
+
*/
|
|
25
|
+
export declare function injectTraceparentArgs(args: any[], traceparent: string): any[];
|
|
18
26
|
/**
|
|
19
27
|
* Wrap an env object's Cloudflare bindings so that each binding operation
|
|
20
28
|
* emits a child span under the currently-active OTel context.
|
|
21
29
|
*
|
|
22
|
-
* Detects binding type by duck-typing
|
|
23
|
-
*
|
|
24
|
-
*
|
|
30
|
+
* Detects binding type by duck-typing:
|
|
31
|
+
* - D1: `prepare`
|
|
32
|
+
* - R2: `get`+`put`+`head`
|
|
33
|
+
* - KV: `get`+`put`+`list`
|
|
34
|
+
* - Vectorize: `query`+`upsert`
|
|
35
|
+
* - Workers AI: `run` (without `prepare`)
|
|
36
|
+
* - Queue producer: `send`+`sendBatch`
|
|
37
|
+
* - Service binding / Fetcher: `fetch` (last branch; most generic)
|
|
38
|
+
*
|
|
39
|
+
* Unrecognised bindings are passed through unchanged.
|
|
25
40
|
*
|
|
26
41
|
* Each wrapped binding is a `Proxy` over the original — non-wrapped prototype
|
|
27
42
|
* methods fall through to the real binding so no functionality is lost.
|
|
28
43
|
*
|
|
44
|
+
* Service-binding spans inject a W3C `traceparent` header into outgoing
|
|
45
|
+
* requests so calls to other Workers stitch into one distributed trace.
|
|
46
|
+
*
|
|
29
47
|
* @param env - The Worker env / binding bag.
|
|
30
|
-
* @param opts - `startSpan` factory + `select` filter.
|
|
48
|
+
* @param opts - `startSpan` factory (now accepts optional `SpanKind`) + `select` filter.
|
|
31
49
|
* @returns A shallow copy of `env` with selected bindings replaced by proxies.
|
|
32
50
|
*/
|
|
33
51
|
export declare function instrumentEnv<E extends Record<string, unknown>>(env: E, opts: InstrumentBindingsOpts): E;
|
package/dist/workers-bindings.js
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
// ---------------------------------------------------------------------------
|
|
2
2
|
// Cloudflare binding instrumentation for @heystack/otel/workers.
|
|
3
3
|
//
|
|
4
|
-
// Wraps D1, KV, R2,
|
|
5
|
-
//
|
|
4
|
+
// Wraps D1, KV, R2, Vectorize, Workers AI, Queue producers, and Service
|
|
5
|
+
// bindings with OTel child spans so that every binding operation is visible
|
|
6
|
+
// as a child of the active request span.
|
|
6
7
|
//
|
|
7
8
|
// WinterCG-safe: no `node:*` imports. Span factory is injected so the logic
|
|
8
9
|
// is pure and unit-testable without a global provider.
|
|
9
10
|
// ---------------------------------------------------------------------------
|
|
10
|
-
import { context, SpanStatusCode } from "@opentelemetry/api";
|
|
11
|
+
import { context, SpanKind, SpanStatusCode } from "@opentelemetry/api";
|
|
11
12
|
import { isTracingSuppressed } from "@opentelemetry/core";
|
|
12
13
|
// ---------------------------------------------------------------------------
|
|
13
14
|
// Duck-type detectors — conservative; require the distinctive method set
|
|
@@ -31,6 +32,24 @@ function isVectorizeLike(b) {
|
|
|
31
32
|
return (typeof b?.query === "function" &&
|
|
32
33
|
typeof b?.upsert === "function");
|
|
33
34
|
}
|
|
35
|
+
/**
 * Duck-type check for a Workers AI binding: it exposes a `run` method and
 * does NOT expose `prepare`. The `prepare` exclusion is purely defensive,
 * since D1 (which has `prepare`) is matched by an earlier branch.
 */
function isWorkersAILike(b) {
    if (typeof b?.run !== "function") {
        return false;
    }
    return typeof b?.prepare !== "function";
}
|
|
43
|
+
/** Duck-type check for a Queue producer binding: needs both `send` and `sendBatch`. */
function isQueueLike(b) {
    if (typeof b?.send !== "function") {
        return false;
    }
    return typeof b?.sendBatch === "function";
}
|
|
48
|
+
/**
 * Duck-type check for a Service binding / Fetcher: anything with a `fetch`
 * method. This is the most generic detector and is meant to run last; the
 * earlier branches have already claimed D1/R2/KV/Vectorize/AI/Queue bindings,
 * so ordering alone keeps them apart.
 */
function isServiceLike(b) {
    const candidate = b?.fetch;
    return typeof candidate === "function";
}
|
|
34
53
|
// ---------------------------------------------------------------------------
|
|
35
54
|
// Span lifecycle helper
|
|
36
55
|
// ---------------------------------------------------------------------------
|
|
@@ -161,21 +180,153 @@ function wrapVectorize(binding, opts, indexName) {
|
|
|
161
180
|
});
|
|
162
181
|
}
|
|
163
182
|
// ---------------------------------------------------------------------------
|
|
183
|
+
// Workers AI wrapper
|
|
184
|
+
// ---------------------------------------------------------------------------
|
|
185
|
+
/**
 * Wrap a Workers AI binding so each `run(...)` call emits a CLIENT span
 * named "AI run".
 *
 * The span carries `gen_ai.system`, the model name when the first argument is
 * a string, and token usage when the result is a non-stream object exposing
 * `usage.prompt_tokens` / `usage.completion_tokens`. A `ReadableStream`
 * result is returned as-is and never inspected. Errors from `run` are
 * recorded on the span and rethrown; the span always ends.
 *
 * When tracing is suppressed in the active context the call is forwarded
 * without creating a span.
 */
function wrapWorkersAI(binding, opts, _name) {
    const tracedRun = async (...args) => {
        if (isTracingSuppressed(context.active())) {
            return binding.run(...args);
        }
        const [model] = args;
        const attrs = typeof model === "string"
            ? { "gen_ai.system": "cloudflare.workers_ai", "gen_ai.request.model": model }
            : { "gen_ai.system": "cloudflare.workers_ai" };
        const span = opts.startSpan("AI run", attrs, SpanKind.CLIENT);
        try {
            const result = await binding.run(...args);
            // Best-effort usage extraction — never read/await/tee the stream.
            try {
                const inspectable = result &&
                    typeof result === "object" &&
                    !(result instanceof ReadableStream);
                const usage = inspectable ? result.usage : undefined;
                if (typeof usage === "object") {
                    const { prompt_tokens: promptTokens, completion_tokens: completionTokens } = usage;
                    if (typeof promptTokens === "number") {
                        span.setAttribute("gen_ai.usage.input_tokens", promptTokens);
                    }
                    if (typeof completionTokens === "number") {
                        span.setAttribute("gen_ai.usage.output_tokens", completionTokens);
                    }
                }
            }
            catch {
                // Swallow — a weird shape must never throw.
            }
            return result;
        }
        catch (err) {
            const isError = err instanceof Error;
            span.recordException(isError ? err : new Error(String(err)));
            span.setStatus({
                code: SpanStatusCode.ERROR,
                message: isError ? err.message : String(err),
            });
            throw err;
        }
        finally {
            span.end();
        }
    };
    return makeProxy(binding, { run: tracedRun });
}
|
|
234
|
+
// ---------------------------------------------------------------------------
|
|
235
|
+
// Queue producer wrapper
|
|
236
|
+
// ---------------------------------------------------------------------------
|
|
237
|
+
/**
 * Wrap a Queue producer binding so `send` and `sendBatch` each emit a
 * PRODUCER span tagged with `messaging.*` attributes. `name` (the env key of
 * the binding) is reported as `messaging.destination.name`. For `sendBatch`,
 * the batch size is recorded when the first argument is an array.
 *
 * When tracing is suppressed in the active context the call is forwarded
 * without creating a span.
 */
function wrapQueue(binding, opts, name) {
    // Fresh attribute object per call so spans never share a mutable map.
    const attrsFor = (operation) => ({
        "messaging.system": "cloudflare_queues",
        "messaging.destination.name": name,
        "messaging.operation": operation,
    });
    const tracedSend = async (...args) => {
        if (isTracingSuppressed(context.active())) {
            return binding.send(...args);
        }
        const span = opts.startSpan("Queue send", attrsFor("send"), SpanKind.PRODUCER);
        return runWithSpan(span, () => binding.send(...args));
    };
    const tracedSendBatch = async (...args) => {
        if (isTracingSuppressed(context.active())) {
            return binding.sendBatch(...args);
        }
        const attrs = attrsFor("sendBatch");
        const [messages] = args;
        if (Array.isArray(messages)) {
            attrs["messaging.batch.message_count"] = messages.length;
        }
        const span = opts.startSpan("Queue sendBatch", attrs, SpanKind.PRODUCER);
        return runWithSpan(span, () => binding.sendBatch(...args));
    };
    return makeProxy(binding, { send: tracedSend, sendBatch: tracedSendBatch });
}
|
|
266
|
+
// ---------------------------------------------------------------------------
|
|
267
|
+
// Service binding / Fetcher wrapper + traceparent injection
|
|
268
|
+
// ---------------------------------------------------------------------------
|
|
269
|
+
/**
 * Build a new args array for `fetch(input, init?)` with a `traceparent` header
 * injected WITHOUT mutating the caller's original Request or init object.
 *
 * - `Request` first arg, no `init.headers` → rebuilt as
 *   `new Request(original, { headers })`; remaining args are passed through.
 * - `Request` first arg WITH `init.headers` → per the Fetch Standard the init
 *   headers replace the Request's headers, so the header is added to a copy of
 *   `init` instead (injecting into the Request would be silently overridden).
 * - String/URL first arg → spread a new init object with an augmented Headers.
 *
 * @param args - The arguments the caller passed to `fetch`.
 * @param traceparent - W3C `traceparent` header value to inject.
 * @returns A new argument array suitable for spreading into `fetch`.
 */
export function injectTraceparentArgs(args, traceparent) {
    const [input, ...rest] = args;
    if (input instanceof Request) {
        const [init, ...extra] = rest;
        if (init && init.headers != null) {
            // init.headers wins over input.headers at fetch time — inject there.
            const headers = new Headers(init.headers);
            headers.set("traceparent", traceparent);
            return [input, { ...init, headers }, ...extra];
        }
        const headers = new Headers(input.headers);
        headers.set("traceparent", traceparent);
        return [new Request(input, { headers }), ...rest];
    }
    // String or URL
    const init = { ...(rest[0] ?? {}) };
    const headers = new Headers(init.headers);
    headers.set("traceparent", traceparent);
    init.headers = headers;
    return [input, init];
}
|
|
290
|
+
/**
 * Wrap a Service binding / Fetcher so each `fetch(...)` emits a CLIENT span
 * named `Service <name> fetch` (with `peer.service` = `name`) and forwards a
 * W3C `traceparent` header built from that span's context, letting the callee
 * continue the same trace.
 *
 * The trace-flags field mirrors the span context's sampled bit rather than
 * always claiming "sampled", so a downstream service is not told to record a
 * trace that this worker did not sample. If the span context exposes no
 * numeric `traceFlags` (e.g. a minimal test fake), the flag stays `01`.
 *
 * When tracing is suppressed in the active context the call is forwarded
 * untouched: no span is created and no header is injected.
 */
function wrapService(binding, opts, name) {
    return makeProxy(binding, {
        fetch: async (...args) => {
            if (isTracingSuppressed(context.active())) {
                return binding.fetch(...args);
            }
            const span = opts.startSpan(`Service ${name} fetch`, { "peer.service": name }, SpanKind.CLIENT);
            const sc = span.spanContext();
            // Bit 0 of trace-flags is the W3C "sampled" flag.
            const sampled = typeof sc.traceFlags !== "number" || (sc.traceFlags & 0x01) === 0x01;
            const traceparent = `00-${sc.traceId}-${sc.spanId}-${sampled ? "01" : "00"}`;
            const injectedArgs = injectTraceparentArgs(args, traceparent);
            return runWithSpan(span, () => binding.fetch(...injectedArgs));
        },
    });
}
|
|
304
|
+
// ---------------------------------------------------------------------------
|
|
164
305
|
// Main export
|
|
165
306
|
// ---------------------------------------------------------------------------
|
|
166
307
|
/**
|
|
167
308
|
* Wrap an env object's Cloudflare bindings so that each binding operation
|
|
168
309
|
* emits a child span under the currently-active OTel context.
|
|
169
310
|
*
|
|
170
|
-
* Detects binding type by duck-typing
|
|
171
|
-
*
|
|
172
|
-
*
|
|
311
|
+
* Detects binding type by duck-typing:
|
|
312
|
+
* - D1: `prepare`
|
|
313
|
+
* - R2: `get`+`put`+`head`
|
|
314
|
+
* - KV: `get`+`put`+`list`
|
|
315
|
+
* - Vectorize: `query`+`upsert`
|
|
316
|
+
* - Workers AI: `run` (without `prepare`)
|
|
317
|
+
* - Queue producer: `send`+`sendBatch`
|
|
318
|
+
* - Service binding / Fetcher: `fetch` (last branch; most generic)
|
|
319
|
+
*
|
|
320
|
+
* Unrecognised bindings are passed through unchanged.
|
|
173
321
|
*
|
|
174
322
|
* Each wrapped binding is a `Proxy` over the original — non-wrapped prototype
|
|
175
323
|
* methods fall through to the real binding so no functionality is lost.
|
|
176
324
|
*
|
|
325
|
+
* Service-binding spans inject a W3C `traceparent` header into outgoing
|
|
326
|
+
* requests so calls to other Workers stitch into one distributed trace.
|
|
327
|
+
*
|
|
177
328
|
* @param env - The Worker env / binding bag.
|
|
178
|
-
* @param opts - `startSpan` factory + `select` filter.
|
|
329
|
+
* @param opts - `startSpan` factory (now accepts optional `SpanKind`) + `select` filter.
|
|
179
330
|
* @returns A shallow copy of `env` with selected bindings replaced by proxies.
|
|
180
331
|
*/
|
|
181
332
|
export function instrumentEnv(env, opts) {
|
|
@@ -199,6 +350,15 @@ export function instrumentEnv(env, opts) {
|
|
|
199
350
|
else if (isVectorizeLike(binding)) {
|
|
200
351
|
result[key] = wrapVectorize(binding, opts, key);
|
|
201
352
|
}
|
|
353
|
+
else if (isWorkersAILike(binding)) {
|
|
354
|
+
result[key] = wrapWorkersAI(binding, opts, key);
|
|
355
|
+
}
|
|
356
|
+
else if (isQueueLike(binding)) {
|
|
357
|
+
result[key] = wrapQueue(binding, opts, key);
|
|
358
|
+
}
|
|
359
|
+
else if (isServiceLike(binding)) {
|
|
360
|
+
result[key] = wrapService(binding, opts, key);
|
|
361
|
+
}
|
|
202
362
|
// Unrecognised bindings are left as-is.
|
|
203
363
|
}
|
|
204
364
|
return result;
|
package/dist/workers.d.ts
CHANGED
|
@@ -178,6 +178,21 @@ export interface WorkersConfig {
|
|
|
178
178
|
rate?: number;
|
|
179
179
|
remote?: boolean;
|
|
180
180
|
};
|
|
181
|
+
/**
|
|
182
|
+
* LLM/gen_ai capture for outbound calls to known providers (OpenAI, Anthropic,
|
|
183
|
+
* CF AI Gateway, Google). Metadata (model, tokens, latency, finish_reason) is
|
|
184
|
+
* always captured for detected providers.
|
|
185
|
+
*
|
|
186
|
+
* Set `captureContent: true` to also capture prompt/completion text (off by
|
|
187
|
+
* default) — strongly recommended for AI-app RCA. Provide `redact` to scrub
|
|
188
|
+
* sensitive text before it leaves the Worker. `maxContentChars` caps the length
|
|
189
|
+
* of each captured value (default 8000 characters).
|
|
190
|
+
*/
|
|
191
|
+
ai?: {
|
|
192
|
+
captureContent?: boolean;
|
|
193
|
+
redact?: (text: string) => string;
|
|
194
|
+
maxContentChars?: number;
|
|
195
|
+
};
|
|
181
196
|
}
|
|
182
197
|
/**
|
|
183
198
|
* A `BasicTracerProvider` with the underlying `HeystackSpanExporter` attached so
|
package/dist/workers.js
CHANGED
|
@@ -15,6 +15,7 @@ import { BasicTracerProvider, SimpleSpanProcessor, } from "@opentelemetry/sdk-tr
|
|
|
15
15
|
import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
|
|
16
16
|
import { buildExporterConfig, DEFAULT_ENDPOINT } from "./core.js";
|
|
17
17
|
import { isSelfSpanAttrs, safeHostname } from "./self-span.js";
|
|
18
|
+
import { detectLLMProvider, llmRequestAttrs, llmResponseAttrs, llmContentAttrs } from "./llm-enrich.js";
|
|
18
19
|
import { instrumentEnv } from "./workers-bindings.js";
|
|
19
20
|
import { makeSampler, loadRemoteSamplingRate } from "./workers-sampler.js";
|
|
20
21
|
// `ExportResult` / `ExportResultCode` mirror `@opentelemetry/core`. We define
|
|
@@ -264,12 +265,15 @@ function injectTraceparent(input, init, traceparent) {
|
|
|
264
265
|
headers.set("traceparent", traceparent);
|
|
265
266
|
return [input, { ...(init ?? {}), headers }];
|
|
266
267
|
}
|
|
268
|
+
/** Max response body size to clone for gen_ai enrichment (512 KB). */
|
|
269
|
+
const LLM_RESPONSE_SIZE_CAP = 512 * 1024;
|
|
267
270
|
/**
|
|
268
271
|
* Patch `globalThis.fetch` exactly once to emit a CLIENT child span + inject
|
|
269
272
|
* `traceparent` for outbound subrequests. `ingestHost` is the bare ingest
|
|
270
273
|
* hostname (lower-case, no port) so the exporter's own uploads are never traced.
|
|
274
|
+
* `aiCfg` enables gen_ai semantic-convention enrichment for known LLM providers.
|
|
271
275
|
*/
|
|
272
|
-
function ensureFetchInstrumentation(ingestHost) {
|
|
276
|
+
function ensureFetchInstrumentation(ingestHost, aiCfg = {}) {
|
|
273
277
|
if (_fetchInstrumented)
|
|
274
278
|
return;
|
|
275
279
|
_fetchInstrumented = true;
|
|
@@ -304,9 +308,59 @@ function ensureFetchInstrumentation(ingestHost) {
|
|
|
304
308
|
const sc = span.spanContext();
|
|
305
309
|
const traceparent = `00-${sc.traceId}-${sc.spanId}-01`;
|
|
306
310
|
const [reqInput, reqInit] = injectTraceparent(input, init, traceparent);
|
|
311
|
+
// gen_ai enrichment — request side.
|
|
312
|
+
// Only when host is a known LLM provider and the body is already a string
|
|
313
|
+
// (the common LLM SDK case). Never clone/read the outgoing Request body.
|
|
314
|
+
const provider = detectLLMProvider(host);
|
|
315
|
+
let parsedReqBody = undefined;
|
|
316
|
+
if (provider !== null) {
|
|
317
|
+
// Only read the request body when it's already a string (the common LLM
|
|
318
|
+
// SDK case). Never clone/consume the outgoing Request.
|
|
319
|
+
const bodyStr = typeof init?.body === "string" ? init.body : undefined;
|
|
320
|
+
if (typeof bodyStr === "string") {
|
|
321
|
+
try {
|
|
322
|
+
parsedReqBody = JSON.parse(bodyStr);
|
|
323
|
+
for (const [k, v] of Object.entries(llmRequestAttrs(provider, parsedReqBody))) {
|
|
324
|
+
span.setAttribute(k, v);
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
catch {
|
|
328
|
+
// body not valid JSON — skip request enrichment
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
}
|
|
307
332
|
try {
|
|
308
333
|
const response = await originalFetch(reqInput, reqInit);
|
|
309
334
|
span.setAttribute("http.response.status_code", response.status);
|
|
335
|
+
// gen_ai enrichment — response side.
|
|
336
|
+
// Guard: known provider, JSON content-type (not event-stream), size cap.
|
|
337
|
+
// Always clone — NEVER consume the original response body.
|
|
338
|
+
if (provider !== null) {
|
|
339
|
+
try {
|
|
340
|
+
const ct = response.headers.get("content-type") ?? "";
|
|
341
|
+
const cl = parseInt(response.headers.get("content-length") ?? "0", 10);
|
|
342
|
+
const isJson = ct.includes("application/json");
|
|
343
|
+
const isStream = ct.includes("event-stream");
|
|
344
|
+
const withinCap = isNaN(cl) || cl === 0 || cl <= LLM_RESPONSE_SIZE_CAP;
|
|
345
|
+
if (isJson && !isStream && withinCap) {
|
|
346
|
+
const rb = await response.clone().json();
|
|
347
|
+
for (const [k, v] of Object.entries(llmResponseAttrs(provider, rb))) {
|
|
348
|
+
span.setAttribute(k, v);
|
|
349
|
+
}
|
|
350
|
+
if (aiCfg.captureContent) {
|
|
351
|
+
for (const [k, v] of Object.entries(llmContentAttrs(provider, parsedReqBody, rb, {
|
|
352
|
+
redact: aiCfg.redact,
|
|
353
|
+
maxChars: aiCfg.maxContentChars,
|
|
354
|
+
}))) {
|
|
355
|
+
span.setAttribute(k, v);
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
}
|
|
360
|
+
catch {
|
|
361
|
+
// parse failure — degrade to plain CLIENT span (no throw, no side effects)
|
|
362
|
+
}
|
|
363
|
+
}
|
|
310
364
|
return response;
|
|
311
365
|
}
|
|
312
366
|
catch (error) {
|
|
@@ -656,7 +710,7 @@ function ensureGlobalProvider(config) {
|
|
|
656
710
|
// Patch globalThis.fetch (once) so outbound subrequests get CLIENT child spans
|
|
657
711
|
// + `traceparent` injection (distributed tracing). The exporter's own POST uses
|
|
658
712
|
// the captured original fetch, so it is never re-entered by this wrapper.
|
|
659
|
-
ensureFetchInstrumentation(_provider.heystackExporter.ingestHost);
|
|
713
|
+
ensureFetchInstrumentation(_provider.heystackExporter.ingestHost, config.ai ?? {});
|
|
660
714
|
return _provider;
|
|
661
715
|
}
|
|
662
716
|
/**
|
|
@@ -729,6 +783,7 @@ export function instrument(handler, config) {
|
|
|
729
783
|
endpoint: config.endpoint,
|
|
730
784
|
waitUntil: config.waitUntil,
|
|
731
785
|
sampling: config.sampling,
|
|
786
|
+
ai: config.ai,
|
|
732
787
|
});
|
|
733
788
|
return { provider, tracer: trace.getTracer("heystack") };
|
|
734
789
|
};
|
|
@@ -838,7 +893,7 @@ export function instrument(handler, config) {
|
|
|
838
893
|
if (config.instrumentBindings) {
|
|
839
894
|
const binTracer = trace.getTracer("heystack");
|
|
840
895
|
handlerEnv = instrumentEnv(env, {
|
|
841
|
-
startSpan: (name, attrs) => binTracer.startSpan(name, { attributes: attrs }, context.active()),
|
|
896
|
+
startSpan: (name, attrs, kind) => binTracer.startSpan(name, { attributes: attrs, kind }, context.active()),
|
|
842
897
|
select: config.instrumentBindings,
|
|
843
898
|
});
|
|
844
899
|
}
|