retrace-sdk 0.11.0 → 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config.d.ts +4 -0
- package/dist/config.js +7 -0
- package/dist/errors.d.ts +22 -0
- package/dist/errors.js +18 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.js +8 -1
- package/dist/init.js +22 -15
- package/dist/interceptors/_dispatch.d.ts +25 -0
- package/dist/interceptors/_dispatch.js +57 -0
- package/dist/interceptors/gemini.d.ts +1 -1
- package/dist/interceptors/gemini.js +215 -74
- package/dist/interceptors/install.d.ts +1 -0
- package/dist/interceptors/install.js +21 -0
- package/dist/recorder.d.ts +30 -2
- package/dist/recorder.js +132 -23
- package/dist/stream.d.ts +26 -0
- package/dist/stream.js +56 -0
- package/dist/traceparent.d.ts +19 -1
- package/dist/traceparent.js +35 -3
- package/dist/transport.d.ts +71 -1
- package/dist/transport.js +304 -42
- package/package.json +2 -2
package/dist/recorder.d.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { SpanBuilder, SpanData, SpanType, TraceStatus } from "./trace.js";
|
|
2
2
|
/** Drain the shared transport's in-flight data to the network (awaited on graceful shutdown). */
|
|
3
3
|
export declare function flushSharedTransport(): Promise<void>;
|
|
4
|
+
/** Exit-path drain — uses the transport's HTTP one-shot for buffered events when available. */
|
|
5
|
+
export declare function drainSharedTransportOnExit(budgetMs?: number): Promise<void>;
|
|
4
6
|
export interface RecordOptions {
|
|
5
7
|
name?: string;
|
|
6
8
|
input?: unknown;
|
|
@@ -13,19 +15,45 @@ export declare class TraceRecorder {
|
|
|
13
15
|
private builder;
|
|
14
16
|
private transport;
|
|
15
17
|
private interceptorsInstalled;
|
|
18
|
+
private countedImperative;
|
|
19
|
+
private prevFallback;
|
|
20
|
+
private prevFallbackSink;
|
|
16
21
|
private forkPointSpanId;
|
|
17
22
|
private forkPointReached;
|
|
18
23
|
private spanCounter;
|
|
19
24
|
output: unknown;
|
|
20
25
|
constructor(opts?: RecordOptions);
|
|
21
26
|
get traceId(): string;
|
|
22
|
-
start(name?: string, input?: unknown
|
|
23
|
-
|
|
27
|
+
start(name?: string, input?: unknown, opts?: {
|
|
28
|
+
managed?: boolean;
|
|
29
|
+
}): this;
|
|
30
|
+
private openSpans;
|
|
31
|
+
registerOpenSpan(spanId: string, finalize: (reason: "complete" | "partial") => void): void;
|
|
32
|
+
unregisterOpenSpan(spanId: string): void;
|
|
33
|
+
/** Finalize any still-open streaming spans as partial (capture_complete:false). Called on end()
|
|
34
|
+
* so a stream the AFC layer abandoned mid-drain is still emitted into the trace, flagged
|
|
35
|
+
* not-byte-replayable, rather than lost or appearing only at process exit. */
|
|
36
|
+
private finalizeOpenSpans;
|
|
37
|
+
end(output?: unknown, status?: TraceStatus, opts?: {
|
|
38
|
+
terminatedEarly?: boolean;
|
|
39
|
+
}): void;
|
|
24
40
|
addSpan(span: SpanData): void;
|
|
25
41
|
startSpan(name: string, spanType?: SpanType, input?: unknown, model?: string, parentId?: string): SpanBuilder;
|
|
26
42
|
endSpan(spanBuilder: SpanBuilder, output?: unknown, error?: string): void;
|
|
27
43
|
private installInterceptors;
|
|
28
44
|
}
|
|
45
|
+
/**
|
|
46
|
+
* Create a manual (imperative) recorder you drive with start()/end().
|
|
47
|
+
*
|
|
48
|
+
* CONCURRENCY: this bare imperative path is NOT context-isolated — start()/end() have no async
|
|
49
|
+
* scope to bind, so span-routing and traceparent are tracked via process/async-context state that
|
|
50
|
+
* concurrent imperative traces (e.g. several started under one Promise.all) can stomp. For
|
|
51
|
+
* concurrent workloads use `trace()`, which wraps your function in AsyncLocalStorage (provably
|
|
52
|
+
* isolated). Sequential start→end cycles are correct, and overlapping imperative use is detected
|
|
53
|
+
* and LOUDLY WARNED (never silent corruption) — pointing you at `trace()`. (Python's `@record`
|
|
54
|
+
* decorator wraps the function and IS isolated on both asyncio and threads; the TS equivalent is
|
|
55
|
+
* `trace()`.)
|
|
56
|
+
*/
|
|
29
57
|
export declare function record(opts?: RecordOptions): TraceRecorder;
|
|
30
58
|
export declare function trace<T>(fn: (...args: unknown[]) => T, opts?: RecordOptions & {
|
|
31
59
|
resumable?: boolean;
|
package/dist/recorder.js
CHANGED
|
@@ -1,19 +1,37 @@
|
|
|
1
1
|
import { getConfig, requireApiKey } from "./config.js";
|
|
2
2
|
import { SpanBuilder, SpanType, TraceBuilder, TraceStatus } from "./trace.js";
|
|
3
|
-
import { createTransport } from "./transport.js";
|
|
3
|
+
import { createTransport, registerProcessExitFlush } from "./transport.js";
|
|
4
4
|
import { shouldSample } from "./utils.js";
|
|
5
5
|
import { installGeminiInterceptor } from "./interceptors/gemini.js";
|
|
6
6
|
import { installOpenAIInterceptor } from "./interceptors/openai.js";
|
|
7
7
|
import { installAnthropicInterceptor } from "./interceptors/anthropic.js";
|
|
8
|
+
import { dispatchInterceptedSpan, runWithActiveRecorder, setActiveRecorderFallback, currentFallbackSink } from "./interceptors/_dispatch.js";
|
|
9
|
+
import { withTraceContext, enterTraceContext, exitTraceContext } from "./traceparent.js";
|
|
8
10
|
// Shared transport — stays open across multiple traces for resume/replay listening
|
|
9
11
|
let sharedTransport = null;
|
|
12
|
+
// Count of imperative (non-HOF) recorders currently between start() and end(). The bare imperative
|
|
13
|
+
// record()/TraceRecorder path is NOT context-isolated (see record() doc + traceparent.ts), so two
|
|
14
|
+
// overlapping imperative traces can cross-attribute spans/traceparent. We can't make it isolated
|
|
15
|
+
// without an async scope, but we MUST NOT let the corruption be silent — overlap is loudly warned,
|
|
16
|
+
// pointing at trace() (the concurrency-safe API). HOF-managed starts pass {managed:true} and are
|
|
17
|
+
// excluded (they ARE isolated via withTraceContext/runWithActiveRecorder).
|
|
18
|
+
let activeImperativeRecorders = 0;
|
|
10
19
|
function getSharedTransport() {
|
|
11
20
|
if (!sharedTransport) {
|
|
12
21
|
sharedTransport = createTransport(getConfig().transport);
|
|
13
|
-
//
|
|
14
|
-
//
|
|
15
|
-
|
|
16
|
-
|
|
22
|
+
// Hand the transport the user's callback (or undefined). The transport owns the policy:
|
|
23
|
+
// callback-safety (a throwing onError can't kill the WS loop) and the throttled default-warn
|
|
24
|
+
// when no callback is registered — see WSTransport.surfaceSignal. Set onError in configure()
|
|
25
|
+
// before the first trace.
|
|
26
|
+
sharedTransport.onError = getConfig().onError;
|
|
27
|
+
// Flush pending data before the process exits. Covers beforeExit (graceful) + SIGTERM/SIGINT
|
|
28
|
+
// (with signal-ownership semantics), using an HTTP one-shot for buffered events since a WS
|
|
29
|
+
// handshake can't reliably complete during teardown. See registerProcessExitFlush.
|
|
30
|
+
// Only register the network exit hook when actually configured — an imported-but-unconfigured
|
|
31
|
+
// SDK installs no signal handlers and makes no outbound call on exit.
|
|
32
|
+
const cfg = getConfig();
|
|
33
|
+
if (typeof process !== "undefined" && cfg.enabled && cfg.apiKey) {
|
|
34
|
+
registerProcessExitFlush(sharedTransport);
|
|
17
35
|
}
|
|
18
36
|
}
|
|
19
37
|
return sharedTransport;
|
|
@@ -22,10 +40,20 @@ function getSharedTransport() {
|
|
|
22
40
|
/**
 * Await the shared transport's flush(). When no shared transport has been created yet there is
 * nothing to flush and the call simply resolves.
 */
export async function flushSharedTransport() {
    const transport = sharedTransport;
    const pending = transport == null ? undefined : transport.flush();
    await pending;
}
|
|
43
|
+
/**
 * Exit-path drain for the shared transport.
 *
 * Prefers the transport's flushOnExit(budgetMs) when the transport provides one; otherwise falls
 * back to a plain flush(). Resolves without doing anything when no shared transport exists.
 *
 * @param budgetMs Optional budget forwarded unchanged to flushOnExit().
 */
export async function drainSharedTransportOnExit(budgetMs) {
    const transport = sharedTransport;
    const pending = transport?.flushOnExit
        ? transport.flushOnExit(budgetMs)
        : transport?.flush();
    await pending;
}
|
|
25
50
|
export class TraceRecorder {
|
|
26
51
|
builder;
|
|
27
52
|
transport;
|
|
28
53
|
interceptorsInstalled = false;
|
|
54
|
+
countedImperative = false;
|
|
55
|
+
prevFallback = null;
|
|
56
|
+
prevFallbackSink = null;
|
|
29
57
|
forkPointSpanId;
|
|
30
58
|
forkPointReached = false;
|
|
31
59
|
spanCounter = 0;
|
|
@@ -49,15 +77,67 @@ export class TraceRecorder {
|
|
|
49
77
|
}
|
|
50
78
|
}
|
|
51
79
|
get traceId() { return this.builder.id; }
|
|
52
|
-
start(name, input) {
|
|
80
|
+
start(name, input, opts) {
|
|
81
|
+
// Never-silent guard for the imperative path: if another imperative trace is already active when
|
|
82
|
+
// this one starts, overlapping imperative record() use can cross-attribute spans/traceparent.
|
|
83
|
+
// Warn loudly and point at trace() — convert silent corruption into a loud signal. HOF-managed
|
|
84
|
+
// starts are isolated (withTraceContext) and excluded.
|
|
85
|
+
if (!opts?.managed) {
|
|
86
|
+
if (activeImperativeRecorders > 0) {
|
|
87
|
+
console.warn("[retrace] CONCURRENT imperative record() detected: another imperative trace is still active. " +
|
|
88
|
+
"The bare record()/TraceRecorder path is NOT concurrency-isolated — spans and traceparent from " +
|
|
89
|
+
"overlapping imperative traces can be cross-attributed. Use trace() (the concurrency-safe API) " +
|
|
90
|
+
"for concurrent/parallel workloads.");
|
|
91
|
+
}
|
|
92
|
+
activeImperativeRecorders++;
|
|
93
|
+
this.countedImperative = true;
|
|
94
|
+
}
|
|
53
95
|
this.builder.start(name, input);
|
|
54
96
|
this.installInterceptors();
|
|
97
|
+
// Imperative API: route intercepted spans to this recorder and propagate trace context for
|
|
98
|
+
// outbound requests. Save the prior fallback so end() can RESTORE it (a nested trace() must not
|
|
99
|
+
// wipe the ambient init() fallback). The trace()/HOF path additionally isolates via ALS.
|
|
100
|
+
this.prevFallbackSink = currentFallbackSink();
|
|
101
|
+
this.prevFallback = setActiveRecorderFallback((span) => this.addSpan(span), this);
|
|
102
|
+
// ALS-isolated traceparent (per async execution), NOT a module global — so concurrent imperative
|
|
103
|
+
// traces never leak context into one another's outbound requests. The trace()/HOF path also
|
|
104
|
+
// wraps in withTraceContext; this covers the bare start()/end() imperative path.
|
|
105
|
+
enterTraceContext(this.builder.id, this.builder.id);
|
|
55
106
|
this.transport.send("trace_started", this.builder.toDict());
|
|
56
107
|
return this;
|
|
57
108
|
}
|
|
58
|
-
|
|
109
|
+
// ── OpenSpanSink: two-phase streaming spans (open at invocation, finalize once) ──
|
|
110
|
+
openSpans = new Map();
|
|
111
|
+
registerOpenSpan(spanId, finalize) {
|
|
112
|
+
this.openSpans.set(spanId, finalize);
|
|
113
|
+
}
|
|
114
|
+
unregisterOpenSpan(spanId) {
|
|
115
|
+
this.openSpans.delete(spanId);
|
|
116
|
+
}
|
|
117
|
+
/** Finalize any still-open streaming spans as partial (capture_complete:false). Called on end()
|
|
118
|
+
* so a stream the AFC layer abandoned mid-drain is still emitted into the trace, flagged
|
|
119
|
+
* not-byte-replayable, rather than lost or appearing only at process exit. */
|
|
120
|
+
finalizeOpenSpans() {
|
|
121
|
+
if (this.openSpans.size === 0)
|
|
122
|
+
return;
|
|
123
|
+
for (const [, finalize] of this.openSpans) {
|
|
124
|
+
try {
|
|
125
|
+
finalize("partial");
|
|
126
|
+
}
|
|
127
|
+
catch { /* best effort */ }
|
|
128
|
+
}
|
|
129
|
+
this.openSpans.clear();
|
|
130
|
+
}
|
|
131
|
+
end(output, status = TraceStatus.COMPLETED, opts) {
|
|
132
|
+
if (this.countedImperative) {
|
|
133
|
+
activeImperativeRecorders = Math.max(0, activeImperativeRecorders - 1);
|
|
134
|
+
this.countedImperative = false;
|
|
135
|
+
}
|
|
59
136
|
if (output !== undefined)
|
|
60
137
|
this.output = output;
|
|
138
|
+
// Close any dangling streaming spans (capture_complete:false) BEFORE the terminal event, so
|
|
139
|
+
// they land in this trace.
|
|
140
|
+
this.finalizeOpenSpans();
|
|
61
141
|
const data = this.builder.end(this.output, status);
|
|
62
142
|
this.transport.send("trace_ended", {
|
|
63
143
|
id: data.id,
|
|
@@ -66,7 +146,16 @@ export class TraceRecorder {
|
|
|
66
146
|
status: data.status,
|
|
67
147
|
total_tokens: data.total_tokens,
|
|
68
148
|
total_cost: data.total_cost,
|
|
149
|
+
// Force-closed by exit-flush/signal: a synthesized terminal must NOT look clean to the
|
|
150
|
+
// replay-guard. terminated_early ⇒ refuse byte-deterministic replay (same as no-terminal /
|
|
151
|
+
// lossy / capture_complete:false). Only a naturally-drained run produces a clean terminal.
|
|
152
|
+
...(opts?.terminatedEarly ? { terminated_early: true } : {}),
|
|
69
153
|
});
|
|
154
|
+
// Restore the enclosing trace's fallback (e.g. the ambient init() recorder) instead of nulling.
|
|
155
|
+
setActiveRecorderFallback(this.prevFallback, this.prevFallbackSink);
|
|
156
|
+
this.prevFallback = null;
|
|
157
|
+
this.prevFallbackSink = null;
|
|
158
|
+
exitTraceContext();
|
|
70
159
|
// Shared transport stays open for resume/replay listening
|
|
71
160
|
}
|
|
72
161
|
addSpan(span) {
|
|
@@ -125,12 +214,26 @@ export class TraceRecorder {
|
|
|
125
214
|
installInterceptors() {
|
|
126
215
|
if (this.interceptorsInstalled)
|
|
127
216
|
return;
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
217
|
+
// Install ONE stable dispatcher; the active recorder is resolved per async-context (see
|
|
218
|
+
// interceptors/_dispatch.ts) so concurrent traces don't cross-route intercepted spans.
|
|
219
|
+
installGeminiInterceptor(dispatchInterceptedSpan);
|
|
220
|
+
installOpenAIInterceptor(dispatchInterceptedSpan);
|
|
221
|
+
installAnthropicInterceptor(dispatchInterceptedSpan);
|
|
131
222
|
this.interceptorsInstalled = true;
|
|
132
223
|
}
|
|
133
224
|
}
|
|
225
|
+
/**
|
|
226
|
+
* Create a manual (imperative) recorder you drive with start()/end().
|
|
227
|
+
*
|
|
228
|
+
* CONCURRENCY: this bare imperative path is NOT context-isolated — start()/end() have no async
|
|
229
|
+
* scope to bind, so span-routing and traceparent are tracked via process/async-context state that
|
|
230
|
+
* concurrent imperative traces (e.g. several started under one Promise.all) can stomp. For
|
|
231
|
+
* concurrent workloads use `trace()`, which wraps your function in AsyncLocalStorage (provably
|
|
232
|
+
* isolated). Sequential start→end cycles are correct, and overlapping imperative use is detected
|
|
233
|
+
* and LOUDLY WARNED (never silent corruption) — pointing you at `trace()`. (Python's `@record`
|
|
234
|
+
* decorator wraps the function and IS isolated on both asyncio and threads; the TS equivalent is
|
|
235
|
+
* `trace()`.)
|
|
236
|
+
*/
|
|
134
237
|
export function record(opts) {
|
|
135
238
|
const cfg = getConfig();
|
|
136
239
|
if (!cfg.enabled || !shouldSample(cfg.sampleRate, cfg.sampleSeed, opts?.name)) {
|
|
@@ -165,19 +268,25 @@ export function trace(fn, opts) {
|
|
|
165
268
|
input: opts?.input ?? args,
|
|
166
269
|
metadata: opts?.metadata,
|
|
167
270
|
});
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
271
|
+
// Isolate this trace's intercepted-span routing AND traceparent context to its own async
|
|
272
|
+
// context, so concurrent traces on a server never cross-route spans or leak context.
|
|
273
|
+
const tid = recorder.traceId;
|
|
274
|
+
const route = (span) => recorder.addSpan(span);
|
|
275
|
+
return runWithActiveRecorder(route, () => withTraceContext(tid, tid, () => {
|
|
276
|
+
recorder.start(opts?.name || fn.name || "anonymous", opts?.input ?? args, { managed: true });
|
|
277
|
+
try {
|
|
278
|
+
const result = fn(...args);
|
|
279
|
+
// Handle async functions
|
|
280
|
+
if (result && typeof result.then === "function") {
|
|
281
|
+
return result.then((resolved) => { recorder.end(resolved, TraceStatus.COMPLETED); return resolved; }, (err) => { recorder.end(undefined, TraceStatus.FAILED); throw err; });
|
|
282
|
+
}
|
|
283
|
+
recorder.end(result, TraceStatus.COMPLETED);
|
|
284
|
+
return result;
|
|
174
285
|
}
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
throw err;
|
|
181
|
-
}
|
|
286
|
+
catch (err) {
|
|
287
|
+
recorder.end(undefined, TraceStatus.FAILED);
|
|
288
|
+
throw err;
|
|
289
|
+
}
|
|
290
|
+
}), recorder);
|
|
182
291
|
};
|
|
183
292
|
}
|
package/dist/stream.d.ts
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `stream()` — opt-in full-fidelity wrapper for an LLM stream.
|
|
3
|
+
*
|
|
4
|
+
* Wrap a provider stream so that the auto-captured span observes a CLEAN full drain even if you stop
|
|
5
|
+
* iterating early — giving you a byte-replay-eligible span (`capture_complete: true`) for a stream you
|
|
6
|
+
* only partially consume:
|
|
7
|
+
*
|
|
8
|
+
* ```ts
|
|
9
|
+
* for await (const chunk of stream(ai.models.generateContentStream({ ... }))) {
|
|
10
|
+
* render(chunk);
|
|
11
|
+
*   if (enough) break; // the rest is still drained (awaited before the loop exits) → full capture
|
|
12
|
+
* }
|
|
13
|
+
* ```
|
|
14
|
+
*
|
|
15
|
+
* It does NOT record its own span — it relies on the auto-interceptor's capture. So the
|
|
16
|
+
* function-call rule still applies: a stream that emits a function call stays
|
|
17
|
+
* `capture_complete: false` regardless of how it's drained. The helper's guarantee is
|
|
18
|
+
* "true when there is no function call", never "true, period" — it can't force a function-call
|
|
19
|
+
* stream to look byte-replayable.
|
|
20
|
+
*
|
|
21
|
+
* Opt-in cost: if you break early, the remainder of the stream is still consumed (the provider may
|
|
22
|
+
* bill for the full generation) — so wrapping with `stream()` TRADES AWAY the cost/latency savings
|
|
23
|
+
* of your early stop in exchange for guaranteed full-fidelity capture. If you broke early to save
|
|
24
|
+
* money, do not wrap with `stream()`.
|
|
25
|
+
*/
|
|
26
|
+
export declare function stream<T>(src: AsyncIterable<T> | Promise<AsyncIterable<T>>): AsyncGenerator<T>;
|
package/dist/stream.js
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `stream()` — opt-in full-fidelity wrapper for an LLM stream.
|
|
3
|
+
*
|
|
4
|
+
* Wrap a provider stream so that the auto-captured span observes a CLEAN full drain even if you stop
|
|
5
|
+
* iterating early — giving you a byte-replay-eligible span (`capture_complete: true`) for a stream you
|
|
6
|
+
* only partially consume:
|
|
7
|
+
*
|
|
8
|
+
* ```ts
|
|
9
|
+
* for await (const chunk of stream(ai.models.generateContentStream({ ... }))) {
|
|
10
|
+
* render(chunk);
|
|
11
|
+
* if (enough) break; // the rest is still drained in the background → full capture
|
|
12
|
+
* }
|
|
13
|
+
* ```
|
|
14
|
+
*
|
|
15
|
+
* It does NOT record its own span — it relies on the auto-interceptor's capture. So the
|
|
16
|
+
* function-call rule still applies: a stream that emits a function call stays
|
|
17
|
+
* `capture_complete: false` regardless of how it's drained. The helper's guarantee is
|
|
18
|
+
* "true when there is no function call", never "true, period" — it can't force a function-call
|
|
19
|
+
* stream to look byte-replayable.
|
|
20
|
+
*
|
|
21
|
+
* Opt-in cost: if you break early, the remainder of the stream is still consumed (the provider may
|
|
22
|
+
* bill for the full generation) — so wrapping with `stream()` TRADES AWAY the cost/latency savings
|
|
23
|
+
* of your early stop in exchange for guaranteed full-fidelity capture. If you broke early to save
|
|
24
|
+
* money, do not wrap with `stream()`.
|
|
25
|
+
*/
|
|
26
|
+
export async function* stream(src) {
|
|
27
|
+
const iterable = (await src);
|
|
28
|
+
const it = iterable[Symbol.asyncIterator]();
|
|
29
|
+
let drainedToEnd = false;
|
|
30
|
+
try {
|
|
31
|
+
while (true) {
|
|
32
|
+
const r = await it.next();
|
|
33
|
+
if (r.done) {
|
|
34
|
+
drainedToEnd = true;
|
|
35
|
+
break;
|
|
36
|
+
}
|
|
37
|
+
yield r.value;
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
finally {
|
|
41
|
+
// Consumer broke early — finish draining the source so the auto-captured span sees a clean
|
|
42
|
+
// drain. (Function-call streams still finalize capture_complete:false in the auto path.)
|
|
43
|
+
if (!drainedToEnd) {
|
|
44
|
+
try {
|
|
45
|
+
for (;;) {
|
|
46
|
+
const r = await it.next();
|
|
47
|
+
if (r.done)
|
|
48
|
+
break;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
catch {
|
|
52
|
+
/* best-effort drain */
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
package/dist/traceparent.d.ts
CHANGED
|
@@ -6,8 +6,26 @@
|
|
|
6
6
|
* When a traced function makes HTTP calls, the traceparent header
|
|
7
7
|
* is injected so downstream services can correlate their spans.
|
|
8
8
|
*/
|
|
9
|
-
/**
|
|
9
|
+
/**
|
|
10
|
+
* Run `fn` with an isolated trace context. Outgoing-request helpers (getTraceparent /
|
|
11
|
+
* injectTraceparent) called within `fn` (including across awaits) see this context, with
|
|
12
|
+
* no cross-contamination between concurrent traces.
|
|
13
|
+
*/
|
|
14
|
+
export declare function withTraceContext<T>(traceId: string, spanId: string, fn: () => T): T;
|
|
15
|
+
/** Set the active trace context for outgoing requests (legacy imperative API). */
|
|
10
16
|
export declare function setTraceContext(traceId: string, spanId: string): void;
|
|
17
|
+
/**
|
|
18
|
+
* Imperative ALS context for the recorder's start()/end(), which have no callback scope to wrap.
|
|
19
|
+
* Uses AsyncLocalStorage.enterWith so the context is isolated PER ASYNC EXECUTION — two concurrent
|
|
20
|
+
* imperative traces (each in its own async context) get their own store instead of clobbering a
|
|
21
|
+
* shared module global. This is the fix for the old setTraceContext()-writes-a-global contamination:
|
|
22
|
+
* the recorder no longer touches _currentTraceId at all; the globals remain ONLY for the explicitly
|
|
23
|
+
* legacy, single-context setTraceContext() public API.
|
|
24
|
+
*/
|
|
25
|
+
export declare function enterTraceContext(traceId: string, spanId: string): void;
|
|
26
|
+
/** Clear the imperative ALS context for the current execution (empty store → getTraceparent returns
|
|
27
|
+
* null WITHOUT falling through to the legacy globals). */
|
|
28
|
+
export declare function exitTraceContext(): void;
|
|
11
29
|
/** Clear the active trace context. */
|
|
12
30
|
export declare function clearTraceContext(): void;
|
|
13
31
|
/** Get the current traceparent header value, or null if no active trace. */
|
package/dist/traceparent.js
CHANGED
|
@@ -6,15 +6,44 @@
|
|
|
6
6
|
* When a traced function makes HTTP calls, the traceparent header
|
|
7
7
|
* is injected so downstream services can correlate their spans.
|
|
8
8
|
*/
|
|
9
|
+
import { AsyncLocalStorage } from "async_hooks";
|
|
10
|
+
// Per-async-context trace context. Concurrent traces each get their own store, so one
|
|
11
|
+
// trace's context can't leak into another's outbound requests. The module-level globals
|
|
12
|
+
// below remain as a fallback for the legacy imperative setTraceContext/clearTraceContext API.
|
|
13
|
+
const traceContextStore = new AsyncLocalStorage();
|
|
9
14
|
let _currentTraceId = null;
|
|
10
15
|
let _currentSpanId = null;
|
|
11
|
-
/**
|
|
16
|
+
/**
 * Execute `fn` inside its own trace-context store and return whatever `fn` returns.
 *
 * The store holds the trace id with dashes removed and the dash-free span id cut to its first
 * 16 characters — the same normalisation setTraceContext applies.
 *
 * @param traceId Trace identifier (dashes are stripped).
 * @param spanId  Span identifier (dashes are stripped, then truncated to 16 characters).
 * @param fn      Callback run with the store active.
 */
export function withTraceContext(traceId, spanId, fn) {
    const store = {
        traceId: traceId.replace(/-/g, ""),
        spanId: spanId.replace(/-/g, "").slice(0, 16),
    };
    return traceContextStore.run(store, fn);
}
|
|
24
|
+
/**
 * Legacy imperative API: store the active trace context in the module-level variables.
 *
 * @param traceId Trace identifier; dashes are removed (UUID → 32-hex form).
 * @param spanId  Span identifier; dashes are removed and the first 16 characters are kept as the
 *                parent span id.
 */
export function setTraceContext(traceId, spanId) {
    _currentTraceId = traceId.split("-").join("");
    const dashlessSpanId = spanId.split("-").join("");
    _currentSpanId = dashlessSpanId.slice(0, 16);
}
|
|
31
|
+
/**
 * Imperative counterpart of withTraceContext for the recorder's start()/end(), which have no
 * callback to wrap. Installs a fresh store on the AsyncLocalStorage via enterWith() instead of
 * writing the module-level variables used by the legacy setTraceContext() API.
 *
 * @param traceId Trace identifier (dashes are stripped).
 * @param spanId  Span identifier (dashes are stripped, then truncated to 16 characters).
 */
export function enterTraceContext(traceId, spanId) {
    const store = {
        traceId: traceId.replace(/-/g, ""),
        spanId: spanId.replace(/-/g, "").slice(0, 16),
    };
    traceContextStore.enterWith(store);
}
|
|
42
|
+
/**
 * Clear the imperative ALS context for the current execution by entering a store whose ids are
 * empty strings. The store still exists, so getTraceparent sees the empty ids and returns null
 * WITHOUT falling through to the legacy module-level variables.
 */
export function exitTraceContext() {
    const cleared = { traceId: "", spanId: "" };
    traceContextStore.enterWith(cleared);
}
|
|
18
47
|
/** Clear the active trace context. */
|
|
19
48
|
export function clearTraceContext() {
|
|
20
49
|
_currentTraceId = null;
|
|
@@ -22,10 +51,13 @@ export function clearTraceContext() {
|
|
|
22
51
|
}
|
|
23
52
|
/** Get the current traceparent header value, or null if no active trace. */
|
|
24
53
|
export function getTraceparent() {
|
|
25
|
-
|
|
54
|
+
const scoped = traceContextStore.getStore();
|
|
55
|
+
const traceId = scoped?.traceId ?? _currentTraceId;
|
|
56
|
+
const spanId = scoped?.spanId ?? _currentSpanId;
|
|
57
|
+
if (!traceId || !spanId)
|
|
26
58
|
return null;
|
|
27
59
|
// version-trace_id-parent_id-flags (01 = sampled)
|
|
28
|
-
return `00-${
|
|
60
|
+
return `00-${traceId}-${spanId}-01`;
|
|
29
61
|
}
|
|
30
62
|
/**
|
|
31
63
|
* Inject traceparent into a headers object (for fetch/axios/http calls).
|
package/dist/transport.d.ts
CHANGED
|
@@ -1,8 +1,18 @@
|
|
|
1
|
+
import { type RetraceServerSignal } from "./errors.js";
|
|
1
2
|
export interface Transport {
|
|
2
3
|
send(eventType: string, data: Record<string, unknown>): void;
|
|
3
4
|
close(): void;
|
|
4
5
|
/** Drain in-flight data to the network (awaited on graceful shutdown). */
|
|
5
6
|
flush(): Promise<void>;
|
|
7
|
+
/** Exit/signal path: drain via the most reliable channel for teardown (HTTP one-shot for
|
|
8
|
+
* buffered events), bounded by budgetMs. Falls back to flush() when not implemented. */
|
|
9
|
+
flushOnExit?(budgetMs?: number): Promise<void>;
|
|
10
|
+
/** True if there is anything worth flushing (buffered events or an unsent in-flight payload).
|
|
11
|
+
* Lets the exit path no-op — no fetch, no delay — for a zero-event run. */
|
|
12
|
+
hasPendingData?(): boolean;
|
|
13
|
+
/** Server-signal channel: a STRUCTURED signal (code/retryable/fatal), not a raw string — branch
|
|
14
|
+
* on `signal.code`, never string-match `signal.message`. */
|
|
15
|
+
onError?: (signal: RetraceServerSignal) => void;
|
|
6
16
|
}
|
|
7
17
|
export declare class WSTransport implements Transport {
|
|
8
18
|
private ws;
|
|
@@ -11,16 +21,51 @@ export declare class WSTransport implements Transport {
|
|
|
11
21
|
private backoff;
|
|
12
22
|
private queue;
|
|
13
23
|
private reconnectTimer;
|
|
14
|
-
|
|
24
|
+
private lossyTraces;
|
|
25
|
+
private droppedOpenSpanIds;
|
|
26
|
+
private droppedTotal;
|
|
27
|
+
private lastDropWarnMs;
|
|
28
|
+
private lastSignalWarnMs;
|
|
29
|
+
onError?: (signal: RetraceServerSignal) => void;
|
|
15
30
|
get isConnected(): boolean;
|
|
31
|
+
/** Whether a trace lost events to a buffer drop (so the API/replay can refuse byte-replay). */
|
|
32
|
+
isTraceLossy(traceId: string): boolean;
|
|
16
33
|
connect(): void;
|
|
17
34
|
private reconnect;
|
|
18
35
|
private flushQueue;
|
|
36
|
+
/** Serialize + send a single event, stamping trace-level `lossy` on a trace_ended whose trace
|
|
37
|
+
* lost events to a buffer drop (so the server/replay can refuse byte-deterministic replay). */
|
|
38
|
+
private transmit;
|
|
39
|
+
/** Enqueue with a bounded (1000) buffer. On overflow drop the OLDEST event, mark its trace lossy,
|
|
40
|
+
* and if it was an open span (span_started) remember its id so the later close is a no-op. */
|
|
41
|
+
private enqueue;
|
|
42
|
+
/** Throttled drop warning — at most once per ~5s burst, reporting the CUMULATIVE count (a
|
|
43
|
+
* single-tick burst that exits before the next window would otherwise under-report). A final
|
|
44
|
+
* summary is emitted on close(). */
|
|
45
|
+
private recordDrop;
|
|
46
|
+
/** Surface a structured server signal. If the user registered onError, invoke it WRAPPED — a
|
|
47
|
+
* throwing user callback must never kill the listener / WS loop (classic footgun). With no
|
|
48
|
+
* callback, fall back to a throttled console warning (per code) — never silent, never an
|
|
49
|
+
* unthrottled storm. */
|
|
50
|
+
private surfaceSignal;
|
|
51
|
+
private warnUnknownType;
|
|
52
|
+
/** At most one warn per ~5s per key, so a storm (rate_limited, unknown frames) can't flood. */
|
|
53
|
+
private throttledSignalWarn;
|
|
54
|
+
/** Whether there is anything worth flushing on exit. */
|
|
55
|
+
hasPendingData(): boolean;
|
|
19
56
|
send(eventType: string, data: Record<string, unknown>): void;
|
|
20
57
|
close(): void;
|
|
21
58
|
/** Wait for the socket's send buffer to drain so the final trace_ended actually leaves
|
|
22
59
|
* the process before exit. Best-effort with a hard timeout. */
|
|
23
60
|
flush(): Promise<void>;
|
|
61
|
+
/** Exit path: drain a live socket if connected, then HTTP one-shot anything still buffered.
|
|
62
|
+
* A WS handshake can't reliably complete during teardown, so buffered events (the common
|
|
63
|
+
* short-lived-script case where WS never connected) go out over HTTP keepalive instead. */
|
|
64
|
+
flushOnExit(budgetMs?: number): Promise<void>;
|
|
65
|
+
/** POST whatever is still buffered over a bounded HTTP keepalive request, grouped by trace.
|
|
66
|
+
* Incomplete traces (no terminal trace_ended buffered) and already-lossy traces are stamped
|
|
67
|
+
* lossy:true so the server/replay refuses byte-deterministic replay. */
|
|
68
|
+
private flushViaHttp;
|
|
24
69
|
}
|
|
25
70
|
export declare class HTTPTransport implements Transport {
|
|
26
71
|
private traceData;
|
|
@@ -29,5 +74,30 @@ export declare class HTTPTransport implements Transport {
|
|
|
29
74
|
flush(): Promise<void>;
|
|
30
75
|
private buildSpans;
|
|
31
76
|
close(): void;
|
|
77
|
+
/** HTTP is already the one-shot channel — just drain. */
|
|
78
|
+
flushOnExit(): Promise<void>;
|
|
79
|
+
hasPendingData(): boolean;
|
|
32
80
|
}
|
|
33
81
|
export declare function createTransport(mode?: "ws" | "http" | "auto"): Transport;
|
|
82
|
+
export type ExitReason = "graceful" | "signal" | "uncaught";
|
|
83
|
+
/**
|
|
84
|
+
* Register a synchronous hook run on the exit/signal path BEFORE the transport drains — e.g. the
|
|
85
|
+
* ambient trace finishing itself (emitting trace_ended) so it's in the buffer for the drain.
|
|
86
|
+
*/
|
|
87
|
+
export declare function onProcessExit(fn: (reason: ExitReason) => void): void;
|
|
88
|
+
/**
|
|
89
|
+
* Wire process-exit flushing for a shared transport. Node's exit hooks are weaker than Python's
|
|
90
|
+
* atexit, so we cover three paths with different ownership semantics:
|
|
91
|
+
* - beforeExit: graceful (loop emptied naturally). Drain; do not exit (Node exits after).
|
|
92
|
+
* - SIGTERM/SIGINT, and WE are the sole listener: adding a listener suppresses Node's default
|
|
93
|
+
* terminate, so we now OWN the exit — flush THEN process.exit(), or the process hangs forever.
|
|
94
|
+
* - SIGTERM/SIGINT, but the USER already has handlers: best-effort flush only; their handler owns
|
|
95
|
+
* exit. We do not exit, and cannot guarantee our async flush completes (a synchronous
|
|
96
|
+
* process.exit() in their handler will cut it off).
|
|
97
|
+
*
|
|
98
|
+
* Irreducible residual (both SDKs): process.exit() mid-flush, os._exit(), and SIGKILL bypass these
|
|
99
|
+
* hooks entirely and lose still-buffered events; a framework-owned SIGTERM (we stay a guest) drains
|
|
100
|
+
* only what that framework's shutdown allows. Both SDKs DO flush on a sole-owner SIGTERM (TS via the
|
|
101
|
+
* listenerCount gate here; Python via a SIG_DFL/main-thread gate) — that parity is real.
|
|
102
|
+
*/
|
|
103
|
+
export declare function registerProcessExitFlush(transport: Transport, budgetMs?: number): void;
|