retrace-sdk 0.11.0 → 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,8 @@
1
1
  import { SpanBuilder, SpanData, SpanType, TraceStatus } from "./trace.js";
2
2
  /** Drain the shared transport's in-flight data to the network (awaited on graceful shutdown). */
3
3
  export declare function flushSharedTransport(): Promise<void>;
4
+ /** Exit-path drain — uses the transport's HTTP one-shot for buffered events when available. */
5
+ export declare function drainSharedTransportOnExit(budgetMs?: number): Promise<void>;
4
6
  export interface RecordOptions {
5
7
  name?: string;
6
8
  input?: unknown;
@@ -13,19 +15,45 @@ export declare class TraceRecorder {
13
15
  private builder;
14
16
  private transport;
15
17
  private interceptorsInstalled;
18
+ private countedImperative;
19
+ private prevFallback;
20
+ private prevFallbackSink;
16
21
  private forkPointSpanId;
17
22
  private forkPointReached;
18
23
  private spanCounter;
19
24
  output: unknown;
20
25
  constructor(opts?: RecordOptions);
21
26
  get traceId(): string;
22
- start(name?: string, input?: unknown): this;
23
- end(output?: unknown, status?: TraceStatus): void;
27
+ start(name?: string, input?: unknown, opts?: {
28
+ managed?: boolean;
29
+ }): this;
30
+ private openSpans;
31
+ registerOpenSpan(spanId: string, finalize: (reason: "complete" | "partial") => void): void;
32
+ unregisterOpenSpan(spanId: string): void;
33
+ /** Finalize any still-open streaming spans as partial (capture_complete:false). Called on end()
34
+ * so a stream the AFC layer abandoned mid-drain is still emitted into the trace, flagged
35
+ * not-byte-replayable, rather than lost or appearing only at process exit. */
36
+ private finalizeOpenSpans;
37
+ end(output?: unknown, status?: TraceStatus, opts?: {
38
+ terminatedEarly?: boolean;
39
+ }): void;
24
40
  addSpan(span: SpanData): void;
25
41
  startSpan(name: string, spanType?: SpanType, input?: unknown, model?: string, parentId?: string): SpanBuilder;
26
42
  endSpan(spanBuilder: SpanBuilder, output?: unknown, error?: string): void;
27
43
  private installInterceptors;
28
44
  }
45
+ /**
46
+ * Create a manual (imperative) recorder you drive with start()/end().
47
+ *
48
+ * CONCURRENCY: this bare imperative path is NOT context-isolated — start()/end() have no async
49
+ * scope to bind, so span-routing and traceparent are tracked via process/async-context state that
50
+ * concurrent imperative traces (e.g. several started under one Promise.all) can stomp. For
51
+ * concurrent workloads use `trace()`, which wraps your function in AsyncLocalStorage (provably
52
+ * isolated). Sequential start→end cycles are correct, and overlapping imperative use is detected
53
+ * and LOUDLY WARNED (never silent corruption) — pointing you at `trace()`. (Python's `@record`
54
+ * decorator wraps the function and IS isolated on both asyncio and threads; the TS equivalent is
55
+ * `trace()`.)
56
+ */
29
57
  export declare function record(opts?: RecordOptions): TraceRecorder;
30
58
  export declare function trace<T>(fn: (...args: unknown[]) => T, opts?: RecordOptions & {
31
59
  resumable?: boolean;
package/dist/recorder.js CHANGED
@@ -1,19 +1,37 @@
1
1
  import { getConfig, requireApiKey } from "./config.js";
2
2
  import { SpanBuilder, SpanType, TraceBuilder, TraceStatus } from "./trace.js";
3
- import { createTransport } from "./transport.js";
3
+ import { createTransport, registerProcessExitFlush } from "./transport.js";
4
4
  import { shouldSample } from "./utils.js";
5
5
  import { installGeminiInterceptor } from "./interceptors/gemini.js";
6
6
  import { installOpenAIInterceptor } from "./interceptors/openai.js";
7
7
  import { installAnthropicInterceptor } from "./interceptors/anthropic.js";
8
+ import { dispatchInterceptedSpan, runWithActiveRecorder, setActiveRecorderFallback, currentFallbackSink } from "./interceptors/_dispatch.js";
9
+ import { withTraceContext, enterTraceContext, exitTraceContext } from "./traceparent.js";
8
10
  // Shared transport — stays open across multiple traces for resume/replay listening
9
11
  let sharedTransport = null;
12
+ // Count of imperative (non-HOF) recorders currently between start() and end(). The bare imperative
13
+ // record()/TraceRecorder path is NOT context-isolated (see record() doc + traceparent.ts), so two
14
+ // overlapping imperative traces can cross-attribute spans/traceparent. We can't make it isolated
15
+ // without an async scope, but we MUST NOT let the corruption be silent — overlap is loudly warned,
16
+ // pointing at trace() (the concurrency-safe API). HOF-managed starts pass {managed:true} and are
17
+ // excluded (they ARE isolated via withTraceContext/runWithActiveRecorder).
18
+ let activeImperativeRecorders = 0;
10
19
  function getSharedTransport() {
11
20
  if (!sharedTransport) {
12
21
  sharedTransport = createTransport(getConfig().transport);
13
- // Flush pending data before the process exits. Flushing (not just close) ensures the
14
- // final trace is actually uploaded — close() alone can race the process exiting.
15
- if (typeof process !== "undefined") {
16
- process.on("beforeExit", () => { void flushSharedTransport(); });
22
+ // Hand the transport the user's callback (or undefined). The transport owns the policy:
23
+ // callback-safety (a throwing onError can't kill the WS loop) and the throttled default-warn
24
+ // when no callback is registered — see WSTransport.surfaceSignal. Set onError in configure()
25
+ // before the first trace.
26
+ sharedTransport.onError = getConfig().onError;
27
+ // Flush pending data before the process exits. Covers beforeExit (graceful) + SIGTERM/SIGINT
28
+ // (with signal-ownership semantics), using an HTTP one-shot for buffered events since a WS
29
+ // handshake can't reliably complete during teardown. See registerProcessExitFlush.
30
+ // Only register the network exit hook when actually configured — an imported-but-unconfigured
31
+ // SDK installs no signal handlers and makes no outbound call on exit.
32
+ const cfg = getConfig();
33
+ if (typeof process !== "undefined" && cfg.enabled && cfg.apiKey) {
34
+ registerProcessExitFlush(sharedTransport);
17
35
  }
18
36
  }
19
37
  return sharedTransport;
@@ -22,10 +40,20 @@ function getSharedTransport() {
22
40
  export async function flushSharedTransport() {
23
41
  await sharedTransport?.flush();
24
42
  }
43
+ /** Exit-path drain — uses the transport's HTTP one-shot for buffered events when available. */
44
+ export async function drainSharedTransportOnExit(budgetMs) {
45
+ if (sharedTransport?.flushOnExit)
46
+ await sharedTransport.flushOnExit(budgetMs);
47
+ else
48
+ await sharedTransport?.flush();
49
+ }
25
50
  export class TraceRecorder {
26
51
  builder;
27
52
  transport;
28
53
  interceptorsInstalled = false;
54
+ countedImperative = false;
55
+ prevFallback = null;
56
+ prevFallbackSink = null;
29
57
  forkPointSpanId;
30
58
  forkPointReached = false;
31
59
  spanCounter = 0;
@@ -49,15 +77,67 @@ export class TraceRecorder {
49
77
  }
50
78
  }
51
79
  get traceId() { return this.builder.id; }
52
- start(name, input) {
80
+ start(name, input, opts) {
81
+ // Never-silent guard for the imperative path: if another imperative trace is already active when
82
+ // this one starts, overlapping imperative record() use can cross-attribute spans/traceparent.
83
+ // Warn loudly and point at trace() — convert silent corruption into a loud signal. HOF-managed
84
+ // starts are isolated (withTraceContext) and excluded.
85
+ if (!opts?.managed) {
86
+ if (activeImperativeRecorders > 0) {
87
+ console.warn("[retrace] CONCURRENT imperative record() detected: another imperative trace is still active. " +
88
+ "The bare record()/TraceRecorder path is NOT concurrency-isolated — spans and traceparent from " +
89
+ "overlapping imperative traces can be cross-attributed. Use trace() (the concurrency-safe API) " +
90
+ "for concurrent/parallel workloads.");
91
+ }
92
+ activeImperativeRecorders++;
93
+ this.countedImperative = true;
94
+ }
53
95
  this.builder.start(name, input);
54
96
  this.installInterceptors();
97
+ // Imperative API: route intercepted spans to this recorder and propagate trace context for
98
+ // outbound requests. Save the prior fallback so end() can RESTORE it (a nested trace() must not
99
+ // wipe the ambient init() fallback). The trace()/HOF path additionally isolates via ALS.
100
+ this.prevFallbackSink = currentFallbackSink();
101
+ this.prevFallback = setActiveRecorderFallback((span) => this.addSpan(span), this);
102
+ // ALS-scoped traceparent (set via enterWith), NOT a module global — so imperative traces started in
103
+ // separate async contexts don't leak into each other's outbound requests; traces sharing one async context can still overwrite it. The trace()/HOF path also
104
+ // wraps in withTraceContext; this covers the bare start()/end() imperative path.
105
+ enterTraceContext(this.builder.id, this.builder.id);
55
106
  this.transport.send("trace_started", this.builder.toDict());
56
107
  return this;
57
108
  }
58
- end(output, status = TraceStatus.COMPLETED) {
109
+ // ── OpenSpanSink: two-phase streaming spans (open at invocation, finalize once) ──
110
+ openSpans = new Map();
111
+ registerOpenSpan(spanId, finalize) {
112
+ this.openSpans.set(spanId, finalize);
113
+ }
114
+ unregisterOpenSpan(spanId) {
115
+ this.openSpans.delete(spanId);
116
+ }
117
+ /** Finalize any still-open streaming spans as partial (capture_complete:false). Called on end()
118
+ * so a stream the AFC layer abandoned mid-drain is still emitted into the trace, flagged
119
+ * not-byte-replayable, rather than lost or appearing only at process exit. */
120
+ finalizeOpenSpans() {
121
+ if (this.openSpans.size === 0)
122
+ return;
123
+ for (const [, finalize] of this.openSpans) {
124
+ try {
125
+ finalize("partial");
126
+ }
127
+ catch { /* best effort */ }
128
+ }
129
+ this.openSpans.clear();
130
+ }
131
+ end(output, status = TraceStatus.COMPLETED, opts) {
132
+ if (this.countedImperative) {
133
+ activeImperativeRecorders = Math.max(0, activeImperativeRecorders - 1);
134
+ this.countedImperative = false;
135
+ }
59
136
  if (output !== undefined)
60
137
  this.output = output;
138
+ // Close any dangling streaming spans (capture_complete:false) BEFORE the terminal event, so
139
+ // they land in this trace.
140
+ this.finalizeOpenSpans();
61
141
  const data = this.builder.end(this.output, status);
62
142
  this.transport.send("trace_ended", {
63
143
  id: data.id,
@@ -66,7 +146,16 @@ export class TraceRecorder {
66
146
  status: data.status,
67
147
  total_tokens: data.total_tokens,
68
148
  total_cost: data.total_cost,
149
+ // Force-closed by exit-flush/signal: a synthesized terminal must NOT look clean to the
150
+ // replay-guard. terminated_early ⇒ refuse byte-deterministic replay (same as no-terminal /
151
+ // lossy / capture_complete:false). Only a naturally-drained run produces a clean terminal.
152
+ ...(opts?.terminatedEarly ? { terminated_early: true } : {}),
69
153
  });
154
+ // Restore the enclosing trace's fallback (e.g. the ambient init() recorder) instead of nulling.
155
+ setActiveRecorderFallback(this.prevFallback, this.prevFallbackSink);
156
+ this.prevFallback = null;
157
+ this.prevFallbackSink = null;
158
+ exitTraceContext();
70
159
  // Shared transport stays open for resume/replay listening
71
160
  }
72
161
  addSpan(span) {
@@ -125,12 +214,26 @@ export class TraceRecorder {
125
214
  installInterceptors() {
126
215
  if (this.interceptorsInstalled)
127
216
  return;
128
- installGeminiInterceptor((span) => this.addSpan(span));
129
- installOpenAIInterceptor((span) => this.addSpan(span));
130
- installAnthropicInterceptor((span) => this.addSpan(span));
217
+ // Install ONE stable dispatcher; the active recorder is resolved per async-context (see
218
+ // interceptors/_dispatch.ts) so concurrent traces don't cross-route intercepted spans.
219
+ installGeminiInterceptor(dispatchInterceptedSpan);
220
+ installOpenAIInterceptor(dispatchInterceptedSpan);
221
+ installAnthropicInterceptor(dispatchInterceptedSpan);
131
222
  this.interceptorsInstalled = true;
132
223
  }
133
224
  }
225
+ /**
226
+ * Create a manual (imperative) recorder you drive with start()/end().
227
+ *
228
+ * CONCURRENCY: this bare imperative path is NOT context-isolated — start()/end() have no async
229
+ * scope to bind, so span-routing and traceparent are tracked via process/async-context state that
230
+ * concurrent imperative traces (e.g. several started under one Promise.all) can stomp. For
231
+ * concurrent workloads use `trace()`, which wraps your function in AsyncLocalStorage (provably
232
+ * isolated). Sequential start→end cycles are correct, and overlapping imperative use is detected
233
+ * and LOUDLY WARNED (never silent corruption) — pointing you at `trace()`. (Python's `@record`
234
+ * decorator wraps the function and IS isolated on both asyncio and threads; the TS equivalent is
235
+ * `trace()`.)
236
+ */
134
237
  export function record(opts) {
135
238
  const cfg = getConfig();
136
239
  if (!cfg.enabled || !shouldSample(cfg.sampleRate, cfg.sampleSeed, opts?.name)) {
@@ -165,19 +268,25 @@ export function trace(fn, opts) {
165
268
  input: opts?.input ?? args,
166
269
  metadata: opts?.metadata,
167
270
  });
168
- recorder.start(opts?.name || fn.name || "anonymous", opts?.input ?? args);
169
- try {
170
- const result = fn(...args);
171
- // Handle async functions
172
- if (result && typeof result.then === "function") {
173
- return result.then((resolved) => { recorder.end(resolved, TraceStatus.COMPLETED); return resolved; }, (err) => { recorder.end(undefined, TraceStatus.FAILED); throw err; });
271
+ // Isolate this trace's intercepted-span routing AND traceparent context to its own async
272
+ // context, so concurrent traces on a server never cross-route spans or leak context.
273
+ const tid = recorder.traceId;
274
+ const route = (span) => recorder.addSpan(span);
275
+ return runWithActiveRecorder(route, () => withTraceContext(tid, tid, () => {
276
+ recorder.start(opts?.name || fn.name || "anonymous", opts?.input ?? args, { managed: true });
277
+ try {
278
+ const result = fn(...args);
279
+ // Handle async functions
280
+ if (result && typeof result.then === "function") {
281
+ return result.then((resolved) => { recorder.end(resolved, TraceStatus.COMPLETED); return resolved; }, (err) => { recorder.end(undefined, TraceStatus.FAILED); throw err; });
282
+ }
283
+ recorder.end(result, TraceStatus.COMPLETED);
284
+ return result;
174
285
  }
175
- recorder.end(result, TraceStatus.COMPLETED);
176
- return result;
177
- }
178
- catch (err) {
179
- recorder.end(undefined, TraceStatus.FAILED);
180
- throw err;
181
- }
286
+ catch (err) {
287
+ recorder.end(undefined, TraceStatus.FAILED);
288
+ throw err;
289
+ }
290
+ }), recorder);
182
291
  };
183
292
  }
@@ -0,0 +1,26 @@
1
+ /**
2
+ * `stream()` — opt-in full-fidelity wrapper for an LLM stream.
3
+ *
4
+ * Wrap a provider stream so that the auto-captured span observes a CLEAN full drain even if you stop
5
+ * iterating early — giving you a byte-replay-eligible span (`capture_complete: true`) for a stream you
6
+ * only partially consume:
7
+ *
8
+ * ```ts
9
+ * for await (const chunk of stream(ai.models.generateContentStream({ ... }))) {
10
+ * render(chunk);
11
+ * if (enough) break; // break waits while the rest of the stream is drained → full capture
12
+ * }
13
+ * ```
14
+ *
15
+ * It does NOT record its own span — it relies on the auto-interceptor's capture. So the
16
+ * function-call rule still applies: a stream that emits a function call stays
17
+ * `capture_complete: false` regardless of how it's drained. The helper's guarantee is
18
+ * "true when there is no function call", never "true, period" — it can't force a function-call
19
+ * stream to look byte-replayable.
20
+ *
21
+ * Opt-in cost: if you break early, the remainder of the stream is still consumed (the provider may
22
+ * bill for the full generation) — so wrapping with `stream()` TRADES AWAY the cost/latency savings
23
+ * of your early stop in exchange for guaranteed full-fidelity capture. If you broke early to save
24
+ * money, do not wrap with `stream()`.
25
+ */
26
+ export declare function stream<T>(src: AsyncIterable<T> | Promise<AsyncIterable<T>>): AsyncGenerator<T>;
package/dist/stream.js ADDED
@@ -0,0 +1,56 @@
1
+ /**
2
+ * `stream()` — opt-in full-fidelity wrapper for an LLM stream.
3
+ *
4
+ * Wrap a provider stream so that the auto-captured span observes a CLEAN full drain even if you stop
5
+ * iterating early — giving you a byte-replay-eligible span (`capture_complete: true`) for a stream you
6
+ * only partially consume:
7
+ *
8
+ * ```ts
9
+ * for await (const chunk of stream(ai.models.generateContentStream({ ... }))) {
10
+ * render(chunk);
11
+ * if (enough) break; // break waits while the rest of the stream is drained → full capture
12
+ * }
13
+ * ```
14
+ *
15
+ * It does NOT record its own span — it relies on the auto-interceptor's capture. So the
16
+ * function-call rule still applies: a stream that emits a function call stays
17
+ * `capture_complete: false` regardless of how it's drained. The helper's guarantee is
18
+ * "true when there is no function call", never "true, period" — it can't force a function-call
19
+ * stream to look byte-replayable.
20
+ *
21
+ * Opt-in cost: if you break early, the remainder of the stream is still consumed (the provider may
22
+ * bill for the full generation) — so wrapping with `stream()` TRADES AWAY the cost/latency savings
23
+ * of your early stop in exchange for guaranteed full-fidelity capture. If you broke early to save
24
+ * money, do not wrap with `stream()`.
25
+ */
26
+ export async function* stream(src) {
27
+ const iterable = (await src);
28
+ const it = iterable[Symbol.asyncIterator]();
29
+ let drainedToEnd = false;
30
+ try {
31
+ while (true) {
32
+ const r = await it.next();
33
+ if (r.done) {
34
+ drainedToEnd = true;
35
+ break;
36
+ }
37
+ yield r.value;
38
+ }
39
+ }
40
+ finally {
41
+ // Consumer broke early — finish draining the source so the auto-captured span sees a clean
42
+ // drain. (Function-call streams still finalize capture_complete:false in the auto path.)
43
+ if (!drainedToEnd) {
44
+ try {
45
+ for (;;) {
46
+ const r = await it.next();
47
+ if (r.done)
48
+ break;
49
+ }
50
+ }
51
+ catch {
52
+ /* best-effort drain */
53
+ }
54
+ }
55
+ }
56
+ }
@@ -6,8 +6,26 @@
6
6
  * When a traced function makes HTTP calls, the traceparent header
7
7
  * is injected so downstream services can correlate their spans.
8
8
  */
9
- /** Set the active trace context for outgoing requests. */
9
+ /**
10
+ * Run `fn` with an isolated trace context. Outgoing-request helpers (getTraceparent /
11
+ * injectTraceparent) called within `fn` (including across awaits) see this context, with
12
+ * no cross-contamination between concurrent traces.
13
+ */
14
+ export declare function withTraceContext<T>(traceId: string, spanId: string, fn: () => T): T;
15
+ /** Set the active trace context for outgoing requests (legacy imperative API). */
10
16
  export declare function setTraceContext(traceId: string, spanId: string): void;
17
+ /**
18
+ * Imperative ALS context for the recorder's start()/end(), which have no callback scope to wrap.
19
+ * Uses AsyncLocalStorage.enterWith so the context is isolated PER ASYNC EXECUTION — two concurrent
20
+ * imperative traces (each in its own async context) get their own store instead of clobbering a
21
+ * shared module global. This is the fix for the old setTraceContext()-writes-a-global contamination:
22
+ * the recorder no longer touches _currentTraceId at all; the globals remain ONLY for the explicitly
23
+ * legacy, single-context setTraceContext() public API.
24
+ */
25
+ export declare function enterTraceContext(traceId: string, spanId: string): void;
26
+ /** Clear the imperative ALS context for the current execution (empty store → getTraceparent returns
27
+ * null WITHOUT falling through to the legacy globals). */
28
+ export declare function exitTraceContext(): void;
11
29
  /** Clear the active trace context. */
12
30
  export declare function clearTraceContext(): void;
13
31
  /** Get the current traceparent header value, or null if no active trace. */
@@ -6,15 +6,44 @@
6
6
  * When a traced function makes HTTP calls, the traceparent header
7
7
  * is injected so downstream services can correlate their spans.
8
8
  */
9
+ import { AsyncLocalStorage } from "async_hooks";
10
+ // Per-async-context trace context. Concurrent traces each get their own store, so one
11
+ // trace's context can't leak into another's outbound requests. The module-level globals
12
+ // below remain as a fallback for the legacy imperative setTraceContext/clearTraceContext API.
13
+ const traceContextStore = new AsyncLocalStorage();
9
14
  let _currentTraceId = null;
10
15
  let _currentSpanId = null;
11
- /** Set the active trace context for outgoing requests. */
16
+ /**
17
+ * Run `fn` with an isolated trace context. Outgoing-request helpers (getTraceparent /
18
+ * injectTraceparent) called within `fn` (including across awaits) see this context, with
19
+ * no cross-contamination between concurrent traces.
20
+ */
21
+ export function withTraceContext(traceId, spanId, fn) {
22
+ return traceContextStore.run({ traceId: traceId.replace(/-/g, ""), spanId: spanId.replace(/-/g, "").slice(0, 16) }, fn);
23
+ }
24
+ /** Set the active trace context for outgoing requests (legacy imperative API). */
12
25
  export function setTraceContext(traceId, spanId) {
13
26
  // Convert UUID format to 32-hex (remove dashes)
14
27
  _currentTraceId = traceId.replace(/-/g, "");
15
28
  // Take first 16 chars of span ID as parent span
16
29
  _currentSpanId = spanId.replace(/-/g, "").slice(0, 16);
17
30
  }
31
+ /**
32
+ * Imperative ALS context for the recorder's start()/end(), which have no callback scope to wrap.
33
+ * Uses AsyncLocalStorage.enterWith so the context is isolated PER ASYNC EXECUTION — two concurrent
34
+ * imperative traces (each in its own async context) get their own store instead of clobbering a
35
+ * shared module global. This is the fix for the old setTraceContext()-writes-a-global contamination:
36
+ * the recorder no longer touches _currentTraceId at all; the globals remain ONLY for the explicitly
37
+ * legacy, single-context setTraceContext() public API.
38
+ */
39
+ export function enterTraceContext(traceId, spanId) {
40
+ traceContextStore.enterWith({ traceId: traceId.replace(/-/g, ""), spanId: spanId.replace(/-/g, "").slice(0, 16) });
41
+ }
42
+ /** Clear the imperative ALS context for the current execution (empty store → getTraceparent returns
43
+ * null WITHOUT falling through to the legacy globals). */
44
+ export function exitTraceContext() {
45
+ traceContextStore.enterWith({ traceId: "", spanId: "" });
46
+ }
18
47
  /** Clear the active trace context. */
19
48
  export function clearTraceContext() {
20
49
  _currentTraceId = null;
@@ -22,10 +51,13 @@ export function clearTraceContext() {
22
51
  }
23
52
  /** Get the current traceparent header value, or null if no active trace. */
24
53
  export function getTraceparent() {
25
- if (!_currentTraceId || !_currentSpanId)
54
+ const scoped = traceContextStore.getStore();
55
+ const traceId = scoped?.traceId ?? _currentTraceId;
56
+ const spanId = scoped?.spanId ?? _currentSpanId;
57
+ if (!traceId || !spanId)
26
58
  return null;
27
59
  // version-trace_id-parent_id-flags (01 = sampled)
28
- return `00-${_currentTraceId}-${_currentSpanId}-01`;
60
+ return `00-${traceId}-${spanId}-01`;
29
61
  }
30
62
  /**
31
63
  * Inject traceparent into a headers object (for fetch/axios/http calls).
@@ -1,8 +1,18 @@
1
+ import { type RetraceServerSignal } from "./errors.js";
1
2
  export interface Transport {
2
3
  send(eventType: string, data: Record<string, unknown>): void;
3
4
  close(): void;
4
5
  /** Drain in-flight data to the network (awaited on graceful shutdown). */
5
6
  flush(): Promise<void>;
7
+ /** Exit/signal path: drain via the most reliable channel for teardown (HTTP one-shot for
8
+ * buffered events), bounded by budgetMs. Falls back to flush() when not implemented. */
9
+ flushOnExit?(budgetMs?: number): Promise<void>;
10
+ /** True if there is anything worth flushing (buffered events or an unsent in-flight payload).
11
+ * Lets the exit path no-op — no fetch, no delay — for a zero-event run. */
12
+ hasPendingData?(): boolean;
13
+ /** Server-signal channel: a STRUCTURED signal (code/retryable/fatal), not a raw string — branch
14
+ * on `signal.code`, never string-match `signal.message`. */
15
+ onError?: (signal: RetraceServerSignal) => void;
6
16
  }
7
17
  export declare class WSTransport implements Transport {
8
18
  private ws;
@@ -11,16 +21,51 @@ export declare class WSTransport implements Transport {
11
21
  private backoff;
12
22
  private queue;
13
23
  private reconnectTimer;
14
- onError?: (type: string, message: string) => void;
24
+ private lossyTraces;
25
+ private droppedOpenSpanIds;
26
+ private droppedTotal;
27
+ private lastDropWarnMs;
28
+ private lastSignalWarnMs;
29
+ onError?: (signal: RetraceServerSignal) => void;
15
30
  get isConnected(): boolean;
31
+ /** Whether a trace lost events to a buffer drop (so the API/replay can refuse byte-replay). */
32
+ isTraceLossy(traceId: string): boolean;
16
33
  connect(): void;
17
34
  private reconnect;
18
35
  private flushQueue;
36
+ /** Serialize + send a single event, stamping trace-level `lossy` on a trace_ended whose trace
37
+ * lost events to a buffer drop (so the server/replay can refuse byte-deterministic replay). */
38
+ private transmit;
39
+ /** Enqueue with a bounded (1000) buffer. On overflow drop the OLDEST event, mark its trace lossy,
40
+ * and if it was an open span (span_started) remember its id so the later close is a no-op. */
41
+ private enqueue;
42
+ /** Throttled drop warning — at most once per ~5s burst, reporting the CUMULATIVE count (a
43
+ * single-tick burst that exits before the next window would otherwise under-report). A final
44
+ * summary is emitted on close(). */
45
+ private recordDrop;
46
+ /** Surface a structured server signal. If the user registered onError, invoke it WRAPPED — a
47
+ * throwing user callback must never kill the listener / WS loop (classic footgun). With no
48
+ * callback, fall back to a throttled console warning (per code) — never silent, never an
49
+ * unthrottled storm. */
50
+ private surfaceSignal;
51
+ private warnUnknownType;
52
+ /** At most one warn per ~5s per key, so a storm (rate_limited, unknown frames) can't flood. */
53
+ private throttledSignalWarn;
54
+ /** Whether there is anything worth flushing on exit. */
55
+ hasPendingData(): boolean;
19
56
  send(eventType: string, data: Record<string, unknown>): void;
20
57
  close(): void;
21
58
  /** Wait for the socket's send buffer to drain so the final trace_ended actually leaves
22
59
  * the process before exit. Best-effort with a hard timeout. */
23
60
  flush(): Promise<void>;
61
+ /** Exit path: drain a live socket if connected, then HTTP one-shot anything still buffered.
62
+ * A WS handshake can't reliably complete during teardown, so buffered events (the common
63
+ * short-lived-script case where WS never connected) go out over HTTP keepalive instead. */
64
+ flushOnExit(budgetMs?: number): Promise<void>;
65
+ /** POST whatever is still buffered over a bounded HTTP keepalive request, grouped by trace.
66
+ * Incomplete traces (no terminal trace_ended buffered) and already-lossy traces are stamped
67
+ * lossy:true so the server/replay refuses byte-deterministic replay. */
68
+ private flushViaHttp;
24
69
  }
25
70
  export declare class HTTPTransport implements Transport {
26
71
  private traceData;
@@ -29,5 +74,30 @@ export declare class HTTPTransport implements Transport {
29
74
  flush(): Promise<void>;
30
75
  private buildSpans;
31
76
  close(): void;
77
+ /** HTTP is already the one-shot channel — just drain. */
78
+ flushOnExit(): Promise<void>;
79
+ hasPendingData(): boolean;
32
80
  }
33
81
  export declare function createTransport(mode?: "ws" | "http" | "auto"): Transport;
82
+ export type ExitReason = "graceful" | "signal" | "uncaught";
83
+ /**
84
+ * Register a synchronous hook run on the exit/signal path BEFORE the transport drains — e.g. the
85
+ * ambient trace finishing itself (emitting trace_ended) so it's in the buffer for the drain.
86
+ */
87
+ export declare function onProcessExit(fn: (reason: ExitReason) => void): void;
88
+ /**
89
+ * Wire process-exit flushing for a shared transport. Node's exit hooks are weaker than Python's
90
+ * atexit, so we cover three paths with different ownership semantics:
91
+ * - beforeExit: graceful (loop emptied naturally). Drain; do not exit (Node exits after).
92
+ * - SIGTERM/SIGINT, and WE are the sole listener: adding a listener suppresses Node's default
93
+ * terminate, so we now OWN the exit — flush THEN process.exit(), or the process hangs forever.
94
+ * - SIGTERM/SIGINT, but the USER already has handlers: best-effort flush only; their handler owns
95
+ * exit. We do not exit, and cannot guarantee our async flush completes (a synchronous
96
+ * process.exit() in their handler will cut it off).
97
+ *
98
+ * Irreducible residual (both SDKs): process.exit() mid-flush, os._exit(), and SIGKILL bypass these
99
+ * hooks entirely and lose still-buffered events; a framework-owned SIGTERM (we stay a guest) drains
100
+ * only what that framework's shutdown allows. Both SDKs DO flush on a sole-owner SIGTERM (TS via the
101
+ * listenerCount gate here; Python via a SIG_DFL/main-thread gate) — that parity is real.
102
+ */
103
+ export declare function registerProcessExitFlush(transport: Transport, budgetMs?: number): void;