retrace-sdk 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/config.d.ts CHANGED
@@ -7,6 +7,10 @@ export interface Config {
7
7
  sampleRate: number;
8
8
  /** Optional seed for deterministic sampling. When set, the same trace name always produces the same sample decision. */
9
9
  sampleSeed: string | undefined;
10
+ /** Transport mode. "auto" (default) tries WebSocket then falls back to HTTP; "http" is
11
+ * request/response only (recommended for short-lived scripts and serverless — it never
12
+ * holds an open socket and always surfaces upload errors); "ws" forces WebSocket. */
13
+ transport: "auto" | "ws" | "http";
10
14
  }
11
15
  export declare function configure(opts: Partial<Config>): Config;
12
16
  export declare function requireApiKey(): string;
package/dist/config.js CHANGED
@@ -6,6 +6,7 @@ const config = {
6
6
  enabled: !["false", "0"].includes((process.env.RETRACE_ENABLED || "true").toLowerCase()),
7
7
  sampleRate: parseFloat(process.env.RETRACE_SAMPLE_RATE || "1"),
8
8
  sampleSeed: process.env.RETRACE_SAMPLE_SEED || undefined,
9
+ transport: (["auto", "ws", "http"].includes(process.env.RETRACE_TRANSPORT || "") ? process.env.RETRACE_TRANSPORT : "auto"),
9
10
  };
10
11
  config.wsUrl = config.baseUrl.replace("https://", "wss://").replace("http://", "ws://");
11
12
  export function configure(opts) {
package/dist/init.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { configure, getConfig, requireApiKey } from "./config.js";
2
- import { TraceRecorder } from "./recorder.js";
2
+ import { TraceRecorder, flushSharedTransport } from "./recorder.js";
3
3
  import { TraceStatus } from "./trace.js";
4
4
  let ambient = null;
5
5
  let exitHooked = false;
@@ -54,10 +54,20 @@ export function init(opts = {}) {
54
54
  }
55
55
  catch { /* best effort on shutdown */ }
56
56
  };
57
+ // On signal-triggered exits, process.exit() would otherwise kill the process before the
58
+ // final trace_ended is delivered over the network. End the trace, then await a transport
59
+ // drain (capped by a hard timeout so a hung network can't block shutdown) before exiting.
60
+ const finishAndExit = (status, code) => {
61
+ finish(status);
62
+ void Promise.race([
63
+ flushSharedTransport().catch(() => { }),
64
+ new Promise((r) => setTimeout(r, 3000)),
65
+ ]).then(() => process.exit(code));
66
+ };
57
67
  process.once("beforeExit", () => finish(TraceStatus.COMPLETED));
58
- process.once("SIGINT", () => { finish(TraceStatus.COMPLETED); process.exit(130); });
59
- process.once("SIGTERM", () => { finish(TraceStatus.COMPLETED); process.exit(143); });
60
- process.once("uncaughtException", (err) => { finish(TraceStatus.FAILED); console.error(err); process.exit(1); });
68
+ process.once("SIGINT", () => finishAndExit(TraceStatus.COMPLETED, 130));
69
+ process.once("SIGTERM", () => finishAndExit(TraceStatus.COMPLETED, 143));
70
+ process.once("uncaughtException", (err) => { console.error(err); finishAndExit(TraceStatus.FAILED, 1); });
61
71
  }
62
72
  return ambient;
63
73
  }
package/dist/recorder.d.ts CHANGED
@@ -1,4 +1,6 @@
1
1
  import { SpanBuilder, SpanData, SpanType, TraceStatus } from "./trace.js";
2
+ /** Drain the shared transport's in-flight data to the network (awaited on graceful shutdown). */
3
+ export declare function flushSharedTransport(): Promise<void>;
2
4
  export interface RecordOptions {
3
5
  name?: string;
4
6
  input?: unknown;
package/dist/recorder.js CHANGED
@@ -9,14 +9,19 @@ import { installAnthropicInterceptor } from "./interceptors/anthropic.js";
9
9
  let sharedTransport = null;
10
10
  function getSharedTransport() {
11
11
  if (!sharedTransport) {
12
- sharedTransport = createTransport();
13
- // Flush pending data before process exits
12
+ sharedTransport = createTransport(getConfig().transport);
13
+ // Flush pending data before the process exits. Flushing (not just close) ensures the
14
+ // final trace is actually uploaded — close() alone can race the process exiting.
14
15
  if (typeof process !== "undefined") {
15
- process.on("beforeExit", () => { sharedTransport?.close(); });
16
+ process.on("beforeExit", () => { void flushSharedTransport(); });
16
17
  }
17
18
  }
18
19
  return sharedTransport;
19
20
  }
21
+ /** Drain the shared transport's in-flight data to the network (awaited on graceful shutdown). */
22
+ export async function flushSharedTransport() {
23
+ await sharedTransport?.flush();
24
+ }
20
25
  export class TraceRecorder {
21
26
  builder;
22
27
  transport;
package/dist/trace.js CHANGED
@@ -83,7 +83,12 @@ export class TraceBuilder {
83
83
  return this.data;
84
84
  }
85
85
  addSpan(span) {
86
- this.data.spans.push(span);
86
+ // Spans are streamed individually through the transport (and HTTPTransport keeps its own
87
+ // per-trace buffer for the batched POST), so this retained array is only an in-memory
88
+ // convenience and is never itself transmitted. Cap it so init()'s long-lived ambient
89
+ // trace can't accumulate spans for the life of the process (an unbounded memory leak).
90
+ if (this.data.spans.length < 1000)
91
+ this.data.spans.push(span);
87
92
  this.data.total_tokens += (span.input_tokens || 0) + (span.output_tokens || 0);
88
93
  this.data.total_cost += span.cost || 0;
89
94
  }
package/dist/transport.d.ts CHANGED
@@ -1,6 +1,8 @@
1
1
  export interface Transport {
2
2
  send(eventType: string, data: Record<string, unknown>): void;
3
3
  close(): void;
4
+ /** Drain in-flight data to the network (awaited on graceful shutdown). */
5
+ flush(): Promise<void>;
4
6
  }
5
7
  export declare class WSTransport implements Transport {
6
8
  private ws;
@@ -8,6 +10,7 @@ export declare class WSTransport implements Transport {
8
10
  private closed;
9
11
  private backoff;
10
12
  private queue;
13
+ private reconnectTimer;
11
14
  onError?: (type: string, message: string) => void;
12
15
  get isConnected(): boolean;
13
16
  connect(): void;
@@ -15,12 +18,15 @@ export declare class WSTransport implements Transport {
15
18
  private flushQueue;
16
19
  send(eventType: string, data: Record<string, unknown>): void;
17
20
  close(): void;
21
+ /** Wait for the socket's send buffer to drain so the final trace_ended actually leaves
22
+ * the process before exit. Best-effort with a hard timeout. */
23
+ flush(): Promise<void>;
18
24
  }
19
25
  export declare class HTTPTransport implements Transport {
20
26
  private traceData;
21
27
  private spans;
22
28
  send(eventType: string, data: Record<string, unknown>): void;
23
- flush(): void;
29
+ flush(): Promise<void>;
24
30
  private buildSpans;
25
31
  close(): void;
26
32
  }
package/dist/transport.js CHANGED
@@ -6,6 +6,7 @@ export class WSTransport {
6
6
  closed = false;
7
7
  backoff = 1000;
8
8
  queue = [];
9
+ reconnectTimer = null;
9
10
  onError;
10
11
  get isConnected() { return this.connected; }
11
12
  connect() {
@@ -15,6 +16,10 @@ export class WSTransport {
15
16
  const url = `${cfg.wsUrl}/ws/v1/stream`;
16
17
  this.ws = new WebSocket(url);
17
18
  this.ws.on("open", () => {
19
+ // Unref the underlying socket so a short-lived script (the common SDK usage) can exit
20
+ // once its work is done instead of hanging on an open WebSocket. Graceful shutdown
21
+ // still drains via flush()/beforeExit.
22
+ this.ws?._socket?.unref?.();
18
23
  this.ws.send(JSON.stringify({ type: "auth", api_key: cfg.apiKey }));
19
24
  });
20
25
  this.ws.on("message", (raw) => {
@@ -60,7 +65,9 @@ export class WSTransport {
60
65
  this.connected = false;
61
66
  this.ws = null;
62
67
  if (!this.closed) {
63
- setTimeout(() => this.reconnect(), this.backoff * (0.5 + Math.random() * 0.5));
68
+ this.reconnectTimer = setTimeout(() => this.reconnect(), this.backoff * (0.5 + Math.random() * 0.5));
69
+ // Don't let the reconnect timer keep the event loop (and the process) alive.
70
+ this.reconnectTimer?.unref?.();
64
71
  this.backoff = Math.min(this.backoff * 2, 30000);
65
72
  }
66
73
  });
@@ -92,12 +99,24 @@ export class WSTransport {
92
99
  }
93
100
  close() {
94
101
  this.closed = true;
102
+ if (this.reconnectTimer) {
103
+ clearTimeout(this.reconnectTimer);
104
+ this.reconnectTimer = null;
105
+ }
95
106
  if (this.ws) {
96
107
  this.ws.close();
97
108
  this.ws = null;
98
109
  }
99
110
  this.connected = false;
100
111
  }
112
+ /** Wait for the socket's send buffer to drain so the final trace_ended actually leaves
113
+ * the process before exit. Best-effort with a hard timeout. */
114
+ async flush() {
115
+ const start = Date.now();
116
+ while (this.ws && this.ws.readyState === WebSocket.OPEN && this.ws.bufferedAmount > 0 && Date.now() - start < 2000) {
117
+ await new Promise((r) => setTimeout(r, 50));
118
+ }
119
+ }
101
120
  }
102
121
  export class HTTPTransport {
103
122
  traceData = null;
@@ -112,28 +131,47 @@ export class HTTPTransport {
112
131
  else if (eventType === "trace_ended") {
113
132
  if (this.traceData)
114
133
  Object.assign(this.traceData, data);
115
- this.flush();
134
+ void this.flush();
116
135
  }
117
136
  }
118
- flush() {
137
+ async flush() {
119
138
  if (!this.traceData)
120
139
  return;
121
140
  const cfg = getConfig();
122
141
  const url = `${cfg.baseUrl}/api/v1/traces`;
123
142
  const body = { ...this.traceData, spans: this.buildSpans() };
124
143
  const payload = JSON.stringify(body);
125
- // Retry up to 3 times with exponential backoff
126
- const attempt = (n, delay) => {
127
- fetch(url, {
128
- method: "POST",
129
- headers: { "x-retrace-key": cfg.apiKey, "Content-Type": "application/json" },
130
- body: payload,
131
- }).catch(() => { if (n < 3)
132
- setTimeout(() => attempt(n + 1, delay * 2), delay); });
133
- };
134
- attempt(1, 1000);
144
+ // Clear first so a concurrent flush (e.g. trace_ended then shutdown drain) can't double-send.
135
145
  this.traceData = null;
136
146
  this.spans = [];
147
+ // Retry up to 3 times with exponential backoff; awaited so shutdown can drain it.
148
+ for (let n = 1; n <= 3; n++) {
149
+ try {
150
+ const res = await fetch(url, {
151
+ method: "POST",
152
+ headers: { "x-retrace-key": cfg.apiKey, "Content-Type": "application/json" },
153
+ body: payload,
154
+ });
155
+ if (res.ok)
156
+ return;
157
+ const txt = await res.text().catch(() => "");
158
+ // 4xx (except 429) is a client/payload error that won't succeed on retry — surface
159
+ // it loudly and stop, rather than silently dropping the trace.
160
+ if (res.status < 500 && res.status !== 429) {
161
+ console.error(`[retrace] trace upload rejected (HTTP ${res.status}): ${txt.slice(0, 300)}`);
162
+ return;
163
+ }
164
+ // 5xx / 429 — transient; retry.
165
+ if (n === 3)
166
+ console.error(`[retrace] trace upload failed after ${n} attempts (HTTP ${res.status}): ${txt.slice(0, 200)}`);
167
+ }
168
+ catch (err) {
169
+ if (n === 3)
170
+ console.error(`[retrace] trace upload network error after ${n} attempts: ${err?.message ?? err}`);
171
+ }
172
+ if (n < 3)
173
+ await new Promise((r) => setTimeout(r, 1000 * n));
174
+ }
137
175
  }
138
176
  buildSpans() {
139
177
  const merged = new Map();
@@ -150,7 +188,7 @@ export class HTTPTransport {
150
188
  return [...merged.values()];
151
189
  }
152
190
  close() {
153
- this.flush();
191
+ void this.flush();
154
192
  }
155
193
  }
156
194
  export function createTransport(mode = "auto") {
@@ -214,5 +252,24 @@ export function createTransport(mode = "auto") {
214
252
  http.close();
215
253
  }
216
254
  },
255
+ async flush() {
256
+ if (!decided) {
257
+ // Never connected over WS — force the HTTP fallback and drain the buffer so the
258
+ // final trace isn't lost on shutdown.
259
+ decided = true;
260
+ useWs = false;
261
+ clearTimeout(fallbackTimer);
262
+ ws.close();
263
+ for (const item of buffer.splice(0))
264
+ http.send(item.eventType, item.data);
265
+ await http.flush();
266
+ }
267
+ else if (useWs) {
268
+ await ws.flush();
269
+ }
270
+ else {
271
+ await http.flush();
272
+ }
273
+ },
217
274
  };
218
275
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "retrace-sdk",
3
- "version": "0.9.0",
3
+ "version": "0.11.0",
4
4
  "description": "The execution replay engine for AI agents. Record, replay, fork, and share agent executions.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",