retrace-sdk 0.11.2 → 0.11.4

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
@@ -1,7 +1,8 @@
1
1
  import { SpanType } from "../trace.js";
2
- import { genId, nowIso, truncateJson } from "../utils.js";
2
+ import { genId, nowIso, truncateJson, wasTruncated } from "../utils.js";
3
3
  import { isReplaying, consumeCassetteEntry } from "../replay.js";
4
4
  import { emitAnthropicToolCalls, emitAnthropicToolResults, parseToolArgs, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
5
+ import { dispatchRegisterOpenSpan, dispatchUnregisterOpenSpan } from "./_dispatch.js";
5
6
  const PRICING = {
6
7
  "claude-opus-4.7": [5.0, 25.0],
7
8
  "claude-opus-4.6": [5.0, 25.0],
@@ -22,15 +23,18 @@ function calcCost(model, inputTokens, outputTokens) {
22
23
  }
23
24
  let originalCreate = null;
24
25
  let installed = false;
26
+ // Set SYNCHRONOUSLY before the async import() so a second concurrent install can't double-wrap the
27
+ // prototype. (`installed` is set inside the .then() and is therefore too late to guard the race.)
28
+ let installStarted = false;
25
29
  let onSpanCallback = null;
26
30
  export function installAnthropicInterceptor(onSpan) {
27
- if (installed) {
28
- onSpanCallback = onSpan;
29
- resetToolResultDedup();
30
- return;
31
- }
31
+ // Always refresh the active callback; the prototype PATCH must happen at most once (a synchronous
32
+ // guard so two concurrent installs can't both patch and double-wrap create() → doubled spans).
32
33
  onSpanCallback = onSpan;
33
34
  resetToolResultDedup();
35
+ if (installStarted)
36
+ return;
37
+ installStarted = true;
34
38
  import("@anthropic-ai/sdk").then((anthropicMod) => {
35
39
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
36
40
  const mod = anthropicMod;
@@ -44,7 +48,7 @@ export function installAnthropicInterceptor(onSpan) {
44
48
  originalCreate = proto.create;
45
49
  proto.create = createPatchedCreate();
46
50
  installed = true;
47
- }).catch(() => { });
51
+ }).catch(() => { installStarted = false; });
48
52
  }
49
53
  function createPatchedCreate() {
50
54
  return async function (...args) {
@@ -63,7 +67,7 @@ function createPatchedCreate() {
63
67
  if (isReplaying()) {
64
68
  const entry = consumeCassetteEntry("anthropic.messages.create", "llm_call");
65
69
  if (entry) {
66
- const output = typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output || "");
70
+ const output = entry.output_raw ?? (typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output || ""));
67
71
  const span = {
68
72
  id: spanId, trace_id: "", parent_id: null,
69
73
  span_type: SpanType.LLM_CALL, name: "anthropic.messages.create", model,
@@ -93,30 +97,41 @@ function createPatchedCreate() {
93
97
  const toolAcc = {};
94
98
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
95
99
  const originalIterator = result[Symbol.asyncIterator]();
100
+ // Two-phase capture: finalize EXACTLY ONCE on clean drain (complete), early break / error
101
+ // (partial), or trace-end/exit (partial, via the sink) — never silently drop the span.
102
+ let finalized = false;
103
+ const finalize = (reason) => {
104
+ if (finalized)
105
+ return;
106
+ finalized = true;
107
+ dispatchUnregisterOpenSpan(spanId);
108
+ const durationMs = Date.now() - startMs;
109
+ const output = chunks.join("");
110
+ const span = {
111
+ id: spanId, trace_id: "", parent_id: null,
112
+ span_type: SpanType.LLM_CALL, name: "anthropic.messages.create", model,
113
+ input: truncateJson({ messages: messages.slice(0, 10) }),
114
+ output: truncateJson(output),
115
+ input_tokens: inputTokens, output_tokens: outputTokens,
116
+ cost: calcCost(model, inputTokens, outputTokens),
117
+ duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
118
+ metadata: { streaming: true, ...(reason === "partial" ? { partial: true } : {}), ...(wasTruncated(output) ? { truncated: true } : {}), ...spanMeta },
119
+ };
120
+ onSpanCallback?.(span);
121
+ if (onSpanCallback && reason === "complete") {
122
+ emitAnthropicToolResults(messages, onSpanCallback);
123
+ const blocks = Object.values(toolAcc).map((t) => ({ type: "tool_use", id: t.id, name: t.name, input: parseToolArgs(t.json) }));
124
+ emitAnthropicToolCalls(blocks, spanId, model, onSpanCallback);
125
+ }
126
+ };
127
+ dispatchRegisterOpenSpan(spanId, () => finalize("partial"));
96
128
  const wrappedStream = {
97
129
  [Symbol.asyncIterator]() {
98
130
  return {
99
131
  async next() {
100
132
  const { value, done } = await originalIterator.next();
101
133
  if (done) {
102
- const durationMs = Date.now() - startMs;
103
- const output = chunks.join("");
104
- const span = {
105
- id: spanId, trace_id: "", parent_id: null,
106
- span_type: SpanType.LLM_CALL, name: "anthropic.messages.create", model,
107
- input: truncateJson({ messages: messages.slice(0, 10) }),
108
- output: truncateJson(output),
109
- input_tokens: inputTokens, output_tokens: outputTokens,
110
- cost: calcCost(model, inputTokens, outputTokens),
111
- duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
112
- metadata: { streaming: true, ...spanMeta },
113
- };
114
- onSpanCallback?.(span);
115
- if (onSpanCallback) {
116
- emitAnthropicToolResults(messages, onSpanCallback);
117
- const blocks = Object.values(toolAcc).map((t) => ({ type: "tool_use", id: t.id, name: t.name, input: parseToolArgs(t.json) }));
118
- emitAnthropicToolCalls(blocks, spanId, model, onSpanCallback);
119
- }
134
+ finalize("complete");
120
135
  return { value: undefined, done: true };
121
136
  }
122
137
  // Collect content_block_delta text
@@ -142,8 +157,8 @@ function createPatchedCreate() {
142
157
  }
143
158
  return { value, done: false };
144
159
  },
145
- return() { return originalIterator.return?.() ?? Promise.resolve({ value: undefined, done: true }); },
146
- throw(e) { return originalIterator.throw?.(e) ?? Promise.reject(e); },
160
+ return() { finalize("partial"); return originalIterator.return?.() ?? Promise.resolve({ value: undefined, done: true }); },
161
+ throw(e) { finalize("partial"); return originalIterator.throw?.(e) ?? Promise.reject(e); },
147
162
  };
148
163
  },
149
164
  };
@@ -164,7 +179,7 @@ function createPatchedCreate() {
164
179
  input_tokens: inputTokens, output_tokens: outputTokens,
165
180
  cost: calcCost(model, inputTokens, outputTokens),
166
181
  duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
167
- ...(Object.keys(spanMeta).length ? { metadata: spanMeta } : {}),
182
+ ...(Object.keys(spanMeta).length || wasTruncated(output) ? { metadata: { ...spanMeta, ...(wasTruncated(output) ? { truncated: true } : {}) } } : {}),
168
183
  };
169
184
  onSpanCallback?.(span);
170
185
  // Auto-capture tool usage (tool_use blocks in response, tool_result blocks in input).
@@ -45,7 +45,7 @@ function wrapGenerate(original) {
45
45
  if (isReplaying()) {
46
46
  const entry = consumeCassetteEntry("retrace.ai.generate", "llm_call");
47
47
  if (entry) {
48
- return { text: entry.output || "", usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 }, candidates: [] };
48
+ return { text: entry.output_raw ?? (entry.output || ""), usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 }, candidates: [] };
49
49
  }
50
50
  }
51
51
  try {
@@ -104,7 +104,7 @@ function wrapStream(original) {
104
104
  if (isReplaying()) {
105
105
  const entry = consumeCassetteEntry("retrace.ai.generate", "llm_call");
106
106
  if (entry) {
107
- const text = entry.output || "";
107
+ const text = entry.output_raw ?? (entry.output || "");
108
108
  async function* mockStream() { yield { text, usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 } }; }
109
109
  return mockStream();
110
110
  }
@@ -1,9 +1,10 @@
1
1
  import { SpanType } from "../trace.js";
2
- import { genId, nowIso, truncateJson } from "../utils.js";
2
+ import { genId, nowIso, truncateJson, wasTruncated } from "../utils.js";
3
3
  import { isReplaying, consumeCassetteEntry } from "../replay.js";
4
4
  import { getConfig } from "../config.js";
5
5
  import { RetraceRateLimitError, RetraceAuthError, RetraceConnectionError } from "../errors.js";
6
6
  import { emitOpenAIToolCalls, emitOpenAIToolResults, parseToolArgs, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
7
+ import { dispatchRegisterOpenSpan, dispatchUnregisterOpenSpan } from "./_dispatch.js";
7
8
  /** Hardcoded fallback pricing ($/1M tokens: [input, output]). Updated periodically. */
8
9
  const FALLBACK_PRICING = {
9
10
  "gpt-5.5-pro": [30.0, 180.0],
@@ -60,15 +61,19 @@ function calcCost(model, inputTokens, outputTokens) {
60
61
  }
61
62
  let originalCreate = null;
62
63
  let installed = false;
64
+ // Set SYNCHRONOUSLY before the async import() so a second concurrent install can't double-wrap the
65
+ // prototype. (`installed` is set inside the .then() and is therefore too late to guard the race.)
66
+ let installStarted = false;
63
67
  let onSpanCallback = null;
64
68
  export function installOpenAIInterceptor(onSpan) {
65
- if (installed) {
66
- onSpanCallback = onSpan;
67
- resetToolResultDedup();
68
- return;
69
- }
69
+ // Always refresh the active callback; the prototype PATCH must happen at most once. The guard is
70
+ // a synchronous flag set before import() so two concurrent installs (e.g. two recorders starting
71
+ // before "openai" resolves) can't both patch and double-wrap create() → doubled spans/billing.
70
72
  onSpanCallback = onSpan;
71
73
  resetToolResultDedup();
74
+ if (installStarted)
75
+ return;
76
+ installStarted = true;
72
77
  import("openai").then((openaiMod) => {
73
78
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
74
79
  const mod = openaiMod;
@@ -89,7 +94,7 @@ export function installOpenAIInterceptor(onSpan) {
89
94
  originalCreate = proto.create;
90
95
  proto.create = createPatchedCreate();
91
96
  installed = true;
92
- }).catch(() => { });
97
+ }).catch(() => { installStarted = false; });
93
98
  }
94
99
  function createPatchedCreate() {
95
100
  return async function (...args) {
@@ -120,7 +125,7 @@ function createPatchedCreate() {
120
125
  if (isReplaying()) {
121
126
  const entry = consumeCassetteEntry("openai.chat.completions.create", "llm_call");
122
127
  if (entry) {
123
- const output = typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output || "");
128
+ const output = entry.output_raw ?? (typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output || ""));
124
129
  const span = {
125
130
  id: spanId, trace_id: "", parent_id: null,
126
131
  span_type: SpanType.LLM_CALL, name: "openai.chat.completions.create", model,
@@ -151,31 +156,43 @@ function createPatchedCreate() {
151
156
  const toolAcc = {};
152
157
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
153
158
  const originalIterator = result[Symbol.asyncIterator]();
159
+ // Two-phase capture: register an OPEN span now and finalize EXACTLY ONCE — on clean drain
160
+ // (complete), on early break / error (partial), or at trace-end/exit (partial, via the sink).
161
+ // Previously the span was emitted only in the `done` branch, so an abandoned or errored
162
+ // stream silently lost its span entirely.
163
+ let finalized = false;
164
+ const finalize = (reason) => {
165
+ if (finalized)
166
+ return;
167
+ finalized = true;
168
+ dispatchUnregisterOpenSpan(spanId);
169
+ const durationMs = Date.now() - startMs;
170
+ const output = chunks.join("");
171
+ const span = {
172
+ id: spanId, trace_id: "", parent_id: null,
173
+ span_type: SpanType.LLM_CALL, name: "openai.chat.completions.create", model,
174
+ input: truncateJson({ messages: messages.slice(0, 10) }),
175
+ output: truncateJson(output),
176
+ input_tokens: inputTokens, output_tokens: outputTokens,
177
+ cost: calcCost(model, inputTokens, outputTokens),
178
+ duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
179
+ metadata: { streaming: true, ...(reason === "partial" ? { partial: true } : {}), ...(wasTruncated(output) ? { truncated: true } : {}), ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) },
180
+ };
181
+ onSpanCallback?.(span);
182
+ if (onSpanCallback && reason === "complete") {
183
+ emitOpenAIToolResults(messages, onSpanCallback);
184
+ const accMsg = { tool_calls: Object.values(toolAcc).map((t) => ({ id: t.id, function: { name: t.name, arguments: parseToolArgs(t.args) } })) };
185
+ emitOpenAIToolCalls(accMsg, spanId, model, onSpanCallback);
186
+ }
187
+ };
188
+ dispatchRegisterOpenSpan(spanId, () => finalize("partial"));
154
189
  const wrappedStream = {
155
190
  [Symbol.asyncIterator]() {
156
191
  return {
157
192
  async next() {
158
193
  const { value, done } = await originalIterator.next();
159
194
  if (done) {
160
- // Stream complete — emit span
161
- const durationMs = Date.now() - startMs;
162
- const output = chunks.join("");
163
- const span = {
164
- id: spanId, trace_id: "", parent_id: null,
165
- span_type: SpanType.LLM_CALL, name: "openai.chat.completions.create", model,
166
- input: truncateJson({ messages: messages.slice(0, 10) }),
167
- output: truncateJson(output),
168
- input_tokens: inputTokens, output_tokens: outputTokens,
169
- cost: calcCost(model, inputTokens, outputTokens),
170
- duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
171
- metadata: { streaming: true, ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) },
172
- };
173
- onSpanCallback?.(span);
174
- if (onSpanCallback) {
175
- emitOpenAIToolResults(messages, onSpanCallback);
176
- const accMsg = { tool_calls: Object.values(toolAcc).map((t) => ({ id: t.id, function: { name: t.name, arguments: parseToolArgs(t.args) } })) };
177
- emitOpenAIToolCalls(accMsg, spanId, model, onSpanCallback);
178
- }
195
+ finalize("complete");
179
196
  return { value: undefined, done: true };
180
197
  }
181
198
  // Collect content delta
@@ -203,8 +220,10 @@ function createPatchedCreate() {
203
220
  }
204
221
  return { value, done: false };
205
222
  },
206
- return() { return originalIterator.return?.() ?? Promise.resolve({ value: undefined, done: true }); },
207
- throw(e) { return originalIterator.throw?.(e) ?? Promise.reject(e); },
223
+ // Early break (consumer stops iterating) and errors must still finalize the span;
224
+ // otherwise the streamed work is silently lost.
225
+ return() { finalize("partial"); return originalIterator.return?.() ?? Promise.resolve({ value: undefined, done: true }); },
226
+ throw(e) { finalize("partial"); return originalIterator.throw?.(e) ?? Promise.reject(e); },
208
227
  };
209
228
  },
210
229
  // Preserve tee/controller methods if present
@@ -235,7 +254,7 @@ function createPatchedCreate() {
235
254
  duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
236
255
  ...(tokenIds?.length ? { token_ids: tokenIds } : {}),
237
256
  ...(logprobValues?.length ? { logprobs: logprobValues } : {}),
238
- ...(Object.keys(spanMetadata).length ? { metadata: spanMetadata } : {}),
257
+ ...(Object.keys(spanMetadata).length || wasTruncated(output) ? { metadata: { ...spanMetadata, ...(wasTruncated(output) ? { truncated: true } : {}) } } : {}),
239
258
  };
240
259
  onSpanCallback?.(span);
241
260
  // Auto-capture tool usage: tool_result spans from the fed-back tool messages (deduped),
@@ -10,6 +10,9 @@ export interface RecordOptions {
10
10
  sessionId?: string;
11
11
  /** When set, spans emitted before this span ID is encountered are suppressed (pre-fork filtering). */
12
12
  forkPointSpanId?: string;
13
+ /** 0-based ordinal of the fork-point span among the original ordered spans. Suppression is
14
+ * positional: spans whose 1-based counter is <= index are suppressed; emission starts with the fork-point span itself (counter index+1). */
15
+ forkPointIndex?: number;
13
16
  }
14
17
  export declare class TraceRecorder {
15
18
  private builder;
@@ -19,6 +22,7 @@ export declare class TraceRecorder {
19
22
  private prevFallback;
20
23
  private prevFallbackSink;
21
24
  private forkPointSpanId;
25
+ private forkPointIndex;
22
26
  private forkPointReached;
23
27
  private spanCounter;
24
28
  output: unknown;
package/dist/recorder.js CHANGED
@@ -55,6 +55,7 @@ export class TraceRecorder {
55
55
  prevFallback = null;
56
56
  prevFallbackSink = null;
57
57
  forkPointSpanId;
58
+ forkPointIndex;
58
59
  forkPointReached = false;
59
60
  spanCounter = 0;
60
61
  output = undefined;
@@ -63,8 +64,10 @@ export class TraceRecorder {
63
64
  this.builder = new TraceBuilder();
64
65
  this.transport = getSharedTransport();
65
66
  this.forkPointSpanId = opts?.forkPointSpanId;
66
- // If no fork point specified, all spans pass through
67
- this.forkPointReached = !opts?.forkPointSpanId;
67
+ this.forkPointIndex = opts?.forkPointIndex;
68
+ // Suppress pre-fork spans only when BOTH a fork point and its positional index are known;
69
+ // otherwise (normal recording, or a fork command without an index) emit everything.
70
+ this.forkPointReached = !opts?.forkPointSpanId || opts?.forkPointIndex === undefined;
68
71
  const cfg = getConfig();
69
72
  if (cfg.projectId)
70
73
  this.builder.setProjectId(cfg.projectId);
@@ -160,12 +163,13 @@ export class TraceRecorder {
160
163
  }
161
164
  addSpan(span) {
162
165
  this.spanCounter++;
163
- // Fork point filtering: skip spans until the fork point is reached.
164
- // The server copies pre-fork spans; the SDK only emits from fork point onward.
166
+ // Fork-point filtering: during cascade replay suppress the pre-fork spans (the server already
167
+ // has them / they replay from the cassette) and emit only from the fork point onward. The fork
168
+ // point is the (forkPointIndex)-th span (0-based), i.e. the (index+1)-th counted here, so
169
+ // suppress while spanCounter <= index and emit once spanCounter > index. (Previously this
170
+ // compared spanCounter >= 1, which is always true after the increment ⇒ zero suppression.)
165
171
  if (!this.forkPointReached) {
166
- if (this.forkPointSpanId && this.spanCounter >= 1) {
167
- // Use span counter as proxy — the Nth span corresponds to the fork point index.
168
- // Mark as reached so all subsequent spans pass through.
172
+ if (this.forkPointIndex !== undefined && this.spanCounter > this.forkPointIndex) {
169
173
  this.forkPointReached = true;
170
174
  }
171
175
  else {
package/dist/replay.d.ts CHANGED
@@ -16,6 +16,9 @@ export interface CassetteEntry {
16
16
  model: string | null;
17
17
  input: unknown;
18
18
  output: unknown;
19
+ /** Exact original token for byte-identical replay (e.g. scalar-numeric outputs whose JSON.parse
20
+ * form lost precision). Preferred over `output` when present. */
21
+ output_raw?: string;
19
22
  error: string | null;
20
23
  }
21
24
  export interface ReplayCommand {
package/dist/resume.d.ts CHANGED
@@ -12,6 +12,9 @@ export interface ResumeCommand {
12
12
  traceId: string;
13
13
  traceName: string;
14
14
  forkPointSpanId: string;
15
+ /** 0-based ordinal of the fork-point span among the original ordered spans. Pre-fork spans
16
+ * (counter <= index) are suppressed on re-exec; the server already has them. */
17
+ forkPointIndex?: number;
15
18
  modifiedInput: unknown;
16
19
  originalArgs?: unknown[];
17
20
  }
package/dist/resume.js CHANGED
@@ -32,6 +32,7 @@ export async function handleResume(command) {
32
32
  _cascade_replay: true,
33
33
  },
34
34
  forkPointSpanId: command.forkPointSpanId,
35
+ forkPointIndex: command.forkPointIndex,
35
36
  });
36
37
  recorder.start(`Fork: ${command.traceName}`, command.modifiedInput);
37
38
  // Determine args for re-execution
@@ -59,6 +60,7 @@ export function parseResumeMessage(msg) {
59
60
  traceId: msg.data.traceId,
60
61
  traceName: msg.data.traceName,
61
62
  forkPointSpanId: msg.data.forkPointSpanId,
63
+ forkPointIndex: msg.data.forkPointIndex,
62
64
  modifiedInput: msg.data.modifiedInput,
63
65
  originalArgs: msg.data.originalArgs,
64
66
  };
package/dist/utils.d.ts CHANGED
@@ -8,6 +8,9 @@ export declare function utcNow(): Date;
8
8
  */
9
9
  export declare function shouldSample(rate: number, seed?: string, key?: string): boolean;
10
10
  export declare function truncateJson(obj: unknown, maxBytes?: number): unknown;
11
+ /** True if truncateJson(obj, maxBytes) would drop bytes. Used to flag a span's output as truncated
12
+ * so the server refuses to byte-replay it (the replayed value would differ from the original). */
13
+ export declare function wasTruncated(obj: unknown, maxBytes?: number): boolean;
11
14
  /** Configure per-span-type truncation limits. */
12
15
  export declare function setTruncationLimits(limits: Record<string, number>): void;
13
16
  /** Get the truncation limit for a given span type. */
package/dist/utils.js CHANGED
@@ -40,6 +40,16 @@ export function truncateJson(obj, maxBytes = 10240) {
40
40
  return String(obj).slice(0, maxBytes);
41
41
  }
42
42
  }
43
+ /** True if truncateJson(obj, maxBytes) would drop bytes. Used to flag a span's output as truncated
44
+ * so the server refuses to byte-replay it (the replayed value would differ from the original). Compares the byte length of JSON.stringify(obj) against maxBytes (default 10240); if measuring that throws, falls back to comparing the character count of String(obj). */
45
+ export function wasTruncated(obj, maxBytes = 10240) {
46
+ try {
47
+ return Buffer.byteLength(JSON.stringify(obj)) > maxBytes; // size of the serialized form in bytes (Buffer.byteLength defaults to UTF-8)
48
+ }
49
+ catch {
50
+ return String(obj).length > maxBytes; // character-based, like the String(obj).slice(0, maxBytes) fallback in truncateJson
51
+ }
52
+ }
43
53
  /** Default per-span-type truncation limits (bytes). */
44
54
  const DEFAULT_TRUNCATION_LIMITS = {
45
55
  llm_call: 51200, // 50KB — LLM prompts can be large
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "retrace-sdk",
3
- "version": "0.11.2",
3
+ "version": "0.11.4",
4
4
  "description": "The execution replay engine for AI agents. Record, replay, fork, and share agent executions.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",