npm - retrace-sdk - Versions diffs - 0.11.2 → 0.11.4 - Mend

retrace-sdk 0.11.2 → 0.11.4

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (11)

package/dist/interceptors/anthropic.js +44 -29
package/dist/interceptors/gemini.js +2 -2
package/dist/interceptors/openai.js +49 -30
package/dist/recorder.d.ts +4 -0
package/dist/recorder.js +11 -7
package/dist/replay.d.ts +3 -0
package/dist/resume.d.ts +3 -0
package/dist/resume.js +2 -0
package/dist/utils.d.ts +3 -0
package/dist/utils.js +10 -0
package/package.json +1 -1

package/dist/interceptors/anthropic.js CHANGED Viewed

@@ -1,7 +1,8 @@
 import { SpanType } from "../trace.js";
-import { genId, nowIso, truncateJson } from "../utils.js";
+import { genId, nowIso, truncateJson, wasTruncated } from "../utils.js";
 import { isReplaying, consumeCassetteEntry } from "../replay.js";
 import { emitAnthropicToolCalls, emitAnthropicToolResults, parseToolArgs, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
+import { dispatchRegisterOpenSpan, dispatchUnregisterOpenSpan } from "./_dispatch.js";
 const PRICING = {
     "claude-opus-4.7": [5.0, 25.0],
     "claude-opus-4.6": [5.0, 25.0],
@@ -22,15 +23,18 @@ function calcCost(model, inputTokens, outputTokens) {
 }
 let originalCreate = null;
 let installed = false;
+// Set SYNCHRONOUSLY before the async import() so a second concurrent install can't double-wrap the
+// prototype. (`installed` is set inside the .then() and is therefore too late to guard the race.)
+let installStarted = false;
 let onSpanCallback = null;
 export function installAnthropicInterceptor(onSpan) {
-    if (installed) {
-        onSpanCallback = onSpan;
-        resetToolResultDedup();
-        return;
-    }
+    // Always refresh the active callback; the prototype PATCH must happen at most once (a synchronous
+    // guard so two concurrent installs can't both patch and double-wrap create() → doubled spans).
     onSpanCallback = onSpan;
     resetToolResultDedup();
+    if (installStarted)
+        return;
+    installStarted = true;
     import("@anthropic-ai/sdk").then((anthropicMod) => {
         // eslint-disable-next-line @typescript-eslint/no-explicit-any
         const mod = anthropicMod;
@@ -44,7 +48,7 @@ export function installAnthropicInterceptor(onSpan) {
         originalCreate = proto.create;
         proto.create = createPatchedCreate();
         installed = true;
-    }).catch(() => { });
+    }).catch(() => { installStarted = false; });
 }
 function createPatchedCreate() {
     return async function (...args) {
@@ -63,7 +67,7 @@ function createPatchedCreate() {
         if (isReplaying()) {
             const entry = consumeCassetteEntry("anthropic.messages.create", "llm_call");
             if (entry) {
-                const output = typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output || "");
+                const output = entry.output_raw ?? (typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output || ""));
                 const span = {
                     id: spanId, trace_id: "", parent_id: null,
                     span_type: SpanType.LLM_CALL, name: "anthropic.messages.create", model,
@@ -93,30 +97,41 @@ function createPatchedCreate() {
                 const toolAcc = {};
                 // eslint-disable-next-line @typescript-eslint/no-explicit-any
                 const originalIterator = result[Symbol.asyncIterator]();
+                // Two-phase capture: finalize EXACTLY ONCE on clean drain (complete), early break / error
+                // (partial), or trace-end/exit (partial, via the sink) — never silently drop the span.
+                let finalized = false;
+                const finalize = (reason) => {
+                    if (finalized)
+                        return;
+                    finalized = true;
+                    dispatchUnregisterOpenSpan(spanId);
+                    const durationMs = Date.now() - startMs;
+                    const output = chunks.join("");
+                    const span = {
+                        id: spanId, trace_id: "", parent_id: null,
+                        span_type: SpanType.LLM_CALL, name: "anthropic.messages.create", model,
+                        input: truncateJson({ messages: messages.slice(0, 10) }),
+                        output: truncateJson(output),
+                        input_tokens: inputTokens, output_tokens: outputTokens,
+                        cost: calcCost(model, inputTokens, outputTokens),
+                        duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
+                        metadata: { streaming: true, ...(reason === "partial" ? { partial: true } : {}), ...(wasTruncated(output) ? { truncated: true } : {}), ...spanMeta },
+                    };
+                    onSpanCallback?.(span);
+                    if (onSpanCallback && reason === "complete") {
+                        emitAnthropicToolResults(messages, onSpanCallback);
+                        const blocks = Object.values(toolAcc).map((t) => ({ type: "tool_use", id: t.id, name: t.name, input: parseToolArgs(t.json) }));
+                        emitAnthropicToolCalls(blocks, spanId, model, onSpanCallback);
+                    }
+                };
+                dispatchRegisterOpenSpan(spanId, () => finalize("partial"));
                 const wrappedStream = {
                     [Symbol.asyncIterator]() {
                         return {
                             async next() {
                                 const { value, done } = await originalIterator.next();
                                 if (done) {
-                                    const durationMs = Date.now() - startMs;
-                                    const output = chunks.join("");
-                                    const span = {
-                                        id: spanId, trace_id: "", parent_id: null,
-                                        span_type: SpanType.LLM_CALL, name: "anthropic.messages.create", model,
-                                        input: truncateJson({ messages: messages.slice(0, 10) }),
-                                        output: truncateJson(output),
-                                        input_tokens: inputTokens, output_tokens: outputTokens,
-                                        cost: calcCost(model, inputTokens, outputTokens),
-                                        duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
-                                        metadata: { streaming: true, ...spanMeta },
-                                    };
-                                    onSpanCallback?.(span);
-                                    if (onSpanCallback) {
-                                        emitAnthropicToolResults(messages, onSpanCallback);
-                                        const blocks = Object.values(toolAcc).map((t) => ({ type: "tool_use", id: t.id, name: t.name, input: parseToolArgs(t.json) }));
-                                        emitAnthropicToolCalls(blocks, spanId, model, onSpanCallback);
-                                    }
+                                    finalize("complete");
                                     return { value: undefined, done: true };
                                 }
                                 // Collect content_block_delta text
@@ -142,8 +157,8 @@ function createPatchedCreate() {
                                 }
                                 return { value, done: false };
                             },
-                            return() { return originalIterator.return?.() ?? Promise.resolve({ value: undefined, done: true }); },
-                            throw(e) { return originalIterator.throw?.(e) ?? Promise.reject(e); },
+                            return() { finalize("partial"); return originalIterator.return?.() ?? Promise.resolve({ value: undefined, done: true }); },
+                            throw(e) { finalize("partial"); return originalIterator.throw?.(e) ?? Promise.reject(e); },
                         };
                     },
                 };
@@ -164,7 +179,7 @@ function createPatchedCreate() {
                 input_tokens: inputTokens, output_tokens: outputTokens,
                 cost: calcCost(model, inputTokens, outputTokens),
                 duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
-                ...(Object.keys(spanMeta).length ? { metadata: spanMeta } : {}),
+                ...(Object.keys(spanMeta).length || wasTruncated(output) ? { metadata: { ...spanMeta, ...(wasTruncated(output) ? { truncated: true } : {}) } } : {}),
             };
             onSpanCallback?.(span);
             // Auto-capture tool usage (tool_use blocks in response, tool_result blocks in input).

package/dist/interceptors/gemini.js CHANGED Viewed

@@ -45,7 +45,7 @@ function wrapGenerate(original) {
         if (isReplaying()) {
             const entry = consumeCassetteEntry("retrace.ai.generate", "llm_call");
             if (entry) {
-                return { text: entry.output || "", usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 }, candidates: [] };
+                return { text: entry.output_raw ?? (entry.output || ""), usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 }, candidates: [] };
             }
         }
         try {
@@ -104,7 +104,7 @@ function wrapStream(original) {
         if (isReplaying()) {
             const entry = consumeCassetteEntry("retrace.ai.generate", "llm_call");
             if (entry) {
-                const text = entry.output || "";
+                const text = entry.output_raw ?? (entry.output || "");
                 async function* mockStream() { yield { text, usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 } }; }
                 return mockStream();
             }

package/dist/interceptors/openai.js CHANGED Viewed

@@ -1,9 +1,10 @@
 import { SpanType } from "../trace.js";
-import { genId, nowIso, truncateJson } from "../utils.js";
+import { genId, nowIso, truncateJson, wasTruncated } from "../utils.js";
 import { isReplaying, consumeCassetteEntry } from "../replay.js";
 import { getConfig } from "../config.js";
 import { RetraceRateLimitError, RetraceAuthError, RetraceConnectionError } from "../errors.js";
 import { emitOpenAIToolCalls, emitOpenAIToolResults, parseToolArgs, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
+import { dispatchRegisterOpenSpan, dispatchUnregisterOpenSpan } from "./_dispatch.js";
 /** Hardcoded fallback pricing ($/1M tokens: [input, output]). Updated periodically. */
 const FALLBACK_PRICING = {
     "gpt-5.5-pro": [30.0, 180.0],
@@ -60,15 +61,19 @@ function calcCost(model, inputTokens, outputTokens) {
 }
 let originalCreate = null;
 let installed = false;
+// Set SYNCHRONOUSLY before the async import() so a second concurrent install can't double-wrap the
+// prototype. (`installed` is set inside the .then() and is therefore too late to guard the race.)
+let installStarted = false;
 let onSpanCallback = null;
 export function installOpenAIInterceptor(onSpan) {
-    if (installed) {
-        onSpanCallback = onSpan;
-        resetToolResultDedup();
-        return;
-    }
+    // Always refresh the active callback; the prototype PATCH must happen at most once. The guard is
+    // a synchronous flag set before import() so two concurrent installs (e.g. two recorders starting
+    // before "openai" resolves) can't both patch and double-wrap create() → doubled spans/billing.
     onSpanCallback = onSpan;
     resetToolResultDedup();
+    if (installStarted)
+        return;
+    installStarted = true;
     import("openai").then((openaiMod) => {
         // eslint-disable-next-line @typescript-eslint/no-explicit-any
         const mod = openaiMod;
@@ -89,7 +94,7 @@ export function installOpenAIInterceptor(onSpan) {
         originalCreate = proto.create;
         proto.create = createPatchedCreate();
         installed = true;
-    }).catch(() => { });
+    }).catch(() => { installStarted = false; });
 }
 function createPatchedCreate() {
     return async function (...args) {
@@ -120,7 +125,7 @@ function createPatchedCreate() {
         if (isReplaying()) {
             const entry = consumeCassetteEntry("openai.chat.completions.create", "llm_call");
             if (entry) {
-                const output = typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output || "");
+                const output = entry.output_raw ?? (typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output || ""));
                 const span = {
                     id: spanId, trace_id: "", parent_id: null,
                     span_type: SpanType.LLM_CALL, name: "openai.chat.completions.create", model,
@@ -151,31 +156,43 @@ function createPatchedCreate() {
                 const toolAcc = {};
                 // eslint-disable-next-line @typescript-eslint/no-explicit-any
                 const originalIterator = result[Symbol.asyncIterator]();
+                // Two-phase capture: register an OPEN span now and finalize EXACTLY ONCE — on clean drain
+                // (complete), on early break / error (partial), or at trace-end/exit (partial, via the sink).
+                // Previously the span was emitted only in the `done` branch, so an abandoned or errored
+                // stream silently lost its span entirely.
+                let finalized = false;
+                const finalize = (reason) => {
+                    if (finalized)
+                        return;
+                    finalized = true;
+                    dispatchUnregisterOpenSpan(spanId);
+                    const durationMs = Date.now() - startMs;
+                    const output = chunks.join("");
+                    const span = {
+                        id: spanId, trace_id: "", parent_id: null,
+                        span_type: SpanType.LLM_CALL, name: "openai.chat.completions.create", model,
+                        input: truncateJson({ messages: messages.slice(0, 10) }),
+                        output: truncateJson(output),
+                        input_tokens: inputTokens, output_tokens: outputTokens,
+                        cost: calcCost(model, inputTokens, outputTokens),
+                        duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
+                        metadata: { streaming: true, ...(reason === "partial" ? { partial: true } : {}), ...(wasTruncated(output) ? { truncated: true } : {}), ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) },
+                    };
+                    onSpanCallback?.(span);
+                    if (onSpanCallback && reason === "complete") {
+                        emitOpenAIToolResults(messages, onSpanCallback);
+                        const accMsg = { tool_calls: Object.values(toolAcc).map((t) => ({ id: t.id, function: { name: t.name, arguments: parseToolArgs(t.args) } })) };
+                        emitOpenAIToolCalls(accMsg, spanId, model, onSpanCallback);
+                    }
+                };
+                dispatchRegisterOpenSpan(spanId, () => finalize("partial"));
                 const wrappedStream = {
                     [Symbol.asyncIterator]() {
                         return {
                             async next() {
                                 const { value, done } = await originalIterator.next();
                                 if (done) {
-                                    // Stream complete — emit span
-                                    const durationMs = Date.now() - startMs;
-                                    const output = chunks.join("");
-                                    const span = {
-                                        id: spanId, trace_id: "", parent_id: null,
-                                        span_type: SpanType.LLM_CALL, name: "openai.chat.completions.create", model,
-                                        input: truncateJson({ messages: messages.slice(0, 10) }),
-                                        output: truncateJson(output),
-                                        input_tokens: inputTokens, output_tokens: outputTokens,
-                                        cost: calcCost(model, inputTokens, outputTokens),
-                                        duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
-                                        metadata: { streaming: true, ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) },
-                                    };
-                                    onSpanCallback?.(span);
-                                    if (onSpanCallback) {
-                                        emitOpenAIToolResults(messages, onSpanCallback);
-                                        const accMsg = { tool_calls: Object.values(toolAcc).map((t) => ({ id: t.id, function: { name: t.name, arguments: parseToolArgs(t.args) } })) };
-                                        emitOpenAIToolCalls(accMsg, spanId, model, onSpanCallback);
-                                    }
+                                    finalize("complete");
                                     return { value: undefined, done: true };
                                 }
                                 // Collect content delta
@@ -203,8 +220,10 @@ function createPatchedCreate() {
                                 }
                                 return { value, done: false };
                             },
-                            return() { return originalIterator.return?.() ?? Promise.resolve({ value: undefined, done: true }); },
-                            throw(e) { return originalIterator.throw?.(e) ?? Promise.reject(e); },
+                            // Early break (consumer stops iterating) and errors must still finalize the span —
+                            // otherwise the streamed work is silently lost.
+                            return() { finalize("partial"); return originalIterator.return?.() ?? Promise.resolve({ value: undefined, done: true }); },
+                            throw(e) { finalize("partial"); return originalIterator.throw?.(e) ?? Promise.reject(e); },
                         };
                     },
                     // Preserve tee/controller methods if present
@@ -235,7 +254,7 @@ function createPatchedCreate() {
                 duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
                 ...(tokenIds?.length ? { token_ids: tokenIds } : {}),
                 ...(logprobValues?.length ? { logprobs: logprobValues } : {}),
-                ...(Object.keys(spanMetadata).length ? { metadata: spanMetadata } : {}),
+                ...(Object.keys(spanMetadata).length || wasTruncated(output) ? { metadata: { ...spanMetadata, ...(wasTruncated(output) ? { truncated: true } : {}) } } : {}),
             };
             onSpanCallback?.(span);
             // Auto-capture tool usage: tool_result spans from the fed-back tool messages (deduped),

package/dist/recorder.d.ts CHANGED Viewed

@@ -10,6 +10,9 @@ export interface RecordOptions {
     sessionId?: string;
     /** When set, spans emitted before this span ID is encountered are suppressed (pre-fork filtering). */
     forkPointSpanId?: string;
+    /** 0-based ordinal of the fork-point span among the original ordered spans. Suppression is
+     *  positional: spans with counter <= index are suppressed, emission starts at index+1. */
+    forkPointIndex?: number;
 }
 export declare class TraceRecorder {
     private builder;
@@ -19,6 +22,7 @@ export declare class TraceRecorder {
     private prevFallback;
     private prevFallbackSink;
     private forkPointSpanId;
+    private forkPointIndex;
     private forkPointReached;
     private spanCounter;
     output: unknown;

package/dist/recorder.js CHANGED Viewed

@@ -55,6 +55,7 @@ export class TraceRecorder {
     prevFallback = null;
     prevFallbackSink = null;
     forkPointSpanId;
+    forkPointIndex;
     forkPointReached = false;
     spanCounter = 0;
     output = undefined;
@@ -63,8 +64,10 @@ export class TraceRecorder {
         this.builder = new TraceBuilder();
         this.transport = getSharedTransport();
         this.forkPointSpanId = opts?.forkPointSpanId;
-        // If no fork point specified, all spans pass through
-        this.forkPointReached = !opts?.forkPointSpanId;
+        this.forkPointIndex = opts?.forkPointIndex;
+        // Suppress pre-fork spans only when BOTH a fork point and its positional index are known;
+        // otherwise (normal recording, or a fork command without an index) emit everything.
+        this.forkPointReached = !opts?.forkPointSpanId || opts?.forkPointIndex === undefined;
         const cfg = getConfig();
         if (cfg.projectId)
             this.builder.setProjectId(cfg.projectId);
@@ -160,12 +163,13 @@ export class TraceRecorder {
     }
     addSpan(span) {
         this.spanCounter++;
-        // Fork point filtering: skip spans until the fork point is reached.
-        // The server copies pre-fork spans; the SDK only emits from fork point onward.
+        // Fork-point filtering: during cascade replay suppress the pre-fork spans (the server already
+        // has them / they replay from the cassette) and emit only from the fork point onward. The fork
+        // point is the (forkPointIndex)-th span (0-based), i.e. the (index+1)-th counted here, so
+        // suppress while spanCounter <= index and emit once spanCounter > index. (Previously this
+        // compared spanCounter >= 1, which is always true after the increment ⇒ zero suppression.)
         if (!this.forkPointReached) {
-            if (this.forkPointSpanId && this.spanCounter >= 1) {
-                // Use span counter as proxy — the Nth span corresponds to the fork point index.
-                // Mark as reached so all subsequent spans pass through.
+            if (this.forkPointIndex !== undefined && this.spanCounter > this.forkPointIndex) {
                 this.forkPointReached = true;
             }
             else {

package/dist/replay.d.ts CHANGED Viewed

@@ -16,6 +16,9 @@ export interface CassetteEntry {
     model: string | null;
     input: unknown;
     output: unknown;
+    /** Exact original token for byte-identical replay (e.g. scalar-numeric outputs whose JSON.parse
+     *  form lost precision). Preferred over `output` when present. */
+    output_raw?: string;
     error: string | null;
 }
 export interface ReplayCommand {

package/dist/resume.d.ts CHANGED Viewed

@@ -12,6 +12,9 @@ export interface ResumeCommand {
     traceId: string;
     traceName: string;
     forkPointSpanId: string;
+    /** 0-based ordinal of the fork-point span among the original ordered spans. Pre-fork spans
+     *  (counter <= index) are suppressed on re-exec; the server already has them. */
+    forkPointIndex?: number;
     modifiedInput: unknown;
     originalArgs?: unknown[];
 }

package/dist/resume.js CHANGED Viewed

@@ -32,6 +32,7 @@ export async function handleResume(command) {
                 _cascade_replay: true,
             },
             forkPointSpanId: command.forkPointSpanId,
+            forkPointIndex: command.forkPointIndex,
         });
         recorder.start(`Fork: ${command.traceName}`, command.modifiedInput);
         // Determine args for re-execution
@@ -59,6 +60,7 @@ export function parseResumeMessage(msg) {
         traceId: msg.data.traceId,
         traceName: msg.data.traceName,
         forkPointSpanId: msg.data.forkPointSpanId,
+        forkPointIndex: msg.data.forkPointIndex,
         modifiedInput: msg.data.modifiedInput,
         originalArgs: msg.data.originalArgs,
     };

package/dist/utils.d.ts CHANGED Viewed

@@ -8,6 +8,9 @@ export declare function utcNow(): Date;
  */
 export declare function shouldSample(rate: number, seed?: string, key?: string): boolean;
 export declare function truncateJson(obj: unknown, maxBytes?: number): unknown;
+/** True if truncateJson(obj, maxBytes) would drop bytes. Used to flag a span's output as truncated
+ *  so the server refuses to byte-replay it (the replayed value would differ from the original). */
+export declare function wasTruncated(obj: unknown, maxBytes?: number): boolean;
 /** Configure per-span-type truncation limits. */
 export declare function setTruncationLimits(limits: Record<string, number>): void;
 /** Get the truncation limit for a given span type. */

package/dist/utils.js CHANGED Viewed

@@ -40,6 +40,16 @@ export function truncateJson(obj, maxBytes = 10240) {
         return String(obj).slice(0, maxBytes);
     }
 }
+/** True if truncateJson(obj, maxBytes) would drop bytes. Used to flag a span's output as truncated
+ *  so the server refuses to byte-replay it (the replayed value would differ from the original). */
+export function wasTruncated(obj, maxBytes = 10240) {
+    try {
+        return Buffer.byteLength(JSON.stringify(obj)) > maxBytes;
+    }
+    catch {
+        return String(obj).length > maxBytes;
+    }
+}
 /** Default per-span-type truncation limits (bytes). */
 const DEFAULT_TRUNCATION_LIMITS = {
     llm_call: 51200, // 50KB — LLM prompts can be large

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "retrace-sdk",
-  "version": "0.11.2",
+  "version": "0.11.4",
   "description": "The execution replay engine for AI agents. Record, replay, fork, and share agent executions.",
   "type": "module",
   "main": "dist/index.js",