npm - retrace-sdk - Versions diffs - 0.6.0 → 0.10.0 - Mend

retrace-sdk 0.6.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/dist/adapters/index.d.ts +3 -0
package/dist/adapters/index.js +2 -0
package/dist/adapters/langchain.d.ts +17 -0
package/dist/adapters/langchain.js +111 -0
package/dist/adapters/vercel-ai.d.ts +43 -0
package/dist/adapters/vercel-ai.js +32 -0
package/dist/golden.d.ts +6 -0
package/dist/golden.js +17 -0
package/dist/index.d.ts +6 -0
package/dist/index.js +5 -0
package/dist/init.d.ts +30 -0
package/dist/init.js +83 -0
package/dist/interceptors/anthropic.js +29 -1
package/dist/interceptors/gemini.js +12 -0
package/dist/interceptors/openai.js +38 -1
package/dist/interceptors/tool-spans.d.ts +66 -0
package/dist/interceptors/tool-spans.js +213 -0
package/dist/recorder.d.ts +2 -0
package/dist/recorder.js +4 -0
package/dist/trace.js +6 -1
package/dist/transport.d.ts +6 -1
package/dist/transport.js +46 -13
package/package.json +14 -2

package/dist/adapters/index.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+export { createLangChainHandler } from "./langchain.js";
+export { retraceOnStepFinish, recordVercelStep } from "./vercel-ai.js";
+export type { AISDKStep } from "./vercel-ai.js";

package/dist/adapters/index.js ADDED Viewed

@@ -0,0 +1,2 @@
+export { createLangChainHandler } from "./langchain.js";
+export { retraceOnStepFinish, recordVercelStep } from "./vercel-ai.js";

package/dist/adapters/langchain.d.ts ADDED Viewed

@@ -0,0 +1,17 @@
+import type { TraceRecorder } from "../recorder.js";
+/**
+ * LangChain / LangGraph adapter for Retrace (JS).
+ *
+ * Returns a LangChain `BaseCallbackHandler` that records STRUCTURED tool / retriever / chain spans
+ * into the active Retrace trace, aligned with the detectors. LLM spans are already captured by the
+ * provider interceptors, so this handler does not emit `llm_call` spans.
+ *
+ * ```ts
+ * import { init } from "retrace-sdk";
+ * import { createLangChainHandler } from "retrace-sdk/adapters/langchain";
+ * init();
+ * const cb = await createLangChainHandler();
+ * await app.invoke(input, { callbacks: [cb] });   // works for LangChain + LangGraph
+ * ```
+ *
+ * @param recorder Recorder to write spans to. When omitted, the ambient recorder from
+ *   `getActiveRecorder()` is looked up on every callback; callbacks do nothing while none exists.
+ * @returns A promise for the handler instance (async because `@langchain/core` is imported lazily).
+ * @throws Error if `@langchain/core` cannot be imported.
+ */
+export declare function createLangChainHandler(recorder?: TraceRecorder): Promise<object>;

package/dist/adapters/langchain.js ADDED Viewed

@@ -0,0 +1,111 @@
+import { SpanType } from "../trace.js";
+import { getActiveRecorder } from "../init.js";
+/**
+ * LangChain / LangGraph adapter for Retrace (JS).
+ *
+ * Returns a LangChain `BaseCallbackHandler` that records STRUCTURED tool / retriever / chain spans
+ * into the active Retrace trace, aligned with the detectors. LLM spans are already captured by the
+ * provider interceptors, so this handler does not emit `llm_call` spans.
+ *
+ * ```ts
+ * import { init } from "retrace-sdk";
+ * import { createLangChainHandler } from "retrace-sdk/adapters/langchain";
+ * init();
+ * const cb = await createLangChainHandler();
+ * await app.invoke(input, { callbacks: [cb] });   // works for LangChain + LangGraph
+ * ```
+ */
+export async function createLangChainHandler(recorder) {
+    let BaseCallbackHandler;
+    try {
+        ({ BaseCallbackHandler } = await import("@langchain/core/callbacks/base"));
+    }
+    catch {
+        throw new Error("@langchain/core is not installed. Run: npm install @langchain/core");
+    }
+    const open = new Map();
+    const rec = () => recorder ?? getActiveRecorder();
+    class RetraceHandler extends BaseCallbackHandler {
+        name = "retrace";
+        handleToolStart(tool, input, runId) {
+            const r = rec();
+            if (!r)
+                return;
+            open.set(runId, r.startSpan(tool?.name || "tool", SpanType.TOOL_CALL, input));
+        }
+        handleToolEnd(output, runId) {
+            const r = rec();
+            if (!r)
+                return;
+            const sb = open.get(runId);
+            if (sb) {
+                r.endSpan(sb, output);
+                open.delete(runId);
+            }
+            const tr = r.startSpan("tool_result", SpanType.TOOL_RESULT);
+            r.endSpan(tr, output);
+        }
+        handleToolError(err, runId) {
+            const r = rec();
+            if (!r)
+                return;
+            const sb = open.get(runId);
+            if (sb) {
+                r.endSpan(sb, undefined, err instanceof Error ? err.message : String(err));
+                open.delete(runId);
+            }
+        }
+        handleRetrieverStart(_retriever, query, runId) {
+            const r = rec();
+            if (!r)
+                return;
+            open.set(runId, r.startSpan("retrieval", SpanType.ACTION, query));
+        }
+        handleRetrieverEnd(documents, runId) {
+            const r = rec();
+            if (!r)
+                return;
+            const sb = open.get(runId);
+            if (sb) {
+                const docs = (documents || []).map((d) => d?.pageContent ?? JSON.stringify(d));
+                r.endSpan(sb, { count: docs.length, documents: docs });
+                open.delete(runId);
+            }
+        }
+        handleChainStart(chain, inputs, runId) {
+            const r = rec();
+            if (!r)
+                return;
+            const name = chain?.name || chain?.id?.[chain.id.length - 1] || "chain";
+            open.set(runId, r.startSpan(String(name), SpanType.REASONING, inputs));
+        }
+        handleChainEnd(outputs, runId) {
+            const r = rec();
+            if (!r)
+                return;
+            const sb = open.get(runId);
+            if (sb) {
+                r.endSpan(sb, outputs);
+                open.delete(runId);
+            }
+        }
+        handleChainError(err, runId) {
+            const r = rec();
+            if (!r)
+                return;
+            const sb = open.get(runId);
+            if (sb) {
+                r.endSpan(sb, undefined, err instanceof Error ? err.message : String(err));
+                open.delete(runId);
+            }
+        }
+        handleAgentAction(action) {
+            const r = rec();
+            if (!r)
+                return;
+            const sb = r.startSpan(String(action?.tool || "action"), SpanType.TOOL_CALL, action?.toolInput);
+            r.endSpan(sb, action?.log);
+        }
+    }
+    return new RetraceHandler();
+}

package/dist/adapters/vercel-ai.d.ts ADDED Viewed

@@ -0,0 +1,43 @@
+import type { TraceRecorder } from "../recorder.js";
+/**
+ * Vercel AI SDK adapter for Retrace.
+ *
+ * The AI SDK talks to providers through its own `@ai-sdk/*` packages (not the raw OpenAI/Anthropic
+ * SDKs), so the provider interceptors don't see those calls. This adapter records the LLM step plus
+ * structured `tool_call` / `tool_result` spans from the AI SDK's per-step callback, aligned with the
+ * detectors. Drop it into `generateText` / `streamText`:
+ *
+ * ```ts
+ * import { init } from "retrace-sdk";
+ * import { retraceOnStepFinish } from "retrace-sdk/adapters/vercel-ai";
+ * init();
+ * await generateText({ model, prompt, tools, onStepFinish: retraceOnStepFinish() });
+ * ```
+ */
+/** One tool invocation requested by the model during a step. */
+interface AISDKToolCall {
+    /** Tool name; used as the `tool_call` span name ("tool" when missing). */
+    toolName?: string;
+    /** Provider tool-call id. Declared here but not read by `recordVercelStep`. */
+    toolCallId?: string;
+    /** Arguments passed to the tool; recorded as the span input. */
+    args?: unknown;
+}
+/** The outcome of one tool invocation, as reported in a step. */
+interface AISDKToolResult {
+    /** Tool name; used as the `tool_result` span name ("tool_result" when missing). */
+    toolName?: string;
+    /** Provider tool-call id. Declared here but not read by `recordVercelStep`. */
+    toolCallId?: string;
+    /** Value returned by the tool; recorded as the span output. */
+    result?: unknown;
+}
+/** Token usage reported for a step. */
+interface AISDKUsage {
+    /** Recorded as the LLM span's `input_tokens`. */
+    promptTokens?: number;
+    /** Recorded as the LLM span's `output_tokens`. */
+    completionTokens?: number;
+    /** Declared here but not read by `recordVercelStep`. */
+    totalTokens?: number;
+}
+/** The subset of the AI SDK per-step callback payload that the adapter accepts. */
+export interface AISDKStep {
+    /** Text generated in this step; recorded as the LLM span output ("" when missing). */
+    text?: string;
+    /** Tool calls requested in this step; each becomes a `tool_call` span. */
+    toolCalls?: AISDKToolCall[];
+    /** Tool results available in this step; each becomes a `tool_result` span. */
+    toolResults?: AISDKToolResult[];
+    /** Token usage for the step. */
+    usage?: AISDKUsage;
+    /** Declared here but not read by `recordVercelStep`. */
+    finishReason?: string;
+}
+/**
+ * Record one AI SDK step (LLM output + tool calls/results) into the trace.
+ *
+ * @param step Step payload as handed to the AI SDK `onStepFinish` callback.
+ * @param recorder Recorder to write to; defaults to the ambient recorder from `getActiveRecorder()`.
+ *   Nothing is recorded when neither is available.
+ */
+export declare function recordVercelStep(step: AISDKStep, recorder?: TraceRecorder): void;
+/**
+ * Returns an `onStepFinish` callback for `generateText` / `streamText`.
+ *
+ * @param recorder Optional recorder forwarded to `recordVercelStep` on every step.
+ * @returns A callback that records each step it receives.
+ */
+export declare function retraceOnStepFinish(recorder?: TraceRecorder): (step: AISDKStep) => void;
+export {};

package/dist/adapters/vercel-ai.js ADDED Viewed

@@ -0,0 +1,32 @@
+import { SpanType } from "../trace.js";
+import { genId, nowIso } from "../utils.js";
+import { getActiveRecorder } from "../init.js";
+/** Record one AI SDK step (LLM output + tool calls/results) into the trace. */
+export function recordVercelStep(step, recorder) {
+    const rec = recorder ?? getActiveRecorder();
+    if (!rec)
+        return;
+    // LLM step — the AI SDK call the provider interceptors don't see; emit with token usage.
+    const now = nowIso();
+    const llm = {
+        id: genId(), trace_id: "", parent_id: null,
+        span_type: SpanType.LLM_CALL, name: "ai.generate",
+        output: step.text ?? "",
+        input_tokens: step.usage?.promptTokens,
+        output_tokens: step.usage?.completionTokens,
+        started_at: now, ended_at: now,
+    };
+    rec.addSpan(llm);
+    for (const call of step.toolCalls ?? []) {
+        const sb = rec.startSpan(call.toolName || "tool", SpanType.TOOL_CALL, call.args);
+        rec.endSpan(sb, undefined);
+    }
+    for (const res of step.toolResults ?? []) {
+        const sb = rec.startSpan(res.toolName || "tool_result", SpanType.TOOL_RESULT, undefined);
+        rec.endSpan(sb, res.result);
+    }
+}
+/** Returns an `onStepFinish` callback for `generateText` / `streamText`. */
+export function retraceOnStepFinish(recorder) {
+    return (step) => recordVercelStep(step, recorder);
+}

package/dist/golden.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+/**
+ * Mark (or unmark) a recorded trace as a GOLDEN regression baseline (Phase 2E).
+ * Golden traces are the reference for regression replay — `compareToGolden` flags structural
+ * divergence in later runs as a regression.
+ *
+ * @param traceId Id of the trace to update.
+ * @param golden `true` (the default) marks the trace as golden; `false` unmarks it.
+ * @throws Error when the server responds with a non-OK HTTP status.
+ */
+export declare function markGolden(traceId: string, golden?: boolean): Promise<void>;

package/dist/golden.js ADDED Viewed

@@ -0,0 +1,17 @@
+import { getConfig, requireApiKey } from "./config.js";
+/**
+ * Mark (or unmark) a recorded trace as a GOLDEN regression baseline (Phase 2E).
+ * Golden traces are the reference for regression replay — `compareToGolden` flags structural
+ * divergence in later runs as a regression.
+ */
+export async function markGolden(traceId, golden = true) {
+    requireApiKey();
+    const cfg = getConfig();
+    const res = await fetch(`${cfg.baseUrl}/api/v1/traces/${traceId}/golden`, {
+        method: "POST",
+        headers: { "x-retrace-key": cfg.apiKey, "Content-Type": "application/json" },
+        body: JSON.stringify({ golden }),
+    });
+    if (!res.ok)
+        throw new Error(`markGolden failed: HTTP ${res.status}`);
+}

package/dist/index.d.ts CHANGED Viewed

@@ -1,4 +1,6 @@
 export { configure, getConfig } from "./config.js";
+export { init, getActiveRecorder, shutdown } from "./init.js";
+export type { InitOptions } from "./init.js";
 export { record, trace, TraceRecorder } from "./recorder.js";
 export { SpanBuilder, TraceBuilder } from "./trace.js";
 export type { SpanData, TraceData } from "./trace.js";
@@ -12,3 +14,7 @@ export type { ResumeCommand } from "./resume.js";
 export { isReplaying, consumeCassetteEntry, handleReplay } from "./replay.js";
 export type { CassetteEntry, ReplayCommand } from "./replay.js";
 export { setTraceContext, clearTraceContext, getTraceparent, injectTraceparent, parseTraceparent } from "./traceparent.js";
+export { markGolden } from "./golden.js";
+export { createLangChainHandler } from "./adapters/langchain.js";
+export { retraceOnStepFinish, recordVercelStep } from "./adapters/vercel-ai.js";
+export type { AISDKStep } from "./adapters/vercel-ai.js";

package/dist/index.js CHANGED Viewed

@@ -1,4 +1,5 @@
 export { configure, getConfig } from "./config.js";
+export { init, getActiveRecorder, shutdown } from "./init.js";
 export { record, trace, TraceRecorder } from "./recorder.js";
 export { SpanBuilder, TraceBuilder } from "./trace.js";
 export { SpanType, TraceStatus } from "./trace.js";
@@ -9,5 +10,9 @@ export { RetraceError, RetraceAuthError, RetraceCreditsExhaustedError, RetraceCo
 export { registerResumable, handleResume } from "./resume.js";
 export { isReplaying, consumeCassetteEntry, handleReplay } from "./replay.js";
 export { setTraceContext, clearTraceContext, getTraceparent, injectTraceparent, parseTraceparent } from "./traceparent.js";
+export { markGolden } from "./golden.js";
+// Framework adapters (5B) — drop-in instrumentation for LangChain/LangGraph + Vercel AI SDK.
+export { createLangChainHandler } from "./adapters/langchain.js";
+export { retraceOnStepFinish, recordVercelStep } from "./adapters/vercel-ai.js";
 // v0.5.0
 // trigger

package/dist/init.d.ts ADDED Viewed

@@ -0,0 +1,30 @@
+import { type Config } from "./config.js";
+import { TraceRecorder } from "./recorder.js";
+import { TraceStatus } from "./trace.js";
+/** Options for `init()`: any `Config` field, plus the ambient-trace settings below. */
+export interface InitOptions extends Partial<Config> {
+    /** Name for the auto-started ambient trace. Defaults to RETRACE_TRACE_NAME, the entry script name, or "agent". */
+    name?: string;
+    /** Metadata passed to the ambient `TraceRecorder` when it is created. */
+    metadata?: Record<string, unknown>;
+    /** Auto-start an ambient trace that captures every provider call (default true). Set false to only configure + patch. */
+    autoTrace?: boolean;
+}
+/**
+ * Zero-config, one-line init. Reads `RETRACE_API_KEY` from the environment (or pass `apiKey`),
+ * auto-patches any installed provider SDK (OpenAI / Anthropic / Gemini), and auto-starts an
+ * ambient trace so every LLM + tool call is captured with NO `startSpan`/`trace()` boilerplate.
+ * The ambient trace is flushed and ended automatically on process exit.
+ *
+ * ```ts
+ * import { init } from "retrace-sdk";
+ * init();                       // RETRACE_API_KEY from env
+ * // ...use openai / anthropic / gemini normally — auto-recorded
+ * ```
+ *
+ * Intended for scripts and single-run agents. Long-lived servers should keep using `trace()`
+ * per request so each request is its own trace.
+ *
+ * @param opts Configuration overrides plus the ambient-trace options (`name`, `metadata`, `autoTrace`).
+ * @returns The ambient recorder, or `null` when the SDK is disabled in the config or `autoTrace`
+ *   is `false`. Calling `init` again while an ambient trace is active returns that same recorder.
+ */
+export declare function init(opts?: InitOptions): TraceRecorder | null;
+/**
+ * The ambient recorder started by {@link init}, if any.
+ *
+ * @returns The recorder, or `null` when `init` has not started one or it has already been ended
+ *   (by {@link shutdown} or by the automatic exit hooks).
+ */
+export declare function getActiveRecorder(): TraceRecorder | null;
+/**
+ * Manually end the ambient trace (e.g. with a final output) before process exit. Idempotent.
+ *
+ * @param output Final output handed to the recorder's `end()`.
+ * @param status Final trace status; defaults to `TraceStatus.COMPLETED`.
+ */
+export declare function shutdown(output?: unknown, status?: TraceStatus): void;

package/dist/init.js ADDED Viewed

@@ -0,0 +1,83 @@
+import { configure, getConfig, requireApiKey } from "./config.js";
+import { TraceRecorder, flushSharedTransport } from "./recorder.js";
+import { TraceStatus } from "./trace.js";
+// The ambient recorder created by init(); null until one is started and again once it is ended.
+let ambient = null;
+// Set once the process exit/signal hooks are registered, so repeated init() calls don't add more.
+let exitHooked = false;
+function defaultName(explicit) {
+    if (explicit)
+        return explicit;
+    if (typeof process !== "undefined") {
+        if (process.env.RETRACE_TRACE_NAME)
+            return process.env.RETRACE_TRACE_NAME;
+        const argv1 = process.argv?.[1];
+        if (argv1) {
+            const base = argv1.split(/[\\/]/).pop();
+            if (base)
+                return base.replace(/\.[cm]?[jt]s$/, "");
+        }
+    }
+    return "agent";
+}
+/**
+ * Zero-config, one-line init. Reads `RETRACE_API_KEY` from the environment (or pass `apiKey`),
+ * auto-patches any installed provider SDK (OpenAI / Anthropic / Gemini), and auto-starts an
+ * ambient trace so every LLM + tool call is captured with NO `startSpan`/`trace()` boilerplate.
+ * The ambient trace is flushed and ended automatically on process exit.
+ *
+ * ```ts
+ * import { init } from "retrace-sdk";
+ * init();                       // RETRACE_API_KEY from env
+ * // ...use openai / anthropic / gemini normally — auto-recorded
+ * ```
+ *
+ * Intended for scripts and single-run agents. Long-lived servers should keep using `trace()`
+ * per request so each request is its own trace.
+ *
+ * @param opts Configuration overrides plus `name`, `metadata` and `autoTrace` (default true).
+ * @returns The ambient recorder, or `null` when the config is disabled or `autoTrace` is false.
+ *   A repeated call while an ambient trace is active returns the existing recorder.
+ */
+export function init(opts = {}) {
+    // Everything except the ambient-trace options is forwarded to configure().
+    const { name, metadata, autoTrace = true, ...cfg } = opts;
+    configure(cfg);
+    // NOTE(review): the API key is required even when the config is disabled or autoTrace is
+    // false, because this runs before that check — confirm this is intended.
+    requireApiKey();
+    if (!getConfig().enabled || !autoTrace)
+        return null;
+    // Already initialised: reuse the running ambient trace instead of starting a second one.
+    if (ambient)
+        return ambient;
+    const traceName = defaultName(name);
+    ambient = new TraceRecorder({ name: traceName, metadata });
+    ambient.start(traceName); // installs the provider interceptors against the ambient recorder
+    if (!exitHooked && typeof process !== "undefined") {
+        exitHooked = true;
+        // Ends whichever ambient trace is current when the hook fires; clearing `ambient` first
+        // makes a second invocation a no-op.
+        const finish = (status) => {
+            const rec = ambient;
+            ambient = null;
+            try {
+                rec?.end(undefined, status);
+            }
+            catch { /* best effort on shutdown */ }
+        };
+        // On signal-triggered exits, process.exit() would otherwise kill the process before the
+        // final trace_ended is delivered over the network. End the trace, then await a transport
+        // drain (capped by a hard timeout so a hung network can't block shutdown) before exiting.
+        const finishAndExit = (status, code) => {
+            finish(status);
+            void Promise.race([
+                flushSharedTransport().catch(() => { }),
+                new Promise((r) => setTimeout(r, 3000)),
+            ]).then(() => process.exit(code));
+        };
+        // beforeExit fires on a natural exit, so the trace is only ended, with no forced exit.
+        process.once("beforeExit", () => finish(TraceStatus.COMPLETED));
+        // 130 and 143 follow the 128 + signal-number convention (SIGINT = 2, SIGTERM = 15).
+        process.once("SIGINT", () => finishAndExit(TraceStatus.COMPLETED, 130));
+        process.once("SIGTERM", () => finishAndExit(TraceStatus.COMPLETED, 143));
+        // Log the error, mark the trace FAILED and exit with status 1.
+        process.once("uncaughtException", (err) => { console.error(err); finishAndExit(TraceStatus.FAILED, 1); });
+    }
+    return ambient;
+}
+/**
+ * The ambient recorder started by {@link init}, if any.
+ *
+ * @returns The current ambient recorder, or `null` when none has been started or it has
+ *   already been ended.
+ */
+export function getActiveRecorder() {
+    return ambient;
+}
+/** Manually end the ambient trace (e.g. with a final output) before process exit. Idempotent. */
+export function shutdown(output, status = TraceStatus.COMPLETED) {
+    const rec = ambient;
+    ambient = null;
+    rec?.end(output, status);
+}

package/dist/interceptors/anthropic.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import { SpanType } from "../trace.js";
 import { genId, nowIso, truncateJson } from "../utils.js";
 import { isReplaying, consumeCassetteEntry } from "../replay.js";
+import { emitAnthropicToolCalls, emitAnthropicToolResults, parseToolArgs, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
 const PRICING = {
     "claude-opus-4.7": [5.0, 25.0],
     "claude-opus-4.6": [5.0, 25.0],
@@ -25,9 +26,11 @@ let onSpanCallback = null;
 export function installAnthropicInterceptor(onSpan) {
     if (installed) {
         onSpanCallback = onSpan;
+        resetToolResultDedup();
         return;
     }
     onSpanCallback = onSpan;
+    resetToolResultDedup();
     import("@anthropic-ai/sdk").then((anthropicMod) => {
         // eslint-disable-next-line @typescript-eslint/no-explicit-any
         const mod = anthropicMod;
@@ -50,6 +53,9 @@ function createPatchedCreate() {
         const model = opts.model || "unknown";
         const messages = opts.messages || [];
         const isStreaming = !!opts.stream;
+        const toolSchemas = extractToolSchemas("anthropic", opts.tools);
+        const sampling = extractSamplingParams("anthropic", opts);
+        const spanMeta = { ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) };
         const spanId = genId();
         const startedAt = nowIso();
         const startMs = Date.now();
@@ -83,6 +89,8 @@ function createPatchedCreate() {
                 const chunks = [];
                 let inputTokens = 0;
                 let outputTokens = 0;
+                // Accumulate streamed tool_use blocks by index (content_block_start + input_json_delta).
+                const toolAcc = {};
                 // eslint-disable-next-line @typescript-eslint/no-explicit-any
                 const originalIterator = result[Symbol.asyncIterator]();
                 const wrappedStream = {
@@ -101,15 +109,29 @@ function createPatchedCreate() {
                                         input_tokens: inputTokens, output_tokens: outputTokens,
                                         cost: calcCost(model, inputTokens, outputTokens),
                                         duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
-                                        metadata: { streaming: true },
+                                        metadata: { streaming: true, ...spanMeta },
                                     };
                                     onSpanCallback?.(span);
+                                    if (onSpanCallback) {
+                                        emitAnthropicToolResults(messages, onSpanCallback);
+                                        const blocks = Object.values(toolAcc).map((t) => ({ type: "tool_use", id: t.id, name: t.name, input: parseToolArgs(t.json) }));
+                                        emitAnthropicToolCalls(blocks, spanId, model, onSpanCallback);
+                                    }
                                     return { value: undefined, done: true };
                                 }
                                 // Collect content_block_delta text
                                 if (value?.type === "content_block_delta" && value?.delta?.text) {
                                     chunks.push(value.delta.text);
                                 }
+                                // Accumulate tool_use blocks (start carries id/name, input_json_delta streams args)
+                                if (value?.type === "content_block_start" && value?.content_block?.type === "tool_use") {
+                                    toolAcc[value.index ?? 0] = { id: value.content_block.id, name: value.content_block.name, json: "" };
+                                }
+                                if (value?.type === "content_block_delta" && value?.delta?.type === "input_json_delta") {
+                                    const acc = toolAcc[value.index ?? 0];
+                                    if (acc && typeof value.delta.partial_json === "string")
+                                        acc.json += value.delta.partial_json;
+                                }
                                 // Collect usage from message_delta
                                 if (value?.type === "message_delta" && value?.usage) {
                                     outputTokens = value.usage.output_tokens || outputTokens;
@@ -142,8 +164,14 @@ function createPatchedCreate() {
                 input_tokens: inputTokens, output_tokens: outputTokens,
                 cost: calcCost(model, inputTokens, outputTokens),
                 duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
+                ...(Object.keys(spanMeta).length ? { metadata: spanMeta } : {}),
             };
             onSpanCallback?.(span);
+            // Auto-capture tool usage (tool_use blocks in response, tool_result blocks in input).
+            if (onSpanCallback) {
+                emitAnthropicToolResults(messages, onSpanCallback);
+                emitAnthropicToolCalls(res?.content, spanId, model, onSpanCallback);
+            }
             return result;
         }
         catch (err) {

package/dist/interceptors/gemini.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { SpanType } from "../trace.js";
 import { genId, nowIso, truncateJson } from "../utils.js";
+import { emitGeminiToolCalls, emitGeminiToolResults, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
 const PRICING = {
     "gemini-3.1-flash-lite": [0.10, 0.40],
     "gemini-3.1-flash": [0.50, 3.0],
@@ -22,9 +23,11 @@ let onSpanCallback = null;
 export function installGeminiInterceptor(onSpan) {
     if (installed) {
         onSpanCallback = onSpan;
+        resetToolResultDedup();
         return;
     }
     onSpanCallback = onSpan;
+    resetToolResultDedup();
     import("@google/genai").then((genaiMod) => {
         // eslint-disable-next-line @typescript-eslint/no-explicit-any
         const mod = genaiMod;
@@ -37,6 +40,9 @@ export function installGeminiInterceptor(onSpan) {
             const opts = args[0] || {};
             const model = opts.model || "unknown";
             const contents = opts.contents;
+            const toolSchemas = extractToolSchemas("gemini", opts.config?.tools);
+            const sampling = extractSamplingParams("gemini", opts);
+            const spanMeta = { ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) };
             const spanId = genId();
             const startedAt = nowIso();
             const startMs = Date.now();
@@ -63,8 +69,14 @@ export function installGeminiInterceptor(onSpan) {
                     input_tokens: inputTokens, output_tokens: outputTokens,
                     cost: calcCost(model, inputTokens, outputTokens),
                     duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
+                    ...(Object.keys(spanMeta).length ? { metadata: spanMeta } : {}),
                 };
                 onSpanCallback?.(span);
+                // Auto-capture tool usage (functionCall parts in response, functionResponse in input).
+                if (onSpanCallback) {
+                    emitGeminiToolResults(contents, onSpanCallback);
+                    emitGeminiToolCalls(res?.candidates, spanId, model, onSpanCallback);
+                }
                 return result;
             }
             catch (err) {

package/dist/interceptors/openai.js CHANGED Viewed

@@ -3,6 +3,7 @@ import { genId, nowIso, truncateJson } from "../utils.js";
 import { isReplaying, consumeCassetteEntry } from "../replay.js";
 import { getConfig } from "../config.js";
 import { RetraceRateLimitError, RetraceAuthError, RetraceConnectionError } from "../errors.js";
+import { emitOpenAIToolCalls, emitOpenAIToolResults, parseToolArgs, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
 /** Hardcoded fallback pricing ($/1M tokens: [input, output]). Updated periodically. */
 const FALLBACK_PRICING = {
     "gpt-5.5-pro": [30.0, 180.0],
@@ -63,9 +64,11 @@ let onSpanCallback = null;
 export function installOpenAIInterceptor(onSpan) {
     if (installed) {
         onSpanCallback = onSpan;
+        resetToolResultDedup();
         return;
     }
     onSpanCallback = onSpan;
+    resetToolResultDedup();
     import("openai").then((openaiMod) => {
         // eslint-disable-next-line @typescript-eslint/no-explicit-any
         const mod = openaiMod;
@@ -106,6 +109,13 @@ function createPatchedCreate() {
             spanMetadata.vision = true;
         if (responseFormat)
             spanMetadata.structured_output = typeof responseFormat === "object" ? responseFormat.type || "json_schema" : responseFormat;
+        // Capture declared tool parameter schemas so the detection engine can validate tool args.
+        const toolSchemas = extractToolSchemas("openai", opts.tools);
+        if (toolSchemas)
+            spanMetadata.tool_schemas = toolSchemas;
+        const sampling = extractSamplingParams("openai", opts);
+        if (sampling)
+            spanMetadata.sampling = sampling;
         // During replay, return mocked response from cassette instead of calling the real API
         if (isReplaying()) {
             const entry = consumeCassetteEntry("openai.chat.completions.create", "llm_call");
@@ -137,6 +147,8 @@ function createPatchedCreate() {
                 const chunks = [];
                 let inputTokens = 0;
                 let outputTokens = 0;
+                // Accumulate streamed tool calls by index (id/name arrive first, arguments stream in).
+                const toolAcc = {};
                 // eslint-disable-next-line @typescript-eslint/no-explicit-any
                 const originalIterator = result[Symbol.asyncIterator]();
                 const wrappedStream = {
@@ -156,15 +168,34 @@ function createPatchedCreate() {
                                         input_tokens: inputTokens, output_tokens: outputTokens,
                                         cost: calcCost(model, inputTokens, outputTokens),
                                         duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
-                                        metadata: { streaming: true },
+                                        metadata: { streaming: true, ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) },
                                     };
                                     onSpanCallback?.(span);
+                                    if (onSpanCallback) {
+                                        emitOpenAIToolResults(messages, onSpanCallback);
+                                        const accMsg = { tool_calls: Object.values(toolAcc).map((t) => ({ id: t.id, function: { name: t.name, arguments: parseToolArgs(t.args) } })) };
+                                        emitOpenAIToolCalls(accMsg, spanId, model, onSpanCallback);
+                                    }
                                     return { value: undefined, done: true };
                                 }
                                 // Collect content delta
                                 const delta = value?.choices?.[0]?.delta?.content;
                                 if (delta)
                                     chunks.push(delta);
+                                // Collect streamed tool-call deltas (function name/id, then argument fragments)
+                                const tcDeltas = value?.choices?.[0]?.delta?.tool_calls;
+                                if (Array.isArray(tcDeltas)) {
+                                    for (const tc of tcDeltas) {
+                                        const idx = typeof tc.index === "number" ? tc.index : 0;
+                                        const acc = (toolAcc[idx] ??= { args: "" });
+                                        if (tc.id)
+                                            acc.id = tc.id;
+                                        if (tc.function?.name)
+                                            acc.name = tc.function.name;
+                                        if (typeof tc.function?.arguments === "string")
+                                            acc.args += tc.function.arguments;
+                                    }
+                                }
                                 // Collect usage from final chunk
                                 if (value?.usage) {
                                     inputTokens = value.usage.prompt_tokens || 0;
@@ -207,6 +238,12 @@ function createPatchedCreate() {
                 ...(Object.keys(spanMetadata).length ? { metadata: spanMetadata } : {}),
             };
             onSpanCallback?.(span);
+            // Auto-capture tool usage: tool_result spans from the fed-back tool messages (deduped),
+            // tool_call spans from the model's requested calls (structured args).
+            if (onSpanCallback) {
+                emitOpenAIToolResults(messages, onSpanCallback);
+                emitOpenAIToolCalls(res?.choices?.[0]?.message, spanId, model, onSpanCallback);
+            }
             return result;
         }
         catch (err) {

package/dist/interceptors/tool-spans.d.ts ADDED Viewed

@@ -0,0 +1,66 @@
+/**
+ * Tool-span extraction (Phase 1C).
+ *
+ * Provider interceptors historically emitted only a single `llm_call` span and dropped the
+ * model's tool calls entirely (the most common agent-failure class — tool errors + tool
+ * loops — was therefore invisible). These helpers derive structured `tool_call` and
+ * `tool_result` spans from a provider request/response so tool usage is captured with NO
+ * manual instrumentation.
+ *
+ * - `tool_call` spans come from the model's response (the calls it REQUESTED), with arguments
+ *   parsed into structured JSON (not stringified into the output text).
+ * - `tool_result` spans come from the tool messages the caller feeds back on the NEXT request
+ *   (the verbatim recorded result, including errors/empty). They are deduped by the provider
+ *   tool-call id so they are emitted once, not on every subsequent turn.
+ *
+ * Detectors downstream (2C schema validation, 2D loop detection, 3C tool-output hallucination)
+ * depend on these spans + the `tool_call_id` linkage carried in metadata.
+ */
+import { SpanData } from "../trace.js";
+/** Callback that receives each derived span; every `emit*` helper below takes one as its last argument. */
+type Emit = (span: SpanData) => void;
+/** Forget every tool-call id already recorded as emitted, so tool_result dedup starts from an empty state. */
+export declare function resetToolResultDedup(): void;
+/** Parse a JSON-string arguments payload into structured JSON; leave non-strings as-is. */
+export declare function parseToolArgs(args: unknown): unknown;
+/**
+ * Extract declared tool parameter schemas (name → JSON schema) from a provider request's tool
+ * definitions, so the detection engine can validate tool_call arguments against ground truth.
+ */
+export declare function extractToolSchemas(provider: "openai" | "anthropic" | "gemini", tools: unknown): Record<string, unknown> | undefined;
+/**
+ * Capture the sampling/determinism envelope from the request options so replay-divergence (2A)
+ * and regression replay (2E) can compare sampling config, not just the model. Normalizes provider
+ * field names to { temperature, top_p, top_k, seed, max_tokens }. Returns undefined if none set.
+ */
+export declare function extractSamplingParams(provider: "openai" | "anthropic" | "gemini", opts: unknown): Record<string, unknown> | undefined;
+/**
+ * Shape of one entry of an OpenAI message's `tool_calls` array, as read by the helpers in
+ * this module. Every field is optional because the value comes from provider payloads.
+ */
+interface OAToolCall {
+    /** Provider tool-call id; used to link a tool_call span to its tool_result span. */
+    id?: string;
+    type?: string;
+    function?: {
+        /** Name of the requested tool. */
+        name?: string;
+        /** Call arguments; typed `unknown` here (see `parseToolArgs` for JSON-string handling). */
+        arguments?: unknown;
+    };
+}
+/**
+ * Minimal view of an OpenAI chat message: either the assistant response message passed to
+ * `emitOpenAIToolCalls` or an entry of the request list passed to `emitOpenAIToolResults`.
+ */
+interface OAMessage {
+    /** Message role, e.g. "assistant" or "tool". */
+    role?: string;
+    content?: unknown;
+    /** Tool calls requested by the model (assistant messages). */
+    tool_calls?: OAToolCall[];
+    /** Id of the tool call this message answers (tool messages). */
+    tool_call_id?: string;
+    name?: string;
+}
+/** Emit tool_call spans from an OpenAI assistant response message. */
+export declare function emitOpenAIToolCalls(message: OAMessage | undefined, parentId: string, model: string | undefined, emit: Emit): void;
+/** Emit tool_result spans from OpenAI request messages (role:"tool"), deduped by tool_call_id. */
+export declare function emitOpenAIToolResults(messages: OAMessage[] | undefined, emit: Emit): void;
+/** Minimal view of an Anthropic request message as consumed by `emitAnthropicToolResults`. */
+interface AnthropicMessage {
+    role?: string;
+    /** Message content; typed `unknown` because it is not validated at this boundary. */
+    content?: unknown;
+}
+/** Emit tool_call spans from Anthropic response content blocks (type:"tool_use"). */
+export declare function emitAnthropicToolCalls(content: unknown, parentId: string, model: string | undefined, emit: Emit): void;
+/** Emit tool_result spans from Anthropic request messages (content blocks type:"tool_result"). */
+export declare function emitAnthropicToolResults(messages: AnthropicMessage[] | undefined, emit: Emit): void;
+/** Emit tool_call spans from Gemini response candidate parts (functionCall). */
+export declare function emitGeminiToolCalls(candidates: unknown, parentId: string, model: string | undefined, emit: Emit): void;
+/** Emit tool_result spans from Gemini request contents (functionResponse parts), deduped. */
+export declare function emitGeminiToolResults(contents: unknown, emit: Emit): void;
+export {};

package/dist/interceptors/tool-spans.js ADDED Viewed

@@ -0,0 +1,213 @@
+/**
+ * Tool-span extraction (Phase 1C).
+ *
+ * Provider interceptors historically emitted only a single `llm_call` span and dropped the
+ * model's tool calls entirely (the most common agent-failure class — tool errors + tool
+ * loops — was therefore invisible). These helpers derive structured `tool_call` and
+ * `tool_result` spans from a provider request/response so tool usage is captured with NO
+ * manual instrumentation.
+ *
+ * - `tool_call` spans come from the model's response (the calls it REQUESTED), with arguments
+ *   parsed into structured JSON (not stringified into the output text).
+ * - `tool_result` spans come from the tool messages the caller feeds back on the NEXT request
+ *   (the verbatim recorded result, including errors/empty). They are deduped by the provider
+ *   tool-call id so they are emitted once, not on every subsequent turn.
+ *
+ * Detectors downstream (2C schema validation, 2D loop detection, 3C tool-output hallucination)
+ * depend on these spans + the `tool_call_id` linkage carried in metadata.
+ */
+import { SpanType } from "../trace.js";
+import { genId, nowIso, truncateJson } from "../utils.js";
+// Bounded dedup of emitted tool_result spans (keyed by a provider-prefixed tool-call id).
+// Callers may wipe it with resetToolResultDedup(); independently of that, the size cap below
+// keeps memory bounded. A Set iterates in insertion order, so its first entry is the oldest.
+const emittedToolResultIds = new Set();
+// Maximum number of ids remembered at any time.
+const MAX_EMITTED_TOOL_RESULT_IDS = 5000;
+/** Forget every remembered tool-result id. */
+export function resetToolResultDedup() {
+    emittedToolResultIds.clear();
+}
+/**
+ * Record `id` as emitted.
+ *
+ * @param id Dedup key for one tool result.
+ * @returns `true` when the id was not known (the caller should emit the span), `false` when
+ *          it is a duplicate.
+ */
+function markEmitted(id) {
+    if (emittedToolResultIds.has(id))
+        return false;
+    // At capacity: drop only the oldest ids. Clearing the whole set would forget ids that are
+    // still present in the resent conversation history and re-emit all of them as duplicates.
+    while (emittedToolResultIds.size >= MAX_EMITTED_TOOL_RESULT_IDS) {
+        const oldest = emittedToolResultIds.values().next().value;
+        emittedToolResultIds.delete(oldest);
+    }
+    emittedToolResultIds.add(id);
+    return true;
+}
+/**
+ * Turn a tool-arguments payload into structured data.
+ *
+ * @param args Arguments as delivered by the provider.
+ * @returns The decoded JSON value when `args` is a string holding valid JSON; otherwise
+ *          `args` itself, untouched (non-strings and unparseable strings alike).
+ */
+export function parseToolArgs(args) {
+    if (typeof args === "string") {
+        try {
+            return JSON.parse(args);
+        }
+        catch {
+            // Not valid JSON: hand the raw text back unchanged.
+            return args;
+        }
+    }
+    return args;
+}
+/**
+ * Extract declared tool parameter schemas (name → JSON schema) from a provider request's tool
+ * definitions, so the detection engine can validate tool_call arguments against ground truth.
+ *
+ * Malformed entries (null, missing name, missing schema) are skipped rather than throwing,
+ * for every provider.
+ *
+ * @param provider Which provider's tool-definition layout to read.
+ * @param tools The request's tool definitions:
+ *   - openai: `[{ function: { name, parameters } }]`
+ *   - anthropic: `[{ name, input_schema }]`
+ *   - gemini: `[{ functionDeclarations: [{ name, parameters }] }]`
+ * @returns A name → schema map, or `undefined` when `tools` is not an array or no schema was found.
+ */
+export function extractToolSchemas(provider, tools) {
+    if (!Array.isArray(tools))
+        return undefined;
+    const out = {};
+    for (const t of tools) {
+        if (provider === "gemini") {
+            const declarations = t?.functionDeclarations;
+            if (!Array.isArray(declarations))
+                continue;
+            for (const fd of declarations) {
+                if (fd?.name && fd.parameters)
+                    out[fd.name] = fd.parameters;
+            }
+        }
+        else if (provider === "openai") {
+            const fn = t?.function;
+            if (fn?.name && fn.parameters)
+                out[fn.name] = fn.parameters;
+        }
+        else if (provider === "anthropic") {
+            if (t?.name && t.input_schema)
+                out[t.name] = t.input_schema;
+        }
+    }
+    return Object.keys(out).length ? out : undefined;
+}
+/**
+ * Read the sampling settings out of a provider request and return them under provider-neutral
+ * names: { temperature, top_p, top_k, seed, max_tokens }.
+ *
+ * For gemini the settings are read from `opts.config`; for the other providers from `opts`
+ * itself. Fields that are `undefined` or `null` are left out.
+ *
+ * @param provider Provider whose field naming applies.
+ * @param opts Request options as passed to the provider call.
+ * @returns The normalized settings, or `undefined` when none of them is set.
+ */
+export function extractSamplingParams(provider, opts) {
+    const isGemini = provider === "gemini";
+    const request = (opts || {});
+    const source = (isGemini ? request.config : request) || {};
+    // [normalized key, raw value] pairs, listed in the order they are written to the result.
+    const candidates = isGemini
+        ? [
+            ["temperature", source.temperature],
+            ["top_p", source.topP],
+            ["top_k", source.topK],
+            ["seed", source.seed],
+            ["max_tokens", source.maxOutputTokens],
+        ]
+        : [
+            ["temperature", source.temperature],
+            ["top_p", source.top_p],
+            ["top_k", source.top_k], // anthropic only
+            ["seed", source.seed], // openai only
+            ["max_tokens", source.max_tokens ?? source.max_completion_tokens],
+        ];
+    const out = {};
+    for (const [key, value] of candidates) {
+        if (value != null)
+            out[key] = value;
+    }
+    return Object.keys(out).length ? out : undefined;
+}
+/**
+ * Build a zero-duration `tool_call` span.
+ *
+ * @param name Tool name; "tool" is used when it is empty.
+ * @param input Call arguments, stored after passing through `truncateJson`.
+ * @param parentId Id stored as the span's `parent_id`.
+ * @param model Model name; the `model` field is only present when this is truthy.
+ * @param toolCallId Provider tool-call id; stored as `metadata.tool_call_id` when truthy.
+ * @returns The span object, with an empty `trace_id` and identical start/end timestamps.
+ */
+function toolCallSpan(name, input, parentId, model, toolCallId) {
+    const timestamp = nowIso();
+    // Optional fields are spread in so that absent values leave no key behind.
+    const modelField = model ? { model } : {};
+    const linkage = toolCallId ? { metadata: { tool_call_id: toolCallId } } : {};
+    return {
+        id: genId(),
+        trace_id: "",
+        parent_id: parentId,
+        span_type: SpanType.TOOL_CALL,
+        name: name || "tool",
+        ...modelField,
+        input: truncateJson(input),
+        started_at: timestamp,
+        ended_at: timestamp,
+        duration_ms: 0,
+        ...linkage,
+    };
+}
+/**
+ * Build a zero-duration `tool_result` span.
+ *
+ * @param name Span name; "tool_result" is used when it is empty.
+ * @param output Tool result, stored after passing through `truncateJson`.
+ * @param isError Whether the result represents a failure; when true an `error` string is set.
+ * @param toolCallId Provider tool-call id; stored as `metadata.tool_call_id` when truthy.
+ * @returns The span object, with an empty `trace_id`, a null `parent_id` and identical
+ *          start/end timestamps.
+ */
+function toolResultSpan(name, output, isError, toolCallId) {
+    const now = nowIso();
+    let errorText;
+    if (isError) {
+        if (typeof output === "string") {
+            errorText = output;
+        }
+        else {
+            try {
+                errorText = JSON.stringify(output);
+            }
+            catch {
+                // Circular structures / BigInt cannot be serialized; fall back to String().
+                errorText = String(output);
+            }
+        }
+        // JSON.stringify(undefined) yields undefined and an empty result yields "": either
+        // would drop or blank the error marker, so always keep a non-empty description.
+        if (!errorText)
+            errorText = "tool returned an error";
+    }
+    return {
+        id: genId(), trace_id: "", parent_id: null,
+        span_type: SpanType.TOOL_RESULT, name: name || "tool_result",
+        output: truncateJson(output),
+        started_at: now, ended_at: now, duration_ms: 0,
+        ...(isError ? { error: errorText } : {}),
+        ...(toolCallId ? { metadata: { tool_call_id: toolCallId } } : {}),
+    };
+}
+/**
+ * Emit one `tool_call` span for each entry of an OpenAI assistant message's `tool_calls`.
+ *
+ * @param message Assistant response message; nothing is emitted unless `tool_calls` is an array.
+ * @param parentId Id used as `parent_id` of every emitted span.
+ * @param model Model name copied onto the spans when set.
+ * @param emit Receiver for each span.
+ */
+export function emitOpenAIToolCalls(message, parentId, model, emit) {
+    const requested = message?.tool_calls;
+    if (!Array.isArray(requested))
+        return;
+    for (let i = 0; i < requested.length; i++) {
+        const call = requested[i];
+        const toolName = call.function?.name || "tool";
+        // Arguments may arrive as a JSON string; parseToolArgs decodes them when it can.
+        const args = parseToolArgs(call.function?.arguments);
+        emit(toolCallSpan(toolName, args, parentId, model, call.id));
+    }
+}
+/**
+ * Emit one `tool_result` span for each OpenAI `role:"tool"` request message whose
+ * `tool_call_id` has not been emitted before.
+ *
+ * The span name is the tool name found in an assistant `tool_calls` entry with the same id,
+ * else the message's own `name`, else "tool_result". String content matching
+ * /error|exception|failed/i marks the span as an error.
+ *
+ * @param messages Request message list; ignored unless it is an array.
+ * @param emit Receiver for each span.
+ */
+export function emitOpenAIToolResults(messages, emit) {
+    if (!Array.isArray(messages))
+        return;
+    // Pass 1: learn which tool each call id belongs to from assistant tool_calls.
+    const toolNames = new Map();
+    for (const msg of messages) {
+        if (msg?.role !== "assistant" || !Array.isArray(msg.tool_calls))
+            continue;
+        for (const call of msg.tool_calls) {
+            if (call.id)
+                toolNames.set(call.id, call.function?.name || "tool");
+        }
+    }
+    // Pass 2: turn each not-yet-seen tool message into a span.
+    const errorHint = /error|exception|failed/i;
+    for (const msg of messages) {
+        const callId = msg?.role === "tool" ? msg.tool_call_id : undefined;
+        if (!callId || !markEmitted(`oa:${callId}`))
+            continue;
+        const body = msg.content;
+        const looksLikeError = typeof body === "string" && errorHint.test(body);
+        const spanName = toolNames.get(callId) || msg.name || "tool_result";
+        emit(toolResultSpan(spanName, body, looksLikeError, callId));
+    }
+}
+/**
+ * Emit one `tool_call` span for each `type:"tool_use"` block in an Anthropic response.
+ *
+ * @param content Response content blocks; ignored unless it is an array.
+ * @param parentId Id used as `parent_id` of every emitted span.
+ * @param model Model name copied onto the spans when set.
+ * @param emit Receiver for each span.
+ */
+export function emitAnthropicToolCalls(content, parentId, model, emit) {
+    if (!Array.isArray(content))
+        return;
+    for (let i = 0; i < content.length; i++) {
+        const block = content[i];
+        if (block?.type === "tool_use") {
+            // block.input is passed through as-is; block.id becomes the tool_call_id.
+            emit(toolCallSpan(block.name || "tool", block.input, parentId, model, block.id));
+        }
+    }
+}
+/**
+ * Emit tool_result spans from Anthropic request messages (content blocks type:"tool_result"),
+ * deduped by `tool_use_id`.
+ *
+ * The span is named after the tool whose `tool_use` block (found anywhere in the same message
+ * list) carries the matching id, mirroring how the OpenAI helper names its spans; when no
+ * such block is present the name falls back to "tool_result".
+ *
+ * @param messages Request message list; ignored unless it is an array.
+ * @param emit Receiver for each span.
+ */
+export function emitAnthropicToolResults(messages, emit) {
+    if (!Array.isArray(messages))
+        return;
+    // Map tool_use id -> tool name from the tool_use blocks present in the history.
+    const nameById = new Map();
+    for (const m of messages) {
+        if (!Array.isArray(m?.content))
+            continue;
+        for (const block of m.content) {
+            if (block?.type === "tool_use" && block.id)
+                nameById.set(block.id, block.name || "tool");
+        }
+    }
+    for (const m of messages) {
+        if (!Array.isArray(m?.content))
+            continue;
+        for (const block of m.content) {
+            if (block?.type !== "tool_result" || !block.tool_use_id)
+                continue;
+            if (!markEmitted(`anthropic:${block.tool_use_id}`))
+                continue;
+            const name = nameById.get(block.tool_use_id) || "tool_result";
+            emit(toolResultSpan(name, block.content, !!block.is_error, block.tool_use_id));
+        }
+    }
+}
+/**
+ * Emit tool_call spans from Gemini response candidate parts (functionCall).
+ *
+ * The span's tool_call_id is the call's own `id` when the provider supplies one and the
+ * function name otherwise — the same `id || name` rule `emitGeminiToolResults` applies to
+ * `functionResponse`, so a call and its result link up and two calls to the same function
+ * stay distinguishable whenever ids are available.
+ *
+ * @param candidates Response candidates; ignored unless it is an array.
+ * @param parentId Id used as `parent_id` of every emitted span.
+ * @param model Model name copied onto the spans when set.
+ * @param emit Receiver for each span.
+ */
+export function emitGeminiToolCalls(candidates, parentId, model, emit) {
+    if (!Array.isArray(candidates))
+        return;
+    for (const cand of candidates) {
+        const parts = cand?.content?.parts;
+        if (!Array.isArray(parts))
+            continue;
+        for (const p of parts) {
+            // Optional chaining: a null/undefined part is skipped instead of throwing.
+            const call = p?.functionCall;
+            if (!call)
+                continue;
+            emit(toolCallSpan(call.name || "tool", call.args, parentId, model, call.id || call.name));
+        }
+    }
+}
+/**
+ * Emit tool_result spans from Gemini request contents (functionResponse parts), deduped.
+ *
+ * Dedup key: the response's `id` when present. Without an id the key combines the function
+ * name with a prefix of the serialized response, so a resent history entry is still
+ * recognised as a duplicate while a later call to the same function with a different result
+ * gets its own span (keying on the name alone would drop every repeat call).
+ *
+ * @param contents Request contents: an array of content objects or a single one.
+ * @param emit Receiver for each span.
+ */
+export function emitGeminiToolResults(contents, emit) {
+    const list = Array.isArray(contents) ? contents : contents != null ? [contents] : [];
+    for (const c of list) {
+        const parts = c?.parts;
+        if (!Array.isArray(parts))
+            continue;
+        for (const p of parts) {
+            // Optional chaining: a null/undefined part is skipped instead of throwing.
+            const fr = p?.functionResponse;
+            if (!fr)
+                continue;
+            let key;
+            if (fr.id) {
+                key = `gemini:${fr.id}`;
+            }
+            else {
+                let digest = "";
+                try {
+                    // JSON.stringify returns undefined for an undefined response.
+                    digest = (JSON.stringify(fr.response) ?? "").slice(0, 256);
+                }
+                catch {
+                    // Unserializable response: fall back to a name-only key.
+                }
+                key = `gemini:${fr.name || ""}:${digest}`;
+            }
+            if (!markEmitted(key))
+                continue;
+            emit(toolResultSpan(fr.name || "tool_result", fr.response, false, fr.id || fr.name));
+        }
+    }
+}

package/dist/recorder.d.ts CHANGED Viewed

@@ -1,4 +1,6 @@
 import { SpanBuilder, SpanData, SpanType, TraceStatus } from "./trace.js";
+/** Drain the shared transport's in-flight data to the network (awaited on graceful shutdown). */
+export declare function flushSharedTransport(): Promise<void>;
 export interface RecordOptions {
     name?: string;
     input?: unknown;

package/dist/recorder.js CHANGED Viewed

@@ -17,6 +17,10 @@ function getSharedTransport() {
     }
     return sharedTransport;
 }
+/**
+ * Wait until the shared transport has flushed its in-flight data.
+ * Completes without doing any work when no shared transport exists.
+ */
+export async function flushSharedTransport() {
+    const pending = sharedTransport == null ? undefined : sharedTransport.flush();
+    await pending;
+}
 export class TraceRecorder {
     builder;
     transport;

package/dist/trace.js CHANGED Viewed

@@ -83,7 +83,12 @@ export class TraceBuilder {
         return this.data;
     }
     addSpan(span) {
-        this.data.spans.push(span);
+        // Spans are streamed individually through the transport (and HTTPTransport keeps its own
+        // per-trace buffer for the batched POST), so this retained array is only an in-memory
+        // convenience and is never itself transmitted. Cap it so init()'s long-lived ambient
+        // trace can't accumulate spans for the life of the process (an unbounded memory leak).
+        if (this.data.spans.length < 1000)
+            this.data.spans.push(span);
         this.data.total_tokens += (span.input_tokens || 0) + (span.output_tokens || 0);
         this.data.total_cost += span.cost || 0;
     }

package/dist/transport.d.ts CHANGED Viewed

@@ -1,6 +1,8 @@
 export interface Transport {
     send(eventType: string, data: Record<string, unknown>): void;
     close(): void;
+    /** Drain in-flight data to the network (awaited on graceful shutdown). */
+    flush(): Promise<void>;
 }
 export declare class WSTransport implements Transport {
     private ws;
@@ -15,12 +17,15 @@ export declare class WSTransport implements Transport {
     private flushQueue;
     send(eventType: string, data: Record<string, unknown>): void;
     close(): void;
+    /** Wait for the socket's send buffer to drain so the final trace_ended actually leaves
+     *  the process before exit. Best-effort with a hard timeout. */
+    flush(): Promise<void>;
 }
 export declare class HTTPTransport implements Transport {
     private traceData;
     private spans;
     send(eventType: string, data: Record<string, unknown>): void;
-    flush(): void;
+    flush(): Promise<void>;
     private buildSpans;
     close(): void;
 }

package/dist/transport.js CHANGED Viewed

@@ -98,6 +98,14 @@ export class WSTransport {
         }
         this.connected = false;
     }
+    /**
+     * Best-effort drain of the socket's send buffer.
+     *
+     * Polls every 50 ms while the socket exists, is OPEN and still reports buffered bytes,
+     * and gives up 2000 ms after the call started.
+     */
+    async flush() {
+        const deadline = Date.now() + 2000;
+        const hasBacklog = () => Boolean(this.ws) && this.ws.readyState === WebSocket.OPEN && this.ws.bufferedAmount > 0;
+        while (hasBacklog() && Date.now() < deadline) {
+            await new Promise((resolve) => setTimeout(resolve, 50));
+        }
+    }
 }
 export class HTTPTransport {
     traceData = null;
@@ -112,28 +120,34 @@ export class HTTPTransport {
         else if (eventType === "trace_ended") {
             if (this.traceData)
                 Object.assign(this.traceData, data);
-            this.flush();
+            void this.flush();
         }
     }
-    flush() {
+    async flush() {
         if (!this.traceData)
             return;
         const cfg = getConfig();
         const url = `${cfg.baseUrl}/api/v1/traces`;
         const body = { ...this.traceData, spans: this.buildSpans() };
         const payload = JSON.stringify(body);
-        // Retry up to 3 times with exponential backoff
-        const attempt = (n, delay) => {
-            fetch(url, {
-                method: "POST",
-                headers: { "x-retrace-key": cfg.apiKey, "Content-Type": "application/json" },
-                body: payload,
-            }).catch(() => { if (n < 3)
-                setTimeout(() => attempt(n + 1, delay * 2), delay); });
-        };
-        attempt(1, 1000);
+        // Clear first so a concurrent flush (e.g. trace_ended then shutdown drain) can't double-send.
         this.traceData = null;
         this.spans = [];
+        // Retry up to 3 times with exponential backoff; awaited so shutdown can drain it.
+        for (let n = 1; n <= 3; n++) {
+            try {
+                await fetch(url, {
+                    method: "POST",
+                    headers: { "x-retrace-key": cfg.apiKey, "Content-Type": "application/json" },
+                    body: payload,
+                });
+                return;
+            }
+            catch {
+                if (n < 3)
+                    await new Promise((r) => setTimeout(r, 1000 * n));
+            }
+        }
     }
     buildSpans() {
         const merged = new Map();
@@ -150,7 +164,7 @@ export class HTTPTransport {
         return [...merged.values()];
     }
     close() {
-        this.flush();
+        void this.flush();
     }
 }
 export function createTransport(mode = "auto") {
@@ -214,5 +228,24 @@ export function createTransport(mode = "auto") {
                 http.close();
             }
         },
+        /**
+         * Drain pending data through whichever transport is in effect and wait for it.
+         *
+         * Three cases, chosen from the closure flags `decided` / `useWs`:
+         *  - undecided: commit to HTTP, replay everything held in `buffer` into it, then flush;
+         *  - WebSocket chosen: wait on the WebSocket transport's flush;
+         *  - HTTP chosen: wait on the HTTP transport's flush.
+         */
+        async flush() {
+            if (!decided) {
+                // Never connected over WS — force the HTTP fallback and drain the buffer so the
+                // final trace isn't lost on shutdown.
+                decided = true;
+                useWs = false;
+                // The flags are set before the timer is cancelled and the socket closed.
+                // NOTE(review): presumably so their handlers see the decision as already made —
+                // those handlers are outside this view; confirm.
+                clearTimeout(fallbackTimer);
+                ws.close();
+                // splice(0) empties `buffer` while iterating a copy, so each item is sent once.
+                for (const item of buffer.splice(0))
+                    http.send(item.eventType, item.data);
+                await http.flush();
+            }
+            else if (useWs) {
+                await ws.flush();
+            }
+            else {
+                await http.flush();
+            }
+        },
     };
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "retrace-sdk",
-  "version": "0.6.0",
+  "version": "0.10.0",
   "description": "The execution replay engine for AI agents. Record, replay, fork, and share agent executions.",
   "type": "module",
   "main": "dist/index.js",
@@ -9,6 +9,14 @@
     ".": {
       "types": "./dist/index.d.ts",
       "import": "./dist/index.js"
+    },
+    "./adapters/langchain": {
+      "types": "./dist/adapters/langchain.d.ts",
+      "import": "./dist/adapters/langchain.js"
+    },
+    "./adapters/vercel-ai": {
+      "types": "./dist/adapters/vercel-ai.d.ts",
+      "import": "./dist/adapters/vercel-ai.js"
     }
   },
   "files": ["dist", "README.md", "LICENSE"],
@@ -37,7 +45,8 @@
   "peerDependencies": {
     "@google/genai": ">=1.52.0",
     "openai": ">=4.0.0",
-    "@anthropic-ai/sdk": ">=0.30.0"
+    "@anthropic-ai/sdk": ">=0.30.0",
+    "@langchain/core": ">=0.3.0"
   },
   "peerDependenciesMeta": {
     "@google/genai": {
@@ -48,6 +57,9 @@
     },
     "@anthropic-ai/sdk": {
       "optional": true
+    },
+    "@langchain/core": {
+      "optional": true
     }
   },
   "devDependencies": {