retrace-sdk 0.5.4 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -111,6 +111,12 @@ configure({ apiKey: "rt_live_...", sampleRate: 0.1 }); // Record 10% of traces
111
111
 
112
112
  - **Fixed** — OpenAI interceptor no longer creates dummy client instance to find prototype
113
113
 
114
+ ### 0.6.0
115
+
116
+ - **Token ID capture** — Stores output token IDs + logprobs from OpenAI responses (enables speculative decoding during replay)
117
+ - **SpanData extended** — New `token_ids` and `logprobs` fields on SpanData interface
118
+ - **Shared schema** — SpanInputSchema updated with `token_ids` and `logprobs` optional arrays
119
+
114
120
  ### 0.2.1
115
121
 
116
122
  - **Offline buffer** — stores up to 1000 messages when WebSocket disconnects, flushes on reconnect
@@ -0,0 +1,3 @@
1
+ export { createLangChainHandler } from "./langchain.js";
2
+ export { retraceOnStepFinish, recordVercelStep } from "./vercel-ai.js";
3
+ export type { AISDKStep } from "./vercel-ai.js";
@@ -0,0 +1,2 @@
1
+ export { createLangChainHandler } from "./langchain.js";
2
+ export { retraceOnStepFinish, recordVercelStep } from "./vercel-ai.js";
@@ -0,0 +1,17 @@
1
+ import type { TraceRecorder } from "../recorder.js";
2
+ /**
3
+ * LangChain / LangGraph adapter for Retrace (JS).
4
+ *
5
+ * Returns a LangChain `BaseCallbackHandler` that records STRUCTURED tool / retriever / chain spans
6
+ * into the active Retrace trace, aligned with the detectors. LLM spans are already captured by the
7
+ * provider interceptors, so this handler does not emit `llm_call` spans.
8
+ *
9
+ * ```ts
10
+ * import { init } from "retrace-sdk";
11
+ * import { createLangChainHandler } from "retrace-sdk/adapters/langchain";
12
+ * init();
13
+ * const cb = await createLangChainHandler();
14
+ * await app.invoke(input, { callbacks: [cb] }); // works for LangChain + LangGraph
15
+ * ```
16
+ */
17
+ export declare function createLangChainHandler(recorder?: TraceRecorder): Promise<object>;
@@ -0,0 +1,111 @@
1
+ import { SpanType } from "../trace.js";
2
+ import { getActiveRecorder } from "../init.js";
3
/**
 * LangChain / LangGraph adapter for Retrace (JS).
 *
 * Resolves to an instance of a LangChain `BaseCallbackHandler` subclass that records STRUCTURED
 * tool / retriever / chain spans, aligned with the detectors. LLM spans are already captured by
 * the provider interceptors, so this handler does not emit `llm_call` spans.
 *
 * Spans opened by a `*Start` callback are tracked by LangChain `runId` and closed by the matching
 * `*End` / `*Error` callback. Every callback is a no-op when no recorder is available.
 *
 * ```ts
 * import { init } from "retrace-sdk";
 * import { createLangChainHandler } from "retrace-sdk/adapters/langchain";
 * init();
 * const cb = await createLangChainHandler();
 * await app.invoke(input, { callbacks: [cb] }); // works for LangChain + LangGraph
 * ```
 *
 * @param recorder Recorder to write spans to. When omitted, `getActiveRecorder()` is consulted on
 *   every callback.
 * @returns A callback handler instance to pass in LangChain's `callbacks` option.
 * @throws Error when `@langchain/core/callbacks/base` cannot be imported.
 */
export async function createLangChainHandler(recorder) {
    let BaseCallbackHandler;
    try {
        ({ BaseCallbackHandler } = await import("@langchain/core/callbacks/base"));
    }
    catch {
        throw new Error("@langchain/core is not installed. Run: npm install @langchain/core");
    }
    // Span builders for runs that have started but not yet ended, keyed by LangChain runId.
    const open = new Map();
    // Resolved lazily so an ambient recorder started after handler creation is still picked up.
    const rec = () => recorder ?? getActiveRecorder();
    const errorText = (err) => (err instanceof Error ? err.message : String(err));
    // Start a span for `runId` and remember it until the run ends or fails.
    const openSpan = (runId, name, type, input) => {
        const r = rec();
        if (!r)
            return;
        open.set(runId, r.startSpan(name, type, input));
    };
    // Close the span tracked for `runId` (if any). Pass `error` to record a failure instead of an output.
    const closeSpan = (r, runId, output, error) => {
        const sb = open.get(runId);
        if (!sb)
            return;
        if (error === undefined)
            r.endSpan(sb, output);
        else
            r.endSpan(sb, undefined, error);
        open.delete(runId);
    };
    class RetraceHandler extends BaseCallbackHandler {
        name = "retrace";
        handleToolStart(tool, input, runId) {
            openSpan(runId, tool?.name || "tool", SpanType.TOOL_CALL, input);
        }
        handleToolEnd(output, runId) {
            const r = rec();
            if (!r)
                return;
            closeSpan(r, runId, output);
            // A separate tool_result span carries the tool output, even if no call span was open.
            const tr = r.startSpan("tool_result", SpanType.TOOL_RESULT);
            r.endSpan(tr, output);
        }
        handleToolError(err, runId) {
            const r = rec();
            if (!r)
                return;
            closeSpan(r, runId, undefined, errorText(err));
        }
        handleRetrieverStart(_retriever, query, runId) {
            openSpan(runId, "retrieval", SpanType.ACTION, query);
        }
        handleRetrieverEnd(documents, runId) {
            const r = rec();
            if (!r)
                return;
            if (!open.has(runId))
                return;
            // Prefer the document text; fall back to a JSON dump for non-Document values.
            const docs = (documents || []).map((d) => d?.pageContent ?? JSON.stringify(d));
            closeSpan(r, runId, { count: docs.length, documents: docs });
        }
        // Without this, a failing retriever would leave its span open and its runId in `open` forever.
        handleRetrieverError(err, runId) {
            const r = rec();
            if (!r)
                return;
            closeSpan(r, runId, undefined, errorText(err));
        }
        handleChainStart(chain, inputs, runId) {
            // Use the explicit name, else the last segment of the serialized id path.
            const name = chain?.name || chain?.id?.[chain.id.length - 1] || "chain";
            openSpan(runId, String(name), SpanType.REASONING, inputs);
        }
        handleChainEnd(outputs, runId) {
            const r = rec();
            if (!r)
                return;
            closeSpan(r, runId, outputs);
        }
        handleChainError(err, runId) {
            const r = rec();
            if (!r)
                return;
            closeSpan(r, runId, undefined, errorText(err));
        }
        handleAgentAction(action) {
            const r = rec();
            if (!r)
                return;
            // Agent actions are recorded as an already-finished tool_call span.
            const sb = r.startSpan(String(action?.tool || "action"), SpanType.TOOL_CALL, action?.toolInput);
            r.endSpan(sb, action?.log);
        }
    }
    return new RetraceHandler();
}
@@ -0,0 +1,43 @@
1
+ import type { TraceRecorder } from "../recorder.js";
2
+ /**
3
+ * Vercel AI SDK adapter for Retrace.
4
+ *
5
+ * The AI SDK talks to providers through its own `@ai-sdk/*` packages (not the raw OpenAI/Anthropic
6
+ * SDKs), so the provider interceptors don't see those calls. This adapter records the LLM step plus
7
+ * structured `tool_call` / `tool_result` spans from the AI SDK's per-step callback, aligned with the
8
+ * detectors. Drop it into `generateText` / `streamText`:
9
+ *
10
+ * ```ts
11
+ * import { init } from "retrace-sdk";
12
+ * import { retraceOnStepFinish } from "retrace-sdk/adapters/vercel-ai";
13
+ * init();
14
+ * await generateText({ model, prompt, tools, onStepFinish: retraceOnStepFinish() });
15
+ * ```
16
+ */
17
/** A tool invocation requested by the model within one AI SDK step. */
interface AISDKToolCall {
    toolName?: string;
    toolCallId?: string;
    /** Tool arguments as reported by AI SDK v4. */
    args?: unknown;
    /** Tool arguments as reported by AI SDK v5 (renamed from `args`). */
    input?: unknown;
}
/** The outcome of a tool invocation within one AI SDK step. */
interface AISDKToolResult {
    toolName?: string;
    toolCallId?: string;
    /** Tool output as reported by AI SDK v4. */
    result?: unknown;
    /** Tool output as reported by AI SDK v5 (renamed from `result`). */
    output?: unknown;
}
/** Token accounting for one AI SDK step; v4 and v5 use different field names. */
interface AISDKUsage {
    /** AI SDK v4 name for the prompt token count. */
    promptTokens?: number;
    /** AI SDK v4 name for the completion token count. */
    completionTokens?: number;
    /** AI SDK v5 name for the prompt token count. */
    inputTokens?: number;
    /** AI SDK v5 name for the completion token count. */
    outputTokens?: number;
    totalTokens?: number;
}
/** The subset of the AI SDK `onStepFinish` payload that the adapter reads. */
export interface AISDKStep {
    text?: string;
    toolCalls?: AISDKToolCall[];
    toolResults?: AISDKToolResult[];
    usage?: AISDKUsage;
    finishReason?: string;
}
39
+ /** Record one AI SDK step (LLM output + tool calls/results) into the trace. */
40
+ export declare function recordVercelStep(step: AISDKStep, recorder?: TraceRecorder): void;
41
+ /** Returns an `onStepFinish` callback for `generateText` / `streamText`. */
42
+ export declare function retraceOnStepFinish(recorder?: TraceRecorder): (step: AISDKStep) => void;
43
+ export {};
@@ -0,0 +1,32 @@
1
+ import { SpanType } from "../trace.js";
2
+ import { genId, nowIso } from "../utils.js";
3
+ import { getActiveRecorder } from "../init.js";
4
+ /** Record one AI SDK step (LLM output + tool calls/results) into the trace. */
5
+ export function recordVercelStep(step, recorder) {
6
+ const rec = recorder ?? getActiveRecorder();
7
+ if (!rec)
8
+ return;
9
+ // LLM step — the AI SDK call the provider interceptors don't see; emit with token usage.
10
+ const now = nowIso();
11
+ const llm = {
12
+ id: genId(), trace_id: "", parent_id: null,
13
+ span_type: SpanType.LLM_CALL, name: "ai.generate",
14
+ output: step.text ?? "",
15
+ input_tokens: step.usage?.promptTokens,
16
+ output_tokens: step.usage?.completionTokens,
17
+ started_at: now, ended_at: now,
18
+ };
19
+ rec.addSpan(llm);
20
+ for (const call of step.toolCalls ?? []) {
21
+ const sb = rec.startSpan(call.toolName || "tool", SpanType.TOOL_CALL, call.args);
22
+ rec.endSpan(sb, undefined);
23
+ }
24
+ for (const res of step.toolResults ?? []) {
25
+ const sb = rec.startSpan(res.toolName || "tool_result", SpanType.TOOL_RESULT, undefined);
26
+ rec.endSpan(sb, res.result);
27
+ }
28
+ }
29
/**
 * Build an `onStepFinish` callback for `generateText` / `streamText`.
 *
 * @param recorder Optional recorder forwarded unchanged to `recordVercelStep` on every step.
 * @returns A function that records each step it is called with.
 */
export function retraceOnStepFinish(recorder) {
    const onStepFinish = (step) => {
        return recordVercelStep(step, recorder);
    };
    return onStepFinish;
}
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Mark (or unmark) a recorded trace as a GOLDEN regression baseline (Phase 2E).
3
+ * Golden traces are the reference for regression replay — `compareToGolden` flags structural
4
+ * divergence in later runs as a regression.
5
+ */
6
+ export declare function markGolden(traceId: string, golden?: boolean): Promise<void>;
package/dist/golden.js ADDED
@@ -0,0 +1,17 @@
1
+ import { getConfig, requireApiKey } from "./config.js";
2
+ /**
3
+ * Mark (or unmark) a recorded trace as a GOLDEN regression baseline (Phase 2E).
4
+ * Golden traces are the reference for regression replay — `compareToGolden` flags structural
5
+ * divergence in later runs as a regression.
6
+ */
7
+ export async function markGolden(traceId, golden = true) {
8
+ requireApiKey();
9
+ const cfg = getConfig();
10
+ const res = await fetch(`${cfg.baseUrl}/api/v1/traces/${traceId}/golden`, {
11
+ method: "POST",
12
+ headers: { "x-retrace-key": cfg.apiKey, "Content-Type": "application/json" },
13
+ body: JSON.stringify({ golden }),
14
+ });
15
+ if (!res.ok)
16
+ throw new Error(`markGolden failed: HTTP ${res.status}`);
17
+ }
package/dist/index.d.ts CHANGED
@@ -1,4 +1,6 @@
1
1
  export { configure, getConfig } from "./config.js";
2
+ export { init, getActiveRecorder, shutdown } from "./init.js";
3
+ export type { InitOptions } from "./init.js";
2
4
  export { record, trace, TraceRecorder } from "./recorder.js";
3
5
  export { SpanBuilder, TraceBuilder } from "./trace.js";
4
6
  export type { SpanData, TraceData } from "./trace.js";
@@ -12,3 +14,7 @@ export type { ResumeCommand } from "./resume.js";
12
14
  export { isReplaying, consumeCassetteEntry, handleReplay } from "./replay.js";
13
15
  export type { CassetteEntry, ReplayCommand } from "./replay.js";
14
16
  export { setTraceContext, clearTraceContext, getTraceparent, injectTraceparent, parseTraceparent } from "./traceparent.js";
17
+ export { markGolden } from "./golden.js";
18
+ export { createLangChainHandler } from "./adapters/langchain.js";
19
+ export { retraceOnStepFinish, recordVercelStep } from "./adapters/vercel-ai.js";
20
+ export type { AISDKStep } from "./adapters/vercel-ai.js";
package/dist/index.js CHANGED
@@ -1,4 +1,5 @@
1
1
  export { configure, getConfig } from "./config.js";
2
+ export { init, getActiveRecorder, shutdown } from "./init.js";
2
3
  export { record, trace, TraceRecorder } from "./recorder.js";
3
4
  export { SpanBuilder, TraceBuilder } from "./trace.js";
4
5
  export { SpanType, TraceStatus } from "./trace.js";
@@ -9,5 +10,9 @@ export { RetraceError, RetraceAuthError, RetraceCreditsExhaustedError, RetraceCo
9
10
  export { registerResumable, handleResume } from "./resume.js";
10
11
  export { isReplaying, consumeCassetteEntry, handleReplay } from "./replay.js";
11
12
  export { setTraceContext, clearTraceContext, getTraceparent, injectTraceparent, parseTraceparent } from "./traceparent.js";
13
+ export { markGolden } from "./golden.js";
14
+ // Framework adapters (5B) — drop-in instrumentation for LangChain/LangGraph + Vercel AI SDK.
15
+ export { createLangChainHandler } from "./adapters/langchain.js";
16
+ export { retraceOnStepFinish, recordVercelStep } from "./adapters/vercel-ai.js";
12
17
  // v0.5.0
13
18
  // trigger
package/dist/init.d.ts ADDED
@@ -0,0 +1,30 @@
1
+ import { type Config } from "./config.js";
2
+ import { TraceRecorder } from "./recorder.js";
3
+ import { TraceStatus } from "./trace.js";
4
+ export interface InitOptions extends Partial<Config> {
5
+ /** Name for the auto-started ambient trace. Defaults to RETRACE_TRACE_NAME, the entry script name, or "agent". */
6
+ name?: string;
7
+ metadata?: Record<string, unknown>;
8
+ /** Auto-start an ambient trace that captures every provider call (default true). Set false to only configure + patch. */
9
+ autoTrace?: boolean;
10
+ }
11
+ /**
12
+ * Zero-config, one-line init. Reads `RETRACE_API_KEY` from the environment (or pass `apiKey`),
13
+ * auto-patches any installed provider SDK (OpenAI / Anthropic / Gemini), and auto-starts an
14
+ * ambient trace so every LLM + tool call is captured with NO `startSpan`/`trace()` boilerplate.
15
+ * The ambient trace is flushed and ended automatically on process exit.
16
+ *
17
+ * ```ts
18
+ * import { init } from "retrace-sdk";
19
+ * init(); // RETRACE_API_KEY from env
20
+ * // ...use openai / anthropic / gemini normally — auto-recorded
21
+ * ```
22
+ *
23
+ * Intended for scripts and single-run agents. Long-lived servers should keep using `trace()`
24
+ * per request so each request is its own trace.
25
+ */
26
+ export declare function init(opts?: InitOptions): TraceRecorder | null;
27
+ /** The ambient recorder started by {@link init}, if any. */
28
+ export declare function getActiveRecorder(): TraceRecorder | null;
29
+ /** Manually end the ambient trace (e.g. with a final output) before process exit. Idempotent. */
30
+ export declare function shutdown(output?: unknown, status?: TraceStatus): void;
package/dist/init.js ADDED
@@ -0,0 +1,73 @@
1
+ import { configure, getConfig, requireApiKey } from "./config.js";
2
+ import { TraceRecorder } from "./recorder.js";
3
+ import { TraceStatus } from "./trace.js";
4
+ let ambient = null;
5
+ let exitHooked = false;
6
/**
 * Pick the name for the ambient trace.
 *
 * Precedence: the explicit name, then the `RETRACE_TRACE_NAME` environment variable, then the
 * basename of the entry script (`process.argv[1]`) with its JS/TS extension removed, then the
 * literal `"agent"`.
 *
 * @param explicit Caller-supplied name; used as-is when truthy.
 * @returns A non-empty trace name.
 */
function defaultName(explicit) {
    if (explicit)
        return explicit;
    // No `process` global (non-Node runtime): nothing to derive a name from.
    if (typeof process === "undefined")
        return "agent";
    if (process.env.RETRACE_TRACE_NAME)
        return process.env.RETRACE_TRACE_NAME;
    const entry = process.argv?.[1];
    // Split on both separators so Windows paths work too.
    const base = entry ? entry.split(/[\\/]/).pop() : undefined;
    // Strip .js/.ts/.mjs/.cjs/.mts/.cts and their JSX variants (.jsx/.tsx).
    const stem = base ? base.replace(/\.[cm]?[jt]sx?$/, "") : "";
    // A basename that was only an extension (e.g. ".js") must not yield an empty trace name.
    return stem || "agent";
}
21
/**
 * Zero-config, one-line init. Reads `RETRACE_API_KEY` from the environment (or pass `apiKey`),
 * auto-patches any installed provider SDK (OpenAI / Anthropic / Gemini), and auto-starts an
 * ambient trace so every LLM + tool call is captured with NO `startSpan`/`trace()` boilerplate.
 * The ambient trace is ended automatically on process exit.
 *
 * ```ts
 * import { init } from "retrace-sdk";
 * init(); // RETRACE_API_KEY from env
 * // ...use openai / anthropic / gemini normally — auto-recorded
 * ```
 *
 * Intended for scripts and single-run agents. Long-lived servers should keep using `trace()`
 * per request so each request is its own trace.
 *
 * @param opts `name`, `metadata` and `autoTrace` control the ambient trace; every other key is
 *   forwarded to `configure()`.
 * @returns The ambient recorder, or `null` when the SDK is disabled or `autoTrace` is `false`.
 *   Repeated calls return the recorder that is already running.
 */
export function init(opts = {}) {
    const { name, metadata, autoTrace = true, ...cfg } = opts;
    configure(cfg);
    requireApiKey();
    const tracingWanted = getConfig().enabled && autoTrace;
    if (!tracingWanted) {
        return null;
    }
    if (ambient) {
        return ambient;
    }
    const traceName = defaultName(name);
    ambient = new TraceRecorder({ name: traceName, metadata });
    ambient.start(traceName); // installs the provider interceptors against the ambient recorder
    const canHook = typeof process !== "undefined";
    if (!exitHooked && canHook) {
        // Register the process hooks only once, even if init() is called again later.
        exitHooked = true;
        // Detach the ambient recorder first so a second hook firing finds nothing to end.
        function finish(status) {
            const rec = ambient;
            ambient = null;
            try {
                rec?.end(undefined, status);
            }
            catch { /* best effort on shutdown */ }
        }
        // Installing a signal listener suppresses Node's default exit, so exit explicitly.
        const finishAndExit = (status, code) => () => {
            finish(status);
            process.exit(code);
        };
        process.once("beforeExit", () => finish(TraceStatus.COMPLETED));
        process.once("SIGINT", finishAndExit(TraceStatus.COMPLETED, 130));
        process.once("SIGTERM", finishAndExit(TraceStatus.COMPLETED, 143));
        process.once("uncaughtException", (err) => {
            finish(TraceStatus.FAILED);
            console.error(err);
            process.exit(1);
        });
    }
    return ambient;
}
64
/**
 * Accessor for the ambient recorder.
 *
 * @returns The recorder started by {@link init}, or `null` when none is currently running
 *   (never started, or already ended by {@link shutdown} / a process-exit hook).
 */
export function getActiveRecorder() {
    return ambient;
}
68
/**
 * Manually end the ambient trace (e.g. with a final output) before process exit.
 *
 * Idempotent: the ambient recorder is detached before it is ended, so a second call (or a later
 * process-exit hook) finds nothing to end.
 *
 * @param output Final output to attach to the trace.
 * @param status Final trace status; defaults to `TraceStatus.COMPLETED`.
 */
export function shutdown(output, status = TraceStatus.COMPLETED) {
    const active = ambient;
    ambient = null;
    if (active === null || active === undefined) {
        return;
    }
    active.end(output, status);
}
@@ -1,6 +1,7 @@
1
1
  import { SpanType } from "../trace.js";
2
2
  import { genId, nowIso, truncateJson } from "../utils.js";
3
3
  import { isReplaying, consumeCassetteEntry } from "../replay.js";
4
+ import { emitAnthropicToolCalls, emitAnthropicToolResults, parseToolArgs, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
4
5
  const PRICING = {
5
6
  "claude-opus-4.7": [5.0, 25.0],
6
7
  "claude-opus-4.6": [5.0, 25.0],
@@ -25,9 +26,11 @@ let onSpanCallback = null;
25
26
  export function installAnthropicInterceptor(onSpan) {
26
27
  if (installed) {
27
28
  onSpanCallback = onSpan;
29
+ resetToolResultDedup();
28
30
  return;
29
31
  }
30
32
  onSpanCallback = onSpan;
33
+ resetToolResultDedup();
31
34
  import("@anthropic-ai/sdk").then((anthropicMod) => {
32
35
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
33
36
  const mod = anthropicMod;
@@ -50,6 +53,9 @@ function createPatchedCreate() {
50
53
  const model = opts.model || "unknown";
51
54
  const messages = opts.messages || [];
52
55
  const isStreaming = !!opts.stream;
56
+ const toolSchemas = extractToolSchemas("anthropic", opts.tools);
57
+ const sampling = extractSamplingParams("anthropic", opts);
58
+ const spanMeta = { ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) };
53
59
  const spanId = genId();
54
60
  const startedAt = nowIso();
55
61
  const startMs = Date.now();
@@ -83,6 +89,8 @@ function createPatchedCreate() {
83
89
  const chunks = [];
84
90
  let inputTokens = 0;
85
91
  let outputTokens = 0;
92
+ // Accumulate streamed tool_use blocks by index (content_block_start + input_json_delta).
93
+ const toolAcc = {};
86
94
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
87
95
  const originalIterator = result[Symbol.asyncIterator]();
88
96
  const wrappedStream = {
@@ -101,15 +109,29 @@ function createPatchedCreate() {
101
109
  input_tokens: inputTokens, output_tokens: outputTokens,
102
110
  cost: calcCost(model, inputTokens, outputTokens),
103
111
  duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
104
- metadata: { streaming: true },
112
+ metadata: { streaming: true, ...spanMeta },
105
113
  };
106
114
  onSpanCallback?.(span);
115
+ if (onSpanCallback) {
116
+ emitAnthropicToolResults(messages, onSpanCallback);
117
+ const blocks = Object.values(toolAcc).map((t) => ({ type: "tool_use", id: t.id, name: t.name, input: parseToolArgs(t.json) }));
118
+ emitAnthropicToolCalls(blocks, spanId, model, onSpanCallback);
119
+ }
107
120
  return { value: undefined, done: true };
108
121
  }
109
122
  // Collect content_block_delta text
110
123
  if (value?.type === "content_block_delta" && value?.delta?.text) {
111
124
  chunks.push(value.delta.text);
112
125
  }
126
+ // Accumulate tool_use blocks (start carries id/name, input_json_delta streams args)
127
+ if (value?.type === "content_block_start" && value?.content_block?.type === "tool_use") {
128
+ toolAcc[value.index ?? 0] = { id: value.content_block.id, name: value.content_block.name, json: "" };
129
+ }
130
+ if (value?.type === "content_block_delta" && value?.delta?.type === "input_json_delta") {
131
+ const acc = toolAcc[value.index ?? 0];
132
+ if (acc && typeof value.delta.partial_json === "string")
133
+ acc.json += value.delta.partial_json;
134
+ }
113
135
  // Collect usage from message_delta
114
136
  if (value?.type === "message_delta" && value?.usage) {
115
137
  outputTokens = value.usage.output_tokens || outputTokens;
@@ -142,8 +164,14 @@ function createPatchedCreate() {
142
164
  input_tokens: inputTokens, output_tokens: outputTokens,
143
165
  cost: calcCost(model, inputTokens, outputTokens),
144
166
  duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
167
+ ...(Object.keys(spanMeta).length ? { metadata: spanMeta } : {}),
145
168
  };
146
169
  onSpanCallback?.(span);
170
+ // Auto-capture tool usage (tool_use blocks in response, tool_result blocks in input).
171
+ if (onSpanCallback) {
172
+ emitAnthropicToolResults(messages, onSpanCallback);
173
+ emitAnthropicToolCalls(res?.content, spanId, model, onSpanCallback);
174
+ }
147
175
  return result;
148
176
  }
149
177
  catch (err) {
@@ -1,5 +1,6 @@
1
1
  import { SpanType } from "../trace.js";
2
2
  import { genId, nowIso, truncateJson } from "../utils.js";
3
+ import { emitGeminiToolCalls, emitGeminiToolResults, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
3
4
  const PRICING = {
4
5
  "gemini-3.1-flash-lite": [0.10, 0.40],
5
6
  "gemini-3.1-flash": [0.50, 3.0],
@@ -22,9 +23,11 @@ let onSpanCallback = null;
22
23
  export function installGeminiInterceptor(onSpan) {
23
24
  if (installed) {
24
25
  onSpanCallback = onSpan;
26
+ resetToolResultDedup();
25
27
  return;
26
28
  }
27
29
  onSpanCallback = onSpan;
30
+ resetToolResultDedup();
28
31
  import("@google/genai").then((genaiMod) => {
29
32
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
30
33
  const mod = genaiMod;
@@ -37,6 +40,9 @@ export function installGeminiInterceptor(onSpan) {
37
40
  const opts = args[0] || {};
38
41
  const model = opts.model || "unknown";
39
42
  const contents = opts.contents;
43
+ const toolSchemas = extractToolSchemas("gemini", opts.config?.tools);
44
+ const sampling = extractSamplingParams("gemini", opts);
45
+ const spanMeta = { ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) };
40
46
  const spanId = genId();
41
47
  const startedAt = nowIso();
42
48
  const startMs = Date.now();
@@ -63,8 +69,14 @@ export function installGeminiInterceptor(onSpan) {
63
69
  input_tokens: inputTokens, output_tokens: outputTokens,
64
70
  cost: calcCost(model, inputTokens, outputTokens),
65
71
  duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
72
+ ...(Object.keys(spanMeta).length ? { metadata: spanMeta } : {}),
66
73
  };
67
74
  onSpanCallback?.(span);
75
+ // Auto-capture tool usage (functionCall parts in response, functionResponse in input).
76
+ if (onSpanCallback) {
77
+ emitGeminiToolResults(contents, onSpanCallback);
78
+ emitGeminiToolCalls(res?.candidates, spanId, model, onSpanCallback);
79
+ }
68
80
  return result;
69
81
  }
70
82
  catch (err) {
@@ -3,6 +3,7 @@ import { genId, nowIso, truncateJson } from "../utils.js";
3
3
  import { isReplaying, consumeCassetteEntry } from "../replay.js";
4
4
  import { getConfig } from "../config.js";
5
5
  import { RetraceRateLimitError, RetraceAuthError, RetraceConnectionError } from "../errors.js";
6
+ import { emitOpenAIToolCalls, emitOpenAIToolResults, parseToolArgs, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
6
7
  /** Hardcoded fallback pricing ($/1M tokens: [input, output]). Updated periodically. */
7
8
  const FALLBACK_PRICING = {
8
9
  "gpt-5.5-pro": [30.0, 180.0],
@@ -63,9 +64,11 @@ let onSpanCallback = null;
63
64
  export function installOpenAIInterceptor(onSpan) {
64
65
  if (installed) {
65
66
  onSpanCallback = onSpan;
67
+ resetToolResultDedup();
66
68
  return;
67
69
  }
68
70
  onSpanCallback = onSpan;
71
+ resetToolResultDedup();
69
72
  import("openai").then((openaiMod) => {
70
73
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
71
74
  const mod = openaiMod;
@@ -106,6 +109,13 @@ function createPatchedCreate() {
106
109
  spanMetadata.vision = true;
107
110
  if (responseFormat)
108
111
  spanMetadata.structured_output = typeof responseFormat === "object" ? responseFormat.type || "json_schema" : responseFormat;
112
+ // Capture declared tool parameter schemas so the detection engine can validate tool args.
113
+ const toolSchemas = extractToolSchemas("openai", opts.tools);
114
+ if (toolSchemas)
115
+ spanMetadata.tool_schemas = toolSchemas;
116
+ const sampling = extractSamplingParams("openai", opts);
117
+ if (sampling)
118
+ spanMetadata.sampling = sampling;
109
119
  // During replay, return mocked response from cassette instead of calling the real API
110
120
  if (isReplaying()) {
111
121
  const entry = consumeCassetteEntry("openai.chat.completions.create", "llm_call");
@@ -137,6 +147,8 @@ function createPatchedCreate() {
137
147
  const chunks = [];
138
148
  let inputTokens = 0;
139
149
  let outputTokens = 0;
150
+ // Accumulate streamed tool calls by index (id/name arrive first, arguments stream in).
151
+ const toolAcc = {};
140
152
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
141
153
  const originalIterator = result[Symbol.asyncIterator]();
142
154
  const wrappedStream = {
@@ -156,15 +168,34 @@ function createPatchedCreate() {
156
168
  input_tokens: inputTokens, output_tokens: outputTokens,
157
169
  cost: calcCost(model, inputTokens, outputTokens),
158
170
  duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
159
- metadata: { streaming: true },
171
+ metadata: { streaming: true, ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) },
160
172
  };
161
173
  onSpanCallback?.(span);
174
+ if (onSpanCallback) {
175
+ emitOpenAIToolResults(messages, onSpanCallback);
176
+ const accMsg = { tool_calls: Object.values(toolAcc).map((t) => ({ id: t.id, function: { name: t.name, arguments: parseToolArgs(t.args) } })) };
177
+ emitOpenAIToolCalls(accMsg, spanId, model, onSpanCallback);
178
+ }
162
179
  return { value: undefined, done: true };
163
180
  }
164
181
  // Collect content delta
165
182
  const delta = value?.choices?.[0]?.delta?.content;
166
183
  if (delta)
167
184
  chunks.push(delta);
185
+ // Collect streamed tool-call deltas (function name/id, then argument fragments)
186
+ const tcDeltas = value?.choices?.[0]?.delta?.tool_calls;
187
+ if (Array.isArray(tcDeltas)) {
188
+ for (const tc of tcDeltas) {
189
+ const idx = typeof tc.index === "number" ? tc.index : 0;
190
+ const acc = (toolAcc[idx] ??= { args: "" });
191
+ if (tc.id)
192
+ acc.id = tc.id;
193
+ if (tc.function?.name)
194
+ acc.name = tc.function.name;
195
+ if (typeof tc.function?.arguments === "string")
196
+ acc.args += tc.function.arguments;
197
+ }
198
+ }
168
199
  // Collect usage from final chunk
169
200
  if (value?.usage) {
170
201
  inputTokens = value.usage.prompt_tokens || 0;
@@ -188,6 +219,12 @@ function createPatchedCreate() {
188
219
  const inputTokens = res?.usage?.prompt_tokens || 0;
189
220
  const outputTokens = res?.usage?.completion_tokens || 0;
190
221
  const output = res?.choices?.[0]?.message?.content || "";
222
+ // Extract token IDs and logprobs if available (requires logprobs: true in request)
223
+ const choiceLogprobs = res?.choices?.[0]?.logprobs?.content;
224
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
225
+ const tokenIds = choiceLogprobs?.map((t) => t.token_id ?? t.top_logprobs?.[0]?.token_id).filter(Boolean);
226
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
227
+ const logprobValues = choiceLogprobs?.map((t) => t.logprob).filter((v) => v !== undefined);
191
228
  const span = {
192
229
  id: spanId, trace_id: "", parent_id: null,
193
230
  span_type: SpanType.LLM_CALL, name: "openai.chat.completions.create", model,
@@ -196,9 +233,17 @@ function createPatchedCreate() {
196
233
  input_tokens: inputTokens, output_tokens: outputTokens,
197
234
  cost: calcCost(model, inputTokens, outputTokens),
198
235
  duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
236
+ ...(tokenIds?.length ? { token_ids: tokenIds } : {}),
237
+ ...(logprobValues?.length ? { logprobs: logprobValues } : {}),
199
238
  ...(Object.keys(spanMetadata).length ? { metadata: spanMetadata } : {}),
200
239
  };
201
240
  onSpanCallback?.(span);
241
+ // Auto-capture tool usage: tool_result spans from the fed-back tool messages (deduped),
242
+ // tool_call spans from the model's requested calls (structured args).
243
+ if (onSpanCallback) {
244
+ emitOpenAIToolResults(messages, onSpanCallback);
245
+ emitOpenAIToolCalls(res?.choices?.[0]?.message, spanId, model, onSpanCallback);
246
+ }
202
247
  return result;
203
248
  }
204
249
  catch (err) {
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Tool-span extraction (Phase 1C).
3
+ *
4
+ * Provider interceptors historically emitted only a single `llm_call` span and dropped the
5
+ * model's tool calls entirely (the most common agent-failure class — tool errors + tool
6
+ * loops — was therefore invisible). These helpers derive structured `tool_call` and
7
+ * `tool_result` spans from a provider request/response so tool usage is captured with NO
8
+ * manual instrumentation.
9
+ *
10
+ * - `tool_call` spans come from the model's response (the calls it REQUESTED), with arguments
11
+ * parsed into structured JSON (not stringified into the output text).
12
+ * - `tool_result` spans come from the tool messages the caller feeds back on the NEXT request
13
+ * (the verbatim recorded result, including errors/empty). They are deduped by the provider
14
+ * tool-call id so they are emitted once, not on every subsequent turn.
15
+ *
16
+ * Detectors downstream (2C schema validation, 2D loop detection, 3C tool-output hallucination)
17
+ * depend on these spans + the `tool_call_id` linkage carried in metadata.
18
+ */
19
+ import { SpanData } from "../trace.js";
20
+ type Emit = (span: SpanData) => void;
21
+ export declare function resetToolResultDedup(): void;
22
+ /** Parse a JSON-string arguments payload into structured JSON; leave non-strings as-is. */
23
+ export declare function parseToolArgs(args: unknown): unknown;
24
+ /**
25
+ * Extract declared tool parameter schemas (name → JSON schema) from a provider request's tool
26
+ * definitions, so the detection engine can validate tool_call arguments against ground truth.
27
+ */
28
+ export declare function extractToolSchemas(provider: "openai" | "anthropic" | "gemini", tools: unknown): Record<string, unknown> | undefined;
29
+ /**
30
+ * Capture the sampling/determinism envelope from the request options so replay-divergence (2A)
31
+ * and regression replay (2E) can compare sampling config, not just the model. Normalizes provider
32
+ * field names to { temperature, top_p, top_k, seed, max_tokens }. Returns undefined if none set.
33
+ */
34
+ export declare function extractSamplingParams(provider: "openai" | "anthropic" | "gemini", opts: unknown): Record<string, unknown> | undefined;
35
+ interface OAToolCall {
36
+ id?: string;
37
+ type?: string;
38
+ function?: {
39
+ name?: string;
40
+ arguments?: unknown;
41
+ };
42
+ }
43
+ interface OAMessage {
44
+ role?: string;
45
+ content?: unknown;
46
+ tool_calls?: OAToolCall[];
47
+ tool_call_id?: string;
48
+ name?: string;
49
+ }
50
+ /** Emit tool_call spans from an OpenAI assistant response message. */
51
+ export declare function emitOpenAIToolCalls(message: OAMessage | undefined, parentId: string, model: string | undefined, emit: Emit): void;
52
+ /** Emit tool_result spans from OpenAI request messages (role:"tool"), deduped by tool_call_id. */
53
+ export declare function emitOpenAIToolResults(messages: OAMessage[] | undefined, emit: Emit): void;
54
+ interface AnthropicMessage {
55
+ role?: string;
56
+ content?: unknown;
57
+ }
58
+ /** Emit tool_call spans from Anthropic response content blocks (type:"tool_use"). */
59
+ export declare function emitAnthropicToolCalls(content: unknown, parentId: string, model: string | undefined, emit: Emit): void;
60
+ /** Emit tool_result spans from Anthropic request messages (content blocks type:"tool_result"). */
61
+ export declare function emitAnthropicToolResults(messages: AnthropicMessage[] | undefined, emit: Emit): void;
62
+ /** Emit tool_call spans from Gemini response candidate parts (functionCall). */
63
+ export declare function emitGeminiToolCalls(candidates: unknown, parentId: string, model: string | undefined, emit: Emit): void;
64
+ /** Emit tool_result spans from Gemini request contents (functionResponse parts), deduped. */
65
+ export declare function emitGeminiToolResults(contents: unknown, emit: Emit): void;
66
+ export {};
@@ -0,0 +1,213 @@
1
+ /**
2
+ * Tool-span extraction (Phase 1C).
3
+ *
4
+ * Provider interceptors historically emitted only a single `llm_call` span and dropped the
5
+ * model's tool calls entirely (the most common agent-failure class — tool errors + tool
6
+ * loops — was therefore invisible). These helpers derive structured `tool_call` and
7
+ * `tool_result` spans from a provider request/response so tool usage is captured with NO
8
+ * manual instrumentation.
9
+ *
10
+ * - `tool_call` spans come from the model's response (the calls it REQUESTED), with arguments
11
+ * parsed into structured JSON (not stringified into the output text).
12
+ * - `tool_result` spans come from the tool messages the caller feeds back on the NEXT request
13
+ * (the verbatim recorded result, including errors/empty). They are deduped by the provider
14
+ * tool-call id so they are emitted once, not on every subsequent turn.
15
+ *
16
+ * Detectors downstream (2C schema validation, 2D loop detection, 3C tool-output hallucination)
17
+ * depend on these spans + the `tool_call_id` linkage carried in metadata.
18
+ */
19
+ import { SpanType } from "../trace.js";
20
+ import { genId, nowIso, truncateJson } from "../utils.js";
21
+ // Module-level dedup set for emitted tool_result spans. Keys are provider-prefixed strings ("oa:", "anthropic:", "gemini:"). Size is bounded by markEmitted(), which wipes the set once it exceeds 5000 entries.
22
+ // Interceptors are expected to call resetToolResultDedup() when a new trace installs its callback (call sites are not visible in this file).
23
+ const emittedToolResultIds = new Set();
24
+ export function resetToolResultDedup() { // Forget every recorded tool-result key, so results seen before the reset can be emitted again.
25
+ emittedToolResultIds.clear();
26
+ }
27
/**
 * Record a dedup key and report whether it is new.
 *
 * When the set already holds more than 5000 keys it is emptied before the new key is added, so
 * memory stays bounded at the cost of possibly re-emitting results recorded before the wipe.
 *
 * @param {string} id Provider-prefixed dedup key.
 * @returns {boolean} `true` when the key had not been recorded (caller should emit), else `false`.
 */
function markEmitted(id) {
    const firstSighting = !emittedToolResultIds.has(id);
    if (firstSighting) {
        if (emittedToolResultIds.size > 5000) {
            emittedToolResultIds.clear();
        }
        emittedToolResultIds.add(id);
    }
    return firstSighting;
}
35
/**
 * Turn a tool-arguments payload into structured JSON where possible.
 *
 * @param {unknown} args Raw arguments as delivered by the provider.
 * @returns {unknown} The parsed value when `args` is a string containing valid JSON; the
 *   original string when it does not parse; any non-string value unchanged.
 */
export function parseToolArgs(args) {
    if (typeof args === "string") {
        let decoded = args;
        try {
            decoded = JSON.parse(args);
        }
        catch {
            // Not valid JSON: keep the raw string so nothing is lost.
        }
        return decoded;
    }
    return args;
}
46
/**
 * Extract declared tool parameter schemas (tool name → JSON schema) from a provider request's
 * tool definitions, so tool_call arguments can later be validated against them.
 *
 * Recognised shapes:
 * - gemini:    `[{ functionDeclarations: [{ name, parameters }] }]`
 * - openai:    `[{ function: { name, parameters } }]`
 * - anthropic: `[{ name, input_schema }]`
 *
 * Malformed input never throws: null/undefined entries, entries without a name or schema, and
 * non-array `functionDeclarations` are skipped, so instrumentation cannot break the caller's
 * request.
 *
 * @param {"openai" | "anthropic" | "gemini"} provider Which request shape to read.
 * @param {unknown} tools The request's tool definitions.
 * @returns {Record<string, unknown> | undefined} Map of tool name to schema, or `undefined`
 *   when `tools` is not an array or no schema was found.
 */
export function extractToolSchemas(provider, tools) {
    if (!Array.isArray(tools))
        return undefined;
    const out = {};
    for (const entry of tools) {
        // Tolerate holes and nullish entries for every provider, not just gemini.
        if (entry == null)
            continue;
        if (provider === "gemini") {
            const declarations = entry.functionDeclarations;
            if (!Array.isArray(declarations))
                continue;
            for (const fd of declarations) {
                if (fd?.name && fd.parameters)
                    out[fd.name] = fd.parameters;
            }
        }
        else if (provider === "openai") {
            const fn = entry.function;
            if (fn?.name && fn.parameters)
                out[fn.name] = fn.parameters;
        }
        else if (provider === "anthropic") {
            if (entry.name && entry.input_schema)
                out[entry.name] = entry.input_schema;
        }
    }
    return Object.keys(out).length ? out : undefined;
}
73
/**
 * Collect the sampling settings of a request under provider-neutral names so two runs can be
 * compared on sampling configuration as well as model.
 *
 * Output keys are `temperature`, `top_p`, `top_k`, `seed` and `max_tokens`. For gemini the
 * values are read from `opts.config` (`topP`, `topK`, `maxOutputTokens`); for the other
 * providers they are read from `opts` directly, with `max_completion_tokens` used when
 * `max_tokens` is null/undefined. Settings that are null or undefined are left out.
 *
 * @param {"openai" | "anthropic" | "gemini"} provider Which request shape to read.
 * @param {unknown} opts The request options.
 * @returns {Record<string, unknown> | undefined} The settings found, or `undefined` if none.
 */
export function extractSamplingParams(provider, opts) {
    const request = opts || {};
    const fromGemini = provider === "gemini";
    const source = (fromGemini ? request.config : request) || {};
    // Pairs are listed in the order the keys must appear in the result.
    const candidates = fromGemini
        ? [
            ["temperature", source.temperature],
            ["top_p", source.topP],
            ["top_k", source.topK],
            ["seed", source.seed],
            ["max_tokens", source.maxOutputTokens],
        ]
        : [
            ["temperature", source.temperature],
            ["top_p", source.top_p],
            ["top_k", source.top_k], // anthropic only
            ["seed", source.seed], // openai only
            ["max_tokens", source.max_tokens ?? source.max_completion_tokens],
        ];
    const out = {};
    for (const [key, value] of candidates) {
        if (value != null)
            out[key] = value;
    }
    return Object.keys(out).length > 0 ? out : undefined;
}
100
/**
 * Build a zero-duration `tool_call` span for one tool invocation requested by the model.
 *
 * @param {string} name Tool name; "tool" is used when falsy.
 * @param {unknown} input Tool arguments; stored after passing through `truncateJson`.
 * @param {string} parentId Recorded as the span's `parent_id`.
 * @param {string | undefined} model Model name; the `model` key is omitted when falsy.
 * @param {string | undefined} toolCallId Provider id; stored as `metadata.tool_call_id` when truthy.
 * @returns A span object whose `trace_id` is left empty (presumably filled in by the recorder;
 *   not visible here) and whose start and end timestamps are identical.
 */
function toolCallSpan(name, input, parentId, model, toolCallId) {
    const stamp = nowIso();
    // Optional fields are spread in so absent values leave no key behind.
    const modelField = model ? { model } : {};
    const linkField = toolCallId ? { metadata: { tool_call_id: toolCallId } } : {};
    return {
        id: genId(),
        trace_id: "",
        parent_id: parentId,
        span_type: SpanType.TOOL_CALL,
        name: name || "tool",
        ...modelField,
        input: truncateJson(input),
        started_at: stamp,
        ended_at: stamp,
        duration_ms: 0,
        ...linkField,
    };
}
111
/**
 * Build a zero-duration `tool_result` span for one tool result fed back to the model.
 *
 * When `isError` is set the span always carries a non-empty `error` string: the output itself
 * if it is a string, otherwise its JSON form, otherwise a generic placeholder. The placeholder
 * covers an errored result with no content and output that cannot be serialised (circular
 * references, BigInt); without it the error marker would be lost or serialisation would throw.
 *
 * @param {string} name Span name; "tool_result" is used when falsy.
 * @param {unknown} output Recorded result; stored after passing through `truncateJson`.
 * @param {boolean} isError Whether the result represents a failure.
 * @param {string | undefined} toolCallId Provider id; stored as `metadata.tool_call_id` when truthy.
 * @returns A span object with `parent_id: null`, an empty `trace_id`, and identical start and
 *   end timestamps.
 */
function toolResultSpan(name, output, isError, toolCallId) {
    const now = nowIso();
    let errorField = {};
    if (isError) {
        let text;
        if (typeof output === "string") {
            text = output;
        }
        else {
            try {
                // Yields undefined for undefined/function values and throws on cycles/BigInt.
                text = JSON.stringify(output);
            }
            catch {
                text = undefined;
            }
        }
        errorField = { error: text || "tool error (no content)" };
    }
    return {
        id: genId(), trace_id: "", parent_id: null,
        span_type: SpanType.TOOL_RESULT, name: name || "tool_result",
        output: truncateJson(output),
        started_at: now, ended_at: now, duration_ms: 0,
        ...errorField,
        ...(toolCallId ? { metadata: { tool_call_id: toolCallId } } : {}),
    };
}
122
/**
 * Emit one tool_call span for each entry of an OpenAI assistant message's `tool_calls`.
 *
 * @param message Assistant response message; nothing happens unless `tool_calls` is an array.
 * @param {string} parentId Recorded as `parent_id` on each span.
 * @param {string | undefined} model Attached to each span when truthy.
 * @param emit Callback receiving each span, in array order.
 */
export function emitOpenAIToolCalls(message, parentId, model, emit) {
    const requested = message?.tool_calls;
    if (Array.isArray(requested)) {
        for (const call of requested) {
            const fn = call.function;
            const toolName = fn?.name || "tool";
            // Arguments normally arrive as a JSON string; store them structured when they parse.
            const structuredArgs = parseToolArgs(fn?.arguments);
            emit(toolCallSpan(toolName, structuredArgs, parentId, model, call.id));
        }
    }
}
132
/**
 * Emit one tool_result span for each `role: "tool"` request message that has a `tool_call_id`
 * not yet emitted (dedup key `oa:<tool_call_id>`).
 *
 * The span name comes from the assistant `tool_calls` in the same list, then the message's own
 * `name`, then "tool_result". String content matching /error|exception|failed/i marks the span
 * as an error; this is a text heuristic, so benign output mentioning those words is flagged too.
 *
 * @param messages Request message list; non-arrays are ignored.
 * @param emit Callback receiving each new span.
 */
export function emitOpenAIToolResults(messages, emit) {
    if (!Array.isArray(messages))
        return;
    const failurePattern = /error|exception|failed/i;
    // First pass: learn which tool each call id belongs to.
    const toolNames = new Map();
    for (const msg of messages) {
        const calls = msg?.role === "assistant" ? msg.tool_calls : undefined;
        if (!Array.isArray(calls))
            continue;
        for (const call of calls) {
            if (call.id)
                toolNames.set(call.id, call.function?.name || "tool");
        }
    }
    // Second pass: emit each tool message once.
    for (const msg of messages) {
        const callId = msg?.role === "tool" ? msg.tool_call_id : undefined;
        if (!callId || !markEmitted(`oa:${callId}`))
            continue;
        const body = msg.content;
        const looksFailed = typeof body === "string" && failurePattern.test(body);
        const label = toolNames.get(callId) || msg.name || "tool_result";
        emit(toolResultSpan(label, body, looksFailed, callId));
    }
}
155
/**
 * Emit one tool_call span for each Anthropic response content block of type "tool_use".
 *
 * @param content Response content blocks; non-arrays are ignored.
 * @param {string} parentId Recorded as `parent_id` on each span.
 * @param {string | undefined} model Attached to each span when truthy.
 * @param emit Callback receiving each span, in block order.
 */
export function emitAnthropicToolCalls(content, parentId, model, emit) {
    if (!Array.isArray(content))
        return;
    for (let i = 0; i < content.length; i++) {
        const block = content[i];
        if (block?.type === "tool_use") {
            // `input` is already structured, so it needs no parsing.
            emit(toolCallSpan(block.name || "tool", block.input, parentId, model, block.id));
        }
    }
}
165
/**
 * Emit one tool_result span for each `tool_result` content block in the request messages whose
 * `tool_use_id` has not been emitted yet (dedup key `anthropic:<tool_use_id>`).
 *
 * The span is named after the tool that produced the result, resolved from the assistant
 * `tool_use` blocks in the same message list, the same way the OpenAI helper resolves names
 * from `tool_calls`. It falls back to "tool_result" when no matching call is present. The
 * block's `is_error` flag decides whether the span is marked as an error.
 *
 * @param messages Request message list; non-arrays and messages without array content are ignored.
 * @param emit Callback receiving each new span.
 */
export function emitAnthropicToolResults(messages, emit) {
    if (!Array.isArray(messages))
        return;
    // Map tool_use id -> tool name from assistant turns in the same message list.
    const nameById = new Map();
    for (const m of messages) {
        if (m?.role !== "assistant" || !Array.isArray(m.content))
            continue;
        for (const block of m.content) {
            if (block?.type === "tool_use" && block.id && block.name)
                nameById.set(block.id, block.name);
        }
    }
    for (const m of messages) {
        if (!Array.isArray(m?.content))
            continue;
        for (const block of m.content) {
            if (block?.type !== "tool_result" || !block.tool_use_id)
                continue;
            if (!markEmitted(`anthropic:${block.tool_use_id}`))
                continue;
            const name = nameById.get(block.tool_use_id) || "tool_result";
            emit(toolResultSpan(name, block.content, !!block.is_error, block.tool_use_id));
        }
    }
}
181
/**
 * Emit one tool_call span for each `functionCall` part in the Gemini response candidates.
 *
 * The span's `metadata.tool_call_id` is `functionCall.id` when the provider supplies one and
 * the function name otherwise. This is the same rule `emitGeminiToolResults` applies to
 * `functionResponse`, so a call and its result share an id. Null or undefined parts are skipped.
 *
 * @param candidates Response candidates; non-arrays and candidates without an array of
 *   `content.parts` are ignored.
 * @param {string} parentId Recorded as `parent_id` on each span.
 * @param {string | undefined} model Attached to each span when truthy.
 * @param emit Callback receiving each span.
 */
export function emitGeminiToolCalls(candidates, parentId, model, emit) {
    if (!Array.isArray(candidates))
        return;
    for (const cand of candidates) {
        const parts = cand?.content?.parts;
        if (!Array.isArray(parts))
            continue;
        for (const p of parts) {
            const call = p?.functionCall;
            if (!call)
                continue;
            emit(toolCallSpan(call.name || "tool", call.args, parentId, model, call.id || call.name));
        }
    }
}
196
/**
 * Emit one tool_result span for each `functionResponse` part in the Gemini request contents,
 * deduplicated so a response replayed on later turns is emitted only once.
 *
 * Dedup key:
 * - `gemini:<id>` when the response carries an id;
 * - otherwise `gemini:<name>:<first 64 chars of the JSON-encoded response>`. Including the
 *   payload keeps a second call to the same function from being mistaken for a replay of the
 *   first; keying on the name alone would silently drop it.
 *
 * A missing or unserialisable response contributes an empty digest instead of throwing, and
 * null or undefined parts are skipped. Spans are never marked as errors, and
 * `metadata.tool_call_id` is the id when present, else the function name.
 *
 * @param contents A single content object or an array of them; null/undefined yields nothing.
 * @param emit Callback receiving each new span.
 */
export function emitGeminiToolResults(contents, emit) {
    const list = Array.isArray(contents) ? contents : contents != null ? [contents] : [];
    for (const c of list) {
        const parts = c?.parts;
        if (!Array.isArray(parts))
            continue;
        for (const p of parts) {
            const fr = p?.functionResponse;
            if (!fr)
                continue;
            let key = fr.id;
            if (!key) {
                let digest = "";
                try {
                    // JSON.stringify returns undefined for an undefined response.
                    digest = (JSON.stringify(fr.response) ?? "").slice(0, 64);
                }
                catch {
                    digest = "";
                }
                key = `${fr.name || ""}:${digest}`;
            }
            if (!markEmitted(`gemini:${key}`))
                continue;
            emit(toolResultSpan(fr.name || "tool_result", fr.response, false, fr.id || fr.name));
        }
    }
}
package/dist/trace.d.ts CHANGED
@@ -30,6 +30,8 @@ export interface SpanData {
30
30
  started_at: string;
31
31
  ended_at?: string;
32
32
  error?: string;
33
+ token_ids?: number[];
34
+ logprobs?: number[];
33
35
  }
34
36
  export interface TraceData {
35
37
  id: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "retrace-sdk",
3
- "version": "0.5.4",
3
+ "version": "0.9.0",
4
4
  "description": "The execution replay engine for AI agents. Record, replay, fork, and share agent executions.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -9,6 +9,14 @@
9
9
  ".": {
10
10
  "types": "./dist/index.d.ts",
11
11
  "import": "./dist/index.js"
12
+ },
13
+ "./adapters/langchain": {
14
+ "types": "./dist/adapters/langchain.d.ts",
15
+ "import": "./dist/adapters/langchain.js"
16
+ },
17
+ "./adapters/vercel-ai": {
18
+ "types": "./dist/adapters/vercel-ai.d.ts",
19
+ "import": "./dist/adapters/vercel-ai.js"
12
20
  }
13
21
  },
14
22
  "files": ["dist", "README.md", "LICENSE"],
@@ -37,7 +45,8 @@
37
45
  "peerDependencies": {
38
46
  "@google/genai": ">=1.52.0",
39
47
  "openai": ">=4.0.0",
40
- "@anthropic-ai/sdk": ">=0.30.0"
48
+ "@anthropic-ai/sdk": ">=0.30.0",
49
+ "@langchain/core": ">=0.3.0"
41
50
  },
42
51
  "peerDependenciesMeta": {
43
52
  "@google/genai": {
@@ -48,6 +57,9 @@
48
57
  },
49
58
  "@anthropic-ai/sdk": {
50
59
  "optional": true
60
+ },
61
+ "@langchain/core": {
62
+ "optional": true
51
63
  }
52
64
  },
53
65
  "devDependencies": {