retrace-sdk 0.6.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/index.d.ts +3 -0
- package/dist/adapters/index.js +2 -0
- package/dist/adapters/langchain.d.ts +17 -0
- package/dist/adapters/langchain.js +111 -0
- package/dist/adapters/vercel-ai.d.ts +43 -0
- package/dist/adapters/vercel-ai.js +32 -0
- package/dist/golden.d.ts +6 -0
- package/dist/golden.js +17 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +5 -0
- package/dist/init.d.ts +30 -0
- package/dist/init.js +83 -0
- package/dist/interceptors/anthropic.js +29 -1
- package/dist/interceptors/gemini.js +12 -0
- package/dist/interceptors/openai.js +38 -1
- package/dist/interceptors/tool-spans.d.ts +66 -0
- package/dist/interceptors/tool-spans.js +213 -0
- package/dist/recorder.d.ts +2 -0
- package/dist/recorder.js +4 -0
- package/dist/trace.js +6 -1
- package/dist/transport.d.ts +6 -1
- package/dist/transport.js +46 -13
- package/package.json +14 -2
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { TraceRecorder } from "../recorder.js";
|
|
2
|
+
/**
|
|
3
|
+
* LangChain / LangGraph adapter for Retrace (JS).
|
|
4
|
+
*
|
|
5
|
+
* Returns a LangChain `BaseCallbackHandler` that records STRUCTURED tool / retriever / chain spans
|
|
6
|
+
* into the active Retrace trace, aligned with the detectors. LLM spans are already captured by the
|
|
7
|
+
* provider interceptors, so this handler does not emit `llm_call` spans.
|
|
8
|
+
*
|
|
9
|
+
* ```ts
|
|
10
|
+
* import { init } from "retrace-sdk";
|
|
11
|
+
* import { createLangChainHandler } from "retrace-sdk/adapters/langchain";
|
|
12
|
+
* init();
|
|
13
|
+
* const cb = await createLangChainHandler();
|
|
14
|
+
* await app.invoke(input, { callbacks: [cb] }); // works for LangChain + LangGraph
|
|
15
|
+
* ```
|
|
16
|
+
*/
|
|
17
|
+
export declare function createLangChainHandler(recorder?: TraceRecorder): Promise<object>;
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import { SpanType } from "../trace.js";
|
|
2
|
+
import { getActiveRecorder } from "../init.js";
|
|
3
|
+
/**
 * LangChain / LangGraph adapter for Retrace (JS).
 *
 * Returns a LangChain `BaseCallbackHandler` that records STRUCTURED tool / retriever / chain spans
 * into the active Retrace trace, aligned with the detectors. LLM spans are already captured by the
 * provider interceptors, so this handler does not emit `llm_call` spans.
 *
 * ```ts
 * import { init } from "retrace-sdk";
 * import { createLangChainHandler } from "retrace-sdk/adapters/langchain";
 * init();
 * const cb = await createLangChainHandler();
 * await app.invoke(input, { callbacks: [cb] }); // works for LangChain + LangGraph
 * ```
 *
 * @param {object} [recorder] Recorder to write spans to. When omitted, `getActiveRecorder()` is
 *   consulted on every callback, so a recorder started later is still picked up.
 * @returns {Promise<object>} A callback handler instance to pass in `callbacks: [...]`.
 * @throws {Error} If `@langchain/core/callbacks/base` cannot be imported; the underlying import
 *   failure is attached as `cause`.
 */
export async function createLangChainHandler(recorder) {
    let BaseCallbackHandler;
    try {
        ({ BaseCallbackHandler } = await import("@langchain/core/callbacks/base"));
    }
    catch (err) {
        // Keep the original failure reachable: the import can also fail for reasons other than
        // "not installed" (e.g. a broken install), and the cause is what tells them apart.
        throw new Error("@langchain/core is not installed. Run: npm install @langchain/core", { cause: err });
    }
    // Spans that have been started but not yet ended, keyed by LangChain run id.
    const open = new Map();
    const rec = () => recorder ?? getActiveRecorder();
    const errorText = (err) => (err instanceof Error ? err.message : String(err));
    // Remove the span opened for `runId` from the map and hand it back (undefined if none).
    // The entry is dropped even when no recorder is active any more, so the map cannot grow
    // without bound once the trace has been shut down mid-run.
    const take = (runId) => {
        const sb = open.get(runId);
        if (sb)
            open.delete(runId);
        return sb;
    };
    // End the span for `runId` as failed. Shared by the tool / retriever / chain error hooks.
    const fail = (runId, err) => {
        const sb = take(runId);
        const r = rec();
        if (sb && r)
            r.endSpan(sb, undefined, errorText(err));
    };
    class RetraceHandler extends BaseCallbackHandler {
        name = "retrace";
        handleToolStart(tool, input, runId) {
            const r = rec();
            if (!r)
                return;
            open.set(runId, r.startSpan(tool?.name || "tool", SpanType.TOOL_CALL, input));
        }
        handleToolEnd(output, runId) {
            const sb = take(runId);
            const r = rec();
            if (!r)
                return;
            if (sb)
                r.endSpan(sb, output);
            // A separate tool_result span is recorded in addition to closing the tool_call span.
            const tr = r.startSpan("tool_result", SpanType.TOOL_RESULT);
            r.endSpan(tr, output);
        }
        handleToolError(err, runId) {
            fail(runId, err);
        }
        handleRetrieverStart(_retriever, query, runId) {
            const r = rec();
            if (!r)
                return;
            open.set(runId, r.startSpan("retrieval", SpanType.ACTION, query));
        }
        handleRetrieverEnd(documents, runId) {
            const sb = take(runId);
            const r = rec();
            if (!sb || !r)
                return;
            // Prefer the document text; fall back to a JSON dump for entries without pageContent.
            const docs = (documents || []).map((d) => d?.pageContent ?? JSON.stringify(d));
            r.endSpan(sb, { count: docs.length, documents: docs });
        }
        // Without this hook a failed retriever would leave its span open forever.
        handleRetrieverError(err, runId) {
            fail(runId, err);
        }
        handleChainStart(chain, inputs, runId) {
            const r = rec();
            if (!r)
                return;
            // Use the explicit name, else the last segment of the serialized id path.
            const name = chain?.name || chain?.id?.[chain.id.length - 1] || "chain";
            open.set(runId, r.startSpan(String(name), SpanType.REASONING, inputs));
        }
        handleChainEnd(outputs, runId) {
            const sb = take(runId);
            const r = rec();
            if (sb && r)
                r.endSpan(sb, outputs);
        }
        handleChainError(err, runId) {
            fail(runId, err);
        }
        handleAgentAction(action) {
            const r = rec();
            if (!r)
                return;
            // Agent actions are recorded as an instantaneous span: started and ended in one go.
            const sb = r.startSpan(String(action?.tool || "action"), SpanType.TOOL_CALL, action?.toolInput);
            r.endSpan(sb, action?.log);
        }
    }
    return new RetraceHandler();
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import type { TraceRecorder } from "../recorder.js";
|
|
2
|
+
/**
|
|
3
|
+
* Vercel AI SDK adapter for Retrace.
|
|
4
|
+
*
|
|
5
|
+
* The AI SDK talks to providers through its own `@ai-sdk/*` packages (not the raw OpenAI/Anthropic
|
|
6
|
+
* SDKs), so the provider interceptors don't see those calls. This adapter records the LLM step plus
|
|
7
|
+
* structured `tool_call` / `tool_result` spans from the AI SDK's per-step callback, aligned with the
|
|
8
|
+
* detectors. Drop it into `generateText` / `streamText`:
|
|
9
|
+
*
|
|
10
|
+
* ```ts
|
|
11
|
+
* import { init } from "retrace-sdk";
|
|
12
|
+
* import { retraceOnStepFinish } from "retrace-sdk/adapters/vercel-ai";
|
|
13
|
+
* init();
|
|
14
|
+
* await generateText({ model, prompt, tools, onStepFinish: retraceOnStepFinish() });
|
|
15
|
+
* ```
|
|
16
|
+
*/
|
|
17
|
+
interface AISDKToolCall {
|
|
18
|
+
toolName?: string;
|
|
19
|
+
toolCallId?: string;
|
|
20
|
+
args?: unknown;
|
|
21
|
+
}
|
|
22
|
+
interface AISDKToolResult {
|
|
23
|
+
toolName?: string;
|
|
24
|
+
toolCallId?: string;
|
|
25
|
+
result?: unknown;
|
|
26
|
+
}
|
|
27
|
+
interface AISDKUsage {
|
|
28
|
+
promptTokens?: number;
|
|
29
|
+
completionTokens?: number;
|
|
30
|
+
totalTokens?: number;
|
|
31
|
+
}
|
|
32
|
+
export interface AISDKStep {
|
|
33
|
+
text?: string;
|
|
34
|
+
toolCalls?: AISDKToolCall[];
|
|
35
|
+
toolResults?: AISDKToolResult[];
|
|
36
|
+
usage?: AISDKUsage;
|
|
37
|
+
finishReason?: string;
|
|
38
|
+
}
|
|
39
|
+
/** Record one AI SDK step (LLM output + tool calls/results) into the trace. */
|
|
40
|
+
export declare function recordVercelStep(step: AISDKStep, recorder?: TraceRecorder): void;
|
|
41
|
+
/** Returns an `onStepFinish` callback for `generateText` / `streamText`. */
|
|
42
|
+
export declare function retraceOnStepFinish(recorder?: TraceRecorder): (step: AISDKStep) => void;
|
|
43
|
+
export {};
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { SpanType } from "../trace.js";
|
|
2
|
+
import { genId, nowIso } from "../utils.js";
|
|
3
|
+
import { getActiveRecorder } from "../init.js";
|
|
4
|
+
/**
 * Record one AI SDK step (LLM output + tool calls/results) into the trace.
 *
 * Emits one `llm_call` span named "ai.generate" carrying the step text and token usage, then one
 * `tool_call` span per entry in `step.toolCalls` and one `tool_result` span per entry in
 * `step.toolResults`. Both AI SDK field spellings are accepted: the older `args` / `result` /
 * `promptTokens` / `completionTokens` and the newer `input` / `output` / `inputTokens` /
 * `outputTokens`. When both are present the older spelling wins.
 *
 * @param {object} step The step object handed to `onStepFinish`. A null/undefined step is ignored.
 * @param {object} [recorder] Recorder to write to; defaults to `getActiveRecorder()`.
 * @returns {void} Nothing is recorded when there is no recorder.
 */
export function recordVercelStep(step, recorder) {
    const rec = recorder ?? getActiveRecorder();
    if (!rec || !step)
        return;
    const usage = step.usage;
    // LLM step — the AI SDK call the provider interceptors don't see; emit with token usage.
    // The step is reported after the fact, so start and end share one timestamp.
    const now = nowIso();
    const llm = {
        id: genId(), trace_id: "", parent_id: null,
        span_type: SpanType.LLM_CALL, name: "ai.generate",
        output: step.text ?? "",
        input_tokens: usage?.promptTokens ?? usage?.inputTokens,
        output_tokens: usage?.completionTokens ?? usage?.outputTokens,
        started_at: now, ended_at: now,
    };
    rec.addSpan(llm);
    for (const call of step.toolCalls ?? []) {
        const sb = rec.startSpan(call.toolName || "tool", SpanType.TOOL_CALL, call.args ?? call.input);
        rec.endSpan(sb, undefined);
    }
    for (const res of step.toolResults ?? []) {
        const sb = rec.startSpan(res.toolName || "tool_result", SpanType.TOOL_RESULT, undefined);
        rec.endSpan(sb, res.result ?? res.output);
    }
}
|
|
29
|
+
/**
 * Build an `onStepFinish` callback for `generateText` / `streamText`.
 *
 * @param {object} [recorder] Recorder forwarded unchanged to `recordVercelStep` on every step.
 * @returns {(step: object) => void} Callback that records each finished step.
 */
export function retraceOnStepFinish(recorder) {
    const onStepFinish = (step) => {
        return recordVercelStep(step, recorder);
    };
    return onStepFinish;
}
|
package/dist/golden.d.ts
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mark (or unmark) a recorded trace as a GOLDEN regression baseline (Phase 2E).
|
|
3
|
+
* Golden traces are the reference for regression replay — `compareToGolden` flags structural
|
|
4
|
+
* divergence in later runs as a regression.
|
|
5
|
+
*/
|
|
6
|
+
export declare function markGolden(traceId: string, golden?: boolean): Promise<void>;
|
package/dist/golden.js
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { getConfig, requireApiKey } from "./config.js";
|
|
2
|
+
/**
 * Mark (or unmark) a recorded trace as a GOLDEN regression baseline (Phase 2E).
 * Golden traces are the reference for regression replay — `compareToGolden` flags structural
 * divergence in later runs as a regression.
 *
 * Sends `POST {baseUrl}/api/v1/traces/{traceId}/golden` with body `{ golden }` and the configured
 * API key in the `x-retrace-key` header.
 *
 * @param {string} traceId Id of the trace to update. It is percent-encoded before being placed in
 *   the URL path, so an id containing `/`, `?` or `#` cannot redirect the request.
 * @param {boolean} [golden=true] `true` to mark the trace as golden, `false` to unmark it.
 * @returns {Promise<void>} Resolves once the server has answered with a 2xx status.
 * @throws {Error} When the response is not OK (`markGolden failed: HTTP <status>`); whatever
 *   `requireApiKey()` or `fetch` throw is propagated unchanged.
 */
export async function markGolden(traceId, golden = true) {
    requireApiKey();
    const cfg = getConfig();
    const url = `${cfg.baseUrl}/api/v1/traces/${encodeURIComponent(traceId)}/golden`;
    const res = await fetch(url, {
        method: "POST",
        headers: { "x-retrace-key": cfg.apiKey, "Content-Type": "application/json" },
        body: JSON.stringify({ golden }),
    });
    if (!res.ok)
        throw new Error(`markGolden failed: HTTP ${res.status}`);
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
export { configure, getConfig } from "./config.js";
|
|
2
|
+
export { init, getActiveRecorder, shutdown } from "./init.js";
|
|
3
|
+
export type { InitOptions } from "./init.js";
|
|
2
4
|
export { record, trace, TraceRecorder } from "./recorder.js";
|
|
3
5
|
export { SpanBuilder, TraceBuilder } from "./trace.js";
|
|
4
6
|
export type { SpanData, TraceData } from "./trace.js";
|
|
@@ -12,3 +14,7 @@ export type { ResumeCommand } from "./resume.js";
|
|
|
12
14
|
export { isReplaying, consumeCassetteEntry, handleReplay } from "./replay.js";
|
|
13
15
|
export type { CassetteEntry, ReplayCommand } from "./replay.js";
|
|
14
16
|
export { setTraceContext, clearTraceContext, getTraceparent, injectTraceparent, parseTraceparent } from "./traceparent.js";
|
|
17
|
+
export { markGolden } from "./golden.js";
|
|
18
|
+
export { createLangChainHandler } from "./adapters/langchain.js";
|
|
19
|
+
export { retraceOnStepFinish, recordVercelStep } from "./adapters/vercel-ai.js";
|
|
20
|
+
export type { AISDKStep } from "./adapters/vercel-ai.js";
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
export { configure, getConfig } from "./config.js";
|
|
2
|
+
export { init, getActiveRecorder, shutdown } from "./init.js";
|
|
2
3
|
export { record, trace, TraceRecorder } from "./recorder.js";
|
|
3
4
|
export { SpanBuilder, TraceBuilder } from "./trace.js";
|
|
4
5
|
export { SpanType, TraceStatus } from "./trace.js";
|
|
@@ -9,5 +10,9 @@ export { RetraceError, RetraceAuthError, RetraceCreditsExhaustedError, RetraceCo
|
|
|
9
10
|
export { registerResumable, handleResume } from "./resume.js";
|
|
10
11
|
export { isReplaying, consumeCassetteEntry, handleReplay } from "./replay.js";
|
|
11
12
|
export { setTraceContext, clearTraceContext, getTraceparent, injectTraceparent, parseTraceparent } from "./traceparent.js";
|
|
13
|
+
export { markGolden } from "./golden.js";
|
|
14
|
+
// Framework adapters (5B) — drop-in instrumentation for LangChain/LangGraph + Vercel AI SDK.
|
|
15
|
+
export { createLangChainHandler } from "./adapters/langchain.js";
|
|
16
|
+
export { retraceOnStepFinish, recordVercelStep } from "./adapters/vercel-ai.js";
|
|
12
17
|
// v0.5.0
|
|
13
18
|
// trigger
|
package/dist/init.d.ts
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { type Config } from "./config.js";
|
|
2
|
+
import { TraceRecorder } from "./recorder.js";
|
|
3
|
+
import { TraceStatus } from "./trace.js";
|
|
4
|
+
export interface InitOptions extends Partial<Config> {
|
|
5
|
+
/** Name for the auto-started ambient trace. Defaults to RETRACE_TRACE_NAME, the entry script name, or "agent". */
|
|
6
|
+
name?: string;
|
|
7
|
+
metadata?: Record<string, unknown>;
|
|
8
|
+
/** Auto-start an ambient trace that captures every provider call (default true). Set false to only configure + patch. */
|
|
9
|
+
autoTrace?: boolean;
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* Zero-config, one-line init. Reads `RETRACE_API_KEY` from the environment (or pass `apiKey`),
|
|
13
|
+
* auto-patches any installed provider SDK (OpenAI / Anthropic / Gemini), and auto-starts an
|
|
14
|
+
* ambient trace so every LLM + tool call is captured with NO `startSpan`/`trace()` boilerplate.
|
|
15
|
+
* The ambient trace is flushed and ended automatically on process exit.
|
|
16
|
+
*
|
|
17
|
+
* ```ts
|
|
18
|
+
* import { init } from "retrace-sdk";
|
|
19
|
+
* init(); // RETRACE_API_KEY from env
|
|
20
|
+
* // ...use openai / anthropic / gemini normally — auto-recorded
|
|
21
|
+
* ```
|
|
22
|
+
*
|
|
23
|
+
* Intended for scripts and single-run agents. Long-lived servers should keep using `trace()`
|
|
24
|
+
* per request so each request is its own trace.
|
|
25
|
+
*/
|
|
26
|
+
export declare function init(opts?: InitOptions): TraceRecorder | null;
|
|
27
|
+
/** The ambient recorder started by {@link init}, if any. */
|
|
28
|
+
export declare function getActiveRecorder(): TraceRecorder | null;
|
|
29
|
+
/** Manually end the ambient trace (e.g. with a final output) before process exit. Idempotent. */
|
|
30
|
+
export declare function shutdown(output?: unknown, status?: TraceStatus): void;
|
package/dist/init.js
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { configure, getConfig, requireApiKey } from "./config.js";
|
|
2
|
+
import { TraceRecorder, flushSharedTransport } from "./recorder.js";
|
|
3
|
+
import { TraceStatus } from "./trace.js";
|
|
4
|
+
let ambient = null;
|
|
5
|
+
let exitHooked = false;
|
|
6
|
+
/**
 * Pick the name for the ambient trace.
 *
 * Precedence: the explicit name, then the `RETRACE_TRACE_NAME` environment variable, then the
 * basename of the entry script (`process.argv[1]`) with a trailing `.js` / `.ts` / `.mjs` /
 * `.cjs` / `.mts` / `.cts` extension removed, and finally the literal "agent".
 *
 * @param {string} [explicit] Name supplied by the caller; used as-is when truthy.
 * @returns {string} A non-empty trace name.
 */
function defaultName(explicit) {
    if (explicit)
        return explicit;
    // No `process` global (non-Node runtime): nothing to derive a name from.
    if (typeof process === "undefined")
        return "agent";
    const fromEnv = process.env.RETRACE_TRACE_NAME;
    if (fromEnv)
        return fromEnv;
    const entry = process.argv?.[1];
    // Split on both separators so Windows and POSIX paths are handled alike.
    const base = entry ? entry.split(/[\\/]/).pop() : undefined;
    const stem = base ? base.replace(/\.[cm]?[jt]s$/, "") : "";
    // A basename that is nothing but an extension (e.g. ".js") strips down to "", which must
    // not be used as a trace name.
    return stem || "agent";
}
|
|
21
|
+
/**
 * Zero-config, one-line init. Reads `RETRACE_API_KEY` from the environment (or pass `apiKey`),
 * auto-patches any installed provider SDK (OpenAI / Anthropic / Gemini), and auto-starts an
 * ambient trace so every LLM + tool call is captured with NO `startSpan`/`trace()` boilerplate.
 * The ambient trace is flushed and ended automatically on process exit.
 *
 * ```ts
 * import { init } from "retrace-sdk";
 * init();                       // RETRACE_API_KEY from env
 * // ...use openai / anthropic / gemini normally — auto-recorded
 * ```
 *
 * Intended for scripts and single-run agents. Long-lived servers should keep using `trace()`
 * per request so each request is its own trace.
 *
 * @param {object} [opts] `name`, `metadata` and `autoTrace` are consumed here; every other key
 *   is forwarded to `configure()`.
 * @returns {object|null} The ambient recorder, or `null` when the SDK is disabled or
 *   `autoTrace` is false. Calling `init()` again while a trace is active returns that recorder.
 * @throws Whatever `requireApiKey()` or the recorder's `start()` throw. If `start()` throws, no
 *   ambient recorder is left registered.
 */
export function init(opts = {}) {
    const { name, metadata, autoTrace = true, ...cfg } = opts;
    configure(cfg);
    requireApiKey();
    if (!getConfig().enabled || !autoTrace)
        return null;
    if (ambient)
        return ambient;
    const traceName = defaultName(name);
    const recorder = new TraceRecorder({ name: traceName, metadata });
    ambient = recorder;
    try {
        recorder.start(traceName); // installs the provider interceptors against the ambient recorder
    }
    catch (err) {
        // Do not leave a recorder that never started registered as the active one; otherwise
        // every later init() / getActiveRecorder() would hand out a dead recorder.
        if (ambient === recorder)
            ambient = null;
        throw err;
    }
    // Process-level hooks are installed once, no matter how often init() is called.
    if (!exitHooked && typeof process !== "undefined") {
        exitHooked = true;
        // End whichever ambient trace is current at exit time. The reference is cleared first
        // so a second hook firing finds nothing to end.
        const finish = (status) => {
            const rec = ambient;
            ambient = null;
            try {
                rec?.end(undefined, status);
            }
            catch { /* best effort on shutdown */ }
        };
        // On signal-triggered exits, process.exit() would otherwise kill the process before the
        // final trace_ended is delivered over the network. End the trace, then await a transport
        // drain (capped by a hard timeout so a hung network can't block shutdown) before exiting.
        const finishAndExit = (status, code) => {
            finish(status);
            void Promise.race([
                flushSharedTransport().catch(() => { }),
                new Promise((r) => setTimeout(r, 3000)),
            ]).then(() => process.exit(code));
        };
        process.once("beforeExit", () => finish(TraceStatus.COMPLETED));
        // 130 / 143 follow the shell convention of 128 + signal number (SIGINT=2, SIGTERM=15).
        process.once("SIGINT", () => finishAndExit(TraceStatus.COMPLETED, 130));
        process.once("SIGTERM", () => finishAndExit(TraceStatus.COMPLETED, 143));
        process.once("uncaughtException", (err) => { console.error(err); finishAndExit(TraceStatus.FAILED, 1); });
    }
    return ambient;
}
|
|
74
|
+
/**
 * Accessor for the ambient recorder started by {@link init}.
 *
 * @returns {object|null} The current ambient recorder, or `null` when none is active.
 */
export function getActiveRecorder() {
    const current = ambient;
    return current;
}
|
|
78
|
+
/**
 * Manually end the ambient trace (e.g. with a final output) before process exit. Idempotent:
 * the ambient reference is cleared before the trace is ended, so a second call finds nothing
 * to end.
 *
 * @param {unknown} [output] Final output passed through to the recorder's `end()`.
 * @param {string} [status=TraceStatus.COMPLETED] Final status passed through to `end()`.
 * @returns {void}
 */
export function shutdown(output, status = TraceStatus.COMPLETED) {
    const active = ambient;
    ambient = null;
    if (active == null)
        return;
    active.end(output, status);
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { SpanType } from "../trace.js";
|
|
2
2
|
import { genId, nowIso, truncateJson } from "../utils.js";
|
|
3
3
|
import { isReplaying, consumeCassetteEntry } from "../replay.js";
|
|
4
|
+
import { emitAnthropicToolCalls, emitAnthropicToolResults, parseToolArgs, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
|
|
4
5
|
const PRICING = {
|
|
5
6
|
"claude-opus-4.7": [5.0, 25.0],
|
|
6
7
|
"claude-opus-4.6": [5.0, 25.0],
|
|
@@ -25,9 +26,11 @@ let onSpanCallback = null;
|
|
|
25
26
|
export function installAnthropicInterceptor(onSpan) {
|
|
26
27
|
if (installed) {
|
|
27
28
|
onSpanCallback = onSpan;
|
|
29
|
+
resetToolResultDedup();
|
|
28
30
|
return;
|
|
29
31
|
}
|
|
30
32
|
onSpanCallback = onSpan;
|
|
33
|
+
resetToolResultDedup();
|
|
31
34
|
import("@anthropic-ai/sdk").then((anthropicMod) => {
|
|
32
35
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
33
36
|
const mod = anthropicMod;
|
|
@@ -50,6 +53,9 @@ function createPatchedCreate() {
|
|
|
50
53
|
const model = opts.model || "unknown";
|
|
51
54
|
const messages = opts.messages || [];
|
|
52
55
|
const isStreaming = !!opts.stream;
|
|
56
|
+
const toolSchemas = extractToolSchemas("anthropic", opts.tools);
|
|
57
|
+
const sampling = extractSamplingParams("anthropic", opts);
|
|
58
|
+
const spanMeta = { ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) };
|
|
53
59
|
const spanId = genId();
|
|
54
60
|
const startedAt = nowIso();
|
|
55
61
|
const startMs = Date.now();
|
|
@@ -83,6 +89,8 @@ function createPatchedCreate() {
|
|
|
83
89
|
const chunks = [];
|
|
84
90
|
let inputTokens = 0;
|
|
85
91
|
let outputTokens = 0;
|
|
92
|
+
// Accumulate streamed tool_use blocks by index (content_block_start + input_json_delta).
|
|
93
|
+
const toolAcc = {};
|
|
86
94
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
87
95
|
const originalIterator = result[Symbol.asyncIterator]();
|
|
88
96
|
const wrappedStream = {
|
|
@@ -101,15 +109,29 @@ function createPatchedCreate() {
|
|
|
101
109
|
input_tokens: inputTokens, output_tokens: outputTokens,
|
|
102
110
|
cost: calcCost(model, inputTokens, outputTokens),
|
|
103
111
|
duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
|
|
104
|
-
metadata: { streaming: true },
|
|
112
|
+
metadata: { streaming: true, ...spanMeta },
|
|
105
113
|
};
|
|
106
114
|
onSpanCallback?.(span);
|
|
115
|
+
if (onSpanCallback) {
|
|
116
|
+
emitAnthropicToolResults(messages, onSpanCallback);
|
|
117
|
+
const blocks = Object.values(toolAcc).map((t) => ({ type: "tool_use", id: t.id, name: t.name, input: parseToolArgs(t.json) }));
|
|
118
|
+
emitAnthropicToolCalls(blocks, spanId, model, onSpanCallback);
|
|
119
|
+
}
|
|
107
120
|
return { value: undefined, done: true };
|
|
108
121
|
}
|
|
109
122
|
// Collect content_block_delta text
|
|
110
123
|
if (value?.type === "content_block_delta" && value?.delta?.text) {
|
|
111
124
|
chunks.push(value.delta.text);
|
|
112
125
|
}
|
|
126
|
+
// Accumulate tool_use blocks (start carries id/name, input_json_delta streams args)
|
|
127
|
+
if (value?.type === "content_block_start" && value?.content_block?.type === "tool_use") {
|
|
128
|
+
toolAcc[value.index ?? 0] = { id: value.content_block.id, name: value.content_block.name, json: "" };
|
|
129
|
+
}
|
|
130
|
+
if (value?.type === "content_block_delta" && value?.delta?.type === "input_json_delta") {
|
|
131
|
+
const acc = toolAcc[value.index ?? 0];
|
|
132
|
+
if (acc && typeof value.delta.partial_json === "string")
|
|
133
|
+
acc.json += value.delta.partial_json;
|
|
134
|
+
}
|
|
113
135
|
// Collect usage from message_delta
|
|
114
136
|
if (value?.type === "message_delta" && value?.usage) {
|
|
115
137
|
outputTokens = value.usage.output_tokens || outputTokens;
|
|
@@ -142,8 +164,14 @@ function createPatchedCreate() {
|
|
|
142
164
|
input_tokens: inputTokens, output_tokens: outputTokens,
|
|
143
165
|
cost: calcCost(model, inputTokens, outputTokens),
|
|
144
166
|
duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
|
|
167
|
+
...(Object.keys(spanMeta).length ? { metadata: spanMeta } : {}),
|
|
145
168
|
};
|
|
146
169
|
onSpanCallback?.(span);
|
|
170
|
+
// Auto-capture tool usage (tool_use blocks in response, tool_result blocks in input).
|
|
171
|
+
if (onSpanCallback) {
|
|
172
|
+
emitAnthropicToolResults(messages, onSpanCallback);
|
|
173
|
+
emitAnthropicToolCalls(res?.content, spanId, model, onSpanCallback);
|
|
174
|
+
}
|
|
147
175
|
return result;
|
|
148
176
|
}
|
|
149
177
|
catch (err) {
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { SpanType } from "../trace.js";
|
|
2
2
|
import { genId, nowIso, truncateJson } from "../utils.js";
|
|
3
|
+
import { emitGeminiToolCalls, emitGeminiToolResults, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
|
|
3
4
|
const PRICING = {
|
|
4
5
|
"gemini-3.1-flash-lite": [0.10, 0.40],
|
|
5
6
|
"gemini-3.1-flash": [0.50, 3.0],
|
|
@@ -22,9 +23,11 @@ let onSpanCallback = null;
|
|
|
22
23
|
export function installGeminiInterceptor(onSpan) {
|
|
23
24
|
if (installed) {
|
|
24
25
|
onSpanCallback = onSpan;
|
|
26
|
+
resetToolResultDedup();
|
|
25
27
|
return;
|
|
26
28
|
}
|
|
27
29
|
onSpanCallback = onSpan;
|
|
30
|
+
resetToolResultDedup();
|
|
28
31
|
import("@google/genai").then((genaiMod) => {
|
|
29
32
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
30
33
|
const mod = genaiMod;
|
|
@@ -37,6 +40,9 @@ export function installGeminiInterceptor(onSpan) {
|
|
|
37
40
|
const opts = args[0] || {};
|
|
38
41
|
const model = opts.model || "unknown";
|
|
39
42
|
const contents = opts.contents;
|
|
43
|
+
const toolSchemas = extractToolSchemas("gemini", opts.config?.tools);
|
|
44
|
+
const sampling = extractSamplingParams("gemini", opts);
|
|
45
|
+
const spanMeta = { ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) };
|
|
40
46
|
const spanId = genId();
|
|
41
47
|
const startedAt = nowIso();
|
|
42
48
|
const startMs = Date.now();
|
|
@@ -63,8 +69,14 @@ export function installGeminiInterceptor(onSpan) {
|
|
|
63
69
|
input_tokens: inputTokens, output_tokens: outputTokens,
|
|
64
70
|
cost: calcCost(model, inputTokens, outputTokens),
|
|
65
71
|
duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
|
|
72
|
+
...(Object.keys(spanMeta).length ? { metadata: spanMeta } : {}),
|
|
66
73
|
};
|
|
67
74
|
onSpanCallback?.(span);
|
|
75
|
+
// Auto-capture tool usage (functionCall parts in response, functionResponse in input).
|
|
76
|
+
if (onSpanCallback) {
|
|
77
|
+
emitGeminiToolResults(contents, onSpanCallback);
|
|
78
|
+
emitGeminiToolCalls(res?.candidates, spanId, model, onSpanCallback);
|
|
79
|
+
}
|
|
68
80
|
return result;
|
|
69
81
|
}
|
|
70
82
|
catch (err) {
|
|
@@ -3,6 +3,7 @@ import { genId, nowIso, truncateJson } from "../utils.js";
|
|
|
3
3
|
import { isReplaying, consumeCassetteEntry } from "../replay.js";
|
|
4
4
|
import { getConfig } from "../config.js";
|
|
5
5
|
import { RetraceRateLimitError, RetraceAuthError, RetraceConnectionError } from "../errors.js";
|
|
6
|
+
import { emitOpenAIToolCalls, emitOpenAIToolResults, parseToolArgs, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
|
|
6
7
|
/** Hardcoded fallback pricing ($/1M tokens: [input, output]). Updated periodically. */
|
|
7
8
|
const FALLBACK_PRICING = {
|
|
8
9
|
"gpt-5.5-pro": [30.0, 180.0],
|
|
@@ -63,9 +64,11 @@ let onSpanCallback = null;
|
|
|
63
64
|
export function installOpenAIInterceptor(onSpan) {
|
|
64
65
|
if (installed) {
|
|
65
66
|
onSpanCallback = onSpan;
|
|
67
|
+
resetToolResultDedup();
|
|
66
68
|
return;
|
|
67
69
|
}
|
|
68
70
|
onSpanCallback = onSpan;
|
|
71
|
+
resetToolResultDedup();
|
|
69
72
|
import("openai").then((openaiMod) => {
|
|
70
73
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
71
74
|
const mod = openaiMod;
|
|
@@ -106,6 +109,13 @@ function createPatchedCreate() {
|
|
|
106
109
|
spanMetadata.vision = true;
|
|
107
110
|
if (responseFormat)
|
|
108
111
|
spanMetadata.structured_output = typeof responseFormat === "object" ? responseFormat.type || "json_schema" : responseFormat;
|
|
112
|
+
// Capture declared tool parameter schemas so the detection engine can validate tool args.
|
|
113
|
+
const toolSchemas = extractToolSchemas("openai", opts.tools);
|
|
114
|
+
if (toolSchemas)
|
|
115
|
+
spanMetadata.tool_schemas = toolSchemas;
|
|
116
|
+
const sampling = extractSamplingParams("openai", opts);
|
|
117
|
+
if (sampling)
|
|
118
|
+
spanMetadata.sampling = sampling;
|
|
109
119
|
// During replay, return mocked response from cassette instead of calling the real API
|
|
110
120
|
if (isReplaying()) {
|
|
111
121
|
const entry = consumeCassetteEntry("openai.chat.completions.create", "llm_call");
|
|
@@ -137,6 +147,8 @@ function createPatchedCreate() {
|
|
|
137
147
|
const chunks = [];
|
|
138
148
|
let inputTokens = 0;
|
|
139
149
|
let outputTokens = 0;
|
|
150
|
+
// Accumulate streamed tool calls by index (id/name arrive first, arguments stream in).
|
|
151
|
+
const toolAcc = {};
|
|
140
152
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
141
153
|
const originalIterator = result[Symbol.asyncIterator]();
|
|
142
154
|
const wrappedStream = {
|
|
@@ -156,15 +168,34 @@ function createPatchedCreate() {
|
|
|
156
168
|
input_tokens: inputTokens, output_tokens: outputTokens,
|
|
157
169
|
cost: calcCost(model, inputTokens, outputTokens),
|
|
158
170
|
duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
|
|
159
|
-
metadata: { streaming: true },
|
|
171
|
+
metadata: { streaming: true, ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) },
|
|
160
172
|
};
|
|
161
173
|
onSpanCallback?.(span);
|
|
174
|
+
if (onSpanCallback) {
|
|
175
|
+
emitOpenAIToolResults(messages, onSpanCallback);
|
|
176
|
+
const accMsg = { tool_calls: Object.values(toolAcc).map((t) => ({ id: t.id, function: { name: t.name, arguments: parseToolArgs(t.args) } })) };
|
|
177
|
+
emitOpenAIToolCalls(accMsg, spanId, model, onSpanCallback);
|
|
178
|
+
}
|
|
162
179
|
return { value: undefined, done: true };
|
|
163
180
|
}
|
|
164
181
|
// Collect content delta
|
|
165
182
|
const delta = value?.choices?.[0]?.delta?.content;
|
|
166
183
|
if (delta)
|
|
167
184
|
chunks.push(delta);
|
|
185
|
+
// Collect streamed tool-call deltas (function name/id, then argument fragments)
|
|
186
|
+
const tcDeltas = value?.choices?.[0]?.delta?.tool_calls;
|
|
187
|
+
if (Array.isArray(tcDeltas)) {
|
|
188
|
+
for (const tc of tcDeltas) {
|
|
189
|
+
const idx = typeof tc.index === "number" ? tc.index : 0;
|
|
190
|
+
const acc = (toolAcc[idx] ??= { args: "" });
|
|
191
|
+
if (tc.id)
|
|
192
|
+
acc.id = tc.id;
|
|
193
|
+
if (tc.function?.name)
|
|
194
|
+
acc.name = tc.function.name;
|
|
195
|
+
if (typeof tc.function?.arguments === "string")
|
|
196
|
+
acc.args += tc.function.arguments;
|
|
197
|
+
}
|
|
198
|
+
}
|
|
168
199
|
// Collect usage from final chunk
|
|
169
200
|
if (value?.usage) {
|
|
170
201
|
inputTokens = value.usage.prompt_tokens || 0;
|
|
@@ -207,6 +238,12 @@ function createPatchedCreate() {
|
|
|
207
238
|
...(Object.keys(spanMetadata).length ? { metadata: spanMetadata } : {}),
|
|
208
239
|
};
|
|
209
240
|
onSpanCallback?.(span);
|
|
241
|
+
// Auto-capture tool usage: tool_result spans from the fed-back tool messages (deduped),
|
|
242
|
+
// tool_call spans from the model's requested calls (structured args).
|
|
243
|
+
if (onSpanCallback) {
|
|
244
|
+
emitOpenAIToolResults(messages, onSpanCallback);
|
|
245
|
+
emitOpenAIToolCalls(res?.choices?.[0]?.message, spanId, model, onSpanCallback);
|
|
246
|
+
}
|
|
210
247
|
return result;
|
|
211
248
|
}
|
|
212
249
|
catch (err) {
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool-span extraction (Phase 1C).
|
|
3
|
+
*
|
|
4
|
+
* Provider interceptors historically emitted only a single `llm_call` span and dropped the
|
|
5
|
+
* model's tool calls entirely (the most common agent-failure class — tool errors + tool
|
|
6
|
+
* loops — was therefore invisible). These helpers derive structured `tool_call` and
|
|
7
|
+
* `tool_result` spans from a provider request/response so tool usage is captured with NO
|
|
8
|
+
* manual instrumentation.
|
|
9
|
+
*
|
|
10
|
+
* - `tool_call` spans come from the model's response (the calls it REQUESTED), with arguments
|
|
11
|
+
* parsed into structured JSON (not stringified into the output text).
|
|
12
|
+
* - `tool_result` spans come from the tool messages the caller feeds back on the NEXT request
|
|
13
|
+
* (the verbatim recorded result, including errors/empty). They are deduped by the provider
|
|
14
|
+
* tool-call id so they are emitted once, not on every subsequent turn.
|
|
15
|
+
*
|
|
16
|
+
* Detectors downstream (2C schema validation, 2D loop detection, 3C tool-output hallucination)
|
|
17
|
+
* depend on these spans + the `tool_call_id` linkage carried in metadata.
|
|
18
|
+
*/
|
|
19
|
+
import { SpanData } from "../trace.js";
|
|
20
|
+
type Emit = (span: SpanData) => void;
|
|
21
|
+
export declare function resetToolResultDedup(): void;
|
|
22
|
+
/** Parse a JSON-string arguments payload into structured JSON; leave non-strings as-is. */
|
|
23
|
+
export declare function parseToolArgs(args: unknown): unknown;
|
|
24
|
+
/**
|
|
25
|
+
* Extract declared tool parameter schemas (name → JSON schema) from a provider request's tool
|
|
26
|
+
* definitions, so the detection engine can validate tool_call arguments against ground truth.
|
|
27
|
+
*/
|
|
28
|
+
export declare function extractToolSchemas(provider: "openai" | "anthropic" | "gemini", tools: unknown): Record<string, unknown> | undefined;
|
|
29
|
+
/**
|
|
30
|
+
* Capture the sampling/determinism envelope from the request options so replay-divergence (2A)
|
|
31
|
+
* and regression replay (2E) can compare sampling config, not just the model. Normalizes provider
|
|
32
|
+
* field names to { temperature, top_p, top_k, seed, max_tokens }. Returns undefined if none set.
|
|
33
|
+
*/
|
|
34
|
+
export declare function extractSamplingParams(provider: "openai" | "anthropic" | "gemini", opts: unknown): Record<string, unknown> | undefined;
|
|
35
|
+
interface OAToolCall {
|
|
36
|
+
id?: string;
|
|
37
|
+
type?: string;
|
|
38
|
+
function?: {
|
|
39
|
+
name?: string;
|
|
40
|
+
arguments?: unknown;
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
interface OAMessage {
|
|
44
|
+
role?: string;
|
|
45
|
+
content?: unknown;
|
|
46
|
+
tool_calls?: OAToolCall[];
|
|
47
|
+
tool_call_id?: string;
|
|
48
|
+
name?: string;
|
|
49
|
+
}
|
|
50
|
+
/** Emit tool_call spans from an OpenAI assistant response message. */
|
|
51
|
+
export declare function emitOpenAIToolCalls(message: OAMessage | undefined, parentId: string, model: string | undefined, emit: Emit): void;
|
|
52
|
+
/** Emit tool_result spans from OpenAI request messages (role:"tool"), deduped by tool_call_id. */
|
|
53
|
+
export declare function emitOpenAIToolResults(messages: OAMessage[] | undefined, emit: Emit): void;
|
|
54
|
+
interface AnthropicMessage {
|
|
55
|
+
role?: string;
|
|
56
|
+
content?: unknown;
|
|
57
|
+
}
|
|
58
|
+
/** Emit tool_call spans from Anthropic response content blocks (type:"tool_use"). */
|
|
59
|
+
export declare function emitAnthropicToolCalls(content: unknown, parentId: string, model: string | undefined, emit: Emit): void;
|
|
60
|
+
/** Emit tool_result spans from Anthropic request messages (content blocks type:"tool_result"). */
|
|
61
|
+
export declare function emitAnthropicToolResults(messages: AnthropicMessage[] | undefined, emit: Emit): void;
|
|
62
|
+
/** Emit tool_call spans from Gemini response candidate parts (functionCall). */
|
|
63
|
+
export declare function emitGeminiToolCalls(candidates: unknown, parentId: string, model: string | undefined, emit: Emit): void;
|
|
64
|
+
/** Emit tool_result spans from Gemini request contents (functionResponse parts), deduped. */
|
|
65
|
+
export declare function emitGeminiToolResults(contents: unknown, emit: Emit): void;
|
|
66
|
+
export {};
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool-span extraction (Phase 1C).
|
|
3
|
+
*
|
|
4
|
+
* Provider interceptors historically emitted only a single `llm_call` span and dropped the
|
|
5
|
+
* model's tool calls entirely (the most common agent-failure class — tool errors + tool
|
|
6
|
+
* loops — was therefore invisible). These helpers derive structured `tool_call` and
|
|
7
|
+
* `tool_result` spans from a provider request/response so tool usage is captured with NO
|
|
8
|
+
* manual instrumentation.
|
|
9
|
+
*
|
|
10
|
+
* - `tool_call` spans come from the model's response (the calls it REQUESTED), with arguments
|
|
11
|
+
* parsed into structured JSON (not stringified into the output text).
|
|
12
|
+
* - `tool_result` spans come from the tool messages the caller feeds back on the NEXT request
|
|
13
|
+
* (the verbatim recorded result, including errors/empty). They are deduped by the provider
|
|
14
|
+
* tool-call id so they are emitted once, not on every subsequent turn.
|
|
15
|
+
*
|
|
16
|
+
* Detectors downstream (2C schema validation, 2D loop detection, 3C tool-output hallucination)
|
|
17
|
+
* depend on these spans + the `tool_call_id` linkage carried in metadata.
|
|
18
|
+
*/
|
|
19
|
+
import { SpanType } from "../trace.js";
|
|
20
|
+
import { genId, nowIso, truncateJson } from "../utils.js";
|
|
21
|
+
// Insertion-ordered record of tool_result spans that have already been emitted, keyed by a
// provider-prefixed id (`oa:…`, `anthropic:…`, `gemini:…`). resetToolResultDedup() empties it.
const emittedToolResultIds = new Set();
// Maximum number of ids remembered at once; bounds memory for long-lived processes.
const MAX_EMITTED_TOOL_RESULT_IDS = 5000;
/** Forget every recorded tool_result id, so results seen afterwards are emitted again. */
export function resetToolResultDedup() {
    emittedToolResultIds.clear();
}
/**
 * Record `id` as emitted.
 *
 * @param id Provider-prefixed dedup key.
 * @returns `true` when the id was not yet recorded (the caller should emit the span),
 *          `false` when it is a duplicate.
 */
function markEmitted(id) {
    if (emittedToolResultIds.has(id))
        return false;
    if (emittedToolResultIds.size >= MAX_EMITTED_TOOL_RESULT_IDS) {
        // Evict only the oldest id (a Set iterates in insertion order). Clearing the whole
        // set here would make every result still present in the resent history look new.
        const oldest = emittedToolResultIds.values().next().value;
        emittedToolResultIds.delete(oldest);
    }
    emittedToolResultIds.add(id);
    return true;
}
|
|
35
|
+
/**
 * Turn a tool-call arguments payload into structured data.
 *
 * A string is JSON-decoded; a string that is not valid JSON is handed back unchanged.
 * Any non-string value is returned as-is.
 */
export function parseToolArgs(args) {
    let structured = args;
    if (typeof args === "string") {
        try {
            structured = JSON.parse(args);
        }
        catch {
            // Not JSON: keep the raw text.
            structured = args;
        }
    }
    return structured;
}
|
|
46
|
+
/**
 * Extract declared tool parameter schemas (name → JSON schema) from a provider request's tool
 * definitions.
 *
 * Recognised shapes:
 * - gemini:    `[{ functionDeclarations: [{ name, parameters }] }]`
 * - openai:    `[{ function: { name, parameters } }]`
 * - anthropic: `[{ name, input_schema }]`
 *
 * Entries that are null/undefined, or that lack a name or a schema, are skipped rather than
 * throwing.
 *
 * @param provider "openai" | "anthropic" | "gemini"
 * @param tools    The request's tool definitions (anything that is not an array yields undefined).
 * @returns A name → schema map, or `undefined` when no schema was found.
 */
export function extractToolSchemas(provider, tools) {
    if (!Array.isArray(tools))
        return undefined;
    const out = {};
    for (const entry of tools) {
        if (entry == null)
            continue;
        if (provider === "gemini") {
            for (const fd of entry.functionDeclarations || []) {
                if (fd?.name && fd.parameters)
                    out[fd.name] = fd.parameters;
            }
        }
        else if (provider === "openai") {
            const fn = entry.function;
            if (fn?.name && fn.parameters)
                out[fn.name] = fn.parameters;
        }
        else if (provider === "anthropic") {
            if (entry.name && entry.input_schema)
                out[entry.name] = entry.input_schema;
        }
    }
    return Object.keys(out).length ? out : undefined;
}
|
|
73
|
+
/**
 * Collect the sampling settings of a request under provider-neutral names:
 * { temperature, top_p, top_k, seed, max_tokens }.
 *
 * For gemini the values are read from `opts.config` (topP, topK, maxOutputTokens); for the
 * other providers they are read from `opts` itself (top_p, top_k, seed, max_tokens, with
 * max_completion_tokens as the fallback for max_tokens). Values that are null or undefined
 * are left out.
 *
 * @returns The collected settings, or `undefined` when none were set.
 */
export function extractSamplingParams(provider, opts) {
    const request = (opts || {});
    const isGemini = provider === "gemini";
    const source = (isGemini ? request.config : request) || {};
    const candidates = isGemini
        ? [
            ["temperature", source.temperature],
            ["top_p", source.topP],
            ["top_k", source.topK],
            ["seed", source.seed],
            ["max_tokens", source.maxOutputTokens],
        ]
        : [
            ["temperature", source.temperature],
            ["top_p", source.top_p],
            ["top_k", source.top_k], // anthropic only
            ["seed", source.seed], // openai only
            ["max_tokens", source.max_tokens ?? source.max_completion_tokens],
        ];
    const collected = {};
    for (const [key, value] of candidates) {
        if (value === undefined || value === null)
            continue;
        collected[key] = value;
    }
    return Object.keys(collected).length ? collected : undefined;
}
|
|
100
|
+
/**
 * Build a zero-duration `tool_call` span.
 *
 * @param name       Tool name; falls back to "tool" when empty.
 * @param input      Tool arguments, stored after passing through truncateJson.
 * @param parentId   Id stored as the span's parent_id.
 * @param model      Optional model name; the key is omitted when falsy.
 * @param toolCallId Optional provider call id, stored as metadata.tool_call_id.
 */
function toolCallSpan(name, input, parentId, model, toolCallId) {
    const timestamp = nowIso();
    const span = {
        id: genId(),
        trace_id: "",
        parent_id: parentId,
        span_type: SpanType.TOOL_CALL,
        name: name || "tool",
    };
    if (model) {
        span.model = model;
    }
    span.input = truncateJson(input);
    // Start and end share one timestamp, so duration_ms is 0.
    span.started_at = timestamp;
    span.ended_at = timestamp;
    span.duration_ms = 0;
    if (toolCallId) {
        span.metadata = { tool_call_id: toolCallId };
    }
    return span;
}
|
|
111
|
+
/**
 * Build a zero-duration, parentless `tool_result` span.
 *
 * @param name       Span name; falls back to "tool_result" when empty.
 * @param output     Tool output, stored after passing through truncateJson.
 * @param isError    When truthy, `error` is set to the output (string as-is, otherwise JSON text).
 * @param toolCallId Optional provider call id, stored as metadata.tool_call_id.
 */
function toolResultSpan(name, output, isError, toolCallId) {
    const timestamp = nowIso();
    const span = {
        id: genId(),
        trace_id: "",
        parent_id: null,
        span_type: SpanType.TOOL_RESULT,
        name: name || "tool_result",
        output: truncateJson(output),
        started_at: timestamp,
        ended_at: timestamp,
        duration_ms: 0,
    };
    if (isError) {
        span.error = typeof output === "string" ? output : JSON.stringify(output);
    }
    if (toolCallId) {
        span.metadata = { tool_call_id: toolCallId };
    }
    return span;
}
|
|
122
|
+
/**
 * Emit one tool_call span per entry of an OpenAI assistant message's `tool_calls`.
 *
 * Does nothing when the message is missing or `tool_calls` is not an array. The arguments
 * are run through parseToolArgs and the call's `id` is passed as the tool-call id.
 */
export function emitOpenAIToolCalls(message, parentId, model, emit) {
    const requested = message?.tool_calls;
    if (Array.isArray(requested)) {
        for (const call of requested) {
            const toolName = call.function?.name || "tool";
            const toolArgs = parseToolArgs(call.function?.arguments);
            emit(toolCallSpan(toolName, toolArgs, parentId, model, call.id));
        }
    }
}
|
|
132
|
+
/**
 * Emit tool_result spans for the `role: "tool"` messages of an OpenAI request.
 *
 * Each result is emitted at most once per `tool_call_id` (dedup key `oa:<id>`). The span is
 * named after the tool that the matching assistant `tool_calls` entry in the same list
 * requested, then after the message's own `name`, then "tool_result". String content that
 * matches /error|exception|failed/i is flagged as an error.
 */
export function emitOpenAIToolResults(messages, emit) {
    if (!Array.isArray(messages))
        return;
    // First pass: remember which tool each call id belongs to.
    const toolNames = new Map();
    for (const msg of messages) {
        if (msg?.role !== "assistant" || !Array.isArray(msg.tool_calls))
            continue;
        for (const call of msg.tool_calls) {
            if (call.id)
                toolNames.set(call.id, call.function?.name || "tool");
        }
    }
    // Second pass: emit each not-yet-seen tool reply.
    for (const msg of messages) {
        const isToolReply = msg?.role === "tool" && Boolean(msg.tool_call_id);
        if (isToolReply && markEmitted(`oa:${msg.tool_call_id}`)) {
            const body = msg.content;
            const looksLikeError = typeof body === "string" && /error|exception|failed/i.test(body);
            const spanName = toolNames.get(msg.tool_call_id) || msg.name || "tool_result";
            emit(toolResultSpan(spanName, body, looksLikeError, msg.tool_call_id));
        }
    }
}
|
|
155
|
+
/**
 * Emit one tool_call span per `type: "tool_use"` block of an Anthropic response.
 *
 * Does nothing when `content` is not an array; blocks of any other type are ignored. The
 * block's `input` is recorded as the arguments and its `id` as the tool-call id.
 */
export function emitAnthropicToolCalls(content, parentId, model, emit) {
    if (Array.isArray(content)) {
        for (const item of content) {
            if (item?.type === "tool_use") {
                emit(toolCallSpan(item.name || "tool", item.input, parentId, model, item.id));
            }
        }
    }
}
|
|
165
|
+
/**
 * Emit tool_result spans from Anthropic request messages (content blocks type:"tool_result").
 *
 * Each result is emitted at most once per `tool_use_id` (dedup key `anthropic:<id>`). The
 * span is named after the tool of the matching `tool_use` block found in the same message
 * list, mirroring how the OpenAI variant resolves names; when no such block is present the
 * name falls back to "tool_result". The block's `is_error` flag marks the span as an error.
 *
 * @param messages Request messages; anything that is not an array is ignored.
 * @param emit     Callback that receives each new span.
 */
export function emitAnthropicToolResults(messages, emit) {
    if (!Array.isArray(messages))
        return;
    // Map tool_use id -> tool name from any tool_use blocks in the same message list.
    const nameById = new Map();
    for (const m of messages) {
        if (!Array.isArray(m?.content))
            continue;
        for (const block of m.content) {
            if (block?.type === "tool_use" && block.id)
                nameById.set(block.id, block.name || "tool");
        }
    }
    for (const m of messages) {
        if (!Array.isArray(m?.content))
            continue;
        for (const block of m.content) {
            if (block?.type !== "tool_result" || !block.tool_use_id)
                continue;
            if (!markEmitted(`anthropic:${block.tool_use_id}`))
                continue;
            const name = nameById.get(block.tool_use_id) || "tool_result";
            emit(toolResultSpan(name, block.content, !!block.is_error, block.tool_use_id));
        }
    }
}
|
|
181
|
+
/**
 * Emit tool_call spans from Gemini response candidate parts (functionCall).
 *
 * The tool-call id recorded in metadata is `functionCall.id` when the provider supplies one,
 * otherwise the function name. This is the same rule emitGeminiToolResults applies to
 * `functionResponse`, so a call and its result carry the same `tool_call_id`.
 *
 * @param candidates Response candidates; anything that is not an array is ignored.
 * @param parentId   Id stored as each span's parent_id.
 * @param model      Optional model name recorded on each span.
 * @param emit       Callback that receives each span.
 */
export function emitGeminiToolCalls(candidates, parentId, model, emit) {
    if (!Array.isArray(candidates))
        return;
    for (const cand of candidates) {
        const parts = cand?.content?.parts;
        if (!Array.isArray(parts))
            continue;
        for (const p of parts) {
            // Null/undefined parts are skipped instead of throwing inside instrumentation.
            const call = p?.functionCall;
            if (!call)
                continue;
            emit(toolCallSpan(call.name || "tool", call.args, parentId, model, call.id || call.name));
        }
    }
}
|
|
196
|
+
/**
 * Short text fingerprint of a functionResponse payload, used only to build dedup keys.
 * Never throws: an unserialisable or undefined payload yields "".
 */
function geminiResponseDigest(response) {
    try {
        return (JSON.stringify(response) ?? "").slice(0, 256);
    }
    catch {
        return "";
    }
}
/**
 * Emit tool_result spans from Gemini request contents (functionResponse parts), deduped.
 *
 * Dedup key:
 * - `functionResponse.id` when present;
 * - otherwise the function name plus a digest of the response, plus the occurrence number
 *   of that (name, digest) pair within this request. Keying on the name alone would
 *   suppress every later result of a tool that is called more than once. The occurrence
 *   number keeps identical repeated results distinct, and stays the same across turns as
 *   long as the caller resends the history in the same order.
 *
 * @param contents A single content object or an array of them; other values are ignored.
 * @param emit     Callback that receives each new span.
 */
export function emitGeminiToolResults(contents, emit) {
    const list = Array.isArray(contents) ? contents : contents != null ? [contents] : [];
    // Occurrence counters for id-less responses seen so far in this request.
    const occurrences = new Map();
    for (const c of list) {
        const parts = c?.parts;
        if (!Array.isArray(parts))
            continue;
        for (const p of parts) {
            const fr = p?.functionResponse;
            if (!fr)
                continue;
            let key;
            if (fr.id) {
                key = `gemini:${fr.id}`;
            }
            else {
                const base = `${fr.name || ""}:${geminiResponseDigest(fr.response)}`;
                const nth = (occurrences.get(base) || 0) + 1;
                occurrences.set(base, nth);
                key = `gemini:${base}#${nth}`;
            }
            if (!markEmitted(key))
                continue;
            emit(toolResultSpan(fr.name || "tool_result", fr.response, false, fr.id || fr.name));
        }
    }
}
|
package/dist/recorder.d.ts
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import { SpanBuilder, SpanData, SpanType, TraceStatus } from "./trace.js";
|
|
2
|
+
/** Drain the shared transport's in-flight data to the network (awaited on graceful shutdown). */
|
|
3
|
+
export declare function flushSharedTransport(): Promise<void>;
|
|
2
4
|
export interface RecordOptions {
|
|
3
5
|
name?: string;
|
|
4
6
|
input?: unknown;
|
package/dist/recorder.js
CHANGED
|
@@ -17,6 +17,10 @@ function getSharedTransport() {
|
|
|
17
17
|
}
|
|
18
18
|
return sharedTransport;
|
|
19
19
|
}
|
|
20
|
+
/**
 * Await `flush()` on the shared transport, if one has been created.
 * Resolves without doing anything when no shared transport exists yet.
 */
export async function flushSharedTransport() {
    const transport = sharedTransport;
    if (transport === null || transport === undefined) {
        return;
    }
    await transport.flush();
}
|
|
20
24
|
export class TraceRecorder {
|
|
21
25
|
builder;
|
|
22
26
|
transport;
|
package/dist/trace.js
CHANGED
|
@@ -83,7 +83,12 @@ export class TraceBuilder {
|
|
|
83
83
|
return this.data;
|
|
84
84
|
}
|
|
85
85
|
addSpan(span) {
|
|
86
|
-
|
|
86
|
+
// Spans are streamed individually through the transport (and HTTPTransport keeps its own
|
|
87
|
+
// per-trace buffer for the batched POST), so this retained array is only an in-memory
|
|
88
|
+
// convenience and is never itself transmitted. Cap it so init()'s long-lived ambient
|
|
89
|
+
// trace can't accumulate spans for the life of the process (an unbounded memory leak).
|
|
90
|
+
if (this.data.spans.length < 1000)
|
|
91
|
+
this.data.spans.push(span);
|
|
87
92
|
this.data.total_tokens += (span.input_tokens || 0) + (span.output_tokens || 0);
|
|
88
93
|
this.data.total_cost += span.cost || 0;
|
|
89
94
|
}
|
package/dist/transport.d.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
export interface Transport {
|
|
2
2
|
send(eventType: string, data: Record<string, unknown>): void;
|
|
3
3
|
close(): void;
|
|
4
|
+
/** Drain in-flight data to the network (awaited on graceful shutdown). */
|
|
5
|
+
flush(): Promise<void>;
|
|
4
6
|
}
|
|
5
7
|
export declare class WSTransport implements Transport {
|
|
6
8
|
private ws;
|
|
@@ -15,12 +17,15 @@ export declare class WSTransport implements Transport {
|
|
|
15
17
|
private flushQueue;
|
|
16
18
|
send(eventType: string, data: Record<string, unknown>): void;
|
|
17
19
|
close(): void;
|
|
20
|
+
/** Wait for the socket's send buffer to drain so the final trace_ended actually leaves
|
|
21
|
+
* the process before exit. Best-effort with a hard timeout. */
|
|
22
|
+
flush(): Promise<void>;
|
|
18
23
|
}
|
|
19
24
|
export declare class HTTPTransport implements Transport {
|
|
20
25
|
private traceData;
|
|
21
26
|
private spans;
|
|
22
27
|
send(eventType: string, data: Record<string, unknown>): void;
|
|
23
|
-
flush(): void
|
|
28
|
+
flush(): Promise<void>;
|
|
24
29
|
private buildSpans;
|
|
25
30
|
close(): void;
|
|
26
31
|
}
|
package/dist/transport.js
CHANGED
|
@@ -98,6 +98,14 @@ export class WSTransport {
|
|
|
98
98
|
}
|
|
99
99
|
this.connected = false;
|
|
100
100
|
}
|
|
101
|
+
/** Wait for the socket's send buffer to drain so the final trace_ended actually leaves
|
|
102
|
+
* the process before exit. Best-effort with a hard timeout. */
|
|
103
|
+
async flush() {
|
|
104
|
+
const start = Date.now();
|
|
105
|
+
while (this.ws && this.ws.readyState === WebSocket.OPEN && this.ws.bufferedAmount > 0 && Date.now() - start < 2000) {
|
|
106
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
107
|
+
}
|
|
108
|
+
}
|
|
101
109
|
}
|
|
102
110
|
export class HTTPTransport {
|
|
103
111
|
traceData = null;
|
|
@@ -112,28 +120,34 @@ export class HTTPTransport {
|
|
|
112
120
|
else if (eventType === "trace_ended") {
|
|
113
121
|
if (this.traceData)
|
|
114
122
|
Object.assign(this.traceData, data);
|
|
115
|
-
this.flush();
|
|
123
|
+
void this.flush();
|
|
116
124
|
}
|
|
117
125
|
}
|
|
118
|
-
flush() {
|
|
126
|
+
async flush() {
|
|
119
127
|
if (!this.traceData)
|
|
120
128
|
return;
|
|
121
129
|
const cfg = getConfig();
|
|
122
130
|
const url = `${cfg.baseUrl}/api/v1/traces`;
|
|
123
131
|
const body = { ...this.traceData, spans: this.buildSpans() };
|
|
124
132
|
const payload = JSON.stringify(body);
|
|
125
|
-
//
|
|
126
|
-
const attempt = (n, delay) => {
|
|
127
|
-
fetch(url, {
|
|
128
|
-
method: "POST",
|
|
129
|
-
headers: { "x-retrace-key": cfg.apiKey, "Content-Type": "application/json" },
|
|
130
|
-
body: payload,
|
|
131
|
-
}).catch(() => { if (n < 3)
|
|
132
|
-
setTimeout(() => attempt(n + 1, delay * 2), delay); });
|
|
133
|
-
};
|
|
134
|
-
attempt(1, 1000);
|
|
133
|
+
// Clear first so a concurrent flush (e.g. trace_ended then shutdown drain) can't double-send.
|
|
135
134
|
this.traceData = null;
|
|
136
135
|
this.spans = [];
|
|
136
|
+
// Retry up to 3 times with exponential backoff; awaited so shutdown can drain it.
|
|
137
|
+
for (let n = 1; n <= 3; n++) {
|
|
138
|
+
try {
|
|
139
|
+
await fetch(url, {
|
|
140
|
+
method: "POST",
|
|
141
|
+
headers: { "x-retrace-key": cfg.apiKey, "Content-Type": "application/json" },
|
|
142
|
+
body: payload,
|
|
143
|
+
});
|
|
144
|
+
return;
|
|
145
|
+
}
|
|
146
|
+
catch {
|
|
147
|
+
if (n < 3)
|
|
148
|
+
await new Promise((r) => setTimeout(r, 1000 * n));
|
|
149
|
+
}
|
|
150
|
+
}
|
|
137
151
|
}
|
|
138
152
|
buildSpans() {
|
|
139
153
|
const merged = new Map();
|
|
@@ -150,7 +164,7 @@ export class HTTPTransport {
|
|
|
150
164
|
return [...merged.values()];
|
|
151
165
|
}
|
|
152
166
|
close() {
|
|
153
|
-
this.flush();
|
|
167
|
+
void this.flush();
|
|
154
168
|
}
|
|
155
169
|
}
|
|
156
170
|
export function createTransport(mode = "auto") {
|
|
@@ -214,5 +228,24 @@ export function createTransport(mode = "auto") {
|
|
|
214
228
|
http.close();
|
|
215
229
|
}
|
|
216
230
|
},
|
|
231
|
+
async flush() {
|
|
232
|
+
if (!decided) {
|
|
233
|
+
// Never connected over WS — force the HTTP fallback and drain the buffer so the
|
|
234
|
+
// final trace isn't lost on shutdown.
|
|
235
|
+
decided = true;
|
|
236
|
+
useWs = false;
|
|
237
|
+
clearTimeout(fallbackTimer);
|
|
238
|
+
ws.close();
|
|
239
|
+
for (const item of buffer.splice(0))
|
|
240
|
+
http.send(item.eventType, item.data);
|
|
241
|
+
await http.flush();
|
|
242
|
+
}
|
|
243
|
+
else if (useWs) {
|
|
244
|
+
await ws.flush();
|
|
245
|
+
}
|
|
246
|
+
else {
|
|
247
|
+
await http.flush();
|
|
248
|
+
}
|
|
249
|
+
},
|
|
217
250
|
};
|
|
218
251
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "retrace-sdk",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.10.0",
|
|
4
4
|
"description": "The execution replay engine for AI agents. Record, replay, fork, and share agent executions.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -9,6 +9,14 @@
|
|
|
9
9
|
".": {
|
|
10
10
|
"types": "./dist/index.d.ts",
|
|
11
11
|
"import": "./dist/index.js"
|
|
12
|
+
},
|
|
13
|
+
"./adapters/langchain": {
|
|
14
|
+
"types": "./dist/adapters/langchain.d.ts",
|
|
15
|
+
"import": "./dist/adapters/langchain.js"
|
|
16
|
+
},
|
|
17
|
+
"./adapters/vercel-ai": {
|
|
18
|
+
"types": "./dist/adapters/vercel-ai.d.ts",
|
|
19
|
+
"import": "./dist/adapters/vercel-ai.js"
|
|
12
20
|
}
|
|
13
21
|
},
|
|
14
22
|
"files": ["dist", "README.md", "LICENSE"],
|
|
@@ -37,7 +45,8 @@
|
|
|
37
45
|
"peerDependencies": {
|
|
38
46
|
"@google/genai": ">=1.52.0",
|
|
39
47
|
"openai": ">=4.0.0",
|
|
40
|
-
"@anthropic-ai/sdk": ">=0.30.0"
|
|
48
|
+
"@anthropic-ai/sdk": ">=0.30.0",
|
|
49
|
+
"@langchain/core": ">=0.3.0"
|
|
41
50
|
},
|
|
42
51
|
"peerDependenciesMeta": {
|
|
43
52
|
"@google/genai": {
|
|
@@ -48,6 +57,9 @@
|
|
|
48
57
|
},
|
|
49
58
|
"@anthropic-ai/sdk": {
|
|
50
59
|
"optional": true
|
|
60
|
+
},
|
|
61
|
+
"@langchain/core": {
|
|
62
|
+
"optional": true
|
|
51
63
|
}
|
|
52
64
|
},
|
|
53
65
|
"devDependencies": {
|