retrace-sdk 0.11.0 → 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/config.d.ts CHANGED
@@ -11,6 +11,10 @@ export interface Config {
11
11
  * request/response only (recommended for short-lived scripts and serverless — it never
12
12
  * holds an open socket and always surfaces upload errors); "ws" forces WebSocket. */
13
13
  transport: "auto" | "ws" | "http";
14
+ /** Called with a STRUCTURED signal when the server signals credits_exhausted | rate_limited |
15
+ * halt | error. Branch on `signal.code`; use `signal.retryable`/`signal.fatal` to decide
16
+ * behavior. Defaults to a throttled console warning so signals are never silently dropped. */
17
+ onError?: (signal: import("./errors.js").RetraceServerSignal) => void;
14
18
  }
15
19
  export declare function configure(opts: Partial<Config>): Config;
16
20
  export declare function requireApiKey(): string;
package/dist/config.js CHANGED
@@ -17,6 +17,13 @@ export function configure(opts) {
17
17
  if (opts.baseUrl && !opts.wsUrl) {
18
18
  config.wsUrl = config.baseUrl.replace("https://", "wss://").replace("http://", "ws://");
19
19
  }
20
+ // Eagerly install provider interceptors as soon as configure() enables recording. The Gemini
21
+ // interceptor patches Models.prototype internals (generateContentInternal/...StreamInternal), so
22
+ // it applies to clients regardless of construction order. Fire-and-forget (failures ignored);
23
+ // the dynamic import resolves before the first awaited LLM call in any real async flow.
24
+ if (config.enabled) {
25
+ void import("./interceptors/install.js").then((m) => m.ensureInterceptorsInstalled()).catch(() => { });
26
+ }
20
27
  return config;
21
28
  }
22
29
  export function requireApiKey() {
package/dist/errors.d.ts CHANGED
@@ -14,3 +14,25 @@ export declare class RetraceRateLimitError extends RetraceError {
14
14
  retryAfter: number;
15
15
  constructor(retryAfter: number);
16
16
  }
17
+ /**
18
+ * Structured server-originated signal handed to `onError`. Actionable WITHOUT string-matching:
19
+ * branch on `code`, decide retry from `retryable`, decide whether recording is still alive from
20
+ * `fatal`. (A raw message alone forces the user to string-match — this type exists to avoid that.)
21
+ */
22
+ export type RetraceSignalCode = "credits_exhausted" | "rate_limited" | "halt" | "error";
23
+ export interface RetraceServerSignal {
24
+ /** Machine-readable category — branch on THIS, never on `message`. */
25
+ code: RetraceSignalCode;
26
+ /** Human-readable detail from the server. */
27
+ message: string;
28
+ /** Will retrying / backing off plausibly succeed? rate_limited=true; credits/halt/error=false. */
29
+ retryable: boolean;
30
+ /** Did this STOP recording? halt=true (transport closed); others leave recording alive. */
31
+ fatal: boolean;
32
+ }
33
+ /**
34
+ * Map a raw server frame to a structured signal. Single source of truth for category + retryable +
35
+ * fatal, shared by the WS dispatch. Kept here (not inline in the dispatch) so TS and Python classify
36
+ * identically and the CI gate can assert the mapping.
37
+ */
38
+ export declare function classifyServerSignal(rawType: string, message: string): RetraceServerSignal;
package/dist/errors.js CHANGED
@@ -14,3 +14,21 @@ export class RetraceRateLimitError extends RetraceError {
14
14
  retryAfter;
15
15
  constructor(retryAfter) { super(`Rate limited. Retry after ${retryAfter}s`); this.name = "RetraceRateLimitError"; this.retryAfter = retryAfter; }
16
16
  }
/**
 * Normalize the `message` field of a raw server frame to text.
 * Strings pass through untouched, null/undefined become "", and any other value is serialized so
 * the returned signal always carries a string `message`.
 */
function signalMessageText(message) {
    if (typeof message === "string")
        return message;
    if (message == null)
        return "";
    try {
        // JSON.stringify yields undefined for functions/symbols — fall back to String() then.
        return JSON.stringify(message) ?? String(message);
    }
    catch {
        return String(message);
    }
}
/**
 * Map a raw server frame to a structured signal. Single source of truth for category + retryable +
 * fatal, shared by the WS dispatch. Kept here (not inline in the dispatch) so TS and Python classify
 * identically and the CI gate can assert the mapping.
 *
 * Classification order (first match wins):
 *   1. rawType "halt"                    -> code "halt"              (fatal, not retryable)
 *   2. message contains "limit reached"  -> code "credits_exhausted" (not retryable)
 *   3. message contains "Rate limit"     -> code "rate_limited"      (retryable)
 *   4. anything else                     -> code "error"             (not retryable)
 *
 * NOTE(review): because rule 2 is tested before rule 3, a message containing both substrings
 * (e.g. "Rate limit reached") is classified as credits_exhausted. The order is left unchanged here.
 *
 * @param rawType Frame type sent by the server ("halt", "error", ...).
 * @param message Detail text from the frame. A non-string value is tolerated and normalized to text
 *   instead of throwing, since the frame originates from the network.
 * @returns The structured signal; `message` is always a string.
 */
export function classifyServerSignal(rawType, message) {
    const text = signalMessageText(message);
    if (rawType === "halt") {
        return { code: "halt", message: text || "Guardrail triggered", retryable: false, fatal: true };
    }
    // rawType === "error" (or anything else carrying an error string)
    if (text.includes("limit reached")) {
        return { code: "credits_exhausted", message: text, retryable: false, fatal: false };
    }
    if (text.includes("Rate limit")) {
        return { code: "rate_limited", message: text, retryable: true, fatal: false };
    }
    return { code: "error", message: text || "Server error", retryable: false, fatal: false };
}
package/dist/index.d.ts CHANGED
@@ -2,6 +2,7 @@ export { configure, getConfig } from "./config.js";
2
2
  export { init, getActiveRecorder, shutdown } from "./init.js";
3
3
  export type { InitOptions } from "./init.js";
4
4
  export { record, trace, TraceRecorder } from "./recorder.js";
5
+ export { stream } from "./stream.js";
5
6
  export { SpanBuilder, TraceBuilder } from "./trace.js";
6
7
  export type { SpanData, TraceData } from "./trace.js";
7
8
  export { SpanType, TraceStatus } from "./trace.js";
@@ -13,7 +14,7 @@ export { registerResumable, handleResume } from "./resume.js";
13
14
  export type { ResumeCommand } from "./resume.js";
14
15
  export { isReplaying, consumeCassetteEntry, handleReplay } from "./replay.js";
15
16
  export type { CassetteEntry, ReplayCommand } from "./replay.js";
16
- export { setTraceContext, clearTraceContext, getTraceparent, injectTraceparent, parseTraceparent } from "./traceparent.js";
17
+ export { setTraceContext, clearTraceContext, getTraceparent, injectTraceparent, parseTraceparent, withTraceContext } from "./traceparent.js";
17
18
  export { markGolden } from "./golden.js";
18
19
  export { createLangChainHandler } from "./adapters/langchain.js";
19
20
  export { retraceOnStepFinish, recordVercelStep } from "./adapters/vercel-ai.js";
package/dist/index.js CHANGED
@@ -1,6 +1,7 @@
1
1
  export { configure, getConfig } from "./config.js";
2
2
  export { init, getActiveRecorder, shutdown } from "./init.js";
3
3
  export { record, trace, TraceRecorder } from "./recorder.js";
4
+ export { stream } from "./stream.js";
4
5
  export { SpanBuilder, TraceBuilder } from "./trace.js";
5
6
  export { SpanType, TraceStatus } from "./trace.js";
6
7
  export { installGeminiInterceptor, uninstallGeminiInterceptor } from "./interceptors/gemini.js";
@@ -9,10 +10,16 @@ export { installAnthropicInterceptor, uninstallAnthropicInterceptor } from "./in
9
10
  export { RetraceError, RetraceAuthError, RetraceCreditsExhaustedError, RetraceConnectionError, RetraceRateLimitError } from "./errors.js";
10
11
  export { registerResumable, handleResume } from "./resume.js";
11
12
  export { isReplaying, consumeCassetteEntry, handleReplay } from "./replay.js";
12
- export { setTraceContext, clearTraceContext, getTraceparent, injectTraceparent, parseTraceparent } from "./traceparent.js";
13
+ export { setTraceContext, clearTraceContext, getTraceparent, injectTraceparent, parseTraceparent, withTraceContext } from "./traceparent.js";
13
14
  export { markGolden } from "./golden.js";
14
15
  // Framework adapters (5B) — drop-in instrumentation for LangChain/LangGraph + Vercel AI SDK.
15
16
  export { createLangChainHandler } from "./adapters/langchain.js";
16
17
  export { retraceOnStepFinish, recordVercelStep } from "./adapters/vercel-ai.js";
18
+ // Patch provider SDKs at import (fire-and-forget; NO top-level await → CJS/bundler-safe). The Gemini
19
+ // interceptor patches RETROACTIVE prototype methods (generateContentInternal/...Stream), so capture
20
+ // works regardless of when the user constructs their client — including module-level clients built
21
+ // in the same tick. This matches the Python SDK: no ordering contract, no ready() footgun.
22
+ import { ensureInterceptorsInstalled } from "./interceptors/install.js";
23
+ void ensureInterceptorsInstalled();
17
24
  // v0.5.0
18
25
  // trigger
package/dist/init.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { configure, getConfig, requireApiKey } from "./config.js";
2
- import { TraceRecorder, flushSharedTransport } from "./recorder.js";
2
+ import { TraceRecorder, drainSharedTransportOnExit } from "./recorder.js";
3
+ import { onProcessExit } from "./transport.js";
3
4
  import { TraceStatus } from "./trace.js";
4
5
  let ambient = null;
5
6
  let exitHooked = false;
@@ -46,28 +47,34 @@ export function init(opts = {}) {
46
47
  ambient.start(traceName); // installs the provider interceptors against the ambient recorder
47
48
  if (!exitHooked && typeof process !== "undefined") {
48
49
  exitHooked = true;
49
- const finish = (status) => {
50
+ const finish = (status, terminatedEarly = false) => {
50
51
  const rec = ambient;
51
52
  ambient = null;
52
53
  try {
53
- rec?.end(undefined, status);
54
+ rec?.end(undefined, status, { terminatedEarly });
54
55
  }
55
56
  catch { /* best effort on shutdown */ }
56
57
  };
57
- // On signal-triggered exits, process.exit() would otherwise kill the process before the
58
- // final trace_ended is delivered over the network. End the trace, then await a transport
59
- // drain (capped by a hard timeout so a hung network can't block shutdown) before exiting.
60
- const finishAndExit = (status, code) => {
61
- finish(status);
58
+ // Finish the ambient trace as a pre-exit hook: registerProcessExitFlush (in recorder.ts) runs
59
+ // this BEFORE draining the transport, so the final trace_ended is in the buffer for the
60
+ // HTTP one-shot and signal ownership (sole-listener-flush-then-exit vs user-owns-exit) is
61
+ // handled there in one place, not duplicated here.
62
+ //
63
+ // Only a graceful exit (event loop emptied = the program finished its work) produces a CLEAN
64
+ // terminal. Signal/uncaught exits interrupted the run mid-flight, so the synthesized terminal
65
+ // is marked terminated_early — otherwise we'd manufacture a clean-looking terminal for a
66
+ // truncated run and defeat the replay-guard's no-terminal rule.
67
+ onProcessExit((reason) => finish(reason === "uncaught" ? TraceStatus.FAILED : TraceStatus.COMPLETED, reason !== "graceful"));
68
+ // uncaughtException is not covered by registerProcessExitFlush (it's status-specific and must
69
+ // exit non-zero): finish FAILED + terminated_early, drain best-effort within a hard cap, then exit.
70
+ process.once("uncaughtException", (err) => {
71
+ console.error(err);
72
+ finish(TraceStatus.FAILED, true);
62
73
  void Promise.race([
63
- flushSharedTransport().catch(() => { }),
74
+ drainSharedTransportOnExit(1500).catch(() => { }),
64
75
  new Promise((r) => setTimeout(r, 3000)),
65
- ]).then(() => process.exit(code));
66
- };
67
- process.once("beforeExit", () => finish(TraceStatus.COMPLETED));
68
- process.once("SIGINT", () => finishAndExit(TraceStatus.COMPLETED, 130));
69
- process.once("SIGTERM", () => finishAndExit(TraceStatus.COMPLETED, 143));
70
- process.once("uncaughtException", (err) => { console.error(err); finishAndExit(TraceStatus.FAILED, 1); });
76
+ ]).then(() => process.exit(1));
77
+ });
71
78
  }
72
79
  return ambient;
73
80
  }
@@ -0,0 +1,25 @@
1
+ import type { SpanData } from "../trace.js";
2
+ export type SpanCallback = (span: SpanData) => void;
3
+ /** A recorder-side sink for the two-phase streaming-span lifecycle (open at invocation, finalize
4
+ * once at clean-drain / break / error / trace-end / exit). Routed like SpanCallback. */
5
+ export interface OpenSpanSink {
6
+ registerOpenSpan(spanId: string, finalize: (reason: "complete" | "partial") => void): void;
7
+ unregisterOpenSpan(spanId: string): void;
8
+ }
9
+ /** Stable interceptor callback — routes an intercepted span to the recorder active in this context. */
10
+ export declare function dispatchInterceptedSpan(span: SpanData): void;
11
+ /** Capture the span sink active in THIS context (synchronously, at invocation) so a deferred
12
+ * finalizer can emit to the right recorder even when later called from a context where the ALS
13
+ * store is absent (the AFC layer's .return(), trace-end, exit-flush). */
14
+ export declare function captureActiveSpanEmit(): SpanCallback | null;
15
+ /** Register an open streaming span's finalizer with the active recorder (two-phase, model (b)). */
16
+ export declare function dispatchRegisterOpenSpan(spanId: string, finalize: (reason: "complete" | "partial") => void): void;
17
+ /** Drop an open span's finalizer once it has been finalized in-band. */
18
+ export declare function dispatchUnregisterOpenSpan(spanId: string): void;
19
+ /** Run `fn` with `cb` as the active intercepted-span handler for its async context (fully isolated). */
20
+ export declare function runWithActiveRecorder<T>(cb: SpanCallback, fn: () => T, sink?: OpenSpanSink): T;
21
+ /** Set the imperative-API fallback handler. Returns the PREVIOUS handler so callers can restore
22
+ * it (e.g. a nested trace() must not wipe the ambient init() fallback). Pass null to clear. */
23
+ export declare function setActiveRecorderFallback(cb: SpanCallback | null, sink?: OpenSpanSink | null): SpanCallback | null;
24
+ /** The current imperative-API open-span sink (so a nested trace can save/restore it). */
25
+ export declare function currentFallbackSink(): OpenSpanSink | null;
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Context-isolated routing for auto-instrumented spans (mirrors the Python `_dispatch.py`
3
+ * ContextVar dispatcher).
4
+ *
5
+ * The interceptors (openai/anthropic/gemini) are patched globally and invoke ONE stable
6
+ * dispatcher. The target recorder for the current async context lives in an AsyncLocalStorage
7
+ * store, so concurrent traces on a server each resolve their own recorder — instead of a
8
+ * module-global, last-writer-wins callback that cross-routed intercepted spans to whichever
9
+ * trace was created most recently.
10
+ */
11
+ import { AsyncLocalStorage } from "async_hooks";
12
+ const activeRecorder = new AsyncLocalStorage();
13
+ const activeOpenSink = new AsyncLocalStorage();
14
+ // Fallback for the imperative record()/start()/end() API used outside a runWithActiveRecorder
15
+ // scope. Last-writer-wins (documented limitation for purely imperative concurrent traces).
16
+ let fallbackCb = null;
17
+ let fallbackSink = null;
/**
 * Stable callback handed to the provider interceptors. Forwards `span` to the handler stored for
 * the current async context, or to the imperative fallback handler when no store is set. Does
 * nothing when neither exists.
 */
export function dispatchInterceptedSpan(span) {
    const scoped = activeRecorder.getStore();
    const handler = scoped ?? fallbackCb;
    if (handler != null) {
        handler(span);
    }
}
/**
 * Snapshot the span handler visible right now: the AsyncLocalStorage store if one is set, otherwise
 * the imperative fallback (which may be null). Callers take this synchronously at invocation so a
 * finalizer that runs later, from a context with no store, can still emit to the same handler.
 */
export function captureActiveSpanEmit() {
    const scoped = activeRecorder.getStore();
    return scoped ?? fallbackCb;
}
/**
 * Hand an open streaming span's finalizer to the open-span sink of the current async context,
 * falling back to the imperative sink. No-op when no sink is available.
 */
export function dispatchRegisterOpenSpan(spanId, finalize) {
    const target = activeOpenSink.getStore() ?? fallbackSink;
    if (target != null) {
        target.registerOpenSpan(spanId, finalize);
    }
}
/**
 * Remove a previously registered finalizer from the current context's open-span sink (or the
 * imperative fallback sink). No-op when no sink is available.
 */
export function dispatchUnregisterOpenSpan(spanId) {
    const target = activeOpenSink.getStore() ?? fallbackSink;
    if (target != null) {
        target.unregisterOpenSpan(spanId);
    }
}
/**
 * Execute `fn` with `cb` installed as the intercepted-span handler for its async context and
 * return whatever `fn` returns. When `sink` is truthy it is additionally installed as the
 * open-span sink for the same scope; otherwise the open-span store is left untouched.
 */
export function runWithActiveRecorder(cb, fn, sink) {
    const scopedFn = sink ? () => activeOpenSink.run(sink, fn) : fn;
    return activeRecorder.run(cb, scopedFn);
}
/**
 * Replace the imperative-API fallback handler and return the handler that was installed before,
 * so the caller can restore it later. Pass null as `cb` to clear the handler.
 *
 * The fallback sink is only overwritten when `sink` is explicitly provided (including null);
 * omitting the argument leaves the current sink in place.
 */
export function setActiveRecorderFallback(cb, sink) {
    const previousHandler = fallbackCb;
    if (typeof sink !== "undefined") {
        fallbackSink = sink;
    }
    fallbackCb = cb;
    return previousHandler;
}
/**
 * Read-only accessor for the imperative-API open-span sink, or null when none is set. Lets a
 * nested trace save the value before replacing it and restore it afterwards.
 */
export function currentFallbackSink() {
    const sink = fallbackSink;
    return sink;
}
@@ -1,3 +1,3 @@
1
1
  import { SpanData } from "../trace.js";
2
- export declare function installGeminiInterceptor(onSpan: (span: SpanData) => void): void;
2
+ export declare function installGeminiInterceptor(onSpan: (span: SpanData) => void): Promise<void>;
3
3
  export declare function uninstallGeminiInterceptor(): void;
@@ -1,5 +1,6 @@
1
1
  import { SpanType } from "../trace.js";
2
2
  import { genId, nowIso, truncateJson } from "../utils.js";
3
+ import { dispatchRegisterOpenSpan, dispatchUnregisterOpenSpan, captureActiveSpanEmit } from "./_dispatch.js";
3
4
  import { emitGeminiToolCalls, emitGeminiToolResults, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
4
5
  const PRICING = {
5
6
  "gemini-3.1-flash-lite": [0.10, 0.40],
@@ -17,93 +18,233 @@ function calcCost(model, inputTokens, outputTokens) {
17
18
  const p = PRICING[model] || [0, 0];
18
19
  return (inputTokens * p[0] + outputTokens * p[1]) / 1_000_000;
19
20
  }
20
- let originalGenerateContent = null;
21
- let installed = false;
22
21
  let onSpanCallback = null;
23
- export function installGeminiInterceptor(onSpan) {
24
- if (installed) {
25
- onSpanCallback = onSpan;
26
- resetToolResultDedup();
27
- return;
28
- }
29
- onSpanCallback = onSpan;
30
- resetToolResultDedup();
31
- import("@google/genai").then((genaiMod) => {
22
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
23
+ let patchedProto = null;
24
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
25
+ let origGenerate = null;
26
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
27
+ let origStream = null;
28
+ let installPromise = null;
29
+ // Wrap a `generateContentInternal` implementation (the Models.prototype method the public
30
+ // `generateContent` delegates to). Returns a function with identical signature that records a span around the original call.
31
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
32
+ function wrapGenerate(original) {
33
+ return async function (...args) {
32
34
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
33
- const mod = genaiMod;
34
- const modelsProto = mod?.Models?.prototype || mod?.default?.Models?.prototype;
35
- if (!modelsProto?.generateContent)
36
- return;
37
- originalGenerateContent = modelsProto.generateContent;
38
- modelsProto.generateContent = async function (...args) {
35
+ const opts = args[0] || {};
36
+ const model = opts.model || "unknown";
37
+ const contents = opts.contents;
38
+ const toolSchemas = extractToolSchemas("gemini", opts.config?.tools);
39
+ const sampling = extractSamplingParams("gemini", opts);
40
+ const spanMeta = { ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) };
41
+ const spanId = genId();
42
+ const startedAt = nowIso();
43
+ const startMs = Date.now();
44
+ const { isReplaying, consumeCassetteEntry } = await import("../replay.js");
45
+ if (isReplaying()) {
46
+ const entry = consumeCassetteEntry("retrace.ai.generate", "llm_call");
47
+ if (entry) {
48
+ return { text: entry.output || "", usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 }, candidates: [] };
49
+ }
50
+ }
51
+ try {
52
+ const result = await original.apply(this, args);
53
+ const durationMs = Date.now() - startMs;
39
54
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
40
- const opts = args[0] || {};
41
- const model = opts.model || "unknown";
42
- const contents = opts.contents;
43
- const toolSchemas = extractToolSchemas("gemini", opts.config?.tools);
44
- const sampling = extractSamplingParams("gemini", opts);
45
- const spanMeta = { ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) };
46
- const spanId = genId();
47
- const startedAt = nowIso();
48
- const startMs = Date.now();
49
- // Replay mode — return mocked response from cassette
50
- const { isReplaying, consumeCassetteEntry } = await import("../replay.js");
51
- if (isReplaying()) {
52
- const entry = consumeCassetteEntry("retrace.ai.generate", "llm_call");
53
- if (entry) {
54
- return { text: entry.output || "", usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 }, candidates: [] };
55
- }
55
+ const res = result;
56
+ const inputTokens = res?.usageMetadata?.promptTokenCount || 0;
57
+ const outputTokens = res?.usageMetadata?.candidatesTokenCount || 0;
58
+ const fnNames = res?.candidates?.[0]?.content?.parts?.filter((p) => p.functionCall).map((p) => p.functionCall.name).join(", ");
59
+ const outputText = res?.text ?? (fnNames ? `[function_call: ${fnNames}]` : "");
60
+ const span = {
61
+ id: spanId, trace_id: "", parent_id: null,
62
+ span_type: SpanType.LLM_CALL, name: "retrace.ai.generate", model,
63
+ input: truncateJson(contents), output: truncateJson(outputText),
64
+ input_tokens: inputTokens, output_tokens: outputTokens,
65
+ cost: calcCost(model, inputTokens, outputTokens),
66
+ duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
67
+ ...(Object.keys(spanMeta).length ? { metadata: spanMeta } : {}),
68
+ };
69
+ onSpanCallback?.(span);
70
+ if (onSpanCallback) {
71
+ emitGeminiToolResults(contents, onSpanCallback);
72
+ emitGeminiToolCalls(res?.candidates, spanId, model, onSpanCallback);
73
+ }
74
+ return result;
75
+ }
76
+ catch (err) {
77
+ onSpanCallback?.({
78
+ id: spanId, trace_id: "", parent_id: null,
79
+ span_type: SpanType.LLM_CALL, name: "retrace.ai.generate", model,
80
+ input: truncateJson(contents), started_at: startedAt, ended_at: nowIso(),
81
+ duration_ms: Date.now() - startMs,
82
+ error: err instanceof Error ? err.message : String(err),
83
+ });
84
+ throw err;
85
+ }
86
+ };
87
+ }
88
+ // Wrap a `generateContentStreamInternal` implementation. Accumulates text + usage across chunks and
89
+ // emits the span exactly once: when the stream completes, errors, or is abandoned (via finally).
90
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
91
+ function wrapStream(original) {
92
+ return async function (...args) {
93
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
94
+ const opts = args[0] || {};
95
+ const model = opts.model || "unknown";
96
+ const contents = opts.contents;
97
+ const toolSchemas = extractToolSchemas("gemini", opts.config?.tools);
98
+ const sampling = extractSamplingParams("gemini", opts);
99
+ const spanMeta = { streaming: true, ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) };
100
+ const spanId = genId();
101
+ const startedAt = nowIso();
102
+ const startMs = Date.now();
103
+ const { isReplaying, consumeCassetteEntry } = await import("../replay.js");
104
+ if (isReplaying()) {
105
+ const entry = consumeCassetteEntry("retrace.ai.generate", "llm_call");
106
+ if (entry) {
107
+ const text = entry.output || "";
108
+ async function* mockStream() { yield { text, usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 } }; }
109
+ return mockStream();
110
+ }
111
+ }
112
+ let iterable;
113
+ try {
114
+ iterable = await original.apply(this, args);
115
+ }
116
+ catch (err) {
117
+ onSpanCallback?.({
118
+ id: spanId, trace_id: "", parent_id: null,
119
+ span_type: SpanType.LLM_CALL, name: "retrace.ai.generate", model,
120
+ input: truncateJson(contents), started_at: startedAt, ended_at: nowIso(),
121
+ duration_ms: Date.now() - startMs,
122
+ error: err instanceof Error ? err.message : String(err),
123
+ });
124
+ throw err;
125
+ }
126
+ const chunks = [];
127
+ let inputTokens = 0;
128
+ let outputTokens = 0;
129
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
130
+ let lastCandidates;
131
+ let streamError;
132
+ let emitted = false;
133
+ let sawFunctionCall = false;
134
+ // Bind the emit target NOW (caller's context) so finalize routes to the right recorder even
135
+ // when called later from the AFC's .return(), trace-end, or exit-flush.
136
+ const boundEmit = captureActiveSpanEmit() ?? onSpanCallback;
137
+ // Streaming-A, model (b): the span is OPENED at invocation (registered with the active
138
+ // recorder, synchronously in the CALLER's async context — registering lazily inside the
139
+ // generator fails because the AFC layer pulls it in a different context where the ALS sink is
140
+ // absent) and EMITTED EXACTLY ONCE at finalization — never silent. `capture_complete` is true
141
+ // ONLY on an observed clean full-drain; false for early-break, error, or a finalize forced by
142
+ // trace-end/exit (the AFC layer abandons this generator on a normal full consume — which is
143
+ // why we no longer rely on observing `done`, and dropped the read-ahead hack). A
144
+ // capture_complete:false span is NOT byte-replayable.
145
+ const finalize = (reason) => {
146
+ if (emitted)
147
+ return;
148
+ emitted = true;
149
+ dispatchUnregisterOpenSpan(spanId);
150
+ // capture_complete:true means fully-captured AND byte-replay-eligible — clean observed drain
151
+ // AND no function call (function-call output isn't captured as text and the AFC path may
152
+ // re-issue). Everything else is partial.
153
+ const complete = reason === "complete" && !sawFunctionCall;
154
+ const fnNames = lastCandidates?.[0]?.content?.parts
155
+ ?.filter((p) => p.functionCall)
156
+ .map((p) => p.functionCall.name).join(", ");
157
+ const outputText = chunks.length ? chunks.join("") : (fnNames ? `[function_call: ${fnNames}]` : "");
158
+ const span = {
159
+ id: spanId, trace_id: "", parent_id: null,
160
+ span_type: SpanType.LLM_CALL, name: "retrace.ai.generate", model,
161
+ input: truncateJson(contents), output: truncateJson(outputText),
162
+ input_tokens: inputTokens, output_tokens: outputTokens,
163
+ cost: calcCost(model, inputTokens, outputTokens),
164
+ duration_ms: Date.now() - startMs, started_at: startedAt, ended_at: nowIso(),
165
+ ...(streamError ? { error: streamError } : {}),
166
+ metadata: { ...spanMeta, capture_complete: complete },
167
+ };
168
+ boundEmit?.(span);
169
+ if (boundEmit) {
170
+ emitGeminiToolResults(contents, boundEmit);
171
+ emitGeminiToolCalls(lastCandidates, spanId, model, boundEmit);
56
172
  }
173
+ };
174
+ // Register in the caller's context so trace-end / exit-flush finalizes us partial if the
175
+ // consumer (AFC) abandons the generator mid-drain without ever reaching `done` / `.return()`.
176
+ dispatchRegisterOpenSpan(spanId, finalize);
177
+ async function* wrapped() {
57
178
  try {
58
- const result = await originalGenerateContent.apply(this, args);
59
- const durationMs = Date.now() - startMs;
60
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
61
- const res = result;
62
- const inputTokens = res?.usageMetadata?.promptTokenCount || 0;
63
- const outputTokens = res?.usageMetadata?.candidatesTokenCount || 0;
64
- const outputText = res?.text ?? (res?.candidates?.[0]?.content?.parts?.filter((p) => p.functionCall).map((p) => p.functionCall.name).join(", ") ? `[function_call: ${res.candidates[0].content.parts.filter((p) => p.functionCall).map((p) => p.functionCall.name).join(", ")}]` : "");
65
- const span = {
66
- id: spanId, trace_id: "", parent_id: null,
67
- span_type: SpanType.LLM_CALL, name: "retrace.ai.generate", model,
68
- input: truncateJson(contents), output: truncateJson(outputText),
69
- input_tokens: inputTokens, output_tokens: outputTokens,
70
- cost: calcCost(model, inputTokens, outputTokens),
71
- duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
72
- ...(Object.keys(spanMeta).length ? { metadata: spanMeta } : {}),
73
- };
74
- onSpanCallback?.(span);
75
- // Auto-capture tool usage (functionCall parts in response, functionResponse in input).
76
- if (onSpanCallback) {
77
- emitGeminiToolResults(contents, onSpanCallback);
78
- emitGeminiToolCalls(res?.candidates, spanId, model, onSpanCallback);
179
+ for await (const chunk of iterable) { // eslint-disable-line @typescript-eslint/no-explicit-any
180
+ if (typeof chunk?.text === "string")
181
+ chunks.push(chunk.text);
182
+ if (chunk?.candidates)
183
+ lastCandidates = chunk.candidates;
184
+ // A function-call stream is NOT byte-replay-eligible: its output isn't plain text (so
185
+ // chunks.join misses it) and the AFC layer may re-issue. Mark it so finalize never
186
+ // stamps capture_complete:true even on a clean drain.
187
+ if (chunk?.candidates?.[0]?.content?.parts?.some((p) => p.functionCall)) {
188
+ sawFunctionCall = true;
189
+ }
190
+ if (chunk?.usageMetadata) {
191
+ inputTokens = chunk.usageMetadata.promptTokenCount || inputTokens;
192
+ outputTokens = chunk.usageMetadata.candidatesTokenCount || outputTokens;
193
+ }
194
+ yield chunk;
79
195
  }
80
- return result;
196
+ finalize("complete"); // observed clean full-drain (e.g. via retrace.stream helper)
81
197
  }
82
198
  catch (err) {
83
- const span = {
84
- id: spanId, trace_id: "", parent_id: null,
85
- span_type: SpanType.LLM_CALL, name: "retrace.ai.generate", model,
86
- input: truncateJson(contents), started_at: startedAt, ended_at: nowIso(),
87
- duration_ms: Date.now() - startMs,
88
- error: err instanceof Error ? err.message : String(err),
89
- };
90
- onSpanCallback?.(span);
199
+ streamError = err instanceof Error ? err.message : String(err);
200
+ finalize("partial");
91
201
  throw err;
92
202
  }
93
- };
94
- installed = true;
95
- }).catch(() => { });
203
+ finally {
204
+ finalize("partial"); // early-break (consumer .return()) / no clean drain observed
205
+ }
206
+ }
207
+ return wrapped();
208
+ };
96
209
  }
97
- export function uninstallGeminiInterceptor() {
98
- if (!installed || !originalGenerateContent)
99
- return;
100
- import("@google/genai").then((genaiMod) => {
210
+ // @google/genai binds the PUBLIC `generateContent`/`generateContentStream` as own bound instance
211
+ // properties (not on the prototype), so patching the prototype's public method is a no-op. However,
212
+ // the public methods delegate to `generateContentInternal` / `generateContentStreamInternal`, which
213
+ // ARE regular methods on `Models.prototype`. Patching those is RETROACTIVE to every instance
214
+ // regardless of construction order (mirroring the Python SDK's class-method patch) — so no
215
+ // install-before-construction requirement, no race, and no `ready()` escape hatch is needed.
216
+ export function installGeminiInterceptor(onSpan) {
217
+ onSpanCallback = onSpan;
218
+ resetToolResultDedup();
219
+ if (installPromise)
220
+ return installPromise; // synchronous dedupe — prevents the double-wrap race
221
+ installPromise = import("@google/genai").then((genaiMod) => {
101
222
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
102
223
  const mod = genaiMod;
103
- const modelsProto = mod?.Models?.prototype || mod?.default?.Models?.prototype;
104
- if (modelsProto)
105
- modelsProto.generateContent = originalGenerateContent;
224
+ const Models = mod?.Models || mod?.default?.Models;
225
+ const proto = Models?.prototype;
226
+ if (!proto || typeof proto.generateContentInternal !== "function")
227
+ return;
228
+ patchedProto = proto;
229
+ origGenerate = proto.generateContentInternal;
230
+ proto.generateContentInternal = wrapGenerate(origGenerate);
231
+ if (typeof proto.generateContentStreamInternal === "function") {
232
+ origStream = proto.generateContentStreamInternal;
233
+ proto.generateContentStreamInternal = wrapStream(origStream);
234
+ }
106
235
  }).catch(() => { });
107
- installed = false;
236
+ return installPromise;
237
+ }
/**
 * Restore the original Gemini prototype methods (when a patch was applied) and reset every piece
 * of module-level interceptor state, so a later installGeminiInterceptor() starts from scratch.
 *
 * NOTE(review): if this runs while an install's dynamic import is still pending, `patchedProto` is
 * still null at this point, so nothing is restored here and the pending `.then` will still patch
 * the prototype afterwards — confirm whether callers can hit that window.
 */
export function uninstallGeminiInterceptor() {
    const proto = patchedProto;
    if (proto) {
        if (origGenerate) {
            proto.generateContentInternal = origGenerate;
        }
        if (origStream) {
            proto.generateContentStreamInternal = origStream;
        }
    }
    // Forget everything captured at install time.
    origStream = null;
    origGenerate = null;
    patchedProto = null;
    onSpanCallback = null;
    installPromise = null;
}
@@ -0,0 +1 @@
1
+ export declare function ensureInterceptorsInstalled(): Promise<void>;
@@ -0,0 +1,21 @@
1
+ import { dispatchInterceptedSpan } from "./_dispatch.js";
2
+ import { installGeminiInterceptor } from "./gemini.js";
3
+ import { installOpenAIInterceptor } from "./openai.js";
4
+ import { installAnthropicInterceptor } from "./anthropic.js";
/**
 * Install ALL provider interceptors against the single stable dispatcher. Idempotent and memoized:
 * the first call starts the installs, every later call returns the same promise.
 *
 * Called fire-and-forget at import time and from configure(), so it must never throw and never
 * reject — an uncaught rejection there would surface as an unhandled rejection in the host app.
 * Each installer is therefore isolated: a synchronous throw or a rejected promise from one
 * provider is absorbed and does not affect the others.
 *
 * The Gemini interceptor patches retroactive prototype methods, so the (async) install landing
 * slightly after import is fine — capture works regardless of when the user constructs their client.
 *
 * @returns A promise that resolves (never rejects) once every installer has settled.
 */
let _installPromise = null;
export function ensureInterceptorsInstalled() {
    if (_installPromise)
        return _installPromise;
    const installers = [installGeminiInterceptor, installOpenAIInterceptor, installAnthropicInterceptor];
    // The async wrapper runs each installer synchronously, in order, and converts a synchronous
    // throw into a rejection; allSettled then guarantees the memoized promise always resolves.
    _installPromise = Promise.allSettled(installers.map(async (install) => install(dispatchInterceptedSpan))).then(() => { });
    return _installPromise;
}