retrace-sdk 0.11.0 → 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config.d.ts +4 -0
- package/dist/config.js +7 -0
- package/dist/errors.d.ts +22 -0
- package/dist/errors.js +18 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.js +8 -1
- package/dist/init.js +22 -15
- package/dist/interceptors/_dispatch.d.ts +25 -0
- package/dist/interceptors/_dispatch.js +57 -0
- package/dist/interceptors/gemini.d.ts +1 -1
- package/dist/interceptors/gemini.js +215 -74
- package/dist/interceptors/install.d.ts +1 -0
- package/dist/interceptors/install.js +21 -0
- package/dist/recorder.d.ts +30 -2
- package/dist/recorder.js +132 -23
- package/dist/stream.d.ts +26 -0
- package/dist/stream.js +56 -0
- package/dist/traceparent.d.ts +19 -1
- package/dist/traceparent.js +35 -3
- package/dist/transport.d.ts +71 -1
- package/dist/transport.js +304 -42
- package/package.json +2 -2
package/dist/config.d.ts
CHANGED
|
@@ -11,6 +11,10 @@ export interface Config {
|
|
|
11
11
|
* request/response only (recommended for short-lived scripts and serverless — it never
|
|
12
12
|
* holds an open socket and always surfaces upload errors); "ws" forces WebSocket. */
|
|
13
13
|
transport: "auto" | "ws" | "http";
|
|
14
|
+
/** Called with a STRUCTURED signal when the server signals credits_exhausted | rate_limited |
|
|
15
|
+
* halt | error. Branch on `signal.code`; use `signal.retryable`/`signal.fatal` to decide
|
|
16
|
+
* behavior. Defaults to a throttled console warning so signals are never silently dropped. */
|
|
17
|
+
onError?: (signal: import("./errors.js").RetraceServerSignal) => void;
|
|
14
18
|
}
|
|
15
19
|
export declare function configure(opts: Partial<Config>): Config;
|
|
16
20
|
export declare function requireApiKey(): string;
|
package/dist/config.js
CHANGED
|
@@ -17,6 +17,13 @@ export function configure(opts) {
|
|
|
17
17
|
if (opts.baseUrl && !opts.wsUrl) {
|
|
18
18
|
config.wsUrl = config.baseUrl.replace("https://", "wss://").replace("http://", "ws://");
|
|
19
19
|
}
|
|
20
|
+
// Eagerly kick off provider-interceptor installation as soon as the SDK is configured and enabled.
|
|
21
|
+
// The Gemini interceptor patches `Models.prototype.generateContentInternal` (and its stream variant),
|
|
22
|
+
// which applies to every client instance regardless of construction order. Fire-and-forget, with
|
|
23
|
+
// install failures swallowed; the dynamic import resolves before the first awaited LLM call in any real async flow.
|
|
24
|
+
if (config.enabled) {
|
|
25
|
+
void import("./interceptors/install.js").then((m) => m.ensureInterceptorsInstalled()).catch(() => { });
|
|
26
|
+
}
|
|
20
27
|
return config;
|
|
21
28
|
}
|
|
22
29
|
export function requireApiKey() {
|
package/dist/errors.d.ts
CHANGED
|
@@ -14,3 +14,25 @@ export declare class RetraceRateLimitError extends RetraceError {
|
|
|
14
14
|
retryAfter: number;
|
|
15
15
|
constructor(retryAfter: number);
|
|
16
16
|
}
|
|
17
|
+
/**
|
|
18
|
+
* Structured server-originated signal handed to `onError`. Actionable WITHOUT string-matching:
|
|
19
|
+
* branch on `code`, decide retry from `retryable`, decide whether recording is still alive from
|
|
20
|
+
* `fatal`. (A raw message alone forces the user to string-match — this type exists to avoid that.)
|
|
21
|
+
*/
|
|
22
|
+
export type RetraceSignalCode = "credits_exhausted" | "rate_limited" | "halt" | "error";
|
|
23
|
+
export interface RetraceServerSignal {
|
|
24
|
+
/** Machine-readable category — branch on THIS, never on `message`. */
|
|
25
|
+
code: RetraceSignalCode;
|
|
26
|
+
/** Human-readable detail from the server. */
|
|
27
|
+
message: string;
|
|
28
|
+
/** Will retrying / backing off plausibly succeed? rate_limited=true; credits/halt/error=false. */
|
|
29
|
+
retryable: boolean;
|
|
30
|
+
/** Did this STOP recording? halt=true (transport closed); others leave recording alive. */
|
|
31
|
+
fatal: boolean;
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Map a raw server frame to a structured signal. Single source of truth for category + retryable +
|
|
35
|
+
* fatal, shared by the WS dispatch. Kept here (not inline in the dispatch) so TS and Python classify
|
|
36
|
+
* identically and the CI gate can assert the mapping.
|
|
37
|
+
*/
|
|
38
|
+
export declare function classifyServerSignal(rawType: string, message: string): RetraceServerSignal;
|
package/dist/errors.js
CHANGED
|
@@ -14,3 +14,21 @@ export class RetraceRateLimitError extends RetraceError {
|
|
|
14
14
|
retryAfter;
|
|
15
15
|
constructor(retryAfter) { super(`Rate limited. Retry after ${retryAfter}s`); this.name = "RetraceRateLimitError"; this.retryAfter = retryAfter; }
|
|
16
16
|
}
|
|
17
|
+
/**
 * Coerce a raw frame payload into message text so classification can never throw.
 * Strings pass through untouched; null/undefined become ""; objects are JSON-encoded when
 * possible; everything else goes through String().
 */
function normalizeSignalMessage(message) {
    if (typeof message === "string")
        return message;
    if (message == null)
        return "";
    if (typeof message === "object") {
        try {
            return JSON.stringify(message) ?? String(message);
        }
        catch {
            // Circular structures etc. — fall through to the generic conversion below.
        }
    }
    return String(message);
}
/**
 * Map a raw server frame to a structured signal. Single source of truth for category + retryable +
 * fatal, shared by the WS dispatch. Kept here (not inline in the dispatch) so TS and Python classify
 * identically and the CI gate can assert the mapping.
 *
 * Rules, evaluated in order:
 *  1. rawType "halt"                   -> code "halt"              (fatal, not retryable)
 *  2. message contains "limit reached" -> code "credits_exhausted" (not retryable, not fatal)
 *  3. message contains "Rate limit"    -> code "rate_limited"      (retryable, not fatal)
 *  4. anything else                    -> code "error"             (not retryable, not fatal)
 *
 * @param rawType - The frame's type tag.
 * @param message - The frame's detail text. A non-string payload is tolerated and converted to
 *   text instead of throwing, so the returned `message` is always a string.
 * @returns The structured signal for the frame.
 */
export function classifyServerSignal(rawType, message) {
    const text = normalizeSignalMessage(message);
    if (rawType === "halt") {
        return { code: "halt", message: text || "Guardrail triggered", retryable: false, fatal: true };
    }
    // rawType === "error" (or anything else carrying an error string)
    // NOTE(review): "limit reached" is tested before "Rate limit", so a message such as
    // "Rate limit reached" is reported as credits_exhausted — confirm against the server's wording.
    if (text.includes("limit reached")) {
        return { code: "credits_exhausted", message: text, retryable: false, fatal: false };
    }
    if (text.includes("Rate limit")) {
        return { code: "rate_limited", message: text, retryable: true, fatal: false };
    }
    return { code: "error", message: text || "Server error", retryable: false, fatal: false };
}
|
package/dist/index.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ export { configure, getConfig } from "./config.js";
|
|
|
2
2
|
export { init, getActiveRecorder, shutdown } from "./init.js";
|
|
3
3
|
export type { InitOptions } from "./init.js";
|
|
4
4
|
export { record, trace, TraceRecorder } from "./recorder.js";
|
|
5
|
+
export { stream } from "./stream.js";
|
|
5
6
|
export { SpanBuilder, TraceBuilder } from "./trace.js";
|
|
6
7
|
export type { SpanData, TraceData } from "./trace.js";
|
|
7
8
|
export { SpanType, TraceStatus } from "./trace.js";
|
|
@@ -13,7 +14,7 @@ export { registerResumable, handleResume } from "./resume.js";
|
|
|
13
14
|
export type { ResumeCommand } from "./resume.js";
|
|
14
15
|
export { isReplaying, consumeCassetteEntry, handleReplay } from "./replay.js";
|
|
15
16
|
export type { CassetteEntry, ReplayCommand } from "./replay.js";
|
|
16
|
-
export { setTraceContext, clearTraceContext, getTraceparent, injectTraceparent, parseTraceparent } from "./traceparent.js";
|
|
17
|
+
export { setTraceContext, clearTraceContext, getTraceparent, injectTraceparent, parseTraceparent, withTraceContext } from "./traceparent.js";
|
|
17
18
|
export { markGolden } from "./golden.js";
|
|
18
19
|
export { createLangChainHandler } from "./adapters/langchain.js";
|
|
19
20
|
export { retraceOnStepFinish, recordVercelStep } from "./adapters/vercel-ai.js";
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
export { configure, getConfig } from "./config.js";
|
|
2
2
|
export { init, getActiveRecorder, shutdown } from "./init.js";
|
|
3
3
|
export { record, trace, TraceRecorder } from "./recorder.js";
|
|
4
|
+
export { stream } from "./stream.js";
|
|
4
5
|
export { SpanBuilder, TraceBuilder } from "./trace.js";
|
|
5
6
|
export { SpanType, TraceStatus } from "./trace.js";
|
|
6
7
|
export { installGeminiInterceptor, uninstallGeminiInterceptor } from "./interceptors/gemini.js";
|
|
@@ -9,10 +10,16 @@ export { installAnthropicInterceptor, uninstallAnthropicInterceptor } from "./in
|
|
|
9
10
|
export { RetraceError, RetraceAuthError, RetraceCreditsExhaustedError, RetraceConnectionError, RetraceRateLimitError } from "./errors.js";
|
|
10
11
|
export { registerResumable, handleResume } from "./resume.js";
|
|
11
12
|
export { isReplaying, consumeCassetteEntry, handleReplay } from "./replay.js";
|
|
12
|
-
export { setTraceContext, clearTraceContext, getTraceparent, injectTraceparent, parseTraceparent } from "./traceparent.js";
|
|
13
|
+
export { setTraceContext, clearTraceContext, getTraceparent, injectTraceparent, parseTraceparent, withTraceContext } from "./traceparent.js";
|
|
13
14
|
export { markGolden } from "./golden.js";
|
|
14
15
|
// Framework adapters (5B) — drop-in instrumentation for LangChain/LangGraph + Vercel AI SDK.
|
|
15
16
|
export { createLangChainHandler } from "./adapters/langchain.js";
|
|
16
17
|
export { retraceOnStepFinish, recordVercelStep } from "./adapters/vercel-ai.js";
|
|
18
|
+
// Patch provider SDKs at import (fire-and-forget; NO top-level await → CJS/bundler-safe). The Gemini
|
|
19
|
+
// interceptor patches RETROACTIVE prototype methods (generateContentInternal/...Stream), so capture
|
|
20
|
+
// works regardless of when the user constructs their client — including module-level clients built
|
|
21
|
+
// in the same tick. This matches the Python SDK: no ordering contract, no ready() footgun.
|
|
22
|
+
import { ensureInterceptorsInstalled } from "./interceptors/install.js";
|
|
23
|
+
void ensureInterceptorsInstalled();
|
|
17
24
|
// v0.5.0
|
|
18
25
|
// trigger
|
package/dist/init.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { configure, getConfig, requireApiKey } from "./config.js";
|
|
2
|
-
import { TraceRecorder,
|
|
2
|
+
import { TraceRecorder, drainSharedTransportOnExit } from "./recorder.js";
|
|
3
|
+
import { onProcessExit } from "./transport.js";
|
|
3
4
|
import { TraceStatus } from "./trace.js";
|
|
4
5
|
let ambient = null;
|
|
5
6
|
let exitHooked = false;
|
|
@@ -46,28 +47,34 @@ export function init(opts = {}) {
|
|
|
46
47
|
ambient.start(traceName); // installs the provider interceptors against the ambient recorder
|
|
47
48
|
if (!exitHooked && typeof process !== "undefined") {
|
|
48
49
|
exitHooked = true;
|
|
49
|
-
const finish = (status) => {
|
|
50
|
+
const finish = (status, terminatedEarly = false) => {
|
|
50
51
|
const rec = ambient;
|
|
51
52
|
ambient = null;
|
|
52
53
|
try {
|
|
53
|
-
rec?.end(undefined, status);
|
|
54
|
+
rec?.end(undefined, status, { terminatedEarly });
|
|
54
55
|
}
|
|
55
56
|
catch { /* best effort on shutdown */ }
|
|
56
57
|
};
|
|
57
|
-
//
|
|
58
|
-
//
|
|
59
|
-
//
|
|
60
|
-
|
|
61
|
-
|
|
58
|
+
// Finish the ambient trace as a pre-exit hook: registerProcessExitFlush (in recorder.ts) runs
|
|
59
|
+
// this BEFORE draining the transport, so the final trace_ended is in the buffer for the
|
|
60
|
+
// HTTP one-shot — and signal ownership (sole-listener-flush-then-exit vs user-owns-exit) is
|
|
61
|
+
// handled there in one place, not duplicated here.
|
|
62
|
+
//
|
|
63
|
+
// Only a graceful exit (event loop emptied = the program finished its work) produces a CLEAN
|
|
64
|
+
// terminal. Signal/uncaught exits interrupted the run mid-flight, so the synthesized terminal
|
|
65
|
+
// is marked terminated_early — otherwise we'd manufacture a clean-looking terminal for a
|
|
66
|
+
// truncated run and defeat the replay-guard's no-terminal rule.
|
|
67
|
+
onProcessExit((reason) => finish(reason === "uncaught" ? TraceStatus.FAILED : TraceStatus.COMPLETED, reason !== "graceful"));
|
|
68
|
+
// uncaughtException is not covered by registerProcessExitFlush (it's status-specific and must
|
|
69
|
+
// exit non-zero): finish FAILED + terminated_early, drain best-effort within a hard cap, then exit.
|
|
70
|
+
process.once("uncaughtException", (err) => {
|
|
71
|
+
console.error(err);
|
|
72
|
+
finish(TraceStatus.FAILED, true);
|
|
62
73
|
void Promise.race([
|
|
63
|
-
|
|
74
|
+
drainSharedTransportOnExit(1500).catch(() => { }),
|
|
64
75
|
new Promise((r) => setTimeout(r, 3000)),
|
|
65
|
-
]).then(() => process.exit(
|
|
66
|
-
};
|
|
67
|
-
process.once("beforeExit", () => finish(TraceStatus.COMPLETED));
|
|
68
|
-
process.once("SIGINT", () => finishAndExit(TraceStatus.COMPLETED, 130));
|
|
69
|
-
process.once("SIGTERM", () => finishAndExit(TraceStatus.COMPLETED, 143));
|
|
70
|
-
process.once("uncaughtException", (err) => { console.error(err); finishAndExit(TraceStatus.FAILED, 1); });
|
|
76
|
+
]).then(() => process.exit(1));
|
|
77
|
+
});
|
|
71
78
|
}
|
|
72
79
|
return ambient;
|
|
73
80
|
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { SpanData } from "../trace.js";
|
|
2
|
+
export type SpanCallback = (span: SpanData) => void;
|
|
3
|
+
/** A recorder-side sink for the two-phase streaming-span lifecycle (open at invocation, finalize
|
|
4
|
+
* once at clean-drain / break / error / trace-end / exit). Routed like SpanCallback. */
|
|
5
|
+
export interface OpenSpanSink {
|
|
6
|
+
registerOpenSpan(spanId: string, finalize: (reason: "complete" | "partial") => void): void;
|
|
7
|
+
unregisterOpenSpan(spanId: string): void;
|
|
8
|
+
}
|
|
9
|
+
/** Stable interceptor callback — routes an intercepted span to the recorder active in this context. */
|
|
10
|
+
export declare function dispatchInterceptedSpan(span: SpanData): void;
|
|
11
|
+
/** Capture the span sink active in THIS context (synchronously, at invocation) so a deferred
|
|
12
|
+
* finalizer can emit to the right recorder even when later called from a context where the ALS
|
|
13
|
+
* store is absent (the AFC layer's .return(), trace-end, exit-flush). */
|
|
14
|
+
export declare function captureActiveSpanEmit(): SpanCallback | null;
|
|
15
|
+
/** Register an open streaming span's finalizer with the active recorder (two-phase, model (b)). */
|
|
16
|
+
export declare function dispatchRegisterOpenSpan(spanId: string, finalize: (reason: "complete" | "partial") => void): void;
|
|
17
|
+
/** Drop an open span's finalizer once it has been finalized in-band. */
|
|
18
|
+
export declare function dispatchUnregisterOpenSpan(spanId: string): void;
|
|
19
|
+
/** Run `fn` with `cb` as the active intercepted-span handler for its async context (fully isolated). */
|
|
20
|
+
export declare function runWithActiveRecorder<T>(cb: SpanCallback, fn: () => T, sink?: OpenSpanSink): T;
|
|
21
|
+
/** Set the imperative-API fallback handler. Returns the PREVIOUS handler so callers can restore
|
|
22
|
+
* it (e.g. a nested trace() must not wipe the ambient init() fallback). Pass null to clear. */
|
|
23
|
+
export declare function setActiveRecorderFallback(cb: SpanCallback | null, sink?: OpenSpanSink | null): SpanCallback | null;
|
|
24
|
+
/** The current imperative-API open-span sink (so a nested trace can save/restore it). */
|
|
25
|
+
export declare function currentFallbackSink(): OpenSpanSink | null;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context-isolated routing for auto-instrumented spans (mirrors the Python `_dispatch.py`
|
|
3
|
+
* ContextVar dispatcher).
|
|
4
|
+
*
|
|
5
|
+
* The interceptors (openai/anthropic/gemini) are patched globally and invoke ONE stable
|
|
6
|
+
* dispatcher. The target recorder for the current async context lives in an AsyncLocalStorage
|
|
7
|
+
* store, so concurrent traces on a server each resolve their own recorder — instead of a
|
|
8
|
+
* module-global, last-writer-wins callback that cross-routed intercepted spans to whichever
|
|
9
|
+
* trace was created most recently.
|
|
10
|
+
*/
|
|
11
|
+
import { AsyncLocalStorage } from "async_hooks";
|
|
12
|
+
const activeRecorder = new AsyncLocalStorage();
|
|
13
|
+
const activeOpenSink = new AsyncLocalStorage();
|
|
14
|
+
// Fallback for the imperative record()/start()/end() API used outside a runWithActiveRecorder
|
|
15
|
+
// scope. Last-writer-wins (documented limitation for purely imperative concurrent traces).
|
|
16
|
+
let fallbackCb = null;
|
|
17
|
+
let fallbackSink = null;
|
|
18
|
+
/** Stable interceptor callback — routes an intercepted span to the recorder active in this context. */
|
|
19
|
+
export function dispatchInterceptedSpan(span) {
|
|
20
|
+
const cb = activeRecorder.getStore() ?? fallbackCb;
|
|
21
|
+
cb?.(span);
|
|
22
|
+
}
|
|
23
|
+
/** Capture the span sink active in THIS context (synchronously, at invocation) so a deferred
|
|
24
|
+
* finalizer can emit to the right recorder even when later called from a context where the ALS
|
|
25
|
+
* store is absent (the AFC layer's .return(), trace-end, exit-flush). */
|
|
26
|
+
export function captureActiveSpanEmit() {
|
|
27
|
+
return activeRecorder.getStore() ?? fallbackCb;
|
|
28
|
+
}
|
|
29
|
+
/** Register an open streaming span's finalizer with the active recorder (two-phase, model (b)). */
|
|
30
|
+
export function dispatchRegisterOpenSpan(spanId, finalize) {
|
|
31
|
+
const sink = activeOpenSink.getStore() ?? fallbackSink;
|
|
32
|
+
sink?.registerOpenSpan(spanId, finalize);
|
|
33
|
+
}
|
|
34
|
+
/** Drop an open span's finalizer once it has been finalized in-band. */
|
|
35
|
+
export function dispatchUnregisterOpenSpan(spanId) {
|
|
36
|
+
const sink = activeOpenSink.getStore() ?? fallbackSink;
|
|
37
|
+
sink?.unregisterOpenSpan(spanId);
|
|
38
|
+
}
|
|
39
|
+
/** Run `fn` with `cb` as the active intercepted-span handler for its async context (fully isolated). */
|
|
40
|
+
export function runWithActiveRecorder(cb, fn, sink) {
|
|
41
|
+
if (sink)
|
|
42
|
+
return activeRecorder.run(cb, () => activeOpenSink.run(sink, fn));
|
|
43
|
+
return activeRecorder.run(cb, fn);
|
|
44
|
+
}
|
|
45
|
+
/** Set the imperative-API fallback handler. Returns the PREVIOUS handler so callers can restore
|
|
46
|
+
* it (e.g. a nested trace() must not wipe the ambient init() fallback). Pass null to clear. */
|
|
47
|
+
export function setActiveRecorderFallback(cb, sink) {
|
|
48
|
+
const prev = fallbackCb;
|
|
49
|
+
fallbackCb = cb;
|
|
50
|
+
if (sink !== undefined)
|
|
51
|
+
fallbackSink = sink;
|
|
52
|
+
return prev;
|
|
53
|
+
}
|
|
54
|
+
/** The current imperative-API open-span sink (so a nested trace can save/restore it). */
|
|
55
|
+
export function currentFallbackSink() {
|
|
56
|
+
return fallbackSink;
|
|
57
|
+
}
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
import { SpanData } from "../trace.js";
|
|
2
|
-
export declare function installGeminiInterceptor(onSpan: (span: SpanData) => void): void
|
|
2
|
+
export declare function installGeminiInterceptor(onSpan: (span: SpanData) => void): Promise<void>;
|
|
3
3
|
export declare function uninstallGeminiInterceptor(): void;
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { SpanType } from "../trace.js";
|
|
2
2
|
import { genId, nowIso, truncateJson } from "../utils.js";
|
|
3
|
+
import { dispatchRegisterOpenSpan, dispatchUnregisterOpenSpan, captureActiveSpanEmit } from "./_dispatch.js";
|
|
3
4
|
import { emitGeminiToolCalls, emitGeminiToolResults, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
|
|
4
5
|
const PRICING = {
|
|
5
6
|
"gemini-3.1-flash-lite": [0.10, 0.40],
|
|
@@ -17,93 +18,233 @@ function calcCost(model, inputTokens, outputTokens) {
|
|
|
17
18
|
const p = PRICING[model] || [0, 0];
|
|
18
19
|
return (inputTokens * p[0] + outputTokens * p[1]) / 1_000_000;
|
|
19
20
|
}
|
|
20
|
-
let originalGenerateContent = null;
|
|
21
|
-
let installed = false;
|
|
22
21
|
let onSpanCallback = null;
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
22
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
23
|
+
let patchedProto = null;
|
|
24
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
25
|
+
let origGenerate = null;
|
|
26
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
27
|
+
let origStream = null;
|
|
28
|
+
let installPromise = null;
|
|
29
|
+
// Wrap a non-streaming generate implementation (installGeminiInterceptor applies this to
// `Models.prototype.generateContentInternal`). Returns an async function with the same call
// signature that:
//   - in replay mode, returns a synthetic response built from the next cassette entry (if any)
//     without calling `original`;
//   - otherwise calls `original` and reports one LLM_CALL span (plus tool-call / tool-result
//     spans) through `onSpanCallback`, or an error span if `original` rejects.
// Span recording is best-effort: a failure while building or emitting a span never changes the
// outcome of the caller's request (the provider's result is returned, the provider's error is
// rethrown).
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function wrapGenerate(original) {
    return async function (...args) {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const opts = args[0] || {};
        const model = opts.model || "unknown";
        const contents = opts.contents;
        const toolSchemas = extractToolSchemas("gemini", opts.config?.tools);
        const sampling = extractSamplingParams("gemini", opts);
        const spanMeta = { ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) };
        const spanId = genId();
        const startedAt = nowIso();
        const startMs = Date.now();
        const { isReplaying, consumeCassetteEntry } = await import("../replay.js");
        if (isReplaying()) {
            const entry = consumeCassetteEntry("retrace.ai.generate", "llm_call");
            if (entry) {
                return { text: entry.output || "", usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 }, candidates: [] };
            }
        }
        let result;
        try {
            result = await original.apply(this, args);
        }
        catch (err) {
            try {
                onSpanCallback?.({
                    id: spanId, trace_id: "", parent_id: null,
                    span_type: SpanType.LLM_CALL, name: "retrace.ai.generate", model,
                    input: truncateJson(contents), started_at: startedAt, ended_at: nowIso(),
                    duration_ms: Date.now() - startMs,
                    error: err instanceof Error ? err.message : String(err),
                });
            }
            catch {
                // Best-effort recording: the caller must see the provider's error, not ours.
            }
            throw err;
        }
        // Measured right after the provider call so span construction time is not included.
        const durationMs = Date.now() - startMs;
        try {
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            const res = result;
            const inputTokens = res?.usageMetadata?.promptTokenCount || 0;
            const outputTokens = res?.usageMetadata?.candidatesTokenCount || 0;
            const fnNames = res?.candidates?.[0]?.content?.parts?.filter((p) => p.functionCall).map((p) => p.functionCall.name).join(", ");
            // Fall back to a function-call marker when the response carries no text.
            const outputText = res?.text ?? (fnNames ? `[function_call: ${fnNames}]` : "");
            const span = {
                id: spanId, trace_id: "", parent_id: null,
                span_type: SpanType.LLM_CALL, name: "retrace.ai.generate", model,
                input: truncateJson(contents), output: truncateJson(outputText),
                input_tokens: inputTokens, output_tokens: outputTokens,
                cost: calcCost(model, inputTokens, outputTokens),
                duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
                ...(Object.keys(spanMeta).length ? { metadata: spanMeta } : {}),
            };
            onSpanCallback?.(span);
            if (onSpanCallback) {
                emitGeminiToolResults(contents, onSpanCallback);
                emitGeminiToolCalls(res?.candidates, spanId, model, onSpanCallback);
            }
        }
        catch {
            // Best-effort recording: a successful provider call must not fail (or be reported as
            // failed) because span construction or the span callback threw.
        }
        return result;
    };
}
|
|
88
|
+
// Wrap a streaming generate implementation (installGeminiInterceptor applies this to
// `Models.prototype.generateContentStreamInternal`). Returns an async function with the same call
// signature that resolves to an async generator re-yielding every chunk of the original stream
// while accumulating text, usage and candidates. Exactly one LLM_CALL span is emitted per stream:
// on clean drain, on early break, on a mid-stream error, or when the recorder forces finalization.
// In replay mode a one-chunk mock stream built from the next cassette entry is returned instead.
// Span recording is best-effort: a failure while registering, building or emitting the span never
// changes what the consumer of the stream observes.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function wrapStream(original) {
    return async function (...args) {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const opts = args[0] || {};
        const model = opts.model || "unknown";
        const contents = opts.contents;
        const toolSchemas = extractToolSchemas("gemini", opts.config?.tools);
        const sampling = extractSamplingParams("gemini", opts);
        const spanMeta = { streaming: true, ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) };
        const spanId = genId();
        const startedAt = nowIso();
        const startMs = Date.now();
        const { isReplaying, consumeCassetteEntry } = await import("../replay.js");
        if (isReplaying()) {
            const entry = consumeCassetteEntry("retrace.ai.generate", "llm_call");
            if (entry) {
                const text = entry.output || "";
                async function* mockStream() { yield { text, usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 } }; }
                return mockStream();
            }
        }
        let iterable;
        try {
            iterable = await original.apply(this, args);
        }
        catch (err) {
            // The stream never opened: report an error span and rethrow the provider's error.
            try {
                onSpanCallback?.({
                    id: spanId, trace_id: "", parent_id: null,
                    span_type: SpanType.LLM_CALL, name: "retrace.ai.generate", model,
                    input: truncateJson(contents), started_at: startedAt, ended_at: nowIso(),
                    duration_ms: Date.now() - startMs,
                    error: err instanceof Error ? err.message : String(err),
                });
            }
            catch {
                // Best-effort recording: the caller must see the provider's error, not ours.
            }
            throw err;
        }
        const chunks = [];
        let inputTokens = 0;
        let outputTokens = 0;
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        let lastCandidates;
        let streamError;
        let emitted = false;
        let sawFunctionCall = false;
        // Bind the emit target NOW (caller's context) so finalize routes to the right recorder even
        // when called later from the AFC's .return(), trace-end, or exit-flush.
        const boundEmit = captureActiveSpanEmit() ?? onSpanCallback;
        // Streaming-A, model (b): the span is OPENED at invocation (registered with the active
        // recorder, synchronously in the CALLER's async context — registering lazily inside the
        // generator fails because the AFC layer pulls it in a different context where the ALS sink is
        // absent) and EMITTED EXACTLY ONCE at finalization — never silent. `capture_complete` is true
        // ONLY on an observed clean full-drain; false for early-break, error, or a finalize forced by
        // trace-end/exit (the AFC layer abandons this generator on a normal full consume — which is
        // why we no longer rely on observing `done`, and dropped the read-ahead hack). A
        // capture_complete:false span is NOT byte-replayable.
        const finalize = (reason) => {
            if (emitted)
                return;
            emitted = true;
            try {
                dispatchUnregisterOpenSpan(spanId);
            }
            catch {
                // Best-effort: failing to drop the registration must not prevent the span emit below.
            }
            try {
                // capture_complete:true means fully-captured AND byte-replay-eligible — clean observed
                // drain AND no function call (function-call output isn't captured as text and the AFC
                // path may re-issue). Everything else is partial.
                const complete = reason === "complete" && !sawFunctionCall;
                // NOTE(review): names and tool-call spans come from the LAST chunk that carried
                // candidates; a function call seen only in an earlier chunk sets sawFunctionCall but is
                // not reflected here — confirm that matches how the provider chunks function calls.
                const fnNames = lastCandidates?.[0]?.content?.parts
                    ?.filter((p) => p.functionCall)
                    .map((p) => p.functionCall.name).join(", ");
                const outputText = chunks.length ? chunks.join("") : (fnNames ? `[function_call: ${fnNames}]` : "");
                const span = {
                    id: spanId, trace_id: "", parent_id: null,
                    span_type: SpanType.LLM_CALL, name: "retrace.ai.generate", model,
                    input: truncateJson(contents), output: truncateJson(outputText),
                    input_tokens: inputTokens, output_tokens: outputTokens,
                    cost: calcCost(model, inputTokens, outputTokens),
                    duration_ms: Date.now() - startMs, started_at: startedAt, ended_at: nowIso(),
                    ...(streamError ? { error: streamError } : {}),
                    metadata: { ...spanMeta, capture_complete: complete },
                };
                boundEmit?.(span);
                if (boundEmit) {
                    emitGeminiToolResults(contents, boundEmit);
                    emitGeminiToolCalls(lastCandidates, spanId, model, boundEmit);
                }
            }
            catch {
                // Best-effort recording: finalize runs inside the consumer's iteration (and from
                // trace-end / exit-flush); a throwing span callback must not surface there.
            }
        };
        // Register in the caller's context so trace-end / exit-flush finalizes us partial if the
        // consumer (AFC) abandons the generator mid-drain without ever reaching `done` / `.return()`.
        try {
            dispatchRegisterOpenSpan(spanId, finalize);
        }
        catch {
            // Best-effort: the stream is still returned and finalize still runs in-band.
        }
        async function* wrapped() {
            try {
                for await (const chunk of iterable) { // eslint-disable-line @typescript-eslint/no-explicit-any
                    if (typeof chunk?.text === "string")
                        chunks.push(chunk.text);
                    if (chunk?.candidates)
                        lastCandidates = chunk.candidates;
                    // A function-call stream is NOT byte-replay-eligible: its output isn't plain text (so
                    // chunks.join misses it) and the AFC layer may re-issue. Mark it so finalize never
                    // stamps capture_complete:true even on a clean drain.
                    if (chunk?.candidates?.[0]?.content?.parts?.some((p) => p.functionCall)) {
                        sawFunctionCall = true;
                    }
                    if (chunk?.usageMetadata) {
                        // Keep the last non-zero counts seen; chunks without counts leave them as is.
                        inputTokens = chunk.usageMetadata.promptTokenCount || inputTokens;
                        outputTokens = chunk.usageMetadata.candidatesTokenCount || outputTokens;
                    }
                    yield chunk;
                }
                finalize("complete"); // observed clean full-drain (e.g. via retrace.stream helper)
            }
            catch (err) {
                streamError = err instanceof Error ? err.message : String(err);
                finalize("partial");
                throw err;
            }
            finally {
                finalize("partial"); // early-break (consumer .return()) / no clean drain observed
            }
        }
        return wrapped();
    };
}
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
210
|
+
// @google/genai binds the PUBLIC `generateContent`/`generateContentStream` as own bound instance
// properties (not on the prototype), so patching the prototype's public method is a no-op. However,
// the public methods delegate to `generateContentInternal` / `generateContentStreamInternal`, which
// ARE regular methods on `Models.prototype`. Patching those is RETROACTIVE to every instance
// regardless of construction order (mirroring the Python SDK's class-method patch) — so no
// install-before-construction requirement, no race, and no `ready()` escape hatch is needed.
/**
 * Install the Gemini interceptor and route its spans to `onSpan`.
 *
 * Every call replaces the span callback and resets tool-result dedup. The prototype patch itself
 * is applied at most once per install cycle: concurrent/repeated calls share one promise.
 *
 * @param onSpan - Receives every span produced by the wrapped Gemini methods.
 * @returns A promise that settles once the patch attempt finished. It never rejects: a missing
 *   `@google/genai` package or an unexpected module shape simply leaves nothing patched.
 */
export function installGeminiInterceptor(onSpan) {
    onSpanCallback = onSpan;
    resetToolResultDedup();
    if (installPromise)
        return installPromise; // synchronous dedupe — prevents the double-wrap race
    const pending = import("@google/genai").then((genaiMod) => {
        // uninstallGeminiInterceptor() clears `installPromise`. If that (or a fresh install) happened
        // while the import was in flight, this attempt is stale: patching now would re-install after
        // an uninstall and let a later install wrap the already-wrapped methods.
        if (installPromise !== pending)
            return;
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const mod = genaiMod;
        const Models = mod?.Models || mod?.default?.Models;
        const proto = Models?.prototype;
        if (!proto || typeof proto.generateContentInternal !== "function")
            return;
        patchedProto = proto;
        origGenerate = proto.generateContentInternal;
        proto.generateContentInternal = wrapGenerate(origGenerate);
        if (typeof proto.generateContentStreamInternal === "function") {
            origStream = proto.generateContentStreamInternal;
            proto.generateContentStreamInternal = wrapStream(origStream);
        }
    }).catch(() => { });
    installPromise = pending;
    return pending;
}
|
|
238
|
+
/**
 * Undo installGeminiInterceptor: put the saved original methods back on the patched prototype
 * (when a patch was applied) and reset all interceptor state so a later install starts clean.
 */
export function uninstallGeminiInterceptor() {
    const proto = patchedProto;
    if (proto && origGenerate) {
        proto.generateContentInternal = origGenerate;
    }
    if (proto && origStream) {
        proto.generateContentStreamInternal = origStream;
    }
    // Clear every piece of module state, including the memoized install promise.
    installPromise = null;
    onSpanCallback = null;
    patchedProto = null;
    origGenerate = null;
    origStream = null;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function ensureInterceptorsInstalled(): Promise<void>;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { dispatchInterceptedSpan } from "./_dispatch.js";
|
|
2
|
+
import { installGeminiInterceptor } from "./gemini.js";
|
|
3
|
+
import { installOpenAIInterceptor } from "./openai.js";
|
|
4
|
+
import { installAnthropicInterceptor } from "./anthropic.js";
|
|
5
|
+
// Memoized result of the one-and-only install attempt.
let _installPromise = null;
/**
 * Install ALL provider interceptors against the single stable dispatcher. Idempotent and memoized.
 * Called at import time and from configure()/init(). The Gemini interceptor patches retroactive
 * prototype methods, so the (async) install landing slightly after import is fine — capture works
 * regardless of when the user constructs their client.
 *
 * Installation is best-effort per provider: each installer is started in order (Gemini, OpenAI,
 * Anthropic), a synchronous throw or a rejection from one of them does not affect the others, and
 * the returned promise never rejects — so fire-and-forget callers (`void ensureInterceptorsInstalled()`)
 * cannot crash module import or raise an unhandled rejection.
 *
 * @returns A promise that resolves once every installer has settled.
 */
export function ensureInterceptorsInstalled() {
    if (_installPromise)
        return _installPromise;
    const installers = [installGeminiInterceptor, installOpenAIInterceptor, installAnthropicInterceptor];
    // The async arrow turns a synchronous throw into a rejection; allSettled then absorbs it.
    _installPromise = Promise.allSettled(installers.map(async (install) => install(dispatchInterceptedSpan))).then(() => { });
    return _installPromise;
}
|