retrace-sdk 0.11.2 → 0.11.3
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/interceptors/anthropic.js +33 -21
- package/dist/interceptors/gemini.js +2 -2
- package/dist/interceptors/openai.js +37 -22
- package/dist/replay.d.ts +3 -0
- package/package.json +1 -1
|
@@ -2,6 +2,7 @@ import { SpanType } from "../trace.js";
|
|
|
2
2
|
import { genId, nowIso, truncateJson } from "../utils.js";
|
|
3
3
|
import { isReplaying, consumeCassetteEntry } from "../replay.js";
|
|
4
4
|
import { emitAnthropicToolCalls, emitAnthropicToolResults, parseToolArgs, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
|
|
5
|
+
import { dispatchRegisterOpenSpan, dispatchUnregisterOpenSpan } from "./_dispatch.js";
|
|
5
6
|
const PRICING = {
|
|
6
7
|
"claude-opus-4.7": [5.0, 25.0],
|
|
7
8
|
"claude-opus-4.6": [5.0, 25.0],
|
|
@@ -63,7 +64,7 @@ function createPatchedCreate() {
|
|
|
63
64
|
if (isReplaying()) {
|
|
64
65
|
const entry = consumeCassetteEntry("anthropic.messages.create", "llm_call");
|
|
65
66
|
if (entry) {
|
|
66
|
-
const output = typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output || "");
|
|
67
|
+
const output = entry.output_raw ?? (typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output || ""));
|
|
67
68
|
const span = {
|
|
68
69
|
id: spanId, trace_id: "", parent_id: null,
|
|
69
70
|
span_type: SpanType.LLM_CALL, name: "anthropic.messages.create", model,
|
|
@@ -93,30 +94,41 @@ function createPatchedCreate() {
|
|
|
93
94
|
const toolAcc = {};
|
|
94
95
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
95
96
|
const originalIterator = result[Symbol.asyncIterator]();
|
|
97
|
+
// Two-phase capture: finalize EXACTLY ONCE on clean drain (complete), early break / error
|
|
98
|
+
// (partial), or trace-end/exit (partial, via the sink) — never silently drop the span.
|
|
99
|
+
let finalized = false;
|
|
100
|
+
const finalize = (reason) => {
|
|
101
|
+
if (finalized)
|
|
102
|
+
return;
|
|
103
|
+
finalized = true;
|
|
104
|
+
dispatchUnregisterOpenSpan(spanId);
|
|
105
|
+
const durationMs = Date.now() - startMs;
|
|
106
|
+
const output = chunks.join("");
|
|
107
|
+
const span = {
|
|
108
|
+
id: spanId, trace_id: "", parent_id: null,
|
|
109
|
+
span_type: SpanType.LLM_CALL, name: "anthropic.messages.create", model,
|
|
110
|
+
input: truncateJson({ messages: messages.slice(0, 10) }),
|
|
111
|
+
output: truncateJson(output),
|
|
112
|
+
input_tokens: inputTokens, output_tokens: outputTokens,
|
|
113
|
+
cost: calcCost(model, inputTokens, outputTokens),
|
|
114
|
+
duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
|
|
115
|
+
metadata: { streaming: true, ...(reason === "partial" ? { partial: true } : {}), ...spanMeta },
|
|
116
|
+
};
|
|
117
|
+
onSpanCallback?.(span);
|
|
118
|
+
if (onSpanCallback && reason === "complete") {
|
|
119
|
+
emitAnthropicToolResults(messages, onSpanCallback);
|
|
120
|
+
const blocks = Object.values(toolAcc).map((t) => ({ type: "tool_use", id: t.id, name: t.name, input: parseToolArgs(t.json) }));
|
|
121
|
+
emitAnthropicToolCalls(blocks, spanId, model, onSpanCallback);
|
|
122
|
+
}
|
|
123
|
+
};
|
|
124
|
+
dispatchRegisterOpenSpan(spanId, () => finalize("partial"));
|
|
96
125
|
const wrappedStream = {
|
|
97
126
|
[Symbol.asyncIterator]() {
|
|
98
127
|
return {
|
|
99
128
|
async next() {
|
|
100
129
|
const { value, done } = await originalIterator.next();
|
|
101
130
|
if (done) {
|
|
102
|
-
|
|
103
|
-
const output = chunks.join("");
|
|
104
|
-
const span = {
|
|
105
|
-
id: spanId, trace_id: "", parent_id: null,
|
|
106
|
-
span_type: SpanType.LLM_CALL, name: "anthropic.messages.create", model,
|
|
107
|
-
input: truncateJson({ messages: messages.slice(0, 10) }),
|
|
108
|
-
output: truncateJson(output),
|
|
109
|
-
input_tokens: inputTokens, output_tokens: outputTokens,
|
|
110
|
-
cost: calcCost(model, inputTokens, outputTokens),
|
|
111
|
-
duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
|
|
112
|
-
metadata: { streaming: true, ...spanMeta },
|
|
113
|
-
};
|
|
114
|
-
onSpanCallback?.(span);
|
|
115
|
-
if (onSpanCallback) {
|
|
116
|
-
emitAnthropicToolResults(messages, onSpanCallback);
|
|
117
|
-
const blocks = Object.values(toolAcc).map((t) => ({ type: "tool_use", id: t.id, name: t.name, input: parseToolArgs(t.json) }));
|
|
118
|
-
emitAnthropicToolCalls(blocks, spanId, model, onSpanCallback);
|
|
119
|
-
}
|
|
131
|
+
finalize("complete");
|
|
120
132
|
return { value: undefined, done: true };
|
|
121
133
|
}
|
|
122
134
|
// Collect content_block_delta text
|
|
@@ -142,8 +154,8 @@ function createPatchedCreate() {
|
|
|
142
154
|
}
|
|
143
155
|
return { value, done: false };
|
|
144
156
|
},
|
|
145
|
-
return() { return originalIterator.return?.() ?? Promise.resolve({ value: undefined, done: true }); },
|
|
146
|
-
throw(e) { return originalIterator.throw?.(e) ?? Promise.reject(e); },
|
|
157
|
+
return() { finalize("partial"); return originalIterator.return?.() ?? Promise.resolve({ value: undefined, done: true }); },
|
|
158
|
+
throw(e) { finalize("partial"); return originalIterator.throw?.(e) ?? Promise.reject(e); },
|
|
147
159
|
};
|
|
148
160
|
},
|
|
149
161
|
};
|
|
@@ -45,7 +45,7 @@ function wrapGenerate(original) {
|
|
|
45
45
|
if (isReplaying()) {
|
|
46
46
|
const entry = consumeCassetteEntry("retrace.ai.generate", "llm_call");
|
|
47
47
|
if (entry) {
|
|
48
|
-
return { text: entry.output || "", usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 }, candidates: [] };
|
|
48
|
+
return { text: entry.output_raw ?? (entry.output || ""), usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 }, candidates: [] };
|
|
49
49
|
}
|
|
50
50
|
}
|
|
51
51
|
try {
|
|
@@ -104,7 +104,7 @@ function wrapStream(original) {
|
|
|
104
104
|
if (isReplaying()) {
|
|
105
105
|
const entry = consumeCassetteEntry("retrace.ai.generate", "llm_call");
|
|
106
106
|
if (entry) {
|
|
107
|
-
const text = entry.output || "";
|
|
107
|
+
const text = entry.output_raw ?? (entry.output || "");
|
|
108
108
|
async function* mockStream() { yield { text, usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 } }; }
|
|
109
109
|
return mockStream();
|
|
110
110
|
}
|
|
@@ -4,6 +4,7 @@ import { isReplaying, consumeCassetteEntry } from "../replay.js";
|
|
|
4
4
|
import { getConfig } from "../config.js";
|
|
5
5
|
import { RetraceRateLimitError, RetraceAuthError, RetraceConnectionError } from "../errors.js";
|
|
6
6
|
import { emitOpenAIToolCalls, emitOpenAIToolResults, parseToolArgs, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
|
|
7
|
+
import { dispatchRegisterOpenSpan, dispatchUnregisterOpenSpan } from "./_dispatch.js";
|
|
7
8
|
/** Hardcoded fallback pricing ($/1M tokens: [input, output]). Updated periodically. */
|
|
8
9
|
const FALLBACK_PRICING = {
|
|
9
10
|
"gpt-5.5-pro": [30.0, 180.0],
|
|
@@ -120,7 +121,7 @@ function createPatchedCreate() {
|
|
|
120
121
|
if (isReplaying()) {
|
|
121
122
|
const entry = consumeCassetteEntry("openai.chat.completions.create", "llm_call");
|
|
122
123
|
if (entry) {
|
|
123
|
-
const output = typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output || "");
|
|
124
|
+
const output = entry.output_raw ?? (typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output || ""));
|
|
124
125
|
const span = {
|
|
125
126
|
id: spanId, trace_id: "", parent_id: null,
|
|
126
127
|
span_type: SpanType.LLM_CALL, name: "openai.chat.completions.create", model,
|
|
@@ -151,31 +152,43 @@ function createPatchedCreate() {
|
|
|
151
152
|
const toolAcc = {};
|
|
152
153
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
153
154
|
const originalIterator = result[Symbol.asyncIterator]();
|
|
155
|
+
// Two-phase capture: register an OPEN span now and finalize EXACTLY ONCE — on clean drain
|
|
156
|
+
// (complete), on early break / error (partial), or at trace-end/exit (partial, via the sink).
|
|
157
|
+
// Previously the span was emitted only in the `done` branch, so an abandoned or errored
|
|
158
|
+
// stream silently lost its span entirely.
|
|
159
|
+
let finalized = false;
|
|
160
|
+
const finalize = (reason) => {
|
|
161
|
+
if (finalized)
|
|
162
|
+
return;
|
|
163
|
+
finalized = true;
|
|
164
|
+
dispatchUnregisterOpenSpan(spanId);
|
|
165
|
+
const durationMs = Date.now() - startMs;
|
|
166
|
+
const output = chunks.join("");
|
|
167
|
+
const span = {
|
|
168
|
+
id: spanId, trace_id: "", parent_id: null,
|
|
169
|
+
span_type: SpanType.LLM_CALL, name: "openai.chat.completions.create", model,
|
|
170
|
+
input: truncateJson({ messages: messages.slice(0, 10) }),
|
|
171
|
+
output: truncateJson(output),
|
|
172
|
+
input_tokens: inputTokens, output_tokens: outputTokens,
|
|
173
|
+
cost: calcCost(model, inputTokens, outputTokens),
|
|
174
|
+
duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
|
|
175
|
+
metadata: { streaming: true, ...(reason === "partial" ? { partial: true } : {}), ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) },
|
|
176
|
+
};
|
|
177
|
+
onSpanCallback?.(span);
|
|
178
|
+
if (onSpanCallback && reason === "complete") {
|
|
179
|
+
emitOpenAIToolResults(messages, onSpanCallback);
|
|
180
|
+
const accMsg = { tool_calls: Object.values(toolAcc).map((t) => ({ id: t.id, function: { name: t.name, arguments: parseToolArgs(t.args) } })) };
|
|
181
|
+
emitOpenAIToolCalls(accMsg, spanId, model, onSpanCallback);
|
|
182
|
+
}
|
|
183
|
+
};
|
|
184
|
+
dispatchRegisterOpenSpan(spanId, () => finalize("partial"));
|
|
154
185
|
const wrappedStream = {
|
|
155
186
|
[Symbol.asyncIterator]() {
|
|
156
187
|
return {
|
|
157
188
|
async next() {
|
|
158
189
|
const { value, done } = await originalIterator.next();
|
|
159
190
|
if (done) {
|
|
160
|
-
|
|
161
|
-
const durationMs = Date.now() - startMs;
|
|
162
|
-
const output = chunks.join("");
|
|
163
|
-
const span = {
|
|
164
|
-
id: spanId, trace_id: "", parent_id: null,
|
|
165
|
-
span_type: SpanType.LLM_CALL, name: "openai.chat.completions.create", model,
|
|
166
|
-
input: truncateJson({ messages: messages.slice(0, 10) }),
|
|
167
|
-
output: truncateJson(output),
|
|
168
|
-
input_tokens: inputTokens, output_tokens: outputTokens,
|
|
169
|
-
cost: calcCost(model, inputTokens, outputTokens),
|
|
170
|
-
duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
|
|
171
|
-
metadata: { streaming: true, ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) },
|
|
172
|
-
};
|
|
173
|
-
onSpanCallback?.(span);
|
|
174
|
-
if (onSpanCallback) {
|
|
175
|
-
emitOpenAIToolResults(messages, onSpanCallback);
|
|
176
|
-
const accMsg = { tool_calls: Object.values(toolAcc).map((t) => ({ id: t.id, function: { name: t.name, arguments: parseToolArgs(t.args) } })) };
|
|
177
|
-
emitOpenAIToolCalls(accMsg, spanId, model, onSpanCallback);
|
|
178
|
-
}
|
|
191
|
+
finalize("complete");
|
|
179
192
|
return { value: undefined, done: true };
|
|
180
193
|
}
|
|
181
194
|
// Collect content delta
|
|
@@ -203,8 +216,10 @@ function createPatchedCreate() {
|
|
|
203
216
|
}
|
|
204
217
|
return { value, done: false };
|
|
205
218
|
},
|
|
206
|
-
|
|
207
|
-
|
|
219
|
+
// Early break (consumer stops iterating) and errors must still finalize the span —
|
|
220
|
+
// otherwise the streamed work is silently lost.
|
|
221
|
+
return() { finalize("partial"); return originalIterator.return?.() ?? Promise.resolve({ value: undefined, done: true }); },
|
|
222
|
+
throw(e) { finalize("partial"); return originalIterator.throw?.(e) ?? Promise.reject(e); },
|
|
208
223
|
};
|
|
209
224
|
},
|
|
210
225
|
// Preserve tee/controller methods if present
|
package/dist/replay.d.ts
CHANGED
|
@@ -16,6 +16,9 @@ export interface CassetteEntry {
|
|
|
16
16
|
model: string | null;
|
|
17
17
|
input: unknown;
|
|
18
18
|
output: unknown;
|
|
19
|
+
/** Exact original token for byte-identical replay (e.g. scalar-numeric outputs whose JSON.parse
|
|
20
|
+
* form lost precision). Preferred over `output` when present. */
|
|
21
|
+
output_raw?: string;
|
|
19
22
|
error: string | null;
|
|
20
23
|
}
|
|
21
24
|
export interface ReplayCommand {
|