retrace-sdk 0.11.1 → 0.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@ import { SpanType } from "../trace.js";
2
2
  import { genId, nowIso, truncateJson } from "../utils.js";
3
3
  import { isReplaying, consumeCassetteEntry } from "../replay.js";
4
4
  import { emitAnthropicToolCalls, emitAnthropicToolResults, parseToolArgs, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
5
+ import { dispatchRegisterOpenSpan, dispatchUnregisterOpenSpan } from "./_dispatch.js";
5
6
  const PRICING = {
6
7
  "claude-opus-4.7": [5.0, 25.0],
7
8
  "claude-opus-4.6": [5.0, 25.0],
@@ -63,7 +64,7 @@ function createPatchedCreate() {
63
64
  if (isReplaying()) {
64
65
  const entry = consumeCassetteEntry("anthropic.messages.create", "llm_call");
65
66
  if (entry) {
66
- const output = typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output || "");
67
+ const output = entry.output_raw ?? (typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output || ""));
67
68
  const span = {
68
69
  id: spanId, trace_id: "", parent_id: null,
69
70
  span_type: SpanType.LLM_CALL, name: "anthropic.messages.create", model,
@@ -93,30 +94,41 @@ function createPatchedCreate() {
93
94
  const toolAcc = {};
94
95
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
95
96
  const originalIterator = result[Symbol.asyncIterator]();
97
+ // Two-phase capture: finalize EXACTLY ONCE on clean drain (complete), early break / error
98
+ // (partial), or trace-end/exit (partial, via the sink) — never silently drop the span.
99
+ let finalized = false;
100
+ const finalize = (reason) => {
101
+ if (finalized)
102
+ return;
103
+ finalized = true;
104
+ dispatchUnregisterOpenSpan(spanId);
105
+ const durationMs = Date.now() - startMs;
106
+ const output = chunks.join("");
107
+ const span = {
108
+ id: spanId, trace_id: "", parent_id: null,
109
+ span_type: SpanType.LLM_CALL, name: "anthropic.messages.create", model,
110
+ input: truncateJson({ messages: messages.slice(0, 10) }),
111
+ output: truncateJson(output),
112
+ input_tokens: inputTokens, output_tokens: outputTokens,
113
+ cost: calcCost(model, inputTokens, outputTokens),
114
+ duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
115
+ metadata: { streaming: true, ...(reason === "partial" ? { partial: true } : {}), ...spanMeta },
116
+ };
117
+ onSpanCallback?.(span);
118
+ if (onSpanCallback && reason === "complete") {
119
+ emitAnthropicToolResults(messages, onSpanCallback);
120
+ const blocks = Object.values(toolAcc).map((t) => ({ type: "tool_use", id: t.id, name: t.name, input: parseToolArgs(t.json) }));
121
+ emitAnthropicToolCalls(blocks, spanId, model, onSpanCallback);
122
+ }
123
+ };
124
+ dispatchRegisterOpenSpan(spanId, () => finalize("partial"));
96
125
  const wrappedStream = {
97
126
  [Symbol.asyncIterator]() {
98
127
  return {
99
128
  async next() {
100
129
  const { value, done } = await originalIterator.next();
101
130
  if (done) {
102
- const durationMs = Date.now() - startMs;
103
- const output = chunks.join("");
104
- const span = {
105
- id: spanId, trace_id: "", parent_id: null,
106
- span_type: SpanType.LLM_CALL, name: "anthropic.messages.create", model,
107
- input: truncateJson({ messages: messages.slice(0, 10) }),
108
- output: truncateJson(output),
109
- input_tokens: inputTokens, output_tokens: outputTokens,
110
- cost: calcCost(model, inputTokens, outputTokens),
111
- duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
112
- metadata: { streaming: true, ...spanMeta },
113
- };
114
- onSpanCallback?.(span);
115
- if (onSpanCallback) {
116
- emitAnthropicToolResults(messages, onSpanCallback);
117
- const blocks = Object.values(toolAcc).map((t) => ({ type: "tool_use", id: t.id, name: t.name, input: parseToolArgs(t.json) }));
118
- emitAnthropicToolCalls(blocks, spanId, model, onSpanCallback);
119
- }
131
+ finalize("complete");
120
132
  return { value: undefined, done: true };
121
133
  }
122
134
  // Collect content_block_delta text
@@ -142,8 +154,8 @@ function createPatchedCreate() {
142
154
  }
143
155
  return { value, done: false };
144
156
  },
145
- return() { return originalIterator.return?.() ?? Promise.resolve({ value: undefined, done: true }); },
146
- throw(e) { return originalIterator.throw?.(e) ?? Promise.reject(e); },
157
+ return() { finalize("partial"); return originalIterator.return?.() ?? Promise.resolve({ value: undefined, done: true }); },
158
+ throw(e) { finalize("partial"); return originalIterator.throw?.(e) ?? Promise.reject(e); },
147
159
  };
148
160
  },
149
161
  };
@@ -45,7 +45,7 @@ function wrapGenerate(original) {
45
45
  if (isReplaying()) {
46
46
  const entry = consumeCassetteEntry("retrace.ai.generate", "llm_call");
47
47
  if (entry) {
48
- return { text: entry.output || "", usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 }, candidates: [] };
48
+ return { text: entry.output_raw ?? (entry.output || ""), usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 }, candidates: [] };
49
49
  }
50
50
  }
51
51
  try {
@@ -104,7 +104,7 @@ function wrapStream(original) {
104
104
  if (isReplaying()) {
105
105
  const entry = consumeCassetteEntry("retrace.ai.generate", "llm_call");
106
106
  if (entry) {
107
- const text = entry.output || "";
107
+ const text = entry.output_raw ?? (entry.output || "");
108
108
  async function* mockStream() { yield { text, usageMetadata: { promptTokenCount: 0, candidatesTokenCount: 0 } }; }
109
109
  return mockStream();
110
110
  }
@@ -4,6 +4,7 @@ import { isReplaying, consumeCassetteEntry } from "../replay.js";
4
4
  import { getConfig } from "../config.js";
5
5
  import { RetraceRateLimitError, RetraceAuthError, RetraceConnectionError } from "../errors.js";
6
6
  import { emitOpenAIToolCalls, emitOpenAIToolResults, parseToolArgs, resetToolResultDedup, extractToolSchemas, extractSamplingParams } from "./tool-spans.js";
7
+ import { dispatchRegisterOpenSpan, dispatchUnregisterOpenSpan } from "./_dispatch.js";
7
8
  /** Hardcoded fallback pricing ($/1M tokens: [input, output]). Updated periodically. */
8
9
  const FALLBACK_PRICING = {
9
10
  "gpt-5.5-pro": [30.0, 180.0],
@@ -120,7 +121,7 @@ function createPatchedCreate() {
120
121
  if (isReplaying()) {
121
122
  const entry = consumeCassetteEntry("openai.chat.completions.create", "llm_call");
122
123
  if (entry) {
123
- const output = typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output || "");
124
+ const output = entry.output_raw ?? (typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output || ""));
124
125
  const span = {
125
126
  id: spanId, trace_id: "", parent_id: null,
126
127
  span_type: SpanType.LLM_CALL, name: "openai.chat.completions.create", model,
@@ -151,31 +152,43 @@ function createPatchedCreate() {
151
152
  const toolAcc = {};
152
153
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
153
154
  const originalIterator = result[Symbol.asyncIterator]();
155
+ // Two-phase capture: register an OPEN span now and finalize EXACTLY ONCE — on clean drain
156
+ // (complete), on early break / error (partial), or at trace-end/exit (partial, via the sink).
157
+ // Previously the span was emitted only in the `done` branch, so an abandoned or errored
158
+ // stream silently lost its span entirely.
159
+ let finalized = false;
160
+ const finalize = (reason) => {
161
+ if (finalized)
162
+ return;
163
+ finalized = true;
164
+ dispatchUnregisterOpenSpan(spanId);
165
+ const durationMs = Date.now() - startMs;
166
+ const output = chunks.join("");
167
+ const span = {
168
+ id: spanId, trace_id: "", parent_id: null,
169
+ span_type: SpanType.LLM_CALL, name: "openai.chat.completions.create", model,
170
+ input: truncateJson({ messages: messages.slice(0, 10) }),
171
+ output: truncateJson(output),
172
+ input_tokens: inputTokens, output_tokens: outputTokens,
173
+ cost: calcCost(model, inputTokens, outputTokens),
174
+ duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
175
+ metadata: { streaming: true, ...(reason === "partial" ? { partial: true } : {}), ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) },
176
+ };
177
+ onSpanCallback?.(span);
178
+ if (onSpanCallback && reason === "complete") {
179
+ emitOpenAIToolResults(messages, onSpanCallback);
180
+ const accMsg = { tool_calls: Object.values(toolAcc).map((t) => ({ id: t.id, function: { name: t.name, arguments: parseToolArgs(t.args) } })) };
181
+ emitOpenAIToolCalls(accMsg, spanId, model, onSpanCallback);
182
+ }
183
+ };
184
+ dispatchRegisterOpenSpan(spanId, () => finalize("partial"));
154
185
  const wrappedStream = {
155
186
  [Symbol.asyncIterator]() {
156
187
  return {
157
188
  async next() {
158
189
  const { value, done } = await originalIterator.next();
159
190
  if (done) {
160
- // Stream complete — emit span
161
- const durationMs = Date.now() - startMs;
162
- const output = chunks.join("");
163
- const span = {
164
- id: spanId, trace_id: "", parent_id: null,
165
- span_type: SpanType.LLM_CALL, name: "openai.chat.completions.create", model,
166
- input: truncateJson({ messages: messages.slice(0, 10) }),
167
- output: truncateJson(output),
168
- input_tokens: inputTokens, output_tokens: outputTokens,
169
- cost: calcCost(model, inputTokens, outputTokens),
170
- duration_ms: durationMs, started_at: startedAt, ended_at: nowIso(),
171
- metadata: { streaming: true, ...(toolSchemas ? { tool_schemas: toolSchemas } : {}), ...(sampling ? { sampling } : {}) },
172
- };
173
- onSpanCallback?.(span);
174
- if (onSpanCallback) {
175
- emitOpenAIToolResults(messages, onSpanCallback);
176
- const accMsg = { tool_calls: Object.values(toolAcc).map((t) => ({ id: t.id, function: { name: t.name, arguments: parseToolArgs(t.args) } })) };
177
- emitOpenAIToolCalls(accMsg, spanId, model, onSpanCallback);
178
- }
191
+ finalize("complete");
179
192
  return { value: undefined, done: true };
180
193
  }
181
194
  // Collect content delta
@@ -203,8 +216,10 @@ function createPatchedCreate() {
203
216
  }
204
217
  return { value, done: false };
205
218
  },
206
- return() { return originalIterator.return?.() ?? Promise.resolve({ value: undefined, done: true }); },
207
- throw(e) { return originalIterator.throw?.(e) ?? Promise.reject(e); },
219
+ // Early break (consumer stops iterating) and errors must still finalize the span
220
+ // otherwise the streamed work is silently lost.
221
+ return() { finalize("partial"); return originalIterator.return?.() ?? Promise.resolve({ value: undefined, done: true }); },
222
+ throw(e) { finalize("partial"); return originalIterator.throw?.(e) ?? Promise.reject(e); },
208
223
  };
209
224
  },
210
225
  // Preserve tee/controller methods if present
package/dist/replay.d.ts CHANGED
@@ -16,6 +16,9 @@ export interface CassetteEntry {
16
16
  model: string | null;
17
17
  input: unknown;
18
18
  output: unknown;
19
+ /** Exact original token for byte-identical replay (e.g. scalar-numeric outputs whose JSON.parse
20
+ * form lost precision). Preferred over `output` when present. */
21
+ output_raw?: string;
19
22
  error: string | null;
20
23
  }
21
24
  export interface ReplayCommand {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "retrace-sdk",
3
- "version": "0.11.1",
3
+ "version": "0.11.3",
4
4
  "description": "The execution replay engine for AI agents. Record, replay, fork, and share agent executions.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -40,7 +40,7 @@
40
40
  "prepublishOnly": "npm run build"
41
41
  },
42
42
  "dependencies": {
43
- "ws": "8.20.0"
43
+ "ws": "^8.20.1"
44
44
  },
45
45
  "peerDependencies": {
46
46
  "@google/genai": ">=1.52.0",