bitfab 0.11.0 → 0.11.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -165,6 +165,12 @@ declare class BitfabError extends Error {
165
165
  * @param timeoutMs - Maximum time to wait in milliseconds (default: 5000)
166
166
  */
167
167
  declare function flushTraces(timeoutMs?: number): Promise<void>;
168
+ interface TokenUsage {
169
+ input: number | null;
170
+ output: number | null;
171
+ cached: number | null;
172
+ total: number | null;
173
+ }
168
174
 
169
175
  /**
170
176
  * LangGraph/LangChain callback handler for Bitfab tracing.
@@ -231,6 +237,49 @@ declare class BitfabLangGraphCallbackHandler {
231
237
  handleRetrieverError(error: unknown, runId: string): Promise<void>;
232
238
  }
233
239
 
240
+ /**
241
+ * Replay historical traces through a function and create a test run.
242
+ *
243
+ * The replay flow has three phases:
244
+ * 1. Start — fetches historical traces from the server and creates a test run
245
+ * 2. Execute — re-runs each trace's inputs through the provided function locally
246
+ * 3. Complete — marks the test run as completed on the server
247
+ */
248
+
249
+ interface ReplayOptions {
250
+ /** Maximum number of traces to replay (1–100, default 5). */
251
+ limit?: number;
252
+ /** Optional list of specific trace IDs to replay. */
253
+ traceIds?: string[];
254
+ /** Maximum number of items to process in parallel. Set to 1 for sequential. Default 10. */
255
+ maxConcurrency?: number;
256
+ }
257
+ interface ReplayItem<T> {
258
+ /** Deserialized inputs from the original trace. */
259
+ input: unknown[];
260
+ /** The result returned by the function during replay, or undefined on error. */
261
+ result: T | undefined;
262
+ /** The original output from the historical trace. */
263
+ originalOutput: unknown;
264
+ /** Error message if the function threw, or null on success. */
265
+ error: string | null;
266
+ /** Original trace duration in milliseconds, or null if timestamps are missing. */
267
+ durationMs: number | null;
268
+ /** Token usage from the original trace, or null if not captured. */
269
+ tokens: TokenUsage | null;
270
+ /** Model name from the original trace, or null if not captured. */
271
+ model: string | null;
272
+ }
273
+
274
+ interface ReplayResult<T> {
275
+ /** Individual replay items with inputs, results, and comparison data. */
276
+ items: ReplayItem<T>[];
277
+ /** The test run ID created on the server. */
278
+ testRunId: string;
279
+ /** Full URL to view the test run in the dashboard. */
280
+ testRunUrl: string;
281
+ }
282
+
234
283
  /**
235
284
  * Tracing utilities for external trace submission to Bitfab.
236
285
  *
@@ -665,16 +714,7 @@ declare class Bitfab {
665
714
  limit?: number;
666
715
  traceIds?: string[];
667
716
  maxConcurrency?: number;
668
- }): Promise<{
669
- items: Array<{
670
- input: unknown[];
671
- result: TReturn | undefined;
672
- originalOutput: unknown;
673
- error: string | null;
674
- }>;
675
- testRunId: string;
676
- testRunUrl: string;
677
- }>;
717
+ }): Promise<ReplayResult<TReturn>>;
678
718
  }
679
719
  /**
680
720
  * Represents a Bitfab function that can wrap user functions for tracing.
@@ -737,7 +777,7 @@ declare class BitfabFunction {
737
777
  /**
738
778
  * SDK version from package.json (injected at build time)
739
779
  */
740
- declare const __version__ = "0.11.0";
780
+ declare const __version__ = "0.11.4";
741
781
 
742
782
  /**
743
783
  * Constants for the Bitfab SDK.
@@ -747,40 +787,4 @@ declare const __version__ = "0.11.0";
747
787
  */
748
788
  declare const DEFAULT_SERVICE_URL = "https://bitfab.ai";
749
789
 
750
- /**
751
- * Replay historical traces through a function and create a test run.
752
- *
753
- * The replay flow has three phases:
754
- * 1. Start — fetches historical traces from the server and creates a test run
755
- * 2. Execute — re-runs each trace's inputs through the provided function locally
756
- * 3. Complete — marks the test run as completed on the server
757
- */
758
-
759
- interface ReplayOptions {
760
- /** Maximum number of traces to replay (1–100, default 5). */
761
- limit?: number;
762
- /** Optional list of specific trace IDs to replay. */
763
- traceIds?: string[];
764
- /** Maximum number of items to process in parallel. Set to 1 for sequential. Default 10. */
765
- maxConcurrency?: number;
766
- }
767
- interface ReplayItem<T> {
768
- /** Deserialized inputs from the original trace. */
769
- input: unknown[];
770
- /** The result returned by the function during replay, or undefined on error. */
771
- result: T | undefined;
772
- /** The original output from the historical trace. */
773
- originalOutput: unknown;
774
- /** Error message if the function threw, or null on success. */
775
- error: string | null;
776
- }
777
- interface ReplayResult<T> {
778
- /** Individual replay items with inputs, results, and comparison data. */
779
- items: ReplayItem<T>[];
780
- /** The test run ID created on the server. */
781
- testRunId: string;
782
- /** Full URL to view the test run in the dashboard. */
783
- testRunUrl: string;
784
- }
785
-
786
- export { type ActiveSpanContext, type AllowedEnvVars, type BamlExecutionResult, Bitfab, BitfabClaudeAgentHandler, type BitfabConfig, BitfabError, BitfabFunction, BitfabLangGraphCallbackHandler, BitfabOpenAITracingProcessor, type CurrentSpan, type CurrentTrace, DEFAULT_SERVICE_URL, type ProviderDefinition, type ReplayItem, type ReplayOptions, type ReplayResult, type SpanOptions, type SpanType, type TraceResponse, type TracingProcessor, type WrapBAMLOptions, type WrappedBamlFn, __version__, flushTraces, getCurrentSpan, getCurrentTrace };
790
+ export { type ActiveSpanContext, type AllowedEnvVars, type BamlExecutionResult, Bitfab, BitfabClaudeAgentHandler, type BitfabConfig, BitfabError, BitfabFunction, BitfabLangGraphCallbackHandler, BitfabOpenAITracingProcessor, type CurrentSpan, type CurrentTrace, DEFAULT_SERVICE_URL, type ProviderDefinition, type ReplayItem, type ReplayOptions, type ReplayResult, type SpanOptions, type SpanType, type TokenUsage, type TraceResponse, type TracingProcessor, type WrapBAMLOptions, type WrappedBamlFn, __version__, flushTraces, getCurrentSpan, getCurrentTrace };
package/dist/index.d.ts CHANGED
@@ -165,6 +165,12 @@ declare class BitfabError extends Error {
165
165
  * @param timeoutMs - Maximum time to wait in milliseconds (default: 5000)
166
166
  */
167
167
  declare function flushTraces(timeoutMs?: number): Promise<void>;
168
+ interface TokenUsage {
169
+ input: number | null;
170
+ output: number | null;
171
+ cached: number | null;
172
+ total: number | null;
173
+ }
168
174
 
169
175
  /**
170
176
  * LangGraph/LangChain callback handler for Bitfab tracing.
@@ -231,6 +237,49 @@ declare class BitfabLangGraphCallbackHandler {
231
237
  handleRetrieverError(error: unknown, runId: string): Promise<void>;
232
238
  }
233
239
 
240
+ /**
241
+ * Replay historical traces through a function and create a test run.
242
+ *
243
+ * The replay flow has three phases:
244
+ * 1. Start — fetches historical traces from the server and creates a test run
245
+ * 2. Execute — re-runs each trace's inputs through the provided function locally
246
+ * 3. Complete — marks the test run as completed on the server
247
+ */
248
+
249
+ interface ReplayOptions {
250
+ /** Maximum number of traces to replay (1–100, default 5). */
251
+ limit?: number;
252
+ /** Optional list of specific trace IDs to replay. */
253
+ traceIds?: string[];
254
+ /** Maximum number of items to process in parallel. Set to 1 for sequential. Default 10. */
255
+ maxConcurrency?: number;
256
+ }
257
+ interface ReplayItem<T> {
258
+ /** Deserialized inputs from the original trace. */
259
+ input: unknown[];
260
+ /** The result returned by the function during replay, or undefined on error. */
261
+ result: T | undefined;
262
+ /** The original output from the historical trace. */
263
+ originalOutput: unknown;
264
+ /** Error message if the function threw, or null on success. */
265
+ error: string | null;
266
+ /** Original trace duration in milliseconds, or null if timestamps are missing. */
267
+ durationMs: number | null;
268
+ /** Token usage from the original trace, or null if not captured. */
269
+ tokens: TokenUsage | null;
270
+ /** Model name from the original trace, or null if not captured. */
271
+ model: string | null;
272
+ }
273
+
274
+ interface ReplayResult<T> {
275
+ /** Individual replay items with inputs, results, and comparison data. */
276
+ items: ReplayItem<T>[];
277
+ /** The test run ID created on the server. */
278
+ testRunId: string;
279
+ /** Full URL to view the test run in the dashboard. */
280
+ testRunUrl: string;
281
+ }
282
+
234
283
  /**
235
284
  * Tracing utilities for external trace submission to Bitfab.
236
285
  *
@@ -665,16 +714,7 @@ declare class Bitfab {
665
714
  limit?: number;
666
715
  traceIds?: string[];
667
716
  maxConcurrency?: number;
668
- }): Promise<{
669
- items: Array<{
670
- input: unknown[];
671
- result: TReturn | undefined;
672
- originalOutput: unknown;
673
- error: string | null;
674
- }>;
675
- testRunId: string;
676
- testRunUrl: string;
677
- }>;
717
+ }): Promise<ReplayResult<TReturn>>;
678
718
  }
679
719
  /**
680
720
  * Represents a Bitfab function that can wrap user functions for tracing.
@@ -737,7 +777,7 @@ declare class BitfabFunction {
737
777
  /**
738
778
  * SDK version from package.json (injected at build time)
739
779
  */
740
- declare const __version__ = "0.11.0";
780
+ declare const __version__ = "0.11.4";
741
781
 
742
782
  /**
743
783
  * Constants for the Bitfab SDK.
@@ -747,40 +787,4 @@ declare const __version__ = "0.11.0";
747
787
  */
748
788
  declare const DEFAULT_SERVICE_URL = "https://bitfab.ai";
749
789
 
750
- /**
751
- * Replay historical traces through a function and create a test run.
752
- *
753
- * The replay flow has three phases:
754
- * 1. Start — fetches historical traces from the server and creates a test run
755
- * 2. Execute — re-runs each trace's inputs through the provided function locally
756
- * 3. Complete — marks the test run as completed on the server
757
- */
758
-
759
- interface ReplayOptions {
760
- /** Maximum number of traces to replay (1–100, default 5). */
761
- limit?: number;
762
- /** Optional list of specific trace IDs to replay. */
763
- traceIds?: string[];
764
- /** Maximum number of items to process in parallel. Set to 1 for sequential. Default 10. */
765
- maxConcurrency?: number;
766
- }
767
- interface ReplayItem<T> {
768
- /** Deserialized inputs from the original trace. */
769
- input: unknown[];
770
- /** The result returned by the function during replay, or undefined on error. */
771
- result: T | undefined;
772
- /** The original output from the historical trace. */
773
- originalOutput: unknown;
774
- /** Error message if the function threw, or null on success. */
775
- error: string | null;
776
- }
777
- interface ReplayResult<T> {
778
- /** Individual replay items with inputs, results, and comparison data. */
779
- items: ReplayItem<T>[];
780
- /** The test run ID created on the server. */
781
- testRunId: string;
782
- /** Full URL to view the test run in the dashboard. */
783
- testRunUrl: string;
784
- }
785
-
786
- export { type ActiveSpanContext, type AllowedEnvVars, type BamlExecutionResult, Bitfab, BitfabClaudeAgentHandler, type BitfabConfig, BitfabError, BitfabFunction, BitfabLangGraphCallbackHandler, BitfabOpenAITracingProcessor, type CurrentSpan, type CurrentTrace, DEFAULT_SERVICE_URL, type ProviderDefinition, type ReplayItem, type ReplayOptions, type ReplayResult, type SpanOptions, type SpanType, type TraceResponse, type TracingProcessor, type WrapBAMLOptions, type WrappedBamlFn, __version__, flushTraces, getCurrentSpan, getCurrentTrace };
790
+ export { type ActiveSpanContext, type AllowedEnvVars, type BamlExecutionResult, Bitfab, BitfabClaudeAgentHandler, type BitfabConfig, BitfabError, BitfabFunction, BitfabLangGraphCallbackHandler, BitfabOpenAITracingProcessor, type CurrentSpan, type CurrentTrace, DEFAULT_SERVICE_URL, type ProviderDefinition, type ReplayItem, type ReplayOptions, type ReplayResult, type SpanOptions, type SpanType, type TokenUsage, type TraceResponse, type TracingProcessor, type WrapBAMLOptions, type WrappedBamlFn, __version__, flushTraces, getCurrentSpan, getCurrentTrace };
package/dist/index.js CHANGED
@@ -6,13 +6,13 @@ import {
6
6
  BitfabOpenAITracingProcessor,
7
7
  getCurrentSpan,
8
8
  getCurrentTrace
9
- } from "./chunk-6EZCV5TU.js";
9
+ } from "./chunk-I3OKZ2TF.js";
10
10
  import {
11
11
  BitfabError,
12
12
  DEFAULT_SERVICE_URL,
13
13
  __version__,
14
14
  flushTraces
15
- } from "./chunk-C4KRLEXZ.js";
15
+ } from "./chunk-KAX2QQPS.js";
16
16
  export {
17
17
  Bitfab,
18
18
  BitfabClaudeAgentHandler,
package/dist/node.cjs CHANGED
@@ -81,7 +81,7 @@ var __version__;
81
81
  var init_version_generated = __esm({
82
82
  "src/version.generated.ts"() {
83
83
  "use strict";
84
- __version__ = "0.11.0";
84
+ __version__ = "0.11.4";
85
85
  }
86
86
  });
87
87
 
@@ -441,7 +441,15 @@ async function processItem(httpClient, serverItem, fn, testRunId) {
441
441
  } catch (e) {
442
442
  error = e instanceof Error ? e.message : String(e);
443
443
  }
444
- return { input: inputs, result, originalOutput, error };
444
+ return {
445
+ input: inputs,
446
+ result,
447
+ originalOutput,
448
+ error,
449
+ durationMs: serverItem.durationMs ?? null,
450
+ tokens: serverItem.tokens ?? null,
451
+ model: serverItem.model ?? null
452
+ };
445
453
  }
446
454
  async function mapWithConcurrency(tasks, maxConcurrency) {
447
455
  const results = new Array(tasks.length);
@@ -1995,6 +2003,65 @@ function runWithSpanStack(stack, fn) {
1995
2003
  throw error;
1996
2004
  }
1997
2005
  }
2006
+ function isAsyncGenerator(value) {
2007
+ if (value === null || typeof value !== "object") {
2008
+ return false;
2009
+ }
2010
+ const candidate = value;
2011
+ return typeof candidate.next === "function" && typeof candidate.return === "function" && typeof candidate.throw === "function" && typeof candidate[Symbol.asyncIterator] === "function";
2012
+ }
2013
+ function wrapAsyncGenerator(source, spanStack, sendSpan) {
2014
+ const yielded = [];
2015
+ let returnValue;
2016
+ let finalized = false;
2017
+ const finalize = (errorMsg) => {
2018
+ if (finalized) {
2019
+ return;
2020
+ }
2021
+ finalized = true;
2022
+ void sendSpan({
2023
+ result: { yielded, return: returnValue },
2024
+ ...errorMsg && { error: errorMsg }
2025
+ });
2026
+ };
2027
+ const step = (method, arg) => runWithSpanStack(spanStack, () => {
2028
+ const op = source[method];
2029
+ return op.call(source, arg);
2030
+ });
2031
+ const handle = async (method, arg) => {
2032
+ try {
2033
+ const result = await step(method, arg);
2034
+ if (result.done) {
2035
+ returnValue = result.value;
2036
+ finalize();
2037
+ } else {
2038
+ yielded.push(result.value);
2039
+ }
2040
+ return result;
2041
+ } catch (error) {
2042
+ finalize(error instanceof Error ? error.message : String(error));
2043
+ throw error;
2044
+ }
2045
+ };
2046
+ const wrapped = {
2047
+ next(arg) {
2048
+ return handle("next", arg);
2049
+ },
2050
+ return(value) {
2051
+ return handle("return", value);
2052
+ },
2053
+ throw(err) {
2054
+ return handle("throw", err);
2055
+ },
2056
+ [Symbol.asyncIterator]() {
2057
+ return wrapped;
2058
+ },
2059
+ [Symbol.asyncDispose]() {
2060
+ return handle("return", void 0).then(() => void 0);
2061
+ }
2062
+ };
2063
+ return wrapped;
2064
+ }
1998
2065
  var cachedCollectorClass;
1999
2066
  async function loadCollectorClass() {
2000
2067
  if (cachedCollectorClass !== void 0) {
@@ -2487,10 +2554,14 @@ var Bitfab = class {
2487
2554
  const inputs = args;
2488
2555
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
2489
2556
  if (isRootSpan && !activeTraceStates.has(traceId)) {
2557
+ const replayCtxAtRoot = getReplayContext();
2490
2558
  activeTraceStates.set(traceId, {
2491
2559
  traceId,
2492
2560
  startedAt,
2493
- contexts: []
2561
+ contexts: [],
2562
+ ...replayCtxAtRoot?.testRunId && {
2563
+ testRunId: replayCtxAtRoot.testRunId
2564
+ }
2494
2565
  });
2495
2566
  pendingSpanPromises.set(traceId, []);
2496
2567
  }
@@ -2537,7 +2608,8 @@ var Bitfab = class {
2537
2608
  endedAt,
2538
2609
  sessionId: traceState?.sessionId,
2539
2610
  metadata: traceState?.metadata,
2540
- contexts: traceState?.contexts ?? []
2611
+ contexts: traceState?.contexts ?? [],
2612
+ testRunId: traceState?.testRunId
2541
2613
  });
2542
2614
  activeTraceStates.delete(traceId);
2543
2615
  } else {
@@ -2565,6 +2637,9 @@ var Bitfab = class {
2565
2637
  throw error;
2566
2638
  });
2567
2639
  }
2640
+ if (isAsyncGenerator(result)) {
2641
+ return wrapAsyncGenerator(result, newStack, sendSpan);
2642
+ }
2568
2643
  void sendSpan({ result });
2569
2644
  return result;
2570
2645
  };
@@ -2617,7 +2692,8 @@ var Bitfab = class {
2617
2692
  traceFunctionKey: params.traceFunctionKey,
2618
2693
  externalTrace: rawTrace,
2619
2694
  completed: true,
2620
- ...params.sessionId && { sessionId: params.sessionId }
2695
+ ...params.sessionId && { sessionId: params.sessionId },
2696
+ ...params.testRunId && { testRunId: params.testRunId }
2621
2697
  });
2622
2698
  }
2623
2699
  /**