bitfab 0.11.0 → 0.11.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -165,6 +165,24 @@ declare class BitfabError extends Error {
165
165
  * @param timeoutMs - Maximum time to wait in milliseconds (default: 5000)
166
166
  */
167
167
  declare function flushTraces(timeoutMs?: number): Promise<void>;
168
+ interface TokenUsage {
169
+ input: number | null;
170
+ output: number | null;
171
+ cached: number | null;
172
+ total: number | null;
173
+ }
174
+ /**
175
+ * Describes a single file edited as part of a code change.
176
+ *
177
+ * - `path`: file path (relative to the repo root, or any consistent root)
178
+ * - `before`: file contents before the change ("" for newly created files)
179
+ * - `after`: file contents after the change ("" for deleted files)
180
+ */
181
+ interface CodeChangeFile {
182
+ path: string;
183
+ before: string;
184
+ after: string;
185
+ }
168
186
 
169
187
  /**
170
188
  * LangGraph/LangChain callback handler for Bitfab tracing.
@@ -231,6 +249,61 @@ declare class BitfabLangGraphCallbackHandler {
231
249
  handleRetrieverError(error: unknown, runId: string): Promise<void>;
232
250
  }
233
251
 
252
+ /**
253
+ * Replay historical traces through a function and create a test run.
254
+ *
255
+ * The replay flow has three phases:
256
+ * 1. Start — fetches historical traces from the server and creates a test run
257
+ * 2. Execute — re-runs each trace's inputs through the provided function locally
258
+ * 3. Complete — marks the test run as completed on the server
259
+ */
260
+
261
+ interface ReplayOptions {
262
+ /** Maximum number of traces to replay (1–100, default 5). */
263
+ limit?: number;
264
+ /** Optional list of specific trace IDs to replay. */
265
+ traceIds?: string[];
266
+ /** Maximum number of items to process in parallel. Set to 1 for sequential. Default 10. */
267
+ maxConcurrency?: number;
268
+ /**
269
+ * Description of the code change being tested in this replay. Stored on
270
+ * the resulting experiment so the change can be reviewed alongside results.
271
+ */
272
+ codeChangeDescription?: string;
273
+ /**
274
+ * Files edited as part of this code change. Each entry holds the file path
275
+ * and the full `before`/`after` contents — the agent reads each file before
276
+ * and after editing and passes the two strings. Use `""` for newly created
277
+ * files (`before`) or deleted files (`after`).
278
+ */
279
+ codeChangeFiles?: CodeChangeFile[];
280
+ }
281
+ interface ReplayItem<T> {
282
+ /** Deserialized inputs from the original trace. */
283
+ input: unknown[];
284
+ /** The result returned by the function during replay, or undefined on error. */
285
+ result: T | undefined;
286
+ /** The original output from the historical trace. */
287
+ originalOutput: unknown;
288
+ /** Error message if the function threw, or null on success. */
289
+ error: string | null;
290
+ /** Original trace duration in milliseconds, or null if timestamps are missing. */
291
+ durationMs: number | null;
292
+ /** Token usage from the original trace, or null if not captured. */
293
+ tokens: TokenUsage | null;
294
+ /** Model name from the original trace, or null if not captured. */
295
+ model: string | null;
296
+ }
297
+
298
+ interface ReplayResult<T> {
299
+ /** Individual replay items with inputs, results, and comparison data. */
300
+ items: ReplayItem<T>[];
301
+ /** The test run ID created on the server. */
302
+ testRunId: string;
303
+ /** Full URL to view the test run in the dashboard. */
304
+ testRunUrl: string;
305
+ }
306
+
234
307
  /**
235
308
  * Tracing utilities for external trace submission to Bitfab.
236
309
  *
@@ -665,16 +738,9 @@ declare class Bitfab {
665
738
  limit?: number;
666
739
  traceIds?: string[];
667
740
  maxConcurrency?: number;
668
- }): Promise<{
669
- items: Array<{
670
- input: unknown[];
671
- result: TReturn | undefined;
672
- originalOutput: unknown;
673
- error: string | null;
674
- }>;
675
- testRunId: string;
676
- testRunUrl: string;
677
- }>;
741
+ codeChangeDescription?: string;
742
+ codeChangeFiles?: CodeChangeFile[];
743
+ }): Promise<ReplayResult<TReturn>>;
678
744
  }
679
745
  /**
680
746
  * Represents a Bitfab function that can wrap user functions for tracing.
@@ -737,7 +803,7 @@ declare class BitfabFunction {
737
803
  /**
738
804
  * SDK version from package.json (injected at build time)
739
805
  */
740
- declare const __version__ = "0.11.0";
806
+ declare const __version__ = "0.11.5";
741
807
 
742
808
  /**
743
809
  * Constants for the Bitfab SDK.
@@ -747,40 +813,4 @@ declare const __version__ = "0.11.0";
747
813
  */
748
814
  declare const DEFAULT_SERVICE_URL = "https://bitfab.ai";
749
815
 
750
- /**
751
- * Replay historical traces through a function and create a test run.
752
- *
753
- * The replay flow has three phases:
754
- * 1. Start — fetches historical traces from the server and creates a test run
755
- * 2. Execute — re-runs each trace's inputs through the provided function locally
756
- * 3. Complete — marks the test run as completed on the server
757
- */
758
-
759
- interface ReplayOptions {
760
- /** Maximum number of traces to replay (1–100, default 5). */
761
- limit?: number;
762
- /** Optional list of specific trace IDs to replay. */
763
- traceIds?: string[];
764
- /** Maximum number of items to process in parallel. Set to 1 for sequential. Default 10. */
765
- maxConcurrency?: number;
766
- }
767
- interface ReplayItem<T> {
768
- /** Deserialized inputs from the original trace. */
769
- input: unknown[];
770
- /** The result returned by the function during replay, or undefined on error. */
771
- result: T | undefined;
772
- /** The original output from the historical trace. */
773
- originalOutput: unknown;
774
- /** Error message if the function threw, or null on success. */
775
- error: string | null;
776
- }
777
- interface ReplayResult<T> {
778
- /** Individual replay items with inputs, results, and comparison data. */
779
- items: ReplayItem<T>[];
780
- /** The test run ID created on the server. */
781
- testRunId: string;
782
- /** Full URL to view the test run in the dashboard. */
783
- testRunUrl: string;
784
- }
785
-
786
- export { type ActiveSpanContext, type AllowedEnvVars, type BamlExecutionResult, Bitfab, BitfabClaudeAgentHandler, type BitfabConfig, BitfabError, BitfabFunction, BitfabLangGraphCallbackHandler, BitfabOpenAITracingProcessor, type CurrentSpan, type CurrentTrace, DEFAULT_SERVICE_URL, type ProviderDefinition, type ReplayItem, type ReplayOptions, type ReplayResult, type SpanOptions, type SpanType, type TraceResponse, type TracingProcessor, type WrapBAMLOptions, type WrappedBamlFn, __version__, flushTraces, getCurrentSpan, getCurrentTrace };
816
+ export { type ActiveSpanContext, type AllowedEnvVars, type BamlExecutionResult, Bitfab, BitfabClaudeAgentHandler, type BitfabConfig, BitfabError, BitfabFunction, BitfabLangGraphCallbackHandler, BitfabOpenAITracingProcessor, type CodeChangeFile, type CurrentSpan, type CurrentTrace, DEFAULT_SERVICE_URL, type ProviderDefinition, type ReplayItem, type ReplayOptions, type ReplayResult, type SpanOptions, type SpanType, type TokenUsage, type TraceResponse, type TracingProcessor, type WrapBAMLOptions, type WrappedBamlFn, __version__, flushTraces, getCurrentSpan, getCurrentTrace };
package/dist/index.d.ts CHANGED
@@ -165,6 +165,24 @@ declare class BitfabError extends Error {
165
165
  * @param timeoutMs - Maximum time to wait in milliseconds (default: 5000)
166
166
  */
167
167
  declare function flushTraces(timeoutMs?: number): Promise<void>;
168
+ interface TokenUsage {
169
+ input: number | null;
170
+ output: number | null;
171
+ cached: number | null;
172
+ total: number | null;
173
+ }
174
+ /**
175
+ * Describes a single file edited as part of a code change.
176
+ *
177
+ * - `path`: file path (relative to the repo root, or any consistent root)
178
+ * - `before`: file contents before the change ("" for newly created files)
179
+ * - `after`: file contents after the change ("" for deleted files)
180
+ */
181
+ interface CodeChangeFile {
182
+ path: string;
183
+ before: string;
184
+ after: string;
185
+ }
168
186
 
169
187
  /**
170
188
  * LangGraph/LangChain callback handler for Bitfab tracing.
@@ -231,6 +249,61 @@ declare class BitfabLangGraphCallbackHandler {
231
249
  handleRetrieverError(error: unknown, runId: string): Promise<void>;
232
250
  }
233
251
 
252
+ /**
253
+ * Replay historical traces through a function and create a test run.
254
+ *
255
+ * The replay flow has three phases:
256
+ * 1. Start — fetches historical traces from the server and creates a test run
257
+ * 2. Execute — re-runs each trace's inputs through the provided function locally
258
+ * 3. Complete — marks the test run as completed on the server
259
+ */
260
+
261
+ interface ReplayOptions {
262
+ /** Maximum number of traces to replay (1–100, default 5). */
263
+ limit?: number;
264
+ /** Optional list of specific trace IDs to replay. */
265
+ traceIds?: string[];
266
+ /** Maximum number of items to process in parallel. Set to 1 for sequential. Default 10. */
267
+ maxConcurrency?: number;
268
+ /**
269
+ * Description of the code change being tested in this replay. Stored on
270
+ * the resulting experiment so the change can be reviewed alongside results.
271
+ */
272
+ codeChangeDescription?: string;
273
+ /**
274
+ * Files edited as part of this code change. Each entry holds the file path
275
+ * and the full `before`/`after` contents — the agent reads each file before
276
+ * and after editing and passes the two strings. Use `""` for newly created
277
+ * files (`before`) or deleted files (`after`).
278
+ */
279
+ codeChangeFiles?: CodeChangeFile[];
280
+ }
281
+ interface ReplayItem<T> {
282
+ /** Deserialized inputs from the original trace. */
283
+ input: unknown[];
284
+ /** The result returned by the function during replay, or undefined on error. */
285
+ result: T | undefined;
286
+ /** The original output from the historical trace. */
287
+ originalOutput: unknown;
288
+ /** Error message if the function threw, or null on success. */
289
+ error: string | null;
290
+ /** Original trace duration in milliseconds, or null if timestamps are missing. */
291
+ durationMs: number | null;
292
+ /** Token usage from the original trace, or null if not captured. */
293
+ tokens: TokenUsage | null;
294
+ /** Model name from the original trace, or null if not captured. */
295
+ model: string | null;
296
+ }
297
+
298
+ interface ReplayResult<T> {
299
+ /** Individual replay items with inputs, results, and comparison data. */
300
+ items: ReplayItem<T>[];
301
+ /** The test run ID created on the server. */
302
+ testRunId: string;
303
+ /** Full URL to view the test run in the dashboard. */
304
+ testRunUrl: string;
305
+ }
306
+
234
307
  /**
235
308
  * Tracing utilities for external trace submission to Bitfab.
236
309
  *
@@ -665,16 +738,9 @@ declare class Bitfab {
665
738
  limit?: number;
666
739
  traceIds?: string[];
667
740
  maxConcurrency?: number;
668
- }): Promise<{
669
- items: Array<{
670
- input: unknown[];
671
- result: TReturn | undefined;
672
- originalOutput: unknown;
673
- error: string | null;
674
- }>;
675
- testRunId: string;
676
- testRunUrl: string;
677
- }>;
741
+ codeChangeDescription?: string;
742
+ codeChangeFiles?: CodeChangeFile[];
743
+ }): Promise<ReplayResult<TReturn>>;
678
744
  }
679
745
  /**
680
746
  * Represents a Bitfab function that can wrap user functions for tracing.
@@ -737,7 +803,7 @@ declare class BitfabFunction {
737
803
  /**
738
804
  * SDK version from package.json (injected at build time)
739
805
  */
740
- declare const __version__ = "0.11.0";
806
+ declare const __version__ = "0.11.5";
741
807
 
742
808
  /**
743
809
  * Constants for the Bitfab SDK.
@@ -747,40 +813,4 @@ declare const __version__ = "0.11.0";
747
813
  */
748
814
  declare const DEFAULT_SERVICE_URL = "https://bitfab.ai";
749
815
 
750
- /**
751
- * Replay historical traces through a function and create a test run.
752
- *
753
- * The replay flow has three phases:
754
- * 1. Start — fetches historical traces from the server and creates a test run
755
- * 2. Execute — re-runs each trace's inputs through the provided function locally
756
- * 3. Complete — marks the test run as completed on the server
757
- */
758
-
759
- interface ReplayOptions {
760
- /** Maximum number of traces to replay (1–100, default 5). */
761
- limit?: number;
762
- /** Optional list of specific trace IDs to replay. */
763
- traceIds?: string[];
764
- /** Maximum number of items to process in parallel. Set to 1 for sequential. Default 10. */
765
- maxConcurrency?: number;
766
- }
767
- interface ReplayItem<T> {
768
- /** Deserialized inputs from the original trace. */
769
- input: unknown[];
770
- /** The result returned by the function during replay, or undefined on error. */
771
- result: T | undefined;
772
- /** The original output from the historical trace. */
773
- originalOutput: unknown;
774
- /** Error message if the function threw, or null on success. */
775
- error: string | null;
776
- }
777
- interface ReplayResult<T> {
778
- /** Individual replay items with inputs, results, and comparison data. */
779
- items: ReplayItem<T>[];
780
- /** The test run ID created on the server. */
781
- testRunId: string;
782
- /** Full URL to view the test run in the dashboard. */
783
- testRunUrl: string;
784
- }
785
-
786
- export { type ActiveSpanContext, type AllowedEnvVars, type BamlExecutionResult, Bitfab, BitfabClaudeAgentHandler, type BitfabConfig, BitfabError, BitfabFunction, BitfabLangGraphCallbackHandler, BitfabOpenAITracingProcessor, type CurrentSpan, type CurrentTrace, DEFAULT_SERVICE_URL, type ProviderDefinition, type ReplayItem, type ReplayOptions, type ReplayResult, type SpanOptions, type SpanType, type TraceResponse, type TracingProcessor, type WrapBAMLOptions, type WrappedBamlFn, __version__, flushTraces, getCurrentSpan, getCurrentTrace };
816
+ export { type ActiveSpanContext, type AllowedEnvVars, type BamlExecutionResult, Bitfab, BitfabClaudeAgentHandler, type BitfabConfig, BitfabError, BitfabFunction, BitfabLangGraphCallbackHandler, BitfabOpenAITracingProcessor, type CodeChangeFile, type CurrentSpan, type CurrentTrace, DEFAULT_SERVICE_URL, type ProviderDefinition, type ReplayItem, type ReplayOptions, type ReplayResult, type SpanOptions, type SpanType, type TokenUsage, type TraceResponse, type TracingProcessor, type WrapBAMLOptions, type WrappedBamlFn, __version__, flushTraces, getCurrentSpan, getCurrentTrace };
package/dist/index.js CHANGED
@@ -6,13 +6,13 @@ import {
6
6
  BitfabOpenAITracingProcessor,
7
7
  getCurrentSpan,
8
8
  getCurrentTrace
9
- } from "./chunk-6EZCV5TU.js";
9
+ } from "./chunk-GWHHP5PL.js";
10
10
  import {
11
11
  BitfabError,
12
12
  DEFAULT_SERVICE_URL,
13
13
  __version__,
14
14
  flushTraces
15
- } from "./chunk-C4KRLEXZ.js";
15
+ } from "./chunk-KSUI74KL.js";
16
16
  export {
17
17
  Bitfab,
18
18
  BitfabClaudeAgentHandler,
package/dist/node.cjs CHANGED
@@ -81,7 +81,7 @@ var __version__;
81
81
  var init_version_generated = __esm({
82
82
  "src/version.generated.ts"() {
83
83
  "use strict";
84
- __version__ = "0.11.0";
84
+ __version__ = "0.11.5";
85
85
  }
86
86
  });
87
87
 
@@ -285,11 +285,17 @@ var init_http = __esm({
285
285
  * Start a replay session by fetching historical traces.
286
286
  * Blocking call — creates a test run and returns lightweight item references.
287
287
  */
288
- async startReplay(traceFunctionKey, limit, traceIds) {
288
+ async startReplay(traceFunctionKey, limit, traceIds, codeChangeDescription, codeChangeFiles) {
289
289
  const payload = { traceFunctionKey, limit };
290
290
  if (traceIds) {
291
291
  payload.traceIds = traceIds;
292
292
  }
293
+ if (codeChangeDescription !== void 0) {
294
+ payload.codeChangeDescription = codeChangeDescription;
295
+ }
296
+ if (codeChangeFiles !== void 0) {
297
+ payload.codeChangeFiles = codeChangeFiles;
298
+ }
293
299
  return this.request("/api/sdk/replay/start", payload, {
294
300
  timeout: 3e4
295
301
  });
@@ -441,7 +447,15 @@ async function processItem(httpClient, serverItem, fn, testRunId) {
441
447
  } catch (e) {
442
448
  error = e instanceof Error ? e.message : String(e);
443
449
  }
444
- return { input: inputs, result, originalOutput, error };
450
+ return {
451
+ input: inputs,
452
+ result,
453
+ originalOutput,
454
+ error,
455
+ durationMs: serverItem.durationMs ?? null,
456
+ tokens: serverItem.tokens ?? null,
457
+ model: serverItem.model ?? null
458
+ };
445
459
  }
446
460
  async function mapWithConcurrency(tasks, maxConcurrency) {
447
461
  const results = new Array(tasks.length);
@@ -468,7 +482,9 @@ async function replay(httpClient, serviceUrl, traceFunctionKey, fn, options) {
468
482
  } = await httpClient.startReplay(
469
483
  traceFunctionKey,
470
484
  options?.limit ?? 5,
471
- options?.traceIds
485
+ options?.traceIds,
486
+ options?.codeChangeDescription,
487
+ options?.codeChangeFiles
472
488
  );
473
489
  const maxConcurrency = options?.maxConcurrency ?? 10;
474
490
  const tasks = serverItems.map(
@@ -1995,6 +2011,65 @@ function runWithSpanStack(stack, fn) {
1995
2011
  throw error;
1996
2012
  }
1997
2013
  }
2014
+ function isAsyncGenerator(value) {
2015
+ if (value === null || typeof value !== "object") {
2016
+ return false;
2017
+ }
2018
+ const candidate = value;
2019
+ return typeof candidate.next === "function" && typeof candidate.return === "function" && typeof candidate.throw === "function" && typeof candidate[Symbol.asyncIterator] === "function";
2020
+ }
2021
+ function wrapAsyncGenerator(source, spanStack, sendSpan) {
2022
+ const yielded = [];
2023
+ let returnValue;
2024
+ let finalized = false;
2025
+ const finalize = (errorMsg) => {
2026
+ if (finalized) {
2027
+ return;
2028
+ }
2029
+ finalized = true;
2030
+ void sendSpan({
2031
+ result: { yielded, return: returnValue },
2032
+ ...errorMsg && { error: errorMsg }
2033
+ });
2034
+ };
2035
+ const step = (method, arg) => runWithSpanStack(spanStack, () => {
2036
+ const op = source[method];
2037
+ return op.call(source, arg);
2038
+ });
2039
+ const handle = async (method, arg) => {
2040
+ try {
2041
+ const result = await step(method, arg);
2042
+ if (result.done) {
2043
+ returnValue = result.value;
2044
+ finalize();
2045
+ } else {
2046
+ yielded.push(result.value);
2047
+ }
2048
+ return result;
2049
+ } catch (error) {
2050
+ finalize(error instanceof Error ? error.message : String(error));
2051
+ throw error;
2052
+ }
2053
+ };
2054
+ const wrapped = {
2055
+ next(arg) {
2056
+ return handle("next", arg);
2057
+ },
2058
+ return(value) {
2059
+ return handle("return", value);
2060
+ },
2061
+ throw(err) {
2062
+ return handle("throw", err);
2063
+ },
2064
+ [Symbol.asyncIterator]() {
2065
+ return wrapped;
2066
+ },
2067
+ [Symbol.asyncDispose]() {
2068
+ return handle("return", void 0).then(() => void 0);
2069
+ }
2070
+ };
2071
+ return wrapped;
2072
+ }
1998
2073
  var cachedCollectorClass;
1999
2074
  async function loadCollectorClass() {
2000
2075
  if (cachedCollectorClass !== void 0) {
@@ -2487,10 +2562,14 @@ var Bitfab = class {
2487
2562
  const inputs = args;
2488
2563
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
2489
2564
  if (isRootSpan && !activeTraceStates.has(traceId)) {
2565
+ const replayCtxAtRoot = getReplayContext();
2490
2566
  activeTraceStates.set(traceId, {
2491
2567
  traceId,
2492
2568
  startedAt,
2493
- contexts: []
2569
+ contexts: [],
2570
+ ...replayCtxAtRoot?.testRunId && {
2571
+ testRunId: replayCtxAtRoot.testRunId
2572
+ }
2494
2573
  });
2495
2574
  pendingSpanPromises.set(traceId, []);
2496
2575
  }
@@ -2537,7 +2616,8 @@ var Bitfab = class {
2537
2616
  endedAt,
2538
2617
  sessionId: traceState?.sessionId,
2539
2618
  metadata: traceState?.metadata,
2540
- contexts: traceState?.contexts ?? []
2619
+ contexts: traceState?.contexts ?? [],
2620
+ testRunId: traceState?.testRunId
2541
2621
  });
2542
2622
  activeTraceStates.delete(traceId);
2543
2623
  } else {
@@ -2565,6 +2645,9 @@ var Bitfab = class {
2565
2645
  throw error;
2566
2646
  });
2567
2647
  }
2648
+ if (isAsyncGenerator(result)) {
2649
+ return wrapAsyncGenerator(result, newStack, sendSpan);
2650
+ }
2568
2651
  void sendSpan({ result });
2569
2652
  return result;
2570
2653
  };
@@ -2617,7 +2700,8 @@ var Bitfab = class {
2617
2700
  traceFunctionKey: params.traceFunctionKey,
2618
2701
  externalTrace: rawTrace,
2619
2702
  completed: true,
2620
- ...params.sessionId && { sessionId: params.sessionId }
2703
+ ...params.sessionId && { sessionId: params.sessionId },
2704
+ ...params.testRunId && { testRunId: params.testRunId }
2621
2705
  });
2622
2706
  }
2623
2707
  /**