bitfab 0.17.0 → 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -442,7 +442,7 @@ __export(index_exports, {
442
442
  module.exports = __toCommonJS(index_exports);
443
443
 
444
444
  // src/version.generated.ts
445
- var __version__ = "0.17.0";
445
+ var __version__ = "0.18.1";
446
446
 
447
447
  // src/constants.ts
448
448
  var DEFAULT_SERVICE_URL = "https://bitfab.ai";
@@ -1672,21 +1672,131 @@ function extractModelName(serialized, metadata) {
1672
1672
  }
1673
1673
  return void 0;
1674
1674
  }
/**
 * Validates a raw usage field as a token count.
 *
 * @param {unknown} value - Candidate value read from a usage payload.
 * @returns {number|null} `value` itself when it is a finite number
 *   (NaN and ±Infinity are rejected); `null` for everything else.
 */
function asTokenCount(value) {
  if (typeof value !== "number") {
    return null;
  }
  return Number.isFinite(value) ? value : null;
}
/**
 * Converts a raw token-usage record into one uniform shape:
 * `{ inputTokens, outputTokens, totalTokens, cachedInputTokens }`, where each
 * field is a finite number or `null`.
 *
 * The record's layout is recognised by which keys it carries, checked in this
 * order (the first match wins):
 *   1. `cache_read_input_tokens` / `cache_creation_input_tokens`
 *   2. `prompt_tokens` / `completion_tokens` (snake_case or camelCase)
 *   3. `prompt_token_count` / `candidates_token_count`
 *   4. `input_tokens` / `output_tokens`
 *
 * @param {unknown} raw - Usage payload; anything that is not a non-null,
 *   non-array object is rejected.
 * @returns {object|null} The normalized usage, or `null` when `raw` is not a
 *   record, matches no known layout, or yields no usable count.
 */
function normalizeTokenUsage(raw) {
  const isRecord = typeof raw === "object" && raw !== null && !Array.isArray(raw);
  if (!isRecord) {
    return null;
  }
  const hasAnyKey = (...keys) => keys.some((key) => key in raw);

  // Layout 1: cache counters are reported separately from input_tokens, so
  // the three input-side numbers are summed into a single input count.
  if (hasAnyKey("cache_read_input_tokens", "cache_creation_input_tokens")) {
    const cachedRead = asTokenCount(raw.cache_read_input_tokens);
    const cachedWrite = asTokenCount(raw.cache_creation_input_tokens);
    const uncachedInput = asTokenCount(raw.input_tokens);
    const output = asTokenCount(raw.output_tokens);
    const nothingUsable = [cachedRead, cachedWrite, uncachedInput, output].every(
      (count) => count === null
    );
    if (nothingUsable) {
      return null;
    }
    const input = (uncachedInput ?? 0) + (cachedRead ?? 0) + (cachedWrite ?? 0);
    return {
      inputTokens: input,
      outputTokens: output,
      totalTokens: input + (output ?? 0),
      cachedInputTokens: cachedRead
    };
  }

  // Layout 2: prompt/completion counters; the snake_case key is preferred
  // and the camelCase key is the fallback.
  if (hasAnyKey("prompt_tokens", "completion_tokens", "promptTokens", "completionTokens")) {
    const details = raw.prompt_tokens_details ?? {};
    const eitherCasing = (snakeKey, camelKey) => asTokenCount(raw[snakeKey]) ?? asTokenCount(raw[camelKey]);
    return withAnyTokenCount({
      inputTokens: eitherCasing("prompt_tokens", "promptTokens"),
      outputTokens: eitherCasing("completion_tokens", "completionTokens"),
      totalTokens: eitherCasing("total_tokens", "totalTokens"),
      cachedInputTokens: asTokenCount(details.cached_tokens)
    });
  }

  // Layout 3: *_token_count counters.
  if (hasAnyKey("prompt_token_count", "candidates_token_count")) {
    return withAnyTokenCount({
      inputTokens: asTokenCount(raw.prompt_token_count),
      outputTokens: asTokenCount(raw.candidates_token_count),
      totalTokens: asTokenCount(raw.total_token_count),
      cachedInputTokens: asTokenCount(raw.cached_content_token_count)
    });
  }

  // Layout 4: input/output counters without cache keys. A missing total is
  // derived only when both sides are known.
  if (hasAnyKey("input_tokens", "output_tokens")) {
    const details = raw.input_token_details ?? {};
    const input = asTokenCount(raw.input_tokens);
    const output = asTokenCount(raw.output_tokens);
    const reportedTotal = asTokenCount(raw.total_tokens);
    const canDeriveTotal = reportedTotal === null && input !== null && output !== null;
    return withAnyTokenCount({
      inputTokens: input,
      outputTokens: output,
      totalTokens: canDeriveTotal ? input + output : reportedTotal,
      cachedInputTokens: asTokenCount(details.cache_read)
    });
  }

  return null;
}
/**
 * Passes a normalized usage object through only if it carries information.
 *
 * @param {object} usage - Object with `inputTokens`, `outputTokens`,
 *   `totalTokens` and `cachedInputTokens` fields.
 * @returns {object|null} The same `usage` object when at least one of the
 *   four fields is not `null`; otherwise `null`.
 */
function withAnyTokenCount(usage) {
  const counts = [
    usage.inputTokens,
    usage.outputTokens,
    usage.totalTokens,
    usage.cachedInputTokens
  ];
  if (counts.some((count) => count !== null)) {
    return usage;
  }
  return null;
}
/**
 * Accumulates one normalized usage record into a running total, in place.
 *
 * @param {object} totals - Running totals; mutated. A `null` (or missing)
 *   total is treated as 0 the first time a value is added to it.
 * @param {object} usage - Usage record to add. Fields that are `null` are
 *   skipped, leaving the corresponding total untouched.
 * @returns {void}
 */
function addUsage(totals, usage) {
  const fields = ["inputTokens", "outputTokens", "totalTokens", "cachedInputTokens"];
  fields.forEach((field) => {
    const amount = usage[field];
    if (amount === null) {
      return;
    }
    const runningTotal = totals[field] ?? 0;
    totals[field] = runningTotal + amount;
  });
}
/**
 * Sums token usage across every generation message in a result.
 *
 * For each generation, usage is looked up on its `message`, trying in order
 * `usage_metadata`, then `response_metadata.token_usage`,
 * `response_metadata.usage` and `response_metadata.tokenUsage`; the first
 * source that normalizes successfully is used.
 *
 * @param {Array|undefined|null} generations - Batches of generations;
 *   entries that are not arrays are ignored.
 * @returns {object|null} Summed `{ inputTokens, outputTokens, totalTokens,
 *   cachedInputTokens }` (fields never reported stay `null`), or `null` when
 *   `generations` is empty/absent or no message carried usable usage.
 */
function usageFromGenerations(generations) {
  if (!generations?.length) {
    return null;
  }

  // Resolve the usage for a single message, stopping at the first source
  // that yields a normalized record.
  const usageOfMessage = (message) => {
    const metadata = message.response_metadata;
    const direct = normalizeTokenUsage(message.usage_metadata);
    if (direct !== null && direct !== undefined) {
      return direct;
    }
    for (const key of ["token_usage", "usage", "tokenUsage"]) {
      const parsed = normalizeTokenUsage(metadata?.[key]);
      if (parsed !== null && parsed !== undefined) {
        return parsed;
      }
    }
    return null;
  };

  const totals = {
    inputTokens: null,
    outputTokens: null,
    totalTokens: null,
    cachedInputTokens: null
  };
  let sawUsage = false;

  for (const batch of generations) {
    if (!Array.isArray(batch)) {
      continue;
    }
    for (const generation of batch) {
      const message = generation?.message;
      const isObjectMessage = Boolean(message) && typeof message === "object";
      if (!isObjectMessage) {
        continue;
      }
      const usage = usageOfMessage(message);
      if (usage) {
        sawUsage = true;
        addUsage(totals, usage);
      }
    }
  }

  return sawUsage ? totals : null;
}
1675
1781
  // Builds a usage object from an LLM result `output`. Lines marked "+" are
  // the 0.18.1 revision and lines marked "-" are the 0.17.0 revision.
  //
  // 0.18.1 behavior: usage is taken from usageFromGenerations(output.generations)
  // when that yields a result; otherwise from the first of llmOutput.tokenUsage,
  // llmOutput.token_usage, llmOutput.usage that normalizeTokenUsage accepts.
  // llmOutput is read from output.llmOutput, falling back to output.llm_output.
  // Only non-null fields are copied, so the returned object may be empty.
  // cachedInputTokens is a field the 0.17.0 lines never set.
  function extractUsage2(output) {
1782
+ const generations = output.generations;
1783
+ const llmOutput = output.llmOutput ?? output.llm_output;
1784
+ const normalized = usageFromGenerations(generations) ?? normalizeTokenUsage(llmOutput?.tokenUsage) ?? normalizeTokenUsage(llmOutput?.token_usage) ?? normalizeTokenUsage(llmOutput?.usage);
1676
1785
  const usage = {};
1677
- const llmOutput = output.llmOutput;
1678
- const tokenUsage = llmOutput?.tokenUsage ?? llmOutput?.token_usage ?? llmOutput?.usage ?? {};
1679
- const inputTokens = tokenUsage.promptTokens ?? tokenUsage.prompt_tokens ?? tokenUsage.input_tokens;
1680
- const outputTokens = tokenUsage.completionTokens ?? tokenUsage.completion_tokens ?? tokenUsage.output_tokens;
1681
- const totalTokens = tokenUsage.totalTokens ?? tokenUsage.total_tokens;
1682
- if (inputTokens !== void 0 && inputTokens !== null) {
1683
- usage.inputTokens = inputTokens;
  // No usage found anywhere: return the empty object rather than null.
1786
+ if (!normalized) {
1787
+ return usage;
1788
+ }
1789
+ if (normalized.inputTokens !== null) {
1790
+ usage.inputTokens = normalized.inputTokens;
1791
+ }
1792
+ if (normalized.outputTokens !== null) {
1793
+ usage.outputTokens = normalized.outputTokens;
1684
1794
  }
1685
- if (outputTokens !== void 0 && outputTokens !== null) {
1686
- usage.outputTokens = outputTokens;
1795
+ if (normalized.totalTokens !== null) {
1796
+ usage.totalTokens = normalized.totalTokens;
1687
1797
  }
1688
- if (totalTokens !== void 0 && totalTokens !== null) {
1689
- usage.totalTokens = totalTokens;
1798
+ if (normalized.cachedInputTokens !== null) {
1799
+ usage.cachedInputTokens = normalized.cachedInputTokens;
1690
1800
  }
1691
1801
  return usage;
1692
1802
  }
@@ -3074,6 +3184,9 @@ var Bitfab = class {
3074
3184
  };
3075
3185
  return runWithSpanStack(newStack, executeWithContext);
3076
3186
  };
3187
+ Object.defineProperty(wrappedFn, "_bitfabTraceFunctionKey", {
3188
+ value: traceFunctionKey
3189
+ });
3077
3190
  return wrappedFn;
3078
3191
  }
3079
3192
  /**
@@ -3245,23 +3358,40 @@ var Bitfab = class {
3245
3358
  * Fetches the last N traces for the given trace function key, re-runs each
3246
3359
  * through the provided function, and returns comparison data.
3247
3360
  *
3248
- * The function must have been wrapped with `withSpan` replay injects
3249
- * `testRunId` via async context so new spans are linked to the test run.
3361
+ * Accepts either a `withSpan`-wrapped function (under the same key) or any
3362
+ * plain callable: plain callables are wrapped internally so each replayed
3363
+ * invocation records a trace tied to the test run. The plain-callable form
3364
+ * is how handler-instrumented workflows (LangGraph/LangChain, Claude Agent
3365
+ * SDK) replay — those record traces under a key with no `withSpan`-wrapped
3366
+ * root in the app.
3250
3367
  *
3251
3368
  * @param traceFunctionKey - The trace function key to replay
3252
- * @param fn - The function to replay (must be the return value of `withSpan`)
3369
+ * @param fn - The function to run recorded inputs through
3253
3370
  * @param options - Optional replay options. When `traceIds` is passed,
3254
3371
  * `limit` is ignored (with a warning): an explicit ID list already
3255
3372
  * determines how many traces replay.
3256
3373
  * @returns ReplayResult with items, testRunId, and testRunUrl
3257
3374
  */
3258
3375
  // Lines marked "+" are the 0.18.1 revision and lines marked "-" are 0.17.0.
  // In 0.18.1 the callable handed to doReplay is chosen from `fn`'s
  // `_bitfabTraceFunctionKey` property:
  //   - property absent (undefined): `fn` is wrapped via this.withSpan under
  //     `traceFunctionKey`, named after fn.name or "Replay", with type "agent";
  //   - property equal to `traceFunctionKey`: `fn` is passed through unchanged;
  //   - property set to any other value: a BitfabError is thrown.
  // Because the method is async, that throw surfaces as a rejected promise.
  async replay(traceFunctionKey, fn, options) {
3376
+ const wrappedKey = fn._bitfabTraceFunctionKey;
3377
+ let replayFn = fn;
3378
+ if (wrappedKey === void 0) {
3379
+ replayFn = this.withSpan(
3380
+ traceFunctionKey,
3381
+ { name: fn.name || "Replay", type: "agent" },
3382
+ fn
3383
+ );
3384
+ } else if (wrappedKey !== traceFunctionKey) {
3385
+ throw new BitfabError(
3386
+ `Function is wrapped with trace function key '${wrappedKey}' but replay was called with '${traceFunctionKey}'. Pass matching keys, or pass the unwrapped function to replay it under the explicit key.`
3387
+ );
3388
+ }
3259
3389
  // The replay implementation is loaded on first use via init_replay().
  const { replay: doReplay } = await Promise.resolve().then(() => (init_replay(), replay_exports));
3260
3390
  return doReplay(
3261
3391
  this.httpClient,
3262
3392
  this.serviceUrl,
3263
3393
  traceFunctionKey,
3264
- fn,
3394
+ replayFn,
3265
3395
  options
3266
3396
  );
3267
3397
  }