npm - webgpu-profiler - Versions diffs - 0.1.0 - Mend

webgpu-profiler 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/LICENSE +21 -0
package/README.md +97 -0
package/dist/index.d.ts +3 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +3 -0
package/dist/index.js.map +1 -0
package/dist/instrument.d.ts +115 -0
package/dist/instrument.d.ts.map +1 -0
package/dist/instrument.js +315 -0
package/dist/instrument.js.map +1 -0
package/dist/profiler.d.ts +48 -0
package/dist/profiler.d.ts.map +1 -0
package/dist/profiler.js +105 -0
package/dist/profiler.js.map +1 -0
package/dist/react/MemoryHUD.d.ts +27 -0
package/dist/react/MemoryHUD.d.ts.map +1 -0
package/dist/react/MemoryHUD.js +232 -0
package/dist/react/MemoryHUD.js.map +1 -0
package/dist/react/index.d.ts +2 -0
package/dist/react/index.d.ts.map +1 -0
package/dist/react/index.js +2 -0
package/dist/react/index.js.map +1 -0
package/package.json +73 -0
package/src/index.ts +20 -0
package/src/instrument.ts +377 -0
package/src/profiler.ts +171 -0
package/src/react/MemoryHUD.tsx +438 -0
package/src/react/index.ts +1 -0

package/src/instrument.ts ADDED Viewed

@@ -0,0 +1,377 @@
+/**
+ * Patches `GPUDevice.createBuffer` and `GPUDevice.createTexture` to track
+ * every GPU resource the page allocates. Combined with a `.destroy()` patch
+ * on each returned object, this gives a live view of all WebGPU memory
+ * the JS API can see.
+ *
+ * Why monkey-patch the device:
+ *   - WebGPU has *exactly two* JS-visible allocation entry points
+ *     (`createBuffer`, `createTexture`). Patching both captures everything
+ *     a renderer, post-processing nodes, and compute services allocate.
+ *   - No need to instrument production modules. This is the same pattern
+ *     used by React DevTools, Vue DevTools, and Chrome's GPU inspector.
+ *
+ * What is NOT captured (driver-internal, not exposed to JS):
+ *   - Canvas swapchain backbuffer (created by `context.configure()`).
+ *   - Driver-side staging buffers, residency systems, MSAA backing,
+ *     pipeline layouts, bind group memory.
+ *
+ * ## Timing
+ *
+ * Call `instrumentDevice(device)` *after* the renderer has obtained a
+ * `GPUDevice`, but *before* any other code allocates resources. In
+ * practice: right after the renderer's `init()` promise resolves. A few
+ * allocations (the swapchain, a couple of renderer internals) happen
+ * during init itself and will not be captured; this untracked amount is
+ * typically under 5 MB.
+ *
+ * ## Usage
+ *
+ *     import { instrumentDevice } from "webgpu-profiler";
+ *
+ *     const inst = instrumentDevice(device);
+ *     // ... time passes ...
+ *     console.log(`GPU bytes: ${inst.bytes()}`);
+ *     // When done:
+ *     inst.uninstrument();
+ *
+ * The returned maps are live views — iterate at query time, do not cache.
+ * Entries are removed automatically when `buffer.destroy()` or
+ * `texture.destroy()` is called.
+ */
+/**
+ * Record kept for one tracked buffer: the `GPUBuffer` returned by the patched
+ * `createBuffer` and the descriptor object that was passed to that call.
+ */
+export interface TrackedBuffer {
+  buffer: GPUBuffer;
+  descriptor: GPUBufferDescriptor;
+}
+/**
+ * Record kept for one tracked texture: the `GPUTexture` returned by the
+ * patched `createTexture` and the descriptor object that was passed to that
+ * call.
+ */
+export interface TrackedTexture {
+  texture: GPUTexture;
+  descriptor: GPUTextureDescriptor;
+}
+/**
+ * Handle returned by `instrumentDevice`: the live resource maps for one
+ * patched `GPUDevice`, byte totals computed from them, and a way to undo the
+ * patch.
+ */
+export interface DeviceInstrumentation {
+  /** The device whose `createBuffer` / `createTexture` were patched. */
+  readonly device: GPUDevice;
+  /** Tracked buffers; an entry is removed when that buffer's `destroy()` runs. */
+  readonly buffers: ReadonlyMap<GPUBuffer, TrackedBuffer>;
+  /** Tracked textures; an entry is removed when that texture's `destroy()` runs. */
+  readonly textures: ReadonlyMap<GPUTexture, TrackedTexture>;
+  /** Sum of all tracked buffer sizes, in bytes. */
+  bufferBytes(): number;
+  /** Sum of all tracked texture sizes (across formats, mip chains, layers). */
+  textureBytes(): number;
+  /** Combined total in bytes. */
+  bytes(): number;
+  /** Remove patches and restore the original device methods. */
+  uninstrument(): void;
+}
+// ── GPUTextureFormat → bytes per texel ──────────────────────────────────────
+//
+// Lookup table of bytes per texel, keyed by GPUTextureFormat string. For
+// block-compressed formats the value is the effective per-texel rate
+// (BC1 = 0.5, BC7 = 1.0, etc.), so it can be fractional. Approximate
+// entries (e.g. depth24plus) are flagged inline. ASTC formats are not listed
+// here; `bytesPerTexel()` derives them from the format name and falls back
+// to 4 for any format it does not recognise.
+const FORMAT_BPP: Record<string, number> = {
+  // 8-bit single channel
+  r8unorm: 1, r8snorm: 1, r8uint: 1, r8sint: 1,
+  // 8-bit two channel
+  rg8unorm: 2, rg8snorm: 2, rg8uint: 2, rg8sint: 2,
+  // 8-bit four channel
+  rgba8unorm: 4, "rgba8unorm-srgb": 4,
+  rgba8snorm: 4, rgba8uint: 4, rgba8sint: 4,
+  bgra8unorm: 4, "bgra8unorm-srgb": 4,
+  // 16-bit single channel
+  r16float: 2, r16uint: 2, r16sint: 2, r16unorm: 2, r16snorm: 2,
+  // 16-bit two channel
+  rg16float: 4, rg16uint: 4, rg16sint: 4, rg16unorm: 4, rg16snorm: 4,
+  // 16-bit four channel
+  rgba16float: 8, rgba16uint: 8, rgba16sint: 8,
+  rgba16unorm: 8, rgba16snorm: 8,
+  // 32-bit single channel
+  r32float: 4, r32uint: 4, r32sint: 4,
+  // 32-bit two channel
+  rg32float: 8, rg32uint: 8, rg32sint: 8,
+  // 32-bit four channel
+  rgba32float: 16, rgba32uint: 16, rgba32sint: 16,
+  // Packed
+  rgb9e5ufloat: 4,
+  rgb10a2unorm: 4, rgb10a2uint: 4,
+  rg11b10ufloat: 4,
+  // Depth / stencil
+  stencil8: 1,
+  depth16unorm: 2,
+  depth24plus: 4, // implementation-defined, conservative
+  "depth24plus-stencil8": 4,
+  depth32float: 4,
+  "depth32float-stencil8": 5,
+  // Block-compressed (effective bytes per pixel)
+  "bc1-rgba-unorm": 0.5, "bc1-rgba-unorm-srgb": 0.5,
+  "bc2-rgba-unorm": 1, "bc2-rgba-unorm-srgb": 1,
+  "bc3-rgba-unorm": 1, "bc3-rgba-unorm-srgb": 1,
+  "bc4-r-unorm": 0.5, "bc4-r-snorm": 0.5,
+  "bc5-rg-unorm": 1, "bc5-rg-snorm": 1,
+  "bc6h-rgb-ufloat": 1, "bc6h-rgb-float": 1,
+  "bc7-rgba-unorm": 1, "bc7-rgba-unorm-srgb": 1,
+  "etc2-rgb8unorm": 0.5, "etc2-rgb8unorm-srgb": 0.5,
+  "etc2-rgb8a1unorm": 0.5, "etc2-rgb8a1unorm-srgb": 0.5,
+  "etc2-rgba8unorm": 1, "etc2-rgba8unorm-srgb": 1,
+  "eac-r11unorm": 0.5, "eac-r11snorm": 0.5,
+  "eac-rg11unorm": 1, "eac-rg11snorm": 1,
+  // ASTC variants are derived dynamically from the format string — see
+  // bytesPerTexel(). All ASTC blocks are 16 bytes; bpp is 16 / (W * H).
+};
+// ASTC format names carry their block footprint, e.g. `astc-4x4-unorm` or
+// `astc-12x12-unorm-srgb`. A block is always 16 bytes and spans W * H
+// texels, so the per-texel rate is 16 / (W * H) for every ASTC variant.
+const ASTC_RE = /^astc-(\d+)x(\d+)-/;
+/**
+ * Bytes per texel for a texture format string.
+ *
+ * Resolution order: the static `FORMAT_BPP` table, then the ASTC block
+ * dimensions parsed from the name, then a default of 4 for anything else.
+ */
+function bytesPerTexel(format: string): number {
+  const tabulated = FORMAT_BPP[format];
+  if (tabulated === undefined) {
+    const match = ASTC_RE.exec(format);
+    if (!match) return 4;
+    const texelsPerBlock = Number(match[1]) * Number(match[2]);
+    return 16 / texelsPerBlock;
+  }
+  return tabulated;
+}
+/**
+ * Normalise a `GPUExtent3D` into explicit `{ width, height, depth }`.
+ *
+ * The input is either a `GPUExtent3DDict` (recognised by a numeric `width`
+ * property) or an iterable of numbers such as an Array or Uint32Array. The
+ * check is on `width` rather than `Array.isArray` because typed arrays fail
+ * `Array.isArray`. Missing components default to width 0, height 1, depth 1
+ * for iterables, and height 1 / depth 1 for dicts.
+ */
+export function resolveExtent(
+  size: GPUExtent3D,
+): { width: number; height: number; depth: number } {
+  const asDict = size as GPUExtent3DDict | null | undefined;
+  if (typeof asDict?.width === "number") {
+    const { width, height, depthOrArrayLayers } = asDict;
+    return { width, height: height ?? 1, depth: depthOrArrayLayers ?? 1 };
+  }
+  const components = Array.from(size as Iterable<number>);
+  // Only a missing (undefined) slot takes the fallback.
+  const component = (index: number, fallback: number): number => {
+    const value = components[index];
+    return value === undefined ? fallback : value;
+  };
+  return {
+    width: component(0, 0),
+    height: component(1, 1),
+    depth: component(2, 1),
+  };
+}
+/**
+ * Estimated bytes occupied on the GPU by a texture with the given
+ * descriptor. Accounts for the mip chain, array layers / depth, MSAA
+ * samples, and the format's bytes per texel.
+ *
+ * Each mip level is sized from its real extent: every halved dimension is
+ * `max(1, floor(dim / 2^level))`. Width and height halve for all textures;
+ * depth halves only for `"3d"` textures, while array layers of 2D textures
+ * stay constant across levels. Summing real extents (rather than a
+ * `4^-i` / `8^-i` series) keeps the result correct for non-square and
+ * non-power-of-two textures, where a dimension clamps at 1 or rounds down.
+ *
+ * Block-compressed formats are still approximated through their effective
+ * per-texel rate; block-size padding of small mip levels is not modelled.
+ *
+ * @param desc Descriptor as passed to `GPUDevice.createTexture`.
+ * @returns Byte estimate, rounded up to a whole byte.
+ */
+export function textureDescriptorBytes(desc: GPUTextureDescriptor): number {
+  const bpp = bytesPerTexel(desc.format);
+  const { width, height, depth } = resolveExtent(desc.size);
+  const is3D = desc.dimension === "3d";
+  const mipLevels = desc.mipLevelCount ?? 1;
+  const sampleCount = desc.sampleCount ?? 1;
+  // Extent of one dimension at a mip level; an empty (<= 0) base stays empty
+  // so a zero-sized texture still reports 0 bytes.
+  const mipExtent = (base: number, level: number): number =>
+    base > 0 ? Math.max(1, Math.floor(base / 2 ** level)) : 0;
+  let texels = 0;
+  for (let level = 0; level < mipLevels; level++) {
+    const layers = is3D ? mipExtent(depth, level) : depth;
+    texels += mipExtent(width, level) * mipExtent(height, level) * layers;
+  }
+  return Math.ceil(texels * bpp * sampleCount);
+}
+// ── Module-level active instrumentation ─────────────────────────────────────
+//
+// `instrumentDevice` records the handle it returns as the module's "active"
+// instrumentation, and the React `MemoryHUD` reads that by default. Callers
+// therefore do not have to pass the handle through props or React context;
+// code that manages several instrumentations can ignore this global and
+// pass handles around explicitly.
+const INSTRUMENTED = Symbol.for("webgpu-profiler.instrumented");
+const subscribers = new Set<() => void>();
+let active: DeviceInstrumentation | null = null;
+/** Invoke every subscriber registered for active-instrumentation changes. */
+function notify(): void {
+  subscribers.forEach((listener) => {
+    listener();
+  });
+}
+/**
+ * Returns the most recently registered active instrumentation (or `null`
+ * if none). For React, prefer using `MemoryHUD` without a prop — it reads
+ * this via `useSyncExternalStore` and re-renders when it changes.
+ *
+ * The value is set by `instrumentDevice` and reset to `null` when the
+ * active handle's `uninstrument()` is called.
+ *
+ * @returns The current module-level handle, or `null`.
+ */
+export function getActiveInstrumentation(): DeviceInstrumentation | null {
+  return active;
+}
+/**
+ * Register a callback that runs whenever the active instrumentation is
+ * announced as changed. `MemoryHUD` uses this through React's
+ * `useSyncExternalStore`; it is exported so non-React code can listen too.
+ *
+ * @param cb Listener to invoke on each notification.
+ * @returns A function that removes `cb` from the subscriber set.
+ */
+export function subscribeActiveInstrumentation(cb: () => void): () => void {
+  const unsubscribe = (): void => {
+    subscribers.delete(cb);
+  };
+  subscribers.add(cb);
+  return unsubscribe;
+}
+// ── Patcher ─────────────────────────────────────────────────────────────────
+/**
+ * Patch `device.createBuffer` and `device.createTexture` so every resource
+ * they return is recorded, and patch each returned object's `destroy()` so
+ * the record is dropped again.
+ *
+ * The returned handle is also stored on the device (under a registry
+ * symbol) and becomes the module's active instrumentation; subscribers are
+ * notified. Calling this again on an already-instrumented device returns
+ * the existing handle instead of patching twice.
+ *
+ * @param device Device to instrument; its two create methods are replaced.
+ * @returns Handle exposing the live maps, byte totals and `uninstrument()`.
+ */
+export function instrumentDevice(device: GPUDevice): DeviceInstrumentation {
+  // View of the device that exposes the marker slot holding its handle.
+  const tagged = device as unknown as { [INSTRUMENTED]?: DeviceInstrumentation };
+  // Idempotent: calling twice on the same device returns the existing
+  // handle rather than double-patching (and double-counting).
+  const existing = tagged[INSTRUMENTED];
+  if (existing) {
+    active = existing;
+    notify();
+    return existing;
+  }
+  const buffers = new Map<GPUBuffer, TrackedBuffer>();
+  const textures = new Map<GPUTexture, TrackedTexture>();
+  // Set to false by `uninstrument()`. Stops a patched create function that
+  // is still referenced somewhere from recording after teardown, and makes
+  // `uninstrument()` itself a one-shot.
+  let alive = true;
+  const origCreateBuffer = device.createBuffer.bind(device);
+  const origCreateTexture = device.createTexture.bind(device);
+  device.createBuffer = (descriptor: GPUBufferDescriptor): GPUBuffer => {
+    const buffer = origCreateBuffer(descriptor);
+    if (alive) buffers.set(buffer, { buffer, descriptor });
+    const origDestroy = buffer.destroy.bind(buffer);
+    // GPUBuffer.destroy may be called multiple times; the patch handles
+    // that idempotently via the no-op Map.delete.
+    buffer.destroy = () => {
+      buffers.delete(buffer);
+      origDestroy();
+    };
+    return buffer;
+  };
+  device.createTexture = (descriptor: GPUTextureDescriptor): GPUTexture => {
+    const texture = origCreateTexture(descriptor);
+    if (alive) textures.set(texture, { texture, descriptor });
+    const origDestroy = texture.destroy.bind(texture);
+    texture.destroy = () => {
+      textures.delete(texture);
+      origDestroy();
+    };
+    return texture;
+  };
+  const inst: DeviceInstrumentation = {
+    device,
+    buffers,
+    textures,
+    bufferBytes() {
+      let total = 0;
+      for (const { descriptor } of buffers.values()) total += descriptor.size;
+      return total;
+    },
+    textureBytes() {
+      let total = 0;
+      for (const { descriptor } of textures.values()) {
+        total += textureDescriptorBytes(descriptor);
+      }
+      return total;
+    },
+    bytes() {
+      // Go through `inst`, not `this`, so the method keeps working when it
+      // is destructured or handed over as a bare callback.
+      return inst.bufferBytes() + inst.textureBytes();
+    },
+    uninstrument() {
+      // One-shot: a repeated call on a stale handle must not undo the
+      // patches (or marker) of a newer instrumentation of the same device.
+      if (!alive) return;
+      alive = false;
+      device.createBuffer = origCreateBuffer;
+      device.createTexture = origCreateTexture;
+      buffers.clear();
+      textures.clear();
+      // Only remove the marker if it still points at this handle.
+      if (tagged[INSTRUMENTED] === inst) delete tagged[INSTRUMENTED];
+      if (active === inst) {
+        active = null;
+        notify();
+      }
+    },
+  };
+  tagged[INSTRUMENTED] = inst;
+  active = inst;
+  notify();
+  return inst;
+}
+// ── Auto-instrument ─────────────────────────────────────────────────────────
+//
+// Patches `GPUAdapter.prototype.requestDevice` so every device any code
+// requests is automatically instrumented as soon as it's created. Call
+// this once at boot — before any renderer or framework's `init()` runs —
+// and you're done. The React `MemoryHUD` picks up the active
+// instrumentation through `useSyncExternalStore`.
+//
+// Idempotent and safe to call multiple times. No-op if WebGPU is not
+// available in the current environment (e.g. SSR, older browsers).
+// Registry symbol used as a flag on `GPUAdapter.prototype`: set while the
+// `requestDevice` patch is installed and removed again by the disposer.
+const ADAPTER_PATCHED = Symbol.for("webgpu-profiler.adapter-patched");
+/** Optional settings accepted by `autoInstrument`. */
+export interface AutoInstrumentOptions {
+  /** Called for each device the patch instruments. Useful for logging. */
+  onInstrument?: (instrumentation: DeviceInstrumentation) => void;
+}
+/**
+ * Wrap `GPUAdapter.prototype.requestDevice` so that each `GPUDevice` it
+ * resolves to is passed through `instrumentDevice` before the caller sees
+ * it. The returned disposer puts the original method back; handles created
+ * in the meantime stay live until their own `uninstrument()` is called.
+ *
+ * Does nothing (and returns a no-op disposer) when `GPUAdapter` is not
+ * defined or when the prototype is already patched.
+ *
+ * Typical usage at the top of your app boot file:
+ *
+ *     import { autoInstrument } from "webgpu-profiler";
+ *     if (import.meta.env.DEV) autoInstrument();
+ *
+ * After that, `<MemoryHUD />` can be rendered anywhere with no extra wiring.
+ *
+ * @param options Optional `onInstrument` callback, invoked per device.
+ * @returns Disposer that restores the original `requestDevice`.
+ */
+export function autoInstrument(options: AutoInstrumentOptions = {}): () => void {
+  const noop = (): void => undefined;
+  // WebGPU not available in this environment; nothing to patch.
+  if (typeof GPUAdapter === "undefined") return noop;
+  const adapterProto = GPUAdapter.prototype as unknown as {
+    [ADAPTER_PATCHED]?: boolean;
+    requestDevice: GPUAdapter["requestDevice"];
+  };
+  if (adapterProto[ADAPTER_PATCHED]) return noop;
+  const nativeRequestDevice = adapterProto.requestDevice;
+  adapterProto.requestDevice = async function (
+    this: GPUAdapter,
+    descriptor?: GPUDeviceDescriptor,
+  ): Promise<GPUDevice> {
+    const device = await nativeRequestDevice.call(this, descriptor);
+    if (!device) return device;
+    const handle = instrumentDevice(device);
+    options.onInstrument?.(handle);
+    return device;
+  } as GPUAdapter["requestDevice"];
+  adapterProto[ADAPTER_PATCHED] = true;
+  return () => {
+    if (!adapterProto[ADAPTER_PATCHED]) return;
+    adapterProto.requestDevice = nativeRequestDevice;
+    delete adapterProto[ADAPTER_PATCHED];
+  };
+}

package/src/profiler.ts ADDED Viewed

@@ -0,0 +1,171 @@
+import {
+  resolveExtent,
+  textureDescriptorBytes,
+  type DeviceInstrumentation,
+} from "./instrument.js";
+/**
+ * Builds a snapshot of GPU memory currently allocated by the page, sourced
+ * from a {@link DeviceInstrumentation} handle (see `instrument.ts`).
+ *
+ * The numbers below are exact for every resource that flows through
+ * `GPUDevice.createBuffer` / `createTexture` — those are the only two
+ * WebGPU JS-side allocation entry points, so this is exhaustive by
+ * construction.
+ *
+ * Not counted (driver-internal, opaque to JS):
+ *   - Canvas swapchain backbuffer (created by `context.configure()`).
+ *   - Residency caches, MSAA backing, pipeline objects, bind group
+ *     layouts, staging buffers that bypass `createBuffer`.
+ */
+/**
+ * One texture row in a {@link MemoryReport}, derived from the descriptor
+ * the texture was created with.
+ */
+export interface TextureEntry {
+  /** The descriptor's `label`, or "(unnamed)" when it is empty or absent. */
+  name: string;
+  width: number;
+  height: number;
+  /** Third component of the resolved size (depth or array layers). */
+  depth: number;
+  format: string;
+  /** Descriptor `mipLevelCount`, defaulting to 1. */
+  mipLevels: number;
+  /** Descriptor `sampleCount`, defaulting to 1. */
+  sampleCount: number;
+  /** True if this texture is a render attachment (post-fx, MRT, depth, etc.). */
+  isRenderTarget: boolean;
+  /** Size in bytes as computed by `textureDescriptorBytes`. */
+  bytes: number;
+}
+/** One buffer row in a {@link MemoryReport}. */
+export interface BufferEntry {
+  /** The descriptor's `label`, or "(unnamed)" when it is empty or absent. */
+  name: string;
+  /** The descriptor's `size`, in bytes. */
+  bytes: number;
+  /** Pipe-separated decoded usage flags (e.g. "STORAGE|COPY_DST"). */
+  usage: string;
+}
+/**
+ * Snapshot produced by `profileMemory`. Each list is sorted by `bytes`,
+ * largest first; `totals` holds the byte sum of each list plus the grand
+ * total in `all`.
+ */
+export interface MemoryReport {
+  /** Textures whose usage does not include the render-attachment flag. */
+  sampledTextures: TextureEntry[];
+  /** Textures whose usage includes the render-attachment flag. */
+  renderTargets: TextureEntry[];
+  buffers: BufferEntry[];
+  totals: {
+    sampledTextures: number;
+    renderTargets: number;
+    buffers: number;
+    all: number;
+  };
+}
+// ── Helpers ─────────────────────────────────────────────────────────────────
+// Spec-defined WebGPU usage flag values, hardcoded so this module can be
+// imported in Node / SSR environments where `GPUBufferUsage` /
+// `GPUTextureUsage` global objects are not defined.
+// https://www.w3.org/TR/webgpu/#typedefdef-gpubufferusageflags
+// Each pair is [bit value, flag name]; `decodeBufferUsage` emits names in
+// this order.
+const BUFFER_USAGE_NAMES: ReadonlyArray<[number, string]> = [
+  [0x0001, "MAP_READ"],
+  [0x0002, "MAP_WRITE"],
+  [0x0004, "COPY_SRC"],
+  [0x0008, "COPY_DST"],
+  [0x0010, "INDEX"],
+  [0x0020, "VERTEX"],
+  [0x0040, "UNIFORM"],
+  [0x0080, "STORAGE"],
+  [0x0100, "INDIRECT"],
+  [0x0200, "QUERY_RESOLVE"],
+];
+// Texture usage bit tested by `isRenderAttachment`, hardcoded for the same
+// reason as the buffer flags above.
+const TEXTURE_USAGE_RENDER_ATTACHMENT = 0x10;
+/**
+ * Turn a buffer usage bitmask into a pipe-separated list of flag names, in
+ * `BUFFER_USAGE_NAMES` order. Returns "0" when no known flag is set.
+ */
+function decodeBufferUsage(flags: number): string {
+  const decoded = BUFFER_USAGE_NAMES
+    .filter(([bit]) => (flags & bit) !== 0)
+    .map(([, label]) => label)
+    .join("|");
+  return decoded === "" ? "0" : decoded;
+}
+/** Whether a texture usage bitmask has the render-attachment bit set. */
+function isRenderAttachment(usage: number): boolean {
+  const attachmentBit = usage & TEXTURE_USAGE_RENDER_ATTACHMENT;
+  return attachmentBit !== 0;
+}
+// ── Public API ──────────────────────────────────────────────────────────────
+/**
+ * Build a {@link MemoryReport} from the resources currently held in an
+ * instrumentation handle.
+ *
+ * Textures are split by whether their usage has the render-attachment bit;
+ * every list is sorted by byte size, largest first, and per-list and overall
+ * byte totals are computed.
+ *
+ * @param instrumentation Handle whose `textures` and `buffers` maps are read.
+ * @returns A freshly built report; the handle is not modified.
+ */
+export function profileMemory(
+  instrumentation: DeviceInstrumentation,
+): MemoryReport {
+  const largestFirst = (a: { bytes: number }, b: { bytes: number }): number =>
+    b.bytes - a.bytes;
+  const sumBytes = (entries: ReadonlyArray<{ bytes: number }>): number => {
+    let total = 0;
+    for (const entry of entries) total += entry.bytes;
+    return total;
+  };
+  const sampledTextures: TextureEntry[] = [];
+  const renderTargets: TextureEntry[] = [];
+  for (const tracked of instrumentation.textures.values()) {
+    const desc = tracked.descriptor;
+    const { width, height, depth } = resolveExtent(desc.size);
+    const bytes = textureDescriptorBytes(desc);
+    const isRenderTarget = isRenderAttachment(desc.usage ?? 0);
+    const bucket = isRenderTarget ? renderTargets : sampledTextures;
+    bucket.push({
+      name: desc.label || "(unnamed)",
+      width,
+      height,
+      depth,
+      format: desc.format,
+      mipLevels: desc.mipLevelCount ?? 1,
+      sampleCount: desc.sampleCount ?? 1,
+      isRenderTarget,
+      bytes,
+    });
+  }
+  sampledTextures.sort(largestFirst);
+  renderTargets.sort(largestFirst);
+  const buffers: BufferEntry[] = Array.from(
+    instrumentation.buffers.values(),
+    ({ descriptor }) => ({
+      name: descriptor.label || "(unnamed)",
+      bytes: descriptor.size,
+      usage: decodeBufferUsage(descriptor.usage),
+    }),
+  );
+  buffers.sort(largestFirst);
+  const sampledTotal = sumBytes(sampledTextures);
+  const renderTargetTotal = sumBytes(renderTargets);
+  const bufferTotal = sumBytes(buffers);
+  return {
+    sampledTextures,
+    renderTargets,
+    buffers,
+    totals: {
+      sampledTextures: sampledTotal,
+      renderTargets: renderTargetTotal,
+      buffers: bufferTotal,
+      all: sampledTotal + renderTargetTotal + bufferTotal,
+    },
+  };
+}
+/**
+ * Build a multi-line plain-text report. Feed to a copy button or bug report.
+ *
+ * Sections appear in the order sampled textures, render targets, buffers,
+ * each headed by its total in MB, followed by a grand total. Render-target
+ * rows list depth / layers, mip count and sample count whenever they are
+ * above 1, so a layered, mipped or multisampled target's size can be
+ * explained from its row; rows for plain single-sample targets are
+ * unchanged.
+ *
+ * @param report Report as returned by `profileMemory`.
+ * @returns The report text, lines joined with "\n".
+ */
+export function reportToText(report: MemoryReport): string {
+  const mb = (b: number) => `${(b / 1024 / 1024).toFixed(2)} MB`;
+  // `WxH`, extended to `WxHxD` when there is more than one slice / layer.
+  const dims = (t: TextureEntry) =>
+    `${t.width}x${t.height}${t.depth > 1 ? `x${t.depth}` : ""}`;
+  const lines: string[] = [];
+  lines.push(`VRAM snapshot (live, from device.createBuffer/createTexture)`);
+  lines.push(``);
+  lines.push(`Sampled textures: ${mb(report.totals.sampledTextures)}`);
+  for (const t of report.sampledTextures) {
+    lines.push(
+      `  ${t.name.padEnd(32)} ${dims(t)} ${t.format}  mips=${t.mipLevels}  ${mb(t.bytes)}`,
+    );
+  }
+  lines.push(``);
+  lines.push(`Render targets: ${mb(report.totals.renderTargets)}`);
+  for (const t of report.renderTargets) {
+    // Only non-default values are shown, keeping the common row compact.
+    const extras =
+      (t.mipLevels > 1 ? `  mips=${t.mipLevels}` : "") +
+      (t.sampleCount > 1 ? `  samples=${t.sampleCount}` : "");
+    lines.push(
+      `  ${t.name.padEnd(32)} ${dims(t)} ${t.format}${extras}  ${mb(t.bytes)}`,
+    );
+  }
+  lines.push(``);
+  lines.push(`Buffers: ${mb(report.totals.buffers)}`);
+  for (const b of report.buffers) {
+    lines.push(`  ${b.name.padEnd(32)} ${b.usage}  ${mb(b.bytes)}`);
+  }
+  lines.push(``);
+  lines.push(`Total: ${mb(report.totals.all)}`);
+  return lines.join("\n");
+}