npm - @simulatte/webgpu-doe - Versions diffs - 0.1.2 → 0.3.2 - Mend

@simulatte/webgpu-doe 0.1.2 → 0.3.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (12)

package/CHANGELOG.md +19 -0
package/LICENSE +191 -0
package/README.md +121 -126
package/assets/fawn-icon-main.svg +222 -0
package/examples/with-webgpu-compute.js +25 -0
package/package.json +31 -26
package/src/index.d.ts +125 -0
package/src/index.js +644 -408
package/binding.gyp +0 -20
package/native/doe_napi.c +0 -1677
package/prebuilds/darwin-arm64/doe_napi.node +0 -0
package/prebuilds/darwin-arm64/libdoe_webgpu.dylib +0 -0

package/src/index.js CHANGED Viewed

@@ -1,495 +1,731 @@
-import { createRequire } from 'node:module';
-import { existsSync } from 'node:fs';
-import { resolve, dirname } from 'node:path';
-import { fileURLToPath } from 'node:url';
+const DOE_GPU_BUFFER_USAGE = {
+  MAP_READ: 0x0001,
+  COPY_SRC: 0x0004,
+  COPY_DST: 0x0008,
+  UNIFORM: 0x0040,
+  STORAGE: 0x0080,
+};
-const __dirname = dirname(fileURLToPath(import.meta.url));
-const require = createRequire(import.meta.url);
+const DOE_GPU_SHADER_STAGE = {
+  COMPUTE: 0x4,
+};
-const addon = loadAddon();
-const DOE_LIB_PATH = resolveDoeLibraryPath();
-let libraryLoaded = false;
+const DOE_GPU_MAP_MODE = {
+  READ: 0x0001,
+};
-function loadAddon() {
-  const prebuildPath = resolve(__dirname, '..', 'prebuilds', `${process.platform}-${process.arch}`, 'doe_napi.node');
-  try {
-    return require(prebuildPath);
-  } catch {
-    try {
-      return require('../build/Release/doe_napi.node');
-    } catch {
-      try {
-        return require('../build/Debug/doe_napi.node');
-      } catch {
-        return null;
-      }
-    }
+const DOE_BUFFER_META = new WeakMap();
+function resolveBufferUsageToken(token, combined = false) {
+  switch (token) {
+    case 'upload':
+      return DOE_GPU_BUFFER_USAGE.COPY_DST;
+    case 'readback':
+      return combined
+        ? DOE_GPU_BUFFER_USAGE.COPY_SRC
+        : DOE_GPU_BUFFER_USAGE.COPY_SRC | DOE_GPU_BUFFER_USAGE.COPY_DST | DOE_GPU_BUFFER_USAGE.MAP_READ;
+    case 'uniform':
+      return DOE_GPU_BUFFER_USAGE.UNIFORM | DOE_GPU_BUFFER_USAGE.COPY_DST;
+    case 'storageRead':
+      return DOE_GPU_BUFFER_USAGE.STORAGE | DOE_GPU_BUFFER_USAGE.COPY_DST;
+    case 'storageReadWrite':
+      return DOE_GPU_BUFFER_USAGE.STORAGE | DOE_GPU_BUFFER_USAGE.COPY_DST | DOE_GPU_BUFFER_USAGE.COPY_SRC;
+    default:
+      throw new Error(`Unknown Doe buffer usage token: ${token}`);
   }
 }
-function resolveDoeLibraryPath() {
-  const ext = process.platform === 'darwin' ? 'dylib'
-    : process.platform === 'win32' ? 'dll' : 'so';
-  const candidates = [
-    process.env.DOE_WEBGPU_LIB,
-    resolve(__dirname, '..', 'prebuilds', `${process.platform}-${process.arch}`, `libdoe_webgpu.${ext}`),
-    resolve(__dirname, '..', '..', '..', 'zig', 'zig-out', 'lib', `libdoe_webgpu.${ext}`),
-    resolve(process.cwd(), 'zig', 'zig-out', 'lib', `libdoe_webgpu.${ext}`),
-  ];
-  for (const candidate of candidates) {
-    if (candidate && existsSync(candidate)) return candidate;
-  }
-  return null;
+function resolveBufferUsage(usage) {
+  if (typeof usage === 'number') return usage;
+  if (typeof usage === 'string') return resolveBufferUsageToken(usage);
+  if (Array.isArray(usage)) {
+    const combined = usage.length > 1;
+    return usage.reduce((mask, token) => mask | (
+      typeof token === 'number'
+        ? token
+        : resolveBufferUsageToken(token, combined)
+    ), 0);
+  }
+  throw new Error('Doe buffer usage must be a number, string, or string array.');
 }
-function ensureLibrary() {
-  if (libraryLoaded) return;
-  if (!addon) {
-    throw new Error(
-      '@simulatte/webgpu-doe: Native addon not found. Run `npm run build` or `npx node-gyp rebuild`.'
-    );
+function inferBindingAccessToken(token) {
+  switch (token) {
+    case 'uniform':
+      return 'uniform';
+    case 'storageRead':
+      return 'storageRead';
+    case 'storageReadWrite':
+      return 'storageReadWrite';
+    default:
+      return null;
   }
-  if (!DOE_LIB_PATH) {
-    throw new Error(
-      '@simulatte/webgpu-doe: libdoe_webgpu not found. Build it with `cd fawn/zig && zig build dropin` or set DOE_WEBGPU_LIB.'
-    );
-  }
-  addon.loadLibrary(DOE_LIB_PATH);
-  libraryLoaded = true;
 }
-// WebGPU enum constants (standard values).
-export const globals = {
-  GPUBufferUsage: {
-    MAP_READ:      0x0001,
-    MAP_WRITE:     0x0002,
-    COPY_SRC:      0x0004,
-    COPY_DST:      0x0008,
-    INDEX:         0x0010,
-    VERTEX:        0x0020,
-    UNIFORM:       0x0040,
-    STORAGE:       0x0080,
-    INDIRECT:      0x0100,
-    QUERY_RESOLVE: 0x0200,
-  },
-  GPUShaderStage: {
-    VERTEX:   0x1,
-    FRAGMENT: 0x2,
-    COMPUTE:  0x4,
-  },
-  GPUMapMode: {
-    READ:  0x0001,
-    WRITE: 0x0002,
-  },
-  GPUTextureUsage: {
-    COPY_SRC:          0x01,
-    COPY_DST:          0x02,
-    TEXTURE_BINDING:   0x04,
-    STORAGE_BINDING:   0x08,
-    RENDER_ATTACHMENT: 0x10,
-  },
-};
+function inferBindingAccess(usage) {
+  if (typeof usage === 'number' || usage == null) return null;
+  const tokens = typeof usage === 'string'
+    ? [usage]
+    : Array.isArray(usage)
+      ? usage.filter((token) => typeof token !== 'number')
+      : null;
+  if (!tokens) {
+    throw new Error('Doe buffer usage must be a number, string, or string array.');
+  }
+  const inferred = [...new Set(tokens.map(inferBindingAccessToken).filter(Boolean))];
+  if (inferred.length > 1) {
+    throw new Error(`Doe buffer usage cannot imply multiple binding access modes: ${inferred.join(', ')}`);
+  }
+  return inferred[0] ?? null;
+}
-class DoeGPUBuffer {
-  constructor(native, instance, size, usage, queue) {
-    this._native = native;
-    this._instance = instance;
-    this._queue = queue;
-    this.size = size;
-    this.usage = usage;
-  }
+function rememberBufferUsage(buffer, usage) {
+  DOE_BUFFER_META.set(buffer, {
+    bindingAccess: inferBindingAccess(usage),
+  });
+  return buffer;
+}
-  async mapAsync(mode, offset = 0, size = this.size) {
-    if (this._queue) addon.queueFlush(this._queue);
-    addon.bufferMapSync(this._instance, this._native, mode, offset, size);
-  }
+function inferredBindingAccessForBuffer(buffer) {
+  return DOE_BUFFER_META.get(buffer)?.bindingAccess ?? null;
+}
-  getMappedRange(offset = 0, size = this.size) {
-    return addon.bufferGetMappedRange(this._native, offset, size);
+function normalizeWorkgroups(workgroups) {
+  if (typeof workgroups === 'number') {
+    return [workgroups, 1, 1];
   }
-  unmap() {
-    addon.bufferUnmap(this._native);
+  if (Array.isArray(workgroups) && workgroups.length === 2) {
+    return [workgroups[0], workgroups[1], 1];
   }
-  destroy() {
-    addon.bufferRelease(this._native);
-    this._native = null;
+  if (Array.isArray(workgroups) && workgroups.length === 3) {
+    return workgroups;
   }
+  throw new Error('Doe workgroups must be a number, [x, y], or [x, y, z].');
 }
-class DoeGPUComputePassEncoder {
-  constructor(native) { this._native = native; }
-  setPipeline(pipeline) {
-    addon.computePassSetPipeline(this._native, pipeline._native);
-  }
-  setBindGroup(index, bindGroup) {
-    addon.computePassSetBindGroup(this._native, index, bindGroup._native);
-  }
-  dispatchWorkgroups(x, y = 1, z = 1) {
-    addon.computePassDispatchWorkgroups(this._native, x, y, z);
+function validatePositiveInteger(value, label) {
+  if (!Number.isInteger(value) || value < 1) {
+    throw new Error(`${label} must be a positive integer.`);
   }
+}
-  dispatchWorkgroupsIndirect(indirectBuffer, indirectOffset = 0) {
-    addon.computePassDispatchWorkgroupsIndirect(this._native, indirectBuffer._native, indirectOffset);
+function validateWorkgroups(device, workgroups) {
+  const normalized = normalizeWorkgroups(workgroups);
+  const limits = device?.limits ?? {};
+  const [x, y, z] = normalized;
+  validatePositiveInteger(x, 'Doe workgroups.x');
+  validatePositiveInteger(y, 'Doe workgroups.y');
+  validatePositiveInteger(z, 'Doe workgroups.z');
+  if (limits.maxComputeWorkgroupsPerDimension) {
+    if (x > limits.maxComputeWorkgroupsPerDimension ||
+        y > limits.maxComputeWorkgroupsPerDimension ||
+        z > limits.maxComputeWorkgroupsPerDimension) {
+      throw new Error(
+        `Doe workgroups exceed maxComputeWorkgroupsPerDimension (${limits.maxComputeWorkgroupsPerDimension}).`
+      );
+    }
   }
-  end() {
-    addon.computePassEnd(this._native);
+  if (limits.maxComputeWorkgroupSizeX && x > limits.maxComputeWorkgroupSizeX) {
+    throw new Error(
+      `Doe workgroups.x (${x}) exceeds maxComputeWorkgroupSizeX (${limits.maxComputeWorkgroupSizeX}).`
+    );
   }
-}
-class DoeGPUCommandEncoder {
-  constructor(native) { this._native = native; }
-  beginComputePass(descriptor) {
-    const pass = addon.beginComputePass(this._native);
-    return new DoeGPUComputePassEncoder(pass);
+  if (limits.maxComputeWorkgroupSizeY && y > limits.maxComputeWorkgroupSizeY) {
+    throw new Error(
+      `Doe workgroups.y (${y}) exceeds maxComputeWorkgroupSizeY (${limits.maxComputeWorkgroupSizeY}).`
+    );
   }
-  beginRenderPass(descriptor) {
-    const colorAttachments = (descriptor.colorAttachments || []).map((a) => ({
-      view: a.view._native,
-      clearValue: a.clearValue || { r: 0, g: 0, b: 0, a: 1 },
-    }));
-    const pass = addon.beginRenderPass(this._native, colorAttachments);
-    return new DoeGPURenderPassEncoder(pass);
+  if (limits.maxComputeWorkgroupSizeZ && z > limits.maxComputeWorkgroupSizeZ) {
+    throw new Error(
+      `Doe workgroups.z (${z}) exceeds maxComputeWorkgroupSizeZ (${limits.maxComputeWorkgroupSizeZ}).`
+    );
   }
-  copyBufferToBuffer(src, srcOffset, dst, dstOffset, size) {
-    addon.commandEncoderCopyBufferToBuffer(
-      this._native, src._native, srcOffset, dst._native, dstOffset, size);
+  if (limits.maxComputeInvocationsPerWorkgroup) {
+    const invocations = x * y * z;
+    if (invocations > limits.maxComputeInvocationsPerWorkgroup) {
+      throw new Error(
+        `Doe workgroups (${invocations} invocations) exceed maxComputeInvocationsPerWorkgroup (${limits.maxComputeInvocationsPerWorkgroup}).`
+      );
+    }
   }
-  finish() {
-    const cmd = addon.commandEncoderFinish(this._native);
-    return { _native: cmd };
-  }
+  return normalized;
 }
-class DoeGPUQueue {
-  constructor(native) { this._native = native; }
-  submit(commandBuffers) {
-    const natives = commandBuffers.map((c) => c._native);
-    addon.queueSubmit(this._native, natives);
-  }
-  writeBuffer(buffer, bufferOffset, data, dataOffset = 0, size) {
-    let view = data;
-    if (dataOffset > 0 || size !== undefined) {
-      const byteOffset = data.byteOffset + dataOffset * (data.BYTES_PER_ELEMENT || 1);
-      const byteLength = size !== undefined
-        ? size * (data.BYTES_PER_ELEMENT || 1)
-        : data.byteLength - dataOffset * (data.BYTES_PER_ELEMENT || 1);
-      view = new Uint8Array(data.buffer, byteOffset, byteLength);
-    }
-    addon.queueWriteBuffer(this._native, buffer._native, bufferOffset, view);
+function normalizeDataView(data) {
+  if (ArrayBuffer.isView(data)) {
+    return new Uint8Array(data.buffer, data.byteOffset, data.byteLength);
   }
-  async onSubmittedWorkDone() {
-    addon.queueFlush(this._native);
+  if (data instanceof ArrayBuffer) {
+    return new Uint8Array(data);
   }
+  throw new Error('Doe buffer data must be an ArrayBuffer or ArrayBufferView.');
 }
-class DoeGPURenderPassEncoder {
-  constructor(native) { this._native = native; }
-  setPipeline(pipeline) {
-    addon.renderPassSetPipeline(this._native, pipeline._native);
+function resolveBufferSize(source) {
+  if (source && typeof source === 'object' && typeof source.size === 'number') {
+    return source.size;
   }
-  draw(vertexCount, instanceCount = 1, firstVertex = 0, firstInstance = 0) {
-    addon.renderPassDraw(this._native, vertexCount, instanceCount, firstVertex, firstInstance);
+  if (ArrayBuffer.isView(source)) {
+    return source.byteLength;
   }
-  end() {
-    addon.renderPassEnd(this._native);
+  if (source instanceof ArrayBuffer) {
+    return source.byteLength;
   }
+  throw new Error('Doe buffer-like source must expose a byte size or be ArrayBuffer-backed data.');
 }
-class DoeGPUTexture {
-  constructor(native) { this._native = native; }
-  createView(descriptor) {
-    const view = addon.textureCreateView(this._native);
-    return new DoeGPUTextureView(view);
-  }
-  destroy() {
-    addon.textureRelease(this._native);
-    this._native = null;
+function normalizeBinding(binding, index) {
+  const entry = binding && typeof binding === 'object' && 'buffer' in binding
+    ? binding
+    : { buffer: binding };
+  const access = entry.access ?? inferredBindingAccessForBuffer(entry.buffer);
+  if (!access) {
+    throw new Error(
+      'Doe binding access is required for buffers without Doe helper usage metadata. ' +
+      'Pass { buffer, access } or create the buffer through gpu.buffer.* with a bindable usage token.'
+    );
   }
+  return {
+    binding: index,
+    buffer: entry.buffer,
+    access,
+  };
 }
-class DoeGPUTextureView {
-  constructor(native) { this._native = native; }
+function bindGroupLayoutEntry(binding) {
+  const buffer_type = binding.access === 'uniform'
+    ? 'uniform'
+    : binding.access === 'storageRead'
+      ? 'read-only-storage'
+      : 'storage';
+  return {
+    binding: binding.binding,
+    visibility: DOE_GPU_SHADER_STAGE.COMPUTE,
+    buffer: { type: buffer_type },
+  };
 }
-class DoeGPUSampler {
-  constructor(native) { this._native = native; }
+function bindGroupEntry(binding) {
+  return {
+    binding: binding.binding,
+    resource: { buffer: binding.buffer },
+  };
 }
-class DoeGPURenderPipeline {
-  constructor(native) { this._native = native; }
+/**
+ * Reusable compute kernel compiled by `gpu.kernel.create(...)`.
+ *
+ * Surface: Doe API `gpu.kernel`.
+ * Input: Created from WGSL source, an entry point, and an initial binding shape.
+ * Returns: A reusable kernel object with `dispatch(...)`.
+ *
+ * This object keeps the compiled pipeline and bind-group layout for a repeated
+ * WGSL compute shape. Use it when you will dispatch the same shader more than
+ * once and want to avoid recompiling on every call.
+ *
+ * This example shows the API in its basic form.
+ *
+ * ```js
+ * const kernel = gpu.kernel.create({
+ *   code,
+ *   bindings: [src, dst],
+ * });
+ *
+ * await kernel.dispatch({
+ *   bindings: [src, dst],
+ *   workgroups: 1,
+ * });
+ * ```
+ *
+ * - See `gpu.kernel.run(...)` for the one-shot explicit path.
+ * - See `gpu.compute(...)` for the narrower typed-array workflow.
+ * - Instances are returned through the bound Doe API and are not exported directly.
+ */
+class DoeKernel {
+  constructor(device, pipeline, layout, entryPoint) {
+    this.device = device;
+    this.pipeline = pipeline;
+    this.layout = layout;
+    this.entryPoint = entryPoint;
+  }
+  /**
+   * Dispatch this compiled kernel once.
+   *
+   * Surface: Doe API `gpu.kernel`.
+   * Input: A binding list, workgroup counts, and an optional label.
+   * Returns: A promise that resolves after submission completes.
+   *
+   * This records one compute pass for the compiled pipeline, submits it, and
+   * waits for completion when the underlying queue exposes
+   * `onSubmittedWorkDone()`.
+   *
+   * This example shows the API in its basic form.
+   *
+   * ```js
+   * await kernel.dispatch({
+   *   bindings: [src, dst],
+   *   workgroups: [4, 1, 1],
+   * });
+   * ```
+   *
+   * - `workgroups` may be `number`, `[x, y]`, or `[x, y, z]`.
+   * - Bare buffers without Doe helper metadata require `{ buffer, access }`.
+   * - See `gpu.kernel.run(...)` when you do not need reuse.
+   */
+  async dispatch(options) {
+    const bindings = (options.bindings ?? []).map(normalizeBinding);
+    const workgroups = validateWorkgroups(this.device, options.workgroups);
+    const bindGroup = this.device.createBindGroup({
+      label: options.label ?? undefined,
+      layout: this.layout,
+      entries: bindings.map(bindGroupEntry),
+    });
+    const encoder = this.device.createCommandEncoder({ label: options.label ?? undefined });
+    const pass = encoder.beginComputePass({ label: options.label ?? undefined });
+    pass.setPipeline(this.pipeline);
+    if (bindings.length > 0) {
+      pass.setBindGroup(0, bindGroup);
+    }
+    pass.dispatchWorkgroups(workgroups[0], workgroups[1], workgroups[2]);
+    pass.end();
+    this.device.queue.submit([encoder.finish()]);
+    if (typeof this.device.queue.onSubmittedWorkDone === 'function') {
+      await this.device.queue.onSubmittedWorkDone();
+    }
+  }
 }
-class DoeGPUShaderModule {
-  constructor(native) { this._native = native; }
+function createKernel(device, options) {
+  const bindings = (options.bindings ?? []).map(normalizeBinding);
+  const shader = device.createShaderModule({ code: options.code });
+  const bindGroupLayout = device.createBindGroupLayout({
+    entries: bindings.map(bindGroupLayoutEntry),
+  });
+  const pipelineLayout = device.createPipelineLayout({
+    bindGroupLayouts: [bindGroupLayout],
+  });
+  const pipeline = device.createComputePipeline({
+    layout: pipelineLayout,
+    compute: {
+      module: shader,
+      entryPoint: options.entryPoint ?? 'main',
+    },
+  });
+  return new DoeKernel(device, pipeline, bindGroupLayout, options.entryPoint ?? 'main');
 }
-class DoeGPUComputePipeline {
-  constructor(native) { this._native = native; }
-  getBindGroupLayout(index) {
-    const layout = addon.computePipelineGetBindGroupLayout(this._native, index);
-    return new DoeGPUBindGroupLayout(layout);
-  }
+function createBuffer(device, options) {
+  if (!options || typeof options !== 'object') {
+    throw new Error('Doe buffer options must be an object.');
+  }
+  if (options.data != null) {
+    const view = normalizeDataView(options.data);
+    const usage = options.usage ?? 'storageRead';
+    const buffer = rememberBufferUsage(device.createBuffer({
+      label: options.label ?? undefined,
+      size: options.size ?? view.byteLength,
+      usage: resolveBufferUsage(usage),
+    }), usage);
+    device.queue.writeBuffer(buffer, 0, view);
+    return buffer;
+  }
+  validatePositiveInteger(options.size, 'Doe buffer size');
+  return rememberBufferUsage(device.createBuffer({
+    label: options.label ?? undefined,
+    size: options.size,
+    usage: resolveBufferUsage(options.usage),
+    mappedAtCreation: options.mappedAtCreation ?? false,
+  }), options.usage);
 }
-class DoeGPUBindGroupLayout {
-  constructor(native) { this._native = native; }
+function createBufferFromData(device, data, options = {}) {
+  const view = normalizeDataView(data);
+  const usage = options.usage ?? 'storageRead';
+  const buffer = rememberBufferUsage(device.createBuffer({
+    label: options.label ?? undefined,
+    size: view.byteLength,
+    usage: resolveBufferUsage(usage),
+  }), usage);
+  device.queue.writeBuffer(buffer, 0, view);
+  return buffer;
 }
-class DoeGPUBindGroup {
-  constructor(native) { this._native = native; }
+async function readBuffer(device, buffer, type, options = {}) {
+  if (arguments.length === 2 && buffer && typeof buffer === 'object') {
+    return readBuffer(device, buffer.buffer, buffer.type, buffer);
+  }
+  if (!buffer || typeof buffer !== 'object') {
+    throw new Error('Doe buffer.read requires a buffer.');
+  }
+  if (typeof type !== 'function') {
+    throw new Error('Doe buffer.read type must be a typed-array constructor.');
+  }
+  const offset = options.offset ?? 0;
+  const size = options.size ?? Math.max(0, (buffer.size ?? 0) - offset);
+  if (!Number.isInteger(offset) || offset < 0) {
+    throw new Error('Doe buffer.read offset must be a non-negative integer.');
+  }
+  if (!Number.isInteger(size) || size < 0) {
+    throw new Error('Doe buffer.read size must be a non-negative integer.');
+  }
+  if (((buffer.usage ?? 0) & DOE_GPU_BUFFER_USAGE.MAP_READ) !== 0) {
+    await buffer.mapAsync(DOE_GPU_MAP_MODE.READ, offset, size);
+    const copy = buffer.getMappedRange(offset, size).slice(0);
+    buffer.unmap();
+    return new type(copy);
+  }
+  const staging = device.createBuffer({
+    label: options.label ?? undefined,
+    size,
+    usage: DOE_GPU_BUFFER_USAGE.COPY_DST | DOE_GPU_BUFFER_USAGE.MAP_READ,
+  });
+  const encoder = device.createCommandEncoder({ label: options.label ?? undefined });
+  encoder.copyBufferToBuffer(buffer, offset, staging, 0, size);
+  device.queue.submit([encoder.finish()]);
+  await staging.mapAsync(DOE_GPU_MAP_MODE.READ);
+  const copy = staging.getMappedRange().slice(0);
+  staging.unmap();
+  if (typeof staging.destroy === 'function') {
+    staging.destroy();
+  }
+  return new type(copy);
 }
-class DoeGPUPipelineLayout {
-  constructor(native) { this._native = native; }
+async function runKernel(device, options) {
+  const kernel = createKernel(device, options);
+  await kernel.dispatch({
+    bindings: options.bindings ?? [],
+    workgroups: options.workgroups,
+    label: options.label,
+  });
 }
-// Metal defaults for Apple Silicon — matches doe_device_caps.zig METAL_LIMITS.
-const DOE_LIMITS = Object.freeze({
-  maxTextureDimension1D: 16384,
-  maxTextureDimension2D: 16384,
-  maxTextureDimension3D: 2048,
-  maxTextureArrayLayers: 2048,
-  maxBindGroups: 4,
-  maxBindGroupsPlusVertexBuffers: 24,
-  maxBindingsPerBindGroup: 1000,
-  maxDynamicUniformBuffersPerPipelineLayout: 8,
-  maxDynamicStorageBuffersPerPipelineLayout: 4,
-  maxSampledTexturesPerShaderStage: 16,
-  maxSamplersPerShaderStage: 16,
-  maxStorageBuffersPerShaderStage: 8,
-  maxStorageTexturesPerShaderStage: 4,
-  maxUniformBuffersPerShaderStage: 12,
-  maxUniformBufferBindingSize: 65536,
-  maxStorageBufferBindingSize: 134217728,
-  minUniformBufferOffsetAlignment: 256,
-  minStorageBufferOffsetAlignment: 32,
-  maxVertexBuffers: 8,
-  maxBufferSize: 268435456,
-  maxVertexAttributes: 16,
-  maxVertexBufferArrayStride: 2048,
-  maxInterStageShaderVariables: 16,
-  maxColorAttachments: 8,
-  maxColorAttachmentBytesPerSample: 32,
-  maxComputeWorkgroupStorageSize: 32768,
-  maxComputeInvocationsPerWorkgroup: 1024,
-  maxComputeWorkgroupSizeX: 1024,
-  maxComputeWorkgroupSizeY: 1024,
-  maxComputeWorkgroupSizeZ: 64,
-  maxComputeWorkgroupsPerDimension: 65535,
-});
-const DOE_FEATURES = Object.freeze(new Set(['shader-f16']));
-class DoeGPUDevice {
-  constructor(native, instance) {
-    this._native = native;
-    this._instance = instance;
-    const q = addon.deviceGetQueue(native);
-    this.queue = new DoeGPUQueue(q);
-    this.limits = DOE_LIMITS;
-    this.features = DOE_FEATURES;
-  }
-  createBuffer(descriptor) {
-    const buf = addon.createBuffer(this._native, descriptor);
-    return new DoeGPUBuffer(buf, this._instance, descriptor.size, descriptor.usage, this.queue._native);
-  }
-  createShaderModule(descriptor) {
-    const code = descriptor.code || descriptor.source;
-    if (!code) throw new Error('createShaderModule: descriptor.code is required');
-    const mod = addon.createShaderModule(this._native, code);
-    return new DoeGPUShaderModule(mod);
-  }
-  createComputePipeline(descriptor) {
-    const shader = descriptor.compute?.module;
-    const entryPoint = descriptor.compute?.entryPoint || 'main';
-    const layout = descriptor.layout === 'auto' ? null : descriptor.layout;
-    const native = addon.createComputePipeline(
-      this._native, shader._native, entryPoint,
-      layout?._native ?? null);
-    return new DoeGPUComputePipeline(native);
-  }
-  async createComputePipelineAsync(descriptor) {
-    return this.createComputePipeline(descriptor);
-  }
-  createBindGroupLayout(descriptor) {
-    const entries = (descriptor.entries || []).map((e) => ({
-      binding: e.binding,
-      visibility: e.visibility,
-      buffer: e.buffer ? {
-        type: e.buffer.type || 'uniform',
-        hasDynamicOffset: e.buffer.hasDynamicOffset || false,
-        minBindingSize: e.buffer.minBindingSize || 0,
-      } : undefined,
-      storageTexture: e.storageTexture,
-    }));
-    const native = addon.createBindGroupLayout(this._native, entries);
-    return new DoeGPUBindGroupLayout(native);
-  }
-  createBindGroup(descriptor) {
-    const entries = (descriptor.entries || []).map((e) => {
-      const entry = {
-        binding: e.binding,
-        buffer: e.resource?.buffer?._native ?? e.resource?._native ?? null,
-        offset: e.resource?.offset ?? 0,
-      };
-      if (e.resource?.size !== undefined) entry.size = e.resource.size;
-      return entry;
-    });
-    const native = addon.createBindGroup(
-      this._native, descriptor.layout._native, entries);
-    return new DoeGPUBindGroup(native);
-  }
+function usesRawNumericFlags(usage) {
+  return typeof usage === 'number' || (Array.isArray(usage) && usage.some((token) => typeof token === 'number'));
+}
-  createPipelineLayout(descriptor) {
-    const layouts = (descriptor.bindGroupLayouts || []).map((l) => l._native);
-    const native = addon.createPipelineLayout(this._native, layouts);
-    return new DoeGPUPipelineLayout(native);
+function assertLayer3Usage(usage, access, path) {
+  if (usesRawNumericFlags(usage) && !access) {
+    throw new Error(`Doe ${path} accepts raw numeric usage flags only when explicit access is also provided.`);
   }
+}
-  createTexture(descriptor) {
-    const native = addon.createTexture(this._native, {
-      format: descriptor.format || 'rgba8unorm',
-      width: descriptor.size?.[0] ?? descriptor.size?.width ?? descriptor.size ?? 1,
-      height: descriptor.size?.[1] ?? descriptor.size?.height ?? 1,
-      depthOrArrayLayers: descriptor.size?.[2] ?? descriptor.size?.depthOrArrayLayers ?? 1,
-      usage: descriptor.usage || 0,
-      mipLevelCount: descriptor.mipLevelCount || 1,
+function normalizeOnceInput(device, input, index) {
+  if (ArrayBuffer.isView(input) || input instanceof ArrayBuffer) {
+    const buffer = createBufferFromData(device, input, {});
+    return {
+      binding: buffer,
+      buffer,
+      byte_length: resolveBufferSize(input),
+      owned: true,
+    };
+  }
+  if (input && typeof input === 'object' && 'data' in input) {
+    assertLayer3Usage(input.usage, input.access, `compute input ${index} usage`);
+    const buffer = createBufferFromData(device, input.data, {
+      usage: input.usage ?? 'storageRead',
+      label: input.label,
     });
-    return new DoeGPUTexture(native);
-  }
-  createSampler(descriptor = {}) {
-    const native = addon.createSampler(this._native, descriptor);
-    return new DoeGPUSampler(native);
+    return {
+      binding: input.access ? { buffer, access: input.access } : buffer,
+      buffer,
+      byte_length: resolveBufferSize(input.data),
+      owned: true,
+    };
   }
-  createRenderPipeline(descriptor) {
-    const native = addon.createRenderPipeline(this._native);
-    return new DoeGPURenderPipeline(native);
+  if (input && typeof input === 'object' && 'buffer' in input) {
+    return {
+      binding: input,
+      buffer: input.buffer,
+      byte_length: resolveBufferSize(input.buffer),
+      owned: false,
+    };
   }
-  createCommandEncoder(descriptor) {
-    const native = addon.createCommandEncoder(this._native);
-    return new DoeGPUCommandEncoder(native);
+  if (input && typeof input === 'object' && typeof input.size === 'number') {
+    return {
+      binding: input,
+      buffer: input,
+      byte_length: input.size,
+      owned: false,
+    };
   }
-  destroy() {
-    addon.deviceRelease(this._native);
-    this._native = null;
-  }
+  throw new Error(`Doe compute input ${index} must be data, a Doe input spec, or a buffer.`);
 }
-class DoeGPUAdapter {
-  constructor(native, instance) {
-    this._native = native;
-    this._instance = instance;
-    this.features = DOE_FEATURES;
-    this.limits = DOE_LIMITS;
-  }
-  async requestDevice(descriptor) {
-    const device = addon.requestDevice(this._instance, this._native);
-    return new DoeGPUDevice(device, this._instance);
+function normalizeOnceOutput(device, output, inputs) {
+  if (!output || typeof output !== 'object') {
+    throw new Error('Doe compute output is required.');
   }
-  destroy() {
-    addon.adapterRelease(this._native);
-    this._native = null;
+  if (typeof output.type !== 'function') {
+    throw new Error('Doe compute output.type must be a typed-array constructor.');
   }
-}
-class DoeGPU {
-  constructor(instance) {
-    this._instance = instance;
+  const fallbackInputIndex = inputs.length > 0 ? 0 : null;
+  const likeInputIndex = output.likeInput ?? fallbackInputIndex;
+  if (likeInputIndex != null && (!Number.isInteger(likeInputIndex) || likeInputIndex < 0 || likeInputIndex >= inputs.length)) {
+    throw new Error(`Doe compute output.likeInput must reference an input index in [0, ${Math.max(inputs.length - 1, 0)}].`);
   }
+  const size = output.size ?? (
+    likeInputIndex != null && inputs[likeInputIndex]
+      ? inputs[likeInputIndex].byte_length
+      : null
+  );
-  async requestAdapter(options) {
-    const adapter = addon.requestAdapter(this._instance);
-    return new DoeGPUAdapter(adapter, this._instance);
+  if (!(size > 0)) {
+    throw new Error('Doe compute output size must be provided or derived from likeInput.');
   }
-}
-export function create(createArgs = null) {
-  ensureLibrary();
-  const instance = addon.createInstance();
-  return new DoeGPU(instance);
+  assertLayer3Usage(output.usage, output.access, 'compute output usage');
+  const buffer = createBuffer(device, {
+    size,
+    usage: output.usage ?? 'storageReadWrite',
+    label: output.label,
+  });
+  return {
+    binding: output.access ? { buffer, access: output.access } : buffer,
+    buffer,
+    type: output.type,
+    read_options: output.read ?? {},
+  };
 }
-export function setupGlobals(target = globalThis, createArgs = null) {
-  for (const [name, value] of Object.entries(globals)) {
-    if (target[name] === undefined) {
-      Object.defineProperty(target, name, {
-        value,
-        writable: true,
-        configurable: true,
-        enumerable: false,
-      });
-    }
-  }
-  const gpu = create(createArgs);
-  if (typeof target.navigator === 'undefined') {
-    Object.defineProperty(target, 'navigator', {
-      value: { gpu },
-      writable: true,
-      configurable: true,
-      enumerable: false,
-    });
-  } else if (!target.navigator.gpu) {
-    Object.defineProperty(target.navigator, 'gpu', {
-      value: gpu,
-      writable: true,
-      configurable: true,
-      enumerable: false,
+async function computeOnce(device, options) {
+  const inputs = (options.inputs ?? []).map((input, index) => normalizeOnceInput(device, input, index));
+  const output = normalizeOnceOutput(device, options.output, inputs);
+  validateWorkgroups(device, options.workgroups);
+  try {
+    await runKernel(device, {
+      code: options.code,
+      entryPoint: options.entryPoint,
+      bindings: [...inputs.map((input) => input.binding), output.binding],
+      workgroups: options.workgroups,
+      label: options.label,
     });
+    return await readBuffer(device, output.buffer, output.type, output.read_options);
+  } finally {
+    if (typeof output.buffer.destroy === 'function') {
+      output.buffer.destroy();
+    }
+    for (const input of inputs) {
+      if (input.owned && typeof input.buffer.destroy === 'function') {
+        input.buffer.destroy();
+      }
+    }
   }
-  return gpu;
-}
-export async function requestAdapter(adapterOptions = undefined, createArgs = null) {
-  const gpu = create(createArgs);
-  return gpu.requestAdapter(adapterOptions);
 }
-export async function requestDevice(options = {}) {
-  const createArgs = options?.createArgs ?? null;
-  const adapter = await requestAdapter(options?.adapterOptions, createArgs);
-  return adapter.requestDevice(options?.deviceDescriptor);
+function createBoundDoe(device) {
+  /**
+   * Run a one-shot typed-array compute workflow.
+   *
+   * Surface: Doe API `gpu.compute`.
+   * Input: WGSL source, typed-array or buffer inputs, an output spec, and workgroups.
+   * Returns: A promise for the requested typed-array output.
+   *
+   * This is the most opinionated Doe helper. It creates temporary buffers
+   * as needed, uploads host data, dispatches the compute shader once,
+   * reads back the requested output, and destroys temporary resources
+   * before returning.
+   *
+   * This example shows the API in its basic form.
+   *
+   * ```js
+   * const out = await gpu.compute({
+   *   code,
+   *   inputs: [new Float32Array([1, 2, 3, 4])],
+   *   output: { type: Float32Array },
+   *   workgroups: 1,
+   * });
+   * ```
+   *
+   * - Raw numeric usage flags are accepted only when explicit Doe access is also provided.
+   * - Output size defaults from `likeInput` or the first input when possible.
+   * - See `gpu.kernel.run(...)` or `gpu.kernel.create(...)` when you need explicit resource ownership.
+   */
+  const compute = function compute(options) {
+    return computeOnce(device, options);
+  };
+  return {
+    device,
+    buffer: {
+      /**
+       * Create a buffer with explicit size and Doe usage tokens.
+       *
+       * Surface: Doe API `gpu.buffer`.
+       * Input: A buffer size, usage, and optional label or mapping flag.
+       * Returns: A GPU buffer with Doe usage metadata attached when possible.
+       *
+       * This is the explicit Doe helper over `device.createBuffer(...)`. It
+       * accepts Doe usage tokens such as `storageReadWrite`, and when `data`
+       * is provided it allocates and uploads in one step. Doe remembers the
+       * resulting binding access so later helper calls can infer how the
+       * buffer should be bound.
+       *
+       * This example shows the API in its basic form.
+       *
+       * ```js
+       * const src = gpu.buffer.create({ data: new Float32Array([1, 2, 3, 4]) });
+       * const dst = gpu.buffer.create({ size: src.size, usage: "storageReadWrite" });
+       * ```
+       *
+       * - When `data` is provided, usage defaults to `storageRead`.
+       * - Raw numeric usage flags are allowed here for explicit control.
+       * - Buffers created with raw numeric flags may later require `{ buffer, access }`.
+       */
+      create(options) {
+        return createBuffer(device, options);
+      },
+      /**
+       * Read a buffer back into a typed array.
+       *
+       * Surface: Doe API `gpu.buffer`.
+       * Input: A source buffer, a typed-array constructor, and optional offset or size.
+       * Returns: A promise for a newly allocated typed array.
+       *
+       * This reads GPU buffer contents back to JS. If the buffer is already
+       * mappable for read, Doe maps it directly; otherwise Doe stages the copy
+       * through a temporary readback buffer.
+       *
+       * This example shows the API in its basic form.
+       *
+       * ```js
+       * const out = await gpu.buffer.read(dst, Float32Array);
+       * ```
+       *
+       * - `options.offset` and `options.size` let you read a subrange.
+       * - The typed-array constructor must accept a plain `ArrayBuffer`.
+       * - See raw `buffer.mapAsync(...)` when you need manual readback control.
+       */
+      read(options_or_buffer, type, options = {}) {
+        if (arguments.length === 1 && options_or_buffer && typeof options_or_buffer === 'object') {
+          return readBuffer(device, options_or_buffer);
+        }
+        return readBuffer(device, options_or_buffer, type, options);
+      },
+    },
+    kernel: {
+      /**
+       * Compile and dispatch a one-off compute job.
+       *
+       * Surface: Doe API `gpu.kernel`.
+       * Input: WGSL source, bindings, workgroups, and an optional entry point or label.
+       * Returns: A promise that resolves after submission completes.
+       *
+       * This is the explicit one-shot compute path. It builds the pipeline for
+       * the provided shader, dispatches once, and waits for completion.
+       *
+       * This example shows the API in its basic form.
+       *
+       * ```js
+       * await gpu.kernel.run({
+       *   code,
+       *   bindings: [src, dst],
+       *   workgroups: 1,
+       * });
+       * ```
+       *
+       * - `workgroups` may be `number`, `[x, y]`, or `[x, y, z]`.
+       * - Bare buffers without Doe helper metadata require `{ buffer, access }`.
+       * - See `gpu.kernel.create(...)` when you will reuse the shader shape.
+       * - See `gpu.compute(...)` for the narrower typed-array workflow.
+       */
+      run(options) {
+        return runKernel(device, options);
+      },
+      /**
+       * Compile a reusable compute kernel.
+       *
+       * Surface: Doe API `gpu.kernel`.
+       * Input: WGSL source, an optional entry point, and an initial binding shape.
+       * Returns: A `DoeKernel` object with `dispatch(...)`.
+       *
+       * This creates the shader module, bind-group layout, and compute
+       * pipeline once so the same WGSL shape can be dispatched repeatedly.
+       *
+       * This example shows the API in its basic form.
+       *
+       * ```js
+       * const kernel = gpu.kernel.create({
+       *   code,
+       *   bindings: [src, dst],
+       * });
+       * ```
+       *
+       * - Binding access is inferred from the bindings passed at compile time.
+       * - See `kernel.dispatch(...)` to run the compiled kernel.
+       * - See `gpu.kernel.run(...)` when reuse does not matter.
+       */
+      create(options) {
+        return createKernel(device, options);
+      },
+    },
+    compute,
+  };
 }
-export function providerInfo() {
+export function createDoeNamespace({ requestDevice } = {}) {
   return {
-    module: '@simulatte/webgpu-doe',
-    loaded: !!addon && !!DOE_LIB_PATH,
-    loadError: !addon ? 'native addon not found' : !DOE_LIB_PATH ? 'libdoe_webgpu not found' : '',
-    defaultCreateArgs: [],
-    doeNative: true,
-    doeLibraryPath: DOE_LIB_PATH ?? '',
+    /**
+     * Request a device and return the bound Doe API in one step.
+     *
+     * Surface: Doe API namespace.
+     * Input: Optional package-local request options.
+     * Returns: A promise for the bound `gpu` helper object.
+     *
+     * This calls the package-local `requestDevice(...)` implementation and
+     * then wraps the resulting raw device in the bound Doe API.
+     *
+     * This example shows the API in its basic form.
+     *
+     * ```js
+     * const gpu = await doe.requestDevice();
+     * ```
+     *
+     * - Throws if this namespace was created without a `requestDevice` implementation.
+     * - `gpu.device` exposes the underlying raw device when you need lower-level control.
+     * - See `doe.bind(device)` when you already have a raw device.
+     */
+    async requestDevice(options = {}) {
+      if (typeof requestDevice !== 'function') {
+        throw new Error('Doe requestDevice() is unavailable in this context.');
+      }
+      return createBoundDoe(await requestDevice(options));
+    },
+    /**
+     * Wrap an existing device in the bound Doe API.
+     *
+     * Surface: Doe API namespace.
+     * Input: A raw device returned by the package surface.
+     * Returns: The bound `gpu` helper object for that device.
+     *
+     * Use this when you need the raw device first, but still want to opt into
+     * Doe helpers afterward.
+     *
+     * This example shows the API in its basic form.
+     *
+     * ```js
+     * const device = await requestDevice();
+     * const gpu = doe.bind(device);
+     * ```
+     *
+     * - No async work happens here; it only wraps the device you already have.
+     * - See `doe.requestDevice(...)` for the one-step helper entrypoint.
+     */
+    bind(device) {
+      return createBoundDoe(device);
+    },
   };
 }
+export const doe = createDoeNamespace(); // Created with no requestDevice implementation: doe.requestDevice() rejects, doe.bind(device) still works.
+export default doe; // Default export is the same namespace object as the named `doe` export.