@playcanvas/splat-transform 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1884,7 +1884,7 @@ let Quat$1 = class Quat {
1884
1884
  }
1885
1885
  };
1886
1886
 
1887
- var version$1 = "0.5.3";
1887
+ var version$1 = "0.5.4";
1888
1888
 
1889
1889
  class Column {
1890
1890
  name;
@@ -89294,8 +89294,7 @@ ${useF16 ? 'enable f16;' : ''}
89294
89294
 
89295
89295
  struct Uniforms {
89296
89296
  numPoints: u32,
89297
- numCentroids: u32,
89298
- pointBase: u32
89297
+ numCentroids: u32
89299
89298
  };
89300
89299
 
89301
89300
  @group(0) @binding(0) var<uniform> uniforms: Uniforms;
@@ -89328,7 +89327,7 @@ fn main(
89328
89327
  @builtin(num_workgroups) num_workgroups: vec3u
89329
89328
  ) {
89330
89329
  // calculate row index for this thread point
89331
- let pointIndex = uniforms.pointBase + global_id.x + global_id.y * num_workgroups.x * 64u;
89330
+ let pointIndex = global_id.x + global_id.y * num_workgroups.x * 64u;
89332
89331
 
89333
89332
  // copy the point data from global memory
89334
89333
  var point: array<${floatType}, numColumns>;
@@ -89385,37 +89384,37 @@ fn main(
89385
89384
  const roundUp = (value, multiple) => {
89386
89385
  return Math.ceil(value / multiple) * multiple;
89387
89386
  };
89388
- const interleaveData = (dataTable, useF16) => {
89389
- const { numRows, numColumns } = dataTable;
89390
- if (useF16) {
89391
- const result = new Uint16Array(roundUp(numColumns * numRows, 2));
89387
+ const interleaveData = (result, dataTable, numRows, rowOffset) => {
89388
+ const { numColumns } = dataTable;
89389
+ if (result instanceof Uint16Array) {
89390
+ // interleave shorts
89392
89391
  for (let c = 0; c < numColumns; ++c) {
89393
89392
  const column = dataTable.columns[c];
89394
89393
  for (let r = 0; r < numRows; ++r) {
89395
- result[r * numColumns + c] = FloatPacking.float2Half(column.data[r]);
89394
+ result[r * numColumns + c] = FloatPacking.float2Half(column.data[rowOffset + r]);
89396
89395
  }
89397
89396
  }
89398
- return result;
89399
89397
  }
89400
- const result = new Float32Array(numColumns * numRows);
89401
- for (let c = 0; c < numColumns; ++c) {
89402
- const column = dataTable.columns[c];
89403
- for (let r = 0; r < numRows; ++r) {
89404
- result[r * numColumns + c] = column.data[r];
89398
+ else {
89399
+ // interleave floats
89400
+ for (let c = 0; c < numColumns; ++c) {
89401
+ const column = dataTable.columns[c];
89402
+ for (let r = 0; r < numRows; ++r) {
89403
+ result[r * numColumns + c] = column.data[rowOffset + r];
89404
+ }
89405
89405
  }
89406
89406
  }
89407
- return result;
89408
89407
  };
89409
- class GpuCluster {
89408
+ class GpuClustering {
89410
89409
  execute;
89411
89410
  destroy;
89412
- constructor(gpuDevice, points, numCentroids) {
89411
+ constructor(gpuDevice, numColumns, numCentroids) {
89413
89412
  const device = gpuDevice.app.graphicsDevice;
89414
89413
  // Check if device supports f16
89415
- const useF16 = 'supportsShaderF16' in device && device.supportsShaderF16;
89416
- const bytesPerFloat = useF16 ? 2 : 4;
89417
- const numPoints = points.numRows;
89418
- const numColumns = points.numColumns;
89414
+ const useF16 = !!('supportsShaderF16' in device && device.supportsShaderF16);
89415
+ const workgroupSize = 64;
89416
+ const workgroupsPerBatch = 1024;
89417
+ const batchSize = workgroupsPerBatch * workgroupSize;
89419
89418
  const bindGroupFormat = new BindGroupFormat(device, [
89420
89419
  new BindUniformBufferFormat('uniforms', SHADERSTAGE_COMPUTE),
89421
89420
  new BindStorageBufferFormat('pointsBuffer', SHADERSTAGE_COMPUTE, true),
@@ -89430,47 +89429,43 @@ class GpuCluster {
89430
89429
  computeUniformBufferFormats: {
89431
89430
  uniforms: new UniformBufferFormat(device, [
89432
89431
  new UniformFormat('numPoints', UNIFORMTYPE_UINT),
89433
- new UniformFormat('numCentroids', UNIFORMTYPE_UINT),
89434
- new UniformFormat('pointBase', UNIFORMTYPE_UINT)
89432
+ new UniformFormat('numCentroids', UNIFORMTYPE_UINT)
89435
89433
  ])
89436
89434
  },
89437
89435
  // @ts-ignore
89438
89436
  computeBindGroupFormat: bindGroupFormat
89439
89437
  });
89438
+ const interleavedPoints = useF16 ? new Uint16Array(roundUp(numColumns * batchSize, 2)) : new Float32Array(numColumns * batchSize);
89439
+ const interleavedCentroids = useF16 ? new Uint16Array(roundUp(numColumns * numCentroids, 2)) : new Float32Array(numColumns * numCentroids);
89440
+ const resultsData = new Uint32Array(batchSize);
89441
+ const pointsBuffer = new StorageBuffer(device, interleavedPoints.byteLength, BUFFERUSAGE_COPY_DST);
89442
+ const centroidsBuffer = new StorageBuffer(device, interleavedCentroids.byteLength, BUFFERUSAGE_COPY_DST);
89443
+ const resultsBuffer = new StorageBuffer(device, resultsData.byteLength, BUFFERUSAGE_COPY_SRC | BUFFERUSAGE_COPY_DST);
89440
89444
  const compute = new Compute(device, shader, 'compute-cluster');
89441
- const pointsBuffer = new StorageBuffer(device, useF16 ? roundUp(numColumns * numPoints, 2) * 2 : numColumns * numPoints * 4, BUFFERUSAGE_COPY_DST);
89442
- const centroidsBuffer = new StorageBuffer(device, numColumns * numCentroids * bytesPerFloat, BUFFERUSAGE_COPY_DST);
89443
- const resultsBuffer = new StorageBuffer(device, numPoints * 4, BUFFERUSAGE_COPY_SRC | BUFFERUSAGE_COPY_DST);
89444
- // interleave the points table data and write to gpu
89445
- const interleavedPoints = interleaveData(points, useF16);
89446
- pointsBuffer.write(0, interleavedPoints, 0, interleavedPoints.length);
89447
89445
  compute.setParameter('pointsBuffer', pointsBuffer);
89448
89446
  compute.setParameter('centroidsBuffer', centroidsBuffer);
89449
89447
  compute.setParameter('resultsBuffer', resultsBuffer);
89450
- this.execute = async (centroids, labels) => {
89451
- // interleave centroids and write to gpu
89452
- const interleavedCentroids = interleaveData(centroids, useF16);
89453
- centroidsBuffer.write(0, interleavedCentroids, 0, interleavedCentroids.length);
89454
- compute.setParameter('numPoints', points.numRows);
89455
- compute.setParameter('numCentroids', centroids.numRows);
89456
- // execute in batches of 1024 worksgroups
89457
- const workgroupSize = 64;
89458
- const workgroupsPerBatch = 1024;
89459
- const batchSize = workgroupsPerBatch * workgroupSize;
89448
+ this.execute = async (points, centroids, labels) => {
89449
+ const numPoints = points.numRows;
89460
89450
  const numBatches = Math.ceil(numPoints / batchSize);
89451
+ // upload centroid data to gpu
89452
+ interleaveData(interleavedCentroids, centroids, numCentroids, 0);
89453
+ centroidsBuffer.write(0, interleavedCentroids, 0, interleavedCentroids.length);
89454
+ compute.setParameter('numCentroids', numCentroids);
89461
89455
  for (let batch = 0; batch < numBatches; batch++) {
89462
89456
  const currentBatchSize = Math.min(numPoints - batch * batchSize, batchSize);
89463
89457
  const groups = Math.ceil(currentBatchSize / 64);
89464
- compute.setParameter('pointBase', batch * batchSize);
89458
+ // write this batch of point data to gpu
89459
+ interleaveData(interleavedPoints, points, currentBatchSize, batch * batchSize);
89460
+ pointsBuffer.write(0, interleavedPoints, 0, useF16 ? roundUp(numColumns * currentBatchSize, 2) : numColumns * currentBatchSize);
89461
+ compute.setParameter('numPoints', currentBatchSize);
89465
89462
  // start compute job
89466
89463
  compute.setupDispatch(groups);
89467
89464
  device.computeDispatch([compute], `cluster-dispatch-${batch}`);
89468
- // FIXME: submit call is required, but not public API
89469
- // @ts-ignore
89470
- device.submit();
89465
+ // read results from gpu and store in labels
89466
+ await resultsBuffer.read(0, currentBatchSize * 4, resultsData, true);
89467
+ labels.set(resultsData.subarray(0, currentBatchSize), batch * batchSize);
89471
89468
  }
89472
- // read results from gpu
89473
- await resultsBuffer.read(0, undefined, labels, true);
89474
89469
  };
89475
89470
  this.destroy = () => {
89476
89471
  pointsBuffer.destroy();
@@ -89550,14 +89545,14 @@ const kmeans = async (points, k, iterations, device) => {
89550
89545
  // construct centroids data table and assign initial values
89551
89546
  const centroids = new DataTable(points.columns.map(c => new Column(c.name, new Float32Array(k))));
89552
89547
  initializeCentroids(points, centroids, row);
89553
- const gpuCluster = device && new GpuCluster(device, points, k);
89548
+ const gpuClustering = device && new GpuClustering(device, points.numColumns, k);
89554
89549
  const labels = new Uint32Array(points.numRows);
89555
89550
  let converged = false;
89556
89551
  let steps = 0;
89557
89552
  console.log(`Running k-means clustering: dims=${points.numColumns} points=${points.numRows} clusters=${k} iterations=${iterations}...`);
89558
89553
  while (!converged) {
89559
- if (gpuCluster) {
89560
- await gpuCluster.execute(centroids, labels);
89554
+ if (gpuClustering) {
89555
+ await gpuClustering.execute(points, centroids, labels);
89561
89556
  }
89562
89557
  else {
89563
89558
  clusterKdTreeCpu(points, centroids, labels);
@@ -89574,6 +89569,9 @@ const kmeans = async (points, k, iterations, device) => {
89574
89569
  }
89575
89570
  stdout.write('#');
89576
89571
  }
89572
+ if (gpuClustering) {
89573
+ gpuClustering.destroy();
89574
+ }
89577
89575
  console.log(' done 🎉');
89578
89576
  return { centroids, labels };
89579
89577
  };