npm - @playcanvas/splat-transform - Versions diffs - 0.5.3 → 0.5.4 - Mend

@playcanvas/splat-transform 0.5.3 → 0.5.4

This diff shows the differences between publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.mjs CHANGED Viewed

@@ -1884,7 +1884,7 @@ let Quat$1 = class Quat {
 		}
 };
-var version$1 = "0.5.3";
+var version$1 = "0.5.4";
 class Column {
     name;
@@ -89294,8 +89294,7 @@ ${useF16 ? 'enable f16;' : ''}
 struct Uniforms {
     numPoints: u32,
-    numCentroids: u32,
-    pointBase: u32
+    numCentroids: u32
 };
 @group(0) @binding(0) var<uniform> uniforms: Uniforms;
@@ -89328,7 +89327,7 @@ fn main(
     @builtin(num_workgroups) num_workgroups: vec3u
 ) {
     // calculate row index for this thread point
-    let pointIndex = uniforms.pointBase + global_id.x + global_id.y * num_workgroups.x * 64u;
+    let pointIndex = global_id.x + global_id.y * num_workgroups.x * 64u;
     // copy the point data from global memory
     var point: array<${floatType}, numColumns>;
@@ -89385,37 +89384,37 @@ fn main(
 const roundUp = (value, multiple) => {
     return Math.ceil(value / multiple) * multiple;
 };
-const interleaveData = (dataTable, useF16) => {
-    const { numRows, numColumns } = dataTable;
-    if (useF16) {
-        const result = new Uint16Array(roundUp(numColumns * numRows, 2));
+const interleaveData = (result, dataTable, numRows, rowOffset) => {
+    const { numColumns } = dataTable;
+    if (result instanceof Uint16Array) {
+        // interleave shorts
         for (let c = 0; c < numColumns; ++c) {
             const column = dataTable.columns[c];
             for (let r = 0; r < numRows; ++r) {
-                result[r * numColumns + c] = FloatPacking.float2Half(column.data[r]);
+                result[r * numColumns + c] = FloatPacking.float2Half(column.data[rowOffset + r]);
             }
         }
-        return result;
     }
-    const result = new Float32Array(numColumns * numRows);
-    for (let c = 0; c < numColumns; ++c) {
-        const column = dataTable.columns[c];
-        for (let r = 0; r < numRows; ++r) {
-            result[r * numColumns + c] = column.data[r];
+    else {
+        // interleave floats
+        for (let c = 0; c < numColumns; ++c) {
+            const column = dataTable.columns[c];
+            for (let r = 0; r < numRows; ++r) {
+                result[r * numColumns + c] = column.data[rowOffset + r];
+            }
         }
     }
-    return result;
 };
-class GpuCluster {
+class GpuClustering {
     execute;
     destroy;
-    constructor(gpuDevice, points, numCentroids) {
+    constructor(gpuDevice, numColumns, numCentroids) {
         const device = gpuDevice.app.graphicsDevice;
         // Check if device supports f16
-        const useF16 = 'supportsShaderF16' in device && device.supportsShaderF16;
-        const bytesPerFloat = useF16 ? 2 : 4;
-        const numPoints = points.numRows;
-        const numColumns = points.numColumns;
+        const useF16 = !!('supportsShaderF16' in device && device.supportsShaderF16);
+        const workgroupSize = 64;
+        const workgroupsPerBatch = 1024;
+        const batchSize = workgroupsPerBatch * workgroupSize;
         const bindGroupFormat = new BindGroupFormat(device, [
             new BindUniformBufferFormat('uniforms', SHADERSTAGE_COMPUTE),
             new BindStorageBufferFormat('pointsBuffer', SHADERSTAGE_COMPUTE, true),
@@ -89430,47 +89429,43 @@ class GpuCluster {
             computeUniformBufferFormats: {
                 uniforms: new UniformBufferFormat(device, [
                     new UniformFormat('numPoints', UNIFORMTYPE_UINT),
-                    new UniformFormat('numCentroids', UNIFORMTYPE_UINT),
-                    new UniformFormat('pointBase', UNIFORMTYPE_UINT)
+                    new UniformFormat('numCentroids', UNIFORMTYPE_UINT)
                 ])
             },
             // @ts-ignore
             computeBindGroupFormat: bindGroupFormat
         });
+        const interleavedPoints = useF16 ? new Uint16Array(roundUp(numColumns * batchSize, 2)) : new Float32Array(numColumns * batchSize);
+        const interleavedCentroids = useF16 ? new Uint16Array(roundUp(numColumns * numCentroids, 2)) : new Float32Array(numColumns * numCentroids);
+        const resultsData = new Uint32Array(batchSize);
+        const pointsBuffer = new StorageBuffer(device, interleavedPoints.byteLength, BUFFERUSAGE_COPY_DST);
+        const centroidsBuffer = new StorageBuffer(device, interleavedCentroids.byteLength, BUFFERUSAGE_COPY_DST);
+        const resultsBuffer = new StorageBuffer(device, resultsData.byteLength, BUFFERUSAGE_COPY_SRC | BUFFERUSAGE_COPY_DST);
         const compute = new Compute(device, shader, 'compute-cluster');
-        const pointsBuffer = new StorageBuffer(device, useF16 ? roundUp(numColumns * numPoints, 2) * 2 : numColumns * numPoints * 4, BUFFERUSAGE_COPY_DST);
-        const centroidsBuffer = new StorageBuffer(device, numColumns * numCentroids * bytesPerFloat, BUFFERUSAGE_COPY_DST);
-        const resultsBuffer = new StorageBuffer(device, numPoints * 4, BUFFERUSAGE_COPY_SRC | BUFFERUSAGE_COPY_DST);
-        // interleave the points table data and write to gpu
-        const interleavedPoints = interleaveData(points, useF16);
-        pointsBuffer.write(0, interleavedPoints, 0, interleavedPoints.length);
         compute.setParameter('pointsBuffer', pointsBuffer);
         compute.setParameter('centroidsBuffer', centroidsBuffer);
         compute.setParameter('resultsBuffer', resultsBuffer);
-        this.execute = async (centroids, labels) => {
-            // interleave centroids and write to gpu
-            const interleavedCentroids = interleaveData(centroids, useF16);
-            centroidsBuffer.write(0, interleavedCentroids, 0, interleavedCentroids.length);
-            compute.setParameter('numPoints', points.numRows);
-            compute.setParameter('numCentroids', centroids.numRows);
-            // execute in batches of 1024 worksgroups
-            const workgroupSize = 64;
-            const workgroupsPerBatch = 1024;
-            const batchSize = workgroupsPerBatch * workgroupSize;
+        this.execute = async (points, centroids, labels) => {
+            const numPoints = points.numRows;
             const numBatches = Math.ceil(numPoints / batchSize);
+            // upload centroid data to gpu
+            interleaveData(interleavedCentroids, centroids, numCentroids, 0);
+            centroidsBuffer.write(0, interleavedCentroids, 0, interleavedCentroids.length);
+            compute.setParameter('numCentroids', numCentroids);
             for (let batch = 0; batch < numBatches; batch++) {
                 const currentBatchSize = Math.min(numPoints - batch * batchSize, batchSize);
                 const groups = Math.ceil(currentBatchSize / 64);
-                compute.setParameter('pointBase', batch * batchSize);
+                // write this batch of point data to gpu
+                interleaveData(interleavedPoints, points, currentBatchSize, batch * batchSize);
+                pointsBuffer.write(0, interleavedPoints, 0, useF16 ? roundUp(numColumns * currentBatchSize, 2) : numColumns * currentBatchSize);
+                compute.setParameter('numPoints', currentBatchSize);
                 // start compute job
                 compute.setupDispatch(groups);
                 device.computeDispatch([compute], `cluster-dispatch-${batch}`);
-                // FIXME: submit call is required, but not public API
-                // @ts-ignore
-                device.submit();
+                // read results from gpu and store in labels
+                await resultsBuffer.read(0, currentBatchSize * 4, resultsData, true);
+                labels.set(resultsData.subarray(0, currentBatchSize), batch * batchSize);
             }
-            // read results from gpu
-            await resultsBuffer.read(0, undefined, labels, true);
         };
         this.destroy = () => {
             pointsBuffer.destroy();
@@ -89550,14 +89545,14 @@ const kmeans = async (points, k, iterations, device) => {
     // construct centroids data table and assign initial values
     const centroids = new DataTable(points.columns.map(c => new Column(c.name, new Float32Array(k))));
     initializeCentroids(points, centroids, row);
-    const gpuCluster = device && new GpuCluster(device, points, k);
+    const gpuClustering = device && new GpuClustering(device, points.numColumns, k);
     const labels = new Uint32Array(points.numRows);
     let converged = false;
     let steps = 0;
     console.log(`Running k-means clustering: dims=${points.numColumns} points=${points.numRows} clusters=${k} iterations=${iterations}...`);
     while (!converged) {
-        if (gpuCluster) {
-            await gpuCluster.execute(centroids, labels);
+        if (gpuClustering) {
+            await gpuClustering.execute(points, centroids, labels);
         }
         else {
             clusterKdTreeCpu(points, centroids, labels);
@@ -89574,6 +89569,9 @@ const kmeans = async (points, k, iterations, device) => {
         }
         stdout.write('#');
     }
+    if (gpuClustering) {
+        gpuClustering.destroy();
+    }
     console.log(' done 🎉');
     return { centroids, labels };
 };