@playcanvas/splat-transform 0.5.3 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.mjs +47 -49
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1884,7 +1884,7 @@ let Quat$1 = class Quat {
|
|
|
1884
1884
|
}
|
|
1885
1885
|
};
|
|
1886
1886
|
|
|
1887
|
-
var version$1 = "0.5.3";
|
|
1887
|
+
var version$1 = "0.5.4";
|
|
1888
1888
|
|
|
1889
1889
|
class Column {
|
|
1890
1890
|
name;
|
|
@@ -89294,8 +89294,7 @@ ${useF16 ? 'enable f16;' : ''}
|
|
|
89294
89294
|
|
|
89295
89295
|
struct Uniforms {
|
|
89296
89296
|
numPoints: u32,
|
|
89297
|
-
numCentroids: u32,
|
|
89298
|
-
pointBase: u32
|
|
89297
|
+
numCentroids: u32
|
|
89299
89298
|
};
|
|
89300
89299
|
|
|
89301
89300
|
@group(0) @binding(0) var<uniform> uniforms: Uniforms;
|
|
@@ -89328,7 +89327,7 @@ fn main(
|
|
|
89328
89327
|
@builtin(num_workgroups) num_workgroups: vec3u
|
|
89329
89328
|
) {
|
|
89330
89329
|
// calculate row index for this thread point
|
|
89331
|
-
let pointIndex =
|
|
89330
|
+
let pointIndex = global_id.x + global_id.y * num_workgroups.x * 64u;
|
|
89332
89331
|
|
|
89333
89332
|
// copy the point data from global memory
|
|
89334
89333
|
var point: array<${floatType}, numColumns>;
|
|
@@ -89385,37 +89384,37 @@ fn main(
|
|
|
89385
89384
|
const roundUp = (value, multiple) => {
|
|
89386
89385
|
return Math.ceil(value / multiple) * multiple;
|
|
89387
89386
|
};
|
|
89388
|
-
const interleaveData = (dataTable,
|
|
89389
|
-
const {
|
|
89390
|
-
if (
|
|
89391
|
-
|
|
89387
|
+
const interleaveData = (result, dataTable, numRows, rowOffset) => {
|
|
89388
|
+
const { numColumns } = dataTable;
|
|
89389
|
+
if (result instanceof Uint16Array) {
|
|
89390
|
+
// interleave shorts
|
|
89392
89391
|
for (let c = 0; c < numColumns; ++c) {
|
|
89393
89392
|
const column = dataTable.columns[c];
|
|
89394
89393
|
for (let r = 0; r < numRows; ++r) {
|
|
89395
|
-
result[r * numColumns + c] = FloatPacking.float2Half(column.data[r]);
|
|
89394
|
+
result[r * numColumns + c] = FloatPacking.float2Half(column.data[rowOffset + r]);
|
|
89396
89395
|
}
|
|
89397
89396
|
}
|
|
89398
|
-
return result;
|
|
89399
89397
|
}
|
|
89400
|
-
|
|
89401
|
-
|
|
89402
|
-
|
|
89403
|
-
|
|
89404
|
-
|
|
89398
|
+
else {
|
|
89399
|
+
// interleave floats
|
|
89400
|
+
for (let c = 0; c < numColumns; ++c) {
|
|
89401
|
+
const column = dataTable.columns[c];
|
|
89402
|
+
for (let r = 0; r < numRows; ++r) {
|
|
89403
|
+
result[r * numColumns + c] = column.data[rowOffset + r];
|
|
89404
|
+
}
|
|
89405
89405
|
}
|
|
89406
89406
|
}
|
|
89407
|
-
return result;
|
|
89408
89407
|
};
|
|
89409
|
-
class GpuCluster {
|
|
89408
|
+
class GpuClustering {
|
|
89410
89409
|
execute;
|
|
89411
89410
|
destroy;
|
|
89412
|
-
constructor(gpuDevice,
|
|
89411
|
+
constructor(gpuDevice, numColumns, numCentroids) {
|
|
89413
89412
|
const device = gpuDevice.app.graphicsDevice;
|
|
89414
89413
|
// Check if device supports f16
|
|
89415
|
-
const useF16 = 'supportsShaderF16' in device && device.supportsShaderF16;
|
|
89416
|
-
const
|
|
89417
|
-
const
|
|
89418
|
-
const
|
|
89414
|
+
const useF16 = !!('supportsShaderF16' in device && device.supportsShaderF16);
|
|
89415
|
+
const workgroupSize = 64;
|
|
89416
|
+
const workgroupsPerBatch = 1024;
|
|
89417
|
+
const batchSize = workgroupsPerBatch * workgroupSize;
|
|
89419
89418
|
const bindGroupFormat = new BindGroupFormat(device, [
|
|
89420
89419
|
new BindUniformBufferFormat('uniforms', SHADERSTAGE_COMPUTE),
|
|
89421
89420
|
new BindStorageBufferFormat('pointsBuffer', SHADERSTAGE_COMPUTE, true),
|
|
@@ -89430,47 +89429,43 @@ class GpuCluster {
|
|
|
89430
89429
|
computeUniformBufferFormats: {
|
|
89431
89430
|
uniforms: new UniformBufferFormat(device, [
|
|
89432
89431
|
new UniformFormat('numPoints', UNIFORMTYPE_UINT),
|
|
89433
|
-
new UniformFormat('numCentroids', UNIFORMTYPE_UINT),
|
|
89434
|
-
new UniformFormat('pointBase', UNIFORMTYPE_UINT)
|
|
89432
|
+
new UniformFormat('numCentroids', UNIFORMTYPE_UINT)
|
|
89435
89433
|
])
|
|
89436
89434
|
},
|
|
89437
89435
|
// @ts-ignore
|
|
89438
89436
|
computeBindGroupFormat: bindGroupFormat
|
|
89439
89437
|
});
|
|
89438
|
+
const interleavedPoints = useF16 ? new Uint16Array(roundUp(numColumns * batchSize, 2)) : new Float32Array(numColumns * batchSize);
|
|
89439
|
+
const interleavedCentroids = useF16 ? new Uint16Array(roundUp(numColumns * numCentroids, 2)) : new Float32Array(numColumns * numCentroids);
|
|
89440
|
+
const resultsData = new Uint32Array(batchSize);
|
|
89441
|
+
const pointsBuffer = new StorageBuffer(device, interleavedPoints.byteLength, BUFFERUSAGE_COPY_DST);
|
|
89442
|
+
const centroidsBuffer = new StorageBuffer(device, interleavedCentroids.byteLength, BUFFERUSAGE_COPY_DST);
|
|
89443
|
+
const resultsBuffer = new StorageBuffer(device, resultsData.byteLength, BUFFERUSAGE_COPY_SRC | BUFFERUSAGE_COPY_DST);
|
|
89440
89444
|
const compute = new Compute(device, shader, 'compute-cluster');
|
|
89441
|
-
const pointsBuffer = new StorageBuffer(device, useF16 ? roundUp(numColumns * numPoints, 2) * 2 : numColumns * numPoints * 4, BUFFERUSAGE_COPY_DST);
|
|
89442
|
-
const centroidsBuffer = new StorageBuffer(device, numColumns * numCentroids * bytesPerFloat, BUFFERUSAGE_COPY_DST);
|
|
89443
|
-
const resultsBuffer = new StorageBuffer(device, numPoints * 4, BUFFERUSAGE_COPY_SRC | BUFFERUSAGE_COPY_DST);
|
|
89444
|
-
// interleave the points table data and write to gpu
|
|
89445
|
-
const interleavedPoints = interleaveData(points, useF16);
|
|
89446
|
-
pointsBuffer.write(0, interleavedPoints, 0, interleavedPoints.length);
|
|
89447
89445
|
compute.setParameter('pointsBuffer', pointsBuffer);
|
|
89448
89446
|
compute.setParameter('centroidsBuffer', centroidsBuffer);
|
|
89449
89447
|
compute.setParameter('resultsBuffer', resultsBuffer);
|
|
89450
|
-
this.execute = async (centroids, labels) => {
|
|
89451
|
-
|
|
89452
|
-
const interleavedCentroids = interleaveData(centroids, useF16);
|
|
89453
|
-
centroidsBuffer.write(0, interleavedCentroids, 0, interleavedCentroids.length);
|
|
89454
|
-
compute.setParameter('numPoints', points.numRows);
|
|
89455
|
-
compute.setParameter('numCentroids', centroids.numRows);
|
|
89456
|
-
// execute in batches of 1024 worksgroups
|
|
89457
|
-
const workgroupSize = 64;
|
|
89458
|
-
const workgroupsPerBatch = 1024;
|
|
89459
|
-
const batchSize = workgroupsPerBatch * workgroupSize;
|
|
89448
|
+
this.execute = async (points, centroids, labels) => {
|
|
89449
|
+
const numPoints = points.numRows;
|
|
89460
89450
|
const numBatches = Math.ceil(numPoints / batchSize);
|
|
89451
|
+
// upload centroid data to gpu
|
|
89452
|
+
interleaveData(interleavedCentroids, centroids, numCentroids, 0);
|
|
89453
|
+
centroidsBuffer.write(0, interleavedCentroids, 0, interleavedCentroids.length);
|
|
89454
|
+
compute.setParameter('numCentroids', numCentroids);
|
|
89461
89455
|
for (let batch = 0; batch < numBatches; batch++) {
|
|
89462
89456
|
const currentBatchSize = Math.min(numPoints - batch * batchSize, batchSize);
|
|
89463
89457
|
const groups = Math.ceil(currentBatchSize / 64);
|
|
89464
|
-
|
|
89458
|
+
// write this batch of point data to gpu
|
|
89459
|
+
interleaveData(interleavedPoints, points, currentBatchSize, batch * batchSize);
|
|
89460
|
+
pointsBuffer.write(0, interleavedPoints, 0, useF16 ? roundUp(numColumns * currentBatchSize, 2) : numColumns * currentBatchSize);
|
|
89461
|
+
compute.setParameter('numPoints', currentBatchSize);
|
|
89465
89462
|
// start compute job
|
|
89466
89463
|
compute.setupDispatch(groups);
|
|
89467
89464
|
device.computeDispatch([compute], `cluster-dispatch-${batch}`);
|
|
89468
|
-
//
|
|
89469
|
-
|
|
89470
|
-
|
|
89465
|
+
// read results from gpu and store in labels
|
|
89466
|
+
await resultsBuffer.read(0, currentBatchSize * 4, resultsData, true);
|
|
89467
|
+
labels.set(resultsData.subarray(0, currentBatchSize), batch * batchSize);
|
|
89471
89468
|
}
|
|
89472
|
-
// read results from gpu
|
|
89473
|
-
await resultsBuffer.read(0, undefined, labels, true);
|
|
89474
89469
|
};
|
|
89475
89470
|
this.destroy = () => {
|
|
89476
89471
|
pointsBuffer.destroy();
|
|
@@ -89550,14 +89545,14 @@ const kmeans = async (points, k, iterations, device) => {
|
|
|
89550
89545
|
// construct centroids data table and assign initial values
|
|
89551
89546
|
const centroids = new DataTable(points.columns.map(c => new Column(c.name, new Float32Array(k))));
|
|
89552
89547
|
initializeCentroids(points, centroids, row);
|
|
89553
|
-
const
|
|
89548
|
+
const gpuClustering = device && new GpuClustering(device, points.numColumns, k);
|
|
89554
89549
|
const labels = new Uint32Array(points.numRows);
|
|
89555
89550
|
let converged = false;
|
|
89556
89551
|
let steps = 0;
|
|
89557
89552
|
console.log(`Running k-means clustering: dims=${points.numColumns} points=${points.numRows} clusters=${k} iterations=${iterations}...`);
|
|
89558
89553
|
while (!converged) {
|
|
89559
|
-
if (
|
|
89560
|
-
await
|
|
89554
|
+
if (gpuClustering) {
|
|
89555
|
+
await gpuClustering.execute(points, centroids, labels);
|
|
89561
89556
|
}
|
|
89562
89557
|
else {
|
|
89563
89558
|
clusterKdTreeCpu(points, centroids, labels);
|
|
@@ -89574,6 +89569,9 @@ const kmeans = async (points, k, iterations, device) => {
|
|
|
89574
89569
|
}
|
|
89575
89570
|
stdout.write('#');
|
|
89576
89571
|
}
|
|
89572
|
+
if (gpuClustering) {
|
|
89573
|
+
gpuClustering.destroy();
|
|
89574
|
+
}
|
|
89577
89575
|
console.log(' done 🎉');
|
|
89578
89576
|
return { centroids, labels };
|
|
89579
89577
|
};
|