@manycore/aholo-splat-transform 1.2.7 → 1.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/CHANGELOG.md +120 -106
  2. package/COPYRIGHT.md +17 -0
  3. package/README.md +39 -39
  4. package/THIRD_PARTY_LICENSES.txt +1373 -0
  5. package/bin/cli.js +125 -118
  6. package/dist/SplatData.d.ts +67 -67
  7. package/dist/SplatData.js +167 -156
  8. package/dist/constant.d.ts +3 -3
  9. package/dist/constant.js +13 -13
  10. package/dist/file/IFile.d.ts +5 -5
  11. package/dist/file/IFile.js +1 -1
  12. package/dist/file/esz.d.ts +11 -0
  13. package/dist/file/esz.js +337 -0
  14. package/dist/file/index.d.ts +8 -7
  15. package/dist/file/index.js +7 -6
  16. package/dist/file/ksplat.d.ts +12 -12
  17. package/dist/file/ksplat.js +293 -232
  18. package/dist/file/lcc.d.ts +11 -11
  19. package/dist/file/lcc.js +161 -157
  20. package/dist/file/ply.d.ts +13 -13
  21. package/dist/file/ply.js +439 -388
  22. package/dist/file/sog.d.ts +80 -80
  23. package/dist/file/sog.js +525 -504
  24. package/dist/file/splat.d.ts +6 -6
  25. package/dist/file/splat.js +119 -99
  26. package/dist/file/spz.d.ts +11 -8
  27. package/dist/file/spz.js +597 -400
  28. package/dist/file/voxel.d.ts +43 -37
  29. package/dist/file/voxel.js +411 -280
  30. package/dist/index.d.ts +33 -33
  31. package/dist/index.js +54 -54
  32. package/dist/native/index.d.ts +54 -54
  33. package/dist/native/index.js +122 -128
  34. package/dist/native/utils.d.ts +1 -0
  35. package/dist/native/utils.js +54 -0
  36. package/dist/tasks/AutoChunkLodTask.d.ts +13 -13
  37. package/dist/tasks/AutoChunkLodTask.js +117 -117
  38. package/dist/tasks/AutoLodTask.d.ts +10 -10
  39. package/dist/tasks/AutoLodTask.js +20 -20
  40. package/dist/tasks/BaseTask.d.ts +15 -15
  41. package/dist/tasks/BaseTask.js +5 -5
  42. package/dist/tasks/FlexLodTask.d.ts +12 -12
  43. package/dist/tasks/FlexLodTask.js +54 -44
  44. package/dist/tasks/ModifyTask.d.ts +9 -9
  45. package/dist/tasks/ModifyTask.js +166 -156
  46. package/dist/tasks/ReadTask.d.ts +9 -9
  47. package/dist/tasks/ReadTask.js +29 -29
  48. package/dist/tasks/SkeletonLodTask.d.ts +10 -10
  49. package/dist/tasks/SkeletonLodTask.js +176 -156
  50. package/dist/tasks/VoxelTask.d.ts +35 -30
  51. package/dist/tasks/VoxelTask.js +40 -37
  52. package/dist/tasks/WriteTask.d.ts +12 -11
  53. package/dist/tasks/WriteTask.js +70 -70
  54. package/dist/utils/BufferReader.d.ts +12 -12
  55. package/dist/utils/BufferReader.js +45 -47
  56. package/dist/utils/Logger.d.ts +11 -11
  57. package/dist/utils/Logger.js +40 -38
  58. package/dist/utils/StreamChunkDecoder.d.ts +16 -16
  59. package/dist/utils/StreamChunkDecoder.js +31 -36
  60. package/dist/utils/index.d.ts +27 -27
  61. package/dist/utils/index.js +101 -101
  62. package/dist/utils/k-means.d.ts +4 -4
  63. package/dist/utils/k-means.js +340 -350
  64. package/dist/utils/math.d.ts +46 -46
  65. package/dist/utils/math.js +350 -351
  66. package/dist/utils/quantize-1d.d.ts +4 -4
  67. package/dist/utils/quantize-1d.js +164 -164
  68. package/dist/utils/sh-rotate.d.ts +2 -2
  69. package/dist/utils/sh-rotate.js +236 -175
  70. package/dist/utils/splat.d.ts +21 -20
  71. package/dist/utils/splat.js +397 -378
  72. package/dist/utils/voxel/binary.d.ts +8 -0
  73. package/dist/utils/voxel/binary.js +176 -0
  74. package/dist/utils/voxel/common.d.ts +178 -162
  75. package/dist/utils/voxel/common.js +1752 -1700
  76. package/dist/utils/voxel/coplanar-merge.d.ts +63 -63
  77. package/dist/utils/voxel/coplanar-merge.js +818 -819
  78. package/dist/utils/voxel/filter-cluster.d.ts +20 -0
  79. package/dist/utils/voxel/filter-cluster.js +628 -0
  80. package/dist/utils/voxel/gpu-dilation.d.ts +2 -2
  81. package/dist/utils/voxel/gpu-dilation.js +677 -665
  82. package/dist/utils/voxel/marching-cubes.d.ts +42 -42
  83. package/dist/utils/voxel/marching-cubes.js +1645 -1657
  84. package/dist/utils/voxel/mesh.d.ts +3 -3
  85. package/dist/utils/voxel/mesh.js +130 -130
  86. package/dist/utils/voxel/nav.d.ts +29 -29
  87. package/dist/utils/voxel/nav.js +1068 -1043
  88. package/dist/utils/voxel/postprocess.d.ts +23 -23
  89. package/dist/utils/voxel/postprocess.js +408 -375
  90. package/dist/utils/voxel/voxel-faces.d.ts +18 -18
  91. package/dist/utils/voxel/voxel-faces.js +662 -663
  92. package/dist/utils/voxel/voxelize.d.ts +34 -33
  93. package/dist/utils/voxel/voxelize.js +1208 -1193
  94. package/dist/utils/webgpu.d.ts +8 -8
  95. package/dist/utils/webgpu.js +122 -122
  96. package/package.json +37 -30
  97. package/dist/native/cpp/bin/linux/binding.node +0 -0
  98. package/dist/native/cpp/bin/windows/binding.node +0 -0
@@ -1,665 +1,677 @@
1
- import { getOrCreateDevice } from '../webgpu.js';
2
- import { BLOCK_EMPTY, BLOCK_MIXED, BLOCK_SOLID, SparseVoxelGrid, readBlockType } from './common.js';
3
- const GPU_BUFFER_USAGE_STORAGE = 128;
4
- const GPU_BUFFER_USAGE_COPY_DST = 8;
5
- const GPU_BUFFER_USAGE_COPY_SRC = 4;
6
- const GPU_BUFFER_USAGE_UNIFORM = 64;
7
- const GPU_BUFFER_USAGE_MAP_READ = 1;
8
- const GPU_MAP_MODE_READ = 1;
9
- const CHUNK_INNER = 512;
10
- const SOLID_WORD = 0x55555555 >>> 0;
11
- const extractWgsl = () => /* wgsl */ `
12
- struct ExtractUniforms {
13
- minBx: i32,
14
- minBy: i32,
15
- minBz: i32,
16
- outerBx: u32,
17
- outerBy: u32,
18
- outerBz: u32,
19
- numXWords: u32,
20
- srcNbx: u32,
21
- srcNby: u32,
22
- srcNbz: u32,
23
- srcBStride: u32,
24
- srcCapMinusOne: u32
25
- }
26
-
27
- @group(0) @binding(0) var<uniform> u: ExtractUniforms;
28
- @group(0) @binding(1) var<storage, read> srcTypes: array<u32>;
29
- @group(0) @binding(2) var<storage, read> srcKeys: array<u32>;
30
- @group(0) @binding(3) var<storage, read> srcLo: array<u32>;
31
- @group(0) @binding(4) var<storage, read> srcHi: array<u32>;
32
- @group(0) @binding(5) var<storage, read_write> dstDense: array<atomic<u32>>;
33
-
34
- @compute @workgroup_size(8, 4, 8)
35
- fn main(@builtin(global_invocation_id) gid: vec3u) {
36
- if (gid.x >= u.outerBx || gid.y >= u.outerBy || gid.z >= u.outerBz) { return; }
37
-
38
- let chunkBx = i32(gid.x);
39
- let chunkBy = i32(gid.y);
40
- let chunkBz = i32(gid.z);
41
- let globalBx = u.minBx + chunkBx;
42
- let globalBy = u.minBy + chunkBy;
43
- let globalBz = u.minBz + chunkBz;
44
- if (globalBx < 0 || globalBy < 0 || globalBz < 0) { return; }
45
- if (globalBx >= i32(u.srcNbx) || globalBy >= i32(u.srcNby) || globalBz >= i32(u.srcNbz)) { return; }
46
-
47
- let blockIdx = u32(globalBx) + u32(globalBy) * u.srcNbx + u32(globalBz) * u.srcBStride;
48
- let typeWord = srcTypes[blockIdx >> 4u];
49
- let bt = (typeWord >> ((blockIdx & 15u) * 2u)) & 3u;
50
- if (bt == 0u) { return; }
51
-
52
- var lo: u32;
53
- var hi: u32;
54
- if (bt == 1u) {
55
- lo = 0xFFFFFFFFu;
56
- hi = 0xFFFFFFFFu;
57
- } else {
58
- var i = (blockIdx * 0x9E3779B9u) & u.srcCapMinusOne;
59
- loop {
60
- let k = srcKeys[i];
61
- if (k == blockIdx) {
62
- lo = srcLo[i];
63
- hi = srcHi[i];
64
- break;
65
- }
66
- if (k == 0xFFFFFFFFu) { return; }
67
- i = (i + 1u) & u.srcCapMinusOne;
68
- }
69
- }
70
-
71
- let dx0 = u32(chunkBx) * 4u;
72
- let wordOffsetX = dx0 / 32u;
73
- let bitShiftX = dx0 & 31u;
74
- let outerNy = u.outerBy * 4u;
75
- let planeWords = u.numXWords * outerNy;
76
-
77
- for (var lz = 0u; lz < 4u; lz = lz + 1u) {
78
- let dz = u32(chunkBz) * 4u + lz;
79
- let zBitBase = (lz & 1u) * 16u;
80
- let word = select(lo, hi, lz >= 2u);
81
- for (var ly = 0u; ly < 4u; ly = ly + 1u) {
82
- let dy = u32(chunkBy) * 4u + ly;
83
- let bitBase = zBitBase + ly * 4u;
84
- let pattern = (word >> bitBase) & 0xFu;
85
- if (pattern == 0u) { continue; }
86
- let wordIdx = wordOffsetX + dy * u.numXWords + dz * planeWords;
87
- atomicOr(&dstDense[wordIdx], pattern << bitShiftX);
88
- }
89
- }
90
- }
91
- `;
92
- const compactWgsl = () => /* wgsl */ `
93
- struct CompactUniforms {
94
- haloBx: u32,
95
- haloBy: u32,
96
- haloBz: u32,
97
- numXWords: u32,
98
- innerBx: u32,
99
- innerBy: u32,
100
- innerBz: u32,
101
- outerBy: u32
102
- }
103
-
104
- @group(0) @binding(0) var<uniform> u: CompactUniforms;
105
- @group(0) @binding(1) var<storage, read> dilatedDense: array<u32>;
106
- @group(0) @binding(2) var<storage, read_write> typesOut: array<atomic<u32>>;
107
- @group(0) @binding(3) var<storage, read_write> masksOut: array<u32>;
108
-
109
- @compute @workgroup_size(8, 4, 8)
110
- fn main(@builtin(global_invocation_id) gid: vec3u) {
111
- if (gid.x >= u.innerBx || gid.y >= u.innerBy || gid.z >= u.innerBz) { return; }
112
-
113
- let innerBlockIdx = gid.x + gid.y * u.innerBx + gid.z * u.innerBx * u.innerBy;
114
- let outerBx = gid.x + u.haloBx;
115
- let outerBy = gid.y + u.haloBy;
116
- let outerBz = gid.z + u.haloBz;
117
- let dx0 = outerBx * 4u;
118
- let wordOffsetX = dx0 / 32u;
119
- let bitShiftX = dx0 & 31u;
120
- let outerNy = u.outerBy * 4u;
121
- let planeWords = u.numXWords * outerNy;
122
-
123
- var lo = 0u;
124
- var hi = 0u;
125
- for (var lz = 0u; lz < 4u; lz = lz + 1u) {
126
- let dz = outerBz * 4u + lz;
127
- let zBitBase = (lz & 1u) * 16u;
128
- let inHi = lz >= 2u;
129
- let planeBase = dz * planeWords;
130
- for (var ly = 0u; ly < 4u; ly = ly + 1u) {
131
- let dy = outerBy * 4u + ly;
132
- let bitBase = zBitBase + ly * 4u;
133
- let wordIdx = wordOffsetX + dy * u.numXWords + planeBase;
134
- let pattern = (dilatedDense[wordIdx] >> bitShiftX) & 0xFu;
135
- let bits = pattern << bitBase;
136
- if (inHi) { hi = hi | bits; } else { lo = lo | bits; }
137
- }
138
- }
139
-
140
- masksOut[innerBlockIdx * 2u] = lo;
141
- masksOut[innerBlockIdx * 2u + 1u] = hi;
142
-
143
- var bt = 0u;
144
- if (lo != 0u || hi != 0u) {
145
- if (lo == 0xFFFFFFFFu && hi == 0xFFFFFFFFu) { bt = 1u; } else { bt = 2u; }
146
- }
147
- let typeWordIdx = innerBlockIdx >> 4u;
148
- let typeBitShift = (innerBlockIdx & 15u) * 2u;
149
- atomicOr(&typesOut[typeWordIdx], bt << typeBitShift);
150
- }
151
- `;
152
- const dilateXWgsl = () => /* wgsl */ `
153
- struct DilateXUniforms {
154
- numXWords: u32,
155
- ny: u32,
156
- nz: u32,
157
- halfExtent: u32
158
- }
159
-
160
- @group(0) @binding(0) var<uniform> u: DilateXUniforms;
161
- @group(0) @binding(1) var<storage, read> src: array<u32>;
162
- @group(0) @binding(2) var<storage, read_write> dst: array<u32>;
163
-
164
- fn readWord(rowOffset: u32, word: i32) -> u32 {
165
- if (word < 0 || word >= i32(u.numXWords)) { return 0u; }
166
- return src[rowOffset + u32(word)];
167
- }
168
-
169
- @compute @workgroup_size(8, 4, 8)
170
- fn main(@builtin(global_invocation_id) gid: vec3u) {
171
- if (gid.x >= u.numXWords || gid.y >= u.ny || gid.z >= u.nz) { return; }
172
-
173
- let xWord = gid.x;
174
- let y = gid.y;
175
- let z = gid.z;
176
- let rowStride = u.numXWords;
177
- let planeStride = rowStride * u.ny;
178
- let rowOffset = y * rowStride + z * planeStride;
179
- var output = src[rowOffset + xWord];
180
- let rowBits = u.numXWords * 32u;
181
- let r = min(u.halfExtent, rowBits);
182
- for (var d = 1u; d <= r; d = d + 1u) {
183
- let wordOffset = i32(d >> 5u);
184
- let bitShift = d & 31u;
185
- let baseWord = i32(xWord);
186
- var shiftedPos = readWord(rowOffset, baseWord + wordOffset);
187
- if (bitShift != 0u) {
188
- shiftedPos = (shiftedPos >> bitShift) | (readWord(rowOffset, baseWord + wordOffset + 1) << (32u - bitShift));
189
- }
190
- var shiftedNeg = readWord(rowOffset, baseWord - wordOffset);
191
- if (bitShift != 0u) {
192
- shiftedNeg = (shiftedNeg << bitShift) | (readWord(rowOffset, baseWord - wordOffset - 1) >> (32u - bitShift));
193
- }
194
- output = output | shiftedPos | shiftedNeg;
195
- if (output == 0xFFFFFFFFu) { break; }
196
- }
197
- dst[rowOffset + xWord] = output;
198
- }
199
- `;
200
- const dilateYZWgsl = () => /* wgsl */ `
201
- struct DilateYZUniforms {
202
- numXWords: u32,
203
- ny: u32,
204
- nz: u32,
205
- halfExtent: u32,
206
- stride: u32,
207
- axisLen: u32
208
- }
209
-
210
- @group(0) @binding(0) var<uniform> u: DilateYZUniforms;
211
- @group(0) @binding(1) var<storage, read> src: array<u32>;
212
- @group(0) @binding(2) var<storage, read_write> dst: array<u32>;
213
-
214
- @compute @workgroup_size(8, 4, 8)
215
- fn main(@builtin(global_invocation_id) gid: vec3u) {
216
- if (gid.x >= u.numXWords || gid.y >= u.ny || gid.z >= u.nz) { return; }
217
-
218
- let xWord = gid.x;
219
- let y = gid.y;
220
- let z = gid.z;
221
- let rowStride = u.numXWords;
222
- let planeStride = rowStride * u.ny;
223
- let outIdx = i32(xWord) + i32(y) * i32(rowStride) + i32(z) * i32(planeStride);
224
- let pos = select(z, y, u.stride == rowStride);
225
- let r = i32(u.halfExtent);
226
- let lo = max(0, i32(pos) - r);
227
- let hi = min(i32(u.axisLen) - 1, i32(pos) + r);
228
- let baseIdx = outIdx - i32(pos) * i32(u.stride);
229
- var output = 0u;
230
- for (var p = lo; p <= hi; p = p + 1) {
231
- output = output | src[baseIdx + p * i32(u.stride)];
232
- if (output == 0xFFFFFFFFu) { break; }
233
- }
234
- dst[outIdx] = output;
235
- }
236
- `;
237
- const makeBuffer = (device, size, usage) => (device.createBuffer({ size: Math.max(4, size), usage }));
238
- const writeUniform = (device, values) => {
239
- const buffer = makeBuffer(device, 256, GPU_BUFFER_USAGE_UNIFORM | GPU_BUFFER_USAGE_COPY_DST);
240
- device.queue.writeBuffer(buffer, 0, values.buffer, values.byteOffset, values.byteLength);
241
- return buffer;
242
- };
243
- const createStoragePipeline = (device, code) => (device.createComputePipeline({
244
- layout: 'auto',
245
- compute: { module: device.createShaderModule({ code }), entryPoint: 'main' }
246
- }));
247
- const blockAlignedExtent = (halfExtent) => (halfExtent === 0 ? 0 : Math.ceil(halfExtent / 4) * 4);
248
- const chunkIsEmpty = (src, ox, oy, oz, cx, cy, cz) => {
249
- const minBx = Math.max(0, Math.floor(ox / 4));
250
- const minBy = Math.max(0, Math.floor(oy / 4));
251
- const minBz = Math.max(0, Math.floor(oz / 4));
252
- const maxBx = Math.min(src.nbx, Math.ceil((ox + cx) / 4));
253
- const maxBy = Math.min(src.nby, Math.ceil((oy + cy) / 4));
254
- const maxBz = Math.min(src.nbz, Math.ceil((oz + cz) / 4));
255
- if (maxBx <= minBx || maxBy <= minBy || maxBz <= minBz) {
256
- return true;
257
- }
258
- for (let bz = minBz; bz < maxBz; bz++) {
259
- for (let by = minBy; by < maxBy; by++) {
260
- for (let bx = minBx; bx < maxBx; bx++) {
261
- const blockIdx = bx + by * src.nbx + bz * src.bStride;
262
- if (readBlockType(src.types, blockIdx) !== BLOCK_EMPTY) {
263
- return false;
264
- }
265
- }
266
- }
267
- }
268
- return true;
269
- };
270
- const chunkIsSaturated = (src, ox, oy, oz, cx, cy, cz) => {
271
- if (ox < 0 || oy < 0 || oz < 0) {
272
- return false;
273
- }
274
- if (ox + cx > src.nx || oy + cy > src.ny || oz + cz > src.nz) {
275
- return false;
276
- }
277
- const minBx = ox >> 2;
278
- const minBy = oy >> 2;
279
- const minBz = oz >> 2;
280
- const maxBx = (ox + cx + 3) >> 2;
281
- const maxBy = (oy + cy + 3) >> 2;
282
- const maxBz = (oz + cz + 3) >> 2;
283
- for (let bz = minBz; bz < maxBz; bz++) {
284
- for (let by = minBy; by < maxBy; by++) {
285
- for (let bx = minBx; bx < maxBx; bx++) {
286
- const blockIdx = bx + by * src.nbx + bz * src.bStride;
287
- if (readBlockType(src.types, blockIdx) !== BLOCK_SOLID) {
288
- return false;
289
- }
290
- }
291
- }
292
- }
293
- return true;
294
- };
295
- const insertSaturatedInner = (dst, innerOx, innerOy, innerOz, innerCx, innerCy, innerCz) => {
296
- const minBx = Math.max(0, innerOx >> 2);
297
- const minBy = Math.max(0, innerOy >> 2);
298
- const minBz = Math.max(0, innerOz >> 2);
299
- const maxBx = Math.min(dst.nbx, (innerOx + innerCx + 3) >> 2);
300
- const maxBy = Math.min(dst.nby, (innerOy + innerCy + 3) >> 2);
301
- const maxBz = Math.min(dst.nbz, (innerOz + innerCz + 3) >> 2);
302
- for (let bz = minBz; bz < maxBz; bz++) {
303
- for (let by = minBy; by < maxBy; by++) {
304
- const rowBase = by * dst.nbx + bz * dst.bStride;
305
- let blockIdx = rowBase + minBx;
306
- const endIdx = rowBase + maxBx;
307
- while (blockIdx < endIdx) {
308
- const w = blockIdx >>> 4;
309
- const shift = (blockIdx & 15) << 1;
310
- const remainingInWord = 16 - (blockIdx & 15);
311
- const remainingInRow = endIdx - blockIdx;
312
- const blocksToWrite = Math.min(remainingInWord, remainingInRow);
313
- if (blocksToWrite === 16) {
314
- dst.types[w] = SOLID_WORD;
315
- }
316
- else {
317
- const bits = blocksToWrite << 1;
318
- const mask = (((1 << bits) - 1) >>> 0) << shift;
319
- dst.types[w] = ((dst.types[w] & ~mask) | (SOLID_WORD & mask)) >>> 0;
320
- }
321
- blockIdx += blocksToWrite;
322
- }
323
- }
324
- }
325
- };
326
- const applyChunkToDst = (dst, typesOut, masksOut, cx, cy, cz, innerNx, innerNy, innerNz) => {
327
- const innerBx = innerNx >> 2;
328
- const innerBy = innerNy >> 2;
329
- const innerBz = innerNz >> 2;
330
- const baseBx = cx >> 2;
331
- const baseBy = cy >> 2;
332
- const baseBz = cz >> 2;
333
- let innerIdx = 0;
334
- for (let bz = 0; bz < innerBz; bz++) {
335
- const globalBz = baseBz + bz;
336
- for (let by = 0; by < innerBy; by++) {
337
- const globalBy = baseBy + by;
338
- const baseGlobalIdx = baseBx + globalBy * dst.nbx + globalBz * dst.bStride;
339
- for (let bx = 0; bx < innerBx; bx++, innerIdx++) {
340
- const wordIdx = innerIdx >>> 4;
341
- const bitShift = (innerIdx & 15) << 1;
342
- const bt = (typesOut[wordIdx] >>> bitShift) & 3;
343
- if (bt === BLOCK_EMPTY) {
344
- continue;
345
- }
346
- const globalBlockIdx = baseGlobalIdx + bx;
347
- const w = globalBlockIdx >>> 4;
348
- const shift = (globalBlockIdx & 15) << 1;
349
- dst.types[w] |= bt << shift;
350
- if (bt === BLOCK_MIXED) {
351
- const m2 = innerIdx * 2;
352
- dst.masks.set(globalBlockIdx, masksOut[m2], masksOut[m2 + 1]);
353
- }
354
- }
355
- }
356
- }
357
- };
358
- class GpuDilation {
359
- device;
360
- extractPipeline;
361
- compactPipeline;
362
- dilateXPipeline;
363
- dilateYZPipeline;
364
- slots = [];
365
- srcTypesBuffer;
366
- srcKeysBuffer;
367
- srcLoBuffer;
368
- srcHiBuffer;
369
- srcMeta = { nbx: 0, nby: 0, nbz: 0, bStride: 0, capMinusOne: 0 };
370
- static NUM_SLOTS = 2;
371
- constructor(device) {
372
- this.device = device;
373
- this.extractPipeline = createStoragePipeline(device, extractWgsl());
374
- this.compactPipeline = createStoragePipeline(device, compactWgsl());
375
- this.dilateXPipeline = createStoragePipeline(device, dilateXWgsl());
376
- this.dilateYZPipeline = createStoragePipeline(device, dilateYZWgsl());
377
- for (let i = 0; i < GpuDilation.NUM_SLOTS; i++) {
378
- const capacity = 1024 * 1024 * 4;
379
- const typesOutCapacity = 64 * 1024;
380
- const masksOutCapacity = 1024 * 1024;
381
- this.slots.push({
382
- bufferA: makeBuffer(device, capacity, GPU_BUFFER_USAGE_STORAGE | GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_COPY_SRC),
383
- bufferB: makeBuffer(device, capacity, GPU_BUFFER_USAGE_STORAGE | GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_COPY_SRC),
384
- readTypesBuffer: makeBuffer(device, typesOutCapacity, GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_MAP_READ),
385
- readMasksBuffer: makeBuffer(device, masksOutCapacity, GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_MAP_READ),
386
- typesOutBuffer: makeBuffer(device, typesOutCapacity, GPU_BUFFER_USAGE_STORAGE | GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_COPY_SRC),
387
- masksOutBuffer: makeBuffer(device, masksOutCapacity, GPU_BUFFER_USAGE_STORAGE | GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_COPY_SRC),
388
- capacity,
389
- typesOutCapacity,
390
- masksOutCapacity
391
- });
392
- }
393
- }
394
- replaceBuffer(slot, key, size, usage) {
395
- slot[key].destroy();
396
- slot[key] = makeBuffer(this.device, size, usage);
397
- }
398
- ensureSlotBuffers(slot, numWords) {
399
- const neededBytes = numWords * 4;
400
- if (neededBytes <= slot.capacity) {
401
- return;
402
- }
403
- let cap = slot.capacity;
404
- while (cap < neededBytes) {
405
- cap *= 2;
406
- }
407
- this.replaceBuffer(slot, 'bufferA', cap, GPU_BUFFER_USAGE_STORAGE | GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_COPY_SRC);
408
- this.replaceBuffer(slot, 'bufferB', cap, GPU_BUFFER_USAGE_STORAGE | GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_COPY_SRC);
409
- slot.capacity = cap;
410
- }
411
- ensureSlotOutputBuffers(slot, innerBlocks) {
412
- const typesBytes = ((innerBlocks + 15) >>> 4) * 4;
413
- if (slot.typesOutCapacity < typesBytes) {
414
- this.replaceBuffer(slot, 'typesOutBuffer', typesBytes, GPU_BUFFER_USAGE_STORAGE | GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_COPY_SRC);
415
- this.replaceBuffer(slot, 'readTypesBuffer', typesBytes, GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_MAP_READ);
416
- slot.typesOutCapacity = typesBytes;
417
- }
418
- const masksBytes = innerBlocks * 8;
419
- if (slot.masksOutCapacity < masksBytes) {
420
- this.replaceBuffer(slot, 'masksOutBuffer', masksBytes, GPU_BUFFER_USAGE_STORAGE | GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_COPY_SRC);
421
- this.replaceBuffer(slot, 'readMasksBuffer', masksBytes, GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_MAP_READ);
422
- slot.masksOutCapacity = masksBytes;
423
- }
424
- }
425
- uploadSrc(src) {
426
- this.releaseSrc();
427
- this.srcTypesBuffer = makeBuffer(this.device, src.types.byteLength, GPU_BUFFER_USAGE_STORAGE | GPU_BUFFER_USAGE_COPY_DST);
428
- this.device.queue.writeBuffer(this.srcTypesBuffer, 0, src.types.buffer, src.types.byteOffset, src.types.byteLength);
429
- const keysU32 = new Uint32Array(src.masks.keys.buffer, src.masks.keys.byteOffset, src.masks.keys.length);
430
- this.srcKeysBuffer = makeBuffer(this.device, keysU32.byteLength, GPU_BUFFER_USAGE_STORAGE | GPU_BUFFER_USAGE_COPY_DST);
431
- this.srcLoBuffer = makeBuffer(this.device, src.masks.lo.byteLength, GPU_BUFFER_USAGE_STORAGE | GPU_BUFFER_USAGE_COPY_DST);
432
- this.srcHiBuffer = makeBuffer(this.device, src.masks.hi.byteLength, GPU_BUFFER_USAGE_STORAGE | GPU_BUFFER_USAGE_COPY_DST);
433
- this.device.queue.writeBuffer(this.srcKeysBuffer, 0, keysU32.buffer, keysU32.byteOffset, keysU32.byteLength);
434
- this.device.queue.writeBuffer(this.srcLoBuffer, 0, src.masks.lo.buffer, src.masks.lo.byteOffset, src.masks.lo.byteLength);
435
- this.device.queue.writeBuffer(this.srcHiBuffer, 0, src.masks.hi.buffer, src.masks.hi.byteOffset, src.masks.hi.byteLength);
436
- this.srcMeta = {
437
- nbx: src.nbx,
438
- nby: src.nby,
439
- nbz: src.nbz,
440
- bStride: src.bStride,
441
- capMinusOne: src.masks.keys.length - 1
442
- };
443
- }
444
- releaseSrc() {
445
- this.srcTypesBuffer?.destroy();
446
- this.srcKeysBuffer?.destroy();
447
- this.srcLoBuffer?.destroy();
448
- this.srcHiBuffer?.destroy();
449
- this.srcTypesBuffer = undefined;
450
- this.srcKeysBuffer = undefined;
451
- this.srcLoBuffer = undefined;
452
- this.srcHiBuffer = undefined;
453
- }
454
- submitChunkSparse(slotIdx, minBx, minBy, minBz, outerBx, outerBy, outerBz, haloBx, haloBy, haloBz, innerBx, innerBy, innerBz, halfExtentXZ, halfExtentY) {
455
- if (!this.srcTypesBuffer || !this.srcKeysBuffer || !this.srcLoBuffer || !this.srcHiBuffer) {
456
- throw new Error('GpuDilation: must call uploadSrc() before submitChunkSparse()');
457
- }
458
- const slot = this.slots[slotIdx];
459
- const outerNx = outerBx * 4;
460
- const outerNy = outerBy * 4;
461
- const outerNz = outerBz * 4;
462
- const numXWords = (outerNx + 31) >>> 5;
463
- const numWords = numXWords * outerNy * outerNz;
464
- const innerBlocks = innerBx * innerBy * innerBz;
465
- const typesOutWords = (innerBlocks + 15) >>> 4;
466
- this.ensureSlotBuffers(slot, numWords);
467
- this.ensureSlotOutputBuffers(slot, innerBlocks);
468
- const uniformBuffers = [];
469
- const makeUniform = (values) => {
470
- const buffer = writeUniform(this.device, values);
471
- uniformBuffers.push(buffer);
472
- return buffer;
473
- };
474
- {
475
- const encoder = this.device.createCommandEncoder();
476
- encoder.clearBuffer(slot.bufferA, 0, numWords * 4);
477
- const uniforms = new Uint32Array([
478
- minBx >>> 0, minBy >>> 0, minBz >>> 0,
479
- outerBx, outerBy, outerBz, numXWords,
480
- this.srcMeta.nbx, this.srcMeta.nby, this.srcMeta.nbz,
481
- this.srcMeta.bStride, this.srcMeta.capMinusOne
482
- ]);
483
- const uniformBuffer = makeUniform(uniforms);
484
- const bindGroup = this.device.createBindGroup({
485
- layout: this.extractPipeline.getBindGroupLayout(0),
486
- entries: [
487
- { binding: 0, resource: { buffer: uniformBuffer } },
488
- { binding: 1, resource: { buffer: this.srcTypesBuffer } },
489
- { binding: 2, resource: { buffer: this.srcKeysBuffer } },
490
- { binding: 3, resource: { buffer: this.srcLoBuffer } },
491
- { binding: 4, resource: { buffer: this.srcHiBuffer } },
492
- { binding: 5, resource: { buffer: slot.bufferA } }
493
- ]
494
- });
495
- const pass = encoder.beginComputePass();
496
- pass.setPipeline(this.extractPipeline);
497
- pass.setBindGroup(0, bindGroup);
498
- pass.dispatchWorkgroups(Math.ceil(outerBx / 8), Math.ceil(outerBy / 4), Math.ceil(outerBz / 8));
499
- pass.end();
500
- this.device.queue.submit([encoder.finish()]);
501
- }
502
- {
503
- const encoder = this.device.createCommandEncoder();
504
- const dispatch = (pipeline, src, dst, uniforms, wgX, wgY, wgZ) => {
505
- const uniformBuffer = makeUniform(uniforms);
506
- const bindGroup = this.device.createBindGroup({
507
- layout: pipeline.getBindGroupLayout(0),
508
- entries: [
509
- { binding: 0, resource: { buffer: uniformBuffer } },
510
- { binding: 1, resource: { buffer: src } },
511
- { binding: 2, resource: { buffer: dst } }
512
- ]
513
- });
514
- const pass = encoder.beginComputePass();
515
- pass.setPipeline(pipeline);
516
- pass.setBindGroup(0, bindGroup);
517
- pass.dispatchWorkgroups(wgX, wgY, wgZ);
518
- pass.end();
519
- };
520
- dispatch(this.dilateXPipeline, slot.bufferA, slot.bufferB, new Uint32Array([numXWords, outerNy, outerNz, halfExtentXZ]), Math.ceil(numXWords / 8), Math.ceil(outerNy / 4), Math.ceil(outerNz / 8));
521
- dispatch(this.dilateYZPipeline, slot.bufferB, slot.bufferA, new Uint32Array([numXWords, outerNy, outerNz, halfExtentXZ, numXWords * outerNy, outerNz]), Math.ceil(numXWords / 8), Math.ceil(outerNy / 4), Math.ceil(outerNz / 8));
522
- dispatch(this.dilateYZPipeline, slot.bufferA, slot.bufferB, new Uint32Array([numXWords, outerNy, outerNz, halfExtentY, numXWords, outerNy]), Math.ceil(numXWords / 8), Math.ceil(outerNy / 4), Math.ceil(outerNz / 8));
523
- encoder.clearBuffer(slot.typesOutBuffer, 0, typesOutWords * 4);
524
- const compactUniformBuffer = makeUniform(new Uint32Array([
525
- haloBx, haloBy, haloBz, numXWords, innerBx, innerBy, innerBz, outerBy
526
- ]));
527
- const compactBindGroup = this.device.createBindGroup({
528
- layout: this.compactPipeline.getBindGroupLayout(0),
529
- entries: [
530
- { binding: 0, resource: { buffer: compactUniformBuffer } },
531
- { binding: 1, resource: { buffer: slot.bufferB } },
532
- { binding: 2, resource: { buffer: slot.typesOutBuffer } },
533
- { binding: 3, resource: { buffer: slot.masksOutBuffer } }
534
- ]
535
- });
536
- const pass = encoder.beginComputePass();
537
- pass.setPipeline(this.compactPipeline);
538
- pass.setBindGroup(0, compactBindGroup);
539
- pass.dispatchWorkgroups(Math.ceil(innerBx / 8), Math.ceil(innerBy / 4), Math.ceil(innerBz / 8));
540
- pass.end();
541
- encoder.copyBufferToBuffer(slot.typesOutBuffer, 0, slot.readTypesBuffer, 0, typesOutWords * 4);
542
- encoder.copyBufferToBuffer(slot.masksOutBuffer, 0, slot.readMasksBuffer, 0, innerBlocks * 8);
543
- this.device.queue.submit([encoder.finish()]);
544
- }
545
- const typesPromise = (async () => {
546
- await slot.readTypesBuffer.mapAsync(GPU_MAP_MODE_READ, 0, typesOutWords * 4);
547
- const mapped = new Uint32Array(slot.readTypesBuffer.getMappedRange(0, typesOutWords * 4));
548
- const out = new Uint32Array(typesOutWords);
549
- out.set(mapped);
550
- slot.readTypesBuffer.unmap();
551
- return out;
552
- })();
553
- const masksPromise = (async () => {
554
- await slot.readMasksBuffer.mapAsync(GPU_MAP_MODE_READ, 0, innerBlocks * 8);
555
- const mapped = new Uint32Array(slot.readMasksBuffer.getMappedRange(0, innerBlocks * 8));
556
- const out = new Uint32Array(innerBlocks * 2);
557
- out.set(mapped);
558
- slot.readMasksBuffer.unmap();
559
- return out;
560
- })();
561
- void Promise.all([typesPromise, masksPromise]).then(() => {
562
- for (const buffer of uniformBuffers) {
563
- buffer.destroy();
564
- }
565
- });
566
- return { types: typesPromise, masks: masksPromise };
567
- }
568
- destroy() {
569
- this.releaseSrc();
570
- for (const slot of this.slots) {
571
- slot.bufferA.destroy();
572
- slot.bufferB.destroy();
573
- slot.readTypesBuffer.destroy();
574
- slot.readMasksBuffer.destroy();
575
- slot.typesOutBuffer.destroy();
576
- slot.masksOutBuffer.destroy();
577
- }
578
- }
579
- }
580
- export const gpuDilate3 = async (src, halfExtentXZ, halfExtentY) => {
581
- if (halfExtentXZ === 0 && halfExtentY === 0) {
582
- return src.clone();
583
- }
584
- if (!Number.isInteger(halfExtentXZ) || halfExtentXZ < 0) {
585
- throw new Error(`gpuDilate3: halfExtentXZ=${halfExtentXZ} must be a non-negative integer`);
586
- }
587
- if (!Number.isInteger(halfExtentY) || halfExtentY < 0) {
588
- throw new Error(`gpuDilate3: halfExtentY=${halfExtentY} must be a non-negative integer`);
589
- }
590
- const device = await getOrCreateDevice();
591
- const gpu = new GpuDilation(device);
592
- const dst = new SparseVoxelGrid(src.nx, src.ny, src.nz);
593
- const haloX = blockAlignedExtent(halfExtentXZ);
594
- const haloY = blockAlignedExtent(halfExtentY);
595
- const haloZ = haloX;
596
- const haloBx = haloX / 4;
597
- const haloBy = haloY / 4;
598
- const haloBz = haloZ / 4;
599
- const innerStep = CHUNK_INNER & ~3;
600
- let currentSlot = 0;
601
- let inflight;
602
- const drainInflight = async () => {
603
- if (!inflight) {
604
- return;
605
- }
606
- const f = inflight;
607
- inflight = undefined;
608
- const [typesOut, masksOut] = await Promise.all([f.typesPromise, f.masksPromise]);
609
- applyChunkToDst(dst, typesOut, masksOut, f.cx, f.cy, f.cz, f.innerNx, f.innerNy, f.innerNz);
610
- };
611
- gpu.uploadSrc(src);
612
- try {
613
- for (let cz = 0; cz < src.nz; cz += innerStep) {
614
- for (let cy = 0; cy < src.ny; cy += innerStep) {
615
- for (let cx = 0; cx < src.nx; cx += innerStep) {
616
- const innerNx = Math.min(innerStep, src.nx - cx);
617
- const innerNy = Math.min(innerStep, src.ny - cy);
618
- const innerNz = Math.min(innerStep, src.nz - cz);
619
- const ox = cx - haloX;
620
- const oy = cy - haloY;
621
- const oz = cz - haloZ;
622
- const outerNx = innerNx + 2 * haloX;
623
- const outerNy = innerNy + 2 * haloY;
624
- const outerNz = innerNz + 2 * haloZ;
625
- if (chunkIsEmpty(src, ox, oy, oz, outerNx, outerNy, outerNz)) {
626
- continue;
627
- }
628
- if (chunkIsSaturated(src, ox, oy, oz, outerNx, outerNy, outerNz)) {
629
- insertSaturatedInner(dst, cx, cy, cz, innerNx, innerNy, innerNz);
630
- continue;
631
- }
632
- const innerBx = innerNx >> 2;
633
- const innerBy = innerNy >> 2;
634
- const innerBz = innerNz >> 2;
635
- const outerBx = outerNx >> 2;
636
- const outerBy = outerNy >> 2;
637
- const outerBz = outerNz >> 2;
638
- const minBx = Math.floor(ox / 4);
639
- const minBy = Math.floor(oy / 4);
640
- const minBz = Math.floor(oz / 4);
641
- const { types, masks } = gpu.submitChunkSparse(currentSlot, minBx, minBy, minBz, outerBx, outerBy, outerBz, haloBx, haloBy, haloBz, innerBx, innerBy, innerBz, halfExtentXZ, halfExtentY);
642
- if (inflight) {
643
- await drainInflight();
644
- }
645
- inflight = {
646
- typesPromise: types,
647
- masksPromise: masks,
648
- cx,
649
- cy,
650
- cz,
651
- innerNx,
652
- innerNy,
653
- innerNz
654
- };
655
- currentSlot = (currentSlot + 1) % GpuDilation.NUM_SLOTS;
656
- }
657
- }
658
- }
659
- await drainInflight();
660
- }
661
- finally {
662
- gpu.destroy();
663
- }
664
- return dst;
665
- };
1
+ import { getOrCreateDevice } from '../webgpu.js';
2
+ import { BLOCK_EMPTY, BLOCK_MIXED, BLOCK_SOLID, SparseVoxelGrid, readBlockType } from './common.js';
3
// Numeric values of the WebGPU GPUBufferUsage flags used in this module.
const GPU_BUFFER_USAGE_MAP_READ = 0x0001;
const GPU_BUFFER_USAGE_COPY_SRC = 0x0004;
const GPU_BUFFER_USAGE_COPY_DST = 0x0008;
const GPU_BUFFER_USAGE_UNIFORM = 0x0040;
const GPU_BUFFER_USAGE_STORAGE = 0x0080;
// Numeric value of the WebGPU GPUMapMode.READ flag.
const GPU_MAP_MODE_READ = 0x0001;
// Edge length, in voxels, of the inner region processed per GPU chunk.
const CHUNK_INNER = 512;
// A 32-bit type word holding the 2-bit code 01 in each of its 16 block slots.
const SOLID_WORD = 0x55555555;
11
+ function extractWgsl() {
12
+ return /* wgsl */ `
13
+ struct ExtractUniforms {
14
+ minBx: i32,
15
+ minBy: i32,
16
+ minBz: i32,
17
+ outerBx: u32,
18
+ outerBy: u32,
19
+ outerBz: u32,
20
+ numXWords: u32,
21
+ srcNbx: u32,
22
+ srcNby: u32,
23
+ srcNbz: u32,
24
+ srcBStride: u32,
25
+ srcCapMinusOne: u32
26
+ }
27
+
28
+ @group(0) @binding(0) var<uniform> u: ExtractUniforms;
29
+ @group(0) @binding(1) var<storage, read> srcTypes: array<u32>;
30
+ @group(0) @binding(2) var<storage, read> srcKeys: array<u32>;
31
+ @group(0) @binding(3) var<storage, read> srcLo: array<u32>;
32
+ @group(0) @binding(4) var<storage, read> srcHi: array<u32>;
33
+ @group(0) @binding(5) var<storage, read_write> dstDense: array<atomic<u32>>;
34
+
35
+ @compute @workgroup_size(8, 4, 8)
36
+ fn main(@builtin(global_invocation_id) gid: vec3u) {
37
+ if (gid.x >= u.outerBx || gid.y >= u.outerBy || gid.z >= u.outerBz) { return; }
38
+
39
+ let chunkBx = i32(gid.x);
40
+ let chunkBy = i32(gid.y);
41
+ let chunkBz = i32(gid.z);
42
+ let globalBx = u.minBx + chunkBx;
43
+ let globalBy = u.minBy + chunkBy;
44
+ let globalBz = u.minBz + chunkBz;
45
+ if (globalBx < 0 || globalBy < 0 || globalBz < 0) { return; }
46
+ if (globalBx >= i32(u.srcNbx) || globalBy >= i32(u.srcNby) || globalBz >= i32(u.srcNbz)) { return; }
47
+
48
+ let blockIdx = u32(globalBx) + u32(globalBy) * u.srcNbx + u32(globalBz) * u.srcBStride;
49
+ let typeWord = srcTypes[blockIdx >> 4u];
50
+ let bt = (typeWord >> ((blockIdx & 15u) * 2u)) & 3u;
51
+ if (bt == 0u) { return; }
52
+
53
+ var lo: u32;
54
+ var hi: u32;
55
+ if (bt == 1u) {
56
+ lo = 0xFFFFFFFFu;
57
+ hi = 0xFFFFFFFFu;
58
+ } else {
59
+ var i = (blockIdx * 0x9E3779B9u) & u.srcCapMinusOne;
60
+ loop {
61
+ let k = srcKeys[i];
62
+ if (k == blockIdx) {
63
+ lo = srcLo[i];
64
+ hi = srcHi[i];
65
+ break;
66
+ }
67
+ if (k == 0xFFFFFFFFu) { return; }
68
+ i = (i + 1u) & u.srcCapMinusOne;
69
+ }
70
+ }
71
+
72
+ let dx0 = u32(chunkBx) * 4u;
73
+ let wordOffsetX = dx0 / 32u;
74
+ let bitShiftX = dx0 & 31u;
75
+ let outerNy = u.outerBy * 4u;
76
+ let planeWords = u.numXWords * outerNy;
77
+
78
+ for (var lz = 0u; lz < 4u; lz = lz + 1u) {
79
+ let dz = u32(chunkBz) * 4u + lz;
80
+ let zBitBase = (lz & 1u) * 16u;
81
+ let word = select(lo, hi, lz >= 2u);
82
+ for (var ly = 0u; ly < 4u; ly = ly + 1u) {
83
+ let dy = u32(chunkBy) * 4u + ly;
84
+ let bitBase = zBitBase + ly * 4u;
85
+ let pattern = (word >> bitBase) & 0xFu;
86
+ if (pattern == 0u) { continue; }
87
+ let wordIdx = wordOffsetX + dy * u.numXWords + dz * planeWords;
88
+ atomicOr(&dstDense[wordIdx], pattern << bitShiftX);
89
+ }
90
+ }
91
+ }
92
+ `;
93
+ }
94
+ function compactWgsl() {
95
+ return /* wgsl */ `
96
+ struct CompactUniforms {
97
+ haloBx: u32,
98
+ haloBy: u32,
99
+ haloBz: u32,
100
+ numXWords: u32,
101
+ innerBx: u32,
102
+ innerBy: u32,
103
+ innerBz: u32,
104
+ outerBy: u32
105
+ }
106
+
107
+ @group(0) @binding(0) var<uniform> u: CompactUniforms;
108
+ @group(0) @binding(1) var<storage, read> dilatedDense: array<u32>;
109
+ @group(0) @binding(2) var<storage, read_write> typesOut: array<atomic<u32>>;
110
+ @group(0) @binding(3) var<storage, read_write> masksOut: array<u32>;
111
+
112
+ @compute @workgroup_size(8, 4, 8)
113
+ fn main(@builtin(global_invocation_id) gid: vec3u) {
114
+ if (gid.x >= u.innerBx || gid.y >= u.innerBy || gid.z >= u.innerBz) { return; }
115
+
116
+ let innerBlockIdx = gid.x + gid.y * u.innerBx + gid.z * u.innerBx * u.innerBy;
117
+ let outerBx = gid.x + u.haloBx;
118
+ let outerBy = gid.y + u.haloBy;
119
+ let outerBz = gid.z + u.haloBz;
120
+ let dx0 = outerBx * 4u;
121
+ let wordOffsetX = dx0 / 32u;
122
+ let bitShiftX = dx0 & 31u;
123
+ let outerNy = u.outerBy * 4u;
124
+ let planeWords = u.numXWords * outerNy;
125
+
126
+ var lo = 0u;
127
+ var hi = 0u;
128
+ for (var lz = 0u; lz < 4u; lz = lz + 1u) {
129
+ let dz = outerBz * 4u + lz;
130
+ let zBitBase = (lz & 1u) * 16u;
131
+ let inHi = lz >= 2u;
132
+ let planeBase = dz * planeWords;
133
+ for (var ly = 0u; ly < 4u; ly = ly + 1u) {
134
+ let dy = outerBy * 4u + ly;
135
+ let bitBase = zBitBase + ly * 4u;
136
+ let wordIdx = wordOffsetX + dy * u.numXWords + planeBase;
137
+ let pattern = (dilatedDense[wordIdx] >> bitShiftX) & 0xFu;
138
+ let bits = pattern << bitBase;
139
+ if (inHi) { hi = hi | bits; } else { lo = lo | bits; }
140
+ }
141
+ }
142
+
143
+ masksOut[innerBlockIdx * 2u] = lo;
144
+ masksOut[innerBlockIdx * 2u + 1u] = hi;
145
+
146
+ var bt = 0u;
147
+ if (lo != 0u || hi != 0u) {
148
+ if (lo == 0xFFFFFFFFu && hi == 0xFFFFFFFFu) { bt = 1u; } else { bt = 2u; }
149
+ }
150
+ let typeWordIdx = innerBlockIdx >> 4u;
151
+ let typeBitShift = (innerBlockIdx & 15u) * 2u;
152
+ atomicOr(&typesOut[typeWordIdx], bt << typeBitShift);
153
+ }
154
+ `;
155
+ }
156
+ function dilateXWgsl() {
157
+ return /* wgsl */ `
158
+ struct DilateXUniforms {
159
+ numXWords: u32,
160
+ ny: u32,
161
+ nz: u32,
162
+ halfExtent: u32
163
+ }
164
+
165
+ @group(0) @binding(0) var<uniform> u: DilateXUniforms;
166
+ @group(0) @binding(1) var<storage, read> src: array<u32>;
167
+ @group(0) @binding(2) var<storage, read_write> dst: array<u32>;
168
+
169
+ fn readWord(rowOffset: u32, word: i32) -> u32 {
170
+ if (word < 0 || word >= i32(u.numXWords)) { return 0u; }
171
+ return src[rowOffset + u32(word)];
172
+ }
173
+
174
+ @compute @workgroup_size(8, 4, 8)
175
+ fn main(@builtin(global_invocation_id) gid: vec3u) {
176
+ if (gid.x >= u.numXWords || gid.y >= u.ny || gid.z >= u.nz) { return; }
177
+
178
+ let xWord = gid.x;
179
+ let y = gid.y;
180
+ let z = gid.z;
181
+ let rowStride = u.numXWords;
182
+ let planeStride = rowStride * u.ny;
183
+ let rowOffset = y * rowStride + z * planeStride;
184
+ var output = src[rowOffset + xWord];
185
+ let rowBits = u.numXWords * 32u;
186
+ let r = min(u.halfExtent, rowBits);
187
+ for (var d = 1u; d <= r; d = d + 1u) {
188
+ let wordOffset = i32(d >> 5u);
189
+ let bitShift = d & 31u;
190
+ let baseWord = i32(xWord);
191
+ var shiftedPos = readWord(rowOffset, baseWord + wordOffset);
192
+ if (bitShift != 0u) {
193
+ shiftedPos = (shiftedPos >> bitShift) | (readWord(rowOffset, baseWord + wordOffset + 1) << (32u - bitShift));
194
+ }
195
+ var shiftedNeg = readWord(rowOffset, baseWord - wordOffset);
196
+ if (bitShift != 0u) {
197
+ shiftedNeg = (shiftedNeg << bitShift) | (readWord(rowOffset, baseWord - wordOffset - 1) >> (32u - bitShift));
198
+ }
199
+ output = output | shiftedPos | shiftedNeg;
200
+ if (output == 0xFFFFFFFFu) { break; }
201
+ }
202
+ dst[rowOffset + xWord] = output;
203
+ }
204
+ `;
205
+ }
206
+ function dilateYZWgsl() {
207
+ return /* wgsl */ `
208
+ struct DilateYZUniforms {
209
+ numXWords: u32,
210
+ ny: u32,
211
+ nz: u32,
212
+ halfExtent: u32,
213
+ stride: u32,
214
+ axisLen: u32
215
+ }
216
+
217
+ @group(0) @binding(0) var<uniform> u: DilateYZUniforms;
218
+ @group(0) @binding(1) var<storage, read> src: array<u32>;
219
+ @group(0) @binding(2) var<storage, read_write> dst: array<u32>;
220
+
221
+ @compute @workgroup_size(8, 4, 8)
222
+ fn main(@builtin(global_invocation_id) gid: vec3u) {
223
+ if (gid.x >= u.numXWords || gid.y >= u.ny || gid.z >= u.nz) { return; }
224
+
225
+ let xWord = gid.x;
226
+ let y = gid.y;
227
+ let z = gid.z;
228
+ let rowStride = u.numXWords;
229
+ let planeStride = rowStride * u.ny;
230
+ let outIdx = i32(xWord) + i32(y) * i32(rowStride) + i32(z) * i32(planeStride);
231
+ let pos = select(z, y, u.stride == rowStride);
232
+ let r = i32(u.halfExtent);
233
+ let lo = max(0, i32(pos) - r);
234
+ let hi = min(i32(u.axisLen) - 1, i32(pos) + r);
235
+ let baseIdx = outIdx - i32(pos) * i32(u.stride);
236
+ var output = 0u;
237
+ for (var p = lo; p <= hi; p = p + 1) {
238
+ output = output | src[baseIdx + p * i32(u.stride)];
239
+ if (output == 0xFFFFFFFFu) { break; }
240
+ }
241
+ dst[outIdx] = output;
242
+ }
243
+ `;
244
+ }
245
+ function makeBuffer(device, size, usage) {
246
+ return device.createBuffer({ size: Math.max(4, size), usage });
247
+ }
248
/**
 * Allocates a 256-byte uniform buffer and queues a write of `values` into it.
 *
 * @param device GPU device used for allocation and for `queue.writeBuffer`.
 * @param values Typed array whose bytes are written at offset 0 of the buffer.
 * @returns The newly created uniform buffer; the caller must destroy it.
 */
function writeUniform(device, values) {
    const usage = GPU_BUFFER_USAGE_UNIFORM | GPU_BUFFER_USAGE_COPY_DST;
    const uniformBuffer = makeBuffer(device, 256, usage);
    const { buffer: bytes, byteOffset, byteLength } = values;
    device.queue.writeBuffer(uniformBuffer, 0, bytes, byteOffset, byteLength);
    return uniformBuffer;
}
253
+ function createStoragePipeline(device, code) {
254
+ return device.createComputePipeline({
255
+ layout: 'auto',
256
+ compute: { module: device.createShaderModule({ code }), entryPoint: 'main' },
257
+ });
258
+ }
259
/**
 * Rounds a half extent up to the next multiple of 4 (the block edge length).
 *
 * @param halfExtent Dilation half extent in voxels.
 * @returns 0 when `halfExtent` is exactly 0, otherwise `halfExtent` rounded up to a multiple of 4.
 */
function blockAlignedExtent(halfExtent) {
    if (halfExtent === 0) {
        return 0;
    }
    const blocks = Math.ceil(halfExtent / 4);
    return blocks * 4;
}
262
/**
 * Reports whether every block of `src` touched by a voxel-space box is empty.
 *
 * @param src Grid exposing `nbx`, `nby`, `nbz`, `bStride` and `types`.
 * @param ox Box origin x in voxels (may be negative).
 * @param oy Box origin y in voxels (may be negative).
 * @param oz Box origin z in voxels (may be negative).
 * @param cx Box size along x in voxels.
 * @param cy Box size along y in voxels.
 * @param cz Box size along z in voxels.
 * @returns `true` when the box does not overlap the grid or all overlapped blocks are BLOCK_EMPTY.
 */
function chunkIsEmpty(src, ox, oy, oz, cx, cy, cz) {
    // Half-open block range covered by the box, clamped to the grid.
    const bxStart = Math.max(0, Math.floor(ox / 4));
    const byStart = Math.max(0, Math.floor(oy / 4));
    const bzStart = Math.max(0, Math.floor(oz / 4));
    const bxEnd = Math.min(src.nbx, Math.ceil((ox + cx) / 4));
    const byEnd = Math.min(src.nby, Math.ceil((oy + cy) / 4));
    const bzEnd = Math.min(src.nbz, Math.ceil((oz + cz) / 4));
    const clippedAway = bxEnd <= bxStart || byEnd <= byStart || bzEnd <= bzStart;
    if (clippedAway) {
        return true;
    }
    for (let bz = bzStart; bz < bzEnd; bz++) {
        const planeBase = bz * src.bStride;
        for (let by = byStart; by < byEnd; by++) {
            const rowBase = by * src.nbx + planeBase;
            for (let bx = bxStart; bx < bxEnd; bx++) {
                const occupied = readBlockType(src.types, bx + rowBase) !== BLOCK_EMPTY;
                if (occupied) {
                    return false;
                }
            }
        }
    }
    return true;
}
284
/**
 * Reports whether a voxel-space box lies fully inside `src` and covers only solid blocks.
 *
 * @param src Grid exposing `nx`, `ny`, `nz`, `nbx`, `bStride` and `types`.
 * @param ox Box origin x in voxels.
 * @param oy Box origin y in voxels.
 * @param oz Box origin z in voxels.
 * @param cx Box size along x in voxels.
 * @param cy Box size along y in voxels.
 * @param cz Box size along z in voxels.
 * @returns `false` when the box leaves the grid or any covered block is not BLOCK_SOLID; `true` otherwise.
 */
function chunkIsSaturated(src, ox, oy, oz, cx, cy, cz) {
    const startsOutside = ox < 0 || oy < 0 || oz < 0;
    const endsOutside = ox + cx > src.nx || oy + cy > src.ny || oz + cz > src.nz;
    if (startsOutside || endsOutside) {
        return false;
    }
    // Half-open block range; the end is rounded up to include partially covered blocks.
    const bxStart = ox >> 2;
    const byStart = oy >> 2;
    const bzStart = oz >> 2;
    const bxEnd = (ox + cx + 3) >> 2;
    const byEnd = (oy + cy + 3) >> 2;
    const bzEnd = (oz + cz + 3) >> 2;
    for (let bz = bzStart; bz < bzEnd; bz++) {
        const planeBase = bz * src.bStride;
        for (let by = byStart; by < byEnd; by++) {
            const rowBase = by * src.nbx + planeBase;
            for (let bx = bxStart; bx < bxEnd; bx++) {
                const solid = readBlockType(src.types, bx + rowBase) === BLOCK_SOLID;
                if (!solid) {
                    return false;
                }
            }
        }
    }
    return true;
}
309
/**
 * Marks every block of `dst` touched by a voxel-space box as solid by writing
 * the 2-bit solid code into the packed `dst.types` words.
 *
 * @param dst Grid exposing `nbx`, `nby`, `nbz`, `bStride` and a writable `types` array.
 * @param innerOx Box origin x in voxels.
 * @param innerOy Box origin y in voxels.
 * @param innerOz Box origin z in voxels.
 * @param innerCx Box size along x in voxels.
 * @param innerCy Box size along y in voxels.
 * @param innerCz Box size along z in voxels.
 */
function insertSaturatedInner(dst, innerOx, innerOy, innerOz, innerCx, innerCy, innerCz) {
    const bxStart = Math.max(0, innerOx >> 2);
    const byStart = Math.max(0, innerOy >> 2);
    const bzStart = Math.max(0, innerOz >> 2);
    const bxEnd = Math.min(dst.nbx, (innerOx + innerCx + 3) >> 2);
    const byEnd = Math.min(dst.nby, (innerOy + innerCy + 3) >> 2);
    const bzEnd = Math.min(dst.nbz, (innerOz + innerCz + 3) >> 2);
    for (let bz = bzStart; bz < bzEnd; bz++) {
        for (let by = byStart; by < byEnd; by++) {
            const rowOrigin = by * dst.nbx + bz * dst.bStride;
            const rowEnd = rowOrigin + bxEnd;
            // Walk the row one type word at a time; `run` is the number of blocks
            // of this row that live in the current word.
            for (let cursor = rowOrigin + bxStart, run = 0; cursor < rowEnd; cursor += run) {
                const slot = cursor & 15;
                const wordIndex = cursor >>> 4;
                run = Math.min(16 - slot, rowEnd - cursor);
                if (run === 16) {
                    // The whole word belongs to this row: overwrite it outright.
                    dst.types[wordIndex] = SOLID_WORD;
                    continue;
                }
                // Partial word: replace only the 2-bit fields of the covered blocks.
                const fieldBits = run << 1;
                const mask = (((1 << fieldBits) - 1) >>> 0) << (slot << 1);
                const kept = dst.types[wordIndex] & ~mask;
                dst.types[wordIndex] = (kept | (SOLID_WORD & mask)) >>> 0;
            }
        }
    }
}
340
/**
 * Copies one chunk's read-back results into the destination grid.
 *
 * `typesOut` holds 2-bit block types packed 16 per word and `masksOut` holds a
 * lo/hi mask pair per block, both indexed by the chunk-local block index
 * (x fastest, then y, then z). Non-empty types are OR-ed into `dst.types`;
 * mask pairs are stored through `dst.masks.set` for BLOCK_MIXED blocks only.
 *
 * @param dst Destination grid exposing `nbx`, `bStride`, `types` and `masks.set`.
 * @param typesOut Packed block types for the chunk.
 * @param masksOut Interleaved lo/hi masks for the chunk.
 * @param cx Chunk origin x in voxels.
 * @param cy Chunk origin y in voxels.
 * @param cz Chunk origin z in voxels.
 * @param innerNx Chunk size along x in voxels.
 * @param innerNy Chunk size along y in voxels.
 * @param innerNz Chunk size along z in voxels.
 */
function applyChunkToDst(dst, typesOut, masksOut, cx, cy, cz, innerNx, innerNy, innerNz) {
    const blocksX = innerNx >> 2;
    const blocksY = innerNy >> 2;
    const blocksZ = innerNz >> 2;
    const originBx = cx >> 2;
    const originBy = cy >> 2;
    const originBz = cz >> 2;
    let local = 0;
    for (let lz = 0; lz < blocksZ; lz++) {
        for (let ly = 0; ly < blocksY; ly++) {
            const rowStart = originBx + (originBy + ly) * dst.nbx + (originBz + lz) * dst.bStride;
            for (let lx = 0; lx < blocksX; lx++, local++) {
                const blockType = (typesOut[local >>> 4] >>> ((local & 15) << 1)) & 3;
                if (blockType !== BLOCK_EMPTY) {
                    const target = rowStart + lx;
                    dst.types[target >>> 4] |= blockType << ((target & 15) << 1);
                    if (blockType === BLOCK_MIXED) {
                        const pair = local * 2;
                        dst.masks.set(target, masksOut[pair], masksOut[pair + 1]);
                    }
                }
            }
        }
    }
}
372
/**
 * Owns the compute pipelines and per-slot GPU buffers used to dilate a sparse
 * voxel grid chunk by chunk.
 *
 * Each slot has two dense ping-pong buffers (`bufferA`/`bufferB`), output
 * buffers for packed block types and masks, and mappable read-back copies of
 * those outputs. A slot must not be reused until the promises returned by the
 * previous `submitChunkSparse` call on that slot have settled.
 */
class GpuDilation {
    // Number of slots; callers cycle through them so that one chunk can be read
    // back while the next one is being submitted.
    static { this.NUM_SLOTS = 2; }
    /**
     * Builds the four compute pipelines and allocates the initial buffers of every slot.
     *
     * @param device GPU device used for all allocations and submissions.
     */
    constructor(device) {
        this.slots = [];
        this.srcMeta = { nbx: 0, nby: 0, nbz: 0, bStride: 0, capMinusOne: 0 };
        this.device = device;
        this.extractPipeline = createStoragePipeline(device, extractWgsl());
        this.compactPipeline = createStoragePipeline(device, compactWgsl());
        this.dilateXPipeline = createStoragePipeline(device, dilateXWgsl());
        this.dilateYZPipeline = createStoragePipeline(device, dilateYZWgsl());
        const storageUsage = GPU_BUFFER_USAGE_STORAGE | GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_COPY_SRC;
        const readbackUsage = GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_MAP_READ;
        for (let i = 0; i < GpuDilation.NUM_SLOTS; i++) {
            // Initial sizes in bytes; the ensure* methods grow them on demand.
            const capacity = 1024 * 1024 * 4;
            const typesOutCapacity = 64 * 1024;
            const masksOutCapacity = 1024 * 1024;
            this.slots.push({
                bufferA: makeBuffer(device, capacity, storageUsage),
                bufferB: makeBuffer(device, capacity, storageUsage),
                readTypesBuffer: makeBuffer(device, typesOutCapacity, readbackUsage),
                readMasksBuffer: makeBuffer(device, masksOutCapacity, readbackUsage),
                typesOutBuffer: makeBuffer(device, typesOutCapacity, storageUsage),
                masksOutBuffer: makeBuffer(device, masksOutCapacity, storageUsage),
                capacity,
                typesOutCapacity,
                masksOutCapacity,
            });
        }
    }
    /**
     * Destroys `slot[key]` and replaces it with a fresh buffer.
     *
     * @param slot Slot record to mutate.
     * @param key Name of the buffer property on the slot.
     * @param size New size in bytes.
     * @param usage Usage flags for the new buffer.
     */
    replaceBuffer(slot, key, size, usage) {
        slot[key].destroy();
        slot[key] = makeBuffer(this.device, size, usage);
    }
    /**
     * Grows the slot's dense ping-pong buffers (by doubling) until they hold `numWords` 32-bit words.
     *
     * @param slot Slot record to grow.
     * @param numWords Required number of 32-bit words.
     */
    ensureSlotBuffers(slot, numWords) {
        const neededBytes = numWords * 4;
        if (neededBytes <= slot.capacity) {
            return;
        }
        let cap = slot.capacity;
        while (cap < neededBytes) {
            cap *= 2;
        }
        const storageUsage = GPU_BUFFER_USAGE_STORAGE | GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_COPY_SRC;
        this.replaceBuffer(slot, 'bufferA', cap, storageUsage);
        this.replaceBuffer(slot, 'bufferB', cap, storageUsage);
        slot.capacity = cap;
    }
    /**
     * Grows the slot's type/mask output buffers and their read-back copies to fit `innerBlocks` blocks.
     *
     * @param slot Slot record to grow.
     * @param innerBlocks Number of blocks whose results will be written.
     */
    ensureSlotOutputBuffers(slot, innerBlocks) {
        const storageUsage = GPU_BUFFER_USAGE_STORAGE | GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_COPY_SRC;
        const readbackUsage = GPU_BUFFER_USAGE_COPY_DST | GPU_BUFFER_USAGE_MAP_READ;
        // 2 bits per block, 16 blocks per 4-byte word.
        const typesBytes = ((innerBlocks + 15) >>> 4) * 4;
        if (slot.typesOutCapacity < typesBytes) {
            this.replaceBuffer(slot, 'typesOutBuffer', typesBytes, storageUsage);
            this.replaceBuffer(slot, 'readTypesBuffer', typesBytes, readbackUsage);
            slot.typesOutCapacity = typesBytes;
        }
        // Two 4-byte words (lo, hi) per block.
        const masksBytes = innerBlocks * 8;
        if (slot.masksOutCapacity < masksBytes) {
            this.replaceBuffer(slot, 'masksOutBuffer', masksBytes, storageUsage);
            this.replaceBuffer(slot, 'readMasksBuffer', masksBytes, readbackUsage);
            slot.masksOutCapacity = masksBytes;
        }
    }
    /**
     * Uploads the source grid's packed types and its mask table (keys, lo, hi)
     * to the GPU, replacing any previously uploaded source.
     *
     * @param src Grid exposing `types`, `masks.keys`, `masks.lo`, `masks.hi`, `nbx`, `nby`, `nbz` and `bStride`.
     */
    uploadSrc(src) {
        this.releaseSrc();
        const usage = GPU_BUFFER_USAGE_STORAGE | GPU_BUFFER_USAGE_COPY_DST;
        const queue = this.device.queue;
        this.srcTypesBuffer = makeBuffer(this.device, src.types.byteLength, usage);
        queue.writeBuffer(this.srcTypesBuffer, 0, src.types.buffer, src.types.byteOffset, src.types.byteLength);
        // Reinterpret the key table as unsigned words without copying.
        const keysU32 = new Uint32Array(src.masks.keys.buffer, src.masks.keys.byteOffset, src.masks.keys.length);
        this.srcKeysBuffer = makeBuffer(this.device, keysU32.byteLength, usage);
        this.srcLoBuffer = makeBuffer(this.device, src.masks.lo.byteLength, usage);
        this.srcHiBuffer = makeBuffer(this.device, src.masks.hi.byteLength, usage);
        queue.writeBuffer(this.srcKeysBuffer, 0, keysU32.buffer, keysU32.byteOffset, keysU32.byteLength);
        queue.writeBuffer(this.srcLoBuffer, 0, src.masks.lo.buffer, src.masks.lo.byteOffset, src.masks.lo.byteLength);
        queue.writeBuffer(this.srcHiBuffer, 0, src.masks.hi.buffer, src.masks.hi.byteOffset, src.masks.hi.byteLength);
        this.srcMeta = {
            nbx: src.nbx,
            nby: src.nby,
            nbz: src.nbz,
            bStride: src.bStride,
            // The extract shader uses this as the probing mask for the key table.
            capMinusOne: src.masks.keys.length - 1,
        };
    }
    /** Destroys the uploaded source buffers, if any, and clears the references. */
    releaseSrc() {
        this.srcTypesBuffer?.destroy();
        this.srcKeysBuffer?.destroy();
        this.srcLoBuffer?.destroy();
        this.srcHiBuffer?.destroy();
        this.srcTypesBuffer = undefined;
        this.srcKeysBuffer = undefined;
        this.srcLoBuffer = undefined;
        this.srcHiBuffer = undefined;
    }
    /**
     * Submits the extract, dilate (x, z, y) and compact passes for one chunk and
     * starts reading the results back.
     *
     * All `*B*` arguments are in blocks. `min*` is the chunk's outer origin in
     * the source grid (may be negative), `outer*` its size including the halo,
     * `halo*` the halo width and `inner*` the size of the region that is read back.
     *
     * @param slotIdx Index of the slot whose buffers are used.
     * @param halfExtentXZ Dilation half extent, in voxels, applied along x and z.
     * @param halfExtentY Dilation half extent, in voxels, applied along y.
     * @returns `{ types, masks }`: promises for fresh `Uint32Array` copies of the packed block types and the interleaved lo/hi masks.
     * @throws {Error} When `uploadSrc()` has not been called.
     */
    submitChunkSparse(slotIdx, minBx, minBy, minBz, outerBx, outerBy, outerBz, haloBx, haloBy, haloBz, innerBx, innerBy, innerBz, halfExtentXZ, halfExtentY) {
        if (!this.srcTypesBuffer || !this.srcKeysBuffer || !this.srcLoBuffer || !this.srcHiBuffer) {
            throw new Error('GpuDilation: must call uploadSrc() before submitChunkSparse()');
        }
        const slot = this.slots[slotIdx];
        const outerNx = outerBx * 4;
        const outerNy = outerBy * 4;
        const outerNz = outerBz * 4;
        const numXWords = (outerNx + 31) >>> 5;
        const numWords = numXWords * outerNy * outerNz;
        const innerBlocks = innerBx * innerBy * innerBz;
        const typesOutWords = (innerBlocks + 15) >>> 4;
        const typesBytes = typesOutWords * 4;
        const masksBytes = innerBlocks * 8;
        this.ensureSlotBuffers(slot, numWords);
        this.ensureSlotOutputBuffers(slot, innerBlocks);
        // Uniform buffers are per-submission; they are collected here and
        // destroyed once the read-backs have settled.
        const uniformBuffers = [];
        const device = this.device;
        function makeUniform(values) {
            const buffer = writeUniform(device, values);
            uniformBuffers.push(buffer);
            return buffer;
        }
        {
            // Pass 1: expand the sparse source into the dense grid in bufferA.
            const encoder = this.device.createCommandEncoder();
            encoder.clearBuffer(slot.bufferA, 0, numWords * 4);
            const uniforms = new Uint32Array([
                // Negative origins are stored as two's complement; the shader reads them as i32.
                minBx >>> 0,
                minBy >>> 0,
                minBz >>> 0,
                outerBx,
                outerBy,
                outerBz,
                numXWords,
                this.srcMeta.nbx,
                this.srcMeta.nby,
                this.srcMeta.nbz,
                this.srcMeta.bStride,
                this.srcMeta.capMinusOne,
            ]);
            const uniformBuffer = makeUniform(uniforms);
            const bindGroup = this.device.createBindGroup({
                layout: this.extractPipeline.getBindGroupLayout(0),
                entries: [
                    { binding: 0, resource: { buffer: uniformBuffer } },
                    { binding: 1, resource: { buffer: this.srcTypesBuffer } },
                    { binding: 2, resource: { buffer: this.srcKeysBuffer } },
                    { binding: 3, resource: { buffer: this.srcLoBuffer } },
                    { binding: 4, resource: { buffer: this.srcHiBuffer } },
                    { binding: 5, resource: { buffer: slot.bufferA } },
                ],
            });
            const pass = encoder.beginComputePass();
            pass.setPipeline(this.extractPipeline);
            pass.setBindGroup(0, bindGroup);
            pass.dispatchWorkgroups(Math.ceil(outerBx / 8), Math.ceil(outerBy / 4), Math.ceil(outerBz / 8));
            pass.end();
            this.device.queue.submit([encoder.finish()]);
        }
        {
            // Pass 2: dilate along x, z and y (ping-ponging A -> B -> A -> B),
            // then compact bufferB into per-block types and masks.
            const encoder = this.device.createCommandEncoder();
            function dispatch(pipeline, src, dst, uniforms, wgX, wgY, wgZ) {
                const uniformBuffer = makeUniform(uniforms);
                const bindGroup = device.createBindGroup({
                    layout: pipeline.getBindGroupLayout(0),
                    entries: [
                        { binding: 0, resource: { buffer: uniformBuffer } },
                        { binding: 1, resource: { buffer: src } },
                        { binding: 2, resource: { buffer: dst } },
                    ],
                });
                const pass = encoder.beginComputePass();
                pass.setPipeline(pipeline);
                pass.setBindGroup(0, bindGroup);
                pass.dispatchWorkgroups(wgX, wgY, wgZ);
                pass.end();
            }
            // All three dilation passes cover the dense grid one word per invocation.
            const wgX = Math.ceil(numXWords / 8);
            const wgY = Math.ceil(outerNy / 4);
            const wgZ = Math.ceil(outerNz / 8);
            dispatch(this.dilateXPipeline, slot.bufferA, slot.bufferB, new Uint32Array([numXWords, outerNy, outerNz, halfExtentXZ]), wgX, wgY, wgZ);
            // stride = one plane of words -> the shader walks along z.
            dispatch(this.dilateYZPipeline, slot.bufferB, slot.bufferA, new Uint32Array([numXWords, outerNy, outerNz, halfExtentXZ, numXWords * outerNy, outerNz]), wgX, wgY, wgZ);
            // stride = one row of words -> the shader walks along y.
            dispatch(this.dilateYZPipeline, slot.bufferA, slot.bufferB, new Uint32Array([numXWords, outerNy, outerNz, halfExtentY, numXWords, outerNy]), wgX, wgY, wgZ);
            // The compact shader ORs into typesOut, so it must start from zero.
            encoder.clearBuffer(slot.typesOutBuffer, 0, typesBytes);
            const compactUniformBuffer = makeUniform(new Uint32Array([haloBx, haloBy, haloBz, numXWords, innerBx, innerBy, innerBz, outerBy]));
            const compactBindGroup = this.device.createBindGroup({
                layout: this.compactPipeline.getBindGroupLayout(0),
                entries: [
                    { binding: 0, resource: { buffer: compactUniformBuffer } },
                    { binding: 1, resource: { buffer: slot.bufferB } },
                    { binding: 2, resource: { buffer: slot.typesOutBuffer } },
                    { binding: 3, resource: { buffer: slot.masksOutBuffer } },
                ],
            });
            const pass = encoder.beginComputePass();
            pass.setPipeline(this.compactPipeline);
            pass.setBindGroup(0, compactBindGroup);
            pass.dispatchWorkgroups(Math.ceil(innerBx / 8), Math.ceil(innerBy / 4), Math.ceil(innerBz / 8));
            pass.end();
            encoder.copyBufferToBuffer(slot.typesOutBuffer, 0, slot.readTypesBuffer, 0, typesBytes);
            encoder.copyBufferToBuffer(slot.masksOutBuffer, 0, slot.readMasksBuffer, 0, masksBytes);
            this.device.queue.submit([encoder.finish()]);
        }
        // Maps a read-back buffer, copies its first `byteLength` bytes into a
        // fresh array and unmaps it so the slot can be reused.
        async function readBack(buffer, byteLength) {
            await buffer.mapAsync(GPU_MAP_MODE_READ, 0, byteLength);
            const mapped = new Uint32Array(buffer.getMappedRange(0, byteLength));
            const out = new Uint32Array(byteLength / 4);
            out.set(mapped);
            buffer.unmap();
            return out;
        }
        const typesPromise = readBack(slot.readTypesBuffer, typesBytes);
        const masksPromise = readBack(slot.readMasksBuffer, masksBytes);
        // allSettled never rejects: the uniforms are released even when a
        // read-back fails, and this detached chain cannot turn into an unhandled
        // rejection. Read-back errors still reach the caller through the
        // returned promises.
        void Promise.allSettled([typesPromise, masksPromise]).then(() => {
            for (const buffer of uniformBuffers) {
                buffer.destroy();
            }
        });
        return { types: typesPromise, masks: masksPromise };
    }
    /** Destroys the uploaded source buffers and every buffer of every slot. */
    destroy() {
        this.releaseSrc();
        for (const slot of this.slots) {
            slot.bufferA.destroy();
            slot.bufferB.destroy();
            slot.readTypesBuffer.destroy();
            slot.readMasksBuffer.destroy();
            slot.typesOutBuffer.destroy();
            slot.masksOutBuffer.destroy();
        }
    }
}
592
/**
 * Dilates a sparse voxel grid on the GPU with a box of half extent
 * `halfExtentXZ` along x and z and `halfExtentY` along y.
 *
 * The grid is processed in chunks of `CHUNK_INNER` voxels per axis, each padded
 * by a block-aligned halo. Chunks whose padded region is entirely empty are
 * skipped, chunks whose padded region is entirely solid are filled on the CPU,
 * and all others are submitted to the GPU. Up to two chunks are pipelined: the
 * next chunk is submitted before the previous one is read back.
 *
 * @param src Source grid; it is not modified.
 * @param halfExtentXZ Non-negative integer half extent, in voxels, for x and z.
 * @param halfExtentY Non-negative integer half extent, in voxels, for y.
 * @returns A new grid with the dilated occupancy (`src.clone()` when both extents are 0).
 * @throws {Error} When either half extent is not a non-negative integer.
 */
export async function gpuDilate3(src, halfExtentXZ, halfExtentY) {
    if (halfExtentXZ === 0 && halfExtentY === 0) {
        return src.clone();
    }
    if (!Number.isInteger(halfExtentXZ) || halfExtentXZ < 0) {
        throw new Error(`gpuDilate3: halfExtentXZ=${halfExtentXZ} must be a non-negative integer`);
    }
    if (!Number.isInteger(halfExtentY) || halfExtentY < 0) {
        throw new Error(`gpuDilate3: halfExtentY=${halfExtentY} must be a non-negative integer`);
    }
    const device = await getOrCreateDevice();
    const dst = new SparseVoxelGrid(src.nx, src.ny, src.nz);
    const gpu = new GpuDilation(device);
    const haloX = blockAlignedExtent(halfExtentXZ);
    const haloY = blockAlignedExtent(halfExtentY);
    const haloZ = haloX;
    const haloBx = haloX / 4;
    const haloBy = haloY / 4;
    const haloBz = haloZ / 4;
    const innerStep = CHUNK_INNER & ~3;
    let currentSlot = 0;
    // Chunk whose read-back is pending and will be applied by the next drain.
    let inflight;
    // Chunk already handed to the GPU but not yet promoted to `inflight`.
    let submitted;
    async function drainInflight() {
        if (!inflight) {
            return;
        }
        const f = inflight;
        inflight = undefined;
        const [typesOut, masksOut] = await Promise.all([f.typesPromise, f.masksPromise]);
        applyChunkToDst(dst, typesOut, masksOut, f.cx, f.cy, f.cz, f.innerNx, f.innerNy, f.innerNz);
    }
    try {
        // Inside the try so that a failed upload still releases the slot buffers.
        gpu.uploadSrc(src);
        for (let cz = 0; cz < src.nz; cz += innerStep) {
            for (let cy = 0; cy < src.ny; cy += innerStep) {
                for (let cx = 0; cx < src.nx; cx += innerStep) {
                    const innerNx = Math.min(innerStep, src.nx - cx);
                    const innerNy = Math.min(innerStep, src.ny - cy);
                    const innerNz = Math.min(innerStep, src.nz - cz);
                    // Outer region = inner region padded by the halo on every side.
                    const ox = cx - haloX;
                    const oy = cy - haloY;
                    const oz = cz - haloZ;
                    const outerNx = innerNx + 2 * haloX;
                    const outerNy = innerNy + 2 * haloY;
                    const outerNz = innerNz + 2 * haloZ;
                    if (chunkIsEmpty(src, ox, oy, oz, outerNx, outerNy, outerNz)) {
                        continue;
                    }
                    if (chunkIsSaturated(src, ox, oy, oz, outerNx, outerNy, outerNz)) {
                        insertSaturatedInner(dst, cx, cy, cz, innerNx, innerNy, innerNz);
                        continue;
                    }
                    // NOTE(review): these counts truncate, so an extent that is not a
                    // multiple of 4 would drop its trailing partial block. Confirm that
                    // SparseVoxelGrid dimensions are always multiples of 4.
                    const innerBx = innerNx >> 2;
                    const innerBy = innerNy >> 2;
                    const innerBz = innerNz >> 2;
                    const outerBx = outerNx >> 2;
                    const outerBy = outerNy >> 2;
                    const outerBz = outerNz >> 2;
                    const minBx = Math.floor(ox / 4);
                    const minBy = Math.floor(oy / 4);
                    const minBz = Math.floor(oz / 4);
                    const { types, masks } = gpu.submitChunkSparse(currentSlot, minBx, minBy, minBz, outerBx, outerBy, outerBz, haloBx, haloBy, haloBz, innerBx, innerBy, innerBz, halfExtentXZ, halfExtentY);
                    submitted = {
                        typesPromise: types,
                        masksPromise: masks,
                        cx,
                        cy,
                        cz,
                        innerNx,
                        innerNy,
                        innerNz,
                    };
                    // Read back the previous chunk while the GPU works on this one.
                    if (inflight) {
                        await drainInflight();
                    }
                    inflight = submitted;
                    submitted = undefined;
                    currentSlot = (currentSlot + 1) % GpuDilation.NUM_SLOTS;
                }
            }
        }
        await drainInflight();
    }
    catch (err) {
        // Let outstanding read-backs settle before the buffers are destroyed;
        // otherwise their promises would reject with nobody observing them.
        const outstanding = [];
        for (const chunk of [inflight, submitted]) {
            if (chunk) {
                outstanding.push(chunk.typesPromise, chunk.masksPromise);
            }
        }
        await Promise.allSettled(outstanding);
        throw err;
    }
    finally {
        gpu.destroy();
    }
    return dst;
}