querysub 0.356.0 → 0.358.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/.cursorrules +9 -0
  2. package/bin/movelogs.js +4 -0
  3. package/package.json +13 -6
  4. package/scripts/postinstall.js +23 -0
  5. package/src/-a-archives/archiveCache.ts +10 -12
  6. package/src/-a-archives/archives.ts +29 -0
  7. package/src/-a-archives/archivesBackBlaze.ts +60 -12
  8. package/src/-a-archives/archivesDisk.ts +39 -13
  9. package/src/-a-archives/archivesLimitedCache.ts +21 -0
  10. package/src/-a-archives/archivesMemoryCache.ts +374 -0
  11. package/src/-a-archives/archivesPrivateFileSystem.ts +22 -0
  12. package/src/-g-core-values/NodeCapabilities.ts +3 -0
  13. package/src/0-path-value-core/auditLogs.ts +5 -1
  14. package/src/0-path-value-core/pathValueCore.ts +7 -7
  15. package/src/4-dom/qreact.tsx +1 -0
  16. package/src/4-querysub/Querysub.ts +1 -5
  17. package/src/config.ts +5 -0
  18. package/src/deployManager/components/MachineDetailPage.tsx +43 -2
  19. package/src/deployManager/components/MachinesListPage.tsx +10 -2
  20. package/src/deployManager/machineApplyMainCode.ts +3 -3
  21. package/src/deployManager/machineSchema.ts +39 -0
  22. package/src/diagnostics/MachineThreadInfo.tsx +235 -0
  23. package/src/diagnostics/NodeViewer.tsx +5 -3
  24. package/src/diagnostics/logs/FastArchiveAppendable.ts +79 -42
  25. package/src/diagnostics/logs/FastArchiveController.ts +102 -63
  26. package/src/diagnostics/logs/FastArchiveViewer.tsx +36 -8
  27. package/src/diagnostics/logs/IndexedLogs/BufferIndex.ts +462 -0
  28. package/src/diagnostics/logs/IndexedLogs/BufferIndexCPP.cpp +327 -0
  29. package/src/diagnostics/logs/IndexedLogs/BufferIndexCPP.d.ts +18 -0
  30. package/src/diagnostics/logs/IndexedLogs/BufferIndexCPP.js +1 -0
  31. package/src/diagnostics/logs/IndexedLogs/BufferIndexHelpers.ts +222 -0
  32. package/src/diagnostics/logs/IndexedLogs/BufferIndexLogsOptimizationConstants.ts +22 -0
  33. package/src/diagnostics/logs/IndexedLogs/BufferIndexWAT.wat +1145 -0
  34. package/src/diagnostics/logs/IndexedLogs/BufferIndexWAT.wat.d.ts +178 -0
  35. package/src/diagnostics/logs/IndexedLogs/BufferListStreamer.ts +208 -0
  36. package/src/diagnostics/logs/IndexedLogs/BufferUnitIndex.ts +716 -0
  37. package/src/diagnostics/logs/IndexedLogs/BufferUnitSet.ts +146 -0
  38. package/src/diagnostics/logs/IndexedLogs/FilePathSelector.tsx +569 -0
  39. package/src/diagnostics/logs/IndexedLogs/FindProgressTracker.ts +45 -0
  40. package/src/diagnostics/logs/IndexedLogs/IndexedLogs.ts +685 -0
  41. package/src/diagnostics/logs/IndexedLogs/LogStreamer.ts +47 -0
  42. package/src/diagnostics/logs/IndexedLogs/LogViewer3.tsx +901 -0
  43. package/src/diagnostics/logs/IndexedLogs/TimeFileTree.ts +236 -0
  44. package/src/diagnostics/logs/IndexedLogs/binding.gyp +23 -0
  45. package/src/diagnostics/logs/IndexedLogs/moveIndexLogsToPublic.ts +251 -0
  46. package/src/diagnostics/logs/IndexedLogs/moveLogsEntry.ts +10 -0
  47. package/src/diagnostics/logs/LogViewer2.tsx +120 -55
  48. package/src/diagnostics/logs/TimeRangeSelector.tsx +5 -2
  49. package/src/diagnostics/logs/diskLogger.ts +32 -48
  50. package/src/diagnostics/logs/errorNotifications/ErrorNotificationController.ts +3 -2
  51. package/src/diagnostics/logs/errorNotifications/errorDigests.tsx +1 -0
  52. package/src/diagnostics/logs/errorNotifications2/errorNotifications2.ts +0 -0
  53. package/src/diagnostics/logs/lifeCycleAnalysis/LifeCyclePages.tsx +150 -0
  54. package/src/diagnostics/logs/lifeCycleAnalysis/lifeCycles.tsx +150 -15
  55. package/src/diagnostics/logs/lifeCycleAnalysis/test.ts +0 -0
  56. package/src/diagnostics/logs/lifeCycleAnalysis/test.wat +106 -0
  57. package/src/diagnostics/logs/lifeCycleAnalysis/test.wat.d.ts +2 -0
  58. package/src/diagnostics/logs/lifeCycleAnalysis/testHoist.ts +5 -0
  59. package/src/diagnostics/logs/logViewerExtractField.ts +2 -3
  60. package/src/diagnostics/managementPages.tsx +10 -0
  61. package/src/diagnostics/trackResources.ts +1 -1
  62. package/src/functional/limitProcessing.ts +39 -0
  63. package/src/misc/lz4_wasm_nodejs.d.ts +34 -0
  64. package/src/misc/lz4_wasm_nodejs.js +178 -0
  65. package/src/misc/lz4_wasm_nodejs_bg.js +94 -0
  66. package/src/misc/lz4_wasm_nodejs_bg.wasm +0 -0
  67. package/src/misc/lz4_wasm_nodejs_bg.wasm.d.ts +15 -0
  68. package/src/storage/CompressedStream.ts +13 -0
  69. package/src/storage/LZ4.ts +32 -0
  70. package/src/storage/ZSTD.ts +10 -0
  71. package/src/wat/watCompiler.ts +1716 -0
  72. package/src/wat/watGrammar.pegjs +93 -0
  73. package/src/wat/watHandler.ts +179 -0
  74. package/src/wat/watInstructions.txt +707 -0
  75. package/src/zip.ts +3 -89
  76. package/src/diagnostics/logs/lifeCycleAnalysis/spec.md +0 -125
@@ -0,0 +1,1145 @@
1
+ (module
2
+ ;; Import memory from JavaScript (with memory64 support for >4GB)
3
+ (import "env" "memory" (memory i64 1))
4
+ (global (export "__heap_base") i32 (i32.const 0))
5
+
6
+ ;; Helper: map a unit to the byte offset of its home slot in the hash table
7
+ ;; Uses Fibonacci hashing: the top log2(capacity) bits of unit * 2654435761 pick the slot
8
+ ;; hashTableCapacity MUST be power of 2; returns slot * 16 (a byte offset, not a slot number)
9
+ (func $hashUnitToIndex (param $unit i32) (param $hashTableCapacity i32) (result i32)
10
+ (local $hash i32)
11
+ (local $shiftAmount i32)
12
+ ;; Multiply by 2654435761 (Knuth's golden ratio constant); i32.mul wraps modulo 2^32
13
+ (local.set $hash (i32.mul (local.get $unit) (i32.const 2654435761)))
14
+ ;; Calculate shift amount: 32 - log2(capacity) = 1 + clz(capacity). For capacity 1 this is 32, which i32.shr_u reduces modulo 32 to 0
15
+ (local.set $shiftAmount (i32.add (i32.clz (local.get $hashTableCapacity)) (i32.const 1)))
16
+ ;; Shift, then mask with capacity - 1: a no-op for capacity >= 2, but forces slot 0 when capacity is 1 (where the shift wrapped to 0)
17
+ ;; Then multiply by 16 (each slot is 16 bytes: 4 u32s)
18
+ (i32.mul
19
+ (i32.and (i32.shr_u (local.get $hash) (local.get $shiftAmount)) (i32.sub (local.get $hashTableCapacity) (i32.const 1)))
20
+ (i32.const 16)))
21
+
22
+ ;; Helper: step a slot byte offset forward by one 16-byte slot, wrapping around past the end of the table
23
+ (func $getNextIndex (param $index i32) (param $hashTableSize i32) (result i32)
24
+ (local $candidate i32)
25
+ (local.set $candidate (i32.add (i32.const 16) (local.get $index)))
26
+ (if (i32.lt_u (local.get $candidate) (local.get $hashTableSize))
27
+ (then (return (local.get $candidate))))
28
+ (i32.sub (local.get $candidate) (local.get $hashTableSize)))
29
+
30
+ ;; Populate Units From Block (Loop Unrolled): for each buffer in a concatenated block, emit every non-zero 4-byte unit (one per byte position) into three parallel output arrays
31
+ ;; Block format: [count, length1, length2, ..., lengthN, buffer1_bytes, buffer2_bytes, ..., bufferN_bytes] (count and lengths are read as u32)
32
+ ;; Despite the "_simd" export name this uses scalar loads unrolled 4x; blockDataSize is not read here; returns the number of units written
33
+ (func (export "populate_units_from_block_simd")
34
+ (param $blockDataPtr i64)
35
+ (param $blockDataSize i32)
36
+ (param $blockIndex i32)
37
+ (param $unitsOutputPtr i64)
38
+ (param $bufferIndicesOutputPtr i64)
39
+ (param $blocksOutputPtr i64)
40
+ (param $startOffset i32)
41
+ (result i32)
42
+
43
+ (local $bufferCount i32)
44
+ (local $bufferIndex i32)
45
+ (local $bufferLength i32)
46
+ (local $bufferDataStart i32)
47
+ (local $bufferOffset i32)
48
+ (local $totalUnitsWritten i32)
49
+ (local $i i32)
50
+ (local $mainLoopEnd i32)
51
+ (local $writePos i32)
52
+ (local $unit i32)
53
+ (local $readAddr i64)
54
+ (local $writeAddr i64)
55
+
56
+ ;; Temps for unrolled loop
57
+ (local $unit0 i32)
58
+ (local $unit1 i32)
59
+ (local $unit2 i32)
60
+ (local $unit3 i32)
61
+ (local $j i32)
62
+
63
+ (local.set $totalUnitsWritten (i32.const 0))
64
+
65
+ ;; Read buffer count (first 4 bytes)
66
+ (local.set $bufferCount (i32.load align=0 (local.get $blockDataPtr)))
67
+
68
+ ;; Calculate where buffer data starts (after count + all lengths): 4 + bufferCount * 4 bytes into the block
69
+ (local.set $bufferDataStart (i32.add (i32.const 4) (i32.mul (local.get $bufferCount) (i32.const 4))))
70
+ (local.set $bufferOffset (local.get $bufferDataStart))
71
+
72
+ ;; Loop through all buffers; bufferOffset tracks the current buffer's byte offset within the block
73
+ (local.set $bufferIndex (i32.const 0))
74
+ (block $break_buffers
75
+ (loop $continue_buffers
76
+ (br_if $break_buffers (i32.ge_u (local.get $bufferIndex) (local.get $bufferCount)))
77
+
78
+ ;; Read buffer length from header (offset: 4 + bufferIndex * 4)
79
+ (local.set $bufferLength
80
+ (i32.load align=0 (i64.add (local.get $blockDataPtr) (i64.extend_i32_u (i32.add (i32.const 4) (i32.mul (local.get $bufferIndex) (i32.const 4)))))))
81
+
82
+ ;; Process this buffer - we need at least 4 bytes to create a unit (shorter buffers emit nothing)
83
+ (if (i32.ge_u (local.get $bufferLength) (i32.const 4))
84
+ (then
85
+ ;; Calculate main loop end
86
+ ;; We need at least 7 bytes to extract 4 overlapping units (positions i, i+1, i+2, i+3 need bytes [i, i+7))
87
+ ;; mainLoopEnd is the exclusive upper bound on i for the unrolled loop (the last valid start is bufferLength-7)
88
+ (if (i32.ge_u (local.get $bufferLength) (i32.const 7))
89
+ (then
90
+ (local.set $mainLoopEnd (i32.sub (local.get $bufferLength) (i32.const 6)))) ;; i <= bufferLength-7, so i+6 < bufferLength
91
+ (else
92
+ (local.set $mainLoopEnd (i32.const 0)))) ;; Fewer than 7 bytes: the unrolled loop is skipped and the tail loop handles the buffer
93
+ (local.set $i (i32.const 0))
94
+
95
+ ;; Main unrolled loop - process 4 overlapping units per iteration
96
+ ;; Optimized for the case where the four units share at least one set bit (see the test below)
97
+ (block $break_simd
98
+ (loop $continue_simd
99
+ (br_if $break_simd (i32.ge_u (local.get $i) (local.get $mainLoopEnd)))
100
+
101
+ ;; Calculate read address - we need to read at least 7 bytes [i, i+7)
102
+ (local.set $readAddr (i64.add (local.get $blockDataPtr) (i64.extend_i32_u (i32.add (local.get $bufferOffset) (local.get $i)))))
103
+
104
+ ;; Load 4 overlapping units (unrolled, not SIMD)
105
+ ;; Unit 0: bytes [i+0, i+1, i+2, i+3]
106
+ (local.set $unit0 (i32.load align=0 (local.get $readAddr)))
107
+ ;; Unit 1: bytes [i+1, i+2, i+3, i+4]
108
+ (local.set $unit1 (i32.load align=0 (i64.add (local.get $readAddr) (i64.const 1))))
109
+ ;; Unit 2: bytes [i+2, i+3, i+4, i+5]
110
+ (local.set $unit2 (i32.load align=0 (i64.add (local.get $readAddr) (i64.const 2))))
111
+ ;; Unit 3: bytes [i+3, i+4, i+5, i+6]
112
+ (local.set $unit3 (i32.load align=0 (i64.add (local.get $readAddr) (i64.const 3))))
113
+
114
+ ;; Conservative test: a non-zero bitwise AND of the four units proves all four are non-zero. A zero AND (possible even when every unit is non-zero) takes the slow path, which re-checks each unit, so the output is the same either way
115
+ (if (i32.and (i32.and (local.get $unit0) (local.get $unit1))
116
+ (i32.and (local.get $unit2) (local.get $unit3)))
117
+ (then
118
+ ;; Fast path: all four units are known to be non-zero
119
+ ;; Group similar operations for better instruction-level parallelism
120
+ (local.set $writePos (i32.add (local.get $startOffset) (local.get $totalUnitsWritten)))
121
+
122
+ ;; Calculate base write address (byte offset = element index * 4, shared by all three output arrays)
123
+ (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
124
+
125
+ ;; Write all units together
126
+ (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (local.get $writeAddr)) (local.get $unit0))
127
+ (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (i64.add (local.get $writeAddr) (i64.const 4))) (local.get $unit1))
128
+ (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (i64.add (local.get $writeAddr) (i64.const 8))) (local.get $unit2))
129
+ (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (i64.add (local.get $writeAddr) (i64.const 12))) (local.get $unit3))
130
+
131
+ ;; Write all buffer indices together
132
+ (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (local.get $writeAddr)) (local.get $bufferIndex))
133
+ (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (i64.add (local.get $writeAddr) (i64.const 4))) (local.get $bufferIndex))
134
+ (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (i64.add (local.get $writeAddr) (i64.const 8))) (local.get $bufferIndex))
135
+ (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (i64.add (local.get $writeAddr) (i64.const 12))) (local.get $bufferIndex))
136
+
137
+ ;; Write all blocks together
138
+ (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (local.get $writeAddr)) (local.get $blockIndex))
139
+ (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (i64.add (local.get $writeAddr) (i64.const 4))) (local.get $blockIndex))
140
+ (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (i64.add (local.get $writeAddr) (i64.const 8))) (local.get $blockIndex))
141
+ (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (i64.add (local.get $writeAddr) (i64.const 12))) (local.get $blockIndex))
142
+
143
+ ;; Increment total units written by 4
144
+ (local.set $totalUnitsWritten (i32.add (local.get $totalUnitsWritten) (i32.const 4))))
145
+ (else
146
+ ;; Slow path: the AND was zero, so one or more units may be zero; process with a loop
147
+ ;; Re-read the 4 units and process one by one (readAddr is recomputed to the same value it already holds)
148
+ (local.set $readAddr (i64.add (local.get $blockDataPtr) (i64.extend_i32_u (i32.add (local.get $bufferOffset) (local.get $i)))))
149
+ (local.set $j (i32.const 0))
150
+ (block $break_zeros
151
+ (loop $continue_zeros
152
+ (br_if $break_zeros (i32.ge_u (local.get $j) (i32.const 4)))
153
+
154
+ ;; Read unit at offset j
155
+ (local.set $unit (i32.load align=0 (i64.add (local.get $readAddr) (i64.extend_i32_u (local.get $j)))))
156
+
157
+ ;; Write if non-zero
158
+ (if (local.get $unit)
159
+ (then
160
+ (local.set $writePos (i32.add (local.get $startOffset) (local.get $totalUnitsWritten)))
161
+ (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
162
+ (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (local.get $writeAddr)) (local.get $unit))
163
+ (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (local.get $writeAddr)) (local.get $bufferIndex))
164
+ (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (local.get $writeAddr)) (local.get $blockIndex))
165
+ (local.set $totalUnitsWritten (i32.add (local.get $totalUnitsWritten) (i32.const 1)))))
166
+
167
+ ;; Move to next position
168
+ (local.set $j (i32.add (local.get $j) (i32.const 1)))
169
+ (br $continue_zeros)))))
170
+
171
+ ;; Advance by 4 bytes (processed 4 overlapping units at positions i, i+1, i+2, i+3)
172
+ (local.set $i (i32.add (local.get $i) (i32.const 4)))
173
+ (br $continue_simd)))
174
+
175
+ ;; Handle remaining units (scalar tail loop)
176
+ ;; Process positions from the current i (where the unrolled loop stopped) through bufferLength-4
177
+ (block $break_tail
178
+ (loop $continue_tail
179
+ ;; Check if we can read 4 more bytes (bufferLength >= 4 here, so the subtraction cannot wrap)
180
+ (br_if $break_tail (i32.gt_u (local.get $i) (i32.sub (local.get $bufferLength) (i32.const 4))))
181
+
182
+ ;; Calculate read address
183
+ (local.set $readAddr (i64.add (local.get $blockDataPtr) (i64.extend_i32_u (i32.add (local.get $bufferOffset) (local.get $i)))))
184
+
185
+ ;; Read unit as little-endian u32
186
+ (local.set $unit (i32.load align=0 (local.get $readAddr)))
187
+
188
+ ;; Skip if unit is 0
189
+ (if (i32.eqz (local.get $unit))
190
+ (then
191
+ (local.set $i (i32.add (local.get $i) (i32.const 1)))
192
+ (br $continue_tail)))
193
+
194
+ ;; Calculate write position and address
195
+ (local.set $writePos (i32.add (local.get $startOffset) (local.get $totalUnitsWritten)))
196
+ (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
197
+
198
+ ;; Write to output arrays
199
+ (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (local.get $writeAddr)) (local.get $unit))
200
+ (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (local.get $writeAddr)) (local.get $bufferIndex))
201
+ (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (local.get $writeAddr)) (local.get $blockIndex))
202
+
203
+ ;; Increment counters - advance by 1 byte for overlapping units
204
+ (local.set $totalUnitsWritten (i32.add (local.get $totalUnitsWritten) (i32.const 1)))
205
+ (local.set $i (i32.add (local.get $i) (i32.const 1)))
206
+
207
+ (br $continue_tail)))))
208
+
209
+ ;; Move to next buffer (done even when the buffer was too short to yield a unit)
210
+ (local.set $bufferOffset (i32.add (local.get $bufferOffset) (local.get $bufferLength)))
211
+ (local.set $bufferIndex (i32.add (local.get $bufferIndex) (i32.const 1)))
212
+
213
+ (br $continue_buffers)))
214
+
215
+ (local.get $totalUnitsWritten))
216
+
217
+ ;; Populate Units From Block: for each buffer in a concatenated block, emit every non-zero 4-byte unit (one per byte position) into three parallel output arrays
218
+ ;; Block format: [count, length1, length2, ..., lengthN, buffer1_bytes, buffer2_bytes, ..., bufferN_bytes]
219
+ ;; - count: u32 (4 bytes) - number of buffers
220
+ ;; - lengths: count * u32 (4 bytes each) - length of each buffer
221
+ ;; - buffers: concatenated buffer data
222
+ ;; Parameters:
223
+ ;; - blockDataPtr: pointer to concatenated block data
224
+ ;; - blockDataSize: total size of block data in bytes (not read by this function)
225
+ ;; - blockIndex: block index value to store
226
+ ;; - unitsOutputPtr: pointer to units output array
227
+ ;; - bufferIndicesOutputPtr: pointer to buffer indices output array
228
+ ;; - blocksOutputPtr: pointer to blocks output array
229
+ ;; - startOffset: starting write position in output arrays (an element index; multiplied by 4 for the byte offset)
230
+ ;; Returns: number of units written
231
+ (func (export "populate_units_from_block")
232
+ (param $blockDataPtr i64)
233
+ (param $blockDataSize i32)
234
+ (param $blockIndex i32)
235
+ (param $unitsOutputPtr i64)
236
+ (param $bufferIndicesOutputPtr i64)
237
+ (param $blocksOutputPtr i64)
238
+ (param $startOffset i32)
239
+ (result i32)
240
+
241
+ (local $bufferCount i32)
242
+ (local $bufferIndex i32)
243
+ (local $bufferLength i32)
244
+ (local $bufferDataStart i32)
245
+ (local $bufferOffset i32)
246
+ (local $totalUnitsWritten i32)
247
+ (local $i i32)
248
+ (local $writePos i32)
249
+ (local $unit i32)
250
+ (local $readAddr i64)
251
+ (local $writeAddr i64)
252
+
253
+ (local.set $totalUnitsWritten (i32.const 0))
254
+
255
+ ;; Read buffer count (first 4 bytes)
256
+ (local.set $bufferCount (i32.load align=0 (local.get $blockDataPtr)))
257
+
258
+ ;; Calculate where buffer data starts (after count + all lengths): 4 + bufferCount * 4 bytes into the block
259
+ (local.set $bufferDataStart (i32.add (i32.const 4) (i32.mul (local.get $bufferCount) (i32.const 4))))
260
+ (local.set $bufferOffset (local.get $bufferDataStart))
261
+
262
+ ;; Loop through all buffers; bufferOffset tracks the current buffer's byte offset within the block
263
+ (local.set $bufferIndex (i32.const 0))
264
+ (block $break_buffers
265
+ (loop $continue_buffers
266
+ (br_if $break_buffers (i32.ge_u (local.get $bufferIndex) (local.get $bufferCount)))
267
+
268
+ ;; Read buffer length from header (offset: 4 + bufferIndex * 4)
269
+ (local.set $bufferLength
270
+ (i32.load align=0 (i64.add (local.get $blockDataPtr) (i64.extend_i32_u (i32.add (i32.const 4) (i32.mul (local.get $bufferIndex) (i32.const 4)))))))
271
+
272
+ ;; Process this buffer - we need at least 4 bytes to create a unit (shorter buffers emit nothing)
273
+ (if (i32.ge_u (local.get $bufferLength) (i32.const 4))
274
+ (then
275
+ (local.set $i (i32.const 0))
276
+
277
+ ;; Loop through buffer positions 0 .. bufferLength-4, one unit per byte position
278
+ (block $break_buffer
279
+ (loop $continue_buffer
280
+ ;; Check if we can read 4 more bytes (bufferLength >= 4 here, so the subtraction cannot wrap)
281
+ (br_if $break_buffer (i32.gt_u (local.get $i) (i32.sub (local.get $bufferLength) (i32.const 4))))
282
+
283
+ ;; Calculate read address
284
+ (local.set $readAddr (i64.add (local.get $blockDataPtr) (i64.extend_i32_u (i32.add (local.get $bufferOffset) (local.get $i)))))
285
+
286
+ ;; Read unit as little-endian u32
287
+ (local.set $unit (i32.load align=0 (local.get $readAddr)))
288
+
289
+ ;; Skip if unit is 0 (nothing is written for this position)
290
+ (if (i32.eqz (local.get $unit))
291
+ (then
292
+ (local.set $i (i32.add (local.get $i) (i32.const 1)))
293
+ (br $continue_buffer)))
294
+
295
+ ;; Calculate write position and address
296
+ (local.set $writePos (i32.add (local.get $startOffset) (local.get $totalUnitsWritten)))
297
+ (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
298
+
299
+ ;; Write to output arrays (same element index in all three)
300
+ (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (local.get $writeAddr)) (local.get $unit))
301
+ (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (local.get $writeAddr)) (local.get $bufferIndex))
302
+ (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (local.get $writeAddr)) (local.get $blockIndex))
303
+
304
+ ;; Increment counters
305
+ (local.set $totalUnitsWritten (i32.add (local.get $totalUnitsWritten) (i32.const 1)))
306
+ (local.set $i (i32.add (local.get $i) (i32.const 1)))
307
+
308
+ (br $continue_buffer)))))
309
+
310
+ ;; Move to next buffer (done even when the buffer was too short to yield a unit)
311
+ (local.set $bufferOffset (i32.add (local.get $bufferOffset) (local.get $bufferLength)))
312
+ (local.set $bufferIndex (i32.add (local.get $bufferIndex) (i32.const 1)))
313
+
314
+ (br $continue_buffers)))
315
+
316
+ (local.get $totalUnitsWritten))
317
+
318
+ ;; Populate Units From Buffer: emit every non-zero 4-byte unit (one per byte position) of a single buffer into three parallel arrays
319
+ ;; Parameters:
320
+ ;; - bufferPtr: pointer to source buffer bytes
321
+ ;; - bufferLength: length of source buffer in bytes (fewer than 4 bytes returns 0 immediately)
322
+ ;; - bufferIndex: buffer index value to store
323
+ ;; - block: block value to store
324
+ ;; - unitsOutputPtr: pointer to units output array
325
+ ;; - bufferIndicesOutputPtr: pointer to buffer indices output array
326
+ ;; - blocksOutputPtr: pointer to blocks output array
327
+ ;; - startOffset: starting write position in output arrays (an element index; multiplied by 4 for the byte offset)
328
+ ;; Returns: number of units written
329
+ (func (export "populate_units_from_buffer")
330
+ (param $bufferPtr i64)
331
+ (param $bufferLength i32)
332
+ (param $bufferIndex i32)
333
+ (param $block i32)
334
+ (param $unitsOutputPtr i64)
335
+ (param $bufferIndicesOutputPtr i64)
336
+ (param $blocksOutputPtr i64)
337
+ (param $startOffset i32)
338
+ (result i32)
339
+
340
+ (local $i i32)
341
+ (local $mainLoopEnd i32)
342
+ (local $count i32)
343
+ (local $unit i32)
344
+ (local $readAddr i64)
345
+ (local $writeAddr i64)
346
+
347
+ ;; Temps for unrolled loop
348
+ (local $unit0 i32)
349
+ (local $unit1 i32)
350
+ (local $unit2 i32)
351
+ (local $unit3 i32)
352
+ (local $j i32)
353
+
354
+ ;; We need at least 4 bytes to create a unit
355
+ (if (i32.lt_u (local.get $bufferLength) (i32.const 4))
356
+ (then (return (i32.const 0))))
357
+
358
+ (local.set $count (i32.const 0))
359
+ (local.set $i (i32.const 0))
360
+
361
+ ;; Calculate main loop end for unrolled loop: an exclusive bound on i; each iteration reads bytes [i, i+7), so at least 7 bytes are needed
362
+ (if (i32.ge_u (local.get $bufferLength) (i32.const 7))
363
+ (then
364
+ (local.set $mainLoopEnd (i32.sub (local.get $bufferLength) (i32.const 6))))
365
+ (else
366
+ (local.set $mainLoopEnd (i32.const 0))))
367
+
368
+ ;; Main unrolled loop - process 4 overlapping units per iteration
369
+ (block $break_main
370
+ (loop $continue_main
371
+ (br_if $break_main (i32.ge_u (local.get $i) (local.get $mainLoopEnd)))
372
+
373
+ ;; Calculate read address
374
+ (local.set $readAddr (i64.add (local.get $bufferPtr) (i64.extend_i32_u (local.get $i))))
375
+
376
+ ;; Load 4 overlapping units (byte positions i, i+1, i+2, i+3)
377
+ (local.set $unit0 (i32.load align=0 (local.get $readAddr)))
378
+ (local.set $unit1 (i32.load align=0 (i64.add (local.get $readAddr) (i64.const 1))))
379
+ (local.set $unit2 (i32.load align=0 (i64.add (local.get $readAddr) (i64.const 2))))
380
+ (local.set $unit3 (i32.load align=0 (i64.add (local.get $readAddr) (i64.const 3))))
381
+
382
+ ;; Conservative test: a non-zero bitwise AND of the four units proves all four are non-zero. A zero AND (possible even when every unit is non-zero) takes the slow path, which re-checks each unit, so the output is the same either way
383
+ (if (i32.and (i32.and (local.get $unit0) (local.get $unit1))
384
+ (i32.and (local.get $unit2) (local.get $unit3)))
385
+ (then
386
+ ;; Fast path: all four units are known to be non-zero; writeAddr is the byte offset shared by all three output arrays
387
+ (local.set $writeAddr (i64.extend_i32_u (i32.mul (i32.add (local.get $startOffset) (local.get $count)) (i32.const 4))))
388
+
389
+ ;; Write all units together
390
+ (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (local.get $writeAddr)) (local.get $unit0))
391
+ (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (i64.add (local.get $writeAddr) (i64.const 4))) (local.get $unit1))
392
+ (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (i64.add (local.get $writeAddr) (i64.const 8))) (local.get $unit2))
393
+ (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (i64.add (local.get $writeAddr) (i64.const 12))) (local.get $unit3))
394
+
395
+ ;; Write all buffer indices together
396
+ (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (local.get $writeAddr)) (local.get $bufferIndex))
397
+ (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (i64.add (local.get $writeAddr) (i64.const 4))) (local.get $bufferIndex))
398
+ (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (i64.add (local.get $writeAddr) (i64.const 8))) (local.get $bufferIndex))
399
+ (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (i64.add (local.get $writeAddr) (i64.const 12))) (local.get $bufferIndex))
400
+
401
+ ;; Write all blocks together
402
+ (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (local.get $writeAddr)) (local.get $block))
403
+ (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (i64.add (local.get $writeAddr) (i64.const 4))) (local.get $block))
404
+ (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (i64.add (local.get $writeAddr) (i64.const 8))) (local.get $block))
405
+ (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (i64.add (local.get $writeAddr) (i64.const 12))) (local.get $block))
406
+
407
+ ;; Increment count by 4
408
+ (local.set $count (i32.add (local.get $count) (i32.const 4))))
409
+ (else
410
+ ;; Slow path: the AND was zero, so one or more units may be zero; re-read and test each of the 4 positions
411
+ (local.set $j (i32.const 0))
412
+ (block $break_zeros
413
+ (loop $continue_zeros
414
+ (br_if $break_zeros (i32.ge_u (local.get $j) (i32.const 4)))
415
+
416
+ ;; Read unit at offset j
417
+ (local.set $unit (i32.load align=0 (i64.add (local.get $readAddr) (i64.extend_i32_u (local.get $j)))))
418
+
419
+ ;; Write if non-zero
420
+ (if (local.get $unit)
421
+ (then
422
+ (local.set $writeAddr (i64.extend_i32_u (i32.mul (i32.add (local.get $startOffset) (local.get $count)) (i32.const 4))))
423
+ (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (local.get $writeAddr)) (local.get $unit))
424
+ (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (local.get $writeAddr)) (local.get $bufferIndex))
425
+ (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (local.get $writeAddr)) (local.get $block))
426
+ (local.set $count (i32.add (local.get $count) (i32.const 1)))))
427
+
428
+ (local.set $j (i32.add (local.get $j) (i32.const 1)))
429
+ (br $continue_zeros)))))
430
+
431
+ ;; Advance by 4 bytes
432
+ (local.set $i (i32.add (local.get $i) (i32.const 4)))
433
+ (br $continue_main)))
434
+
435
+ ;; Tail loop for remaining bytes: positions from the current i through bufferLength-4, one unit at a time
436
+ (block $break_tail
437
+ (loop $continue_tail
438
+ (br_if $break_tail (i32.gt_u (local.get $i) (i32.sub (local.get $bufferLength) (i32.const 4))))
439
+
440
+ ;; Read unit
441
+ (local.set $readAddr (i64.add (local.get $bufferPtr) (i64.extend_i32_u (local.get $i))))
442
+ (local.set $unit (i32.load align=0 (local.get $readAddr)))
443
+
444
+ ;; Skip if unit is 0
445
+ (if (i32.eqz (local.get $unit))
446
+ (then
447
+ (local.set $i (i32.add (local.get $i) (i32.const 1)))
448
+ (br $continue_tail)))
449
+
450
+ ;; Write unit
451
+ (local.set $writeAddr (i64.extend_i32_u (i32.mul (i32.add (local.get $startOffset) (local.get $count)) (i32.const 4))))
452
+ (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (local.get $writeAddr)) (local.get $unit))
453
+ (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (local.get $writeAddr)) (local.get $bufferIndex))
454
+ (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (local.get $writeAddr)) (local.get $block))
455
+
456
+ (local.set $count (i32.add (local.get $count) (i32.const 1)))
457
+ (local.set $i (i32.add (local.get $i) (i32.const 1)))
458
+ (br $continue_tail)))
459
+
460
+ (local.get $count))
461
+
462
+ ;; Phase 1: Count & Deduplicate using hash table (open addressing with linear probing)
463
+ ;; Parameters:
464
+ ;; - unitsPtr: pointer to units array
465
+ ;; - totalUnits: number of units
466
+ ;; - hashTablePtr: pointer to hash table [Unit, Count, Offset, ItemsWritten, ...]; 16 bytes per slot, and a Unit field of 0 marks an empty slot. Only Unit and Count are written here
467
+ ;; - hashTableCapacity: capacity of hash table (in slots)
468
+ ;; - uniqueUnitListPtr: pointer to store unique units (appended in first-seen order)
469
+ ;; - maxUniqueCount: maximum fill threshold for unique units
470
+ ;; Returns: uniqueCount, or -1 as soon as uniqueCount exceeds maxUniqueCount (the unit that crossed the threshold has already been written to the table and the list)
471
+ (func (export "phase1_count_deduplicate")
472
+ (param $unitsPtr i64)
473
+ (param $totalUnits i32)
474
+ (param $hashTablePtr i64)
475
+ (param $hashTableCapacity i32)
476
+ (param $uniqueUnitListPtr i64)
477
+ (param $maxUniqueCount i32)
478
+ (result i32)
479
+
480
+ (local $i i32)
481
+ (local $unit i32)
482
+ (local $index i32)
483
+ (local $hashTableSize i32)
484
+ (local $uniqueCount i32)
485
+ (local $hashValue i32)
486
+ (local $unitAddr i64)
487
+
488
+ ;; hashTableSize = hashTableCapacity * 4 * 4 (4 entries per slot, 4 bytes per entry); this is the table size in bytes
489
+ (local.set $hashTableSize
490
+ (i32.mul (local.get $hashTableCapacity) (i32.const 16)))
491
+
492
+ (local.set $uniqueCount (i32.const 0))
493
+ (local.set $i (i32.const 0))
494
+ (local.set $unitAddr (local.get $unitsPtr))
495
+
496
+ ;; Loop through all units
497
+ (block $break
498
+ (loop $continue
499
+ ;; Check if we've processed all units
500
+ (br_if $break (i32.ge_u (local.get $i) (local.get $totalUnits)))
501
+
502
+ ;; Read unit from unitsArray[i] and advance the read pointer by 4 bytes
503
+ (local.set $unit (i32.load (local.get $unitAddr)))
504
+ (local.set $unitAddr (i64.add (local.get $unitAddr) (i64.const 4)))
505
+
506
+ ;; Calculate hash index using helper (index is a byte offset into the table)
507
+ (local.set $index (call $hashUnitToIndex (local.get $unit) (local.get $hashTableCapacity)))
508
+
509
+ ;; Linear probing
510
+ (block $probe_break
511
+ (loop $probe_continue
512
+ ;; Read value at hashTable[index]
513
+ (local.set $hashValue
514
+ (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $index)))))
515
+
516
+ ;; Check if empty slot (value == 0). NOTE(review): a unit equal to 0 would be indistinguishable from an empty slot; presumably callers never pass zero units - confirm
517
+ (if (i32.eqz (local.get $hashValue))
518
+ (then
519
+ ;; Empty slot - store unit
520
+ (i32.store
521
+ (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $index)))
522
+ (local.get $unit))
523
+
524
+ ;; Store count = 1 at hashTable[index + 4]
525
+ (i32.store
526
+ (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $index) (i32.const 4))))
527
+ (i32.const 1))
528
+
529
+ ;; Add to unique unit list (at element index uniqueCount, before it is incremented)
530
+ (i32.store
531
+ (i64.add (local.get $uniqueUnitListPtr)
532
+ (i64.extend_i32_u (i32.mul (local.get $uniqueCount) (i32.const 4))))
533
+ (local.get $unit))
534
+
535
+ ;; Increment unique count
536
+ (local.set $uniqueCount (i32.add (local.get $uniqueCount) (i32.const 1)))
537
+
538
+ ;; Check if exceeded threshold (checked after the stores above, so the list must have room for maxUniqueCount + 1 entries)
539
+ (if (i32.gt_u (local.get $uniqueCount) (local.get $maxUniqueCount))
540
+ (then
541
+ (return (i32.const -1))))
542
+
543
+ (br $probe_break)))
544
+
545
+ ;; Check if matching unit
546
+ (if (i32.eq (local.get $hashValue) (local.get $unit))
547
+ (then
548
+ ;; Increment count at hashTable[index + 4]
549
+ (i32.store
550
+ (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $index) (i32.const 4))))
551
+ (i32.add
552
+ (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $index) (i32.const 4)))))
553
+ (i32.const 1)))
554
+ (br $probe_break)))
555
+
556
+ ;; Collision - probe next slot. The probe loop exits only on an empty or matching slot; NOTE(review): presumably maxUniqueCount keeps the table from ever filling - confirm
557
+ (local.set $index (call $getNextIndex (local.get $index) (local.get $hashTableSize)))
558
+
559
+ (br $probe_continue)))
560
+
561
+ ;; Next unit
562
+ (local.set $i (i32.add (local.get $i) (i32.const 1)))
563
+ (br $continue)))
564
+
565
+ ;; Return unique count
566
+ (local.get $uniqueCount))
567
+
568
;; Phase 3: assign every unique unit its starting offset in the output arrays.
;;
;; Hash table slots are 16 bytes; this function touches three of the u32 fields:
;;   +0  unit key    (read while probing)
;;   +4  occurrence count (read)
;;   +8  output offset    (written here)
;;
;; Parameters:
;;   - uniqueUnitListPtr: pointer to the list of unique units (one u32 each)
;;   - uniqueCount: number of entries in that list
;;   - hashTablePtr: pointer to the hash table
;;   - hashTableCapacity: number of slots in the hash table
;;   - maxPositionsPerUnit: cap on positions kept per unit (original note: typically 1000)
;; Returns: the sum of min(count, maxPositionsPerUnit) over all listed units,
;;          i.e. the number of entries the output arrays must hold.
;;
;; NOTE: the probe loop has no empty-slot exit; it relies on every listed unit
;; already being present in the table.
(func (export "phase3_calc_offsets")
  (param $uniqueUnitListPtr i64)
  (param $uniqueCount i32)
  (param $hashTablePtr i64)
  (param $hashTableCapacity i32)
  (param $maxPositionsPerUnit i32)
  (result i32)

  (local $n i32)            ;; position in the unique-unit list (starts at 0)
  (local $key i32)          ;; unit currently being looked up
  (local $slot i32)         ;; byte offset of the slot being examined
  (local $tableBytes i32)   ;; table size in bytes (capacity * 16)
  (local $runningTotal i32) ;; offset handed to the next unit (starts at 0)
  (local $count i32)        ;; occurrence count stored for $key

  (local.set $tableBytes
    (i32.mul (local.get $hashTableCapacity) (i32.const 16)))

  ;; Nothing to do for an empty list; otherwise run a bottom-tested loop.
  (if (i32.ne (local.get $uniqueCount) (i32.const 0))
    (then
      (loop $next_unit
        ;; key = uniqueUnitList[n]
        (local.set $key
          (i32.load
            (i64.add
              (local.get $uniqueUnitListPtr)
              (i64.extend_i32_u (i32.mul (local.get $n) (i32.const 4))))))

        ;; Walk the probe sequence until the slot holding this key is reached.
        (local.set $slot
          (call $hashUnitToIndex (local.get $key) (local.get $hashTableCapacity)))
        (block $found
          (loop $probe
            (br_if $found
              (i32.eq
                (i32.load
                  (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $slot))))
                (local.get $key)))
            (local.set $slot
              (call $getNextIndex (local.get $slot) (local.get $tableBytes)))
            (br $probe)))

        ;; count = slot[+4]
        (local.set $count
          (i32.load
            (i64.add
              (local.get $hashTablePtr)
              (i64.extend_i32_u (i32.add (local.get $slot) (i32.const 4))))))

        ;; slot[+8] = offset at which this unit's entries begin
        (i32.store
          (i64.add
            (local.get $hashTablePtr)
            (i64.extend_i32_u (i32.add (local.get $slot) (i32.const 8))))
          (local.get $runningTotal))

        ;; runningTotal += min(count, maxPositionsPerUnit)   (unsigned compare)
        (local.set $runningTotal
          (i32.add
            (local.get $runningTotal)
            (select
              (local.get $maxPositionsPerUnit)
              (local.get $count)
              (i32.gt_u (local.get $count) (local.get $maxPositionsPerUnit)))))

        ;; Advance and repeat while entries remain.
        (br_if $next_unit
          (i32.lt_u
            (local.tee $n (i32.add (local.get $n) (i32.const 1)))
            (local.get $uniqueCount))))))

  (local.get $runningTotal))
646
+
647
;; Phase 4 (scalar): scatter each input triple into its unit's output region,
;; dropping entries once a unit already holds maxPositionsPerUnit of them.
;;
;; Hash table slots are 16 bytes; fields used here (u32 each):
;;   +0  unit key             (read while probing)
;;   +8  output offset        (read)
;;   +12 entries written so far (read and incremented)
;;
;; Parameters:
;;   - unitsPtr / bufferIndicesPtr / blocksPtr: three parallel u32 input arrays
;;   - totalUnits: number of entries in each input array
;;   - hashTablePtr: pointer to the hash table
;;   - hashTableCapacity: number of slots in the hash table
;;   - maxPositionsPerUnit: cap on entries kept per unit
;;   - filteredUnitsPtr / filteredBufferIndicesPtr / filteredBlocksPtr:
;;     three parallel u32 output arrays
;; Returns: nothing
;;
;; NOTE: the probe loop has no empty-slot exit; it relies on every input unit
;; already being present in the table.
(func (export "phase4_fill_filter")
  (param $unitsPtr i64)
  (param $bufferIndicesPtr i64)
  (param $blocksPtr i64)
  (param $totalUnits i32)
  (param $hashTablePtr i64)
  (param $hashTableCapacity i32)
  (param $maxPositionsPerUnit i32)
  (param $filteredUnitsPtr i64)
  (param $filteredBufferIndicesPtr i64)
  (param $filteredBlocksPtr i64)

  (local $n i32)          ;; number of input entries consumed (starts at 0)
  (local $key i32)        ;; current unit
  (local $bufIdx i32)     ;; current buffer index
  (local $blk i32)        ;; current block
  (local $slot i32)       ;; byte offset of the slot being examined
  (local $tableBytes i32) ;; table size in bytes (capacity * 16)
  (local $written i32)    ;; entries already emitted for $key
  (local $srcUnit i64)    ;; read cursor into units
  (local $srcBufIdx i64)  ;; read cursor into buffer indices
  (local $srcBlk i64)     ;; read cursor into blocks
  (local $slotAddr i64)   ;; absolute address of the slot being examined
  (local $dst i64)        ;; byte offset of the destination element

  (local.set $tableBytes
    (i32.mul (local.get $hashTableCapacity) (i32.const 16)))
  (local.set $srcUnit (local.get $unitsPtr))
  (local.set $srcBufIdx (local.get $bufferIndicesPtr))
  (local.set $srcBlk (local.get $blocksPtr))

  (block $done
    (loop $each
      (br_if $done (i32.ge_u (local.get $n) (local.get $totalUnits)))

      ;; Fetch one (unit, bufferIndex, block) triple and step the cursors.
      (local.set $key (i32.load (local.get $srcUnit)))
      (local.set $bufIdx (i32.load (local.get $srcBufIdx)))
      (local.set $blk (i32.load (local.get $srcBlk)))
      (local.set $srcUnit (i64.add (local.get $srcUnit) (i64.const 4)))
      (local.set $srcBufIdx (i64.add (local.get $srcBufIdx) (i64.const 4)))
      (local.set $srcBlk (i64.add (local.get $srcBlk) (i64.const 4)))

      ;; Walk the probe sequence until the slot holding this unit is reached.
      (local.set $slot
        (call $hashUnitToIndex (local.get $key) (local.get $hashTableCapacity)))
      (local.set $slotAddr
        (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $slot))))
      (block $found
        (loop $probe
          (br_if $found (i32.eq (i32.load (local.get $slotAddr)) (local.get $key)))
          (local.set $slot
            (call $getNextIndex (local.get $slot) (local.get $tableBytes)))
          (local.set $slotAddr
            (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $slot))))
          (br $probe)))

      ;; Emit only while the unit is still under its cap.
      (local.set $written (i32.load (i64.add (local.get $slotAddr) (i64.const 12))))
      (if (i32.lt_u (local.get $written) (local.get $maxPositionsPerUnit))
        (then
          ;; dst = (slot[+8] + written) * 4
          (local.set $dst
            (i64.extend_i32_u
              (i32.mul
                (i32.add
                  (i32.load (i64.add (local.get $slotAddr) (i64.const 8)))
                  (local.get $written))
                (i32.const 4))))
          (i32.store (i64.add (local.get $filteredUnitsPtr) (local.get $dst)) (local.get $key))
          (i32.store (i64.add (local.get $filteredBufferIndicesPtr) (local.get $dst)) (local.get $bufIdx))
          (i32.store (i64.add (local.get $filteredBlocksPtr) (local.get $dst)) (local.get $blk))
          ;; slot[+12] = written + 1
          (i32.store
            (i64.add (local.get $slotAddr) (i64.const 12))
            (i32.add (local.get $written) (i32.const 1)))))

      (local.set $n (i32.add (local.get $n) (i32.const 1)))
      (br $each))))
755
+
756
;; Private helper for phase4_fill_filter_vector.
;; Starting at byte offset $index, walks the probe sequence until the slot whose
;; key equals $unit is reached, then (if that unit is still under
;; maxPositionsPerUnit) writes the triple to the output arrays at
;; slot[+8] + slot[+12] and increments slot[+12].
;; Like the exported phase-4 functions, it has no empty-slot exit and relies on
;; $unit being present in the table.
(func $phase4VecProbeAndStore
  (param $unit i32)
  (param $bufferIndex i32)
  (param $block i32)
  (param $index i32)
  (param $hashTablePtr i64)
  (param $hashTableSize i32)
  (param $maxPositionsPerUnit i32)
  (param $filteredUnitsPtr i64)
  (param $filteredBufferIndicesPtr i64)
  (param $filteredBlocksPtr i64)

  (local $hashAddr i64)
  (local $itemsWritten i32)
  (local $writeAddr i64)

  ;; Linear probing to find the matching unit
  (block $probe_break
    (loop $probe_continue
      (br_if $probe_break
        (i32.eq
          (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $index))))
          (local.get $unit)))
      (local.set $index (call $getNextIndex (local.get $index) (local.get $hashTableSize)))
      (br $probe_continue)))

  (local.set $hashAddr (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $index))))
  (local.set $itemsWritten (i32.load (i64.add (local.get $hashAddr) (i64.const 12))))

  (if (i32.lt_u (local.get $itemsWritten) (local.get $maxPositionsPerUnit))
    (then
      ;; writeAddr = (offset + itemsWritten) * 4
      (local.set $writeAddr
        (i64.extend_i32_u
          (i32.mul
            (i32.add
              (i32.load (i64.add (local.get $hashAddr) (i64.const 8)))
              (local.get $itemsWritten))
            (i32.const 4))))
      (i32.store (i64.add (local.get $filteredUnitsPtr) (local.get $writeAddr)) (local.get $unit))
      (i32.store (i64.add (local.get $filteredBufferIndicesPtr) (local.get $writeAddr)) (local.get $bufferIndex))
      (i32.store (i64.add (local.get $filteredBlocksPtr) (local.get $writeAddr)) (local.get $block))
      (i32.store
        (i64.add (local.get $hashAddr) (i64.const 12))
        (i32.add (local.get $itemsWritten) (i32.const 1))))))

;; Phase 4 Fill Filter Vector: Fill with collision detection (SIMD vectorized)
;;
;; Processes the input four units at a time. For each group the initial slot of
;; every lane is computed with SIMD; if all four initial slots already hold their
;; unit (no probing needed) the group is written inline (fast path), otherwise
;; each lane is resolved with linear probing (slow path). Lanes are always applied
;; in order 0..3, so repeated units within a group keep their input order.
;;
;; FIX: the SIMD loop only covers totalUnits rounded down to a multiple of 4.
;; The trailing totalUnits % 4 units were previously never written; they are now
;; handled by a scalar tail loop after the SIMD loop.
;; NOTE(review): if a caller was separately processing that remainder itself, it
;; must stop doing so (or pass a multiple of 4) to avoid writing those units twice.
;;
;; Parameters:
;;   - unitsPtr: pointer to units array
;;   - bufferIndicesPtr: pointer to buffer indices array
;;   - blocksPtr: pointer to blocks array
;;   - totalUnits: number of units
;;   - hashTablePtr: pointer to hash table
;;   - hashTableCapacity: capacity of hash table
;;   - maxPositionsPerUnit: max positions per unit
;;   - filteredUnitsPtr: output array for units
;;   - filteredBufferIndicesPtr: output array for buffer indices
;;   - filteredBlocksPtr: output array for blocks
;; Returns: number of 4-unit groups that took the slow path (as f64).
;;          Units handled by the scalar tail are not counted.
(func (export "phase4_fill_filter_vector")
  (param $unitsPtr i64)
  (param $bufferIndicesPtr i64)
  (param $blocksPtr i64)
  (param $totalUnits i32)
  (param $hashTablePtr i64)
  (param $hashTableCapacity i32)
  (param $maxPositionsPerUnit i32)
  (param $filteredUnitsPtr i64)
  (param $filteredBufferIndicesPtr i64)
  (param $filteredBlocksPtr i64)
  (result f64)

  (local $i i32)
  (local $slowPathCount f64)
  (local $unitAddr i64)
  (local $bufferIndexAddr i64)
  (local $blockAddr i64)
  (local $mainLoopEnd i32)
  (local $hashTableSize i32)

  ;; SIMD vectors
  (local $unitsVec v128)
  (local $bufferIndicesVec v128)
  (local $blocksVec v128)
  (local $hashesVec v128)
  (local $indicesVec v128)

  ;; SIMD constants
  (local $hashConstVec v128)
  (local $mult16Vec v128)
  (local $shiftAmount i32)

  ;; Scalar temps for collision checking
  (local $idx0 i32)
  (local $idx1 i32)
  (local $idx2 i32)
  (local $idx3 i32)
  (local $unit0 i32)
  (local $unit1 i32)
  (local $unit2 i32)
  (local $unit3 i32)
  (local $bufferIndex0 i32)
  (local $bufferIndex1 i32)
  (local $bufferIndex2 i32)
  (local $bufferIndex3 i32)
  (local $block0 i32)
  (local $block1 i32)
  (local $block2 i32)
  (local $block3 i32)
  (local $hash0 i32)
  (local $hash1 i32)
  (local $hash2 i32)
  (local $hash3 i32)
  (local $allMatch i32)

  ;; Temps for the inline fast path
  (local $itemsWritten i32)
  (local $offset i32)
  (local $writePos i32)
  (local $writeAddr i64)

  ;; Temps for the scalar tail
  (local $unit i32)
  (local $bufferIndex i32)
  (local $block i32)

  (local.set $i (i32.const 0))
  (local.set $unitAddr (local.get $unitsPtr))
  (local.set $bufferIndexAddr (local.get $bufferIndicesPtr))
  (local.set $blockAddr (local.get $blocksPtr))
  (local.set $slowPathCount (f64.const 0))

  ;; Calculate hash table size in bytes
  (local.set $hashTableSize (i32.mul (local.get $hashTableCapacity) (i32.const 16)))

  ;; Initialize SIMD constants
  (local.set $hashConstVec (i32x4.splat (i32.const 2654435761)))
  (local.set $mult16Vec (i32x4.splat (i32.const 16)))

  ;; Calculate shift amount for Fibonacci hashing
  ;; shift_amount = 32 - log2(capacity) = 32 - (31 - clz(capacity)) = 1 + clz(capacity)
  (local.set $shiftAmount (i32.add (i32.clz (local.get $hashTableCapacity)) (i32.const 1)))

  ;; Calculate main loop end (rounded down to multiple of 4)
  (local.set $mainLoopEnd
    (i32.sub (local.get $totalUnits) (i32.rem_u (local.get $totalUnits) (i32.const 4))))

  ;; Main SIMD loop - process 4 units per iteration
  (block $break_main
    (loop $continue_main
      (br_if $break_main (i32.ge_u (local.get $i) (local.get $mainLoopEnd)))

      ;; Load 4 units, buffer indices, and blocks at once using SIMD
      (local.set $unitsVec (v128.load (local.get $unitAddr)))
      (local.set $unitAddr (i64.add (local.get $unitAddr) (i64.const 16)))

      (local.set $bufferIndicesVec (v128.load (local.get $bufferIndexAddr)))
      (local.set $bufferIndexAddr (i64.add (local.get $bufferIndexAddr) (i64.const 16)))

      (local.set $blocksVec (v128.load (local.get $blockAddr)))
      (local.set $blockAddr (i64.add (local.get $blockAddr) (i64.const 16)))

      ;; Hash all 4 units in parallel: hash = unit * 2654435761
      (local.set $hashesVec (i32x4.mul (local.get $unitsVec) (local.get $hashConstVec)))

      ;; Fibonacci hashing: keep the top bits by shifting right by $shiftAmount
      (local.set $hashesVec (i32x4.shr_u (local.get $hashesVec) (local.get $shiftAmount)))

      ;; Multiply by 16 - vectorized (each slot is 16 bytes: 4 u32s)
      (local.set $indicesVec (i32x4.mul (local.get $hashesVec) (local.get $mult16Vec)))

      ;; Extract all 4 indices for collision checking
      (local.set $idx0 (i32x4.extract_lane 0 (local.get $indicesVec)))
      (local.set $idx1 (i32x4.extract_lane 1 (local.get $indicesVec)))
      (local.set $idx2 (i32x4.extract_lane 2 (local.get $indicesVec)))
      (local.set $idx3 (i32x4.extract_lane 3 (local.get $indicesVec)))

      ;; Extract all 4 units for collision checking
      (local.set $unit0 (i32x4.extract_lane 0 (local.get $unitsVec)))
      (local.set $unit1 (i32x4.extract_lane 1 (local.get $unitsVec)))
      (local.set $unit2 (i32x4.extract_lane 2 (local.get $unitsVec)))
      (local.set $unit3 (i32x4.extract_lane 3 (local.get $unitsVec)))

      ;; Extract buffer indices and blocks
      (local.set $bufferIndex0 (i32x4.extract_lane 0 (local.get $bufferIndicesVec)))
      (local.set $bufferIndex1 (i32x4.extract_lane 1 (local.get $bufferIndicesVec)))
      (local.set $bufferIndex2 (i32x4.extract_lane 2 (local.get $bufferIndicesVec)))
      (local.set $bufferIndex3 (i32x4.extract_lane 3 (local.get $bufferIndicesVec)))

      (local.set $block0 (i32x4.extract_lane 0 (local.get $blocksVec)))
      (local.set $block1 (i32x4.extract_lane 1 (local.get $blocksVec)))
      (local.set $block2 (i32x4.extract_lane 2 (local.get $blocksVec)))
      (local.set $block3 (i32x4.extract_lane 3 (local.get $blocksVec)))

      ;; Load keys stored at the initial indices to check for collisions
      (local.set $hash0 (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $idx0)))))
      (local.set $hash1 (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $idx1)))))
      (local.set $hash2 (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $idx2)))))
      (local.set $hash3 (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $idx3)))))

      ;; Check if all 4 match (no collisions on first probe)
      (local.set $allMatch
        (i32.and
          (i32.and
            (i32.eq (local.get $hash0) (local.get $unit0))
            (i32.eq (local.get $hash1) (local.get $unit1)))
          (i32.and
            (i32.eq (local.get $hash2) (local.get $unit2))
            (i32.eq (local.get $hash3) (local.get $unit3)))))

      (if (local.get $allMatch)
        (then
          ;; Fast path - no collisions, process all 4 units inline
          ;; Process unit 0
          (local.set $itemsWritten (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx0) (i32.const 12))))))
          (if (i32.lt_u (local.get $itemsWritten) (local.get $maxPositionsPerUnit))
            (then
              (local.set $offset (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx0) (i32.const 8))))))
              (local.set $writePos (i32.add (local.get $offset) (local.get $itemsWritten)))
              (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
              (i32.store (i64.add (local.get $filteredUnitsPtr) (local.get $writeAddr)) (local.get $unit0))
              (i32.store (i64.add (local.get $filteredBufferIndicesPtr) (local.get $writeAddr)) (local.get $bufferIndex0))
              (i32.store (i64.add (local.get $filteredBlocksPtr) (local.get $writeAddr)) (local.get $block0))
              (i32.store (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx0) (i32.const 12)))) (i32.add (local.get $itemsWritten) (i32.const 1)))))

          ;; Process unit 1
          (local.set $itemsWritten (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx1) (i32.const 12))))))
          (if (i32.lt_u (local.get $itemsWritten) (local.get $maxPositionsPerUnit))
            (then
              (local.set $offset (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx1) (i32.const 8))))))
              (local.set $writePos (i32.add (local.get $offset) (local.get $itemsWritten)))
              (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
              (i32.store (i64.add (local.get $filteredUnitsPtr) (local.get $writeAddr)) (local.get $unit1))
              (i32.store (i64.add (local.get $filteredBufferIndicesPtr) (local.get $writeAddr)) (local.get $bufferIndex1))
              (i32.store (i64.add (local.get $filteredBlocksPtr) (local.get $writeAddr)) (local.get $block1))
              (i32.store (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx1) (i32.const 12)))) (i32.add (local.get $itemsWritten) (i32.const 1)))))

          ;; Process unit 2
          (local.set $itemsWritten (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx2) (i32.const 12))))))
          (if (i32.lt_u (local.get $itemsWritten) (local.get $maxPositionsPerUnit))
            (then
              (local.set $offset (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx2) (i32.const 8))))))
              (local.set $writePos (i32.add (local.get $offset) (local.get $itemsWritten)))
              (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
              (i32.store (i64.add (local.get $filteredUnitsPtr) (local.get $writeAddr)) (local.get $unit2))
              (i32.store (i64.add (local.get $filteredBufferIndicesPtr) (local.get $writeAddr)) (local.get $bufferIndex2))
              (i32.store (i64.add (local.get $filteredBlocksPtr) (local.get $writeAddr)) (local.get $block2))
              (i32.store (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx2) (i32.const 12)))) (i32.add (local.get $itemsWritten) (i32.const 1)))))

          ;; Process unit 3
          (local.set $itemsWritten (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx3) (i32.const 12))))))
          (if (i32.lt_u (local.get $itemsWritten) (local.get $maxPositionsPerUnit))
            (then
              (local.set $offset (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx3) (i32.const 8))))))
              (local.set $writePos (i32.add (local.get $offset) (local.get $itemsWritten)))
              (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
              (i32.store (i64.add (local.get $filteredUnitsPtr) (local.get $writeAddr)) (local.get $unit3))
              (i32.store (i64.add (local.get $filteredBufferIndicesPtr) (local.get $writeAddr)) (local.get $bufferIndex3))
              (i32.store (i64.add (local.get $filteredBlocksPtr) (local.get $writeAddr)) (local.get $block3))
              (i32.store (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx3) (i32.const 12)))) (i32.add (local.get $itemsWritten) (i32.const 1))))))
        (else
          ;; Slow path - at least one lane missed on its first probe;
          ;; resolve every lane with linear probing, in lane order.
          (local.set $slowPathCount (f64.add (local.get $slowPathCount) (f64.const 1)))

          (call $phase4VecProbeAndStore
            (local.get $unit0) (local.get $bufferIndex0) (local.get $block0) (local.get $idx0)
            (local.get $hashTablePtr) (local.get $hashTableSize) (local.get $maxPositionsPerUnit)
            (local.get $filteredUnitsPtr) (local.get $filteredBufferIndicesPtr) (local.get $filteredBlocksPtr))
          (call $phase4VecProbeAndStore
            (local.get $unit1) (local.get $bufferIndex1) (local.get $block1) (local.get $idx1)
            (local.get $hashTablePtr) (local.get $hashTableSize) (local.get $maxPositionsPerUnit)
            (local.get $filteredUnitsPtr) (local.get $filteredBufferIndicesPtr) (local.get $filteredBlocksPtr))
          (call $phase4VecProbeAndStore
            (local.get $unit2) (local.get $bufferIndex2) (local.get $block2) (local.get $idx2)
            (local.get $hashTablePtr) (local.get $hashTableSize) (local.get $maxPositionsPerUnit)
            (local.get $filteredUnitsPtr) (local.get $filteredBufferIndicesPtr) (local.get $filteredBlocksPtr))
          (call $phase4VecProbeAndStore
            (local.get $unit3) (local.get $bufferIndex3) (local.get $block3) (local.get $idx3)
            (local.get $hashTablePtr) (local.get $hashTableSize) (local.get $maxPositionsPerUnit)
            (local.get $filteredUnitsPtr) (local.get $filteredBufferIndicesPtr) (local.get $filteredBlocksPtr))))

      ;; Increment by 4
      (local.set $i (i32.add (local.get $i) (i32.const 4)))
      (br $continue_main)))

  ;; Scalar tail - process the remaining (totalUnits % 4) units.
  ;; At this point $i == $mainLoopEnd and the three read cursors already point
  ;; at element $mainLoopEnd of their arrays.
  (block $break_tail
    (loop $continue_tail
      (br_if $break_tail (i32.ge_u (local.get $i) (local.get $totalUnits)))

      (local.set $unit (i32.load (local.get $unitAddr)))
      (local.set $bufferIndex (i32.load (local.get $bufferIndexAddr)))
      (local.set $block (i32.load (local.get $blockAddr)))
      (local.set $unitAddr (i64.add (local.get $unitAddr) (i64.const 4)))
      (local.set $bufferIndexAddr (i64.add (local.get $bufferIndexAddr) (i64.const 4)))
      (local.set $blockAddr (i64.add (local.get $blockAddr) (i64.const 4)))

      ;; Initial index uses the scalar equivalent of the SIMD lane computation:
      ;; ((unit * 2654435761) >>> shiftAmount) * 16
      (call $phase4VecProbeAndStore
        (local.get $unit) (local.get $bufferIndex) (local.get $block)
        (i32.mul
          (i32.shr_u
            (i32.mul (local.get $unit) (i32.const 2654435761))
            (local.get $shiftAmount))
          (i32.const 16))
        (local.get $hashTablePtr) (local.get $hashTableSize) (local.get $maxPositionsPerUnit)
        (local.get $filteredUnitsPtr) (local.get $filteredBufferIndicesPtr) (local.get $filteredBlocksPtr))

      (local.set $i (i32.add (local.get $i) (i32.const 1)))
      (br $continue_tail)))

  ;; Return slow path count
  (local.get $slowPathCount))
1077
+
1078
;; Count Collisions: total number of extra probe steps needed to resolve the
;; given units against the hash table.
;;
;; For each unit, probing starts at its hashed slot and stops at the first slot
;; whose key is either 0 (treated as empty) or equal to the unit; every slot
;; stepped over before that adds one to the result.
;;
;; Parameters:
;;   - unitsPtr: pointer to units array (one u32 each)
;;   - totalUnits: number of units
;;   - hashTablePtr: pointer to hash table
;;   - hashTableCapacity: capacity of hash table (slots of 16 bytes)
;; Returns: total collision count
(func (export "count_collisions")
  (param $unitsPtr i64)
  (param $totalUnits i32)
  (param $hashTablePtr i64)
  (param $hashTableCapacity i32)
  (result i32)

  (local $n i32)          ;; number of units consumed (starts at 0)
  (local $key i32)        ;; current unit
  (local $slot i32)       ;; byte offset of the slot being examined
  (local $tableBytes i32) ;; table size in bytes (capacity * 16)
  (local $stored i32)     ;; key found in the examined slot
  (local $src i64)        ;; read cursor into units
  (local $total i32)      ;; accumulated collision count (starts at 0)

  (local.set $tableBytes
    (i32.mul (local.get $hashTableCapacity) (i32.const 16)))
  (local.set $src (local.get $unitsPtr))

  (block $done
    (loop $each
      (br_if $done (i32.ge_u (local.get $n) (local.get $totalUnits)))

      ;; Fetch the next unit and step the cursor.
      (local.set $key (i32.load (local.get $src)))
      (local.set $src (i64.add (local.get $src) (i64.const 4)))

      (local.set $slot
        (call $hashUnitToIndex (local.get $key) (local.get $hashTableCapacity)))

      (block $settled
        (loop $probe
          (local.set $stored
            (i32.load
              (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $slot)))))

          ;; Stop on an empty slot or on the slot that holds this unit.
          (br_if $settled
            (i32.or
              (i32.eqz (local.get $stored))
              (i32.eq (local.get $stored) (local.get $key))))

          ;; Occupied by a different unit: count it and move on.
          (local.set $total (i32.add (local.get $total) (i32.const 1)))
          (local.set $slot
            (call $getNextIndex (local.get $slot) (local.get $tableBytes)))
          (br $probe)))

      (local.set $n (i32.add (local.get $n) (i32.const 1)))
      (br $each)))

  (local.get $total))
1145
+ )