querysub 0.356.0 → 0.358.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.cursorrules +9 -0
- package/bin/movelogs.js +4 -0
- package/package.json +13 -6
- package/scripts/postinstall.js +23 -0
- package/src/-a-archives/archiveCache.ts +10 -12
- package/src/-a-archives/archives.ts +29 -0
- package/src/-a-archives/archivesBackBlaze.ts +60 -12
- package/src/-a-archives/archivesDisk.ts +39 -13
- package/src/-a-archives/archivesLimitedCache.ts +21 -0
- package/src/-a-archives/archivesMemoryCache.ts +374 -0
- package/src/-a-archives/archivesPrivateFileSystem.ts +22 -0
- package/src/-g-core-values/NodeCapabilities.ts +3 -0
- package/src/0-path-value-core/auditLogs.ts +5 -1
- package/src/0-path-value-core/pathValueCore.ts +7 -7
- package/src/4-dom/qreact.tsx +1 -0
- package/src/4-querysub/Querysub.ts +1 -5
- package/src/config.ts +5 -0
- package/src/deployManager/components/MachineDetailPage.tsx +43 -2
- package/src/deployManager/components/MachinesListPage.tsx +10 -2
- package/src/deployManager/machineApplyMainCode.ts +3 -3
- package/src/deployManager/machineSchema.ts +39 -0
- package/src/diagnostics/MachineThreadInfo.tsx +235 -0
- package/src/diagnostics/NodeViewer.tsx +5 -3
- package/src/diagnostics/logs/FastArchiveAppendable.ts +79 -42
- package/src/diagnostics/logs/FastArchiveController.ts +102 -63
- package/src/diagnostics/logs/FastArchiveViewer.tsx +36 -8
- package/src/diagnostics/logs/IndexedLogs/BufferIndex.ts +462 -0
- package/src/diagnostics/logs/IndexedLogs/BufferIndexCPP.cpp +327 -0
- package/src/diagnostics/logs/IndexedLogs/BufferIndexCPP.d.ts +18 -0
- package/src/diagnostics/logs/IndexedLogs/BufferIndexCPP.js +1 -0
- package/src/diagnostics/logs/IndexedLogs/BufferIndexHelpers.ts +222 -0
- package/src/diagnostics/logs/IndexedLogs/BufferIndexLogsOptimizationConstants.ts +22 -0
- package/src/diagnostics/logs/IndexedLogs/BufferIndexWAT.wat +1145 -0
- package/src/diagnostics/logs/IndexedLogs/BufferIndexWAT.wat.d.ts +178 -0
- package/src/diagnostics/logs/IndexedLogs/BufferListStreamer.ts +208 -0
- package/src/diagnostics/logs/IndexedLogs/BufferUnitIndex.ts +716 -0
- package/src/diagnostics/logs/IndexedLogs/BufferUnitSet.ts +146 -0
- package/src/diagnostics/logs/IndexedLogs/FilePathSelector.tsx +569 -0
- package/src/diagnostics/logs/IndexedLogs/FindProgressTracker.ts +45 -0
- package/src/diagnostics/logs/IndexedLogs/IndexedLogs.ts +685 -0
- package/src/diagnostics/logs/IndexedLogs/LogStreamer.ts +47 -0
- package/src/diagnostics/logs/IndexedLogs/LogViewer3.tsx +901 -0
- package/src/diagnostics/logs/IndexedLogs/TimeFileTree.ts +236 -0
- package/src/diagnostics/logs/IndexedLogs/binding.gyp +23 -0
- package/src/diagnostics/logs/IndexedLogs/moveIndexLogsToPublic.ts +251 -0
- package/src/diagnostics/logs/IndexedLogs/moveLogsEntry.ts +10 -0
- package/src/diagnostics/logs/LogViewer2.tsx +120 -55
- package/src/diagnostics/logs/TimeRangeSelector.tsx +5 -2
- package/src/diagnostics/logs/diskLogger.ts +32 -48
- package/src/diagnostics/logs/errorNotifications/ErrorNotificationController.ts +3 -2
- package/src/diagnostics/logs/errorNotifications/errorDigests.tsx +1 -0
- package/src/diagnostics/logs/errorNotifications2/errorNotifications2.ts +0 -0
- package/src/diagnostics/logs/lifeCycleAnalysis/LifeCyclePages.tsx +150 -0
- package/src/diagnostics/logs/lifeCycleAnalysis/lifeCycles.tsx +150 -15
- package/src/diagnostics/logs/lifeCycleAnalysis/test.ts +0 -0
- package/src/diagnostics/logs/lifeCycleAnalysis/test.wat +106 -0
- package/src/diagnostics/logs/lifeCycleAnalysis/test.wat.d.ts +2 -0
- package/src/diagnostics/logs/lifeCycleAnalysis/testHoist.ts +5 -0
- package/src/diagnostics/logs/logViewerExtractField.ts +2 -3
- package/src/diagnostics/managementPages.tsx +10 -0
- package/src/diagnostics/trackResources.ts +1 -1
- package/src/functional/limitProcessing.ts +39 -0
- package/src/misc/lz4_wasm_nodejs.d.ts +34 -0
- package/src/misc/lz4_wasm_nodejs.js +178 -0
- package/src/misc/lz4_wasm_nodejs_bg.js +94 -0
- package/src/misc/lz4_wasm_nodejs_bg.wasm +0 -0
- package/src/misc/lz4_wasm_nodejs_bg.wasm.d.ts +15 -0
- package/src/storage/CompressedStream.ts +13 -0
- package/src/storage/LZ4.ts +32 -0
- package/src/storage/ZSTD.ts +10 -0
- package/src/wat/watCompiler.ts +1716 -0
- package/src/wat/watGrammar.pegjs +93 -0
- package/src/wat/watHandler.ts +179 -0
- package/src/wat/watInstructions.txt +707 -0
- package/src/zip.ts +3 -89
- package/src/diagnostics/logs/lifeCycleAnalysis/spec.md +0 -125
|
@@ -0,0 +1,1145 @@
|
|
|
1
|
+
(module
|
|
2
|
+
;; Import memory from JavaScript (with memory64 support for >4GB)
|
|
3
|
+
(import "env" "memory" (memory i64 1))
|
|
4
|
+
(global (export "__heap_base") i32 (i32.const 0))
|
|
5
|
+
|
|
6
|
+
;; Helper: map a 32-bit unit to the byte offset of its home slot in the hash table.
;; Uses Fibonacci (multiplicative) hashing: the top log2(capacity) bits of
;; unit * 2654435761 pick the slot, and the slot number is then scaled by 16
;; because each slot is 16 bytes (4 u32s).
;; hashTableCapacity MUST be a power of 2.
;; Parameters:
;;   - unit: the value to hash
;;   - hashTableCapacity: number of slots in the table (power of 2)
;; Returns: byte offset of the slot relative to the start of the hash table
(func $hashUnitToIndex (param $unit i32) (param $hashTableCapacity i32) (result i32)
  (local $hash i32)
  (local $shiftAmount i32)
  ;; A table with at most one slot only has offset 0. Without this guard a
  ;; capacity of 1 yields shiftAmount = 32; i32.shr_u takes its shift count
  ;; modulo 32, so nothing would be shifted out and the returned offset would
  ;; be hash * 16, far outside the table.
  (if (i32.le_u (local.get $hashTableCapacity) (i32.const 1))
    (then (return (i32.const 0))))
  ;; Multiply by 2654435761 (Knuth's golden ratio constant); wraps modulo 2^32
  (local.set $hash (i32.mul (local.get $unit) (i32.const 2654435761)))
  ;; Calculate shift amount: 32 - log2(capacity) = 1 + clz(capacity)
  (local.set $shiftAmount (i32.add (i32.clz (local.get $hashTableCapacity)) (i32.const 1)))
  ;; Keep only the top log2(capacity) bits of the hash (this is the slot number),
  ;; then multiply by 16 to turn the slot number into a byte offset
  (i32.mul
    (i32.shr_u (local.get $hash) (local.get $shiftAmount))
    (i32.const 16)))
|
|
21
|
+
|
|
22
|
+
;; Helper: advance a probe position by one 16-byte slot (linear probing).
;; Parameters:
;;   - index: current byte offset into the hash table
;;   - hashTableSize: size of the hash table in bytes
;; Returns: index + 16, reduced by hashTableSize when it reaches or passes the end
(func $getNextIndex (param $index i32) (param $hashTableSize i32) (result i32)
  (local $candidate i32)
  (local.set $candidate (i32.add (local.get $index) (i32.const 16)))
  ;; Common case: the next slot is still inside the table
  (if (i32.lt_u (local.get $candidate) (local.get $hashTableSize))
    (then (return (local.get $candidate))))
  ;; Ran off the end: wrap around by subtracting the table size
  (i32.sub (local.get $candidate) (local.get $hashTableSize)))
|
|
29
|
+
|
|
30
|
+
;; Populate Units From Block (Loop Unrolled): Extract units from a concatenated block of buffers
;; Block format: [count, length1, length2, ..., lengthN, buffer1_bytes, buffer2_bytes, ..., bufferN_bytes]
;;   - count: u32 (4 bytes) - number of buffers
;;   - lengths: count * u32 (4 bytes each) - length of each buffer
;;   - buffers: concatenated buffer data
;; A "unit" is the u32 loaded from 4 consecutive bytes of a buffer; a unit is taken at
;; every byte position, so consecutive units overlap by 3 bytes. Units equal to 0 are skipped.
;; For every unit kept, the unit, the index of its buffer and blockIndex are written to the
;; same position of the three output arrays.
;; Despite the export name, no SIMD instructions are used: the main loop is unrolled to
;; handle 4 positions per iteration, and a scalar tail loop handles the remaining positions.
;; Parameters:
;;   - blockDataPtr: pointer to concatenated block data
;;   - blockDataSize: total size of block data in bytes (not read by this function)
;;   - blockIndex: block index value to store
;;   - unitsOutputPtr: pointer to units output array
;;   - bufferIndicesOutputPtr: pointer to buffer indices output array
;;   - blocksOutputPtr: pointer to blocks output array
;;   - startOffset: starting write position (in elements) in output arrays
;; Returns: number of units written
(func (export "populate_units_from_block_simd")
  (param $blockDataPtr i64)
  (param $blockDataSize i32)
  (param $blockIndex i32)
  (param $unitsOutputPtr i64)
  (param $bufferIndicesOutputPtr i64)
  (param $blocksOutputPtr i64)
  (param $startOffset i32)
  (result i32)

  (local $bufferCount i32)
  (local $bufferIndex i32)
  (local $bufferLength i32)
  (local $bufferDataStart i32)
  (local $bufferOffset i32)
  (local $totalUnitsWritten i32)
  (local $i i32)
  (local $mainLoopEnd i32)
  (local $unit i32)
  (local $readAddr i64)
  (local $writeAddr i64)

  ;; Temps for unrolled loop
  (local $unit0 i32)
  (local $unit1 i32)
  (local $unit2 i32)
  (local $unit3 i32)
  (local $j i32)

  (local.set $totalUnitsWritten (i32.const 0))

  ;; Read buffer count (first 4 bytes)
  (local.set $bufferCount (i32.load align=0 (local.get $blockDataPtr)))

  ;; Calculate where buffer data starts (after count + all lengths)
  (local.set $bufferDataStart (i32.add (i32.const 4) (i32.mul (local.get $bufferCount) (i32.const 4))))
  (local.set $bufferOffset (local.get $bufferDataStart))

  ;; Loop through all buffers
  (local.set $bufferIndex (i32.const 0))
  (block $break_buffers
    (loop $continue_buffers
      (br_if $break_buffers (i32.ge_u (local.get $bufferIndex) (local.get $bufferCount)))

      ;; Read buffer length from header (offset: 4 + bufferIndex * 4)
      (local.set $bufferLength
        (i32.load align=0
          (i64.add
            (local.get $blockDataPtr)
            (i64.extend_i32_u (i32.add (i32.const 4) (i32.mul (local.get $bufferIndex) (i32.const 4)))))))

      ;; Process this buffer - we need at least 4 bytes to create a unit
      (if (i32.ge_u (local.get $bufferLength) (i32.const 4))
        (then
          ;; Calculate main loop end.
          ;; Extracting 4 overlapping units at positions i..i+3 reads bytes [i, i+7),
          ;; so the unrolled loop may only run while i <= bufferLength - 7.
          (if (i32.ge_u (local.get $bufferLength) (i32.const 7))
            (then
              (local.set $mainLoopEnd (i32.sub (local.get $bufferLength) (i32.const 6))))
            (else
              ;; Too short for the unrolled loop, go straight to the tail loop
              (local.set $mainLoopEnd (i32.const 0))))
          (local.set $i (i32.const 0))

          ;; Main unrolled loop - process 4 overlapping units per iteration
          (block $break_simd
            (loop $continue_simd
              (br_if $break_simd (i32.ge_u (local.get $i) (local.get $mainLoopEnd)))

              ;; Address of byte i of the current buffer
              (local.set $readAddr
                (i64.add
                  (local.get $blockDataPtr)
                  (i64.extend_i32_u (i32.add (local.get $bufferOffset) (local.get $i)))))

              ;; Load 4 overlapping units (unrolled, not SIMD)
              ;; Unit 0: bytes [i+0, i+1, i+2, i+3]
              (local.set $unit0 (i32.load align=0 (local.get $readAddr)))
              ;; Unit 1: bytes [i+1, i+2, i+3, i+4]
              (local.set $unit1 (i32.load align=0 (i64.add (local.get $readAddr) (i64.const 1))))
              ;; Unit 2: bytes [i+2, i+3, i+4, i+5]
              (local.set $unit2 (i32.load align=0 (i64.add (local.get $readAddr) (i64.const 2))))
              ;; Unit 3: bytes [i+3, i+4, i+5, i+6]
              (local.set $unit3 (i32.load align=0 (i64.add (local.get $readAddr) (i64.const 3))))

              ;; Fast-path test: a non-zero bitwise AND of the four units implies that every
              ;; unit is non-zero. The test is conservative: four non-zero units that share
              ;; no set bit take the slow path, which writes exactly the same output.
              (if (i32.and (i32.and (local.get $unit0) (local.get $unit1))
                           (i32.and (local.get $unit2) (local.get $unit3)))
                (then
                  ;; Byte offset of the write position, computed in 64 bits so that
                  ;; (startOffset + totalUnitsWritten) * 4 cannot wrap at 4GB
                  (local.set $writeAddr
                    (i64.mul
                      (i64.add
                        (i64.extend_i32_u (local.get $startOffset))
                        (i64.extend_i32_u (local.get $totalUnitsWritten)))
                      (i64.const 4)))

                  ;; Write all units together
                  (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (local.get $writeAddr)) (local.get $unit0))
                  (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (i64.add (local.get $writeAddr) (i64.const 4))) (local.get $unit1))
                  (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (i64.add (local.get $writeAddr) (i64.const 8))) (local.get $unit2))
                  (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (i64.add (local.get $writeAddr) (i64.const 12))) (local.get $unit3))

                  ;; Write all buffer indices together
                  (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (local.get $writeAddr)) (local.get $bufferIndex))
                  (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (i64.add (local.get $writeAddr) (i64.const 4))) (local.get $bufferIndex))
                  (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (i64.add (local.get $writeAddr) (i64.const 8))) (local.get $bufferIndex))
                  (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (i64.add (local.get $writeAddr) (i64.const 12))) (local.get $bufferIndex))

                  ;; Write all blocks together
                  (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (local.get $writeAddr)) (local.get $blockIndex))
                  (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (i64.add (local.get $writeAddr) (i64.const 4))) (local.get $blockIndex))
                  (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (i64.add (local.get $writeAddr) (i64.const 8))) (local.get $blockIndex))
                  (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (i64.add (local.get $writeAddr) (i64.const 12))) (local.get $blockIndex))

                  ;; Increment total units written by 4
                  (local.set $totalUnitsWritten (i32.add (local.get $totalUnitsWritten) (i32.const 4))))
                (else
                  ;; Slow path: re-read the 4 units one by one and keep only the non-zero ones.
                  ;; $readAddr still points at byte i of the current buffer.
                  (local.set $j (i32.const 0))
                  (block $break_zeros
                    (loop $continue_zeros
                      (br_if $break_zeros (i32.ge_u (local.get $j) (i32.const 4)))

                      ;; Read unit at offset j
                      (local.set $unit (i32.load align=0 (i64.add (local.get $readAddr) (i64.extend_i32_u (local.get $j)))))

                      ;; Write if non-zero
                      (if (local.get $unit)
                        (then
                          (local.set $writeAddr
                            (i64.mul
                              (i64.add
                                (i64.extend_i32_u (local.get $startOffset))
                                (i64.extend_i32_u (local.get $totalUnitsWritten)))
                              (i64.const 4)))
                          (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (local.get $writeAddr)) (local.get $unit))
                          (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (local.get $writeAddr)) (local.get $bufferIndex))
                          (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (local.get $writeAddr)) (local.get $blockIndex))
                          (local.set $totalUnitsWritten (i32.add (local.get $totalUnitsWritten) (i32.const 1)))))

                      ;; Move to next position
                      (local.set $j (i32.add (local.get $j) (i32.const 1)))
                      (br $continue_zeros)))))

              ;; Advance by 4 bytes (processed 4 overlapping units at positions i, i+1, i+2, i+3)
              (local.set $i (i32.add (local.get $i) (i32.const 4)))
              (br $continue_simd)))

          ;; Scalar tail loop: process the remaining positions up to bufferLength - 4
          (block $break_tail
            (loop $continue_tail
              ;; Stop when fewer than 4 bytes remain at position i
              (br_if $break_tail (i32.gt_u (local.get $i) (i32.sub (local.get $bufferLength) (i32.const 4))))

              ;; Calculate read address
              (local.set $readAddr
                (i64.add
                  (local.get $blockDataPtr)
                  (i64.extend_i32_u (i32.add (local.get $bufferOffset) (local.get $i)))))

              ;; Read unit as little-endian u32
              (local.set $unit (i32.load align=0 (local.get $readAddr)))

              ;; Skip if unit is 0
              (if (i32.eqz (local.get $unit))
                (then
                  (local.set $i (i32.add (local.get $i) (i32.const 1)))
                  (br $continue_tail)))

              ;; Byte offset of the write position (64-bit arithmetic, see fast path)
              (local.set $writeAddr
                (i64.mul
                  (i64.add
                    (i64.extend_i32_u (local.get $startOffset))
                    (i64.extend_i32_u (local.get $totalUnitsWritten)))
                  (i64.const 4)))

              ;; Write to output arrays
              (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (local.get $writeAddr)) (local.get $unit))
              (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (local.get $writeAddr)) (local.get $bufferIndex))
              (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (local.get $writeAddr)) (local.get $blockIndex))

              ;; Increment counters - advance by 1 byte for overlapping units
              (local.set $totalUnitsWritten (i32.add (local.get $totalUnitsWritten) (i32.const 1)))
              (local.set $i (i32.add (local.get $i) (i32.const 1)))

              (br $continue_tail)))))

      ;; Move to next buffer
      (local.set $bufferOffset (i32.add (local.get $bufferOffset) (local.get $bufferLength)))
      (local.set $bufferIndex (i32.add (local.get $bufferIndex) (i32.const 1)))

      (br $continue_buffers)))

  (local.get $totalUnitsWritten))
|
|
216
|
+
|
|
217
|
+
;; Populate Units From Block: Extract units from a concatenated block of buffers
;; Block format: [count, length1, length2, ..., lengthN, buffer1_bytes, buffer2_bytes, ..., bufferN_bytes]
;;   - count: u32 (4 bytes) - number of buffers
;;   - lengths: count * u32 (4 bytes each) - length of each buffer
;;   - buffers: concatenated buffer data
;; A "unit" is the u32 loaded from 4 consecutive bytes of a buffer; a unit is taken at
;; every byte position, so consecutive units overlap by 3 bytes. Units equal to 0 are skipped.
;; For every unit kept, the unit, the index of its buffer and blockIndex are written to the
;; same position of the three output arrays.
;; Parameters:
;;   - blockDataPtr: pointer to concatenated block data
;;   - blockDataSize: total size of block data in bytes (not read by this function)
;;   - blockIndex: block index value to store
;;   - unitsOutputPtr: pointer to units output array
;;   - bufferIndicesOutputPtr: pointer to buffer indices output array
;;   - blocksOutputPtr: pointer to blocks output array
;;   - startOffset: starting write position (in elements) in output arrays
;; Returns: number of units written
(func (export "populate_units_from_block")
  (param $blockDataPtr i64)
  (param $blockDataSize i32)
  (param $blockIndex i32)
  (param $unitsOutputPtr i64)
  (param $bufferIndicesOutputPtr i64)
  (param $blocksOutputPtr i64)
  (param $startOffset i32)
  (result i32)

  (local $bufferCount i32)
  (local $bufferIndex i32)
  (local $bufferLength i32)
  (local $bufferDataStart i32)
  (local $bufferOffset i32)
  (local $totalUnitsWritten i32)
  (local $i i32)
  (local $unit i32)
  (local $readAddr i64)
  (local $writeAddr i64)

  (local.set $totalUnitsWritten (i32.const 0))

  ;; Read buffer count (first 4 bytes)
  (local.set $bufferCount (i32.load align=0 (local.get $blockDataPtr)))

  ;; Calculate where buffer data starts (after count + all lengths)
  (local.set $bufferDataStart (i32.add (i32.const 4) (i32.mul (local.get $bufferCount) (i32.const 4))))
  (local.set $bufferOffset (local.get $bufferDataStart))

  ;; Loop through all buffers
  (local.set $bufferIndex (i32.const 0))
  (block $break_buffers
    (loop $continue_buffers
      (br_if $break_buffers (i32.ge_u (local.get $bufferIndex) (local.get $bufferCount)))

      ;; Read buffer length from header (offset: 4 + bufferIndex * 4)
      (local.set $bufferLength
        (i32.load align=0
          (i64.add
            (local.get $blockDataPtr)
            (i64.extend_i32_u (i32.add (i32.const 4) (i32.mul (local.get $bufferIndex) (i32.const 4)))))))

      ;; Process this buffer - we need at least 4 bytes to create a unit
      (if (i32.ge_u (local.get $bufferLength) (i32.const 4))
        (then
          (local.set $i (i32.const 0))

          ;; Loop through buffer positions
          (block $break_buffer
            (loop $continue_buffer
              ;; Stop when fewer than 4 bytes remain at position i
              (br_if $break_buffer (i32.gt_u (local.get $i) (i32.sub (local.get $bufferLength) (i32.const 4))))

              ;; Calculate read address
              (local.set $readAddr
                (i64.add
                  (local.get $blockDataPtr)
                  (i64.extend_i32_u (i32.add (local.get $bufferOffset) (local.get $i)))))

              ;; Read unit as little-endian u32
              (local.set $unit (i32.load align=0 (local.get $readAddr)))

              ;; Skip if unit is 0
              (if (i32.eqz (local.get $unit))
                (then
                  (local.set $i (i32.add (local.get $i) (i32.const 1)))
                  (br $continue_buffer)))

              ;; Byte offset of the write position, computed in 64 bits so that
              ;; (startOffset + totalUnitsWritten) * 4 cannot wrap at 4GB
              (local.set $writeAddr
                (i64.mul
                  (i64.add
                    (i64.extend_i32_u (local.get $startOffset))
                    (i64.extend_i32_u (local.get $totalUnitsWritten)))
                  (i64.const 4)))

              ;; Write to output arrays
              (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (local.get $writeAddr)) (local.get $unit))
              (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (local.get $writeAddr)) (local.get $bufferIndex))
              (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (local.get $writeAddr)) (local.get $blockIndex))

              ;; Increment counters
              (local.set $totalUnitsWritten (i32.add (local.get $totalUnitsWritten) (i32.const 1)))
              (local.set $i (i32.add (local.get $i) (i32.const 1)))

              (br $continue_buffer)))))

      ;; Move to next buffer
      (local.set $bufferOffset (i32.add (local.get $bufferOffset) (local.get $bufferLength)))
      (local.set $bufferIndex (i32.add (local.get $bufferIndex) (i32.const 1)))

      (br $continue_buffers)))

  (local.get $totalUnitsWritten))
|
|
317
|
+
|
|
318
|
+
;; Populate Units From Buffer: Extract units from a buffer into three parallel arrays
;; A "unit" is the u32 loaded from 4 consecutive bytes of the buffer; a unit is taken at
;; every byte position, so consecutive units overlap by 3 bytes. Units equal to 0 are skipped.
;; For every unit kept, the unit, bufferIndex and block are written to the same position
;; of the three output arrays.
;; The main loop is unrolled to handle 4 positions per iteration; a scalar tail loop
;; handles the remaining positions.
;; Parameters:
;;   - bufferPtr: pointer to source buffer bytes
;;   - bufferLength: length of source buffer in bytes
;;   - bufferIndex: buffer index value to store
;;   - block: block value to store
;;   - unitsOutputPtr: pointer to units output array
;;   - bufferIndicesOutputPtr: pointer to buffer indices output array
;;   - blocksOutputPtr: pointer to blocks output array
;;   - startOffset: starting write position (in elements) in output arrays
;; Returns: number of units written (0 when bufferLength < 4)
(func (export "populate_units_from_buffer")
  (param $bufferPtr i64)
  (param $bufferLength i32)
  (param $bufferIndex i32)
  (param $block i32)
  (param $unitsOutputPtr i64)
  (param $bufferIndicesOutputPtr i64)
  (param $blocksOutputPtr i64)
  (param $startOffset i32)
  (result i32)

  (local $i i32)
  (local $mainLoopEnd i32)
  (local $count i32)
  (local $unit i32)
  (local $readAddr i64)
  (local $writeAddr i64)

  ;; Temps for unrolled loop
  (local $unit0 i32)
  (local $unit1 i32)
  (local $unit2 i32)
  (local $unit3 i32)
  (local $j i32)

  ;; We need at least 4 bytes to create a unit
  (if (i32.lt_u (local.get $bufferLength) (i32.const 4))
    (then (return (i32.const 0))))

  (local.set $count (i32.const 0))
  (local.set $i (i32.const 0))

  ;; Calculate main loop end for unrolled loop.
  ;; Extracting 4 overlapping units at positions i..i+3 reads bytes [i, i+7),
  ;; so the unrolled loop may only run while i <= bufferLength - 7.
  (if (i32.ge_u (local.get $bufferLength) (i32.const 7))
    (then
      (local.set $mainLoopEnd (i32.sub (local.get $bufferLength) (i32.const 6))))
    (else
      ;; Too short for the unrolled loop, go straight to the tail loop
      (local.set $mainLoopEnd (i32.const 0))))

  ;; Main unrolled loop - process 4 overlapping units per iteration
  (block $break_main
    (loop $continue_main
      (br_if $break_main (i32.ge_u (local.get $i) (local.get $mainLoopEnd)))

      ;; Calculate read address
      (local.set $readAddr (i64.add (local.get $bufferPtr) (i64.extend_i32_u (local.get $i))))

      ;; Load 4 overlapping units (byte positions i, i+1, i+2, i+3)
      (local.set $unit0 (i32.load align=0 (local.get $readAddr)))
      (local.set $unit1 (i32.load align=0 (i64.add (local.get $readAddr) (i64.const 1))))
      (local.set $unit2 (i32.load align=0 (i64.add (local.get $readAddr) (i64.const 2))))
      (local.set $unit3 (i32.load align=0 (i64.add (local.get $readAddr) (i64.const 3))))

      ;; Fast-path test: a non-zero bitwise AND of the four units implies that every
      ;; unit is non-zero. The test is conservative: four non-zero units that share
      ;; no set bit take the slow path, which writes exactly the same output.
      (if (i32.and (i32.and (local.get $unit0) (local.get $unit1))
                   (i32.and (local.get $unit2) (local.get $unit3)))
        (then
          ;; Byte offset of the write position, computed in 64 bits so that
          ;; (startOffset + count) * 4 cannot wrap at 4GB
          (local.set $writeAddr
            (i64.mul
              (i64.add
                (i64.extend_i32_u (local.get $startOffset))
                (i64.extend_i32_u (local.get $count)))
              (i64.const 4)))

          ;; Write all units together
          (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (local.get $writeAddr)) (local.get $unit0))
          (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (i64.add (local.get $writeAddr) (i64.const 4))) (local.get $unit1))
          (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (i64.add (local.get $writeAddr) (i64.const 8))) (local.get $unit2))
          (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (i64.add (local.get $writeAddr) (i64.const 12))) (local.get $unit3))

          ;; Write all buffer indices together
          (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (local.get $writeAddr)) (local.get $bufferIndex))
          (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (i64.add (local.get $writeAddr) (i64.const 4))) (local.get $bufferIndex))
          (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (i64.add (local.get $writeAddr) (i64.const 8))) (local.get $bufferIndex))
          (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (i64.add (local.get $writeAddr) (i64.const 12))) (local.get $bufferIndex))

          ;; Write all blocks together
          (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (local.get $writeAddr)) (local.get $block))
          (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (i64.add (local.get $writeAddr) (i64.const 4))) (local.get $block))
          (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (i64.add (local.get $writeAddr) (i64.const 8))) (local.get $block))
          (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (i64.add (local.get $writeAddr) (i64.const 12))) (local.get $block))

          ;; Increment count by 4
          (local.set $count (i32.add (local.get $count) (i32.const 4))))
        (else
          ;; Slow path: re-read the 4 units one by one and keep only the non-zero ones
          (local.set $j (i32.const 0))
          (block $break_zeros
            (loop $continue_zeros
              (br_if $break_zeros (i32.ge_u (local.get $j) (i32.const 4)))

              ;; Read unit at offset j
              (local.set $unit (i32.load align=0 (i64.add (local.get $readAddr) (i64.extend_i32_u (local.get $j)))))

              ;; Write if non-zero
              (if (local.get $unit)
                (then
                  (local.set $writeAddr
                    (i64.mul
                      (i64.add
                        (i64.extend_i32_u (local.get $startOffset))
                        (i64.extend_i32_u (local.get $count)))
                      (i64.const 4)))
                  (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (local.get $writeAddr)) (local.get $unit))
                  (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (local.get $writeAddr)) (local.get $bufferIndex))
                  (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (local.get $writeAddr)) (local.get $block))
                  (local.set $count (i32.add (local.get $count) (i32.const 1)))))

              (local.set $j (i32.add (local.get $j) (i32.const 1)))
              (br $continue_zeros)))))

      ;; Advance by 4 bytes
      (local.set $i (i32.add (local.get $i) (i32.const 4)))
      (br $continue_main)))

  ;; Tail loop for the remaining positions up to bufferLength - 4
  (block $break_tail
    (loop $continue_tail
      ;; Stop when fewer than 4 bytes remain at position i
      (br_if $break_tail (i32.gt_u (local.get $i) (i32.sub (local.get $bufferLength) (i32.const 4))))

      ;; Read unit
      (local.set $readAddr (i64.add (local.get $bufferPtr) (i64.extend_i32_u (local.get $i))))
      (local.set $unit (i32.load align=0 (local.get $readAddr)))

      ;; Skip if unit is 0
      (if (i32.eqz (local.get $unit))
        (then
          (local.set $i (i32.add (local.get $i) (i32.const 1)))
          (br $continue_tail)))

      ;; Write unit (64-bit byte offset, see fast path)
      (local.set $writeAddr
        (i64.mul
          (i64.add
            (i64.extend_i32_u (local.get $startOffset))
            (i64.extend_i32_u (local.get $count)))
          (i64.const 4)))
      (i32.store align=0 (i64.add (local.get $unitsOutputPtr) (local.get $writeAddr)) (local.get $unit))
      (i32.store align=0 (i64.add (local.get $bufferIndicesOutputPtr) (local.get $writeAddr)) (local.get $bufferIndex))
      (i32.store align=0 (i64.add (local.get $blocksOutputPtr) (local.get $writeAddr)) (local.get $block))

      (local.set $count (i32.add (local.get $count) (i32.const 1)))
      (local.set $i (i32.add (local.get $i) (i32.const 1)))
      (br $continue_tail)))

  (local.get $count))
|
|
461
|
+
|
|
462
|
+
;; Phase 1: Count & Deduplicate using hash table
;;
;; Walks the units array once. Each unit is looked up in an open-addressing
;; hash table (linear probing); a first sighting claims an empty slot with
;; count = 1 and is appended to the unique unit list, a repeat sighting
;; increments the count stored in the slot.
;;
;; Parameters:
;; - unitsPtr: pointer to units array (u32 per unit)
;; - totalUnits: number of units
;; - hashTablePtr: pointer to hash table [Unit, Count, Offset, ItemsWritten, ...]
;;   (16 bytes per slot; only Unit and Count are touched here)
;; - hashTableCapacity: capacity of hash table (in slots)
;; - uniqueUnitListPtr: pointer to store unique units
;; - maxUniqueCount: maximum fill threshold for unique units
;; Returns: uniqueCount (or -1 if exceeded threshold)
;;
;; A slot whose Unit word is 0 is treated as empty, so the table is presumably
;; zero-filled by the caller before this phase - confirm.
;; NOTE(review): for the same reason a unit whose value is 0 can never be
;; recognised as "already present"; every occurrence would be appended to the
;; unique list again. Presumably callers never pass 0 units - confirm.
(func (export "phase1_count_deduplicate")
  (param $unitsPtr i64)
  (param $totalUnits i32)
  (param $hashTablePtr i64)
  (param $hashTableCapacity i32)
  (param $uniqueUnitListPtr i64)
  (param $maxUniqueCount i32)
  (result i32)

  (local $i i32)
  (local $unit i32)
  (local $index i32)
  (local $hashTableSize i32)
  (local $uniqueCount i32)
  (local $hashValue i32)
  (local $unitAddr i64)
  (local $slotAddr i64)

  ;; hashTableSize = hashTableCapacity * 4 * 4 (4 entries per slot, 4 bytes per entry)
  (local.set $hashTableSize
    (i32.mul (local.get $hashTableCapacity) (i32.const 16)))

  (local.set $uniqueCount (i32.const 0))
  (local.set $i (i32.const 0))
  (local.set $unitAddr (local.get $unitsPtr))

  ;; Loop through all units
  (block $break
    (loop $continue
      ;; Check if we've processed all units
      (br_if $break (i32.ge_u (local.get $i) (local.get $totalUnits)))

      ;; Read unit from unitsArray[i] and advance the read pointer
      (local.set $unit (i32.load (local.get $unitAddr)))
      (local.set $unitAddr (i64.add (local.get $unitAddr) (i64.const 4)))

      ;; Calculate hash index (a byte offset into the table) using helper
      (local.set $index (call $hashUnitToIndex (local.get $unit) (local.get $hashTableCapacity)))

      ;; Linear probing
      (block $probe_break
        (loop $probe_continue
          ;; Address of the slot being probed; computed once per probe step
          (local.set $slotAddr
            (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $index))))
          (local.set $hashValue (i32.load (local.get $slotAddr)))

          ;; Check if empty slot (value == 0)
          (if (i32.eqz (local.get $hashValue))
            (then
              ;; Empty slot - claim it: Unit = unit, Count = 1
              (i32.store (local.get $slotAddr) (local.get $unit))
              (i32.store (i64.add (local.get $slotAddr) (i64.const 4)) (i32.const 1))

              ;; Check the threshold BEFORE appending to the unique list.
              ;; Appending first (as was done previously) wrote element index
              ;; maxUniqueCount on the call that returns -1, i.e. one entry
              ;; past a list sized for maxUniqueCount units.
              (if (i32.ge_u (local.get $uniqueCount) (local.get $maxUniqueCount))
                (then
                  (return (i32.const -1))))

              ;; Add to unique unit list
              (i32.store
                (i64.add (local.get $uniqueUnitListPtr)
                         (i64.extend_i32_u (i32.mul (local.get $uniqueCount) (i32.const 4))))
                (local.get $unit))

              ;; Increment unique count
              (local.set $uniqueCount (i32.add (local.get $uniqueCount) (i32.const 1)))

              (br $probe_break)))

          ;; Check if matching unit
          (if (i32.eq (local.get $hashValue) (local.get $unit))
            (then
              ;; Increment count at hashTable[index + 4]
              (i32.store
                (i64.add (local.get $slotAddr) (i64.const 4))
                (i32.add
                  (i32.load (i64.add (local.get $slotAddr) (i64.const 4)))
                  (i32.const 1)))
              (br $probe_break)))

          ;; Collision - probe next slot.
          ;; There is no probe limit: termination relies on the table always
          ;; containing an empty slot or the unit itself.
          (local.set $index (call $getNextIndex (local.get $index) (local.get $hashTableSize)))

          (br $probe_continue)))

      ;; Next unit
      (local.set $i (i32.add (local.get $i) (i32.const 1)))
      (br $continue)))

  ;; Return unique count
  (local.get $uniqueCount))
|
|
567
|
+
|
|
568
|
+
;; Phase 3: Calculate offsets
;;
;; For every entry of the unique unit list, locates the unit's slot in the
;; hash table, records the running output offset in the slot's Offset word
;; (byte +8) and advances the running offset by
;; min(Count, maxPositionsPerUnit), where Count is the slot word at byte +4.
;;
;; Parameters:
;; - uniqueUnitListPtr: pointer to sorted unique units
;; - uniqueCount: number of unique units
;; - hashTablePtr: pointer to hash table
;; - hashTableCapacity: capacity of hash table
;; - maxPositionsPerUnit: max positions to store per unit (typically 1000)
;; Returns: total offset (size of output arrays)
(func (export "phase3_calc_offsets")
  (param $uniqueUnitListPtr i64)
  (param $uniqueCount i32)
  (param $hashTablePtr i64)
  (param $hashTableCapacity i32)
  (param $maxPositionsPerUnit i32)
  (result i32)

  ;; Locals start at zero, so $cursor and $running need no explicit reset.
  (local $cursor i32)       ;; position in the unique unit list
  (local $key i32)          ;; unit currently being placed
  (local $slot i32)         ;; byte offset of the probed slot inside the table
  (local $tableBytes i32)   ;; table size in bytes (16 bytes per slot)
  (local $running i32)      ;; accumulated output offset
  (local $occurrences i32)  ;; Count word read from the unit's slot

  ;; capacity << 4 == capacity * 16
  (local.set $tableBytes (i32.shl (local.get $hashTableCapacity) (i32.const 4)))

  (block $done
    (loop $next_unit
      (br_if $done (i32.ge_u (local.get $cursor) (local.get $uniqueCount)))

      ;; key = uniqueUnitList[cursor]  (cursor << 2 == cursor * 4)
      (local.set $key
        (i32.load
          (i64.add
            (local.get $uniqueUnitListPtr)
            (i64.extend_i32_u (i32.shl (local.get $cursor) (i32.const 2))))))

      ;; Start probing at the unit's home slot
      (local.set $slot (call $hashUnitToIndex (local.get $key) (local.get $hashTableCapacity)))

      ;; Step to the next slot for as long as the stored unit differs.
      ;; No empty-slot check: the unit is expected to be present already.
      (loop $probe
        (if (i32.ne
              (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $slot))))
              (local.get $key))
          (then
            (local.set $slot (call $getNextIndex (local.get $slot) (local.get $tableBytes)))
            (br $probe))))

      ;; Count lives at slot + 4
      (local.set $occurrences
        (i32.load
          (i64.add
            (local.get $hashTablePtr)
            (i64.extend_i32_u (i32.add (local.get $slot) (i32.const 4))))))

      ;; Offset (slot + 8) = where this unit's run starts in the output
      (i32.store
        (i64.add
          (local.get $hashTablePtr)
          (i64.extend_i32_u (i32.add (local.get $slot) (i32.const 8))))
        (local.get $running))

      ;; running += min(occurrences, maxPositionsPerUnit)
      (local.set $running
        (i32.add
          (local.get $running)
          (select
            (local.get $maxPositionsPerUnit)
            (local.get $occurrences)
            (i32.gt_u (local.get $occurrences) (local.get $maxPositionsPerUnit)))))

      (local.set $cursor (i32.add (local.get $cursor) (i32.const 1)))
      (br $next_unit)))

  (local.get $running))
|
|
646
|
+
|
|
647
|
+
;; Phase 4: Fill & Filter
;;
;; Re-reads every (unit, bufferIndex, block) triple, finds the unit's slot in
;; the hash table and, while the slot's ItemsWritten word (byte +12) is below
;; maxPositionsPerUnit, copies the triple to output position
;; Offset (byte +8) + ItemsWritten and bumps ItemsWritten. Triples for units
;; that already reached the cap are dropped.
;;
;; Parameters:
;; - unitsPtr: pointer to units array
;; - bufferIndicesPtr: pointer to buffer indices array
;; - blocksPtr: pointer to blocks array
;; - totalUnits: number of units
;; - hashTablePtr: pointer to hash table
;; - hashTableCapacity: capacity of hash table
;; - maxPositionsPerUnit: max positions per unit
;; - filteredUnitsPtr: output array for units
;; - filteredBufferIndicesPtr: output array for buffer indices
;; - filteredBlocksPtr: output array for blocks
;; Returns: void
(func (export "phase4_fill_filter")
  (param $unitsPtr i64)
  (param $bufferIndicesPtr i64)
  (param $blocksPtr i64)
  (param $totalUnits i32)
  (param $hashTablePtr i64)
  (param $hashTableCapacity i32)
  (param $maxPositionsPerUnit i32)
  (param $filteredUnitsPtr i64)
  (param $filteredBufferIndicesPtr i64)
  (param $filteredBlocksPtr i64)

  ;; Locals start at zero, so $consumed and $srcOff need no explicit reset.
  (local $consumed i32)     ;; number of input triples read so far
  (local $srcOff i64)       ;; byte offset shared by the three input arrays
  (local $key i32)          ;; current unit
  (local $bufIdx i32)       ;; current buffer index
  (local $blk i32)          ;; current block
  (local $slot i32)         ;; byte offset of the probed slot inside the table
  (local $tableBytes i32)   ;; table size in bytes (16 bytes per slot)
  (local $slotAddr i64)     ;; absolute address of the probed slot
  (local $written i32)      ;; ItemsWritten word of the matched slot
  (local $dstOff i64)       ;; byte offset shared by the three output arrays

  ;; capacity << 4 == capacity * 16
  (local.set $tableBytes (i32.shl (local.get $hashTableCapacity) (i32.const 4)))

  (block $finished
    (loop $each_unit
      (br_if $finished (i32.ge_u (local.get $consumed) (local.get $totalUnits)))

      ;; Fetch the triple; the three inputs are parallel arrays of 4-byte entries
      (local.set $key (i32.load (i64.add (local.get $unitsPtr) (local.get $srcOff))))
      (local.set $bufIdx (i32.load (i64.add (local.get $bufferIndicesPtr) (local.get $srcOff))))
      (local.set $blk (i32.load (i64.add (local.get $blocksPtr) (local.get $srcOff))))

      ;; Advance now so that the "cap reached" branch below can simply continue
      (local.set $srcOff (i64.add (local.get $srcOff) (i64.const 4)))
      (local.set $consumed (i32.add (local.get $consumed) (i32.const 1)))

      ;; Locate the unit's slot: start at its home slot and step while the
      ;; stored unit differs (no empty-slot check, the unit is expected to exist)
      (local.set $slot (call $hashUnitToIndex (local.get $key) (local.get $hashTableCapacity)))
      (loop $probe
        (local.set $slotAddr
          (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $slot))))
        (if (i32.ne (i32.load (local.get $slotAddr)) (local.get $key))
          (then
            (local.set $slot (call $getNextIndex (local.get $slot) (local.get $tableBytes)))
            (br $probe))))

      ;; ItemsWritten lives at slot + 12; skip the triple once the cap is hit
      (local.set $written (i32.load (i64.add (local.get $slotAddr) (i64.const 12))))
      (br_if $each_unit (i32.ge_u (local.get $written) (local.get $maxPositionsPerUnit)))

      ;; Output element index = Offset (slot + 8) + ItemsWritten; << 2 turns it into bytes
      (local.set $dstOff
        (i64.extend_i32_u
          (i32.shl
            (i32.add
              (i32.load (i64.add (local.get $slotAddr) (i64.const 8)))
              (local.get $written))
            (i32.const 2))))

      (i32.store (i64.add (local.get $filteredUnitsPtr) (local.get $dstOff)) (local.get $key))
      (i32.store (i64.add (local.get $filteredBufferIndicesPtr) (local.get $dstOff)) (local.get $bufIdx))
      (i32.store (i64.add (local.get $filteredBlocksPtr) (local.get $dstOff)) (local.get $blk))

      ;; ItemsWritten += 1
      (i32.store
        (i64.add (local.get $slotAddr) (i64.const 12))
        (i32.add (local.get $written) (i32.const 1)))

      (br $each_unit)))
)
|
|
755
|
+
|
|
756
|
+
;; Phase 4 Fill Filter Vector: Fill with collision detection (SIMD vectorized)
;;
;; Same contract as the scalar phase 4 fill: every (unit, bufferIndex, block)
;; triple is copied to output position Offset + ItemsWritten of its unit's
;; hash-table slot while ItemsWritten < maxPositionsPerUnit.
;;
;; Units are processed four at a time. The four home-slot indices are computed
;; with SIMD; if all four home slots already hold the matching unit the group
;; takes the fast path (no probing), otherwise the whole group takes the slow
;; path and each unit is located by linear probing from its home slot.
;; Any 1-3 units left over when totalUnits is not a multiple of 4 are handled
;; by a scalar tail loop after the SIMD loop.
;;
;; Parameters:
;; - unitsPtr: pointer to units array
;; - bufferIndicesPtr: pointer to buffer indices array
;; - blocksPtr: pointer to blocks array
;; - totalUnits: number of units
;; - hashTablePtr: pointer to hash table
;; - hashTableCapacity: capacity of hash table
;; - maxPositionsPerUnit: max positions per unit
;; - filteredUnitsPtr: output array for units
;; - filteredBufferIndicesPtr: output array for buffer indices
;; - filteredBlocksPtr: output array for blocks
;; Returns: number of times slow path was taken (as f64); counts groups of
;; four, tail units are not counted
;;
;; NOTE(review): the SIMD home-slot formula below assumes hashTableCapacity is
;; a power of two greater than 1 and presumably mirrors $hashUnitToIndex -
;; confirm. (A mismatch would only cost speed: the slow path still probes
;; until it finds the unit.)
(func (export "phase4_fill_filter_vector")
  (param $unitsPtr i64)
  (param $bufferIndicesPtr i64)
  (param $blocksPtr i64)
  (param $totalUnits i32)
  (param $hashTablePtr i64)
  (param $hashTableCapacity i32)
  (param $maxPositionsPerUnit i32)
  (param $filteredUnitsPtr i64)
  (param $filteredBufferIndicesPtr i64)
  (param $filteredBlocksPtr i64)
  (result f64)

  (local $i i32)
  (local $slowPathCount f64)
  (local $unitAddr i64)
  (local $bufferIndexAddr i64)
  (local $blockAddr i64)
  (local $mainLoopEnd i32)
  (local $hashTableSize i32)

  ;; SIMD vectors
  (local $unitsVec v128)
  (local $bufferIndicesVec v128)
  (local $blocksVec v128)
  (local $hashesVec v128)
  (local $indicesVec v128)

  ;; SIMD constants
  (local $hashConstVec v128)
  (local $mult16Vec v128)
  (local $shiftAmount i32)

  ;; Scalar temps for collision checking
  (local $idx0 i32)
  (local $idx1 i32)
  (local $idx2 i32)
  (local $idx3 i32)
  (local $unit0 i32)
  (local $unit1 i32)
  (local $unit2 i32)
  (local $unit3 i32)
  (local $bufferIndex0 i32)
  (local $bufferIndex1 i32)
  (local $bufferIndex2 i32)
  (local $bufferIndex3 i32)
  (local $block0 i32)
  (local $block1 i32)
  (local $block2 i32)
  (local $block3 i32)
  (local $hash0 i32)
  (local $hash1 i32)
  (local $hash2 i32)
  (local $hash3 i32)
  (local $allMatch i32)

  ;; Temps for slow path and tail processing
  (local $unit i32)
  (local $bufferIndex i32)
  (local $block i32)
  (local $index i32)
  (local $hashValue i32)
  (local $itemsWritten i32)
  (local $offset i32)
  (local $writePos i32)
  (local $writeAddr i64)
  (local $hashAddr i64)

  (local.set $i (i32.const 0))
  (local.set $unitAddr (local.get $unitsPtr))
  (local.set $bufferIndexAddr (local.get $bufferIndicesPtr))
  (local.set $blockAddr (local.get $blocksPtr))
  (local.set $slowPathCount (f64.const 0))

  ;; Calculate hash table size in bytes (16 bytes per slot)
  (local.set $hashTableSize (i32.mul (local.get $hashTableCapacity) (i32.const 16)))

  ;; Initialize SIMD constants
  (local.set $hashConstVec (i32x4.splat (i32.const 2654435761)))
  (local.set $mult16Vec (i32x4.splat (i32.const 16)))

  ;; Calculate optimal shift amount for Fibonacci hashing
  ;; shift_amount = 32 - log2(capacity) = 32 - (31 - clz(capacity)) = 1 + clz(capacity)
  (local.set $shiftAmount (i32.add (i32.clz (local.get $hashTableCapacity)) (i32.const 1)))

  ;; Calculate main loop end (rounded down to multiple of 4)
  (local.set $mainLoopEnd
    (i32.sub (local.get $totalUnits) (i32.rem_u (local.get $totalUnits) (i32.const 4))))

  ;; Main SIMD loop - process 4 units per iteration
  (block $break_main
    (loop $continue_main
      (br_if $break_main (i32.ge_u (local.get $i) (local.get $mainLoopEnd)))

      ;; Load 4 units, buffer indices, and blocks at once using SIMD
      (local.set $unitsVec (v128.load (local.get $unitAddr)))
      (local.set $unitAddr (i64.add (local.get $unitAddr) (i64.const 16)))

      (local.set $bufferIndicesVec (v128.load (local.get $bufferIndexAddr)))
      (local.set $bufferIndexAddr (i64.add (local.get $bufferIndexAddr) (i64.const 16)))

      (local.set $blocksVec (v128.load (local.get $blockAddr)))
      (local.set $blockAddr (i64.add (local.get $blockAddr) (i64.const 16)))

      ;; Hash all 4 units in parallel: hash = unit * 2654435761
      (local.set $hashesVec (i32x4.mul (local.get $unitsVec) (local.get $hashConstVec)))

      ;; Fibonacci hashing: shift by (32 - log2(capacity))
      ;; This automatically masks to the correct number of bits without needing AND
      (local.set $hashesVec (i32x4.shr_u (local.get $hashesVec) (local.get $shiftAmount)))

      ;; Multiply by 16 - vectorized (each slot is 16 bytes: 4 u32s)
      (local.set $indicesVec (i32x4.mul (local.get $hashesVec) (local.get $mult16Vec)))

      ;; Extract all 4 indices for collision checking
      (local.set $idx0 (i32x4.extract_lane 0 (local.get $indicesVec)))
      (local.set $idx1 (i32x4.extract_lane 1 (local.get $indicesVec)))
      (local.set $idx2 (i32x4.extract_lane 2 (local.get $indicesVec)))
      (local.set $idx3 (i32x4.extract_lane 3 (local.get $indicesVec)))

      ;; Extract all 4 units for collision checking
      (local.set $unit0 (i32x4.extract_lane 0 (local.get $unitsVec)))
      (local.set $unit1 (i32x4.extract_lane 1 (local.get $unitsVec)))
      (local.set $unit2 (i32x4.extract_lane 2 (local.get $unitsVec)))
      (local.set $unit3 (i32x4.extract_lane 3 (local.get $unitsVec)))

      ;; Extract buffer indices and blocks
      (local.set $bufferIndex0 (i32x4.extract_lane 0 (local.get $bufferIndicesVec)))
      (local.set $bufferIndex1 (i32x4.extract_lane 1 (local.get $bufferIndicesVec)))
      (local.set $bufferIndex2 (i32x4.extract_lane 2 (local.get $bufferIndicesVec)))
      (local.set $bufferIndex3 (i32x4.extract_lane 3 (local.get $bufferIndicesVec)))

      (local.set $block0 (i32x4.extract_lane 0 (local.get $blocksVec)))
      (local.set $block1 (i32x4.extract_lane 1 (local.get $blocksVec)))
      (local.set $block2 (i32x4.extract_lane 2 (local.get $blocksVec)))
      (local.set $block3 (i32x4.extract_lane 3 (local.get $blocksVec)))

      ;; Load hash values at initial indices to check for collisions
      (local.set $hash0 (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $idx0)))))
      (local.set $hash1 (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $idx1)))))
      (local.set $hash2 (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $idx2)))))
      (local.set $hash3 (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $idx3)))))

      ;; Check if all 4 match (no collisions on first probe)
      (local.set $allMatch
        (i32.and
          (i32.and
            (i32.eq (local.get $hash0) (local.get $unit0))
            (i32.eq (local.get $hash1) (local.get $unit1)))
          (i32.and
            (i32.eq (local.get $hash2) (local.get $unit2))
            (i32.eq (local.get $hash3) (local.get $unit3)))))

      (if (local.get $allMatch)
        (then
          ;; Fast path - no collisions, process all 4 units inline.
          ;; ItemsWritten is re-read for every unit, so two lanes holding the
          ;; same unit still receive consecutive output positions.

          ;; Process unit 0
          (local.set $itemsWritten (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx0) (i32.const 12))))))
          (if (i32.lt_u (local.get $itemsWritten) (local.get $maxPositionsPerUnit))
            (then
              (local.set $offset (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx0) (i32.const 8))))))
              (local.set $writePos (i32.add (local.get $offset) (local.get $itemsWritten)))
              (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
              (i32.store (i64.add (local.get $filteredUnitsPtr) (local.get $writeAddr)) (local.get $unit0))
              (i32.store (i64.add (local.get $filteredBufferIndicesPtr) (local.get $writeAddr)) (local.get $bufferIndex0))
              (i32.store (i64.add (local.get $filteredBlocksPtr) (local.get $writeAddr)) (local.get $block0))
              (i32.store
                (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx0) (i32.const 12))))
                (i32.add (local.get $itemsWritten) (i32.const 1)))))

          ;; Process unit 1
          (local.set $itemsWritten (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx1) (i32.const 12))))))
          (if (i32.lt_u (local.get $itemsWritten) (local.get $maxPositionsPerUnit))
            (then
              (local.set $offset (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx1) (i32.const 8))))))
              (local.set $writePos (i32.add (local.get $offset) (local.get $itemsWritten)))
              (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
              (i32.store (i64.add (local.get $filteredUnitsPtr) (local.get $writeAddr)) (local.get $unit1))
              (i32.store (i64.add (local.get $filteredBufferIndicesPtr) (local.get $writeAddr)) (local.get $bufferIndex1))
              (i32.store (i64.add (local.get $filteredBlocksPtr) (local.get $writeAddr)) (local.get $block1))
              (i32.store
                (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx1) (i32.const 12))))
                (i32.add (local.get $itemsWritten) (i32.const 1)))))

          ;; Process unit 2
          (local.set $itemsWritten (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx2) (i32.const 12))))))
          (if (i32.lt_u (local.get $itemsWritten) (local.get $maxPositionsPerUnit))
            (then
              (local.set $offset (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx2) (i32.const 8))))))
              (local.set $writePos (i32.add (local.get $offset) (local.get $itemsWritten)))
              (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
              (i32.store (i64.add (local.get $filteredUnitsPtr) (local.get $writeAddr)) (local.get $unit2))
              (i32.store (i64.add (local.get $filteredBufferIndicesPtr) (local.get $writeAddr)) (local.get $bufferIndex2))
              (i32.store (i64.add (local.get $filteredBlocksPtr) (local.get $writeAddr)) (local.get $block2))
              (i32.store
                (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx2) (i32.const 12))))
                (i32.add (local.get $itemsWritten) (i32.const 1)))))

          ;; Process unit 3
          (local.set $itemsWritten (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx3) (i32.const 12))))))
          (if (i32.lt_u (local.get $itemsWritten) (local.get $maxPositionsPerUnit))
            (then
              (local.set $offset (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx3) (i32.const 8))))))
              (local.set $writePos (i32.add (local.get $offset) (local.get $itemsWritten)))
              (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
              (i32.store (i64.add (local.get $filteredUnitsPtr) (local.get $writeAddr)) (local.get $unit3))
              (i32.store (i64.add (local.get $filteredBufferIndicesPtr) (local.get $writeAddr)) (local.get $bufferIndex3))
              (i32.store (i64.add (local.get $filteredBlocksPtr) (local.get $writeAddr)) (local.get $block3))
              (i32.store
                (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (i32.add (local.get $idx3) (i32.const 12))))
                (i32.add (local.get $itemsWritten) (i32.const 1)))))
        )
        (else
          ;; Slow path - at least one collision, process each unit with linear probing
          (local.set $slowPathCount (f64.add (local.get $slowPathCount) (f64.const 1)))

          ;; Process unit 0
          (local.set $unit (local.get $unit0))
          (local.set $bufferIndex (local.get $bufferIndex0))
          (local.set $block (local.get $block0))
          (local.set $index (local.get $idx0))
          (block $probe_break0
            (loop $probe_continue0
              (local.set $hashValue (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $index)))))
              (if (i32.eq (local.get $hashValue) (local.get $unit))
                (then (br $probe_break0)))
              (local.set $index (call $getNextIndex (local.get $index) (local.get $hashTableSize)))
              (br $probe_continue0)))
          (local.set $hashAddr (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $index))))
          (local.set $itemsWritten (i32.load (i64.add (local.get $hashAddr) (i64.const 12))))
          (if (i32.lt_u (local.get $itemsWritten) (local.get $maxPositionsPerUnit))
            (then
              (local.set $offset (i32.load (i64.add (local.get $hashAddr) (i64.const 8))))
              (local.set $writePos (i32.add (local.get $offset) (local.get $itemsWritten)))
              (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
              (i32.store (i64.add (local.get $filteredUnitsPtr) (local.get $writeAddr)) (local.get $unit))
              (i32.store (i64.add (local.get $filteredBufferIndicesPtr) (local.get $writeAddr)) (local.get $bufferIndex))
              (i32.store (i64.add (local.get $filteredBlocksPtr) (local.get $writeAddr)) (local.get $block))
              (i32.store (i64.add (local.get $hashAddr) (i64.const 12)) (i32.add (local.get $itemsWritten) (i32.const 1)))))

          ;; Process unit 1
          (local.set $unit (local.get $unit1))
          (local.set $bufferIndex (local.get $bufferIndex1))
          (local.set $block (local.get $block1))
          (local.set $index (local.get $idx1))
          (block $probe_break1
            (loop $probe_continue1
              (local.set $hashValue (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $index)))))
              (if (i32.eq (local.get $hashValue) (local.get $unit))
                (then (br $probe_break1)))
              (local.set $index (call $getNextIndex (local.get $index) (local.get $hashTableSize)))
              (br $probe_continue1)))
          (local.set $hashAddr (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $index))))
          (local.set $itemsWritten (i32.load (i64.add (local.get $hashAddr) (i64.const 12))))
          (if (i32.lt_u (local.get $itemsWritten) (local.get $maxPositionsPerUnit))
            (then
              (local.set $offset (i32.load (i64.add (local.get $hashAddr) (i64.const 8))))
              (local.set $writePos (i32.add (local.get $offset) (local.get $itemsWritten)))
              (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
              (i32.store (i64.add (local.get $filteredUnitsPtr) (local.get $writeAddr)) (local.get $unit))
              (i32.store (i64.add (local.get $filteredBufferIndicesPtr) (local.get $writeAddr)) (local.get $bufferIndex))
              (i32.store (i64.add (local.get $filteredBlocksPtr) (local.get $writeAddr)) (local.get $block))
              (i32.store (i64.add (local.get $hashAddr) (i64.const 12)) (i32.add (local.get $itemsWritten) (i32.const 1)))))

          ;; Process unit 2
          (local.set $unit (local.get $unit2))
          (local.set $bufferIndex (local.get $bufferIndex2))
          (local.set $block (local.get $block2))
          (local.set $index (local.get $idx2))
          (block $probe_break2
            (loop $probe_continue2
              (local.set $hashValue (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $index)))))
              (if (i32.eq (local.get $hashValue) (local.get $unit))
                (then (br $probe_break2)))
              (local.set $index (call $getNextIndex (local.get $index) (local.get $hashTableSize)))
              (br $probe_continue2)))
          (local.set $hashAddr (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $index))))
          (local.set $itemsWritten (i32.load (i64.add (local.get $hashAddr) (i64.const 12))))
          (if (i32.lt_u (local.get $itemsWritten) (local.get $maxPositionsPerUnit))
            (then
              (local.set $offset (i32.load (i64.add (local.get $hashAddr) (i64.const 8))))
              (local.set $writePos (i32.add (local.get $offset) (local.get $itemsWritten)))
              (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
              (i32.store (i64.add (local.get $filteredUnitsPtr) (local.get $writeAddr)) (local.get $unit))
              (i32.store (i64.add (local.get $filteredBufferIndicesPtr) (local.get $writeAddr)) (local.get $bufferIndex))
              (i32.store (i64.add (local.get $filteredBlocksPtr) (local.get $writeAddr)) (local.get $block))
              (i32.store (i64.add (local.get $hashAddr) (i64.const 12)) (i32.add (local.get $itemsWritten) (i32.const 1)))))

          ;; Process unit 3
          (local.set $unit (local.get $unit3))
          (local.set $bufferIndex (local.get $bufferIndex3))
          (local.set $block (local.get $block3))
          (local.set $index (local.get $idx3))
          (block $probe_break3
            (loop $probe_continue3
              (local.set $hashValue (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $index)))))
              (if (i32.eq (local.get $hashValue) (local.get $unit))
                (then (br $probe_break3)))
              (local.set $index (call $getNextIndex (local.get $index) (local.get $hashTableSize)))
              (br $probe_continue3)))
          (local.set $hashAddr (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $index))))
          (local.set $itemsWritten (i32.load (i64.add (local.get $hashAddr) (i64.const 12))))
          (if (i32.lt_u (local.get $itemsWritten) (local.get $maxPositionsPerUnit))
            (then
              (local.set $offset (i32.load (i64.add (local.get $hashAddr) (i64.const 8))))
              (local.set $writePos (i32.add (local.get $offset) (local.get $itemsWritten)))
              (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
              (i32.store (i64.add (local.get $filteredUnitsPtr) (local.get $writeAddr)) (local.get $unit))
              (i32.store (i64.add (local.get $filteredBufferIndicesPtr) (local.get $writeAddr)) (local.get $bufferIndex))
              (i32.store (i64.add (local.get $filteredBlocksPtr) (local.get $writeAddr)) (local.get $block))
              (i32.store (i64.add (local.get $hashAddr) (i64.const 12)) (i32.add (local.get $itemsWritten) (i32.const 1)))))
        ))

      ;; Increment by 4
      (local.set $i (i32.add (local.get $i) (i32.const 4)))
      (br $continue_main)))

  ;; Scalar tail - process the remaining totalUnits % 4 units.
  ;; Without this loop the last 1-3 units were silently dropped whenever
  ;; totalUnits was not a multiple of 4. $i and the three read pointers
  ;; already sit just past the last full group of four.
  (block $break_tail
    (loop $continue_tail
      (br_if $break_tail (i32.ge_u (local.get $i) (local.get $totalUnits)))

      ;; Read unit, bufferIndex, block and advance (4 bytes each)
      (local.set $unit (i32.load (local.get $unitAddr)))
      (local.set $bufferIndex (i32.load (local.get $bufferIndexAddr)))
      (local.set $block (i32.load (local.get $blockAddr)))
      (local.set $unitAddr (i64.add (local.get $unitAddr) (i64.const 4)))
      (local.set $bufferIndexAddr (i64.add (local.get $bufferIndexAddr) (i64.const 4)))
      (local.set $blockAddr (i64.add (local.get $blockAddr) (i64.const 4)))

      ;; Locate the unit's slot exactly like the scalar phase 4 fill does
      (local.set $index (call $hashUnitToIndex (local.get $unit) (local.get $hashTableCapacity)))
      (block $probe_break_tail
        (loop $probe_continue_tail
          (local.set $hashValue (i32.load (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $index)))))
          (if (i32.eq (local.get $hashValue) (local.get $unit))
            (then (br $probe_break_tail)))
          (local.set $index (call $getNextIndex (local.get $index) (local.get $hashTableSize)))
          (br $probe_continue_tail)))

      ;; Write the triple unless the unit already reached its cap
      (local.set $hashAddr (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $index))))
      (local.set $itemsWritten (i32.load (i64.add (local.get $hashAddr) (i64.const 12))))
      (if (i32.lt_u (local.get $itemsWritten) (local.get $maxPositionsPerUnit))
        (then
          (local.set $offset (i32.load (i64.add (local.get $hashAddr) (i64.const 8))))
          (local.set $writePos (i32.add (local.get $offset) (local.get $itemsWritten)))
          (local.set $writeAddr (i64.extend_i32_u (i32.mul (local.get $writePos) (i32.const 4))))
          (i32.store (i64.add (local.get $filteredUnitsPtr) (local.get $writeAddr)) (local.get $unit))
          (i32.store (i64.add (local.get $filteredBufferIndicesPtr) (local.get $writeAddr)) (local.get $bufferIndex))
          (i32.store (i64.add (local.get $filteredBlocksPtr) (local.get $writeAddr)) (local.get $block))
          (i32.store (i64.add (local.get $hashAddr) (i64.const 12)) (i32.add (local.get $itemsWritten) (i32.const 1)))))

      ;; Next unit
      (local.set $i (i32.add (local.get $i) (i32.const 1)))
      (br $continue_tail)))

  ;; Return slow path count
  (local.get $slowPathCount))
|
|
1077
|
+
|
|
1078
|
+
;; Count Collisions: Calculate collision count for phase1
|
|
1079
|
+
;; Parameters:
|
|
1080
|
+
;; - unitsPtr: pointer to units array
|
|
1081
|
+
;; - totalUnits: number of units
|
|
1082
|
+
;; - hashTablePtr: pointer to hash table
|
|
1083
|
+
;; - hashTableCapacity: capacity of hash table
|
|
1084
|
+
;; Returns: total collision count
|
|
1085
|
+
(func (export "count_collisions")
  (param $unitsPtr i64)
  (param $totalUnits i32)
  (param $hashTablePtr i64)
  (param $hashTableCapacity i32)
  (result i32)

  ;; Walks `totalUnits` consecutive i32 values starting at `unitsPtr` and, for
  ;; each one, probes the hash table at `hashTablePtr` until it reaches a slot
  ;; whose first i32 is either 0 or equal to the unit. Every slot stepped over
  ;; on the way adds one to the returned total.
  ;;
  ;; NOTE(review): a slot key of 0 is treated as "empty", so a unit whose value
  ;; is 0 always stops on the first slot it reads.
  ;; NOTE(review): probing has no step limit here; termination relies on the
  ;; table containing an empty slot or the unit itself -- confirm with caller.

  (local $remaining i32)   ;; units still to process
  (local $key i32)         ;; unit currently being looked up
  (local $slot i32)        ;; offset added to hashTablePtr for the probed slot
  (local $slotKey i32)     ;; i32 read from the start of the probed slot
  (local $tableBytes i32)  ;; hashTableCapacity * 16, handed to $getNextIndex
  (local $cursor i64)      ;; address of the next unit to read
  (local $total i32)       ;; running collision count

  ;; Capacity is scaled by 16 before being passed to $getNextIndex
  ;; (presumably 16 bytes per table entry -- confirm against table layout).
  (local.set $tableBytes (i32.mul (local.get $hashTableCapacity) (i32.const 16)))
  (local.set $total (i32.const 0))
  (local.set $remaining (local.get $totalUnits))
  (local.set $cursor (local.get $unitsPtr))

  (block $done
    (loop $next_unit
      ;; Stop once every unit has been consumed (also covers totalUnits == 0).
      (br_if $done (i32.eqz (local.get $remaining)))

      ;; Fetch the unit and advance to the following 4-byte element.
      (local.set $key (i32.load (local.get $cursor)))
      (local.set $cursor (i64.add (local.get $cursor) (i64.const 4)))

      ;; Starting slot for this unit.
      (local.set $slot
        (call $hashUnitToIndex (local.get $key) (local.get $hashTableCapacity)))

      ;; Probe: keep stepping while the slot is occupied by a different unit.
      (loop $probe
        (local.set $slotKey
          (i32.load
            (i64.add (local.get $hashTablePtr) (i64.extend_i32_u (local.get $slot)))))
        (if (i32.and
              (i32.ne (local.get $slotKey) (i32.const 0))
              (i32.ne (local.get $slotKey) (local.get $key)))
          (then
            ;; Occupied by another unit: count it and move to the next slot.
            (local.set $total (i32.add (local.get $total) (i32.const 1)))
            (local.set $slot
              (call $getNextIndex (local.get $slot) (local.get $tableBytes)))
            (br $probe))))

      (local.set $remaining (i32.sub (local.get $remaining) (i32.const 1)))
      (br $next_unit)))

  (local.get $total))
|
|
1145
|
+
)
|