@rlabs-inc/sparse 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,226 @@
1
+ // ============================================================================
2
+ // TEST: Scatter-Add in Loop
3
+ // This is the EXACT pattern that crashed node-mlx.
4
+ // If this works, we've solved the problem.
5
+ // ============================================================================
6
+
7
+ import * as gpu from './gpu'
8
+
9
+ console.log('=== SPARSE: Testing Scatter-Add in Loop ===\n')
10
+
11
+ // Get device info
12
+ const info = gpu.deviceInfo()
13
+ console.log('Device:', info.name)
14
+ console.log('Memory:', (Number(info.memory) / 1024 / 1024 / 1024).toFixed(1), 'GB')
15
+ console.log()
16
+
17
+ // Test 1: Basic scatter-add
18
+ console.log('Test 1: Basic scatter-add')
19
+ {
20
+ const arr = gpu.zeros(10)
21
+ const idxs = gpu.indices([0, 2, 5])
22
+ const vals = gpu.array([1, 2, 3])
23
+
24
+ gpu.scatterAdd(arr, idxs, vals)
25
+
26
+ console.log(' Expected: [1, 0, 2, 0, 0, 3, 0, 0, 0, 0]')
27
+ console.log(' Got: ', arr.toArray())
28
+ console.log()
29
+
30
+ arr.free()
31
+ idxs.free()
32
+ vals.free()
33
+ }
34
+
35
+ // Test 2: Scatter-add with duplicate indices (critical!)
36
+ console.log('Test 2: Scatter-add with DUPLICATE indices')
37
+ {
38
+ const arr = gpu.zeros(5)
39
+ const idxs = gpu.indices([0, 0, 0, 2, 2])
40
+ const vals = gpu.array([1, 2, 3, 0.5, 0.5])
41
+
42
+ gpu.scatterAdd(arr, idxs, vals)
43
+
44
+ console.log(' Expected: [6, 0, 1, 0, 0] (1+2+3=6, 0.5+0.5=1)')
45
+ console.log(' Got: ', arr.toArray())
46
+ console.log()
47
+
48
+ arr.free()
49
+ idxs.free()
50
+ vals.free()
51
+ }
52
+
53
+ // Test 3: THE CRITICAL TEST - Scatter-add in a loop with REUSED indices
54
+ console.log('Test 3: Scatter-add in LOOP with REUSED indices')
55
+ console.log(' (This is what crashed node-mlx!)')
56
+ {
57
+ // Create indices ONCE, reuse in every iteration
58
+ const idxs = gpu.indices([0, 1, 2])
59
+ const vals = gpu.array([1, 2, 3])
60
+
61
+ for (let i = 0; i < 10; i++) {
62
+ const arr = gpu.zeros(10)
63
+ gpu.scatterAdd(arr, idxs, vals)
64
+ console.log(` Iteration ${i}: ${arr.toArray().slice(0, 5).join(', ')}...`)
65
+ arr.free()
66
+ }
67
+
68
+ console.log(' ALL 10 ITERATIONS COMPLETED!')
69
+ console.log()
70
+
71
+ idxs.free()
72
+ vals.free()
73
+ }
74
+
75
+ // Test 4: Large-scale performance
76
+ console.log('Test 4: Large-scale scatter-add (1M neurons, 100K synapses)')
77
+ {
78
+ const numNeurons = 1_000_000
79
+ const numSynapses = 100_000
80
+
81
+ // Create indices once
82
+ const idxData = new Uint32Array(numSynapses)
83
+ for (let i = 0; i < numSynapses; i++) {
84
+ idxData[i] = Math.floor(Math.random() * numNeurons)
85
+ }
86
+ const idxs = gpu.indices(idxData)
87
+ const vals = gpu.random.uniform(numSynapses, 0, 1)
88
+
89
+ console.log(' Running 10 iterations...')
90
+ const start = performance.now()
91
+
92
+ for (let i = 0; i < 10; i++) {
93
+ const arr = gpu.zeros(numNeurons)
94
+ gpu.scatterAdd(arr, idxs, vals)
95
+ arr.free()
96
+ }
97
+
98
+ const elapsed = performance.now() - start
99
+ console.log(` Time: ${elapsed.toFixed(2)}ms (${(elapsed / 10).toFixed(2)}ms per iteration)`)
100
+ console.log()
101
+
102
+ idxs.free()
103
+ vals.free()
104
+ }
105
+
106
+ // Test 5: Full Izhikevich pattern - runs numSteps timesteps of a 500-neuron / 5000-synapse network built from gather, where and scatter-add calls, freeing every per-step array by hand
107
+ console.log('Test 5: Full Izhikevich neuron simulation pattern')
108
+ {
109
+ const numNeurons = 500
110
+ const numSynapses = 5000
111
+ const numSteps = 20
112
+
113
+ // Neuron state: v comes from gpu.full(numNeurons, -65) and u from gpu.multiply(v, 0.2); both are `let` because they are rebound at the end of every timestep
114
+ let v = gpu.full(numNeurons, -65)
115
+ let u = gpu.multiply(v, 0.2)
116
+
117
+ // Static connectivity - created ONCE, reused every timestep. Each synapse gets a random pre index and a random post index in [0, numNeurons).
118
+ const preIdxData = new Uint32Array(numSynapses)
119
+ const postIdxData = new Uint32Array(numSynapses)
120
+ for (let i = 0; i < numSynapses; i++) {
121
+ preIdxData[i] = Math.floor(Math.random() * numNeurons)
122
+ postIdxData[i] = Math.floor(Math.random() * numNeurons)
123
+ }
124
+ const preIndices = gpu.indices(preIdxData)
125
+ const postIndices = gpu.indices(postIdxData)
126
+ const weights = gpu.random.uniform(numSynapses, 0, 0.5)
127
+
128
+ // Input: indices 0..49 paired with the fifty values of gpu.full(50, 15); scatter-added into the current on every step
129
+ const inputIdxData = new Uint32Array(50)
130
+ for (let i = 0; i < 50; i++) inputIdxData[i] = i
131
+ const inputIndices = gpu.indices(inputIdxData)
132
+ const inputValues = gpu.full(50, 15)
133
+
134
+ console.log(' Simulating', numSteps, 'timesteps...')
135
+ const start = performance.now()
136
+
137
+ for (let t = 0; t < numSteps; t++) {
138
+ // 1. Detect firing: compare v against 30; the result is reused below by gatherBool, where and sum
139
+ const fired = gpu.greaterEqual(v, 30)
140
+
141
+ // 2. Sparse transmission: gather the fired flag at each synapse's pre index, then pick between weights and zerosArr with gpu.where (presumably weights where the flag is set - confirm against ./gpu)
142
+ const preFired = gpu.gatherBool(fired, preIndices)
143
+ const zerosArr = gpu.zeros(numSynapses)
144
+ const contribution = gpu.where(preFired, weights, zerosArr)
145
+
146
+ // 3. Scatter-add currents: a fresh zero array receives the synaptic contributions at postIndices and then the input values at inputIndices
147
+ let current = gpu.zeros(numNeurons)
148
+ gpu.scatterAdd(current, postIndices, contribution)
149
+ gpu.scatterAdd(current, inputIndices, inputValues)
150
+
151
+ // 4. Izhikevich update: dv = 0.04*v^2 + 5*v + 140 - u + current and du = 0.02 * (0.2*v - u), built one operation at a time so each intermediate can be freed below
152
+ const vSq = gpu.square(v)
153
+ const vSq004 = gpu.multiply(vSq, 0.04)
154
+ const v5 = gpu.multiply(v, 5)
155
+ const sum1 = gpu.add(vSq004, v5)
156
+ const sum2 = gpu.add(sum1, 140)
157
+ const negU = gpu.multiply(u, -1)
158
+ const sum3 = gpu.add(sum2, negU)
159
+ const dv = gpu.add(sum3, current)
160
+
161
+ const bv = gpu.multiply(v, 0.2)
162
+ const diff = gpu.subtract(bv, u)
163
+ const du = gpu.multiply(diff, 0.02)
164
+
165
+ const newV = gpu.add(v, dv)
166
+ const newU = gpu.add(u, du)
167
+
168
+ // 5. Reset spiked neurons: gpu.where chooses between -65 / uBump and the updated newV / newU using the fired mask (presumably the first value where fired is set - confirm against ./gpu)
169
+ const vReset = gpu.where(fired, -65, newV)
170
+ const uBump = gpu.add(u, 8) // NOTE(review): bump is computed from the pre-update u; the stress-test script uses newU here instead - confirm which is intended
171
+ const uReset = gpu.where(fired, uBump, newU)
172
+
173
+ // Count spikes: the value returned by gpu.sum(fired) is interpolated directly into the log line below
174
+ const spikeCount = gpu.sum(fired)
175
+
176
+ if (t % 5 === 0) {
177
+ console.log(` Step ${t}: ${spikeCount} spikes`)
178
+ }
179
+
180
+ // Cleanup intermediates: every array allocated in this timestep except vReset and uReset, which become the next state
181
+ fired.free()
182
+ preFired.free()
183
+ zerosArr.free()
184
+ contribution.free()
185
+ current.free()
186
+ vSq.free()
187
+ vSq004.free()
188
+ v5.free()
189
+ sum1.free()
190
+ sum2.free()
191
+ negU.free()
192
+ sum3.free()
193
+ dv.free()
194
+ bv.free()
195
+ diff.free()
196
+ du.free()
197
+ newV.free()
198
+ newU.free()
199
+ uBump.free()
200
+
201
+ // Swap state: release the previous v and u before rebinding the names to the reset arrays
202
+ v.free()
203
+ u.free()
204
+ v = vReset
205
+ u = uReset
206
+ }
207
+
208
+ const elapsed = performance.now() - start
209
+ console.log(` Time: ${elapsed.toFixed(2)}ms (${(elapsed / numSteps).toFixed(2)}ms per step)`)
210
+ console.log(' SIMULATION COMPLETE!')
211
+ console.log()
212
+
213
+ // Cleanup: the final v/u state plus every array created once before the loop
214
+ v.free()
215
+ u.free()
216
+ preIndices.free()
217
+ postIndices.free()
218
+ weights.free()
219
+ inputIndices.free()
220
+ inputValues.free()
221
+ }
222
+
223
+ // Cleanup: a single gpu.cleanup() call, made after every test block above has freed its own arrays
224
+ gpu.cleanup()
225
+
226
+ console.log('=== ALL TESTS PASSED ===') // printed whenever execution reaches this line, i.e. nothing above threw
@@ -0,0 +1,160 @@
1
+ // ============================================================================
2
+ // STRESS TEST: Let's see what the M1 Max 64GB can handle!
3
+ // ============================================================================
4
+
5
+ import * as gpu from './gpu'
6
+
7
+ console.log('=== STRESS TEST: Frying the M1 Max ===\n')
8
+
9
+ const info = gpu.deviceInfo()
10
+ console.log('Device:', info.name)
11
+ console.log('Memory:', (Number(info.memory) / 1024 / 1024 / 1024).toFixed(1), 'GB')
12
+ console.log()
13
+
14
+ // Test configurations, run in order by the loop below: each entry gives the neuron count, the synapse count and the number of timesteps. Every entry has 10 synapses per neuron, and each entry is 10x larger than the previous one.
15
+ const tests = [
16
+ { neurons: 1_000, synapses: 10_000, steps: 100 },
17
+ { neurons: 10_000, synapses: 100_000, steps: 100 },
18
+ { neurons: 100_000, synapses: 1_000_000, steps: 100 },
19
+ { neurons: 1_000_000, synapses: 10_000_000, steps: 50 },
20
+ { neurons: 10_000_000, synapses: 100_000_000, steps: 10 }, // 10M neurons, 100M synapses! (largest case; fewest steps)
21
+ ]
22
+
23
+ for (const { neurons, synapses, steps } of tests) { // one full allocate / simulate / report / free cycle per configuration
24
+ console.log(`\n${'='.repeat(60)}`)
25
+ console.log(`Testing: ${neurons.toLocaleString()} neurons, ${synapses.toLocaleString()} synapses`)
26
+ console.log(`${'='.repeat(60)}`)
27
+
28
+ try {
29
+ // Create neuron state: v from gpu.full(neurons, -65), u from gpu.multiply(v, 0.2); allocStart times everything up to the end of the input setup
30
+ console.log(' Allocating neuron state...')
31
+ const allocStart = performance.now()
32
+
33
+ let v = gpu.full(neurons, -65)
34
+ let u = gpu.multiply(v, 0.2)
35
+
36
+ // Create connectivity: one random pre index and one random post index in [0, neurons) per synapse, filled on the CPU and then passed to gpu.indices
37
+ console.log(' Creating connectivity...')
38
+ const preIdxData = new Uint32Array(synapses)
39
+ const postIdxData = new Uint32Array(synapses)
40
+ for (let i = 0; i < synapses; i++) {
41
+ preIdxData[i] = Math.floor(Math.random() * neurons)
42
+ postIdxData[i] = Math.floor(Math.random() * neurons)
43
+ }
44
+
45
+ const preIndices = gpu.indices(preIdxData)
46
+ const postIndices = gpu.indices(postIdxData)
47
+ const weights = gpu.random.uniform(synapses, 0, 0.5)
48
+
49
+ // Input to 10% of neurons: indices 0..numInput-1, each paired with the value 15
50
+ const numInput = Math.floor(neurons * 0.1)
51
+ const inputIdxData = new Uint32Array(numInput)
52
+ for (let i = 0; i < numInput; i++) inputIdxData[i] = i
53
+ const inputIndices = gpu.indices(inputIdxData)
54
+ const inputValues = gpu.full(numInput, 15)
55
+
56
+ const allocTime = performance.now() - allocStart
57
+ console.log(` Allocation time: ${allocTime.toFixed(0)}ms`)
58
+
59
+ // Simulate: totalSpikes accumulates the per-step gpu.sum(fired) results
60
+ console.log(` Running ${steps} timesteps...`)
61
+ const simStart = performance.now()
62
+ let totalSpikes = 0
63
+
64
+ for (let t = 0; t < steps; t++) {
65
+ // 1. Detect firing: compare v against 30; the result is reused below by gatherBool, where and sum
66
+ const fired = gpu.greaterEqual(v, 30)
67
+
68
+ // 2. Sparse transmission: gather the fired flag at each synapse's pre index, then pick between weights and zerosArr with gpu.where (presumably weights where the flag is set - confirm against ./gpu)
69
+ const preFired = gpu.gatherBool(fired, preIndices)
70
+ const zerosArr = gpu.zeros(synapses)
71
+ const contribution = gpu.where(preFired, weights, zerosArr)
72
+
73
+ // 3. Scatter-add currents: a fresh zero array receives the synaptic contributions at postIndices and then the input values at inputIndices
74
+ let current = gpu.zeros(neurons)
75
+ gpu.scatterAdd(current, postIndices, contribution)
76
+ gpu.scatterAdd(current, inputIndices, inputValues)
77
+
78
+ // 4. Izhikevich update (simplified for speed): dv = 0.04*v^2 + 5*v + 140 - u + current and du = 0.02 * (0.2*v - u); u is removed with a single gpu.subtract
79
+ const vSq = gpu.square(v)
80
+ const term1 = gpu.multiply(vSq, 0.04)
81
+ const term2 = gpu.multiply(v, 5)
82
+ const sum1 = gpu.add(term1, term2)
83
+ const sum2 = gpu.add(sum1, 140)
84
+ const sum3 = gpu.subtract(sum2, u)
85
+ const dv = gpu.add(sum3, current)
86
+
87
+ const bv = gpu.multiply(v, 0.2)
88
+ const diff = gpu.subtract(bv, u)
89
+ const du = gpu.multiply(diff, 0.02)
90
+
91
+ const newV = gpu.add(v, dv)
92
+ const newU = gpu.add(u, du)
93
+
94
+ // 5. Reset: gpu.where chooses between -65 / uBump and the updated newV / newU using the fired mask (presumably the first value where fired is set - confirm against ./gpu)
95
+ const vReset = gpu.where(fired, -65, newV)
96
+ const uBump = gpu.add(newU, 8) // NOTE(review): bump is computed from the already-updated newU; the scatter-add test script uses the pre-update u instead - confirm which is intended
97
+ const uReset = gpu.where(fired, uBump, newU)
98
+
99
+ // Count spikes: the value returned by gpu.sum(fired) is added to the running total and reused by the progress line below
100
+ const spikeCount = gpu.sum(fired)
101
+ totalSpikes += spikeCount
102
+
103
+ // Cleanup intermediates: every array allocated in this timestep except vReset and uReset, which become the next state
104
+ fired.free()
105
+ preFired.free()
106
+ zerosArr.free()
107
+ contribution.free()
108
+ current.free()
109
+ vSq.free()
110
+ term1.free()
111
+ term2.free()
112
+ sum1.free()
113
+ sum2.free()
114
+ sum3.free()
115
+ dv.free()
116
+ bv.free()
117
+ diff.free()
118
+ du.free()
119
+ newV.free()
120
+ newU.free()
121
+ uBump.free()
122
+
123
+ v.free()
124
+ u.free()
125
+ v = vReset
126
+ u = uReset
127
+
128
+ if (t % Math.max(1, Math.floor(steps / 5)) === 0) { // report roughly five times per run; Math.max(1, ...) keeps the modulus non-zero when steps < 5
129
+ process.stdout.write(` Step ${t}/${steps} (${spikeCount.toLocaleString()} spikes)\n`)
130
+ }
131
+ }
132
+
133
+ const simTime = performance.now() - simStart
134
+ const msPerStep = simTime / steps
135
+
136
+ console.log()
137
+ console.log(` ✅ COMPLETED!`)
138
+ console.log(` Total time: ${simTime.toFixed(0)}ms`)
139
+ console.log(` Per step: ${msPerStep.toFixed(2)}ms`)
140
+ console.log(` Total spikes: ${totalSpikes.toLocaleString()}`)
141
+ console.log(` Neurons/ms: ${(neurons / msPerStep).toLocaleString()}`)
142
+ console.log(` Synapses/ms: ${(synapses / msPerStep).toLocaleString()}`)
143
+
144
+ // Cleanup: the final v/u state plus every array created once before the timestep loop (only reached when the run completed without throwing)
145
+ v.free()
146
+ u.free()
147
+ preIndices.free()
148
+ postIndices.free()
149
+ weights.free()
150
+ inputIndices.free()
151
+ inputValues.free()
152
+
153
+ } catch (error) { // NOTE(review): arrays allocated inside the try are not freed individually on this path - confirm that gpu.cleanup() below releases them
154
+ console.log(` ❌ FAILED: ${error}`)
155
+ break // stop at the first failing configuration; the remaining (larger) ones are skipped
156
+ }
157
+ }
158
+
159
+ gpu.cleanup()
160
+ console.log('\n=== STRESS TEST COMPLETE ===')
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Probe script: reports whether the runtime exposes navigator.gpu and, if it does, prints the adapter, the device and the device limits.
3
+ */
4
+
5
+ // Announce the probe before touching any global
6
+ console.log('Checking WebGPU support in Bun...')
7
+ console.log()
8
+
9
+ // @ts-ignore - checking global availability
10
+ const hasGPU = typeof navigator !== 'undefined' && 'gpu' in navigator
11
+
12
+ console.log('navigator.gpu available:', hasGPU)
13
+
14
+ if (!hasGPU) {
15
+ console.log()
16
+ console.log('WebGPU not available in Bun runtime.')
17
+ console.log('Options:')
18
+ console.log(' 1. Use wgpu-native bindings (Rust → FFI)')
19
+ console.log(' 2. Use Metal directly via FFI')
20
+ console.log(' 3. Use Dawn (Google\'s WebGPU implementation) via FFI')
21
+ } else {
22
+ // @ts-ignore
23
+ const gpuAdapter = await navigator.gpu.requestAdapter()
24
+ console.log('Adapter:', gpuAdapter)
25
+
26
+ if (gpuAdapter) {
27
+ const gpuDevice = await gpuAdapter.requestDevice()
28
+ console.log('Device:', gpuDevice)
29
+ console.log('Device limits:', gpuDevice.limits)
30
+ }
31
+ }