@rlabs-inc/sparse 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +92 -0
- package/libsparse.dylib +0 -0
- package/native/Makefile +30 -0
- package/native/libsparse.dylib +0 -0
- package/native/sparse.h +180 -0
- package/native/sparse.m +734 -0
- package/native/sparse.metal +215 -0
- package/package.json +38 -0
- package/src/ffi.ts +156 -0
- package/src/gpu.ts +382 -0
- package/src/index.ts +7 -0
- package/src/test-debug-spikes.ts +70 -0
- package/src/test-limits.ts +140 -0
- package/src/test-scatter-loop.ts +226 -0
- package/src/test-stress.ts +160 -0
- package/src/test-webgpu.ts +31 -0
package/src/test-scatter-loop.ts
@@ -0,0 +1,226 @@
// ============================================================================
// TEST: Scatter-Add in Loop
// This is the EXACT pattern that crashed node-mlx.
// If this works, we've solved the problem.
// ============================================================================

import * as gpu from './gpu'

console.log('=== SPARSE: Testing Scatter-Add in Loop ===\n')

// Get device info
const info = gpu.deviceInfo()
console.log('Device:', info.name)
console.log('Memory:', (Number(info.memory) / 1024 / 1024 / 1024).toFixed(1), 'GB')
console.log()

// Test 1: Basic scatter-add
console.log('Test 1: Basic scatter-add')
{
  const arr = gpu.zeros(10)
  const idxs = gpu.indices([0, 2, 5])
  const vals = gpu.array([1, 2, 3])

  gpu.scatterAdd(arr, idxs, vals)

  console.log(' Expected: [1, 0, 2, 0, 0, 3, 0, 0, 0, 0]')
  console.log(' Got: ', arr.toArray())
  console.log()

  arr.free()
  idxs.free()
  vals.free()
}

// Test 2: Scatter-add with duplicate indices (critical!)
console.log('Test 2: Scatter-add with DUPLICATE indices')
{
  const arr = gpu.zeros(5)
  const idxs = gpu.indices([0, 0, 0, 2, 2])
  const vals = gpu.array([1, 2, 3, 0.5, 0.5])

  gpu.scatterAdd(arr, idxs, vals)

  console.log(' Expected: [6, 0, 1, 0, 0] (1+2+3=6, 0.5+0.5=1)')
  console.log(' Got: ', arr.toArray())
  console.log()

  arr.free()
  idxs.free()
  vals.free()
}

// Test 3: THE CRITICAL TEST - Scatter-add in a loop with REUSED indices
console.log('Test 3: Scatter-add in LOOP with REUSED indices')
console.log(' (This is what crashed node-mlx!)')
{
  // Create indices ONCE, reuse in every iteration
  const idxs = gpu.indices([0, 1, 2])
  const vals = gpu.array([1, 2, 3])

  for (let i = 0; i < 10; i++) {
    const arr = gpu.zeros(10)
    gpu.scatterAdd(arr, idxs, vals)
    console.log(` Iteration ${i}: ${arr.toArray().slice(0, 5).join(', ')}...`)
    arr.free()
  }

  console.log(' ALL 10 ITERATIONS COMPLETED!')
  console.log()

  idxs.free()
  vals.free()
}

// Test 4: Large-scale performance
console.log('Test 4: Large-scale scatter-add (1M neurons, 100K synapses)')
{
  const numNeurons = 1_000_000
  const numSynapses = 100_000

  // Create indices once
  const idxData = new Uint32Array(numSynapses)
  for (let i = 0; i < numSynapses; i++) {
    idxData[i] = Math.floor(Math.random() * numNeurons)
  }
  const idxs = gpu.indices(idxData)
  const vals = gpu.random.uniform(numSynapses, 0, 1)

  console.log(' Running 10 iterations...')
  const start = performance.now()

  for (let i = 0; i < 10; i++) {
    const arr = gpu.zeros(numNeurons)
    gpu.scatterAdd(arr, idxs, vals)
    arr.free()
  }

  const elapsed = performance.now() - start
  console.log(` Time: ${elapsed.toFixed(2)}ms (${(elapsed / 10).toFixed(2)}ms per iteration)`)
  console.log()

  idxs.free()
  vals.free()
}

// Test 5: Full Izhikevich pattern
console.log('Test 5: Full Izhikevich neuron simulation pattern')
{
  const numNeurons = 500
  const numSynapses = 5000
  const numSteps = 20

  // Neuron state
  let v = gpu.full(numNeurons, -65)
  let u = gpu.multiply(v, 0.2)

  // Static connectivity - created ONCE, reused every timestep
  const preIdxData = new Uint32Array(numSynapses)
  const postIdxData = new Uint32Array(numSynapses)
  for (let i = 0; i < numSynapses; i++) {
    preIdxData[i] = Math.floor(Math.random() * numNeurons)
    postIdxData[i] = Math.floor(Math.random() * numNeurons)
  }
  const preIndices = gpu.indices(preIdxData)
  const postIndices = gpu.indices(postIdxData)
  const weights = gpu.random.uniform(numSynapses, 0, 0.5)

  // Input
  const inputIdxData = new Uint32Array(50)
  for (let i = 0; i < 50; i++) inputIdxData[i] = i
  const inputIndices = gpu.indices(inputIdxData)
  const inputValues = gpu.full(50, 15)

  console.log(' Simulating', numSteps, 'timesteps...')
  const start = performance.now()

  for (let t = 0; t < numSteps; t++) {
    // 1. Detect firing
    const fired = gpu.greaterEqual(v, 30)

    // 2. Sparse transmission
    const preFired = gpu.gatherBool(fired, preIndices)
    const zerosArr = gpu.zeros(numSynapses)
    const contribution = gpu.where(preFired, weights, zerosArr)

    // 3. Scatter-add currents
    let current = gpu.zeros(numNeurons)
    gpu.scatterAdd(current, postIndices, contribution)
    gpu.scatterAdd(current, inputIndices, inputValues)

    // 4. Izhikevich update
    const vSq = gpu.square(v)
    const vSq004 = gpu.multiply(vSq, 0.04)
    const v5 = gpu.multiply(v, 5)
    const sum1 = gpu.add(vSq004, v5)
    const sum2 = gpu.add(sum1, 140)
    const negU = gpu.multiply(u, -1)
    const sum3 = gpu.add(sum2, negU)
    const dv = gpu.add(sum3, current)

    const bv = gpu.multiply(v, 0.2)
    const diff = gpu.subtract(bv, u)
    const du = gpu.multiply(diff, 0.02)

    const newV = gpu.add(v, dv)
    const newU = gpu.add(u, du)

    // 5. Reset spiked neurons
    const vReset = gpu.where(fired, -65, newV)
    const uBump = gpu.add(u, 8)
    const uReset = gpu.where(fired, uBump, newU)

    // Count spikes
    const spikeCount = gpu.sum(fired)

    if (t % 5 === 0) {
      console.log(` Step ${t}: ${spikeCount} spikes`)
    }

    // Cleanup intermediates
    fired.free()
    preFired.free()
    zerosArr.free()
    contribution.free()
    current.free()
    vSq.free()
    vSq004.free()
    v5.free()
    sum1.free()
    sum2.free()
    negU.free()
    sum3.free()
    dv.free()
    bv.free()
    diff.free()
    du.free()
    newV.free()
    newU.free()
    uBump.free()

    // Swap state
    v.free()
    u.free()
    v = vReset
    u = uReset
  }

  const elapsed = performance.now() - start
  console.log(` Time: ${elapsed.toFixed(2)}ms (${(elapsed / numSteps).toFixed(2)}ms per step)`)
  console.log(' SIMULATION COMPLETE!')
  console.log()

  // Cleanup
  v.free()
  u.free()
  preIndices.free()
  postIndices.free()
  weights.free()
  inputIndices.free()
  inputValues.free()
}

// Cleanup
gpu.cleanup()

console.log('=== ALL TESTS PASSED ===')
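Note: the tests above rely on scatter-add semantics in which duplicate indices accumulate into the same output slot (hence Test 2's expected [6, 0, 1, 0, 0]). For reference, the equivalent CPU operation is a plain loop; the helper below is an illustrative sketch only and is not part of this package's API.

function scatterAddCpu(target: Float32Array, indices: Uint32Array, values: Float32Array): void {
  for (let i = 0; i < indices.length; i++) {
    // Duplicate indices add into the same slot, e.g. indices [0, 0, 0] sum all three values into target[0]
    target[indices[i]] += values[i]
  }
}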
package/src/test-stress.ts
@@ -0,0 +1,160 @@
// ============================================================================
// STRESS TEST: Let's see what the M1 Max 64GB can handle!
// ============================================================================

import * as gpu from './gpu'

console.log('=== STRESS TEST: Frying the M1 Max ===\n')

const info = gpu.deviceInfo()
console.log('Device:', info.name)
console.log('Memory:', (Number(info.memory) / 1024 / 1024 / 1024).toFixed(1), 'GB')
console.log()

// Test configurations - let's scale up!
const tests = [
  { neurons: 1_000, synapses: 10_000, steps: 100 },
  { neurons: 10_000, synapses: 100_000, steps: 100 },
  { neurons: 100_000, synapses: 1_000_000, steps: 100 },
  { neurons: 1_000_000, synapses: 10_000_000, steps: 50 },
  { neurons: 10_000_000, synapses: 100_000_000, steps: 10 }, // 10M neurons, 100M synapses!
]

for (const { neurons, synapses, steps } of tests) {
  console.log(`\n${'='.repeat(60)}`)
  console.log(`Testing: ${neurons.toLocaleString()} neurons, ${synapses.toLocaleString()} synapses`)
  console.log(`${'='.repeat(60)}`)

  try {
    // Create neuron state
    console.log(' Allocating neuron state...')
    const allocStart = performance.now()

    let v = gpu.full(neurons, -65)
    let u = gpu.multiply(v, 0.2)

    // Create connectivity
    console.log(' Creating connectivity...')
    const preIdxData = new Uint32Array(synapses)
    const postIdxData = new Uint32Array(synapses)
    for (let i = 0; i < synapses; i++) {
      preIdxData[i] = Math.floor(Math.random() * neurons)
      postIdxData[i] = Math.floor(Math.random() * neurons)
    }

    const preIndices = gpu.indices(preIdxData)
    const postIndices = gpu.indices(postIdxData)
    const weights = gpu.random.uniform(synapses, 0, 0.5)

    // Input to 10% of neurons
    const numInput = Math.floor(neurons * 0.1)
    const inputIdxData = new Uint32Array(numInput)
    for (let i = 0; i < numInput; i++) inputIdxData[i] = i
    const inputIndices = gpu.indices(inputIdxData)
    const inputValues = gpu.full(numInput, 15)

    const allocTime = performance.now() - allocStart
    console.log(` Allocation time: ${allocTime.toFixed(0)}ms`)

    // Simulate
    console.log(` Running ${steps} timesteps...`)
    const simStart = performance.now()
    let totalSpikes = 0

    for (let t = 0; t < steps; t++) {
      // 1. Detect firing
      const fired = gpu.greaterEqual(v, 30)

      // 2. Sparse transmission
      const preFired = gpu.gatherBool(fired, preIndices)
      const zerosArr = gpu.zeros(synapses)
      const contribution = gpu.where(preFired, weights, zerosArr)

      // 3. Scatter-add currents
      let current = gpu.zeros(neurons)
      gpu.scatterAdd(current, postIndices, contribution)
      gpu.scatterAdd(current, inputIndices, inputValues)

      // 4. Izhikevich update (simplified for speed)
      const vSq = gpu.square(v)
      const term1 = gpu.multiply(vSq, 0.04)
      const term2 = gpu.multiply(v, 5)
      const sum1 = gpu.add(term1, term2)
      const sum2 = gpu.add(sum1, 140)
      const sum3 = gpu.subtract(sum2, u)
      const dv = gpu.add(sum3, current)

      const bv = gpu.multiply(v, 0.2)
      const diff = gpu.subtract(bv, u)
      const du = gpu.multiply(diff, 0.02)

      const newV = gpu.add(v, dv)
      const newU = gpu.add(u, du)

      // 5. Reset
      const vReset = gpu.where(fired, -65, newV)
      const uBump = gpu.add(newU, 8)
      const uReset = gpu.where(fired, uBump, newU)

      // Count spikes
      const spikeCount = gpu.sum(fired)
      totalSpikes += spikeCount

      // Cleanup intermediates
      fired.free()
      preFired.free()
      zerosArr.free()
      contribution.free()
      current.free()
      vSq.free()
      term1.free()
      term2.free()
      sum1.free()
      sum2.free()
      sum3.free()
      dv.free()
      bv.free()
      diff.free()
      du.free()
      newV.free()
      newU.free()
      uBump.free()

      v.free()
      u.free()
      v = vReset
      u = uReset

      if (t % Math.max(1, Math.floor(steps / 5)) === 0) {
        process.stdout.write(` Step ${t}/${steps} (${spikeCount.toLocaleString()} spikes)\n`)
      }
    }

    const simTime = performance.now() - simStart
    const msPerStep = simTime / steps

    console.log()
    console.log(` ✅ COMPLETED!`)
    console.log(` Total time: ${simTime.toFixed(0)}ms`)
    console.log(` Per step: ${msPerStep.toFixed(2)}ms`)
    console.log(` Total spikes: ${totalSpikes.toLocaleString()}`)
    console.log(` Neurons/ms: ${(neurons / msPerStep).toLocaleString()}`)
    console.log(` Synapses/ms: ${(synapses / msPerStep).toLocaleString()}`)

    // Cleanup
    v.free()
    u.free()
    preIndices.free()
    postIndices.free()
    weights.free()
    inputIndices.free()
    inputValues.free()

  } catch (error) {
    console.log(` ❌ FAILED: ${error}`)
    break
  }
}

gpu.cleanup()
console.log('\n=== STRESS TEST COMPLETE ===')
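Note: the chains of gpu.add/gpu.multiply calls in both simulation loops implement the standard Izhikevich neuron update with a = 0.02, b = 0.2, c = -65, d = 8 (dv = 0.04*v^2 + 5*v + 140 - u + I, du = a*(b*v - u), reset when v >= 30). A minimal scalar sketch of what one timestep computes per neuron, assuming that standard formulation; izhikevichStep is an illustrative helper, not part of this package.

function izhikevichStep(v: number, u: number, I: number): { v: number; u: number; fired: boolean } {
  const fired = v >= 30                          // spike threshold, as in gpu.greaterEqual(v, 30)
  const dv = 0.04 * v * v + 5 * v + 140 - u + I  // membrane potential increment
  const du = 0.02 * (0.2 * v - u)                // recovery variable increment, a*(b*v - u)
  return {
    v: fired ? -65 : v + dv,                     // reset to c = -65 on spike
    u: fired ? u + 8 : u + du,                   // bump recovery variable by d = 8 on spike
    fired,
  }
}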
package/src/test-webgpu.ts
@@ -0,0 +1,31 @@
/**
 * Quick test to check WebGPU availability in Bun
 */

// Check if WebGPU is available
console.log('Checking WebGPU support in Bun...')
console.log()

// @ts-ignore - checking global availability
const hasGPU = typeof navigator !== 'undefined' && 'gpu' in navigator

console.log('navigator.gpu available:', hasGPU)

if (hasGPU) {
  // @ts-ignore
  const adapter = await navigator.gpu.requestAdapter()
  console.log('Adapter:', adapter)

  if (adapter) {
    const device = await adapter.requestDevice()
    console.log('Device:', device)
    console.log('Device limits:', device.limits)
  }
} else {
  console.log()
  console.log('WebGPU not available in Bun runtime.')
  console.log('Options:')
  console.log(' 1. Use wgpu-native bindings (Rust → FFI)')
  console.log(' 2. Use Metal directly via FFI')
  console.log(' 3. Use Dawn (Google\'s WebGPU implementation) via FFI')
}