@rlabs-inc/sparse 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/gpu.ts ADDED
@@ -0,0 +1,382 @@
1
+ // ============================================================================
2
+ // SPARSE - GPU Array API
3
+ // High-level TypeScript API for GPU compute
4
+ // Sparsity-first. Indices are READ-ONLY, never corrupted.
5
+ // ============================================================================
6
+
7
+ import { ptr, toArrayBuffer, type Pointer } from 'bun:ffi'
8
+ import { symbols, DataType } from './ffi'
9
+
10
+ // ============================================================================
11
+ // TYPES
12
+ // ============================================================================
13
+
14
/** Opaque native handle to a GPU buffer, as returned by the FFI symbols. */
export type BufferPtr = Pointer

/** Opaque native handle to the GPU context, as returned by `sparse_init`. */
export type ContextPtr = Pointer

/** Description of the GPU device behind the current context. */
export interface GPUDeviceInfo {
  /** Device name as reported by `sparse_device_name`. */
  name: string
  /** Device memory as reported by `sparse_device_memory` (presumably bytes — TODO confirm against the native side). */
  memory: bigint
}
21
+
22
+ // ============================================================================
23
+ // GPU CONTEXT
24
+ // ============================================================================
25
+
26
/** Module-wide GPU context handle; null until init() succeeds and again after cleanup(). */
let _context: ContextPtr | null = null

/**
 * Return the shared GPU context, creating it on first use.
 *
 * @returns the native context handle
 * @throws Error if `sparse_init` does not return a usable handle
 */
export function init(): ContextPtr {
  if (!_context) {
    _context = symbols.sparse_init()
    if (!_context) {
      throw new Error('Failed to initialize Metal GPU context')
    }
  }
  return _context
}
39
+
40
/**
 * Query the name and memory size of the GPU device.
 * Initializes the context if that has not happened yet.
 *
 * @returns the device name and memory as reported by the native library
 */
export function deviceInfo(): GPUDeviceInfo {
  const context = init()
  // NOTE(review): the double cast assumes the FFI binding already yields a JS string — confirm in ./ffi.
  const name = symbols.sparse_device_name(context) as unknown as string
  const memory = symbols.sparse_device_memory(context)
  return { name, memory }
}
50
+
51
/**
 * Block until the native library reports that pending GPU work is done.
 * Does nothing when no context has been created.
 */
export function sync(): void {
  if (!_context) return
  symbols.sparse_sync(_context)
}
59
+
60
/**
 * Tear down the GPU context, if one exists, and forget the handle so that a
 * later init() creates a fresh one. Safe to call repeatedly.
 */
export function cleanup(): void {
  if (!_context) return
  symbols.sparse_cleanup(_context)
  _context = null
}
69
+
70
+ // ============================================================================
71
+ // GPU ARRAY CLASS
72
+ // ============================================================================
73
+
74
/**
 * Handle to a buffer that lives on the GPU.
 *
 * Instances are normally produced by the factory and operation functions in
 * this module rather than constructed directly. The wrapper records the
 * element count and dtype alongside the native pointer and guards every
 * native access against use after free().
 */
export class GPUArray {
  private readonly _ptr: BufferPtr
  private readonly _count: number
  private readonly _dtype: number
  private _freed = false

  /**
   * @param ptr native buffer handle
   * @param count number of elements in the buffer
   * @param dtype one of the `DataType` values
   */
  constructor(ptr: BufferPtr, count: number, dtype: number) {
    this._ptr = ptr
    this._count = count
    this._dtype = dtype
  }

  /**
   * Native buffer handle.
   * @throws Error if the array has already been freed
   */
  get ptr(): BufferPtr {
    if (this._freed) throw new Error('GPUArray has been freed')
    return this._ptr
  }

  /** Number of elements. */
  get count(): number {
    return this._count
  }

  /** Raw dtype tag (a `DataType` value). */
  get dtype(): number {
    return this._dtype
  }

  /** True when the dtype is FLOAT32. */
  get isFloat(): boolean {
    return this._dtype === DataType.FLOAT32
  }

  /** True when the dtype is UINT32 (this also holds for index arrays, which share that dtype). */
  get isBool(): boolean {
    return this._dtype === DataType.UINT32
  }

  /**
   * Read data back to CPU as Float32Array.
   * The dtype is not checked here.
   *
   * @throws Error if the array has already been freed
   */
  toFloat32Array(): Float32Array {
    // Go through the guarded getter so a freed buffer is never handed to native code.
    const source = this.ptr
    const result = new Float32Array(this._count)
    if (this._count > 0) {
      // Nothing to copy for an empty array, so the native call is skipped in that case.
      symbols.sparse_to_float(source, ptr(result), this._count)
    }
    return result
  }

  /**
   * Read data back to CPU as Uint32Array.
   * The dtype is not checked here.
   *
   * @throws Error if the array has already been freed
   */
  toUint32Array(): Uint32Array {
    const source = this.ptr
    const result = new Uint32Array(this._count)
    if (this._count > 0) {
      symbols.sparse_to_uint(source, ptr(result), this._count)
    }
    return result
  }

  /**
   * Read data back as a regular array, choosing the typed read by dtype
   * (FLOAT32 -> float read, anything else -> uint read).
   *
   * @throws Error if the array has already been freed
   */
  toArray(): number[] {
    if (this._dtype === DataType.FLOAT32) {
      return Array.from(this.toFloat32Array())
    } else {
      return Array.from(this.toUint32Array())
    }
  }

  /**
   * Free GPU memory. Idempotent: the native free runs at most once.
   *
   * This class registers no finalizer, so the buffer is NOT released by the
   * garbage collector; callers must call free() themselves.
   */
  free(): void {
    if (!this._freed) {
      symbols.sparse_buffer_free(this._ptr)
      this._freed = true
    }
  }
}
146
+
147
+ // ============================================================================
148
+ // ARRAY CREATION
149
+ // ============================================================================
150
+
151
/**
 * Allocate a GPU array whose elements are all zero.
 *
 * @param count number of elements
 * @param dtype `DataType` tag for the buffer; FLOAT32 when omitted
 * @returns the new array
 * @throws Error if the native allocation returns no buffer
 */
export function zeros(count: number, dtype: number = DataType.FLOAT32): GPUArray {
  const context = init()
  const handle = symbols.sparse_zeros(context, count, dtype)
  if (handle) {
    return new GPUArray(handle, count, dtype)
  }
  throw new Error('Failed to create zeros buffer')
}
160
+
161
/**
 * Allocate a FLOAT32 GPU array with every element set to `value`.
 *
 * @param count number of elements
 * @param value fill value
 * @returns the new array
 * @throws Error if the native allocation returns no buffer
 */
export function full(count: number, value: number): GPUArray {
  const context = init()
  const handle = symbols.sparse_full(context, count, value)
  if (handle) {
    return new GPUArray(handle, count, DataType.FLOAT32)
  }
  throw new Error('Failed to create full buffer')
}
170
+
171
/**
 * Upload float data to the GPU.
 *
 * @param data values to upload; a plain number[] is first copied into a Float32Array
 * @returns a FLOAT32 array with one element per input value
 * @throws Error if the native upload returns no buffer
 */
export function array(data: Float32Array | number[]): GPUArray {
  const context = init()
  let host: Float32Array
  if (data instanceof Float32Array) {
    host = data
  } else {
    host = new Float32Array(data)
  }
  const length = host.length
  const handle = symbols.sparse_from_float(context, ptr(host), length)
  if (!handle) throw new Error('Failed to create array buffer')
  return new GPUArray(handle, length, DataType.FLOAT32)
}
181
+
182
/**
 * Upload uint32 index data to the GPU.
 *
 * @param data indices to upload; a plain number[] is first copied into a Uint32Array
 * @returns a UINT32 array with one element per input value
 * @throws Error if the native upload returns no buffer
 */
export function indices(data: Uint32Array | number[]): GPUArray {
  const context = init()
  let host: Uint32Array
  if (data instanceof Uint32Array) {
    host = data
  } else {
    host = new Uint32Array(data)
  }
  const length = host.length
  const handle = symbols.sparse_from_uint(context, ptr(host), length)
  if (!handle) throw new Error('Failed to create indices buffer')
  return new GPUArray(handle, length, DataType.UINT32)
}
192
+
193
+ // ============================================================================
194
+ // CORE OPERATIONS
195
+ // ============================================================================
196
+
197
/**
 * Scatter-add in place: target[idxs[i]] += values[i] for each i below idxs.count.
 *
 * The native kernel is described by its author as atomic and as leaving the
 * index buffer untouched; neither property can be verified from this file.
 *
 * @param target array that receives the accumulated values (mutated)
 * @param idxs destination index for each contribution
 * @param values contribution for each index
 * @throws Error if any of the three arrays has been freed
 */
export function scatterAdd(
  target: GPUArray,
  idxs: GPUArray,
  values: GPUArray
): void {
  const context = init()
  symbols.sparse_scatter_add(
    context,
    target.ptr,
    idxs.ptr,
    values.ptr,
    idxs.count
  )
}
210
+
211
/**
 * Gather: result[i] = source[idxs[i]].
 *
 * @param source array to read from
 * @param idxs positions to read
 * @returns a new FLOAT32 array with idxs.count elements
 * @throws Error if an input has been freed or the native call returns no buffer
 */
export function gather(source: GPUArray, idxs: GPUArray): GPUArray {
  const context = init()
  const handle = symbols.sparse_gather(context, source.ptr, idxs.ptr, idxs.count)
  if (handle) {
    return new GPUArray(handle, idxs.count, DataType.FLOAT32)
  }
  throw new Error('Failed to gather')
}
220
+
221
/**
 * Gather for boolean (UINT32) arrays: result[i] = source[idxs[i]].
 *
 * @param source boolean array to read from
 * @param idxs positions to read
 * @returns a new UINT32 array with idxs.count elements
 * @throws Error if an input has been freed or the native call returns no buffer
 */
export function gatherBool(source: GPUArray, idxs: GPUArray): GPUArray {
  const context = init()
  const handle = symbols.sparse_gather_bool(context, source.ptr, idxs.ptr, idxs.count)
  if (handle) {
    return new GPUArray(handle, idxs.count, DataType.UINT32)
  }
  throw new Error('Failed to gather bool')
}
230
+
231
+ // ============================================================================
232
+ // ELEMENT-WISE OPERATIONS
233
+ // ============================================================================
234
+
235
/**
 * Element-wise addition. `b` may be another array or a scalar that is added
 * to every element of `a`.
 *
 * @returns a new FLOAT32 array with a.count elements
 * @throws Error if an input has been freed or the native call returns no buffer
 */
export function add(a: GPUArray, b: GPUArray | number): GPUArray {
  const context = init()
  const handle: BufferPtr =
    typeof b === 'number'
      ? symbols.sparse_add_scalar(context, a.ptr, b)
      : symbols.sparse_add(context, a.ptr, b.ptr)

  if (!handle) throw new Error('Failed to add')
  return new GPUArray(handle, a.count, DataType.FLOAT32)
}
251
+
252
/**
 * Subtract: a - b. `b` may be another array or a scalar.
 *
 * There is no native subtract, so this is expressed through add():
 * a scalar is negated directly, an array is negated via multiply(b, -1).
 *
 * @returns a new array holding the difference
 * @throws whatever add() or multiply() throw (e.g. when an input was freed)
 */
export function subtract(a: GPUArray, b: GPUArray | number): GPUArray {
  if (typeof b === 'number') {
    return add(a, -b)
  }

  // a - b = a + (-1 * b)
  const negB = multiply(b, -1)
  try {
    return add(a, negB)
  } finally {
    // Release the temporary even when add() throws, so a failed call does not leak GPU memory.
    negB.free()
  }
}
266
+
267
/**
 * Element-wise multiplication. `b` may be another array or a scalar that
 * scales every element of `a`.
 *
 * @returns a new FLOAT32 array with a.count elements
 * @throws Error if an input has been freed or the native call returns no buffer
 */
export function multiply(a: GPUArray, b: GPUArray | number): GPUArray {
  const context = init()
  const handle: BufferPtr =
    typeof b === 'number'
      ? symbols.sparse_multiply_scalar(context, a.ptr, b)
      : symbols.sparse_multiply(context, a.ptr, b.ptr)

  if (!handle) throw new Error('Failed to multiply')
  return new GPUArray(handle, a.count, DataType.FLOAT32)
}
283
+
284
/**
 * Element-wise square.
 *
 * @returns a new FLOAT32 array with a.count elements
 * @throws Error if the input has been freed or the native call returns no buffer
 */
export function square(a: GPUArray): GPUArray {
  const context = init()
  const handle = symbols.sparse_square(context, a.ptr)
  if (handle) {
    return new GPUArray(handle, a.count, DataType.FLOAT32)
  }
  throw new Error('Failed to square')
}
293
+
294
+ // ============================================================================
295
+ // CONDITIONAL OPERATIONS
296
+ // ============================================================================
297
+
298
/**
 * Threshold comparison: result[i] = (a[i] >= threshold) ? 1 : 0.
 *
 * @returns a new UINT32 array with a.count elements
 * @throws Error if the input has been freed or the native call returns no buffer
 */
export function greaterEqual(a: GPUArray, threshold: number): GPUArray {
  const context = init()
  const handle = symbols.sparse_greater_equal(context, a.ptr, threshold)
  if (handle) {
    return new GPUArray(handle, a.count, DataType.UINT32)
  }
  throw new Error('Failed to compare')
}
307
+
308
/**
 * Where: result[i] = condition[i] ? ifTrue[i] : ifFalse[i].
 *
 * Either branch may be an array or a scalar. Two scalars use the dedicated
 * native scalar kernel; a mix of scalar and array expands the scalar into a
 * temporary array via full(), which is always released before returning,
 * including when the call fails part-way.
 *
 * @param condition selector array
 * @param ifTrue value(s) taken where the condition is set
 * @param ifFalse value(s) taken where the condition is not set
 * @returns a new FLOAT32 array with condition.count elements
 * @throws Error('where() invalid argument types') for unsupported operand types
 * @throws Error('Failed to where') if the native call returns no buffer
 * @throws Error if any array operand has been freed
 */
export function where(
  condition: GPUArray,
  ifTrue: GPUArray | number,
  ifFalse: GPUArray | number
): GPUArray {
  const ctx = init()
  let bufPtr: BufferPtr

  if (typeof ifTrue === 'number' && typeof ifFalse === 'number') {
    bufPtr = symbols.sparse_where_scalar(ctx, condition.ptr, ifTrue, ifFalse)
  } else if (ifTrue instanceof GPUArray && ifFalse instanceof GPUArray) {
    bufPtr = symbols.sparse_where(ctx, condition.ptr, ifTrue.ptr, ifFalse.ptr)
  } else if (typeof ifTrue === 'number' && ifFalse instanceof GPUArray) {
    // Mixed: scalar ifTrue, array ifFalse -> expand the scalar into a temp array
    const ifTrueArr = full(condition.count, ifTrue)
    try {
      bufPtr = symbols.sparse_where(ctx, condition.ptr, ifTrueArr.ptr, ifFalse.ptr)
    } finally {
      // A .ptr getter throws on a freed operand; the temp must not leak in that case.
      ifTrueArr.free()
    }
  } else if (ifTrue instanceof GPUArray && typeof ifFalse === 'number') {
    // Mixed: array ifTrue, scalar ifFalse -> expand the scalar into a temp array
    const ifFalseArr = full(condition.count, ifFalse)
    try {
      bufPtr = symbols.sparse_where(ctx, condition.ptr, ifTrue.ptr, ifFalseArr.ptr)
    } finally {
      ifFalseArr.free()
    }
  } else {
    throw new Error('where() invalid argument types')
  }

  if (!bufPtr) throw new Error('Failed to where')
  return new GPUArray(bufPtr, condition.count, DataType.FLOAT32)
}
341
+
342
+ // ============================================================================
343
+ // REDUCTION OPERATIONS
344
+ // ============================================================================
345
+
346
/**
 * Reduce an array to the sum of its elements.
 * FLOAT32 arrays use the float reduction; every other dtype uses the boolean one.
 *
 * @returns the sum as a number
 * @throws Error if the input has been freed
 */
export function sum(a: GPUArray): number {
  const context = init()
  return a.dtype === DataType.FLOAT32
    ? symbols.sparse_sum(context, a.ptr)
    : symbols.sparse_sum_bool(context, a.ptr)
}
357
+
358
+ // ============================================================================
359
+ // RANDOM OPERATIONS
360
+ // ============================================================================
361
+
362
/** Random array factories. Both produce FLOAT32 arrays. */
export const random = {
  /**
   * Uniformly distributed values in [low, high).
   *
   * @param count number of elements
   * @param low lower bound, 0 when omitted
   * @param high upper bound, 1 when omitted
   * @throws Error if the native call returns no buffer
   */
  uniform(count: number, low = 0, high = 1): GPUArray {
    const context = init()
    const handle = symbols.sparse_random_uniform(context, count, low, high)
    if (handle) {
      return new GPUArray(handle, count, DataType.FLOAT32)
    }
    throw new Error('Failed to create random uniform')
  },

  /**
   * Normally distributed values.
   *
   * @param count number of elements
   * @param mean distribution mean, 0 when omitted
   * @param std standard deviation, 1 when omitted
   * @throws Error if the native call returns no buffer
   */
  normal(count: number, mean = 0, std = 1): GPUArray {
    const context = init()
    const handle = symbols.sparse_random_normal(context, count, mean, std)
    if (handle) {
      return new GPUArray(handle, count, DataType.FLOAT32)
    }
    throw new Error('Failed to create random normal')
  },
}
package/src/index.ts ADDED
@@ -0,0 +1,7 @@
1
+ // ============================================================================
2
+ // SPARSE - Sparsity-First GPU Compute
3
+ // Minimal. Elegant. For biological neural simulation, not transformers.
4
+ // ============================================================================
5
+
6
+ export * from './gpu'
7
+ export { DataType } from './ffi'
@@ -0,0 +1,70 @@
1
+ // Debug: Why no spikes?
2
+
3
+ import * as gpu from './gpu'
4
+
5
console.log('=== Debug: Izhikevich Dynamics ===\n')

const numNeurons = 10

// Initial state: membrane potential v = -65, recovery variable u = 0.2 * v
let v = gpu.full(numNeurons, -65)
let u = gpu.multiply(v, 0.2)

// Constant input to all neurons
const input = gpu.full(numNeurons, 15)

console.log('Initial:')
console.log(' v:', v.toArray())
console.log(' u:', u.toArray())
console.log()

for (let t = 0; t < 15; t++) {
  // Izhikevich equations
  // dv = 0.04*v^2 + 5*v + 140 - u + I
  const vSq = gpu.square(v)
  const term1 = gpu.multiply(vSq, 0.04) // 0.04*v^2
  const term2 = gpu.multiply(v, 5) // 5*v
  const sum1 = gpu.add(term1, term2)
  const sum2 = gpu.add(sum1, 140)
  const sum3 = gpu.subtract(sum2, u)
  const dv = gpu.add(sum3, input)

  // du = a * (b*v - u) where a=0.02, b=0.2
  const bv = gpu.multiply(v, 0.2)
  const diff = gpu.subtract(bv, u)
  const du = gpu.multiply(diff, 0.02)

  // Update (one step with unit time step)
  const newV = gpu.add(v, dv)
  const newU = gpu.add(u, du)

  // Check for spikes: a neuron fires when v reaches 30
  const fired = gpu.greaterEqual(newV, 30)
  const spikeCount = gpu.sum(fired)

  console.log(`Step ${t}:`)
  console.log(' v:', newV.toArray().map(x => x.toFixed(1)))
  console.log(' dv:', dv.toArray().map(x => x.toFixed(1)))
  console.log(' spikes:', spikeCount)

  // Reset if spiked: v -> -65, u -> u + 8
  const vReset = gpu.where(fired, -65, newV)
  const uBump = gpu.add(newU, 8)
  const uReset = gpu.where(fired, uBump, newU)

  // Release every intermediate of this step plus the previous state
  vSq.free(); term1.free(); term2.free(); sum1.free(); sum2.free(); sum3.free(); dv.free()
  bv.free(); diff.free(); du.free()
  newV.free(); newU.free(); fired.free(); uBump.free()
  v.free(); u.free()

  v = vReset
  u = uReset

  if (spikeCount > 0) {
    console.log('\n >>> SPIKE DETECTED! <<<\n')
  }
  console.log()
}

// The final state and the input buffer are still allocated after the loop;
// free them explicitly before tearing down the context.
v.free()
u.free()
input.free()

gpu.cleanup()
@@ -0,0 +1,140 @@
1
+ // ============================================================================
2
+ // LIMIT TEST: Find the M1 Max 64GB ceiling
3
+ // ============================================================================
4
+
5
+ import * as gpu from './gpu'
6
+
7
console.log('=== LIMIT TEST: Finding the Ceiling ===\n')

// Report which device is being measured and how much memory it advertises.
const info = gpu.deviceInfo()
console.log('Device:', info.name)
console.log('Memory:', (Number(info.memory) / 1024 / 1024 / 1024).toFixed(1), 'GB')
console.log()

// Memory model used for the estimates printed below:
// - 1 neuron: 2 floats (v, u) = 8 bytes
// - 1 synapse: 2 uint32 (pre, post) + 1 float (weight) = 12 bytes
// - plus intermediate buffers during computation (estimated as 3x the neuron
//   state and 2x the synapse data)

// Scales to try, in millions of neurons; each scale uses 10 synapses per neuron.
// The first entry (10M neurons, 100M synapses) was observed to work by the
// original author; the last one is 200M neurons, 2B synapses.
const tests = [10, 20, 50, 100, 200].map(millions => ({
  neurons: millions * 1_000_000,
  synapses: millions * 10_000_000,
}))
27
+
28
/**
 * Render a byte count as gibibytes with two decimals, labelled "GB".
 *
 * @param bytes size in bytes
 * @returns e.g. "1.50 GB"
 */
function formatBytes(bytes: number): string {
  const gigabytes = bytes / 1024 / 1024 / 1024
  return gigabytes.toFixed(2) + ' GB'
}
32
+
33
/**
 * Rough estimate, in bytes, of the GPU memory one test scale needs.
 *
 * Counts the persistent neuron state and synapse data plus a guess for
 * intermediate buffers (3x the neuron state and 2x the synapse data).
 *
 * @param neurons number of neurons
 * @param synapses number of synapses
 * @returns estimated bytes
 */
function estimateMemory(neurons: number, synapses: number): number {
  const FLOAT_BYTES = 4
  const UINT_BYTES = 4

  // Per neuron: v and u, one float each
  const neuronBytes = neurons * 2 * FLOAT_BYTES

  // Per synapse: pre and post index (uint32 each) plus one float weight
  const synapseBytes = synapses * (2 * UINT_BYTES + FLOAT_BYTES)

  // Temporary buffers created while a timestep is computed
  const overhead = neuronBytes * 3 + synapseBytes * 2

  return neuronBytes + synapseBytes + overhead
}
45
+
46
// Run each scale in turn and stop at the first one that fails.
for (const { neurons, synapses } of tests) {
  console.log(`\n${'='.repeat(70)}`)
  console.log(`Testing: ${(neurons / 1_000_000).toFixed(0)}M neurons, ${(synapses / 1_000_000).toFixed(0)}M synapses`)
  console.log(`Estimated memory: ${formatBytes(estimateMemory(neurons, synapses))}`)
  console.log(`${'='.repeat(70)}`)

  // Every GPU buffer created for this scale is recorded here so that the
  // finally block can release it whether the run succeeds or fails. Without
  // this, a failed allocation would leave all earlier buffers allocated at
  // the very moment memory is exhausted.
  const allocated: gpu.GPUArray[] = []
  const track = (buffer: gpu.GPUArray): gpu.GPUArray => {
    allocated.push(buffer)
    return buffer
  }

  try {
    // Create neuron state
    console.log(' [1/4] Allocating neuron state...')
    const t1 = performance.now()
    const v = track(gpu.full(neurons, -65))
    const u = track(gpu.multiply(v, 0.2))
    console.log(` Done in ${(performance.now() - t1).toFixed(0)}ms`)

    // Create connectivity - this is the big one
    console.log(' [2/4] Creating pre-synaptic indices...')
    const t2 = performance.now()
    const preIdxData = new Uint32Array(synapses)
    for (let i = 0; i < synapses; i++) {
      preIdxData[i] = Math.floor(Math.random() * neurons)
    }
    const preIndices = track(gpu.indices(preIdxData))
    console.log(` Done in ${(performance.now() - t2).toFixed(0)}ms`)

    console.log(' [3/4] Creating post-synaptic indices...')
    const t3 = performance.now()
    const postIdxData = new Uint32Array(synapses)
    for (let i = 0; i < synapses; i++) {
      postIdxData[i] = Math.floor(Math.random() * neurons)
    }
    const postIndices = track(gpu.indices(postIdxData))
    console.log(` Done in ${(performance.now() - t3).toFixed(0)}ms`)

    console.log(' [4/4] Creating synaptic weights...')
    const t4 = performance.now()
    const weights = track(gpu.random.uniform(synapses, 0, 0.5))
    console.log(` Done in ${(performance.now() - t4).toFixed(0)}ms`)

    // Run ONE timestep to verify it works
    console.log(' Running 1 timestep...')
    const simStart = performance.now()

    // Detect firing
    const fired = track(gpu.greaterEqual(v, 30))
    const spikeCount = gpu.sum(fired)

    // Sparse transmission: a synapse contributes its weight only if its
    // pre-synaptic neuron fired
    const preFired = track(gpu.gatherBool(fired, preIndices))
    const zerosArr = track(gpu.zeros(synapses))
    const contribution = track(gpu.where(preFired, weights, zerosArr))

    // Scatter-add contributions onto the post-synaptic neurons
    const current = track(gpu.zeros(neurons))
    gpu.scatterAdd(current, postIndices, contribution)

    // Izhikevich: dv = 0.04*v^2 + 5*v + 140 - u + I
    const vSq = track(gpu.square(v))
    const term1 = track(gpu.multiply(vSq, 0.04))
    const term2 = track(gpu.multiply(v, 5))
    const sum1 = track(gpu.add(term1, term2))
    const sum2 = track(gpu.add(sum1, 140))
    const sum3 = track(gpu.subtract(sum2, u))
    const dv = track(gpu.add(sum3, current))
    track(gpu.add(v, dv))

    // Wait for outstanding GPU work before stopping the clock, so the timing
    // covers the whole timestep. NOTE(review): assumes native calls may return
    // before the GPU finishes — confirm against the native implementation.
    gpu.sync()
    const simTime = performance.now() - simStart

    console.log()
    console.log(` ✅ SUCCESS!`)
    console.log(` 1 timestep: ${simTime.toFixed(0)}ms`)
    console.log(` Spikes: ${spikeCount.toLocaleString()}`)
    // (elements / ms) / 1e6 = millions of elements per millisecond, matching the label.
    console.log(` Throughput: ${((neurons + synapses) / simTime / 1_000_000).toFixed(1)}M ops/ms`)

    // The host-side index arrays are left to the JS garbage collector.
  } catch (error: unknown) {
    const detail = error instanceof Error && error.message ? error.message : String(error)
    console.log()
    console.log(` ❌ HIT THE CEILING!`)
    console.log(` Error: ${detail}`)
    console.log()
    console.log(` Maximum sustainable scale:`)
    console.log(` Previous test passed - that's your limit!`)
    break
  } finally {
    // Runs on success and before the break above; free() is idempotent.
    for (const buffer of allocated) {
      buffer.free()
    }
  }
}

gpu.cleanup()
console.log('\n=== LIMIT TEST COMPLETE ===')