@rlabs-inc/sparse 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +92 -0
- package/libsparse.dylib +0 -0
- package/native/Makefile +30 -0
- package/native/libsparse.dylib +0 -0
- package/native/sparse.h +180 -0
- package/native/sparse.m +734 -0
- package/native/sparse.metal +215 -0
- package/package.json +38 -0
- package/src/ffi.ts +156 -0
- package/src/gpu.ts +382 -0
- package/src/index.ts +7 -0
- package/src/test-debug-spikes.ts +70 -0
- package/src/test-limits.ts +140 -0
- package/src/test-scatter-loop.ts +226 -0
- package/src/test-stress.ts +160 -0
- package/src/test-webgpu.ts +31 -0
package/src/gpu.ts
ADDED
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
// ============================================================================
|
|
2
|
+
// SPARSE - GPU Array API
|
|
3
|
+
// High-level TypeScript API for GPU compute
|
|
4
|
+
// Sparsity-first. Indices are READ-ONLY, never corrupted.
|
|
5
|
+
// ============================================================================
|
|
6
|
+
|
|
7
|
+
import { ptr, toArrayBuffer, type Pointer } from 'bun:ffi'
|
|
8
|
+
import { symbols, DataType } from './ffi'
|
|
9
|
+
|
|
10
|
+
// ============================================================================
|
|
11
|
+
// TYPES
|
|
12
|
+
// ============================================================================
|
|
13
|
+
|
|
14
|
+
/** Opaque handle to a native GPU buffer, as handed back by the FFI layer. */
export type BufferPtr = Pointer

/** Opaque handle to the native GPU context handed back by `sparse_init`. */
export type ContextPtr = Pointer

/** Description of the GPU device, as returned by `deviceInfo()`. */
export interface GPUDeviceInfo {
  /** Device name reported by `sparse_device_name`. */
  name: string
  /** Value reported by `sparse_device_memory` (presumably bytes — confirm against the native header). */
  memory: bigint
}
|
|
21
|
+
|
|
22
|
+
// ============================================================================
|
|
23
|
+
// GPU CONTEXT
|
|
24
|
+
// ============================================================================
|
|
25
|
+
|
|
26
|
+
// Lazily created native context shared by every function in this module.
let _context: ContextPtr | null = null

/**
 * Return the shared GPU context, creating it on the first call.
 *
 * Every public function in this module calls this, so explicit
 * initialization by the caller is optional.
 *
 * @returns The context handle produced by `sparse_init`.
 * @throws Error if `sparse_init` hands back a falsy handle.
 */
export function init(): ContextPtr {
  if (!_context) {
    _context = symbols.sparse_init()
    if (!_context) {
      throw new Error('Failed to initialize Metal GPU context')
    }
  }
  return _context
}
|
|
39
|
+
|
|
40
|
+
/**
 * Query the native layer for the device name and memory figure.
 *
 * Initializes the GPU context if that has not happened yet.
 *
 * @returns The device name and the value reported by `sparse_device_memory`.
 */
export function deviceInfo(): GPUDeviceInfo {
  const context = init()
  const name = symbols.sparse_device_name(context) as unknown as string
  const memory = symbols.sparse_device_memory(context)
  return { name, memory }
}
|
|
50
|
+
|
|
51
|
+
/**
 * Block until all GPU operations have completed.
 *
 * Does nothing when no context has been created yet.
 */
export function sync(): void {
  if (!_context) return
  symbols.sparse_sync(_context)
}
|
|
59
|
+
|
|
60
|
+
/**
 * Release the GPU context (call once all work is done).
 *
 * Does nothing when no context exists. Afterwards the next call to
 * `init()` creates a fresh context.
 */
export function cleanup(): void {
  if (!_context) return
  symbols.sparse_cleanup(_context)
  _context = null
}
|
|
69
|
+
|
|
70
|
+
// ============================================================================
|
|
71
|
+
// GPU ARRAY CLASS
|
|
72
|
+
// ============================================================================
|
|
73
|
+
|
|
74
|
+
/**
 * Handle to a buffer living on the GPU, together with its element count
 * and element type tag.
 *
 * Instances are normally produced by the factory functions in this module
 * rather than constructed directly. The underlying native buffer is only
 * released when `free()` is called.
 */
export class GPUArray {
  private readonly _ptr: BufferPtr
  private readonly _count: number
  private readonly _dtype: number
  private _freed = false

  /**
   * @param ptr   Native buffer handle.
   * @param count Number of elements in the buffer.
   * @param dtype Element type tag (a `DataType` value).
   */
  constructor(ptr: BufferPtr, count: number, dtype: number) {
    this._ptr = ptr
    this._count = count
    this._dtype = dtype
  }

  /**
   * Native buffer handle.
   *
   * @throws Error if the array has already been freed.
   */
  get ptr(): BufferPtr {
    if (this._freed) throw new Error('GPUArray has been freed')
    return this._ptr
  }

  /** Number of elements in the buffer. */
  get count(): number {
    return this._count
  }

  /** Element type tag supplied at construction. */
  get dtype(): number {
    return this._dtype
  }

  /** True when the element type is `DataType.FLOAT32`. */
  get isFloat(): boolean {
    return this._dtype === DataType.FLOAT32
  }

  /**
   * True when the element type is `DataType.UINT32`.
   *
   * Note that index arrays created by `indices()` carry the same tag, so
   * this cannot distinguish a boolean mask from an index array.
   */
  get isBool(): boolean {
    return this._dtype === DataType.UINT32
  }

  /**
   * Read data back to CPU as Float32Array.
   *
   * @throws Error if the array has already been freed.
   */
  toFloat32Array(): Float32Array {
    // Go through the checked getter so a freed handle never reaches native code.
    const handle = this.ptr
    const result = new Float32Array(this._count)
    symbols.sparse_to_float(handle, ptr(result), this._count)
    return result
  }

  /**
   * Read data back to CPU as Uint32Array.
   *
   * @throws Error if the array has already been freed.
   */
  toUint32Array(): Uint32Array {
    // Go through the checked getter so a freed handle never reaches native code.
    const handle = this.ptr
    const result = new Uint32Array(this._count)
    symbols.sparse_to_uint(handle, ptr(result), this._count)
    return result
  }

  /**
   * Read data back as a regular array, using the float readback for
   * `FLOAT32` arrays and the uint readback for everything else.
   *
   * @throws Error if the array has already been freed.
   */
  toArray(): number[] {
    if (this._dtype === DataType.FLOAT32) {
      return Array.from(this.toFloat32Array())
    } else {
      return Array.from(this.toUint32Array())
    }
  }

  /**
   * Free GPU memory.
   *
   * Nothing in this class registers a finalizer, so the buffer is NOT
   * released by garbage collection; call this explicitly when the array is
   * no longer needed. Calling it more than once is harmless.
   */
  free(): void {
    if (!this._freed) {
      symbols.sparse_buffer_free(this._ptr)
      this._freed = true
    }
  }
}
|
|
146
|
+
|
|
147
|
+
// ============================================================================
|
|
148
|
+
// ARRAY CREATION
|
|
149
|
+
// ============================================================================
|
|
150
|
+
|
|
151
|
+
/**
 * Allocate a GPU array of `count` elements via `sparse_zeros`.
 *
 * @param count Number of elements.
 * @param dtype Element type tag; defaults to `DataType.FLOAT32`.
 * @returns A new array the caller must eventually `free()`.
 * @throws Error if the native allocation returns a falsy handle.
 */
export function zeros(count: number, dtype: number = DataType.FLOAT32): GPUArray {
  const context = init()
  const handle = symbols.sparse_zeros(context, count, dtype)
  if (handle) {
    return new GPUArray(handle, count, dtype)
  }
  throw new Error('Failed to create zeros buffer')
}
|
|
160
|
+
|
|
161
|
+
/**
 * Allocate a FLOAT32 GPU array of `count` elements filled with `value`.
 *
 * @param count Number of elements.
 * @param value Fill value passed to `sparse_full`.
 * @returns A new array the caller must eventually `free()`.
 * @throws Error if the native allocation returns a falsy handle.
 */
export function full(count: number, value: number): GPUArray {
  const context = init()
  const handle = symbols.sparse_full(context, count, value)
  if (handle) {
    return new GPUArray(handle, count, DataType.FLOAT32)
  }
  throw new Error('Failed to create full buffer')
}
|
|
170
|
+
|
|
171
|
+
/**
 * Upload float data to the GPU.
 *
 * @param data A Float32Array (used as-is) or a number[] (copied into a
 *             fresh Float32Array first).
 * @returns A FLOAT32 array with one element per input value.
 * @throws Error if the native upload returns a falsy handle.
 */
export function array(data: Float32Array | number[]): GPUArray {
  const context = init()
  let floats: Float32Array
  if (data instanceof Float32Array) {
    floats = data
  } else {
    floats = new Float32Array(data)
  }
  const handle = symbols.sparse_from_float(context, ptr(floats), floats.length)
  if (handle) {
    return new GPUArray(handle, floats.length, DataType.FLOAT32)
  }
  throw new Error('Failed to create array buffer')
}
|
|
181
|
+
|
|
182
|
+
/**
 * Upload uint32 index data to the GPU.
 *
 * @param data A Uint32Array (used as-is) or a number[] (copied into a
 *             fresh Uint32Array first).
 * @returns A UINT32 array with one element per input value.
 * @throws Error if the native upload returns a falsy handle.
 */
export function indices(data: Uint32Array | number[]): GPUArray {
  const context = init()
  let words: Uint32Array
  if (data instanceof Uint32Array) {
    words = data
  } else {
    words = new Uint32Array(data)
  }
  const handle = symbols.sparse_from_uint(context, ptr(words), words.length)
  if (handle) {
    return new GPUArray(handle, words.length, DataType.UINT32)
  }
  throw new Error('Failed to create indices buffer')
}
|
|
192
|
+
|
|
193
|
+
// ============================================================================
|
|
194
|
+
// CORE OPERATIONS
|
|
195
|
+
// ============================================================================
|
|
196
|
+
|
|
197
|
+
/**
 * Scatter-add: target[indices[i]] += values[i]
 * CRITICAL: This is atomic and thread-safe. Indices are READ-ONLY, never corrupted.
 * This is the core operation for sparse synapse transmission.
 *
 * One update is issued per element of `idxs`, so `values` must supply at
 * least `idxs.count` elements.
 *
 * @param target Array that is updated in place.
 * @param idxs   Destination index for each contribution.
 * @param values Contribution for each index.
 * @throws RangeError if `values` has fewer elements than `idxs`.
 * @throws Error if any of the arrays has been freed.
 */
export function scatterAdd(
  target: GPUArray,
  idxs: GPUArray,
  values: GPUArray
): void {
  const ctx = init()
  const updateCount = idxs.count

  // The native call is told to process idxs.count elements; a shorter
  // values buffer would be read past its end.
  if (values.count < updateCount) {
    throw new RangeError(
      `scatterAdd: values has ${values.count} elements but idxs has ${updateCount}`
    )
  }

  // Read every handle through the checked getter before deciding anything,
  // so freed arrays are reported even for empty updates.
  const targetPtr = target.ptr
  const idxsPtr = idxs.ptr
  const valuesPtr = values.ptr

  // Nothing to accumulate; skip the native dispatch entirely.
  if (updateCount === 0) return

  symbols.sparse_scatter_add(ctx, targetPtr, idxsPtr, valuesPtr, updateCount)
}
|
|
210
|
+
|
|
211
|
+
/**
 * Gather: result[i] = source[indices[i]]
 *
 * @param source Array to read from.
 * @param idxs   Positions to read; the result has `idxs.count` elements.
 * @returns A new FLOAT32 array the caller must eventually `free()`.
 * @throws Error if the native call returns a falsy handle.
 */
export function gather(source: GPUArray, idxs: GPUArray): GPUArray {
  const context = init()
  const handle = symbols.sparse_gather(context, source.ptr, idxs.ptr, idxs.count)
  if (handle) {
    return new GPUArray(handle, idxs.count, DataType.FLOAT32)
  }
  throw new Error('Failed to gather')
}
|
|
220
|
+
|
|
221
|
+
/**
 * Boolean gather: result[i] = source[indices[i]] for boolean (UINT32) arrays.
 *
 * @param source Boolean array to read from.
 * @param idxs   Positions to read; the result has `idxs.count` elements.
 * @returns A new UINT32 array the caller must eventually `free()`.
 * @throws Error if the native call returns a falsy handle.
 */
export function gatherBool(source: GPUArray, idxs: GPUArray): GPUArray {
  const context = init()
  const handle = symbols.sparse_gather_bool(context, source.ptr, idxs.ptr, idxs.count)
  if (handle) {
    return new GPUArray(handle, idxs.count, DataType.UINT32)
  }
  throw new Error('Failed to gather bool')
}
|
|
230
|
+
|
|
231
|
+
// ============================================================================
|
|
232
|
+
// ELEMENT-WISE OPERATIONS
|
|
233
|
+
// ============================================================================
|
|
234
|
+
|
|
235
|
+
/**
 * Element-wise addition of an array and either another array or a scalar.
 *
 * @param a Left operand; the result has `a.count` elements.
 * @param b Right operand: a GPU array or a plain number.
 * @returns A new FLOAT32 array the caller must eventually `free()`.
 * @throws Error if the native call returns a falsy handle.
 */
export function add(a: GPUArray, b: GPUArray | number): GPUArray {
  const context = init()
  const handle: BufferPtr =
    typeof b === 'number'
      ? symbols.sparse_add_scalar(context, a.ptr, b)
      : symbols.sparse_add(context, a.ptr, b.ptr)

  if (handle) {
    return new GPUArray(handle, a.count, DataType.FLOAT32)
  }
  throw new Error('Failed to add')
}
|
|
251
|
+
|
|
252
|
+
/**
 * Subtract: a - b
 *
 * A scalar `b` is handled as `a + (-b)`. An array `b` is handled as
 * `a + (-1 * b)`, which needs a temporary negated copy of `b`; that
 * temporary is always released, even when the addition throws.
 *
 * @param a Left operand.
 * @param b Right operand: a GPU array or a plain number.
 * @returns A new array the caller must eventually `free()`.
 */
export function subtract(a: GPUArray, b: GPUArray | number): GPUArray {
  if (typeof b === 'number') {
    return add(a, -b)
  }

  // a - b = a + (-1 * b)
  const negB = multiply(b, -1)
  try {
    return add(a, negB)
  } finally {
    // Release the temporary on both the success and the failure path.
    negB.free()
  }
}
|
|
266
|
+
|
|
267
|
+
/**
 * Element-wise product of an array and either another array or a scalar.
 *
 * @param a Left operand; the result has `a.count` elements.
 * @param b Right operand: a GPU array or a plain number.
 * @returns A new FLOAT32 array the caller must eventually `free()`.
 * @throws Error if the native call returns a falsy handle.
 */
export function multiply(a: GPUArray, b: GPUArray | number): GPUArray {
  const context = init()
  const handle: BufferPtr =
    typeof b === 'number'
      ? symbols.sparse_multiply_scalar(context, a.ptr, b)
      : symbols.sparse_multiply(context, a.ptr, b.ptr)

  if (handle) {
    return new GPUArray(handle, a.count, DataType.FLOAT32)
  }
  throw new Error('Failed to multiply')
}
|
|
283
|
+
|
|
284
|
+
/**
 * Element-wise square via `sparse_square`.
 *
 * @param a Input array; the result has `a.count` elements.
 * @returns A new FLOAT32 array the caller must eventually `free()`.
 * @throws Error if the native call returns a falsy handle.
 */
export function square(a: GPUArray): GPUArray {
  const context = init()
  const handle = symbols.sparse_square(context, a.ptr)
  if (handle) {
    return new GPUArray(handle, a.count, DataType.FLOAT32)
  }
  throw new Error('Failed to square')
}
|
|
293
|
+
|
|
294
|
+
// ============================================================================
|
|
295
|
+
// CONDITIONAL OPERATIONS
|
|
296
|
+
// ============================================================================
|
|
297
|
+
|
|
298
|
+
/**
 * Threshold comparison: result[i] = (a[i] >= threshold) ? 1 : 0
 *
 * @param a         Input array; the result has `a.count` elements.
 * @param threshold Scalar to compare each element against.
 * @returns A new UINT32 mask array the caller must eventually `free()`.
 * @throws Error if the native call returns a falsy handle.
 */
export function greaterEqual(a: GPUArray, threshold: number): GPUArray {
  const context = init()
  const handle = symbols.sparse_greater_equal(context, a.ptr, threshold)
  if (handle) {
    return new GPUArray(handle, a.count, DataType.UINT32)
  }
  throw new Error('Failed to compare')
}
|
|
307
|
+
|
|
308
|
+
/**
 * Where: result[i] = condition[i] ? ifTrue[i] : ifFalse[i]
 * Can also use scalar values for ifTrue/ifFalse
 *
 * When exactly one branch is a scalar, it is first expanded into a
 * temporary array of `condition.count` elements. That temporary is always
 * released, even if the selection itself throws (for example because one
 * of the other arrays was already freed).
 *
 * @param condition Mask array; the result has `condition.count` elements.
 * @param ifTrue    Value(s) used where the condition holds.
 * @param ifFalse   Value(s) used where it does not.
 * @returns A new FLOAT32 array the caller must eventually `free()`.
 * @throws Error if the argument types are unsupported or the native call
 *         returns a falsy handle.
 */
export function where(
  condition: GPUArray,
  ifTrue: GPUArray | number,
  ifFalse: GPUArray | number
): GPUArray {
  const ctx = init()
  let bufPtr: BufferPtr

  if (typeof ifTrue === 'number' && typeof ifFalse === 'number') {
    bufPtr = symbols.sparse_where_scalar(ctx, condition.ptr, ifTrue, ifFalse)
  } else if (ifTrue instanceof GPUArray && ifFalse instanceof GPUArray) {
    bufPtr = symbols.sparse_where(ctx, condition.ptr, ifTrue.ptr, ifFalse.ptr)
  } else if (typeof ifTrue === 'number' && ifFalse instanceof GPUArray) {
    // Mixed: scalar ifTrue, array ifFalse -> create temp array
    const ifTrueArr = full(condition.count, ifTrue)
    try {
      bufPtr = symbols.sparse_where(ctx, condition.ptr, ifTrueArr.ptr, ifFalse.ptr)
    } finally {
      ifTrueArr.free()
    }
  } else if (ifTrue instanceof GPUArray && typeof ifFalse === 'number') {
    // Mixed: array ifTrue, scalar ifFalse -> create temp array
    const ifFalseArr = full(condition.count, ifFalse)
    try {
      bufPtr = symbols.sparse_where(ctx, condition.ptr, ifTrue.ptr, ifFalseArr.ptr)
    } finally {
      ifFalseArr.free()
    }
  } else {
    throw new Error('where() invalid argument types')
  }

  if (!bufPtr) throw new Error('Failed to where')
  return new GPUArray(bufPtr, condition.count, DataType.FLOAT32)
}
|
|
341
|
+
|
|
342
|
+
// ============================================================================
|
|
343
|
+
// REDUCTION OPERATIONS
|
|
344
|
+
// ============================================================================
|
|
345
|
+
|
|
346
|
+
/**
 * Reduce an array to a single number.
 *
 * FLOAT32 arrays go through `sparse_sum`; every other dtype goes through
 * `sparse_sum_bool`.
 *
 * @param a Array to reduce.
 * @returns The value handed back by the native reduction.
 */
export function sum(a: GPUArray): number {
  const context = init()
  return a.dtype === DataType.FLOAT32
    ? symbols.sparse_sum(context, a.ptr)
    : symbols.sparse_sum_bool(context, a.ptr)
}
|
|
357
|
+
|
|
358
|
+
// ============================================================================
|
|
359
|
+
// RANDOM OPERATIONS
|
|
360
|
+
// ============================================================================
|
|
361
|
+
|
|
362
|
+
/** Factories for GPU arrays filled with random values. */
export const random = {
  /**
   * Uniformly distributed values in [low, high).
   *
   * @param count Number of elements.
   * @param low   Lower bound (default 0).
   * @param high  Upper bound (default 1).
   * @returns A new FLOAT32 array the caller must eventually `free()`.
   * @throws Error if the native call returns a falsy handle.
   */
  uniform(count: number, low = 0, high = 1): GPUArray {
    const context = init()
    const handle = symbols.sparse_random_uniform(context, count, low, high)
    if (handle) {
      return new GPUArray(handle, count, DataType.FLOAT32)
    }
    throw new Error('Failed to create random uniform')
  },

  /**
   * Normally distributed values with the given mean and standard deviation.
   *
   * @param count Number of elements.
   * @param mean  Mean (default 0).
   * @param std   Standard deviation (default 1).
   * @returns A new FLOAT32 array the caller must eventually `free()`.
   * @throws Error if the native call returns a falsy handle.
   */
  normal(count: number, mean = 0, std = 1): GPUArray {
    const context = init()
    const handle = symbols.sparse_random_normal(context, count, mean, std)
    if (handle) {
      return new GPUArray(handle, count, DataType.FLOAT32)
    }
    throw new Error('Failed to create random normal')
  },
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
// ============================================================================
|
|
2
|
+
// SPARSE - Sparsity-First GPU Compute
|
|
3
|
+
// Minimal. Elegant. For biological neural simulation, not transformers.
|
|
4
|
+
// ============================================================================
|
|
5
|
+
|
|
6
|
+
export * from './gpu'
|
|
7
|
+
export { DataType } from './ffi'
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
// Debug: Why no spikes?
|
|
2
|
+
|
|
3
|
+
import * as gpu from './gpu'
|
|
4
|
+
|
|
5
|
+
console.log('=== Debug: Izhikevich Dynamics ===\n')

const numNeurons = 10

/** Release every array passed in; GPU buffers are only freed explicitly. */
function release(...arrays: gpu.GPUArray[]): void {
  for (const arr of arrays) arr.free()
}

// Initial state
let v = gpu.full(numNeurons, -65)
let u = gpu.multiply(v, 0.2)

// Constant input to all neurons
const input = gpu.full(numNeurons, 15)

console.log('Initial:')
console.log(' v:', v.toArray())
console.log(' u:', u.toArray())
console.log()

for (let t = 0; t < 15; t++) {
  // Izhikevich equations
  // dv = 0.04*v^2 + 5*v + 140 - u + I
  const vSq = gpu.square(v)
  const term1 = gpu.multiply(vSq, 0.04) // 0.04*v^2
  const term2 = gpu.multiply(v, 5) // 5*v
  const sum1 = gpu.add(term1, term2)
  const sum2 = gpu.add(sum1, 140)
  const sum3 = gpu.subtract(sum2, u)
  const dv = gpu.add(sum3, input)

  // du = a * (b*v - u) where a=0.02, b=0.2
  const bv = gpu.multiply(v, 0.2)
  const diff = gpu.subtract(bv, u)
  const du = gpu.multiply(diff, 0.02)

  // Update
  const newV = gpu.add(v, dv)
  const newU = gpu.add(u, du)

  // Check for spikes
  const fired = gpu.greaterEqual(newV, 30)
  const spikeCount = gpu.sum(fired)

  console.log(`Step ${t}:`)
  console.log(' v:', newV.toArray().map(x => x.toFixed(1)))
  console.log(' dv:', dv.toArray().map(x => x.toFixed(1)))
  console.log(' spikes:', spikeCount)

  // Reset if spiked: v goes back to -65, u is bumped by 8
  const vReset = gpu.where(fired, -65, newV)
  const uBump = gpu.add(newU, 8)
  const uReset = gpu.where(fired, uBump, newU)

  // Cleanup: every intermediate plus the previous state
  release(vSq, term1, term2, sum1, sum2, sum3, dv)
  release(bv, diff, du)
  release(newV, newU, fired, uBump)
  release(v, u)

  v = vReset
  u = uReset

  if (spikeCount > 0) {
    console.log('\n >>> SPIKE DETECTED! <<<\n')
  }
  console.log()
}

// The last state and the constant input are still allocated at this point;
// release them before tearing the context down.
release(v, u, input)

gpu.cleanup()
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
// ============================================================================
|
|
2
|
+
// LIMIT TEST: Find the M1 Max 64GB ceiling
|
|
3
|
+
// ============================================================================
|
|
4
|
+
|
|
5
|
+
import * as gpu from './gpu'
|
|
6
|
+
|
|
7
|
+
console.log('=== LIMIT TEST: Finding the Ceiling ===\n')

// Report which device is being measured and how much memory it advertises.
const info = gpu.deviceInfo()
const memoryGb = Number(info.memory) / 1024 / 1024 / 1024
console.log('Device:', info.name)
console.log('Memory:', memoryGb.toFixed(1), 'GB')
console.log()

// Memory estimation:
// - 1 neuron: 2 floats (v, u) = 8 bytes
// - 1 synapse: 2 uint32 (pre, post) + 1 float (weight) = 12 bytes
// - Plus intermediate buffers during computation (~5x overhead)

// Scale up aggressively: 10M, 20M, 50M, 100M and 200M neurons, each with
// ten synapses per neuron (the 10M / 100M configuration has worked before).
const tests = [10, 20, 50, 100, 200].map(millions => ({
  neurons: millions * 1_000_000,
  synapses: millions * 10_000_000,
}))
|
|
27
|
+
|
|
28
|
+
/**
 * Render a byte count as gibibytes with two decimals, e.g. `1.00 GB`.
 *
 * @param bytes Number of bytes.
 * @returns The formatted size followed by ` GB`.
 */
function formatBytes(bytes: number): string {
  const BYTES_PER_GB = 1024 * 1024 * 1024
  const formatted = (bytes / BYTES_PER_GB).toFixed(2)
  return formatted + ' GB'
}
|
|
32
|
+
|
|
33
|
+
/**
 * Rough estimate, in bytes, of the memory needed for one test configuration.
 *
 * @param neurons  Number of neurons.
 * @param synapses Number of synapses.
 * @returns Persistent state plus an allowance for intermediate buffers.
 */
function estimateMemory(neurons: number, synapses: number): number {
  const BYTES_PER_NEURON = 2 * 4 // v and u, one float each
  const BYTES_PER_SYNAPSE = 2 * 4 + 4 // pre/post uint32 indices plus a float weight

  const stateBytes = neurons * BYTES_PER_NEURON
  const connectivityBytes = synapses * BYTES_PER_SYNAPSE

  // Intermediates created while stepping: roughly 3x the neuron state
  // plus 2x the synapse data.
  const scratchBytes = stateBytes * 3 + connectivityBytes * 2

  return stateBytes + connectivityBytes + scratchBytes
}
|
|
45
|
+
|
|
46
|
+
/** Build `count` random indices, each an integer in [0, bound). */
function randomIndexData(count: number, bound: number): Uint32Array {
  const data = new Uint32Array(count)
  for (let i = 0; i < count; i++) {
    data[i] = Math.floor(Math.random() * bound)
  }
  return data
}

// Label of the most recent configuration that completed, for the final report.
let lastPassed: string | null = null

// Walk the configurations from smallest to largest and stop at the first
// one that fails.
for (const { neurons, synapses } of tests) {
  const label = `${(neurons / 1_000_000).toFixed(0)}M neurons, ${(synapses / 1_000_000).toFixed(0)}M synapses`

  console.log(`\n${'='.repeat(70)}`)
  console.log(`Testing: ${label}`)
  console.log(`Estimated memory: ${formatBytes(estimateMemory(neurons, synapses))}`)
  console.log(`${'='.repeat(70)}`)

  // Every GPU array allocated for this configuration is recorded here so it
  // can be released whether the run succeeds or fails part-way through.
  const live: gpu.GPUArray[] = []
  const track = (arr: gpu.GPUArray): gpu.GPUArray => {
    live.push(arr)
    return arr
  }

  try {
    // Create neuron state
    console.log(' [1/4] Allocating neuron state...')
    const t1 = performance.now()
    const v = track(gpu.full(neurons, -65))
    const u = track(gpu.multiply(v, 0.2))
    console.log(` Done in ${(performance.now() - t1).toFixed(0)}ms`)

    // Create connectivity - this is the big one
    console.log(' [2/4] Creating pre-synaptic indices...')
    const t2 = performance.now()
    const preIndices = track(gpu.indices(randomIndexData(synapses, neurons)))
    console.log(` Done in ${(performance.now() - t2).toFixed(0)}ms`)

    console.log(' [3/4] Creating post-synaptic indices...')
    const t3 = performance.now()
    const postIndices = track(gpu.indices(randomIndexData(synapses, neurons)))
    console.log(` Done in ${(performance.now() - t3).toFixed(0)}ms`)

    console.log(' [4/4] Creating synaptic weights...')
    const t4 = performance.now()
    const weights = track(gpu.random.uniform(synapses, 0, 0.5))
    console.log(` Done in ${(performance.now() - t4).toFixed(0)}ms`)

    // Run ONE timestep to verify it works
    console.log(' Running 1 timestep...')
    const simStart = performance.now()

    // Detect firing
    const fired = track(gpu.greaterEqual(v, 30))
    const spikeCount = gpu.sum(fired)

    // Sparse transmission
    const preFired = track(gpu.gatherBool(fired, preIndices))
    const zerosArr = track(gpu.zeros(synapses))
    const contribution = track(gpu.where(preFired, weights, zerosArr))

    // Scatter-add
    const current = track(gpu.zeros(neurons))
    gpu.scatterAdd(current, postIndices, contribution)

    // Izhikevich
    const vSq = track(gpu.square(v))
    const term1 = track(gpu.multiply(vSq, 0.04))
    const term2 = track(gpu.multiply(v, 5))
    const sum1 = track(gpu.add(term1, term2))
    const sum2 = track(gpu.add(sum1, 140))
    const sum3 = track(gpu.subtract(sum2, u))
    const dv = track(gpu.add(sum3, current))
    track(gpu.add(v, dv))

    // gpu.sync() waits for all GPU operations to complete; call it before
    // stopping the clock so queued work is included in the measurement.
    gpu.sync()
    const simTime = performance.now() - simStart

    console.log()
    console.log(` ✅ SUCCESS!`)
    console.log(` 1 timestep: ${simTime.toFixed(0)}ms`)
    console.log(` Spikes: ${spikeCount.toLocaleString()}`)
    // elements / ms / 1000 is thousands of elements per millisecond,
    // i.e. millions of elements per second.
    console.log(` Throughput: ${((neurons + synapses) / simTime / 1000).toFixed(1)}M ops/s`)

    lastPassed = label

    // The large JS-side index arrays are left to the garbage collector.
  } catch (error: unknown) {
    const message = error instanceof Error && error.message ? error.message : String(error)
    console.log()
    console.log(` ❌ HIT THE CEILING!`)
    console.log(` Error: ${message}`)
    console.log()
    console.log(` Maximum sustainable scale:`)
    if (lastPassed !== null) {
      console.log(` ${lastPassed} passed - that's your limit!`)
    } else {
      console.log(` None of the configurations completed.`)
    }
    break
  } finally {
    // Cleanup: runs on success and on failure (including the `break` above).
    for (const arr of live) arr.free()
  }
}

gpu.cleanup()
console.log('\n=== LIMIT TEST COMPLETE ===')
|