@rlabs-inc/sparse 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +92 -0
- package/libsparse.dylib +0 -0
- package/native/Makefile +30 -0
- package/native/libsparse.dylib +0 -0
- package/native/sparse.h +180 -0
- package/native/sparse.m +734 -0
- package/native/sparse.metal +215 -0
- package/package.json +38 -0
- package/src/ffi.ts +156 -0
- package/src/gpu.ts +382 -0
- package/src/index.ts +7 -0
- package/src/test-debug-spikes.ts +70 -0
- package/src/test-limits.ts +140 -0
- package/src/test-scatter-loop.ts +226 -0
- package/src/test-stress.ts +160 -0
- package/src/test-webgpu.ts +31 -0
package/CLAUDE.md
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# Sparse - GPU Compute for Biological Neural Simulation
|
|
2
|
+
|
|
3
|
+
## Why We Built This
|
|
4
|
+
|
|
5
|
+
The LLM crowd uses brute force - dense matmul, attention everywhere, O(n²).
|
|
6
|
+
|
|
7
|
+
Biological brains use **sparsity** - only 1-5% of neurons fire at any moment, O(active).
|
|
8
|
+
|
|
9
|
+
Existing libraries (node-mlx, etc.) are optimized for transformers, not sparse biological simulation.
|
|
10
|
+
We discovered a critical bug in node-mlx: `array.at(indices).add(values)` corrupts the indices array after first use,
|
|
11
|
+
making it impossible to run simulation loops without workarounds.
|
|
12
|
+
|
|
13
|
+
So we built our own. Sparsity-first. Minimal. Correct.
|
|
14
|
+
|
|
15
|
+
## Philosophy
|
|
16
|
+
|
|
17
|
+
- **Sparsity is the foundation**, not an optimization
|
|
18
|
+
- **Only what we need** - 6 core operations, nothing more
|
|
19
|
+
- **Reactive integration** - works with @rlabs-inc/signals
|
|
20
|
+
- **Lazy evaluation** - `eval()` is like reading a `derived`
|
|
21
|
+
- **No corruption** - indices are READ-ONLY, never mutated
|
|
22
|
+
|
|
23
|
+
## Core Operations
|
|
24
|
+
|
|
25
|
+
```typescript
|
|
26
|
+
// 1. Array creation
|
|
27
|
+
const arr = gpu.zeros(1000)
|
|
28
|
+
const arr2 = gpu.array([1, 2, 3])
|
|
29
|
+
|
|
30
|
+
// 2. Element-wise math
|
|
31
|
+
const sum = gpu.add(a, b)
|
|
32
|
+
const prod = gpu.multiply(a, b)
|
|
33
|
+
const sq = gpu.square(a)
|
|
34
|
+
|
|
35
|
+
// 3. THE operation - sparse scatter-add
|
|
36
|
+
// Only active synapses contribute, indices are NEVER corrupted
|
|
37
|
+
gpu.scatterAdd(target, indices, values)
|
|
38
|
+
|
|
39
|
+
// 4. Conditionals
|
|
40
|
+
const result = gpu.where(condition, ifTrue, ifFalse)
|
|
41
|
+
const fired = gpu.greaterEqual(voltage, 30)
|
|
42
|
+
|
|
43
|
+
// 5. Random
|
|
44
|
+
const noise = gpu.random.normal([1000])
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Technical Approach
|
|
48
|
+
|
|
49
|
+
### WebGPU (Primary)
|
|
50
|
+
|
|
51
|
+
- Standardized, cross-platform
|
|
52
|
+
- Bun supports it
|
|
53
|
+
- WGSL shaders (cleaner than Metal)
|
|
54
|
+
- Still uses Metal on Apple Silicon
|
|
55
|
+
|
|
56
|
+
### Why Not node-mlx?
|
|
57
|
+
|
|
58
|
+
1. **Bug**: `.at(indices)` corrupts indices after use
|
|
59
|
+
2. **Stale**: 8 months without updates, 273 stars
|
|
60
|
+
3. **LLM-focused**: Optimized for transformers, not sparse biological simulation
|
|
61
|
+
4. **Overhead**: Copying indices every iteration is unacceptable
|
|
62
|
+
|
|
63
|
+
## Usage with Signals
|
|
64
|
+
|
|
65
|
+
```typescript
|
|
66
|
+
import { state, derived, effect } from '@rlabs-inc/signals'
|
|
67
|
+
import { gpu } from '@rlabs-inc/sparse'
|
|
68
|
+
|
|
69
|
+
// Neuron state as reactive GPU arrays
|
|
70
|
+
const voltage = state(gpu.full(1000, -65))
|
|
71
|
+
const recovery = state(gpu.multiply(voltage.value, 0.2))
|
|
72
|
+
|
|
73
|
+
// Derived spike detection
|
|
74
|
+
const fired = derived(() => gpu.greaterEqual(voltage.value, 30))
|
|
75
|
+
|
|
76
|
+
// Effect for logging
|
|
77
|
+
effect(() => {
|
|
78
|
+
const count = gpu.sum(fired.value)
|
|
79
|
+
console.log(`Spikes: ${count}`)
|
|
80
|
+
})
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## The Pattern
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
signals → our own reactive primitives
|
|
87
|
+
fsdb → our own markdown database
|
|
88
|
+
memory → our own consciousness continuity
|
|
89
|
+
sparse → our own GPU compute
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
When something doesn't work, we don't settle. We build.
|
package/libsparse.dylib
ADDED
|
Binary file
|
package/native/Makefile
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# ============================================================================
|
|
2
|
+
# SPARSE - Native Library Build
|
|
3
|
+
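# Compiles the Objective-C wrapper (sparse.m) into libsparse.dylib.
# sparse.metal and sparse.h are prerequisites only (edits trigger a rebuild);
# the shader source is not passed to the compiler by this Makefile.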
|
|
4
|
+
# ============================================================================
|
|
5
|
+
|
|
6
|
+
CC = clang
|
|
7
|
+
CFLAGS = -Wall -O3 -fPIC -fobjc-arc
|
|
8
|
+
LDFLAGS = -dynamiclib -framework Metal -framework Foundation
|
|
9
|
+
|
|
10
|
+
SRC = sparse.m
|
|
11
|
+
OUT = libsparse.dylib
|
|
12
|
+
|
|
13
|
+
# Command-only targets: none of these produce a file with the target's name,
# so declare them phony to keep a stray file/dir of that name from masking them.
.PHONY: all clean install dev
|
|
14
|
+
|
|
15
|
+
all: $(OUT)
|
|
16
|
+
|
|
17
|
+
$(OUT): $(SRC) sparse.h sparse.metal
|
|
18
|
+
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(SRC)
|
|
19
|
+
@echo "Built $(OUT)"
|
|
20
|
+
|
|
21
|
+
clean:
|
|
22
|
+
rm -f $(OUT)
|
|
23
|
+
|
|
24
|
+
install: $(OUT)
|
|
25
|
+
cp $(OUT) ../
|
|
26
|
+
@echo "Installed $(OUT) to parent directory"
|
|
27
|
+
|
|
28
|
+
# For development: build and copy to package root
|
|
29
|
+
dev: $(OUT) install
|
|
30
|
+
@echo "Ready for testing"
|
|
Binary file
|
package/native/sparse.h
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
// ============================================================================
|
|
2
|
+
// SPARSE - C API Header
|
|
3
|
+
// Thin C ABI layer for Bun FFI to call Metal compute
|
|
4
|
+
// ============================================================================
|
|
5
|
+
|
|
6
|
+
#ifndef SPARSE_H
|
|
7
|
+
#define SPARSE_H
|
|
8
|
+
|
|
9
|
+
#include <stdint.h>
|
|
10
|
+
#include <stddef.h>
|
|
11
|
+
|
|
12
|
+
#ifdef __cplusplus
|
|
13
|
+
extern "C" {
|
|
14
|
+
#endif
|
|
15
|
+
|
|
16
|
+
// ============================================================================
|
|
17
|
+
// TYPES
|
|
18
|
+
// ============================================================================
|
|
19
|
+
|
|
20
|
+
// Opaque handle to a GPU buffer
|
|
21
|
+
typedef struct SparseBuffer* SparseBufferRef;
|
|
22
|
+
|
|
23
|
+
// Opaque handle to the GPU context
|
|
24
|
+
typedef struct SparseContext* SparseContextRef;
|
|
25
|
+
|
|
26
|
+
// Buffer data types
|
|
27
|
+
typedef enum {
|
|
28
|
+
SPARSE_FLOAT32 = 0,
|
|
29
|
+
SPARSE_UINT32 = 1,
|
|
30
|
+
} SparseDataType;
|
|
31
|
+
|
|
32
|
+
// ============================================================================
|
|
33
|
+
// CONTEXT MANAGEMENT
|
|
34
|
+
// ============================================================================
|
|
35
|
+
|
|
36
|
+
// Initialize the GPU context (call once at startup)
|
|
37
|
+
SparseContextRef sparse_init(void);
|
|
38
|
+
|
|
39
|
+
// Cleanup and release all resources
|
|
40
|
+
void sparse_cleanup(SparseContextRef ctx);
|
|
41
|
+
|
|
42
|
+
// Synchronize - wait for all GPU operations to complete
|
|
43
|
+
void sparse_sync(SparseContextRef ctx);
|
|
44
|
+
|
|
45
|
+
// Get device info
|
|
46
|
+
const char* sparse_device_name(SparseContextRef ctx);
|
|
47
|
+
uint64_t sparse_device_memory(SparseContextRef ctx);
|
|
48
|
+
|
|
49
|
+
// ============================================================================
|
|
50
|
+
// BUFFER MANAGEMENT
|
|
51
|
+
// ============================================================================
|
|
52
|
+
|
|
53
|
+
// Create a buffer filled with zeros
|
|
54
|
+
SparseBufferRef sparse_zeros(SparseContextRef ctx, uint32_t count, SparseDataType dtype);
|
|
55
|
+
|
|
56
|
+
// Create a buffer of `count` elements, each initialized to `value`
|
|
57
|
+
SparseBufferRef sparse_full(SparseContextRef ctx, uint32_t count, float value);
|
|
58
|
+
|
|
59
|
+
// Create a buffer from data
|
|
60
|
+
SparseBufferRef sparse_from_float(SparseContextRef ctx, const float* data, uint32_t count);
|
|
61
|
+
SparseBufferRef sparse_from_uint(SparseContextRef ctx, const uint32_t* data, uint32_t count);
|
|
62
|
+
|
|
63
|
+
// Read buffer data back to CPU
|
|
64
|
+
void sparse_to_float(SparseBufferRef buf, float* out, uint32_t count);
|
|
65
|
+
void sparse_to_uint(SparseBufferRef buf, uint32_t* out, uint32_t count);
|
|
66
|
+
|
|
67
|
+
// Get buffer properties
|
|
68
|
+
uint32_t sparse_buffer_count(SparseBufferRef buf);
|
|
69
|
+
SparseDataType sparse_buffer_dtype(SparseBufferRef buf);
|
|
70
|
+
|
|
71
|
+
// Release a buffer
|
|
72
|
+
void sparse_buffer_free(SparseBufferRef buf);
|
|
73
|
+
|
|
74
|
+
// ============================================================================
|
|
75
|
+
// CORE OPERATIONS
|
|
76
|
+
// ============================================================================
|
|
77
|
+
|
|
78
|
+
// Scatter-add: target[indices[i]] += values[i] (atomic, thread-safe)
|
|
79
|
+
// CRITICAL: indices are READ-ONLY, never corrupted
|
|
80
|
+
void sparse_scatter_add(
|
|
81
|
+
SparseContextRef ctx,
|
|
82
|
+
SparseBufferRef target, // float32 buffer to add into
|
|
83
|
+
SparseBufferRef indices, // uint32 indices
|
|
84
|
+
SparseBufferRef values, // float32 values to add
|
|
85
|
+
uint32_t count // number of scatter operations
|
|
86
|
+
);
|
|
87
|
+
|
|
88
|
+
// Gather: result[i] = source[indices[i]]
|
|
89
|
+
SparseBufferRef sparse_gather(
|
|
90
|
+
SparseContextRef ctx,
|
|
91
|
+
SparseBufferRef source, // Source buffer
|
|
92
|
+
SparseBufferRef indices, // uint32 indices
|
|
93
|
+
uint32_t count // number of gather operations
|
|
94
|
+
);
|
|
95
|
+
|
|
96
|
+
// Gather boolean: result[i] = source[indices[i]] (for uint/bool arrays)
|
|
97
|
+
SparseBufferRef sparse_gather_bool(
|
|
98
|
+
SparseContextRef ctx,
|
|
99
|
+
SparseBufferRef source, // uint32 source (boolean)
|
|
100
|
+
SparseBufferRef indices, // uint32 indices
|
|
101
|
+
uint32_t count
|
|
102
|
+
);
|
|
103
|
+
|
|
104
|
+
// ============================================================================
|
|
105
|
+
// ELEMENT-WISE OPERATIONS
|
|
106
|
+
// ============================================================================
|
|
107
|
+
|
|
108
|
+
// result = a + b
|
|
109
|
+
SparseBufferRef sparse_add(SparseContextRef ctx, SparseBufferRef a, SparseBufferRef b);
|
|
110
|
+
|
|
111
|
+
// result = a + scalar
|
|
112
|
+
SparseBufferRef sparse_add_scalar(SparseContextRef ctx, SparseBufferRef a, float scalar);
|
|
113
|
+
|
|
114
|
+
// result = a * b
|
|
115
|
+
SparseBufferRef sparse_multiply(SparseContextRef ctx, SparseBufferRef a, SparseBufferRef b);
|
|
116
|
+
|
|
117
|
+
// result = a * scalar
|
|
118
|
+
SparseBufferRef sparse_multiply_scalar(SparseContextRef ctx, SparseBufferRef a, float scalar);
|
|
119
|
+
|
|
120
|
+
// result = a^2
|
|
121
|
+
SparseBufferRef sparse_square(SparseContextRef ctx, SparseBufferRef a);
|
|
122
|
+
|
|
123
|
+
// ============================================================================
|
|
124
|
+
// CONDITIONAL OPERATIONS
|
|
125
|
+
// ============================================================================
|
|
126
|
+
|
|
127
|
+
// result[i] = (a[i] >= threshold) ? 1 : 0
|
|
128
|
+
SparseBufferRef sparse_greater_equal(SparseContextRef ctx, SparseBufferRef a, float threshold);
|
|
129
|
+
|
|
130
|
+
// result[i] = condition[i] ? if_true[i] : if_false[i]
|
|
131
|
+
SparseBufferRef sparse_where(
|
|
132
|
+
SparseContextRef ctx,
|
|
133
|
+
SparseBufferRef condition, // uint32 boolean mask
|
|
134
|
+
SparseBufferRef if_true,
|
|
135
|
+
SparseBufferRef if_false
|
|
136
|
+
);
|
|
137
|
+
|
|
138
|
+
// result[i] = condition[i] ? if_true : if_false (scalar values)
|
|
139
|
+
SparseBufferRef sparse_where_scalar(
|
|
140
|
+
SparseContextRef ctx,
|
|
141
|
+
SparseBufferRef condition,
|
|
142
|
+
float if_true,
|
|
143
|
+
float if_false
|
|
144
|
+
);
|
|
145
|
+
|
|
146
|
+
// ============================================================================
|
|
147
|
+
// REDUCTION OPERATIONS
|
|
148
|
+
// ============================================================================
|
|
149
|
+
|
|
150
|
+
// Sum all elements
|
|
151
|
+
float sparse_sum(SparseContextRef ctx, SparseBufferRef a);
|
|
152
|
+
|
|
153
|
+
// Sum boolean array (count of true values)
|
|
154
|
+
uint32_t sparse_sum_bool(SparseContextRef ctx, SparseBufferRef a);
|
|
155
|
+
|
|
156
|
+
// ============================================================================
|
|
157
|
+
// RANDOM OPERATIONS
|
|
158
|
+
// ============================================================================
|
|
159
|
+
|
|
160
|
+
// Create a new buffer of `count` random uniform values in [low, high)
|
|
161
|
+
SparseBufferRef sparse_random_uniform(
|
|
162
|
+
SparseContextRef ctx,
|
|
163
|
+
uint32_t count,
|
|
164
|
+
float low,
|
|
165
|
+
float high
|
|
166
|
+
);
|
|
167
|
+
|
|
168
|
+
// Create a new buffer of `count` random normal values (parameters: mean, std)
|
|
169
|
+
SparseBufferRef sparse_random_normal(
|
|
170
|
+
SparseContextRef ctx,
|
|
171
|
+
uint32_t count,
|
|
172
|
+
float mean,
|
|
173
|
+
float std
|
|
174
|
+
);
|
|
175
|
+
|
|
176
|
+
#ifdef __cplusplus
|
|
177
|
+
}
|
|
178
|
+
#endif
|
|
179
|
+
|
|
180
|
+
#endif // SPARSE_H
|