@seanhogg/builderforce-memory-engine 2026.6.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +393 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +40 -0
- package/dist/index.js.map +1 -0
- package/dist/kernels/activations.d.ts +5 -0
- package/dist/kernels/activations.d.ts.map +1 -0
- package/dist/kernels/activations.js +171 -0
- package/dist/kernels/activations.js.map +1 -0
- package/dist/kernels/attention.d.ts +19 -0
- package/dist/kernels/attention.d.ts.map +1 -0
- package/dist/kernels/attention.js +263 -0
- package/dist/kernels/attention.js.map +1 -0
- package/dist/kernels/complex_ssd.d.ts +33 -0
- package/dist/kernels/complex_ssd.d.ts.map +1 -0
- package/dist/kernels/complex_ssd.js +305 -0
- package/dist/kernels/complex_ssd.js.map +1 -0
- package/dist/kernels/conv1d.d.ts +3 -0
- package/dist/kernels/conv1d.d.ts.map +1 -0
- package/dist/kernels/conv1d.js +158 -0
- package/dist/kernels/conv1d.js.map +1 -0
- package/dist/kernels/linear_projection.d.ts +3 -0
- package/dist/kernels/linear_projection.d.ts.map +1 -0
- package/dist/kernels/linear_projection.js +219 -0
- package/dist/kernels/linear_projection.js.map +1 -0
- package/dist/kernels/selective_scan.d.ts +3 -0
- package/dist/kernels/selective_scan.d.ts.map +1 -0
- package/dist/kernels/selective_scan.js +348 -0
- package/dist/kernels/selective_scan.js.map +1 -0
- package/dist/kernels/ssd.d.ts +29 -0
- package/dist/kernels/ssd.d.ts.map +1 -0
- package/dist/kernels/ssd.js +276 -0
- package/dist/kernels/ssd.js.map +1 -0
- package/dist/kernels/weight_update.d.ts +3 -0
- package/dist/kernels/weight_update.d.ts.map +1 -0
- package/dist/kernels/weight_update.js +119 -0
- package/dist/kernels/weight_update.js.map +1 -0
- package/dist/model/attention_block.d.ts +48 -0
- package/dist/model/attention_block.d.ts.map +1 -0
- package/dist/model/attention_block.js +262 -0
- package/dist/model/attention_block.js.map +1 -0
- package/dist/model/mamba1_block.d.ts +70 -0
- package/dist/model/mamba1_block.d.ts.map +1 -0
- package/dist/model/mamba1_block.js +333 -0
- package/dist/model/mamba1_block.js.map +1 -0
- package/dist/model/mamba2_block.d.ts +44 -0
- package/dist/model/mamba2_block.d.ts.map +1 -0
- package/dist/model/mamba2_block.js +252 -0
- package/dist/model/mamba2_block.js.map +1 -0
- package/dist/model/mamba3_block.d.ts +51 -0
- package/dist/model/mamba3_block.d.ts.map +1 -0
- package/dist/model/mamba3_block.js +270 -0
- package/dist/model/mamba3_block.js.map +1 -0
- package/dist/model/mamba_block.d.ts +64 -0
- package/dist/model/mamba_block.d.ts.map +1 -0
- package/dist/model/mamba_block.js +303 -0
- package/dist/model/mamba_block.js.map +1 -0
- package/dist/model/mamba_model.d.ts +140 -0
- package/dist/model/mamba_model.d.ts.map +1 -0
- package/dist/model/mamba_model.js +527 -0
- package/dist/model/mamba_model.js.map +1 -0
- package/dist/model/sequence_layer.d.ts +25 -0
- package/dist/model/sequence_layer.d.ts.map +1 -0
- package/dist/model/sequence_layer.js +8 -0
- package/dist/model/sequence_layer.js.map +1 -0
- package/dist/tokenizer/bpe.d.ts +29 -0
- package/dist/tokenizer/bpe.d.ts.map +1 -0
- package/dist/tokenizer/bpe.js +164 -0
- package/dist/tokenizer/bpe.js.map +1 -0
- package/dist/training/autograd.d.ts +27 -0
- package/dist/training/autograd.d.ts.map +1 -0
- package/dist/training/autograd.js +120 -0
- package/dist/training/autograd.js.map +1 -0
- package/dist/training/trainer.d.ts +36 -0
- package/dist/training/trainer.d.ts.map +1 -0
- package/dist/training/trainer.js +183 -0
- package/dist/training/trainer.js.map +1 -0
- package/dist/utils/gpu_utils.d.ts +21 -0
- package/dist/utils/gpu_utils.d.ts.map +1 -0
- package/dist/utils/gpu_utils.js +111 -0
- package/dist/utils/gpu_utils.js.map +1 -0
- package/dist/utils/quantization.d.ts +26 -0
- package/dist/utils/quantization.d.ts.map +1 -0
- package/dist/utils/quantization.js +116 -0
- package/dist/utils/quantization.js.map +1 -0
- package/dist/utils/rng.d.ts +36 -0
- package/dist/utils/rng.d.ts.map +1 -0
- package/dist/utils/rng.js +61 -0
- package/dist/utils/rng.js.map +1 -0
- package/package.json +99 -0
- package/src/index.ts +114 -0
- package/src/kernels/activations.ts +174 -0
- package/src/kernels/attention.ts +268 -0
- package/src/kernels/complex_ssd.ts +307 -0
- package/src/kernels/conv1d.ts +159 -0
- package/src/kernels/linear_projection.ts +220 -0
- package/src/kernels/selective_scan.ts +350 -0
- package/src/kernels/ssd.ts +278 -0
- package/src/kernels/weight_update.ts +120 -0
- package/src/model/attention_block.ts +344 -0
- package/src/model/mamba1_block.ts +437 -0
- package/src/model/mamba2_block.ts +319 -0
- package/src/model/mamba3_block.ts +335 -0
- package/src/model/mamba_block.ts +401 -0
- package/src/model/mamba_model.ts +678 -0
- package/src/model/sequence_layer.ts +29 -0
- package/src/tokenizer/bpe.ts +186 -0
- package/src/training/autograd.ts +135 -0
- package/src/training/trainer.ts +309 -0
- package/src/utils/gpu_utils.ts +147 -0
- package/src/utils/quantization.ts +154 -0
- package/src/utils/rng.ts +65 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Sean Hogg
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
# MambaCode.js
|
|
2
|
+
|
|
3
|
+
> WebGPU-accelerated Mamba-1/2/3 and Hybrid SSM library — written in **TypeScript**, compiled for use in any JavaScript application.
|
|
4
|
+
|
|
5
|
+
[npm package](https://www.npmjs.com/package/@seanhogg/mambacode.js)
|
|
6
|
+
[MIT License](./LICENSE)
|
|
7
|
+
|
|
8
|
+
MambaCode.js is a **TypeScript-first** library that brings the Mamba family of State Space Models to the browser via WebGPU. Version 2.0.0 adds **Mamba-2** (SSD), **Mamba-3** (complex-valued MIMO + ET discretisation), and **hybrid attention** layers, while remaining fully backward-compatible with Mamba-1 checkpoints.
|
|
9
|
+
|
|
10
|
+
> 📖 **New to MambaCode.js?** Start with the [Getting Started Guide](./docs/getting-started.md).
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## What's New in v2.0.0
|
|
15
|
+
|
|
16
|
+
| Feature | Detail |
|
|
17
|
+
|---|---|
|
|
18
|
+
| **Mamba-2 (SSD)** | Structured State Space Duality — chunked matmul scan, multi-head, scalar A, inner RMSNorm |
|
|
19
|
+
| **Mamba-3** | Complex-valued states (ℂ^N), ET discretisation, MIMO recurrence, 2× smaller state size |
|
|
20
|
+
| **AttentionBlock** | Causal multi-head attention for hybrid (Jamba/Zamba) layer schedules |
|
|
21
|
+
| **HybridMambaModel** | Per-layer type schedule — mix mamba1/2/3/attention freely |
|
|
22
|
+
| **MBJS v2 format** | Layer-type metadata in checkpoint header; v1 files still load unchanged |
|
|
23
|
+
| **`MambaBlock` alias** | Kept as deprecated alias for `Mamba1Block` until 3.0.0 |
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Key Features
|
|
28
|
+
|
|
29
|
+
| Feature | Detail |
|
|
30
|
+
|---|---|
|
|
31
|
+
| **TypeScript-first** | Full type declarations shipped with the package |
|
|
32
|
+
| **Plain JS compatible** | Import the compiled `dist/` in any JavaScript project |
|
|
33
|
+
| **SSM variants** | Mamba-1 (S6), Mamba-2 (SSD), Mamba-3 (complex MIMO+ET) |
|
|
34
|
+
| **Hybrid models** | Jamba/Zamba-style mixed SSM + attention schedules |
|
|
35
|
+
| **Hardware target** | WebGPU (WGSL) — Chrome 113+, Edge 113+, Firefox Nightly |
|
|
36
|
+
| **No heavy frameworks** | Zero TensorFlow.js / Transformers.js dependencies |
|
|
37
|
+
| **On-device training** | Tape-based autograd + AdamW GPU optimizer |
|
|
38
|
+
| **Quantization** | FP16 weights, Int8 activations |
|
|
39
|
+
| **Tokenizer** | Browser-side BPE (Qwen2.5-Coder compatible) |
|
|
40
|
+
| **WSLA mode** | Fast-adapt: trains only the selective projection rows |
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
npm install mambacode.js
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Build from source:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
npm run build # compiles TypeScript → dist/
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Quick Start
|
|
59
|
+
|
|
60
|
+
### Mamba-1 (backward-compatible, unchanged)
|
|
61
|
+
|
|
62
|
+
```ts
|
|
63
|
+
import { MambaModel, MambaTrainer, BPETokenizer, initWebGPU } from 'mambacode.js';
|
|
64
|
+
|
|
65
|
+
const { device } = await initWebGPU();
|
|
66
|
+
const tokenizer = new BPETokenizer();
|
|
67
|
+
await tokenizer.load('/vocab.json', '/merges.txt');
|
|
68
|
+
|
|
69
|
+
const model = new MambaModel(device, {
|
|
70
|
+
vocabSize : tokenizer.vocabSize,
|
|
71
|
+
dModel : 512,
|
|
72
|
+
numLayers : 8,
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
await model.loadWeights(await (await fetch('/checkpoint.bin')).arrayBuffer());
|
|
76
|
+
const ids = await model.generate(tokenizer.encode('function add('), 200);
|
|
77
|
+
console.log(tokenizer.decode(ids));
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Mamba-2 (SSD)
|
|
81
|
+
|
|
82
|
+
```ts
|
|
83
|
+
import { HybridMambaModel } from 'mambacode.js';
|
|
84
|
+
|
|
85
|
+
const model = new HybridMambaModel(device, {
|
|
86
|
+
vocabSize : tokenizer.vocabSize,
|
|
87
|
+
dModel : 512,
|
|
88
|
+
numLayers : 8,
|
|
89
|
+
nHeads : 8,
|
|
90
|
+
layers : Array(8).fill({ type: 'mamba2' }),
|
|
91
|
+
});
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### Mamba-3 (complex states)
|
|
95
|
+
|
|
96
|
+
```ts
|
|
97
|
+
const model = new HybridMambaModel(device, {
|
|
98
|
+
vocabSize : tokenizer.vocabSize,
|
|
99
|
+
dModel : 512,
|
|
100
|
+
numLayers : 8,
|
|
101
|
+
nHeads : 8,
|
|
102
|
+
layers : Array(8).fill({ type: 'mamba3' }),
|
|
103
|
+
});
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Hybrid (Jamba-style: every 4th layer is attention)
|
|
107
|
+
|
|
108
|
+
```ts
|
|
109
|
+
const model = new HybridMambaModel(device, {
|
|
110
|
+
vocabSize : tokenizer.vocabSize,
|
|
111
|
+
dModel : 512,
|
|
112
|
+
numLayers : 12,
|
|
113
|
+
nHeads : 8,
|
|
114
|
+
layers : Array.from({ length: 12 }, (_, i) => ({
|
|
115
|
+
type: i % 4 === 3 ? 'attention' : 'mamba2',
|
|
116
|
+
})),
|
|
117
|
+
});
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## Architecture Reference
|
|
123
|
+
|
|
124
|
+
### Mamba-1 Block (S6)
|
|
125
|
+
|
|
126
|
+
```
|
|
127
|
+
Input (B, L, D)
|
|
128
|
+
└─ RMSNorm
|
|
129
|
+
└─ in_proj → x, z (gate)
|
|
130
|
+
x → conv1d → SiLU → x_proj → Δ, B, C
|
|
131
|
+
Δ → dt_proj → softplus
|
|
132
|
+
Selective Scan S6
|
|
133
|
+
h_t = Ā·h_{t-1} + B̄·x_t
|
|
134
|
+
y_t = C·h_t + D·x_t
|
|
135
|
+
└─ y * SiLU(z)
|
|
136
|
+
└─ out_proj + residual
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Mamba-2 Block (SSD)
|
|
140
|
+
|
|
141
|
+
```
|
|
142
|
+
Input (B, L, D)
|
|
143
|
+
└─ RMSNorm
|
|
144
|
+
└─ in_proj → [x (D_inner), B (G·N), C (G·N), dt (H)]
|
|
145
|
+
conv1d over x, B, C (fused)
|
|
146
|
+
SSD scan: A_bar = exp(-softplus(A) · softplus(dt))
|
|
147
|
+
h_t = A_bar · h_{t-1} + B · x_t
|
|
148
|
+
y_t = C · h_t
|
|
149
|
+
└─ inner RMSNorm
|
|
150
|
+
└─ out_proj + residual
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### Mamba-3 Block (complex MIMO, ET)
|
|
154
|
+
|
|
155
|
+
```
|
|
156
|
+
Input (B, L, D)
|
|
157
|
+
└─ Same structure as Mamba-2 but:
|
|
158
|
+
• A ∈ ℂ (log|A|, arg(A)) per head
|
|
159
|
+
• A_bar = exp(Δ·A) [complex]
|
|
160
|
+
• B_bar = (A_bar − 1)·A⁻¹·B [ET, exact]
|
|
161
|
+
• h_t ∈ ℂ^(N/2), y_t = Re(C·h_t)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### AttentionBlock (causal MHA)
|
|
165
|
+
|
|
166
|
+
```
|
|
167
|
+
Input (B, L, D)
|
|
168
|
+
└─ RMSNorm
|
|
169
|
+
└─ wQKV → Q, K, V (B, L, H, d_head)
|
|
170
|
+
└─ scores = Q·Kᵀ / √d_head (causal mask)
|
|
171
|
+
└─ softmax → weighted V sum
|
|
172
|
+
└─ concat heads → wO + residual
|
|
173
|
+
[optional FFN sublayer]
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## File Structure
|
|
179
|
+
|
|
180
|
+
```
|
|
181
|
+
src/
|
|
182
|
+
├── index.ts ← public API entry point (v2.0.0)
|
|
183
|
+
├── kernels/
|
|
184
|
+
│ ├── selective_scan.ts ← WGSL: S6 forward/backward (Mamba-1)
|
|
185
|
+
│ ├── ssd.ts ← WGSL: chunked SSD forward/backward (Mamba-2)
|
|
186
|
+
│ ├── complex_ssd.ts ← WGSL: complex SSD + ET + MIMO (Mamba-3)
|
|
187
|
+
│ ├── attention.ts ← WGSL: tiled causal MHA forward/backward
|
|
188
|
+
│ ├── conv1d.ts ← WGSL: 1D causal convolution (+ groups param)
|
|
189
|
+
│ ├── linear_projection.ts ← WGSL: tiled GEMM
|
|
190
|
+
│ ├── weight_update.ts ← WGSL: AdamW + gradient clipping
|
|
191
|
+
│ └── activations.ts ← WGSL: SiLU, RMSNorm, Softmax
|
|
192
|
+
├── model/
|
|
193
|
+
│ ├── sequence_layer.ts ← SequenceLayer interface (LayerType, LayerParam)
|
|
194
|
+
│ ├── mamba1_block.ts ← Mamba1Block (renamed from MambaBlock)
|
|
195
|
+
│ ├── mamba2_block.ts ← Mamba2Block (SSD)
|
|
196
|
+
│ ├── mamba3_block.ts ← Mamba3Block (complex + MIMO + ET)
|
|
197
|
+
│ ├── attention_block.ts ← AttentionBlock (causal MHA)
|
|
198
|
+
│ └── mamba_model.ts ← HybridMambaModel + MambaModel alias
|
|
199
|
+
├── training/
|
|
200
|
+
│ ├── autograd.ts ← Tape-based AD + loss helpers
|
|
201
|
+
│ └── trainer.ts ← MambaTrainer (AdamW, WSLA)
|
|
202
|
+
├── tokenizer/
|
|
203
|
+
│ └── bpe.ts ← Browser-side BPE tokenizer
|
|
204
|
+
└── utils/
|
|
205
|
+
├── gpu_utils.ts ← WebGPU device/buffer management
|
|
206
|
+
└── quantization.ts ← FP16 / Int8 quantization
|
|
207
|
+
|
|
208
|
+
tools/ ← Model building & checkpoint tooling
|
|
209
|
+
├── generate-bin.js ← CLI: generate an MBJS v2 checkpoint from scratch
|
|
210
|
+
├── pretrain.html ← Browser: pretrain a model on a text corpus
|
|
211
|
+
└── convert.html ← Browser: convert HuggingFace Mamba → MBJS format
|
|
212
|
+
|
|
213
|
+
tests/
|
|
214
|
+
├── kernels.test.ts
|
|
215
|
+
├── autograd.test.ts
|
|
216
|
+
├── bpe.test.ts
|
|
217
|
+
└── quantization.test.ts
|
|
218
|
+
|
|
219
|
+
docs/
|
|
220
|
+
├── getting-started.md
|
|
221
|
+
├── integration-architecture.md
|
|
222
|
+
├── weight-lifecycle.md
|
|
223
|
+
├── api-reference.md
|
|
224
|
+
└── prd-mambacode-v2-v3-hybrid.md ← PRD: Mamba-2/3/hybrid implementation spec
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
---
|
|
228
|
+
|
|
229
|
+
## Tools
|
|
230
|
+
|
|
231
|
+
The `tools/` directory contains model-building and checkpoint utilities that operate at the mambacode.js level. These are **not part of the MambaKit API** — they are for authors who want to build, pretrain, or convert model weights.
|
|
232
|
+
|
|
233
|
+
### `tools/generate-bin.js` — Generate a blank MBJS checkpoint
|
|
234
|
+
|
|
235
|
+
Creates a properly-shaped MBJS v2 `.bin` file with randomly initialised weights. Useful as a starting point before pretraining.
|
|
236
|
+
|
|
237
|
+
```bash
|
|
238
|
+
node tools/generate-bin.js # nano → model.bin
|
|
239
|
+
node tools/generate-bin.js --size small # small preset
|
|
240
|
+
node tools/generate-bin.js --size nano --out my.bin
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
The weights are **not pretrained** — use `pretrain.html` to run language-model training.
|
|
244
|
+
|
|
245
|
+
### `tools/pretrain.html` — Browser pretraining UI
|
|
246
|
+
|
|
247
|
+
In-browser training loop over a text corpus. Requires a WebGPU-capable browser.
|
|
248
|
+
|
|
249
|
+
```bash
|
|
250
|
+
npm run build
|
|
251
|
+
npm run serve
|
|
252
|
+
# Open http://localhost:3000/tools/pretrain.html
|
|
253
|
+
# Load a corpus (e.g. TinyStories), configure size/epochs, click Start Training
|
|
254
|
+
# Download the resulting .bin checkpoint
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
### `tools/convert.html` — HuggingFace → MBJS converter
|
|
258
|
+
|
|
259
|
+
Converts `state-spaces/mamba` safetensors checkpoints to MBJS format.
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
# Open http://localhost:3000/tools/convert.html
|
|
263
|
+
# Drop model.safetensors from huggingface.co/state-spaces/mamba-130m
|
|
264
|
+
# Download converted .bin
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
---
|
|
268
|
+
|
|
269
|
+
## WGSL Kernels
|
|
270
|
+
|
|
271
|
+
| Kernel file | Entry points | Used by |
|
|
272
|
+
|---|---|---|
|
|
273
|
+
| `selective_scan.ts` | `forward_scan`, `forward_reduce`, `selective_scan_backward` | Mamba-1 |
|
|
274
|
+
| `ssd.ts` | `ssd_chunk_forward`, `ssd_chunk_backward` | Mamba-2 |
|
|
275
|
+
| `complex_ssd.ts` | `complex_ssd_forward`, `complex_ssd_backward` | Mamba-3 |
|
|
276
|
+
| `attention.ts` | `attention_forward`, `attention_value`, `attention_backward` | Attention |
|
|
277
|
+
| `conv1d.ts` | `conv1d_forward`, `conv1d_backward_dx`, `conv1d_backward_dw` | All SSM |
|
|
278
|
+
| `linear_projection.ts` | `linear_forward`, `linear_backward_dX`, `linear_backward_dW` | All layers |
|
|
279
|
+
| `activations.ts` | `silu_forward`, `rmsnorm_forward`, `softmax_forward_simple` | All layers |
|
|
280
|
+
| `weight_update.ts` | `adamw_update`, `grad_norm_reduce`, `grad_clip_scale` | Training |
|
|
281
|
+
|
|
282
|
+
---
|
|
283
|
+
|
|
284
|
+
## MBJS Binary Format
|
|
285
|
+
|
|
286
|
+
### Version 1 (legacy, still readable)
|
|
287
|
+
|
|
288
|
+
```
|
|
289
|
+
[0..3] magic = 0x4D424A53 ('MBJS')
|
|
290
|
+
[4..7] version = 1
|
|
291
|
+
[8..11] nParams : uint32
|
|
292
|
+
[12 ..] numel[i] : uint32 (×nParams)
|
|
293
|
+
[data] float32 values
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
### Version 2 (written by default from v2.0.0)
|
|
297
|
+
|
|
298
|
+
```
|
|
299
|
+
[0..3] magic = 0x4D424A53
|
|
300
|
+
[4..7] version = 2
|
|
301
|
+
[8..11] nLayers : uint32
|
|
302
|
+
[12 ..] layerType[i] : uint8 (0=mamba1, 1=mamba2, 2=mamba3, 3=attention)
|
|
303
|
+
[pad] aligned to 4 bytes
|
|
304
|
+
[next4] nParams : uint32
|
|
305
|
+
[next..] numel[i] : uint32 (×nParams)
|
|
306
|
+
[data] float32 values
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
Version 1 files are loaded transparently — all layers are assumed to be `mamba1`.
|
|
310
|
+
|
|
311
|
+
---
|
|
312
|
+
|
|
313
|
+
## Migration from v1.x
|
|
314
|
+
|
|
315
|
+
```ts
|
|
316
|
+
// v1.x — no change needed (mamba1 default is preserved)
|
|
317
|
+
const model = new MambaModel(device, config);
|
|
318
|
+
|
|
319
|
+
// v2.x — opt into Mamba-2
|
|
320
|
+
const model = new HybridMambaModel(device, { ...config, layers: Array(8).fill({ type: 'mamba2' }) });
|
|
321
|
+
|
|
322
|
+
// v2.x — MambaBlock is a deprecated alias for Mamba1Block; both still work
|
|
323
|
+
import { MambaBlock, Mamba1Block } from 'mambacode.js';
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
---
|
|
327
|
+
|
|
328
|
+
## Testing
|
|
329
|
+
|
|
330
|
+
```bash
|
|
331
|
+
npm test # unit tests (no GPU required)
|
|
332
|
+
npm run build # compile TypeScript → dist/
|
|
333
|
+
npm run lint # ESLint
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
---
|
|
337
|
+
|
|
338
|
+
## Browser Compatibility
|
|
339
|
+
|
|
340
|
+
| Browser | Version | Status |
|
|
341
|
+
|---|---|---|
|
|
342
|
+
| Chrome | 113+ | ✅ Supported |
|
|
343
|
+
| Edge | 113+ | ✅ Supported |
|
|
344
|
+
| Firefox | Nightly | ✅ (flag: `dom.webgpu.enabled`) |
|
|
345
|
+
| Safari | 18+ | ⚠️ Partial |
|
|
346
|
+
| Node.js | — | ❌ Not supported |
|
|
347
|
+
|
|
348
|
+
---
|
|
349
|
+
|
|
350
|
+
## Acknowledgements
|
|
351
|
+
|
|
352
|
+
- **Mamba-3** — Lahoti et al., *Mamba: The Hard Way* (arXiv 2603.15569, ICLR 2026)
|
|
353
|
+
- **Mamba-2** — Dao & Gu, *Transformers are SSMs* (arXiv 2405.21060, 2024)
|
|
354
|
+
- **Mamba-1** — Gu & Dao, *Mamba: Linear-Time Sequence Modeling with Selective State Spaces* (arXiv 2312.00752, 2023)
|
|
355
|
+
|
|
356
|
+
---
|
|
357
|
+
|
|
358
|
+
## Professional Platform
|
|
359
|
+
|
|
360
|
+
**Want managed infrastructure for your MambaCode.js models?**
|
|
361
|
+
|
|
362
|
+
[**Builderforce.ai**](https://builderforce.ai) is the professional enterprise platform built on MambaCode.js. It provides:
|
|
363
|
+
|
|
364
|
+
- **In-browser LoRA training** — fine-tune up to 2B-parameter models on instruction datasets using the MambaCode.js WebGPU kernels, entirely client-side
|
|
365
|
+
- **Hybrid Local Brain** — the Mamba State Engine runs a selective scan alongside Transformers.js inference for persistent agent memory, powered by MambaCode.js WGSL kernels
|
|
366
|
+
- **Dataset generation** — LLM-assisted JSONL instruction dataset creation with streaming progress
|
|
367
|
+
- **Workforce Registry** — publish trained models as specialist AI agents; discoverable and hirable by the community
|
|
368
|
+
- **Agent portability** — `AgentPackage` bundles the LoRA adapter, `MambaStateSnapshot`, and agent profile into a single portable JSON artifact
|
|
369
|
+
- **BuilderForce Agents mesh** — trained agents deploy as self-hosted coding agents via [BuilderForce Agents](https://builderforce.ai), orchestrated from Builderforce
|
|
370
|
+
|
|
371
|
+
Use MambaCode.js to build and experiment locally. Use Builderforce.ai to deploy, manage, and share at scale.
|
|
372
|
+
|
|
373
|
+
```
|
|
374
|
+
MambaCode.js (WebGPU kernels)
|
|
375
|
+
↓
|
|
376
|
+
SSM.js (session API + runtime + memory)
|
|
377
|
+
↓
|
|
378
|
+
Builderforce.ai (enterprise IDE + training + registry)
|
|
379
|
+
↓
|
|
380
|
+
BuilderForce Agents (self-hosted agent mesh)
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
---
|
|
384
|
+
|
|
385
|
+
## Consolidated Gap Register
|
|
386
|
+
|
|
387
|
+
- **Release publish blocked by NPM_TOKEN permissions** (`.github/workflows/release.yml`): the `2026.5.31` tag failed at `npm publish` with a 404 on the PUT request. The package exists on npm (maintainer `seanhogg`) and `npm whoami` passes, so the `NPM_TOKEN` secret authenticates but lacks read-write access to `@seanhogg/mambacode.js` (a read-only token, the wrong account, or a granular token whose allowlist omits the package). The workflow now fails fast with this diagnosis, but the actual fix is registry-side and manual: rotate `NPM_TOKEN` to an Automation/granular token with publish rights, then re-push the tag. Fixing this unblocks every downstream consumer pinned to a published `@seanhogg/mambacode.js` version.
|
|
388
|
+
|
|
389
|
+
---
|
|
390
|
+
|
|
391
|
+
## License
|
|
392
|
+
|
|
393
|
+
MIT
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MambaCode.js – Entry Point (v2.0.0)
|
|
3
|
+
*/
|
|
4
|
+
export { HybridMambaModel, MambaModel } from './model/mamba_model.js';
|
|
5
|
+
export { Mamba1Block } from './model/mamba1_block.js';
|
|
6
|
+
export { Mamba2Block } from './model/mamba2_block.js';
|
|
7
|
+
export { Mamba3Block } from './model/mamba3_block.js';
|
|
8
|
+
export { AttentionBlock } from './model/attention_block.js';
|
|
9
|
+
export { MambaBlock } from './model/mamba1_block.js';
|
|
10
|
+
export { MambaTrainer } from './training/trainer.js';
|
|
11
|
+
export { Tensor, backward, enableGrad, noGrad, clearTape, recordOperation, crossEntropyLoss, crossEntropyGrad, } from './training/autograd.js';
|
|
12
|
+
export { BPETokenizer } from './tokenizer/bpe.js';
|
|
13
|
+
export { SeededRng, setInitSeed, randn, gaussianArray } from './utils/rng.js';
|
|
14
|
+
export type { HybridMambaModelConfig, MambaModelConfig, ModelForwardResult, SamplingOptions, LayerSpec, } from './model/mamba_model.js';
|
|
15
|
+
export type { SequenceLayer, LayerParam, LayerType, LayerForwardResult } from './model/sequence_layer.js';
|
|
16
|
+
export type { Mamba1BlockConfig, BlockParam, BlockCache, BlockForwardResult, MambaBlockConfig } from './model/mamba1_block.js';
|
|
17
|
+
export type { Mamba2BlockConfig, Mamba2Cache } from './model/mamba2_block.js';
|
|
18
|
+
export type { Mamba3BlockConfig, Mamba3Cache } from './model/mamba3_block.js';
|
|
19
|
+
export type { AttentionBlockConfig, AttentionCache } from './model/attention_block.js';
|
|
20
|
+
export { initWebGPU, createStorageBuffer, createEmptyStorageBuffer, createUniformBuffer, createComputePipeline, createBindGroup, dispatchKernel, readBuffer, uploadBuffer, cdiv, } from './utils/gpu_utils.js';
|
|
21
|
+
export { quantizeFp16, dequantizeFp16, floatToFp16, fp16ToFloat, quantizeInt8, dequantizeInt8, quantizeInt8PerChannel, dequantizeInt8PerChannel, estimateMemory, } from './utils/quantization.js';
|
|
22
|
+
export { SELECTIVE_SCAN_FORWARD_WGSL, SELECTIVE_SCAN_BACKWARD_WGSL } from './kernels/selective_scan.js';
|
|
23
|
+
export { CONV1D_FORWARD_WGSL, CONV1D_BACKWARD_WGSL } from './kernels/conv1d.js';
|
|
24
|
+
export { LINEAR_FORWARD_WGSL, LINEAR_BACKWARD_WGSL } from './kernels/linear_projection.js';
|
|
25
|
+
export { WEIGHT_UPDATE_WGSL, GRAD_CLIP_WGSL } from './kernels/weight_update.js';
|
|
26
|
+
export { ACTIVATIONS_WGSL, ACTIVATIONS_BACKWARD_WGSL, SOFTMAX_FORWARD_WGSL, SOFTMAX_BACKWARD_WGSL } from './kernels/activations.js';
|
|
27
|
+
export { SSD_FORWARD_WGSL, SSD_BACKWARD_WGSL } from './kernels/ssd.js';
|
|
28
|
+
export { COMPLEX_SSD_FORWARD_WGSL, COMPLEX_SSD_BACKWARD_WGSL } from './kernels/complex_ssd.js';
|
|
29
|
+
export { ATTENTION_FORWARD_WGSL, ATTENTION_BACKWARD_WGSL, SOFTMAX_WGSL } from './kernels/attention.js';
|
|
30
|
+
export declare const VERSION = "2.0.0";
|
|
31
|
+
export declare const DESCRIPTION = "MambaCode.js: WebGPU-accelerated Mamba-1/2/3 and Hybrid SSM for browser code models";
|
|
32
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAGtE,OAAO,EAAE,WAAW,EAAE,MAAQ,yBAAyB,CAAC;AACxD,OAAO,EAAE,WAAW,EAAE,MAAQ,yBAAyB,CAAC;AACxD,OAAO,EAAE,WAAW,EAAE,MAAQ,yBAAyB,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAG5D,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAIrD,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EACH,MAAM,EACN,QAAQ,EACR,UAAU,EACV,MAAM,EACN,SAAS,EACT,eAAe,EACf,gBAAgB,EAChB,gBAAgB,GACnB,MAAM,wBAAwB,CAAC;AAIhC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAIlD,OAAO,EAAE,SAAS,EAAE,WAAW,EAAE,KAAK,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAI9E,YAAY,EACR,sBAAsB,EACtB,gBAAgB,EAChB,kBAAkB,EAClB,eAAe,EACf,SAAS,GACZ,MAAM,wBAAwB,CAAC;AAEhC,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAC1G,YAAY,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAC/H,YAAY,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AAC9E,YAAY,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAO,yBAAyB,CAAC;AAC/E,YAAY,EAAE,oBAAoB,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAIvF,OAAO,EACH,UAAU,EACV,mBAAmB,EACnB,wBAAwB,EACxB,mBAAmB,EACnB,qBAAqB,EACrB,eAAe,EACf,cAAc,EACd,UAAU,EACV,YAAY,EACZ,IAAI,GACP,MAAM,sBAAsB,CAAC;AAI9B,OAAO,EACH,YAAY,EACZ,cAAc,EACd,WAAW,EACX,WAAW,EACX,YAAY,EACZ,cAAc,EACd,sBAAsB,EACtB,wBAAwB,EACxB,cAAc,GACjB,MAAM,yBAAyB,CAAC;AAKjC,OAAO,EAAE,2BAA2B,EAAE,4BAA4B,EAAE,MAC3D,6BAA6B,CAAC;AACvC,OAAO,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAC3C,qBAAqB,CAAC;AAC/B,OAAO,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAC3C,gCAAgC,CAAC;AAC1C,OAAO,EAAE,kBAAkB,EAAE,cAAc,EAAE,MACpC,4BAA4B,CAAC;AACtC,OAAO,EAAE,gBAAgB,EAAE,yBAAyB,EAAE,oBAAoB,EAAE,qBAAqB,EAAE,MAC1F,0BAA0B,CAAC;AAGpC,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MACrC,kBAAkB,CAAC;AAG5B,OAAO,EAAE,wBAAwB,EAAE,yBAAyB,EAAE,MACrD,0BAA0B,CAAC;AAGpC,OAAO,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,YAAY,EAAE,MAC/D,wBAAwB,CAAC;AAIlC,eAAO,MAAM,OAAO,UAAc,CAAC;AACnC,eAAO,MAAM,WAAW,wFAAwF,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MambaCode.js – Entry Point (v2.0.0)
|
|
3
|
+
*/
|
|
4
|
+
// ── Model classes ─────────────────────────────────────────────────────────────
|
|
5
|
+
export { HybridMambaModel, MambaModel } from './model/mamba_model.js';
|
|
6
|
+
// New block classes
|
|
7
|
+
export { Mamba1Block } from './model/mamba1_block.js';
|
|
8
|
+
export { Mamba2Block } from './model/mamba2_block.js';
|
|
9
|
+
export { Mamba3Block } from './model/mamba3_block.js';
|
|
10
|
+
export { AttentionBlock } from './model/attention_block.js';
|
|
11
|
+
// Deprecated alias — kept until 3.0.0
|
|
12
|
+
export { MambaBlock } from './model/mamba1_block.js';
|
|
13
|
+
// ── Training ──────────────────────────────────────────────────────────────────
|
|
14
|
+
export { MambaTrainer } from './training/trainer.js';
|
|
15
|
+
export { Tensor, backward, enableGrad, noGrad, clearTape, recordOperation, crossEntropyLoss, crossEntropyGrad, } from './training/autograd.js';
|
|
16
|
+
// ── Tokenizer ─────────────────────────────────────────────────────────────────
|
|
17
|
+
export { BPETokenizer } from './tokenizer/bpe.js';
|
|
18
|
+
// ── Seeded RNG (reproducible weight init) ─────────────────────────────────────
|
|
19
|
+
export { SeededRng, setInitSeed, randn, gaussianArray } from './utils/rng.js';
|
|
20
|
+
// ── GPU utilities ─────────────────────────────────────────────────────────────
|
|
21
|
+
export { initWebGPU, createStorageBuffer, createEmptyStorageBuffer, createUniformBuffer, createComputePipeline, createBindGroup, dispatchKernel, readBuffer, uploadBuffer, cdiv, } from './utils/gpu_utils.js';
|
|
22
|
+
// ── Quantization ──────────────────────────────────────────────────────────────
|
|
23
|
+
export { quantizeFp16, dequantizeFp16, floatToFp16, fp16ToFloat, quantizeInt8, dequantizeInt8, quantizeInt8PerChannel, dequantizeInt8PerChannel, estimateMemory, } from './utils/quantization.js';
|
|
24
|
+
// ── WGSL kernel sources ───────────────────────────────────────────────────────
|
|
25
|
+
// Mamba-1 kernels (unchanged)
|
|
26
|
+
export { SELECTIVE_SCAN_FORWARD_WGSL, SELECTIVE_SCAN_BACKWARD_WGSL } from './kernels/selective_scan.js';
|
|
27
|
+
export { CONV1D_FORWARD_WGSL, CONV1D_BACKWARD_WGSL } from './kernels/conv1d.js';
|
|
28
|
+
export { LINEAR_FORWARD_WGSL, LINEAR_BACKWARD_WGSL } from './kernels/linear_projection.js';
|
|
29
|
+
export { WEIGHT_UPDATE_WGSL, GRAD_CLIP_WGSL } from './kernels/weight_update.js';
|
|
30
|
+
export { ACTIVATIONS_WGSL, ACTIVATIONS_BACKWARD_WGSL, SOFTMAX_FORWARD_WGSL, SOFTMAX_BACKWARD_WGSL } from './kernels/activations.js';
|
|
31
|
+
// Mamba-2 SSD kernels
|
|
32
|
+
export { SSD_FORWARD_WGSL, SSD_BACKWARD_WGSL } from './kernels/ssd.js';
|
|
33
|
+
// Mamba-3 complex SSD kernels
|
|
34
|
+
export { COMPLEX_SSD_FORWARD_WGSL, COMPLEX_SSD_BACKWARD_WGSL } from './kernels/complex_ssd.js';
|
|
35
|
+
// Attention kernels
|
|
36
|
+
export { ATTENTION_FORWARD_WGSL, ATTENTION_BACKWARD_WGSL, SOFTMAX_WGSL } from './kernels/attention.js';
|
|
37
|
+
// ── Version ───────────────────────────────────────────────────────────────────
|
|
38
|
+
export const VERSION = '2.0.0';
|
|
39
|
+
export const DESCRIPTION = 'MambaCode.js: WebGPU-accelerated Mamba-1/2/3 and Hybrid SSM for browser code models';
|
|
40
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,iFAAiF;AAEjF,OAAO,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAEtE,oBAAoB;AACpB,OAAO,EAAE,WAAW,EAAE,MAAQ,yBAAyB,CAAC;AACxD,OAAO,EAAE,WAAW,EAAE,MAAQ,yBAAyB,CAAC;AACxD,OAAO,EAAE,WAAW,EAAE,MAAQ,yBAAyB,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAE5D,sCAAsC;AACtC,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAErD,iFAAiF;AAEjF,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EACH,MAAM,EACN,QAAQ,EACR,UAAU,EACV,MAAM,EACN,SAAS,EACT,eAAe,EACf,gBAAgB,EAChB,gBAAgB,GACnB,MAAM,wBAAwB,CAAC;AAEhC,iFAAiF;AAEjF,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAElD,iFAAiF;AAEjF,OAAO,EAAE,SAAS,EAAE,WAAW,EAAE,KAAK,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAkB9E,iFAAiF;AAEjF,OAAO,EACH,UAAU,EACV,mBAAmB,EACnB,wBAAwB,EACxB,mBAAmB,EACnB,qBAAqB,EACrB,eAAe,EACf,cAAc,EACd,UAAU,EACV,YAAY,EACZ,IAAI,GACP,MAAM,sBAAsB,CAAC;AAE9B,iFAAiF;AAEjF,OAAO,EACH,YAAY,EACZ,cAAc,EACd,WAAW,EACX,WAAW,EACX,YAAY,EACZ,cAAc,EACd,sBAAsB,EACtB,wBAAwB,EACxB,cAAc,GACjB,MAAM,yBAAyB,CAAC;AAEjC,iFAAiF;AAEjF,8BAA8B;AAC9B,OAAO,EAAE,2BAA2B,EAAE,4BAA4B,EAAE,MAC3D,6BAA6B,CAAC;AACvC,OAAO,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAC3C,qBAAqB,CAAC;AAC/B,OAAO,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAC3C,gCAAgC,CAAC;AAC1C,OAAO,EAAE,kBAAkB,EAAE,cAAc,EAAE,MACpC,4BAA4B,CAAC;AACtC,OAAO,EAAE,gBAAgB,EAAE,yBAAyB,EAAE,oBAAoB,EAAE,qBAAqB,EAAE,MAC1F,0BAA0B,CAAC;AAEpC,sBAAsB;AACtB,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MACrC,kBAAkB,CAAC;AAE5B,8BAA8B;AAC9B,OAAO,EAAE,wBAAwB,EAAE,yBAAyB,EAAE,MACrD,0BAA0B,CAAC;AAEpC,oBAAoB;AACpB,OAAO,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,YAAY,EAAE,MAC/D,wBAAwB,CAAC;AAElC,iFAAiF;AAEjF,MAAM,CAAC,MAAM,OAAO,GAAO,OAAO,CAAC;AACnC,MAAM,CAAC,MAAM,WAAW,GAAG,qFAAqF,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"activations.d.ts","sourceRoot":"","sources":["../../src/kernels/activations.ts"],"names":[],"mappings":"AAGA,eAAO,MAAM,gBAAgB,EAAE,MAyD9B,CAAC;AAOF,eAAO,MAAM,oBAAoB,EAAE,MA2ClC,CAAC;AAEF,eAAO,MAAM,qBAAqB,EAAE,MAkCnC,CAAC;AAGF,eAAO,MAAM,yBAAyB,EAAE,MAwBvC,CAAC"}
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
// Activation and normalisation WGSL kernels: SiLU (Swish) forward and backward, RMSNorm forward, and row-wise softmax forward and backward.
|
|
2
|
+
// Used in the gating mechanism of the Mamba Mixer Block.
|
|
3
|
+
/**
 * WGSL source containing two compute entry points:
 *
 *  - `silu_forward` (workgroup size 256): for every i < num_elements writes
 *    y[i] = x[i] / (1 + exp(-x[i])), i.e. x * sigmoid(x).
 *  - `rmsnorm_forward` (workgroup size 64): one invocation per row. It sums the squares
 *    of the row, stores 1 / sqrt(mean_sq + eps) in rms_inv[row], then writes
 *    rms_x * inv_rms * rms_w[col] to rms_y.
 *
 * Both sets of resources live in @group(0) and reuse binding numbers 0-2
 * (SiLU uses bindings 0-2, RMSNorm uses bindings 0-4).
 */
export const ACTIVATIONS_WGSL = /* wgsl */ `
|
|
4
|
+
|
|
5
|
+
struct ActParams {
|
|
6
|
+
num_elements : u32,
|
|
7
|
+
};
|
|
8
|
+
|
|
9
|
+
@group(0) @binding(0) var<uniform> p : ActParams;
|
|
10
|
+
@group(0) @binding(1) var<storage, read> x : array<f32>;
|
|
11
|
+
@group(0) @binding(2) var<storage, read_write> y : array<f32>;
|
|
12
|
+
|
|
13
|
+
// SiLU(x) = x * sigmoid(x)
|
|
14
|
+
@compute @workgroup_size(256, 1, 1)
|
|
15
|
+
fn silu_forward(
|
|
16
|
+
@builtin(global_invocation_id) gid : vec3<u32>,
|
|
17
|
+
) {
|
|
18
|
+
let i = gid.x;
|
|
19
|
+
if (i >= p.num_elements) { return; }
|
|
20
|
+
let v = x[i];
|
|
21
|
+
y[i] = v / (1.0 + exp(-v));
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
// RMSNorm forward: y = x / rms(x) * weight
|
|
25
|
+
// Requires separate uniform for rms norm params.
|
|
26
|
+
struct RMSNormParams {
|
|
27
|
+
num_rows : u32, // number of vectors (batch * seq_len)
|
|
28
|
+
dim : u32, // feature dimension
|
|
29
|
+
eps : f32,
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
@group(0) @binding(0) var<uniform> rms_p : RMSNormParams;
|
|
33
|
+
@group(0) @binding(1) var<storage, read> rms_x : array<f32>;
|
|
34
|
+
@group(0) @binding(2) var<storage, read> rms_w : array<f32>; // scale (dim,)
|
|
35
|
+
@group(0) @binding(3) var<storage, read_write> rms_y : array<f32>;
|
|
36
|
+
@group(0) @binding(4) var<storage, read_write> rms_inv : array<f32>; // cache 1/rms per row
|
|
37
|
+
|
|
38
|
+
@compute @workgroup_size(64, 1, 1)
|
|
39
|
+
fn rmsnorm_forward(
|
|
40
|
+
@builtin(global_invocation_id) gid : vec3<u32>,
|
|
41
|
+
) {
|
|
42
|
+
let row = gid.x;
|
|
43
|
+
if (row >= rms_p.num_rows) { return; }
|
|
44
|
+
|
|
45
|
+
let D = rms_p.dim;
|
|
46
|
+
let base = row * D;
|
|
47
|
+
|
|
48
|
+
var sq_sum: f32 = 0.0;
|
|
49
|
+
for (var i: u32 = 0u; i < D; i = i + 1u) {
|
|
50
|
+
let v = rms_x[base + i];
|
|
51
|
+
sq_sum = sq_sum + v * v;
|
|
52
|
+
}
|
|
53
|
+
let inv_rms = 1.0 / sqrt(sq_sum / f32(D) + rms_p.eps);
|
|
54
|
+
rms_inv[row] = inv_rms;
|
|
55
|
+
|
|
56
|
+
for (var i: u32 = 0u; i < D; i = i + 1u) {
|
|
57
|
+
rms_y[base + i] = rms_x[base + i] * inv_rms * rms_w[i];
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
`;
|
|
61
|
+
// ---- Softmax (row-wise with optional causal mask) ----
// Standalone softmax used by AttentionBlock for the score matrix.
// Dispatch: (L, H, B) — one workgroup per (row, head, batch).
// The head count H is taken from the Y dimension of the dispatch (num_workgroups.y),
// so the kernel must be dispatched with exactly H workgroups in Y.
// The score buffer is addressed row-major as (batch, head, row, col) and is
// normalised in place.
// This version is a simple sequential-within-workgroup implementation;
// for large L prefer the cooperative version in attention.ts.
export const SOFTMAX_FORWARD_WGSL = /* wgsl */ `
struct SoftmaxParams {
    rows   : u32,   // L
    cols   : u32,   // L
    causal : u32,   // 1 = apply causal mask, 0 = full softmax
};

@group(0) @binding(0) var<uniform>             sp   : SoftmaxParams;
@group(0) @binding(1) var<storage, read_write> data : array<f32>;

@compute @workgroup_size(1, 1, 1)
fn softmax_forward_simple(
    @builtin(global_invocation_id) gid : vec3<u32>,
    @builtin(num_workgroups)       nwg : vec3<u32>,
) {
    let row  = gid.x;
    let head = gid.y;
    let bat  = gid.z;

    if (row >= sp.rows) { return; }

    let L = sp.cols;
    // Each (batch, head) pair owns its own rows x cols matrix. The batch stride has to
    // span every head of the batch, otherwise different (batch, head) pairs share rows.
    let base = ((bat * nwg.y + head) * sp.rows + row) * L;
    // Causal rows see columns 0..row; clamp to the row width so a row index beyond
    // cols can never touch the following row.
    let lim  = select(L, min(row + 1u, L), sp.causal == 1u);

    // Pass 1: row maximum, subtracted below for numerical stability.
    var max_val = -1e38;
    for (var c = 0u; c < lim; c = c + 1u) {
        if (data[base + c] > max_val) { max_val = data[base + c]; }
    }

    // Pass 2: exponentiate in place and accumulate the normaliser.
    var sum_exp = 0.0;
    for (var c = 0u; c < lim; c = c + 1u) {
        let e = exp(data[base + c] - max_val);
        data[base + c] = e;
        sum_exp = sum_exp + e;
    }

    // Pass 3: normalise.
    let inv = 1.0 / (sum_exp + 1e-12);
    for (var c = 0u; c < lim; c = c + 1u) {
        data[base + c] = data[base + c] * inv;
    }
    // Zero out masked positions
    for (var c = lim; c < L; c = c + 1u) {
        data[base + c] = 0.0;
    }
}
`;
|
|
110
|
+
// ---- Softmax backward (row-wise, optional causal mask) ----
// For each row: dx = p * (dp - sum_j p[j] * dp[j]), evaluated over the unmasked columns.
// Masked columns receive a zero gradient so a reused dx buffer never keeps stale values.
// Dispatch: (L, H, B) with one single-invocation workgroup per (row, head, batch); the
// head count H is taken from the Y dimension of the dispatch (num_workgroups.y).
export const SOFTMAX_BACKWARD_WGSL = /* wgsl */ `
struct SoftmaxParams {
    rows   : u32,
    cols   : u32,
    causal : u32,
};

@group(0) @binding(0) var<uniform>             sp : SoftmaxParams;
@group(0) @binding(1) var<storage, read>       p  : array<f32>;   // post-softmax probs
@group(0) @binding(2) var<storage, read>       dp : array<f32>;   // upstream gradient
@group(0) @binding(3) var<storage, read_write> dx : array<f32>;   // output gradient

@compute @workgroup_size(1, 1, 1)
fn softmax_backward(
    @builtin(global_invocation_id) gid : vec3<u32>,
    @builtin(num_workgroups)       nwg : vec3<u32>,
) {
    let row  = gid.x;
    let head = gid.y;
    let bat  = gid.z;

    if (row >= sp.rows) { return; }

    let L = sp.cols;
    // Each (batch, head) pair owns its own rows x cols matrix. The batch stride has to
    // span every head of the batch, otherwise different (batch, head) pairs share rows.
    let base = ((bat * nwg.y + head) * sp.rows + row) * L;
    // Causal rows cover columns 0..row; clamp to the row width.
    let lim  = select(L, min(row + 1u, L), sp.causal == 1u);

    // dot = sum_i p[i] * dp[i]
    var dot = 0.0;
    for (var i = 0u; i < lim; i = i + 1u) {
        dot = dot + p[base + i] * dp[base + i];
    }

    for (var i = 0u; i < lim; i = i + 1u) {
        dx[base + i] = p[base + i] * (dp[base + i] - dot);
    }
    // Masked positions have zero probability in the forward pass, hence zero gradient.
    for (var i = lim; i < L; i = i + 1u) {
        dx[base + i] = 0.0;
    }
}
`;
|
|
145
|
+
// ---- Backward for SiLU ----
|
|
146
|
+
/**
 * WGSL source for the `silu_backward` compute entry point (workgroup size 256).
 *
 * For every i < num_elements it writes
 *   dx[i] = dy[i] * s * (1 + x[i] * (1 - s)),  with s = 1 / (1 + exp(-x[i])).
 *
 * Bindings in @group(0): 0 = ActParams uniform, 1 = x (read), 2 = dy (read),
 * 3 = dx (read_write).
 */
export const ACTIVATIONS_BACKWARD_WGSL = /* wgsl */ `
|
|
147
|
+
|
|
148
|
+
struct ActParams {
|
|
149
|
+
num_elements : u32,
|
|
150
|
+
};
|
|
151
|
+
|
|
152
|
+
@group(0) @binding(0) var<uniform> p : ActParams;
|
|
153
|
+
@group(0) @binding(1) var<storage, read> x : array<f32>;
|
|
154
|
+
@group(0) @binding(2) var<storage, read> dy : array<f32>;
|
|
155
|
+
@group(0) @binding(3) var<storage, read_write> dx : array<f32>;
|
|
156
|
+
|
|
157
|
+
// d/dx [x * sigmoid(x)] = sigmoid(x) + x * sigmoid(x) * (1 - sigmoid(x))
|
|
158
|
+
// = silu(x)/x + sigmoid(x) * (1 - sigmoid(x)) * x
|
|
159
|
+
// simplified: sigmoid(x) * (1 + x*(1 - sigmoid(x)))
|
|
160
|
+
@compute @workgroup_size(256, 1, 1)
|
|
161
|
+
fn silu_backward(
|
|
162
|
+
@builtin(global_invocation_id) gid : vec3<u32>,
|
|
163
|
+
) {
|
|
164
|
+
let i = gid.x;
|
|
165
|
+
if (i >= p.num_elements) { return; }
|
|
166
|
+
let v = x[i];
|
|
167
|
+
let sig = 1.0 / (1.0 + exp(-v));
|
|
168
|
+
dx[i] = dy[i] * sig * (1.0 + v * (1.0 - sig));
|
|
169
|
+
}
|
|
170
|
+
`;
|
|
171
|
+
//# sourceMappingURL=activations.js.map
|