@seanhogg/builderforce-memory-engine 2026.6.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +393 -0
  3. package/dist/index.d.ts +32 -0
  4. package/dist/index.d.ts.map +1 -0
  5. package/dist/index.js +40 -0
  6. package/dist/index.js.map +1 -0
  7. package/dist/kernels/activations.d.ts +5 -0
  8. package/dist/kernels/activations.d.ts.map +1 -0
  9. package/dist/kernels/activations.js +171 -0
  10. package/dist/kernels/activations.js.map +1 -0
  11. package/dist/kernels/attention.d.ts +19 -0
  12. package/dist/kernels/attention.d.ts.map +1 -0
  13. package/dist/kernels/attention.js +263 -0
  14. package/dist/kernels/attention.js.map +1 -0
  15. package/dist/kernels/complex_ssd.d.ts +33 -0
  16. package/dist/kernels/complex_ssd.d.ts.map +1 -0
  17. package/dist/kernels/complex_ssd.js +305 -0
  18. package/dist/kernels/complex_ssd.js.map +1 -0
  19. package/dist/kernels/conv1d.d.ts +3 -0
  20. package/dist/kernels/conv1d.d.ts.map +1 -0
  21. package/dist/kernels/conv1d.js +158 -0
  22. package/dist/kernels/conv1d.js.map +1 -0
  23. package/dist/kernels/linear_projection.d.ts +3 -0
  24. package/dist/kernels/linear_projection.d.ts.map +1 -0
  25. package/dist/kernels/linear_projection.js +219 -0
  26. package/dist/kernels/linear_projection.js.map +1 -0
  27. package/dist/kernels/selective_scan.d.ts +3 -0
  28. package/dist/kernels/selective_scan.d.ts.map +1 -0
  29. package/dist/kernels/selective_scan.js +348 -0
  30. package/dist/kernels/selective_scan.js.map +1 -0
  31. package/dist/kernels/ssd.d.ts +29 -0
  32. package/dist/kernels/ssd.d.ts.map +1 -0
  33. package/dist/kernels/ssd.js +276 -0
  34. package/dist/kernels/ssd.js.map +1 -0
  35. package/dist/kernels/weight_update.d.ts +3 -0
  36. package/dist/kernels/weight_update.d.ts.map +1 -0
  37. package/dist/kernels/weight_update.js +119 -0
  38. package/dist/kernels/weight_update.js.map +1 -0
  39. package/dist/model/attention_block.d.ts +48 -0
  40. package/dist/model/attention_block.d.ts.map +1 -0
  41. package/dist/model/attention_block.js +262 -0
  42. package/dist/model/attention_block.js.map +1 -0
  43. package/dist/model/mamba1_block.d.ts +70 -0
  44. package/dist/model/mamba1_block.d.ts.map +1 -0
  45. package/dist/model/mamba1_block.js +333 -0
  46. package/dist/model/mamba1_block.js.map +1 -0
  47. package/dist/model/mamba2_block.d.ts +44 -0
  48. package/dist/model/mamba2_block.d.ts.map +1 -0
  49. package/dist/model/mamba2_block.js +252 -0
  50. package/dist/model/mamba2_block.js.map +1 -0
  51. package/dist/model/mamba3_block.d.ts +51 -0
  52. package/dist/model/mamba3_block.d.ts.map +1 -0
  53. package/dist/model/mamba3_block.js +270 -0
  54. package/dist/model/mamba3_block.js.map +1 -0
  55. package/dist/model/mamba_block.d.ts +64 -0
  56. package/dist/model/mamba_block.d.ts.map +1 -0
  57. package/dist/model/mamba_block.js +303 -0
  58. package/dist/model/mamba_block.js.map +1 -0
  59. package/dist/model/mamba_model.d.ts +140 -0
  60. package/dist/model/mamba_model.d.ts.map +1 -0
  61. package/dist/model/mamba_model.js +527 -0
  62. package/dist/model/mamba_model.js.map +1 -0
  63. package/dist/model/sequence_layer.d.ts +25 -0
  64. package/dist/model/sequence_layer.d.ts.map +1 -0
  65. package/dist/model/sequence_layer.js +8 -0
  66. package/dist/model/sequence_layer.js.map +1 -0
  67. package/dist/tokenizer/bpe.d.ts +29 -0
  68. package/dist/tokenizer/bpe.d.ts.map +1 -0
  69. package/dist/tokenizer/bpe.js +164 -0
  70. package/dist/tokenizer/bpe.js.map +1 -0
  71. package/dist/training/autograd.d.ts +27 -0
  72. package/dist/training/autograd.d.ts.map +1 -0
  73. package/dist/training/autograd.js +120 -0
  74. package/dist/training/autograd.js.map +1 -0
  75. package/dist/training/trainer.d.ts +36 -0
  76. package/dist/training/trainer.d.ts.map +1 -0
  77. package/dist/training/trainer.js +183 -0
  78. package/dist/training/trainer.js.map +1 -0
  79. package/dist/utils/gpu_utils.d.ts +21 -0
  80. package/dist/utils/gpu_utils.d.ts.map +1 -0
  81. package/dist/utils/gpu_utils.js +111 -0
  82. package/dist/utils/gpu_utils.js.map +1 -0
  83. package/dist/utils/quantization.d.ts +26 -0
  84. package/dist/utils/quantization.d.ts.map +1 -0
  85. package/dist/utils/quantization.js +116 -0
  86. package/dist/utils/quantization.js.map +1 -0
  87. package/dist/utils/rng.d.ts +36 -0
  88. package/dist/utils/rng.d.ts.map +1 -0
  89. package/dist/utils/rng.js +61 -0
  90. package/dist/utils/rng.js.map +1 -0
  91. package/package.json +99 -0
  92. package/src/index.ts +114 -0
  93. package/src/kernels/activations.ts +174 -0
  94. package/src/kernels/attention.ts +268 -0
  95. package/src/kernels/complex_ssd.ts +307 -0
  96. package/src/kernels/conv1d.ts +159 -0
  97. package/src/kernels/linear_projection.ts +220 -0
  98. package/src/kernels/selective_scan.ts +350 -0
  99. package/src/kernels/ssd.ts +278 -0
  100. package/src/kernels/weight_update.ts +120 -0
  101. package/src/model/attention_block.ts +344 -0
  102. package/src/model/mamba1_block.ts +437 -0
  103. package/src/model/mamba2_block.ts +319 -0
  104. package/src/model/mamba3_block.ts +335 -0
  105. package/src/model/mamba_block.ts +401 -0
  106. package/src/model/mamba_model.ts +678 -0
  107. package/src/model/sequence_layer.ts +29 -0
  108. package/src/tokenizer/bpe.ts +186 -0
  109. package/src/training/autograd.ts +135 -0
  110. package/src/training/trainer.ts +309 -0
  111. package/src/utils/gpu_utils.ts +147 -0
  112. package/src/utils/quantization.ts +154 -0
  113. package/src/utils/rng.ts +65 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Sean Hogg
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,393 @@
1
+ # MambaCode.js
2
+
3
+ > WebGPU-accelerated Mamba-1/2/3 and Hybrid SSM library — written in **TypeScript**, compiled for use in any JavaScript application.
4
+
5
+ [![npm](https://img.shields.io/npm/v/@seanhogg/mambacode.js)](https://www.npmjs.com/package/@seanhogg/mambacode.js)
6
+ [![license](https://img.shields.io/badge/license-MIT-blue)](./LICENSE)
7
+
8
+ MambaCode.js is a **TypeScript-first** library that brings the Mamba family of State Space Models to the browser via WebGPU. Version 2.0.0 adds **Mamba-2** (SSD), **Mamba-3** (complex-valued MIMO + ET discretisation), and **hybrid attention** layers, while remaining fully backward-compatible with Mamba-1 checkpoints.
9
+
10
+ > 📖 **New to MambaCode.js?** Start with the [Getting Started Guide](./docs/getting-started.md).
11
+
12
+ ---
13
+
14
+ ## What's New in v2.0.0
15
+
16
+ | Feature | Detail |
17
+ |---|---|
18
+ | **Mamba-2 (SSD)** | Structured State Space Duality — chunked matmul scan, multi-head, scalar A, inner RMSNorm |
19
+ | **Mamba-3** | Complex-valued states (ℂ^N), ET discretisation, MIMO recurrence, 2× smaller state size |
20
+ | **AttentionBlock** | Causal multi-head attention for hybrid (Jamba/Zamba) layer schedules |
21
+ | **HybridMambaModel** | Per-layer type schedule — mix mamba1/2/3/attention freely |
22
+ | **MBJS v2 format** | Layer-type metadata in checkpoint header; v1 files still load unchanged |
23
+ | **`MambaBlock` alias** | Kept as deprecated alias for `Mamba1Block` until 3.0.0 |
24
+
25
+ ---
26
+
27
+ ## Key Features
28
+
29
+ | Feature | Detail |
30
+ |---|---|
31
+ | **TypeScript-first** | Full type declarations shipped with the package |
32
+ | **Plain JS compatible** | Import the compiled `dist/` in any JavaScript project |
33
+ | **SSM variants** | Mamba-1 (S6), Mamba-2 (SSD), Mamba-3 (complex MIMO+ET) |
34
+ | **Hybrid models** | Jamba/Zamba-style mixed SSM + attention schedules |
35
+ | **Hardware target** | WebGPU (WGSL) — Chrome 113+, Edge 113+, Firefox Nightly |
36
+ | **No heavy frameworks** | Zero TensorFlow.js / Transformers.js dependencies |
37
+ | **On-device training** | Tape-based autograd + AdamW GPU optimizer |
38
+ | **Quantization** | FP16 weights, Int8 activations |
39
+ | **Tokenizer** | Browser-side BPE (Qwen2.5-Coder compatible) |
40
+ | **WSLA mode** | Fast-adapt: trains only the selective projection rows |
41
+
42
+ ---
43
+
44
+ ## Installation
45
+
46
+ ```bash
47
+ npm install mambacode.js
48
+ ```
49
+
50
+ Build from source:
51
+
52
+ ```bash
53
+ npm run build # compiles TypeScript → dist/
54
+ ```
55
+
56
+ ---
57
+
58
+ ## Quick Start
59
+
60
+ ### Mamba-1 (backward-compatible, unchanged)
61
+
62
+ ```ts
63
+ import { MambaModel, MambaTrainer, BPETokenizer, initWebGPU } from 'mambacode.js';
64
+
65
+ const { device } = await initWebGPU();
66
+ const tokenizer = new BPETokenizer();
67
+ await tokenizer.load('/vocab.json', '/merges.txt');
68
+
69
+ const model = new MambaModel(device, {
70
+ vocabSize : tokenizer.vocabSize,
71
+ dModel : 512,
72
+ numLayers : 8,
73
+ });
74
+
75
+ await model.loadWeights(await (await fetch('/checkpoint.bin')).arrayBuffer());
76
+ const ids = await model.generate(tokenizer.encode('function add('), 200);
77
+ console.log(tokenizer.decode(ids));
78
+ ```
79
+
80
+ ### Mamba-2 (SSD)
81
+
82
+ ```ts
83
+ import { HybridMambaModel } from 'mambacode.js';
84
+
85
+ const model = new HybridMambaModel(device, {
86
+ vocabSize : tokenizer.vocabSize,
87
+ dModel : 512,
88
+ numLayers : 8,
89
+ nHeads : 8,
90
+ layers : Array(8).fill({ type: 'mamba2' }),
91
+ });
92
+ ```
93
+
94
+ ### Mamba-3 (complex states)
95
+
96
+ ```ts
97
+ const model = new HybridMambaModel(device, {
98
+ vocabSize : tokenizer.vocabSize,
99
+ dModel : 512,
100
+ numLayers : 8,
101
+ nHeads : 8,
102
+ layers : Array(8).fill({ type: 'mamba3' }),
103
+ });
104
+ ```
105
+
106
+ ### Hybrid (Jamba-style: every 4th layer is attention)
107
+
108
+ ```ts
109
+ const model = new HybridMambaModel(device, {
110
+ vocabSize : tokenizer.vocabSize,
111
+ dModel : 512,
112
+ numLayers : 12,
113
+ nHeads : 8,
114
+ layers : Array.from({ length: 12 }, (_, i) => ({
115
+ type: i % 4 === 3 ? 'attention' : 'mamba2',
116
+ })),
117
+ });
118
+ ```
119
+
120
+ ---
121
+
122
+ ## Architecture Reference
123
+
124
+ ### Mamba-1 Block (S6)
125
+
126
+ ```
127
+ Input (B, L, D)
128
+ └─ RMSNorm
129
+ └─ in_proj → x, z (gate)
130
+ x → conv1d → SiLU → x_proj → Δ, B, C
131
+ Δ → dt_proj → softplus
132
+ Selective Scan S6
133
+ h_t = Ā·h_{t-1} + B̄·x_t
134
+ y_t = C·h_t + D·x_t
135
+ └─ y * SiLU(z)
136
+ └─ out_proj + residual
137
+ ```
138
+
139
+ ### Mamba-2 Block (SSD)
140
+
141
+ ```
142
+ Input (B, L, D)
143
+ └─ RMSNorm
144
+ └─ in_proj → [x (D_inner), B (G·N), C (G·N), dt (H)]
145
+ conv1d over x, B, C (fused)
146
+ SSD scan: A_bar = exp(-softplus(A) · softplus(dt))
147
+ h_t = A_bar · h_{t-1} + B · x_t
148
+ y_t = C · h_t
149
+ └─ inner RMSNorm
150
+ └─ out_proj + residual
151
+ ```
152
+
153
+ ### Mamba-3 Block (complex MIMO, ET)
154
+
155
+ ```
156
+ Input (B, L, D)
157
+ └─ Same structure as Mamba-2 but:
158
+ • A ∈ ℂ (log|A|, arg(A)) per head
159
+ • A_bar = exp(Δ·A) [complex]
160
+ • B_bar = (A_bar − 1)·A⁻¹·B [ET, exact]
161
+ • h_t ∈ ℂ^(N/2), y_t = Re(C·h_t)
162
+ ```
163
+
164
+ ### AttentionBlock (causal MHA)
165
+
166
+ ```
167
+ Input (B, L, D)
168
+ └─ RMSNorm
169
+ └─ wQKV → Q, K, V (B, L, H, d_head)
170
+ └─ scores = Q·Kᵀ / √d_head (causal mask)
171
+ └─ softmax → weighted V sum
172
+ └─ concat heads → wO + residual
173
+ [optional FFN sublayer]
174
+ ```
175
+
176
+ ---
177
+
178
+ ## File Structure
179
+
180
+ ```
181
+ src/
182
+ ├── index.ts ← public API entry point (v2.0.0)
183
+ ├── kernels/
184
+ │ ├── selective_scan.ts ← WGSL: S6 forward/backward (Mamba-1)
185
+ │ ├── ssd.ts ← WGSL: chunked SSD forward/backward (Mamba-2)
186
+ │ ├── complex_ssd.ts ← WGSL: complex SSD + ET + MIMO (Mamba-3)
187
+ │ ├── attention.ts ← WGSL: tiled causal MHA forward/backward
188
+ │ ├── conv1d.ts ← WGSL: 1D causal convolution (+ groups param)
189
+ │ ├── linear_projection.ts ← WGSL: tiled GEMM
190
+ │ ├── weight_update.ts ← WGSL: AdamW + gradient clipping
191
+ │ └── activations.ts ← WGSL: SiLU, RMSNorm, Softmax
192
+ ├── model/
193
+ │ ├── sequence_layer.ts ← SequenceLayer interface (LayerType, LayerParam)
194
+ │ ├── mamba1_block.ts ← Mamba1Block (renamed from MambaBlock)
195
+ │ ├── mamba2_block.ts ← Mamba2Block (SSD)
196
+ │ ├── mamba3_block.ts ← Mamba3Block (complex + MIMO + ET)
197
+ │ ├── attention_block.ts ← AttentionBlock (causal MHA)
198
+ │ └── mamba_model.ts ← HybridMambaModel + MambaModel alias
199
+ ├── training/
200
+ │ ├── autograd.ts ← Tape-based AD + loss helpers
201
+ │ └── trainer.ts ← MambaTrainer (AdamW, WSLA)
202
+ ├── tokenizer/
203
+ │ └── bpe.ts ← Browser-side BPE tokenizer
204
+ └── utils/
205
+ ├── gpu_utils.ts ← WebGPU device/buffer management
206
+ └── quantization.ts ← FP16 / Int8 quantization
207
+
208
+ tools/ ← Model building & checkpoint tooling
209
+ ├── generate-bin.js ← CLI: generate an MBJS v2 checkpoint from scratch
210
+ ├── pretrain.html ← Browser: pretrain a model on a text corpus
211
+ └── convert.html ← Browser: convert HuggingFace Mamba → MBJS format
212
+
213
+ tests/
214
+ ├── kernels.test.ts
215
+ ├── autograd.test.ts
216
+ ├── bpe.test.ts
217
+ └── quantization.test.ts
218
+
219
+ docs/
220
+ ├── getting-started.md
221
+ ├── integration-architecture.md
222
+ ├── weight-lifecycle.md
223
+ ├── api-reference.md
224
+ └── prd-mambacode-v2-v3-hybrid.md ← PRD: Mamba-2/3/hybrid implementation spec
225
+ ```
226
+
227
+ ---
228
+
229
+ ## Tools
230
+
231
+ The `tools/` directory contains model-building and checkpoint utilities that operate at the mambacode.js level. These are **not part of the MambaKit API** — they are for authors who want to build, pretrain, or convert model weights.
232
+
233
+ ### `tools/generate-bin.js` — Generate a blank MBJS checkpoint
234
+
235
+ Creates a properly-shaped MBJS v2 `.bin` file with randomly initialised weights. Useful as a starting point before pretraining.
236
+
237
+ ```bash
238
+ node tools/generate-bin.js # nano → model.bin
239
+ node tools/generate-bin.js --size small # small preset
240
+ node tools/generate-bin.js --size nano --out my.bin
241
+ ```
242
+
243
+ The weights are **not pretrained** — use `pretrain.html` to run language-model training.
244
+
245
+ ### `tools/pretrain.html` — Browser pretraining UI
246
+
247
+ In-browser training loop over a text corpus. Requires a WebGPU-capable browser.
248
+
249
+ ```bash
250
+ npm run build
251
+ npm run serve
252
+ # Open http://localhost:3000/tools/pretrain.html
253
+ # Load a corpus (e.g. TinyStories), configure size/epochs, click Start Training
254
+ # Download the resulting .bin checkpoint
255
+ ```
256
+
257
+ ### `tools/convert.html` — HuggingFace → MBJS converter
258
+
259
+ Converts `state-spaces/mamba` safetensors checkpoints to MBJS format.
260
+
261
+ ```bash
262
+ # Open http://localhost:3000/tools/convert.html
263
+ # Drop model.safetensors from huggingface.co/state-spaces/mamba-130m
264
+ # Download converted .bin
265
+ ```
266
+
267
+ ---
268
+
269
+ ## WGSL Kernels
270
+
271
+ | Kernel file | Entry points | Used by |
272
+ |---|---|---|
273
+ | `selective_scan.ts` | `forward_scan`, `forward_reduce`, `selective_scan_backward` | Mamba-1 |
274
+ | `ssd.ts` | `ssd_chunk_forward`, `ssd_chunk_backward` | Mamba-2 |
275
+ | `complex_ssd.ts` | `complex_ssd_forward`, `complex_ssd_backward` | Mamba-3 |
276
+ | `attention.ts` | `attention_forward`, `attention_value`, `attention_backward` | Attention |
277
+ | `conv1d.ts` | `conv1d_forward`, `conv1d_backward_dx`, `conv1d_backward_dw` | All SSM |
278
+ | `linear_projection.ts` | `linear_forward`, `linear_backward_dX`, `linear_backward_dW` | All layers |
279
+ | `activations.ts` | `silu_forward`, `rmsnorm_forward`, `softmax_forward_simple` | All layers |
280
+ | `weight_update.ts` | `adamw_update`, `grad_norm_reduce`, `grad_clip_scale` | Training |
281
+
282
+ ---
283
+
284
+ ## MBJS Binary Format
285
+
286
+ ### Version 1 (legacy, still readable)
287
+
288
+ ```
289
+ [0..3] magic = 0x4D424A53 ('MBJS')
290
+ [4..7] version = 1
291
+ [8..11] nParams : uint32
292
+ [12 ..] numel[i] : uint32 (×nParams)
293
+ [data] float32 values
294
+ ```
295
+
296
+ ### Version 2 (written by default from v2.0.0)
297
+
298
+ ```
299
+ [0..3] magic = 0x4D424A53
300
+ [4..7] version = 2
301
+ [8..11] nLayers : uint32
302
+ [12 ..] layerType[i] : uint8 (0=mamba1, 1=mamba2, 2=mamba3, 3=attention)
303
+ [pad] aligned to 4 bytes
304
+ [next4] nParams : uint32
305
+ [next..] numel[i] : uint32 (×nParams)
306
+ [data] float32 values
307
+ ```
308
+
309
+ Version 1 files are loaded transparently — all layers are assumed to be `mamba1`.
310
+
311
+ ---
312
+
313
+ ## Migration from v1.x
314
+
315
+ ```ts
316
+ // v1.x — no change needed (mamba1 default is preserved)
317
+ const model = new MambaModel(device, config);
318
+
319
+ // v2.x — opt into Mamba-2
320
+ const model = new HybridMambaModel(device, { ...config, layers: Array(8).fill({ type: 'mamba2' }) });
321
+
322
+ // v2.x — MambaBlock is a deprecated alias for Mamba1Block; both still work
323
+ import { MambaBlock, Mamba1Block } from 'mambacode.js';
324
+ ```
325
+
326
+ ---
327
+
328
+ ## Testing
329
+
330
+ ```bash
331
+ npm test # unit tests (no GPU required)
332
+ npm run build # compile TypeScript → dist/
333
+ npm run lint # ESLint
334
+ ```
335
+
336
+ ---
337
+
338
+ ## Browser Compatibility
339
+
340
+ | Browser | Version | Status |
341
+ |---|---|---|
342
+ | Chrome | 113+ | ✅ Supported |
343
+ | Edge | 113+ | ✅ Supported |
344
+ | Firefox | Nightly | ✅ (flag: `dom.webgpu.enabled`) |
345
+ | Safari | 18+ | ⚠️ Partial |
346
+ | Node.js | — | ❌ Not supported |
347
+
348
+ ---
349
+
350
+ ## Acknowledgements
351
+
352
+ - **Mamba-3** — Lahoti et al., *Mamba-3: Improved Sequence Modeling using State Space Principles* (arXiv 2603.15569, ICLR 2026)
353
+ - **Mamba-2** — Dao & Gu, *Transformers are SSMs* (arXiv 2405.21060, 2024)
354
+ - **Mamba-1** — Gu & Dao, *Mamba: Linear-Time Sequence Modeling with Selective State Spaces* (arXiv 2312.00752, 2023)
355
+
356
+ ---
357
+
358
+ ## Professional Platform
359
+
360
+ **Want managed infrastructure for your MambaCode.js models?**
361
+
362
+ [**Builderforce.ai**](https://builderforce.ai) is the professional enterprise platform built on MambaCode.js. It provides:
363
+
364
+ - **In-browser LoRA training** — fine-tune up to 2B-parameter models on instruction datasets using the MambaCode.js WebGPU kernels, entirely client-side
365
+ - **Hybrid Local Brain** — the Mamba State Engine runs a selective scan alongside Transformers.js inference for persistent agent memory, powered by MambaCode.js WGSL kernels
366
+ - **Dataset generation** — LLM-assisted JSONL instruction dataset creation with streaming progress
367
+ - **Workforce Registry** — publish trained models as specialist AI agents; discoverable and hirable by the community
368
+ - **Agent portability** — `AgentPackage` bundles the LoRA adapter, `MambaStateSnapshot`, and agent profile into a single portable JSON artifact
369
+ - **BuilderForce Agents mesh** — trained agents deploy as self-hosted coding agents via [BuilderForce Agents](https://builderforce.ai), orchestrated from Builderforce
370
+
371
+ Use MambaCode.js to build and experiment locally. Use Builderforce.ai to deploy, manage, and share at scale.
372
+
373
+ ```
374
+ MambaCode.js (WebGPU kernels)
375
+
376
+ SSM.js (session API + runtime + memory)
377
+
378
+ Builderforce.ai (enterprise IDE + training + registry)
379
+
380
+ BuilderForce Agents (self-hosted agent mesh)
381
+ ```
382
+
383
+ ---
384
+
385
+ ## Consolidated Gap Register
386
+
387
+ - **Release publish blocked by NPM_TOKEN permissions** (`.github/workflows/release.yml`): the `2026.5.31` tag failed `npm publish` with a 404 on the registry PUT request. The package exists on npm (maintainer `seanhogg`) and `npm whoami` passes, so the `NPM_TOKEN` secret authenticates but lacks read-write access to `@seanhogg/mambacode.js` (read-only token, wrong account, or a granular token whose allowlist omits the package). The workflow now fails fast with this diagnosis, but the actual fix is registry-side and manual: rotate `NPM_TOKEN` to an Automation/granular token with publish rights, then re-push the tag. Fixing this unblocks every downstream consumer pinned to a published `@seanhogg/mambacode.js` version.
388
+
389
+ ---
390
+
391
+ ## License
392
+
393
+ MIT
@@ -0,0 +1,32 @@
1
+ /**
2
+ * MambaCode.js – Entry Point (v2.0.0)
3
+ */
4
+ export { HybridMambaModel, MambaModel } from './model/mamba_model.js';
5
+ export { Mamba1Block } from './model/mamba1_block.js';
6
+ export { Mamba2Block } from './model/mamba2_block.js';
7
+ export { Mamba3Block } from './model/mamba3_block.js';
8
+ export { AttentionBlock } from './model/attention_block.js';
9
+ export { MambaBlock } from './model/mamba1_block.js';
10
+ export { MambaTrainer } from './training/trainer.js';
11
+ export { Tensor, backward, enableGrad, noGrad, clearTape, recordOperation, crossEntropyLoss, crossEntropyGrad, } from './training/autograd.js';
12
+ export { BPETokenizer } from './tokenizer/bpe.js';
13
+ export { SeededRng, setInitSeed, randn, gaussianArray } from './utils/rng.js';
14
+ export type { HybridMambaModelConfig, MambaModelConfig, ModelForwardResult, SamplingOptions, LayerSpec, } from './model/mamba_model.js';
15
+ export type { SequenceLayer, LayerParam, LayerType, LayerForwardResult } from './model/sequence_layer.js';
16
+ export type { Mamba1BlockConfig, BlockParam, BlockCache, BlockForwardResult, MambaBlockConfig } from './model/mamba1_block.js';
17
+ export type { Mamba2BlockConfig, Mamba2Cache } from './model/mamba2_block.js';
18
+ export type { Mamba3BlockConfig, Mamba3Cache } from './model/mamba3_block.js';
19
+ export type { AttentionBlockConfig, AttentionCache } from './model/attention_block.js';
20
+ export { initWebGPU, createStorageBuffer, createEmptyStorageBuffer, createUniformBuffer, createComputePipeline, createBindGroup, dispatchKernel, readBuffer, uploadBuffer, cdiv, } from './utils/gpu_utils.js';
21
+ export { quantizeFp16, dequantizeFp16, floatToFp16, fp16ToFloat, quantizeInt8, dequantizeInt8, quantizeInt8PerChannel, dequantizeInt8PerChannel, estimateMemory, } from './utils/quantization.js';
22
+ export { SELECTIVE_SCAN_FORWARD_WGSL, SELECTIVE_SCAN_BACKWARD_WGSL } from './kernels/selective_scan.js';
23
+ export { CONV1D_FORWARD_WGSL, CONV1D_BACKWARD_WGSL } from './kernels/conv1d.js';
24
+ export { LINEAR_FORWARD_WGSL, LINEAR_BACKWARD_WGSL } from './kernels/linear_projection.js';
25
+ export { WEIGHT_UPDATE_WGSL, GRAD_CLIP_WGSL } from './kernels/weight_update.js';
26
+ export { ACTIVATIONS_WGSL, ACTIVATIONS_BACKWARD_WGSL, SOFTMAX_FORWARD_WGSL, SOFTMAX_BACKWARD_WGSL } from './kernels/activations.js';
27
+ export { SSD_FORWARD_WGSL, SSD_BACKWARD_WGSL } from './kernels/ssd.js';
28
+ export { COMPLEX_SSD_FORWARD_WGSL, COMPLEX_SSD_BACKWARD_WGSL } from './kernels/complex_ssd.js';
29
+ export { ATTENTION_FORWARD_WGSL, ATTENTION_BACKWARD_WGSL, SOFTMAX_WGSL } from './kernels/attention.js';
30
+ export declare const VERSION = "2.0.0";
31
+ export declare const DESCRIPTION = "MambaCode.js: WebGPU-accelerated Mamba-1/2/3 and Hybrid SSM for browser code models";
32
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAGtE,OAAO,EAAE,WAAW,EAAE,MAAQ,yBAAyB,CAAC;AACxD,OAAO,EAAE,WAAW,EAAE,MAAQ,yBAAyB,CAAC;AACxD,OAAO,EAAE,WAAW,EAAE,MAAQ,yBAAyB,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAG5D,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAIrD,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EACH,MAAM,EACN,QAAQ,EACR,UAAU,EACV,MAAM,EACN,SAAS,EACT,eAAe,EACf,gBAAgB,EAChB,gBAAgB,GACnB,MAAM,wBAAwB,CAAC;AAIhC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAIlD,OAAO,EAAE,SAAS,EAAE,WAAW,EAAE,KAAK,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAI9E,YAAY,EACR,sBAAsB,EACtB,gBAAgB,EAChB,kBAAkB,EAClB,eAAe,EACf,SAAS,GACZ,MAAM,wBAAwB,CAAC;AAEhC,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAC1G,YAAY,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAC/H,YAAY,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AAC9E,YAAY,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAO,yBAAyB,CAAC;AAC/E,YAAY,EAAE,oBAAoB,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAIvF,OAAO,EACH,UAAU,EACV,mBAAmB,EACnB,wBAAwB,EACxB,mBAAmB,EACnB,qBAAqB,EACrB,eAAe,EACf,cAAc,EACd,UAAU,EACV,YAAY,EACZ,IAAI,GACP,MAAM,sBAAsB,CAAC;AAI9B,OAAO,EACH,YAAY,EACZ,cAAc,EACd,WAAW,EACX,WAAW,EACX,YAAY,EACZ,cAAc,EACd,sBAAsB,EACtB,wBAAwB,EACxB,cAAc,GACjB,MAAM,yBAAyB,CAAC;AAKjC,OAAO,EAAE,2BAA2B,EAAE,4BAA4B,EAAE,MAC3D,6BAA6B,CAAC;AACvC,OAAO,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAC3C,qBAAqB,CAAC;AAC/B,OAAO,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAC3C,gCAAgC,CAAC;AAC1C,OAAO,EAAE,kBAAkB,EAAE,cAAc,EAAE,MACpC,4BAA4B,CAAC;AACtC,OAAO,EAAE,gBAAgB,EAAE,yBAAyB,EAAE,oBAAoB,EAAE,qBAAqB,EAAE,MAC1F,0BAA0B,CAAC;AAGpC,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MACrC,kBAAkB,CAAC;AAG5B,OAAO,EAAE,wBAAwB,EAAE,yBAAyB,EAAE,MACrD,0BAA0B,CAAC;AAGpC,OAAO,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,YAAY,EAAE,MAC/D,wBAAwB,CAAC;AAIlC,eAAO,MAAM,OAAO,UAAc,CAAC;AACnC,eAAO,MAAM,WAAW,wFAAwF,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,40 @@
1
+ /**
2
+ * MambaCode.js – Entry Point (v2.0.0)
3
+ */
4
+ // ── Model classes ─────────────────────────────────────────────────────────────
5
+ export { HybridMambaModel, MambaModel } from './model/mamba_model.js';
6
+ // New block classes
7
+ export { Mamba1Block } from './model/mamba1_block.js';
8
+ export { Mamba2Block } from './model/mamba2_block.js';
9
+ export { Mamba3Block } from './model/mamba3_block.js';
10
+ export { AttentionBlock } from './model/attention_block.js';
11
+ // Deprecated alias — kept until 3.0.0
12
+ export { MambaBlock } from './model/mamba1_block.js';
13
+ // ── Training ──────────────────────────────────────────────────────────────────
14
+ export { MambaTrainer } from './training/trainer.js';
15
+ export { Tensor, backward, enableGrad, noGrad, clearTape, recordOperation, crossEntropyLoss, crossEntropyGrad, } from './training/autograd.js';
16
+ // ── Tokenizer ─────────────────────────────────────────────────────────────────
17
+ export { BPETokenizer } from './tokenizer/bpe.js';
18
+ // ── Seeded RNG (reproducible weight init) ─────────────────────────────────────
19
+ export { SeededRng, setInitSeed, randn, gaussianArray } from './utils/rng.js';
20
+ // ── GPU utilities ─────────────────────────────────────────────────────────────
21
+ export { initWebGPU, createStorageBuffer, createEmptyStorageBuffer, createUniformBuffer, createComputePipeline, createBindGroup, dispatchKernel, readBuffer, uploadBuffer, cdiv, } from './utils/gpu_utils.js';
22
+ // ── Quantization ──────────────────────────────────────────────────────────────
23
+ export { quantizeFp16, dequantizeFp16, floatToFp16, fp16ToFloat, quantizeInt8, dequantizeInt8, quantizeInt8PerChannel, dequantizeInt8PerChannel, estimateMemory, } from './utils/quantization.js';
24
+ // ── WGSL kernel sources ───────────────────────────────────────────────────────
25
+ // Mamba-1 kernels (unchanged)
26
+ export { SELECTIVE_SCAN_FORWARD_WGSL, SELECTIVE_SCAN_BACKWARD_WGSL } from './kernels/selective_scan.js';
27
+ export { CONV1D_FORWARD_WGSL, CONV1D_BACKWARD_WGSL } from './kernels/conv1d.js';
28
+ export { LINEAR_FORWARD_WGSL, LINEAR_BACKWARD_WGSL } from './kernels/linear_projection.js';
29
+ export { WEIGHT_UPDATE_WGSL, GRAD_CLIP_WGSL } from './kernels/weight_update.js';
30
+ export { ACTIVATIONS_WGSL, ACTIVATIONS_BACKWARD_WGSL, SOFTMAX_FORWARD_WGSL, SOFTMAX_BACKWARD_WGSL } from './kernels/activations.js';
31
+ // Mamba-2 SSD kernels
32
+ export { SSD_FORWARD_WGSL, SSD_BACKWARD_WGSL } from './kernels/ssd.js';
33
+ // Mamba-3 complex SSD kernels
34
+ export { COMPLEX_SSD_FORWARD_WGSL, COMPLEX_SSD_BACKWARD_WGSL } from './kernels/complex_ssd.js';
35
+ // Attention kernels
36
+ export { ATTENTION_FORWARD_WGSL, ATTENTION_BACKWARD_WGSL, SOFTMAX_WGSL } from './kernels/attention.js';
37
+ // ── Version ───────────────────────────────────────────────────────────────────
38
+ export const VERSION = '2.0.0';
39
+ export const DESCRIPTION = 'MambaCode.js: WebGPU-accelerated Mamba-1/2/3 and Hybrid SSM for browser code models';
40
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,iFAAiF;AAEjF,OAAO,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAEtE,oBAAoB;AACpB,OAAO,EAAE,WAAW,EAAE,MAAQ,yBAAyB,CAAC;AACxD,OAAO,EAAE,WAAW,EAAE,MAAQ,yBAAyB,CAAC;AACxD,OAAO,EAAE,WAAW,EAAE,MAAQ,yBAAyB,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAE5D,sCAAsC;AACtC,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAErD,iFAAiF;AAEjF,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EACH,MAAM,EACN,QAAQ,EACR,UAAU,EACV,MAAM,EACN,SAAS,EACT,eAAe,EACf,gBAAgB,EAChB,gBAAgB,GACnB,MAAM,wBAAwB,CAAC;AAEhC,iFAAiF;AAEjF,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAElD,iFAAiF;AAEjF,OAAO,EAAE,SAAS,EAAE,WAAW,EAAE,KAAK,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAkB9E,iFAAiF;AAEjF,OAAO,EACH,UAAU,EACV,mBAAmB,EACnB,wBAAwB,EACxB,mBAAmB,EACnB,qBAAqB,EACrB,eAAe,EACf,cAAc,EACd,UAAU,EACV,YAAY,EACZ,IAAI,GACP,MAAM,sBAAsB,CAAC;AAE9B,iFAAiF;AAEjF,OAAO,EACH,YAAY,EACZ,cAAc,EACd,WAAW,EACX,WAAW,EACX,YAAY,EACZ,cAAc,EACd,sBAAsB,EACtB,wBAAwB,EACxB,cAAc,GACjB,MAAM,yBAAyB,CAAC;AAEjC,iFAAiF;AAEjF,8BAA8B;AAC9B,OAAO,EAAE,2BAA2B,EAAE,4BAA4B,EAAE,MAC3D,6BAA6B,CAAC;AACvC,OAAO,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAC3C,qBAAqB,CAAC;AAC/B,OAAO,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAC3C,gCAAgC,CAAC;AAC1C,OAAO,EAAE,kBAAkB,EAAE,cAAc,EAAE,MACpC,4BAA4B,CAAC;AACtC,OAAO,EAAE,gBAAgB,EAAE,yBAAyB,EAAE,oBAAoB,EAAE,qBAAqB,EAAE,MAC1F,0BAA0B,CAAC;AAEpC,sBAAsB;AACtB,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MACrC,kBAAkB,CAAC;AAE5B,8BAA8B;AAC9B,OAAO,EAAE,wBAAwB,EAAE,yBAAyB,EAAE,MACrD,0BAA0B,CAAC;AAEpC,oBAAoB;AACpB,OAAO,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,YAAY,EAAE,MAC/D,wBAAwB,CAAC;AAElC,iFAAiF;AAEjF,MAAM,CAAC,MAAM,OAAO,GAAO,OAAO,CAAC;AACnC,MAAM,CAAC,MAAM,WAAW,GAAG,qFAAqF,CAAC"}
@@ -0,0 +1,5 @@
1
+ export declare const ACTIVATIONS_WGSL: string;
2
+ export declare const SOFTMAX_FORWARD_WGSL: string;
3
+ export declare const SOFTMAX_BACKWARD_WGSL: string;
4
+ export declare const ACTIVATIONS_BACKWARD_WGSL: string;
5
+ //# sourceMappingURL=activations.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"activations.d.ts","sourceRoot":"","sources":["../../src/kernels/activations.ts"],"names":[],"mappings":"AAGA,eAAO,MAAM,gBAAgB,EAAE,MAyD9B,CAAC;AAOF,eAAO,MAAM,oBAAoB,EAAE,MA2ClC,CAAC;AAEF,eAAO,MAAM,qBAAqB,EAAE,MAkCnC,CAAC;AAGF,eAAO,MAAM,yBAAyB,EAAE,MAwBvC,CAAC"}
@@ -0,0 +1,171 @@
1
+ // Forward-pass WGSL kernels: SiLU (Swish), y = x * sigmoid(x), via entry point silu_forward (256 invocations per workgroup, one element each), and RMSNorm, y = x / rms(x) * weight, via entry point rmsnorm_forward (64 invocations per workgroup, one row each, also caching 1/rms per row).
2
+ // Used in the gating mechanism of the Mamba Mixer Block. Both entry points declare their resources at @group(0) starting from binding 0, each with its own uniform struct (ActParams vs RMSNormParams).
3
+ export const ACTIVATIONS_WGSL = /* wgsl */ `
4
+
5
+ struct ActParams {
6
+ num_elements : u32,
7
+ };
8
+
9
+ @group(0) @binding(0) var<uniform> p : ActParams;
10
+ @group(0) @binding(1) var<storage, read> x : array<f32>;
11
+ @group(0) @binding(2) var<storage, read_write> y : array<f32>;
12
+
13
+ // SiLU(x) = x * sigmoid(x)
14
+ @compute @workgroup_size(256, 1, 1)
15
+ fn silu_forward(
16
+ @builtin(global_invocation_id) gid : vec3<u32>,
17
+ ) {
18
+ let i = gid.x;
19
+ if (i >= p.num_elements) { return; }
20
+ let v = x[i];
21
+ y[i] = v / (1.0 + exp(-v));
22
+ }
23
+
24
+ // RMSNorm forward: y = x / rms(x) * weight
25
+ // Requires separate uniform for rms norm params.
26
+ struct RMSNormParams {
27
+ num_rows : u32, // number of vectors (batch * seq_len)
28
+ dim : u32, // feature dimension
29
+ eps : f32,
30
+ };
31
+
32
+ @group(0) @binding(0) var<uniform> rms_p : RMSNormParams;
33
+ @group(0) @binding(1) var<storage, read> rms_x : array<f32>;
34
+ @group(0) @binding(2) var<storage, read> rms_w : array<f32>; // scale (dim,)
35
+ @group(0) @binding(3) var<storage, read_write> rms_y : array<f32>;
36
+ @group(0) @binding(4) var<storage, read_write> rms_inv : array<f32>; // cache 1/rms per row
37
+
38
+ @compute @workgroup_size(64, 1, 1)
39
+ fn rmsnorm_forward(
40
+ @builtin(global_invocation_id) gid : vec3<u32>,
41
+ ) {
42
+ let row = gid.x;
43
+ if (row >= rms_p.num_rows) { return; }
44
+
45
+ let D = rms_p.dim;
46
+ let base = row * D;
47
+
48
+ var sq_sum: f32 = 0.0;
49
+ for (var i: u32 = 0u; i < D; i = i + 1u) {
50
+ let v = rms_x[base + i];
51
+ sq_sum = sq_sum + v * v;
52
+ }
53
+ let inv_rms = 1.0 / sqrt(sq_sum / f32(D) + rms_p.eps);
54
+ rms_inv[row] = inv_rms;
55
+
56
+ for (var i: u32 = 0u; i < D; i = i + 1u) {
57
+ rms_y[base + i] = rms_x[base + i] * inv_rms * rms_w[i];
58
+ }
59
+ }
60
+ `;
61
+ // ---- Softmax (row-wise with optional causal mask) ----
62
+ // Standalone softmax used by AttentionBlock for the score matrix.
63
+ // Dispatch: (L, H, B) — one workgroup per (row, head, batch).
64
+ // This version is a simple sequential-within-workgroup implementation;
65
+ // for large L prefer the cooperative version in attention.ts.
66
+ export const SOFTMAX_FORWARD_WGSL = /* wgsl */ `
67
+ struct SoftmaxParams {
68
+ rows : u32, // L
69
+ cols : u32, // L
70
+ causal : u32, // 1 = apply causal mask, 0 = full softmax
71
+ };
72
+
73
+ @group(0) @binding(0) var<uniform> sp : SoftmaxParams;
74
+ @group(0) @binding(1) var<storage, read_write> data : array<f32>;
75
+
76
+ @compute @workgroup_size(1, 1, 1)
77
+ fn softmax_forward_simple(@builtin(global_invocation_id) gid: vec3<u32>) {
78
+ let row = gid.x;
79
+ let head = gid.y;
80
+ let bat = gid.z;
81
+
82
+ if (row >= sp.rows) { return; }
83
+
84
+ let L = sp.cols;
85
+ let base = bat * sp.rows * L + head * L * L + row * L;
86
+ let lim = select(L, row + 1u, sp.causal == 1u);
87
+
88
+ var max_val = -1e38;
89
+ for (var c = 0u; c < lim; c = c + 1u) {
90
+ if (data[base + c] > max_val) { max_val = data[base + c]; }
91
+ }
92
+
93
+ var sum_exp = 0.0;
94
+ for (var c = 0u; c < lim; c = c + 1u) {
95
+ let e = exp(data[base + c] - max_val);
96
+ data[base + c] = e;
97
+ sum_exp = sum_exp + e;
98
+ }
99
+
100
+ let inv = 1.0 / (sum_exp + 1e-12);
101
+ for (var c = 0u; c < lim; c = c + 1u) {
102
+ data[base + c] = data[base + c] * inv;
103
+ }
104
+ // Zero out masked positions
105
+ for (var c = lim; c < L; c = c + 1u) {
106
+ data[base + c] = 0.0;
107
+ }
108
+ }
109
+ `;
110
+ export const SOFTMAX_BACKWARD_WGSL = /* wgsl */ `
111
+ struct SoftmaxParams {
112
+ rows : u32,
113
+ cols : u32,
114
+ causal : u32,
115
+ };
116
+
117
+ @group(0) @binding(0) var<uniform> sp : SoftmaxParams;
118
+ @group(0) @binding(1) var<storage, read> p : array<f32>; // post-softmax probs
119
+ @group(0) @binding(2) var<storage, read> dp : array<f32>; // upstream gradient
120
+ @group(0) @binding(3) var<storage, read_write> dx : array<f32>; // output gradient
121
+
122
+ @compute @workgroup_size(1, 1, 1)
123
+ fn softmax_backward(@builtin(global_invocation_id) gid: vec3<u32>) {
124
+ let row = gid.x;
125
+ let head = gid.y;
126
+ let bat = gid.z;
127
+
128
+ if (row >= sp.rows) { return; }
129
+
130
+ let L = sp.cols;
131
+ let base = bat * sp.rows * L + head * L * L + row * L;
132
+ let lim = select(L, row + 1u, sp.causal == 1u);
133
+
134
+ // dot = sum_i p[i] * dp[i]
135
+ var dot = 0.0;
136
+ for (var i = 0u; i < lim; i = i + 1u) {
137
+ dot = dot + p[base + i] * dp[base + i];
138
+ }
139
+
140
+ for (var i = 0u; i < lim; i = i + 1u) {
141
+ dx[base + i] = p[base + i] * (dp[base + i] - dot);
142
+ }
143
+ }
144
+ `;
145
+ // ---- Backward for SiLU ---- WGSL entry point silu_backward writes dx[i] = dy[i] * sigmoid(x[i]) * (1 + x[i] * (1 - sigmoid(x[i]))) for every i < num_elements; 256 invocations per workgroup, one element each.
146
+ export const ACTIVATIONS_BACKWARD_WGSL = /* wgsl */ `
147
+
148
+ struct ActParams {
149
+ num_elements : u32,
150
+ };
151
+
152
+ @group(0) @binding(0) var<uniform> p : ActParams;
153
+ @group(0) @binding(1) var<storage, read> x : array<f32>;
154
+ @group(0) @binding(2) var<storage, read> dy : array<f32>;
155
+ @group(0) @binding(3) var<storage, read_write> dx : array<f32>;
156
+
157
+ // d/dx [x * sigmoid(x)] = sigmoid(x) + x * sigmoid(x) * (1 - sigmoid(x))
158
+ // = silu(x)/x + sigmoid(x) * (1 - sigmoid(x)) * x
159
+ // simplified: sigmoid(x) * (1 + x*(1 - sigmoid(x)))
160
+ @compute @workgroup_size(256, 1, 1)
161
+ fn silu_backward(
162
+ @builtin(global_invocation_id) gid : vec3<u32>,
163
+ ) {
164
+ let i = gid.x;
165
+ if (i >= p.num_elements) { return; }
166
+ let v = x[i];
167
+ let sig = 1.0 / (1.0 + exp(-v));
168
+ dx[i] = dy[i] * sig * (1.0 + v * (1.0 - sig));
169
+ }
170
+ `;
171
+ //# sourceMappingURL=activations.js.map