@ruvector/attention-wasm 0.1.0 → 0.1.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -0
- package/package.json +20 -33
- package/ruvector_attention_wasm.d.ts +422 -0
- package/ruvector_attention_wasm.js +1494 -0
- package/ruvector_attention_wasm_bg.wasm +0 -0
- package/js/index.ts +0 -412
- package/js/types.ts +0 -108
- package/pkg/LICENSE +0 -21
- package/pkg/README.md +0 -193
package/pkg/README.md
DELETED
@@ -1,193 +0,0 @@
# ruvector-attention-wasm

WebAssembly bindings for the ruvector-attention package, providing high-performance attention mechanisms for browser and Node.js environments.

## Features

- **Multiple Attention Mechanisms**:
  - Scaled Dot-Product Attention
  - Multi-Head Attention
  - Hyperbolic Attention (for hierarchical data)
  - Linear Attention (Performer-style)
  - Flash Attention (memory-efficient)
  - Local-Global Attention
  - Mixture of Experts (MoE) Attention

- **Training Utilities**:
  - InfoNCE contrastive loss
  - Adam optimizer
  - AdamW optimizer (with decoupled weight decay)
  - Learning rate scheduler (warmup + cosine decay)

- **TypeScript Support**: Full type definitions and modern API

## Installation

```bash
npm install ruvector-attention-wasm
```

## Usage

### TypeScript/JavaScript

```typescript
import { initialize, MultiHeadAttention, utils } from 'ruvector-attention-wasm';

// Initialize WASM module
await initialize();

// Create multi-head attention
const attention = new MultiHeadAttention({ dim: 64, numHeads: 8 });

// Prepare inputs
const query = new Float32Array(64);
const keys = [new Float32Array(64), new Float32Array(64)];
const values = [new Float32Array(64), new Float32Array(64)];

// Compute attention
const output = attention.compute(query, keys, values);

// Use utilities
const similarity = utils.cosineSimilarity(query, keys[0]);
```

### Advanced Examples

#### Hyperbolic Attention

```typescript
import { HyperbolicAttention } from 'ruvector-attention-wasm';

const hyperbolic = new HyperbolicAttention({
  dim: 128,
  curvature: 1.0
});

const output = hyperbolic.compute(query, keys, values);
```

#### MoE Attention with Expert Stats

```typescript
import { MoEAttention } from 'ruvector-attention-wasm';

const moe = new MoEAttention({
  dim: 64,
  numExperts: 4,
  topK: 2
});

const output = moe.compute(query, keys, values);

// Get expert utilization
const stats = moe.getExpertStats();
console.log('Load balance:', stats.loadBalance);
```
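
The `topK` setting above controls how many experts each query is routed to. As a conceptual illustration only (not `MoEAttention`'s internals, which may differ), top-k gating usually means: softmax over per-expert gate scores, keep the k largest, renormalize, and mix the selected experts' outputs by those weights.

```typescript
// Conceptual illustration of top-k expert routing (not ruvector's internal implementation).
// gateScores: one raw gating score per expert for the current query.
function routeTopK(gateScores: number[], topK: number): { index: number; weight: number }[] {
  // Softmax over all expert scores.
  const max = Math.max(...gateScores);
  const exps = gateScores.map(s => Math.exp(s - max));
  const sum = exps.reduce((a, b) => a + b, 0);
  const probs = exps.map(e => e / sum);

  // Keep the top-k experts and renormalize their weights.
  const ranked = probs
    .map((p, index) => ({ index, weight: p }))
    .sort((a, b) => b.weight - a.weight)
    .slice(0, topK);
  const kept = ranked.reduce((acc, e) => acc + e.weight, 0);
  return ranked.map(e => ({ index: e.index, weight: e.weight / kept }));
}

// Example: 4 experts, route each query to the best 2.
console.log(routeTopK([1.2, -0.3, 0.8, 0.1], 2));
```

Roughly even utilization across experts is what the `getExpertStats()` call above is meant to help you monitor.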

#### Training with InfoNCE Loss

```typescript
import { InfoNCELoss, Adam } from 'ruvector-attention-wasm';

const loss = new InfoNCELoss(0.07);
const optimizer = new Adam(paramCount, {
  learningRate: 0.001,
  beta1: 0.9,
  beta2: 0.999,
});

// Training loop
const lossValue = loss.compute(anchor, positive, negatives);
optimizer.step(params, gradients);
```
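
For reference, InfoNCE with temperature τ (0.07 above) is a softmax cross-entropy over similarities, where the positive pair plays the role of the correct class. The self-contained sketch below shows that computation; it assumes cosine similarity as the score and illustrates the objective only, rather than reimplementing `InfoNCELoss`.

```typescript
// Illustration of InfoNCE: -log( exp(sim(a,p)/t) / Σ_x exp(sim(a,x)/t) ), x over {positive} ∪ negatives.
function cosine(a: Float32Array, b: Float32Array): number {
  let dot = 0, na = 0, nb = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    na += a[i] * a[i];
    nb += b[i] * b[i];
  }
  return dot / (Math.sqrt(na) * Math.sqrt(nb) || 1);
}

function infoNCE(
  anchor: Float32Array,
  positive: Float32Array,
  negatives: Float32Array[],
  temperature = 0.07
): number {
  // Scaled similarity scores; the positive is index 0.
  const scores = [cosine(anchor, positive), ...negatives.map(n => cosine(anchor, n))]
    .map(s => s / temperature);
  // Numerically stable log-sum-exp over all candidates.
  const max = Math.max(...scores);
  const logSumExp = max + Math.log(scores.reduce((acc, s) => acc + Math.exp(s - max), 0));
  // Loss is the negative log-probability of the positive.
  return -(scores[0] - logSumExp);
}
```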

#### Learning Rate Scheduling

```typescript
import { LRScheduler, AdamW } from 'ruvector-attention-wasm';

const scheduler = new LRScheduler({
  initialLR: 0.001,
  warmupSteps: 1000,
  totalSteps: 10000,
});

const optimizer = new AdamW(paramCount, {
  learningRate: scheduler.getLR(),
  weightDecay: 0.01,
});

// Training loop
for (let step = 0; step < 10000; step++) {
  optimizer.learningRate = scheduler.getLR();
  optimizer.step(params, gradients);
  scheduler.step();
}
```
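
The feature list describes the schedule as warmup followed by cosine decay. The sketch below shows the usual form of that curve; `LRScheduler`'s exact behavior (for example, whether it decays to zero or to a floor) may differ.

```typescript
// Typical warmup + cosine decay schedule (illustrative; not necessarily LRScheduler's exact curve).
function warmupCosineLR(step: number, initialLR: number, warmupSteps: number, totalSteps: number): number {
  if (step < warmupSteps) {
    // Linear warmup from 0 up to initialLR.
    return initialLR * (step + 1) / warmupSteps;
  }
  // Cosine decay from initialLR down to 0 over the remaining steps.
  const progress = (step - warmupSteps) / Math.max(1, totalSteps - warmupSteps);
  return initialLR * 0.5 * (1 + Math.cos(Math.PI * Math.min(1, progress)));
}

console.log(warmupCosineLR(500, 0.001, 1000, 10000));  // mid-warmup
console.log(warmupCosineLR(5500, 0.001, 1000, 10000)); // mid-decay
```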

## Building from Source

### Prerequisites

- Rust 1.70+
- wasm-pack

### Build Commands

```bash
# Build for web (ES modules)
wasm-pack build --target web --out-dir pkg

# Build for Node.js
wasm-pack build --target nodejs --out-dir pkg-node

# Build for bundlers (webpack, vite, etc.)
wasm-pack build --target bundler --out-dir pkg-bundler

# Run tests
wasm-pack test --headless --firefox
```

## API Reference

### Attention Mechanisms

- `MultiHeadAttention` - Standard multi-head attention
- `HyperbolicAttention` - Attention in hyperbolic space
- `LinearAttention` - Linear complexity attention (Performer)
- `FlashAttention` - Memory-efficient attention
- `LocalGlobalAttention` - Combined local and global attention
- `MoEAttention` - Mixture of Experts attention
- `scaledDotAttention()` - Functional API for basic attention
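
All of the mechanisms above build on scaled dot-product attention: softmax(q·kᵢ/√d) weights applied to the values. The standalone sketch below shows that computation for a single query; it illustrates the math only and is not the signature of `scaledDotAttention()`.

```typescript
// Illustration of scaled dot-product attention for one query (math only, not the package API).
function scaledDotProduct(query: Float32Array, keys: Float32Array[], values: Float32Array[]): Float32Array {
  const scale = 1 / Math.sqrt(query.length);
  // Scores: q · k_i / sqrt(d)
  const scores = keys.map(k => {
    let dot = 0;
    for (let i = 0; i < query.length; i++) dot += query[i] * k[i];
    return dot * scale;
  });
  // Softmax over the scores.
  const max = Math.max(...scores);
  const exps = scores.map(s => Math.exp(s - max));
  const sum = exps.reduce((a, b) => a + b, 0);
  const weights = exps.map(e => e / sum);
  // Weighted sum of the values.
  const out = new Float32Array(values[0].length);
  weights.forEach((w, i) => {
    for (let j = 0; j < out.length; j++) out[j] += w * values[i][j];
  });
  return out;
}
```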

### Training

- `InfoNCELoss` - Contrastive loss function
- `Adam` - Adam optimizer
- `AdamW` - AdamW optimizer with weight decay
- `LRScheduler` - Learning rate scheduler

### Utilities

- `utils.cosineSimilarity()` - Cosine similarity between vectors
- `utils.l2Norm()` - L2 norm of a vector
- `utils.normalize()` - Normalize vector to unit length
- `utils.softmax()` - Apply softmax transformation
- `utils.attentionWeights()` - Compute attention weights from scores
- `utils.batchNormalize()` - Batch normalization
- `utils.randomOrthogonalMatrix()` - Generate random orthogonal matrix
- `utils.pairwiseDistances()` - Compute pairwise distances
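
A hypothetical snippet combining a few of these helpers is shown below; the argument and return types (Float32Array in, Float32Array or number out) are assumptions, so check the shipped ruvector_attention_wasm.d.ts for the actual signatures.

```typescript
import { initialize, utils } from 'ruvector-attention-wasm';

await initialize();

// Hypothetical usage; exact signatures live in ruvector_attention_wasm.d.ts.
const a = utils.normalize(new Float32Array([3, 4]));  // assumed: returns a unit-length copy
const n = utils.l2Norm(a);                            // assumed: ~1.0 for a normalized vector
const w = utils.softmax(new Float32Array([1, 2, 3])); // assumed: probabilities summing to 1
console.log(n, w);
```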

## Performance

The WASM bindings provide near-native performance for attention computations:

- Optimized with `opt-level = "s"` and LTO
- SIMD acceleration where available
- Efficient memory management
- Zero-copy data transfer where possible

## License

MIT OR Apache-2.0