@sparkleideas/ruv-swarm 1.0.18-patch.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1565 -0
- package/bin/ruv-swarm-clean.js +1872 -0
- package/bin/ruv-swarm-memory.js +119 -0
- package/bin/ruv-swarm-secure-heartbeat.js +1549 -0
- package/bin/ruv-swarm-secure.js +1689 -0
- package/package.json +221 -0
- package/src/agent.ts +342 -0
- package/src/benchmark.js +267 -0
- package/src/claude-flow-enhanced.js +839 -0
- package/src/claude-integration/advanced-commands.js +561 -0
- package/src/claude-integration/core.js +112 -0
- package/src/claude-integration/docs.js +1548 -0
- package/src/claude-integration/env-template.js +39 -0
- package/src/claude-integration/index.js +209 -0
- package/src/claude-integration/remote.js +408 -0
- package/src/cli-diagnostics.js +364 -0
- package/src/cognitive-pattern-evolution.js +1317 -0
- package/src/daa-cognition.js +977 -0
- package/src/daa-service.d.ts +298 -0
- package/src/daa-service.js +1116 -0
- package/src/diagnostics.js +533 -0
- package/src/errors.js +528 -0
- package/src/github-coordinator/README.md +193 -0
- package/src/github-coordinator/claude-hooks.js +162 -0
- package/src/github-coordinator/gh-cli-coordinator.js +260 -0
- package/src/hooks/cli.js +82 -0
- package/src/hooks/index.js +1900 -0
- package/src/index-enhanced.d.ts +371 -0
- package/src/index-enhanced.js +734 -0
- package/src/index.d.ts +287 -0
- package/src/index.js +405 -0
- package/src/index.ts +457 -0
- package/src/logger.js +182 -0
- package/src/logging-config.js +179 -0
- package/src/mcp-daa-tools.js +735 -0
- package/src/mcp-tools-benchmarks.js +328 -0
- package/src/mcp-tools-enhanced.js +2863 -0
- package/src/memory-config.js +42 -0
- package/src/meta-learning-framework.js +1359 -0
- package/src/neural-agent.js +830 -0
- package/src/neural-coordination-protocol.js +1363 -0
- package/src/neural-models/README.md +118 -0
- package/src/neural-models/autoencoder.js +543 -0
- package/src/neural-models/base.js +269 -0
- package/src/neural-models/cnn.js +497 -0
- package/src/neural-models/gnn.js +447 -0
- package/src/neural-models/gru.js +536 -0
- package/src/neural-models/index.js +273 -0
- package/src/neural-models/lstm.js +551 -0
- package/src/neural-models/neural-presets-complete.js +1306 -0
- package/src/neural-models/presets/graph.js +392 -0
- package/src/neural-models/presets/index.js +279 -0
- package/src/neural-models/presets/nlp.js +328 -0
- package/src/neural-models/presets/timeseries.js +368 -0
- package/src/neural-models/presets/vision.js +387 -0
- package/src/neural-models/resnet.js +534 -0
- package/src/neural-models/transformer.js +515 -0
- package/src/neural-models/vae.js +489 -0
- package/src/neural-network-manager.js +1938 -0
- package/src/neural-network.ts +296 -0
- package/src/neural.js +574 -0
- package/src/performance-benchmarks.js +898 -0
- package/src/performance.js +458 -0
- package/src/persistence-pooled.js +695 -0
- package/src/persistence.js +480 -0
- package/src/schemas.js +864 -0
- package/src/security.js +218 -0
- package/src/singleton-container.js +183 -0
- package/src/sqlite-pool.js +587 -0
- package/src/sqlite-worker.js +141 -0
- package/src/types.ts +164 -0
- package/src/utils.ts +286 -0
- package/src/wasm-loader.js +601 -0
- package/src/wasm-loader2.js +404 -0
- package/src/wasm-memory-optimizer.js +783 -0
- package/src/wasm-types.d.ts +63 -0
- package/wasm/README.md +347 -0
- package/wasm/neuro-divergent.wasm +0 -0
- package/wasm/package.json +18 -0
- package/wasm/ruv-fann.wasm +0 -0
- package/wasm/ruv_swarm_simd.wasm +0 -0
- package/wasm/ruv_swarm_wasm.d.ts +391 -0
- package/wasm/ruv_swarm_wasm.js +2164 -0
- package/wasm/ruv_swarm_wasm_bg.wasm +0 -0
- package/wasm/ruv_swarm_wasm_bg.wasm.d.ts +123 -0
- package/wasm/wasm-bindings-loader.mjs +435 -0
- package/wasm/wasm-updates.md +684 -0
|
@@ -0,0 +1,515 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transformer Neural Network Model
|
|
3
|
+
* Implements multi-head attention mechanism with positional encoding
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { NeuralModel } from './base.js';
|
|
7
|
+
|
|
8
|
+
class TransformerModel extends NeuralModel {
  /**
   * Transformer encoder with multi-head self-attention, sinusoidal positional
   * encoding, residual connections, and post-sublayer LayerNorm (post-norm,
   * as in "Attention Is All You Need"). Tensors are flat Float32Arrays with a
   * `.shape` property attached; matmul/add/relu/dropout/addBias are inherited
   * from NeuralModel (./base.js).
   *
   * @param {object} [config] - Model hyper-parameters.
   * @param {number} [config.dimensions=512] - Model dimension d_model.
   * @param {number} [config.heads=8] - Number of attention heads.
   * @param {number} [config.layers=6] - Number of encoder layers.
   * @param {number} [config.ffDimensions=2048] - Feed-forward hidden size.
   * @param {number} [config.maxSequenceLength=1024] - Max sequence length.
   * @param {number} [config.vocabularySize=50000] - Output vocabulary size.
   * @param {number} [config.dropoutRate=0.1] - Dropout probability (training only).
   * @throws {Error} If dimensions is not divisible by heads.
   */
  constructor(config = {}) {
    super('transformer');

    // Use ?? (not ||) so explicit falsy-but-valid values such as
    // `dropoutRate: 0` are respected.
    this.config = {
      dimensions: config.dimensions ?? 512,
      heads: config.heads ?? 8,
      layers: config.layers ?? 6,
      ffDimensions: config.ffDimensions ?? 2048,
      maxSequenceLength: config.maxSequenceLength ?? 1024,
      vocabularySize: config.vocabularySize ?? 50000,
      dropoutRate: config.dropoutRate ?? 0.1,
      ...config,
    };

    // Fail fast instead of silently flooring headDimension and dropping the
    // trailing channels during the head reshape (previous behavior).
    if (this.config.dimensions % this.config.heads !== 0) {
      throw new Error(
        `dimensions (${this.config.dimensions}) must be divisible by heads (${this.config.heads})`,
      );
    }

    this.headDimension = this.config.dimensions / this.config.heads;
    this.positionalEncoding = this.createPositionalEncoding();
    this.attentionWeights = new Map();
    this.layerNorms = [];
    this.feedForwardWeights = [];

    this.initializeWeights();
  }

  /**
   * Allocate all learnable parameters.
   *
   * Layout:
   *  - attentionWeights: Map keyed by `layer_${i}` with query/key/value/output
   *    projection matrices, each [dimensions x dimensions].
   *  - layerNorms: TWO entries per layer — index 2*i normalizes the attention
   *    residual, 2*i+1 the feed-forward residual. (Previously one gamma/beta
   *    pair was shared by both sublayers of a layer.)
   *  - feedForwardWeights: {w1, b1, w2, b2} per layer.
   *  - outputWeights: final projection to vocabulary logits plus bias.
   */
  initializeWeights() {
    for (let layer = 0; layer < this.config.layers; layer++) {
      this.attentionWeights.set(`layer_${layer}`, {
        query: this.createWeight([this.config.dimensions, this.config.dimensions]),
        key: this.createWeight([this.config.dimensions, this.config.dimensions]),
        value: this.createWeight([this.config.dimensions, this.config.dimensions]),
        output: this.createWeight([this.config.dimensions, this.config.dimensions]),
      });

      // One LayerNorm per residual sublayer: attention first, feed-forward second.
      for (let sublayer = 0; sublayer < 2; sublayer++) {
        this.layerNorms.push({
          gamma: new Float32Array(this.config.dimensions).fill(1.0),
          beta: new Float32Array(this.config.dimensions), // zero-initialized
        });
      }

      this.feedForwardWeights.push({
        w1: this.createWeight([this.config.dimensions, this.config.ffDimensions]),
        b1: new Float32Array(this.config.ffDimensions), // zero-initialized
        w2: this.createWeight([this.config.ffDimensions, this.config.dimensions]),
        b2: new Float32Array(this.config.dimensions), // zero-initialized
      });
    }

    this.outputWeights = {
      projection: this.createWeight([this.config.dimensions, this.config.vocabularySize]),
      bias: new Float32Array(this.config.vocabularySize), // zero-initialized
    };
  }

  /**
   * Create a flat weight buffer with Glorot-style scaled uniform init:
   * values uniform in [-s, s] with s = sqrt(2 / (fanIn + fanOut)).
   *
   * @param {number[]} shape - [fanIn, fanOut].
   * @returns {Float32Array} Flattened weight matrix of shape.reduce(*).
   */
  createWeight(shape) {
    const size = shape.reduce((a, b) => a * b, 1);
    const weight = new Float32Array(size);

    const scale = Math.sqrt(2.0 / (shape[0] + shape[1]));
    for (let i = 0; i < size; i++) {
      weight[i] = (Math.random() * 2 - 1) * scale;
    }

    return weight;
  }

  /**
   * Precompute the fixed sinusoidal positional-encoding table
   * [maxSequenceLength x dimensions]: sin on even channels, cos on odd,
   * with wavelengths forming a geometric progression up to 10000^(2k/d).
   *
   * @returns {Float32Array} Row-major encoding table.
   */
  createPositionalEncoding() {
    const encoding = new Float32Array(this.config.maxSequenceLength * this.config.dimensions);

    for (let pos = 0; pos < this.config.maxSequenceLength; pos++) {
      for (let i = 0; i < this.config.dimensions; i++) {
        const angle = pos / Math.pow(10000, (2 * Math.floor(i / 2)) / this.config.dimensions);

        if (i % 2 === 0) {
          encoding[pos * this.config.dimensions + i] = Math.sin(angle);
        } else {
          encoding[pos * this.config.dimensions + i] = Math.cos(angle);
        }
      }
    }

    return encoding;
  }

  /**
   * Run the full encoder stack and project to vocabulary logits.
   *
   * @param {Float32Array} input - Token indices with shape [batch, seqLen].
   * @param {boolean} [training=false] - Enables dropout when true.
   * @returns {Promise<Float32Array>} Logits over the vocabulary.
   * @throws {Error} If the sequence is longer than maxSequenceLength.
   */
  async forward(input, training = false) {
    const sequenceLength = input.shape[1];

    // Guard: longer sequences would read past the positional-encoding table
    // and silently produce NaN/undefined sums.
    if (sequenceLength > this.config.maxSequenceLength) {
      throw new Error(
        `sequence length ${sequenceLength} exceeds maxSequenceLength ${this.config.maxSequenceLength}`,
      );
    }

    // Token embedding (simplified - in practice would use a learned embedding layer)
    let x = this.tokenEmbedding(input);

    // Add positional encoding
    x = this.addPositionalEncoding(x, sequenceLength);

    // Apply dropout if training
    if (training && this.config.dropoutRate > 0) {
      x = this.dropout(x, this.config.dropoutRate);
    }

    // Process through transformer layers (post-norm residual blocks).
    for (let layer = 0; layer < this.config.layers; layer++) {
      // Multi-head self-attention, then Add & Norm (norm 2*layer).
      const attentionOutput = await this.multiHeadAttention(x, layer, training);
      x = this.layerNorm(this.add(x, attentionOutput), this.layerNorms[2 * layer]);

      // Feed-forward network, then Add & Norm (norm 2*layer + 1 — each
      // sublayer now has its own gamma/beta instead of sharing one).
      const ffOutput = this.feedForward(x, layer);
      x = this.layerNorm(this.add(x, ffOutput), this.layerNorms[2 * layer + 1]);
    }

    // Final output projection to vocabulary logits.
    return this.outputProjection(x);
  }

  /**
   * Multi-head scaled dot-product self-attention (bidirectional: no causal
   * mask is applied, so every position attends to every other).
   *
   * @param {Float32Array} input - [batch, seqLen, dimensions] activations.
   * @param {number} layerIndex - Which layer's Q/K/V/O weights to use.
   * @param {boolean} [training=false] - Enables output dropout when true.
   * @returns {Promise<Float32Array>} [batch, seqLen, dimensions] output.
   */
  async multiHeadAttention(input, layerIndex, training = false) {
    const weights = this.attentionWeights.get(`layer_${layerIndex}`);
    const batchSize = input.shape[0];
    const sequenceLength = input.shape[1];

    // Linear projections for Q, K, V
    const Q = this.matmul(input, weights.query);
    const K = this.matmul(input, weights.key);
    const V = this.matmul(input, weights.value);

    // Reshape to head-major layout [batch, heads, seq, headDim].
    const QHeads = this.reshapeForHeads(Q, batchSize, sequenceLength);
    const KHeads = this.reshapeForHeads(K, batchSize, sequenceLength);
    const VHeads = this.reshapeForHeads(V, batchSize, sequenceLength);

    // Raw attention scores: Q·K^T / sqrt(d_k), laid out [b, h, i, j].
    const attentionScores = new Float32Array(batchSize * this.config.heads * sequenceLength * sequenceLength);

    for (let b = 0; b < batchSize; b++) {
      for (let h = 0; h < this.config.heads; h++) {
        for (let i = 0; i < sequenceLength; i++) {
          for (let j = 0; j < sequenceLength; j++) {
            let score = 0;

            // Dot product of query i with key j within this head.
            for (let d = 0; d < this.headDimension; d++) {
              const qIdx = b * this.config.heads * sequenceLength * this.headDimension +
                          h * sequenceLength * this.headDimension +
                          i * this.headDimension + d;
              const kIdx = b * this.config.heads * sequenceLength * this.headDimension +
                          h * sequenceLength * this.headDimension +
                          j * this.headDimension + d;

              score += QHeads[qIdx] * KHeads[kIdx];
            }

            // Scale by sqrt(d_k) for gradient stability.
            score /= Math.sqrt(this.headDimension);

            const scoreIdx = b * this.config.heads * sequenceLength * sequenceLength +
                            h * sequenceLength * sequenceLength +
                            i * sequenceLength + j;
            attentionScores[scoreIdx] = score;
          }
        }
      }
    }

    // Softmax over the key axis (j), per head and query position.
    const attentionWeights = this.softmax(attentionScores, sequenceLength);

    // Weighted sum of values.
    const attendedValues = this.applyAttentionWeights(attentionWeights, VHeads, batchSize, sequenceLength);

    // Concatenate heads back to [batch, seq, dimensions] and project.
    const concatenated = this.concatenateHeads(attendedValues, batchSize, sequenceLength);
    const output = this.matmul(concatenated, weights.output);

    // Apply dropout if training
    if (training && this.config.dropoutRate > 0) {
      return this.dropout(output, this.config.dropoutRate);
    }

    return output;
  }

  /**
   * Position-wise feed-forward sublayer: Linear -> ReLU -> Linear.
   *
   * @param {Float32Array} input - [batch, seqLen, dimensions] activations.
   * @param {number} layerIndex - Which layer's FFN weights to use.
   * @returns {Float32Array} Same shape as input.
   */
  feedForward(input, layerIndex) {
    const weights = this.feedForwardWeights[layerIndex];

    // Expand to ffDimensions, apply ReLU non-linearity.
    let hidden = this.matmul(input, weights.w1);
    hidden = this.addBias(hidden, weights.b1);
    hidden = this.relu(hidden);

    // Project back to model dimensions.
    let output = this.matmul(hidden, weights.w2);
    output = this.addBias(output, weights.b2);

    return output;
  }

  /**
   * Layer normalization over the last dimension, with learned scale (gamma)
   * and shift (beta) and epsilon 1e-5 for numerical stability.
   *
   * @param {Float32Array} input - Tensor with a `.shape` property.
   * @param {{gamma: Float32Array, beta: Float32Array}} normParams
   * @returns {Float32Array} Normalized tensor, same shape as input.
   */
  layerNorm(input, normParams) {
    const { shape } = input;
    const lastDim = shape[shape.length - 1];
    const normalized = new Float32Array(input.length);

    // Normalize each position's feature vector independently.
    for (let i = 0; i < input.length / lastDim; i++) {
      let mean = 0;
      for (let j = 0; j < lastDim; j++) {
        mean += input[i * lastDim + j];
      }
      mean /= lastDim;

      let variance = 0;
      for (let j = 0; j < lastDim; j++) {
        const diff = input[i * lastDim + j] - mean;
        variance += diff * diff;
      }
      variance /= lastDim;

      const std = Math.sqrt(variance + 1e-5);
      for (let j = 0; j < lastDim; j++) {
        const idx = i * lastDim + j;
        normalized[idx] = normParams.gamma[j] * ((input[idx] - mean) / std) + normParams.beta[j];
      }
    }

    normalized.shape = shape;
    return normalized;
  }

  /**
   * Train the model with the Noam warmup/decay learning-rate schedule.
   *
   * @param {Array} trainingData - Samples; split into train/validation.
   * @param {object} [options]
   * @param {number} [options.epochs=10]
   * @param {number} [options.batchSize=32]
   * @param {number} [options.learningRate=0.001] - Base LR before scheduling.
   * @param {number} [options.warmupSteps=4000]
   * @param {number} [options.validationSplit=0.1]
   * @returns {Promise<{history: object[], finalLoss: number|null, modelType: string}>}
   */
  async train(trainingData, options = {}) {
    const {
      epochs = 10,
      batchSize = 32,
      learningRate = 0.001,
      warmupSteps = 4000,
      validationSplit = 0.1,
    } = options;

    const trainingHistory = [];

    // Split data into training and validation
    const splitIndex = Math.floor(trainingData.length * (1 - validationSplit));
    const trainData = trainingData.slice(0, splitIndex);
    const valData = trainingData.slice(splitIndex);

    let globalStep = 0;

    for (let epoch = 0; epoch < epochs; epoch++) {
      let epochLoss = 0;
      let batchCount = 0;

      // Shuffle training data
      const shuffled = this.shuffle(trainData);

      // Process batches
      for (let i = 0; i < shuffled.length; i += batchSize) {
        const batch = shuffled.slice(i, Math.min(i + batchSize, shuffled.length));

        // Adaptive learning rate with warmup
        const currentLR = this.getAdaptiveLearningRate(learningRate, globalStep, warmupSteps);

        // NOTE(review): `batch` is an Array slice, so `batch.inputs` /
        // `batch.targets` are undefined unless callers attach those fields to
        // the samples' container — confirm the expected trainingData layout.
        const predictions = await this.forward(batch.inputs, true);

        // Calculate loss
        const loss = this.crossEntropyLoss(predictions, batch.targets);
        epochLoss += loss;

        // Backward pass (simplified)
        await this.backward(loss, currentLR);

        globalStep++;
        batchCount++;
      }

      // Validation
      const valLoss = await this.validate(valData);

      // Guard against division by zero when the training split is empty.
      const avgTrainLoss = batchCount > 0 ? epochLoss / batchCount : 0;
      trainingHistory.push({
        epoch: epoch + 1,
        trainLoss: avgTrainLoss,
        valLoss,
        learningRate: this.getAdaptiveLearningRate(learningRate, globalStep, warmupSteps),
      });

      console.log(`Epoch ${epoch + 1}/${epochs} - Train Loss: ${avgTrainLoss.toFixed(4)}, Val Loss: ${valLoss.toFixed(4)}`);
    }

    return {
      history: trainingHistory,
      // `.at(-1)` with optional chaining: no crash when epochs === 0.
      finalLoss: trainingHistory.at(-1)?.trainLoss ?? null,
      modelType: 'transformer',
    };
  }

  /**
   * Noam learning-rate schedule from the original Transformer paper:
   *   lr = baseLR * d_model^(-0.5) * min(step^(-0.5), step * warmupSteps^(-1.5))
   * Rises linearly during warmup, then decays as 1/sqrt(step).
   *
   * Bug fix: the previous version used sqrt(step) and sqrt(dimensions)
   * (instead of their inverse square roots), so the rate grew without bound.
   *
   * @param {number} baseLR - Base learning rate multiplier.
   * @param {number} step - Global optimizer step (0-based).
   * @param {number} warmupSteps - Warmup duration in steps.
   * @returns {number} Scheduled learning rate.
   */
  getAdaptiveLearningRate(baseLR, step, warmupSteps) {
    const s = Math.max(step, 1); // avoid division by zero on the first step
    const decay = 1 / Math.sqrt(s);
    const warmup = s * Math.pow(warmupSteps, -1.5);
    return (baseLR * Math.min(decay, warmup)) / Math.sqrt(this.config.dimensions);
  }

  /**
   * Simplified token "embedding": deterministic value derived from the token
   * index plus small noise. A real model would use a learned embedding table.
   *
   * @param {Float32Array} tokenIndices - [batch, seqLen] token ids.
   * @returns {Float32Array} [batch, seqLen, dimensions] embeddings.
   */
  tokenEmbedding(tokenIndices) {
    const embedded = new Float32Array(tokenIndices.shape[0] * tokenIndices.shape[1] * this.config.dimensions);

    for (let b = 0; b < tokenIndices.shape[0]; b++) {
      for (let s = 0; s < tokenIndices.shape[1]; s++) {
        for (let d = 0; d < this.config.dimensions; d++) {
          const idx = b * tokenIndices.shape[1] * this.config.dimensions +
                     s * this.config.dimensions + d;
          // Simple embedding based on token index
          embedded[idx] = (tokenIndices[b * tokenIndices.shape[1] + s] % this.config.vocabularySize) /
                         this.config.vocabularySize + (Math.random() - 0.5) * 0.1;
        }
      }
    }

    embedded.shape = [tokenIndices.shape[0], tokenIndices.shape[1], this.config.dimensions];
    return embedded;
  }

  /**
   * Element-wise add of the precomputed sinusoidal table to the embeddings.
   *
   * @param {Float32Array} embeddings - [batch, seqLen, dimensions].
   * @param {number} sequenceLength - Must be <= maxSequenceLength.
   * @returns {Float32Array} Same shape as embeddings.
   */
  addPositionalEncoding(embeddings, sequenceLength) {
    const result = new Float32Array(embeddings.length);

    for (let b = 0; b < embeddings.shape[0]; b++) {
      for (let s = 0; s < sequenceLength; s++) {
        for (let d = 0; d < this.config.dimensions; d++) {
          const embIdx = b * sequenceLength * this.config.dimensions +
                        s * this.config.dimensions + d;
          const posIdx = s * this.config.dimensions + d;

          result[embIdx] = embeddings[embIdx] + this.positionalEncoding[posIdx];
        }
      }
    }

    result.shape = embeddings.shape;
    return result;
  }

  /**
   * Rearrange [batch, seq, dimensions] into head-major
   * [batch, heads, seq, headDimension] layout.
   */
  reshapeForHeads(tensor, batchSize, sequenceLength) {
    const reshaped = new Float32Array(tensor.length);

    for (let b = 0; b < batchSize; b++) {
      for (let s = 0; s < sequenceLength; s++) {
        for (let h = 0; h < this.config.heads; h++) {
          for (let d = 0; d < this.headDimension; d++) {
            const srcIdx = b * sequenceLength * this.config.dimensions +
                          s * this.config.dimensions +
                          h * this.headDimension + d;
            const dstIdx = b * this.config.heads * sequenceLength * this.headDimension +
                          h * sequenceLength * this.headDimension +
                          s * this.headDimension + d;

            reshaped[dstIdx] = tensor[srcIdx];
          }
        }
      }
    }

    return reshaped;
  }

  /**
   * Inverse of reshapeForHeads: rearrange [batch, heads, seq, headDimension]
   * back into [batch, seq, dimensions].
   */
  concatenateHeads(tensor, batchSize, sequenceLength) {
    const concatenated = new Float32Array(batchSize * sequenceLength * this.config.dimensions);

    for (let b = 0; b < batchSize; b++) {
      for (let s = 0; s < sequenceLength; s++) {
        for (let h = 0; h < this.config.heads; h++) {
          for (let d = 0; d < this.headDimension; d++) {
            const srcIdx = b * this.config.heads * sequenceLength * this.headDimension +
                          h * sequenceLength * this.headDimension +
                          s * this.headDimension + d;
            const dstIdx = b * sequenceLength * this.config.dimensions +
                          s * this.config.dimensions +
                          h * this.headDimension + d;

            concatenated[dstIdx] = tensor[srcIdx];
          }
        }
      }
    }

    concatenated.shape = [batchSize, sequenceLength, this.config.dimensions];
    return concatenated;
  }

  /**
   * Numerically-stable softmax over consecutive runs of `sequenceLength`
   * entries (the key axis of the [b, h, i, j] score layout).
   *
   * @param {Float32Array} scores - Raw attention scores.
   * @param {number} sequenceLength - Length of each softmax group.
   * @returns {Float32Array} Normalized attention weights.
   */
  softmax(scores, sequenceLength) {
    const softmaxScores = new Float32Array(scores.length);
    const stride = sequenceLength;

    for (let i = 0; i < scores.length; i += stride) {
      // Subtract the max before exponentiating for numerical stability.
      let maxScore = -Infinity;
      for (let j = 0; j < stride; j++) {
        maxScore = Math.max(maxScore, scores[i + j]);
      }

      let sumExp = 0;
      for (let j = 0; j < stride; j++) {
        softmaxScores[i + j] = Math.exp(scores[i + j] - maxScore);
        sumExp += softmaxScores[i + j];
      }

      for (let j = 0; j < stride; j++) {
        softmaxScores[i + j] /= sumExp;
      }
    }

    return softmaxScores;
  }

  /**
   * Weighted sum of values by attention weights, per batch/head/query.
   *
   * @param {Float32Array} weights - [b, h, i, j] attention weights.
   * @param {Float32Array} values - [b, h, j, headDim] value vectors.
   * @returns {Float32Array} [b, h, i, headDim] attended values.
   */
  applyAttentionWeights(weights, values, batchSize, sequenceLength) {
    const output = new Float32Array(batchSize * this.config.heads * sequenceLength * this.headDimension);

    for (let b = 0; b < batchSize; b++) {
      for (let h = 0; h < this.config.heads; h++) {
        for (let i = 0; i < sequenceLength; i++) {
          for (let d = 0; d < this.headDimension; d++) {
            let sum = 0;

            for (let j = 0; j < sequenceLength; j++) {
              const weightIdx = b * this.config.heads * sequenceLength * sequenceLength +
                               h * sequenceLength * sequenceLength +
                               i * sequenceLength + j;
              const valueIdx = b * this.config.heads * sequenceLength * this.headDimension +
                              h * sequenceLength * this.headDimension +
                              j * this.headDimension + d;

              sum += weights[weightIdx] * values[valueIdx];
            }

            const outIdx = b * this.config.heads * sequenceLength * this.headDimension +
                          h * sequenceLength * this.headDimension +
                          i * this.headDimension + d;
            output[outIdx] = sum;
          }
        }
      }
    }

    return output;
  }

  /**
   * Final projection to vocabulary logits.
   *
   * Bug fix: applies the learned bias allocated in initializeWeights(),
   * which was previously never used.
   */
  outputProjection(input) {
    return this.addBias(this.matmul(input, this.outputWeights.projection), this.outputWeights.bias);
  }

  /**
   * @returns {object} Model type, full configuration, and parameter count.
   */
  getConfig() {
    return {
      type: 'transformer',
      ...this.config,
      parameters: this.countParameters(),
    };
  }

  /**
   * Count learnable parameters matching the buffers in initializeWeights().
   * (The token embedding is procedural here, so it contributes nothing.)
   *
   * @returns {number} Total parameter count.
   */
  countParameters() {
    let count = 0;

    // Attention weights: Q, K, V, O projections per layer.
    count += this.config.layers * 4 * this.config.dimensions * this.config.dimensions;

    // Feed-forward weights
    count += this.config.layers * (
      this.config.dimensions * this.config.ffDimensions * 2 + // W1, W2
      this.config.ffDimensions + this.config.dimensions // biases
    );

    // Layer norm parameters: two norms per layer, gamma + beta each.
    count += this.config.layers * 2 * 2 * this.config.dimensions;

    // Output projection
    count += this.config.dimensions * this.config.vocabularySize + this.config.vocabularySize;

    return count;
  }
}
|
|
514
|
+
|
|
515
|
+
export { TransformerModel };
|