@sparkleideas/ruv-swarm 1.0.18-patch.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/README.md +1565 -0
  2. package/bin/ruv-swarm-clean.js +1872 -0
  3. package/bin/ruv-swarm-memory.js +119 -0
  4. package/bin/ruv-swarm-secure-heartbeat.js +1549 -0
  5. package/bin/ruv-swarm-secure.js +1689 -0
  6. package/package.json +221 -0
  7. package/src/agent.ts +342 -0
  8. package/src/benchmark.js +267 -0
  9. package/src/claude-flow-enhanced.js +839 -0
  10. package/src/claude-integration/advanced-commands.js +561 -0
  11. package/src/claude-integration/core.js +112 -0
  12. package/src/claude-integration/docs.js +1548 -0
  13. package/src/claude-integration/env-template.js +39 -0
  14. package/src/claude-integration/index.js +209 -0
  15. package/src/claude-integration/remote.js +408 -0
  16. package/src/cli-diagnostics.js +364 -0
  17. package/src/cognitive-pattern-evolution.js +1317 -0
  18. package/src/daa-cognition.js +977 -0
  19. package/src/daa-service.d.ts +298 -0
  20. package/src/daa-service.js +1116 -0
  21. package/src/diagnostics.js +533 -0
  22. package/src/errors.js +528 -0
  23. package/src/github-coordinator/README.md +193 -0
  24. package/src/github-coordinator/claude-hooks.js +162 -0
  25. package/src/github-coordinator/gh-cli-coordinator.js +260 -0
  26. package/src/hooks/cli.js +82 -0
  27. package/src/hooks/index.js +1900 -0
  28. package/src/index-enhanced.d.ts +371 -0
  29. package/src/index-enhanced.js +734 -0
  30. package/src/index.d.ts +287 -0
  31. package/src/index.js +405 -0
  32. package/src/index.ts +457 -0
  33. package/src/logger.js +182 -0
  34. package/src/logging-config.js +179 -0
  35. package/src/mcp-daa-tools.js +735 -0
  36. package/src/mcp-tools-benchmarks.js +328 -0
  37. package/src/mcp-tools-enhanced.js +2863 -0
  38. package/src/memory-config.js +42 -0
  39. package/src/meta-learning-framework.js +1359 -0
  40. package/src/neural-agent.js +830 -0
  41. package/src/neural-coordination-protocol.js +1363 -0
  42. package/src/neural-models/README.md +118 -0
  43. package/src/neural-models/autoencoder.js +543 -0
  44. package/src/neural-models/base.js +269 -0
  45. package/src/neural-models/cnn.js +497 -0
  46. package/src/neural-models/gnn.js +447 -0
  47. package/src/neural-models/gru.js +536 -0
  48. package/src/neural-models/index.js +273 -0
  49. package/src/neural-models/lstm.js +551 -0
  50. package/src/neural-models/neural-presets-complete.js +1306 -0
  51. package/src/neural-models/presets/graph.js +392 -0
  52. package/src/neural-models/presets/index.js +279 -0
  53. package/src/neural-models/presets/nlp.js +328 -0
  54. package/src/neural-models/presets/timeseries.js +368 -0
  55. package/src/neural-models/presets/vision.js +387 -0
  56. package/src/neural-models/resnet.js +534 -0
  57. package/src/neural-models/transformer.js +515 -0
  58. package/src/neural-models/vae.js +489 -0
  59. package/src/neural-network-manager.js +1938 -0
  60. package/src/neural-network.ts +296 -0
  61. package/src/neural.js +574 -0
  62. package/src/performance-benchmarks.js +898 -0
  63. package/src/performance.js +458 -0
  64. package/src/persistence-pooled.js +695 -0
  65. package/src/persistence.js +480 -0
  66. package/src/schemas.js +864 -0
  67. package/src/security.js +218 -0
  68. package/src/singleton-container.js +183 -0
  69. package/src/sqlite-pool.js +587 -0
  70. package/src/sqlite-worker.js +141 -0
  71. package/src/types.ts +164 -0
  72. package/src/utils.ts +286 -0
  73. package/src/wasm-loader.js +601 -0
  74. package/src/wasm-loader2.js +404 -0
  75. package/src/wasm-memory-optimizer.js +783 -0
  76. package/src/wasm-types.d.ts +63 -0
  77. package/wasm/README.md +347 -0
  78. package/wasm/neuro-divergent.wasm +0 -0
  79. package/wasm/package.json +18 -0
  80. package/wasm/ruv-fann.wasm +0 -0
  81. package/wasm/ruv_swarm_simd.wasm +0 -0
  82. package/wasm/ruv_swarm_wasm.d.ts +391 -0
  83. package/wasm/ruv_swarm_wasm.js +2164 -0
  84. package/wasm/ruv_swarm_wasm_bg.wasm +0 -0
  85. package/wasm/ruv_swarm_wasm_bg.wasm.d.ts +123 -0
  86. package/wasm/wasm-bindings-loader.mjs +435 -0
  87. package/wasm/wasm-updates.md +684 -0
@@ -0,0 +1,515 @@
1
/**
 * Transformer Neural Network Model
 *
 * Post-norm encoder stack: multi-head self-attention with sinusoidal
 * positional encoding, position-wise feed-forward networks, residual
 * connections with layer normalization, and a final linear projection to
 * vocabulary logits.
 *
 * Tensors are flat Float32Arrays annotated with a `.shape` property.
 * Generic helpers used below (matmul, add, addBias, relu, dropout,
 * shuffle, crossEntropyLoss, backward, validate, ...) are inherited from
 * NeuralModel (./base.js) — their exact contracts are not visible in this
 * file.
 */

import { NeuralModel } from './base.js';

class TransformerModel extends NeuralModel {
  /**
   * @param {object} [config] - Model hyperparameters.
   * @param {number} [config.dimensions=512] - Model width d_model.
   * @param {number} [config.heads=8] - Attention heads; must divide dimensions.
   * @param {number} [config.layers=6] - Number of encoder layers.
   * @param {number} [config.ffDimensions=2048] - Feed-forward hidden width.
   * @param {number} [config.maxSequenceLength=1024] - Longest supported sequence.
   * @param {number} [config.vocabularySize=50000] - Output vocabulary size.
   * @param {number} [config.dropoutRate=0.1] - Dropout probability; 0 disables.
   * @throws {Error} If dimensions is not divisible by heads.
   */
  constructor(config = {}) {
    super('transformer');

    // Use ?? (not ||) so explicit zero values — e.g. dropoutRate: 0 —
    // are honored instead of being replaced by the default.
    this.config = {
      dimensions: config.dimensions ?? 512,
      heads: config.heads ?? 8,
      layers: config.layers ?? 6,
      ffDimensions: config.ffDimensions ?? 2048,
      maxSequenceLength: config.maxSequenceLength ?? 1024,
      vocabularySize: config.vocabularySize ?? 50000,
      dropoutRate: config.dropoutRate ?? 0.1,
      ...config,
    };

    // Fail fast: a non-divisible head count would silently corrupt the
    // per-head reshape/concatenate index arithmetic below.
    if (this.config.dimensions % this.config.heads !== 0) {
      throw new Error(
        `dimensions (${this.config.dimensions}) must be divisible by heads (${this.config.heads})`,
      );
    }
    this.headDimension = this.config.dimensions / this.config.heads;

    this.positionalEncoding = this.createPositionalEncoding();
    this.attentionWeights = new Map();
    this.layerNorms = [];   // post-attention LayerNorm params, one per layer
    this.ffLayerNorms = []; // post-feed-forward LayerNorm params, one per layer
    this.feedForwardWeights = [];

    this.initializeWeights();
  }

  /**
   * Allocate attention, layer-norm, feed-forward, and output weights.
   * Called once from the constructor.
   */
  initializeWeights() {
    const d = this.config.dimensions;

    for (let layer = 0; layer < this.config.layers; layer++) {
      // Q/K/V/output projection matrices for multi-head attention.
      this.attentionWeights.set(`layer_${layer}`, {
        query: this.createWeight([d, d]),
        key: this.createWeight([d, d]),
        value: this.createWeight([d, d]),
        output: this.createWeight([d, d]),
      });

      // Two INDEPENDENT LayerNorms per layer (post-attention and post-FF),
      // as in the standard Transformer. The previous implementation shared
      // one gamma/beta pair between both sub-layers, tying their
      // normalization parameters together.
      this.layerNorms.push(this.createLayerNormParams());
      this.ffLayerNorms.push(this.createLayerNormParams());

      // Position-wise feed-forward network: d -> ffDimensions -> d.
      // (Float32Array is zero-initialized; no explicit fill(0) needed.)
      this.feedForwardWeights.push({
        w1: this.createWeight([d, this.config.ffDimensions]),
        b1: new Float32Array(this.config.ffDimensions),
        w2: this.createWeight([this.config.ffDimensions, d]),
        b2: new Float32Array(d),
      });
    }

    // Final projection to vocabulary logits.
    this.outputWeights = {
      projection: this.createWeight([d, this.config.vocabularySize]),
      bias: new Float32Array(this.config.vocabularySize),
    };
  }

  /** Fresh LayerNorm parameters: gamma = 1, beta = 0. */
  createLayerNormParams() {
    return {
      gamma: new Float32Array(this.config.dimensions).fill(1.0),
      beta: new Float32Array(this.config.dimensions),
    };
  }

  /**
   * Create a flat weight matrix with Xavier/Glorot-style uniform
   * initialization scaled by sqrt(2 / (fan_in + fan_out)).
   *
   * @param {number[]} shape - [fanIn, fanOut].
   * @returns {Float32Array} Flat weight buffer of length fanIn * fanOut.
   */
  createWeight(shape) {
    const size = shape.reduce((a, b) => a * b, 1);
    const weight = new Float32Array(size);

    const scale = Math.sqrt(2.0 / (shape[0] + shape[1]));
    for (let i = 0; i < size; i++) {
      weight[i] = (Math.random() * 2 - 1) * scale;
    }

    return weight;
  }

  /**
   * Precompute sinusoidal positional encodings for all positions up to
   * maxSequenceLength (sin on even dimensions, cos on odd).
   *
   * @returns {Float32Array} Flat [maxSequenceLength, dimensions] buffer.
   */
  createPositionalEncoding() {
    const { maxSequenceLength, dimensions } = this.config;
    const encoding = new Float32Array(maxSequenceLength * dimensions);

    for (let pos = 0; pos < maxSequenceLength; pos++) {
      for (let i = 0; i < dimensions; i++) {
        // Pairs of dimensions share a frequency: 10000^(2*floor(i/2)/d).
        const angle = pos / Math.pow(10000, (2 * Math.floor(i / 2)) / dimensions);
        encoding[pos * dimensions + i] = i % 2 === 0 ? Math.sin(angle) : Math.cos(angle);
      }
    }

    return encoding;
  }

  /**
   * Full forward pass: embed tokens, add positional encoding, run the
   * encoder stack, and project to vocabulary logits.
   *
   * @param {Float32Array} input - Token indices with shape [batch, seq].
   * @param {boolean} [training=false] - Enables dropout when true.
   * @returns {Promise<Float32Array>} Logits over the vocabulary.
   */
  async forward(input, training = false) {
    const sequenceLength = input.shape[1];

    // Token embedding (simplified - in practice would use embedding layer)
    let x = this.tokenEmbedding(input);

    x = this.addPositionalEncoding(x, sequenceLength);

    if (training && this.config.dropoutRate > 0) {
      x = this.dropout(x, this.config.dropoutRate);
    }

    // Encoder stack: each layer is (attention -> add&norm -> FF -> add&norm).
    for (let layer = 0; layer < this.config.layers; layer++) {
      const attentionOutput = await this.multiHeadAttention(x, layer, training);
      x = this.layerNorm(this.add(x, attentionOutput), this.layerNorms[layer]);

      const ffOutput = this.feedForward(x, layer);
      // Second sub-layer uses its own, independent norm parameters.
      x = this.layerNorm(this.add(x, ffOutput), this.ffLayerNorms[layer]);
    }

    return this.outputProjection(x);
  }

  /**
   * Multi-head scaled dot-product self-attention for one layer.
   *
   * @param {Float32Array} input - [batch, seq, dimensions] activations.
   * @param {number} layerIndex - Which layer's Q/K/V/O weights to use.
   * @param {boolean} [training=false] - Enables output dropout when true.
   * @returns {Promise<Float32Array>} [batch, seq, dimensions] output.
   */
  async multiHeadAttention(input, layerIndex, training = false) {
    const weights = this.attentionWeights.get(`layer_${layerIndex}`);
    const batchSize = input.shape[0];
    const sequenceLength = input.shape[1];
    const { heads } = this.config;
    const hd = this.headDimension;

    // Linear projections for Q, K, V.
    const Q = this.matmul(input, weights.query);
    const K = this.matmul(input, weights.key);
    const V = this.matmul(input, weights.value);

    // Reshape to [batch, heads, seq, headDimension].
    const QHeads = this.reshapeForHeads(Q, batchSize, sequenceLength);
    const KHeads = this.reshapeForHeads(K, batchSize, sequenceLength);
    const VHeads = this.reshapeForHeads(V, batchSize, sequenceLength);

    // Raw attention scores: Q.K^T / sqrt(d_k) per head.
    const attentionScores = new Float32Array(batchSize * heads * sequenceLength * sequenceLength);
    const invSqrtDk = 1 / Math.sqrt(hd);

    for (let b = 0; b < batchSize; b++) {
      for (let h = 0; h < heads; h++) {
        const headBase = (b * heads + h) * sequenceLength * hd;
        const scoreBase = (b * heads + h) * sequenceLength * sequenceLength;

        for (let i = 0; i < sequenceLength; i++) {
          for (let j = 0; j < sequenceLength; j++) {
            let score = 0;
            for (let d = 0; d < hd; d++) {
              score += QHeads[headBase + i * hd + d] * KHeads[headBase + j * hd + d];
            }
            attentionScores[scoreBase + i * sequenceLength + j] = score * invSqrtDk;
          }
        }
      }
    }

    // Softmax over the key dimension, then weight the values.
    const attentionWeights = this.softmax(attentionScores, sequenceLength);
    const attendedValues = this.applyAttentionWeights(attentionWeights, VHeads, batchSize, sequenceLength);

    // Concatenate heads back to [batch, seq, dimensions] and project.
    const concatenated = this.concatenateHeads(attendedValues, batchSize, sequenceLength);
    const output = this.matmul(concatenated, weights.output);

    if (training && this.config.dropoutRate > 0) {
      return this.dropout(output, this.config.dropoutRate);
    }
    return output;
  }

  /**
   * Position-wise feed-forward sub-layer: ReLU(x W1 + b1) W2 + b2.
   *
   * @param {Float32Array} input - [batch, seq, dimensions] activations.
   * @param {number} layerIndex - Which layer's FF weights to use.
   * @returns {Float32Array} [batch, seq, dimensions] output.
   */
  feedForward(input, layerIndex) {
    const weights = this.feedForwardWeights[layerIndex];

    let hidden = this.matmul(input, weights.w1);
    hidden = this.addBias(hidden, weights.b1);
    hidden = this.relu(hidden);

    let output = this.matmul(hidden, weights.w2);
    output = this.addBias(output, weights.b2);

    return output;
  }

  /**
   * Layer normalization over the last dimension:
   * gamma * (x - mean) / sqrt(var + eps) + beta, eps = 1e-5.
   *
   * @param {Float32Array} input - Tensor with `.shape`; normalized per row.
   * @param {{gamma: Float32Array, beta: Float32Array}} normParams
   * @returns {Float32Array} Normalized tensor with the same shape.
   */
  layerNorm(input, normParams) {
    const { shape } = input;
    const lastDim = shape[shape.length - 1];
    const normalized = new Float32Array(input.length);

    for (let i = 0; i < input.length / lastDim; i++) {
      let mean = 0;
      for (let j = 0; j < lastDim; j++) {
        mean += input[i * lastDim + j];
      }
      mean /= lastDim;

      let variance = 0;
      for (let j = 0; j < lastDim; j++) {
        const diff = input[i * lastDim + j] - mean;
        variance += diff * diff;
      }
      variance /= lastDim;

      // eps = 1e-5 guards against division by zero for constant rows.
      const std = Math.sqrt(variance + 1e-5);
      for (let j = 0; j < lastDim; j++) {
        const idx = i * lastDim + j;
        normalized[idx] = normParams.gamma[j] * ((input[idx] - mean) / std) + normParams.beta[j];
      }
    }

    normalized.shape = shape;
    return normalized;
  }

  /**
   * Train the model with warmup learning-rate scheduling and a
   * train/validation split.
   *
   * @param {Array} trainingData - Training examples.
   * @param {object} [options]
   * @param {number} [options.epochs=10]
   * @param {number} [options.batchSize=32]
   * @param {number} [options.learningRate=0.001] - Base LR for the schedule.
   * @param {number} [options.warmupSteps=4000]
   * @param {number} [options.validationSplit=0.1] - Fraction held out.
   * @returns {Promise<{history: Array, finalLoss: number, modelType: string}>}
   */
  async train(trainingData, options = {}) {
    const {
      epochs = 10,
      batchSize = 32,
      learningRate = 0.001,
      warmupSteps = 4000,
      validationSplit = 0.1,
    } = options;

    const trainingHistory = [];

    // Hold out the tail of the dataset for validation.
    const splitIndex = Math.floor(trainingData.length * (1 - validationSplit));
    const trainData = trainingData.slice(0, splitIndex);
    const valData = trainingData.slice(splitIndex);

    let globalStep = 0;

    for (let epoch = 0; epoch < epochs; epoch++) {
      let epochLoss = 0;
      let batchCount = 0;

      const shuffled = this.shuffle(trainData);

      for (let i = 0; i < shuffled.length; i += batchSize) {
        const batch = shuffled.slice(i, Math.min(i + batchSize, shuffled.length));

        // NOTE(review): `batch` is an Array slice, yet `.inputs`/`.targets`
        // are read below — this presumes a collation step or a custom
        // `shuffle` that attaches those fields. Confirm the training-data
        // format against callers and the NeuralModel base class.

        // Adaptive learning rate with warmup.
        const currentLR = this.getAdaptiveLearningRate(learningRate, globalStep, warmupSteps);

        const predictions = await this.forward(batch.inputs, true);

        const loss = this.crossEntropyLoss(predictions, batch.targets);
        epochLoss += loss;

        // Backward pass (simplified; implemented in the base class).
        await this.backward(loss, currentLR);

        globalStep++;
        batchCount++;
      }

      const valLoss = await this.validate(valData);

      const avgTrainLoss = epochLoss / batchCount;
      trainingHistory.push({
        epoch: epoch + 1,
        trainLoss: avgTrainLoss,
        valLoss,
        learningRate: this.getAdaptiveLearningRate(learningRate, globalStep, warmupSteps),
      });

      console.log(`Epoch ${epoch + 1}/${epochs} - Train Loss: ${avgTrainLoss.toFixed(4)}, Val Loss: ${valLoss.toFixed(4)}`);
    }

    return {
      history: trainingHistory,
      finalLoss: trainingHistory[trainingHistory.length - 1].trainLoss,
      modelType: 'transformer',
    };
  }

  /**
   * "Noam" learning-rate schedule from the original Transformer paper:
   *   lr = baseLR * d_model^-0.5 * min(step^-0.5, step * warmupSteps^-1.5)
   * i.e. linear warmup for `warmupSteps` steps, then inverse-sqrt decay.
   *
   * The previous implementation used sqrt(step) (growing, not decaying)
   * and multiplied by sqrt(d_model) instead of dividing, so the learning
   * rate diverged after warmup.
   *
   * @param {number} baseLR - Base learning rate multiplier.
   * @param {number} step - Global optimization step (0-based is tolerated).
   * @param {number} warmupSteps - Number of warmup steps.
   * @returns {number} Learning rate for this step.
   */
  getAdaptiveLearningRate(baseLR, step, warmupSteps) {
    const s = Math.max(step, 1); // avoid step^-0.5 = Infinity at step 0
    const decay = 1 / Math.sqrt(s);
    const warmup = s * Math.pow(warmupSteps, -1.5);
    return (baseLR * Math.min(decay, warmup)) / Math.sqrt(this.config.dimensions);
  }

  /**
   * Simplified token embedding — deterministic value derived from the
   * token index plus small random noise (a real model would use a learned
   * embedding table). Note: the noise makes forward passes
   * non-deterministic by design of this placeholder.
   *
   * @param {Float32Array} tokenIndices - [batch, seq] token ids.
   * @returns {Float32Array} [batch, seq, dimensions] embeddings.
   */
  tokenEmbedding(tokenIndices) {
    const [batch, seq] = tokenIndices.shape;
    const dims = this.config.dimensions;
    const embedded = new Float32Array(batch * seq * dims);

    for (let b = 0; b < batch; b++) {
      for (let s = 0; s < seq; s++) {
        const token = tokenIndices[b * seq + s] % this.config.vocabularySize;
        const base = (b * seq + s) * dims;
        for (let d = 0; d < dims; d++) {
          embedded[base + d] = token / this.config.vocabularySize + (Math.random() - 0.5) * 0.1;
        }
      }
    }

    embedded.shape = [batch, seq, dims];
    return embedded;
  }

  /**
   * Add the precomputed sinusoidal positional encoding to embeddings.
   *
   * @param {Float32Array} embeddings - [batch, seq, dimensions].
   * @param {number} sequenceLength - Sequence length (<= maxSequenceLength).
   * @returns {Float32Array} Same shape, with positions added.
   */
  addPositionalEncoding(embeddings, sequenceLength) {
    const dims = this.config.dimensions;
    const result = new Float32Array(embeddings.length);

    for (let b = 0; b < embeddings.shape[0]; b++) {
      for (let s = 0; s < sequenceLength; s++) {
        const embBase = (b * sequenceLength + s) * dims;
        const posBase = s * dims;
        for (let d = 0; d < dims; d++) {
          result[embBase + d] = embeddings[embBase + d] + this.positionalEncoding[posBase + d];
        }
      }
    }

    result.shape = embeddings.shape;
    return result;
  }

  /**
   * Reshape a flat [batch, seq, dimensions] tensor into
   * [batch, heads, seq, headDimension] layout.
   */
  reshapeForHeads(tensor, batchSize, sequenceLength) {
    const reshaped = new Float32Array(tensor.length);
    const { heads } = this.config;
    const hd = this.headDimension;

    for (let b = 0; b < batchSize; b++) {
      for (let s = 0; s < sequenceLength; s++) {
        for (let h = 0; h < heads; h++) {
          const srcBase = (b * sequenceLength + s) * this.config.dimensions + h * hd;
          const dstBase = ((b * heads + h) * sequenceLength + s) * hd;
          for (let d = 0; d < hd; d++) {
            reshaped[dstBase + d] = tensor[srcBase + d];
          }
        }
      }
    }

    return reshaped;
  }

  /**
   * Inverse of reshapeForHeads: [batch, heads, seq, headDimension] back to
   * [batch, seq, dimensions].
   */
  concatenateHeads(tensor, batchSize, sequenceLength) {
    const { heads } = this.config;
    const hd = this.headDimension;
    const concatenated = new Float32Array(batchSize * sequenceLength * this.config.dimensions);

    for (let b = 0; b < batchSize; b++) {
      for (let s = 0; s < sequenceLength; s++) {
        for (let h = 0; h < heads; h++) {
          const srcBase = ((b * heads + h) * sequenceLength + s) * hd;
          const dstBase = (b * sequenceLength + s) * this.config.dimensions + h * hd;
          for (let d = 0; d < hd; d++) {
            concatenated[dstBase + d] = tensor[srcBase + d];
          }
        }
      }
    }

    concatenated.shape = [batchSize, sequenceLength, this.config.dimensions];
    return concatenated;
  }

  /**
   * Numerically stable softmax applied independently to each contiguous
   * run of `sequenceLength` scores (i.e. over the key dimension for each
   * batch/head/query position).
   *
   * @param {Float32Array} scores - Flat attention scores.
   * @param {number} sequenceLength - Normalization window length.
   * @returns {Float32Array} Softmax-normalized scores.
   */
  softmax(scores, sequenceLength) {
    const softmaxScores = new Float32Array(scores.length);
    const stride = sequenceLength;

    for (let i = 0; i < scores.length; i += stride) {
      // Subtract the row max for numerical stability.
      let maxScore = -Infinity;
      for (let j = 0; j < stride; j++) {
        maxScore = Math.max(maxScore, scores[i + j]);
      }

      let sumExp = 0;
      for (let j = 0; j < stride; j++) {
        softmaxScores[i + j] = Math.exp(scores[i + j] - maxScore);
        sumExp += softmaxScores[i + j];
      }

      for (let j = 0; j < stride; j++) {
        softmaxScores[i + j] /= sumExp;
      }
    }

    return softmaxScores;
  }

  /**
   * Weighted sum of values by attention weights per head:
   * output[b,h,i,:] = sum_j weights[b,h,i,j] * values[b,h,j,:].
   */
  applyAttentionWeights(weights, values, batchSize, sequenceLength) {
    const { heads } = this.config;
    const hd = this.headDimension;
    const output = new Float32Array(batchSize * heads * sequenceLength * hd);

    for (let b = 0; b < batchSize; b++) {
      for (let h = 0; h < heads; h++) {
        const weightBase = (b * heads + h) * sequenceLength * sequenceLength;
        const valueBase = (b * heads + h) * sequenceLength * hd;

        for (let i = 0; i < sequenceLength; i++) {
          for (let d = 0; d < hd; d++) {
            let sum = 0;
            for (let j = 0; j < sequenceLength; j++) {
              sum += weights[weightBase + i * sequenceLength + j] * values[valueBase + j * hd + d];
            }
            output[valueBase + i * hd + d] = sum;
          }
        }
      }
    }

    return output;
  }

  /**
   * Project final hidden states to vocabulary logits, including the bias
   * term (previously allocated in initializeWeights but never applied).
   */
  outputProjection(input) {
    return this.addBias(this.matmul(input, this.outputWeights.projection), this.outputWeights.bias);
  }

  /** @returns {object} Model type, hyperparameters, and parameter count. */
  getConfig() {
    return {
      type: 'transformer',
      ...this.config,
      parameters: this.countParameters(),
    };
  }

  /**
   * Count trainable parameters across attention, feed-forward, layer-norm,
   * and output-projection weights.
   *
   * @returns {number} Total parameter count.
   */
  countParameters() {
    const { layers, dimensions, ffDimensions, vocabularySize } = this.config;
    let count = 0;

    // Attention: Q, K, V, O projections per layer.
    count += layers * 4 * dimensions * dimensions;

    // Feed-forward: W1, W2 plus both biases per layer.
    count += layers * (dimensions * ffDimensions * 2 + ffDimensions + dimensions);

    // Layer norms: two per layer (post-attention and post-FF), gamma + beta each.
    count += layers * 4 * dimensions;

    // Output projection + bias.
    count += dimensions * vocabularySize + vocabularySize;

    return count;
  }
}

export { TransformerModel };