@sparkleideas/ruv-swarm 1.0.18-patch.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1565 -0
- package/bin/ruv-swarm-clean.js +1872 -0
- package/bin/ruv-swarm-memory.js +119 -0
- package/bin/ruv-swarm-secure-heartbeat.js +1549 -0
- package/bin/ruv-swarm-secure.js +1689 -0
- package/package.json +221 -0
- package/src/agent.ts +342 -0
- package/src/benchmark.js +267 -0
- package/src/claude-flow-enhanced.js +839 -0
- package/src/claude-integration/advanced-commands.js +561 -0
- package/src/claude-integration/core.js +112 -0
- package/src/claude-integration/docs.js +1548 -0
- package/src/claude-integration/env-template.js +39 -0
- package/src/claude-integration/index.js +209 -0
- package/src/claude-integration/remote.js +408 -0
- package/src/cli-diagnostics.js +364 -0
- package/src/cognitive-pattern-evolution.js +1317 -0
- package/src/daa-cognition.js +977 -0
- package/src/daa-service.d.ts +298 -0
- package/src/daa-service.js +1116 -0
- package/src/diagnostics.js +533 -0
- package/src/errors.js +528 -0
- package/src/github-coordinator/README.md +193 -0
- package/src/github-coordinator/claude-hooks.js +162 -0
- package/src/github-coordinator/gh-cli-coordinator.js +260 -0
- package/src/hooks/cli.js +82 -0
- package/src/hooks/index.js +1900 -0
- package/src/index-enhanced.d.ts +371 -0
- package/src/index-enhanced.js +734 -0
- package/src/index.d.ts +287 -0
- package/src/index.js +405 -0
- package/src/index.ts +457 -0
- package/src/logger.js +182 -0
- package/src/logging-config.js +179 -0
- package/src/mcp-daa-tools.js +735 -0
- package/src/mcp-tools-benchmarks.js +328 -0
- package/src/mcp-tools-enhanced.js +2863 -0
- package/src/memory-config.js +42 -0
- package/src/meta-learning-framework.js +1359 -0
- package/src/neural-agent.js +830 -0
- package/src/neural-coordination-protocol.js +1363 -0
- package/src/neural-models/README.md +118 -0
- package/src/neural-models/autoencoder.js +543 -0
- package/src/neural-models/base.js +269 -0
- package/src/neural-models/cnn.js +497 -0
- package/src/neural-models/gnn.js +447 -0
- package/src/neural-models/gru.js +536 -0
- package/src/neural-models/index.js +273 -0
- package/src/neural-models/lstm.js +551 -0
- package/src/neural-models/neural-presets-complete.js +1306 -0
- package/src/neural-models/presets/graph.js +392 -0
- package/src/neural-models/presets/index.js +279 -0
- package/src/neural-models/presets/nlp.js +328 -0
- package/src/neural-models/presets/timeseries.js +368 -0
- package/src/neural-models/presets/vision.js +387 -0
- package/src/neural-models/resnet.js +534 -0
- package/src/neural-models/transformer.js +515 -0
- package/src/neural-models/vae.js +489 -0
- package/src/neural-network-manager.js +1938 -0
- package/src/neural-network.ts +296 -0
- package/src/neural.js +574 -0
- package/src/performance-benchmarks.js +898 -0
- package/src/performance.js +458 -0
- package/src/persistence-pooled.js +695 -0
- package/src/persistence.js +480 -0
- package/src/schemas.js +864 -0
- package/src/security.js +218 -0
- package/src/singleton-container.js +183 -0
- package/src/sqlite-pool.js +587 -0
- package/src/sqlite-worker.js +141 -0
- package/src/types.ts +164 -0
- package/src/utils.ts +286 -0
- package/src/wasm-loader.js +601 -0
- package/src/wasm-loader2.js +404 -0
- package/src/wasm-memory-optimizer.js +783 -0
- package/src/wasm-types.d.ts +63 -0
- package/wasm/README.md +347 -0
- package/wasm/neuro-divergent.wasm +0 -0
- package/wasm/package.json +18 -0
- package/wasm/ruv-fann.wasm +0 -0
- package/wasm/ruv_swarm_simd.wasm +0 -0
- package/wasm/ruv_swarm_wasm.d.ts +391 -0
- package/wasm/ruv_swarm_wasm.js +2164 -0
- package/wasm/ruv_swarm_wasm_bg.wasm +0 -0
- package/wasm/ruv_swarm_wasm_bg.wasm.d.ts +123 -0
- package/wasm/wasm-bindings-loader.mjs +435 -0
- package/wasm/wasm-updates.md +684 -0
|
@@ -0,0 +1,536 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gated Recurrent Unit (GRU) Model
|
|
3
|
+
* Alternative to LSTM with fewer parameters
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { NeuralModel } from './base.js';
|
|
7
|
+
|
|
8
|
+
class GRUModel extends NeuralModel {
|
|
9
|
+
constructor(config = {}) {
|
|
10
|
+
super('gru');
|
|
11
|
+
|
|
12
|
+
// GRU configuration
|
|
13
|
+
this.config = {
|
|
14
|
+
inputSize: config.inputSize || 128,
|
|
15
|
+
hiddenSize: config.hiddenSize || 256,
|
|
16
|
+
numLayers: config.numLayers || 2,
|
|
17
|
+
outputSize: config.outputSize || 10,
|
|
18
|
+
dropoutRate: config.dropoutRate || 0.2,
|
|
19
|
+
bidirectional: config.bidirectional || false,
|
|
20
|
+
...config,
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
// Initialize GRU gates and weights
|
|
24
|
+
this.gates = [];
|
|
25
|
+
this.outputLayer = null;
|
|
26
|
+
|
|
27
|
+
this.initializeWeights();
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
initializeWeights() {
|
|
31
|
+
const directions = this.config.bidirectional ? 2 : 1;
|
|
32
|
+
|
|
33
|
+
// Initialize weights for each layer and direction
|
|
34
|
+
for (let layer = 0; layer < this.config.numLayers; layer++) {
|
|
35
|
+
const layerGates = [];
|
|
36
|
+
|
|
37
|
+
for (let dir = 0; dir < directions; dir++) {
|
|
38
|
+
const inputSize = layer === 0 ? this.config.inputSize :
|
|
39
|
+
this.config.hiddenSize * directions;
|
|
40
|
+
|
|
41
|
+
// GRU has 3 gates: reset, update, and candidate
|
|
42
|
+
const gates = {
|
|
43
|
+
// Reset gate
|
|
44
|
+
resetInput: this.createWeight([inputSize, this.config.hiddenSize]),
|
|
45
|
+
resetHidden: this.createWeight([this.config.hiddenSize, this.config.hiddenSize]),
|
|
46
|
+
resetBias: new Float32Array(this.config.hiddenSize).fill(0),
|
|
47
|
+
|
|
48
|
+
// Update gate
|
|
49
|
+
updateInput: this.createWeight([inputSize, this.config.hiddenSize]),
|
|
50
|
+
updateHidden: this.createWeight([this.config.hiddenSize, this.config.hiddenSize]),
|
|
51
|
+
updateBias: new Float32Array(this.config.hiddenSize).fill(0),
|
|
52
|
+
|
|
53
|
+
// Candidate hidden state
|
|
54
|
+
candidateInput: this.createWeight([inputSize, this.config.hiddenSize]),
|
|
55
|
+
candidateHidden: this.createWeight([this.config.hiddenSize, this.config.hiddenSize]),
|
|
56
|
+
candidateBias: new Float32Array(this.config.hiddenSize).fill(0),
|
|
57
|
+
|
|
58
|
+
direction: dir === 0 ? 'forward' : 'backward',
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
layerGates.push(gates);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
this.gates.push(layerGates);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// Output layer
|
|
68
|
+
const outputInputSize = this.config.hiddenSize * directions;
|
|
69
|
+
this.outputLayer = {
|
|
70
|
+
weight: this.createWeight([outputInputSize, this.config.outputSize]),
|
|
71
|
+
bias: new Float32Array(this.config.outputSize).fill(0),
|
|
72
|
+
};
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
createWeight(shape) {
|
|
76
|
+
const size = shape.reduce((a, b) => a * b, 1);
|
|
77
|
+
const weight = new Float32Array(size);
|
|
78
|
+
|
|
79
|
+
// Xavier initialization
|
|
80
|
+
const scale = Math.sqrt(2.0 / (shape[0] + shape[1]));
|
|
81
|
+
|
|
82
|
+
for (let i = 0; i < size; i++) {
|
|
83
|
+
weight[i] = (Math.random() * 2 - 1) * scale;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
weight.shape = shape;
|
|
87
|
+
return weight;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
async forward(input, training = false) {
|
|
91
|
+
const batchSize = input.shape[0];
|
|
92
|
+
const sequenceLength = input.shape[1];
|
|
93
|
+
|
|
94
|
+
// Initialize hidden states for all layers
|
|
95
|
+
const hiddenStates = this.initializeHiddenStates(batchSize);
|
|
96
|
+
|
|
97
|
+
// Process through GRU layers
|
|
98
|
+
let layerInput = input;
|
|
99
|
+
|
|
100
|
+
for (let layer = 0; layer < this.config.numLayers; layer++) {
|
|
101
|
+
const layerOutput = await this.processLayer(
|
|
102
|
+
layerInput,
|
|
103
|
+
hiddenStates[layer],
|
|
104
|
+
layer,
|
|
105
|
+
training,
|
|
106
|
+
);
|
|
107
|
+
|
|
108
|
+
layerInput = layerOutput.output;
|
|
109
|
+
hiddenStates[layer] = layerOutput.finalHidden;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// Apply output layer to final hidden states
|
|
113
|
+
const output = this.applyOutputLayer(layerInput);
|
|
114
|
+
|
|
115
|
+
return output;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
initializeHiddenStates(batchSize) {
|
|
119
|
+
const hiddenStates = [];
|
|
120
|
+
const directions = this.config.bidirectional ? 2 : 1;
|
|
121
|
+
|
|
122
|
+
for (let layer = 0; layer < this.config.numLayers; layer++) {
|
|
123
|
+
const layerHidden = [];
|
|
124
|
+
|
|
125
|
+
for (let dir = 0; dir < directions; dir++) {
|
|
126
|
+
const hidden = new Float32Array(batchSize * this.config.hiddenSize);
|
|
127
|
+
hidden.shape = [batchSize, this.config.hiddenSize];
|
|
128
|
+
layerHidden.push(hidden);
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
hiddenStates.push(layerHidden);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
return hiddenStates;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/**
 * Run one GRU layer over a full input sequence, once per direction.
 *
 * @param {Float32Array} input - `.shape = [batch, seqLen, inputSize]`.
 * @param {Float32Array[]} hiddenStates - one [batch, hiddenSize] state per
 *   direction; mutated in place to hold each direction's final hidden state.
 * @param {number} layerIndex - selects this layer's gate weights.
 * @param {boolean} training - enables dropout between stacked layers.
 * @returns {Promise<{output: Float32Array, finalHidden: Float32Array[]}>}
 *   full-sequence output (`[batch, seqLen, hiddenSize * directions]`)
 *   plus the per-direction final hidden states.
 */
async processLayer(input, hiddenStates, layerIndex, training) {
  const batchSize = input.shape[0];
  const sequenceLength = input.shape[1];
  const inputSize = input.shape[2];

  const directions = this.config.bidirectional ? 2 : 1;
  const outputs = [];

  for (let dir = 0; dir < directions; dir++) {
    const gates = this.gates[layerIndex][dir];
    const isBackward = dir === 1;

    // Per-direction sequence output buffer, written at the original
    // time index so forward/backward outputs stay time-aligned.
    const sequenceOutput = new Float32Array(
      batchSize * sequenceLength * this.config.hiddenSize,
    );

    let hidden = hiddenStates[dir];

    for (let t = 0; t < sequenceLength; t++) {
      // Backward direction walks the sequence from the end.
      const timeStep = isBackward ? sequenceLength - 1 - t : t;

      // Slice out x_t for the whole batch at this time step.
      const xt = new Float32Array(batchSize * inputSize);
      for (let b = 0; b < batchSize; b++) {
        for (let i = 0; i < inputSize; i++) {
          xt[b * inputSize + i] = input[b * sequenceLength * inputSize +
            timeStep * inputSize + i];
        }
      }
      xt.shape = [batchSize, inputSize];

      // One GRU cell step: h_t = GRU(x_t, h_{t-1}).
      const gruOutput = this.gruCell(xt, hidden, gates);
      hidden = gruOutput;

      // Write h_t into the sequence output at the ORIGINAL time index.
      for (let b = 0; b < batchSize; b++) {
        for (let h = 0; h < this.config.hiddenSize; h++) {
          sequenceOutput[b * sequenceLength * this.config.hiddenSize +
            timeStep * this.config.hiddenSize + h] =
            hidden[b * this.config.hiddenSize + h];
        }
      }
    }

    sequenceOutput.shape = [batchSize, sequenceLength, this.config.hiddenSize];
    outputs.push(sequenceOutput);
    // Persist the final hidden state for this direction (in-place).
    hiddenStates[dir] = hidden;
  }

  // Bidirectional layers concatenate both directions feature-wise.
  let finalOutput;
  if (this.config.bidirectional) {
    finalOutput = this.concatenateBidirectional(outputs[0], outputs[1]);
  } else {
    finalOutput = outputs[0];
  }

  // Dropout only between stacked layers (never after the last layer),
  // and only while training.
  if (training && this.config.dropoutRate > 0 && layerIndex < this.config.numLayers - 1) {
    finalOutput = this.dropout(finalOutput, this.config.dropoutRate);
  }

  return {
    output: finalOutput,
    finalHidden: hiddenStates,
  };
}
|
|
206
|
+
|
|
207
|
+
/**
 * One GRU time step for a whole batch, computed with explicit loops
 * over flat (row-major) Float32Arrays.
 *
 * Gate equations implemented below:
 *   r = sigmoid(W_ir x + W_hr h + b_r)
 *   z = sigmoid(W_iz x + W_hz h + b_z)
 *   h~ = tanh(W_ih x + W_hh (r * h) + b_h)
 *   h_t = z * h_{t-1} + (1 - z) * h~   (z gates the PREVIOUS state)
 *
 * @param {Float32Array} input - x_t, `.shape = [batch, inputSize]`.
 * @param {Float32Array} hidden - h_{t-1}, flat [batch, hiddenSize].
 * @param {Object} gates - this direction's weight/bias tensors.
 * @returns {Float32Array} h_t with `.shape = [batch, hiddenSize]`.
 */
gruCell(input, hidden, gates) {
  const batchSize = input.shape[0];
  const inputSize = input.shape[1];
  const { hiddenSize } = this.config;

  // Reset gate: r = σ(W_ir @ x + W_hr @ h + b_r)
  const resetGate = new Float32Array(batchSize * hiddenSize);
  for (let b = 0; b < batchSize; b++) {
    for (let h = 0; h < hiddenSize; h++) {
      let sum = gates.resetBias[h];

      // Input contribution
      for (let i = 0; i < inputSize; i++) {
        sum += input[b * inputSize + i] *
          gates.resetInput[i * hiddenSize + h];
      }

      // Hidden contribution
      for (let hh = 0; hh < hiddenSize; hh++) {
        sum += hidden[b * hiddenSize + hh] *
          gates.resetHidden[hh * hiddenSize + h];
      }

      resetGate[b * hiddenSize + h] = 1 / (1 + Math.exp(-sum)); // sigmoid
    }
  }

  // Update gate: z = σ(W_iz @ x + W_hz @ h + b_z)
  const updateGate = new Float32Array(batchSize * hiddenSize);
  for (let b = 0; b < batchSize; b++) {
    for (let h = 0; h < hiddenSize; h++) {
      let sum = gates.updateBias[h];

      // Input contribution
      for (let i = 0; i < inputSize; i++) {
        sum += input[b * inputSize + i] *
          gates.updateInput[i * hiddenSize + h];
      }

      // Hidden contribution
      for (let hh = 0; hh < hiddenSize; hh++) {
        sum += hidden[b * hiddenSize + hh] *
          gates.updateHidden[hh * hiddenSize + h];
      }

      updateGate[b * hiddenSize + h] = 1 / (1 + Math.exp(-sum)); // sigmoid
    }
  }

  // Candidate hidden state: h_tilde = tanh(W_ih @ x + W_hh @ (r * h) + b_h)
  const candidateHidden = new Float32Array(batchSize * hiddenSize);
  for (let b = 0; b < batchSize; b++) {
    for (let h = 0; h < hiddenSize; h++) {
      let sum = gates.candidateBias[h];

      // Input contribution
      for (let i = 0; i < inputSize; i++) {
        sum += input[b * inputSize + i] *
          gates.candidateInput[i * hiddenSize + h];
      }

      // Hidden contribution (previous state scaled element-wise by the
      // reset gate before the recurrent matmul)
      for (let hh = 0; hh < hiddenSize; hh++) {
        const modulatedHidden = resetGate[b * hiddenSize + hh] *
          hidden[b * hiddenSize + hh];
        sum += modulatedHidden * gates.candidateHidden[hh * hiddenSize + h];
      }

      candidateHidden[b * hiddenSize + h] = Math.tanh(sum);
    }
  }

  // New hidden state: h_t = z * h_{t-1} + (1 - z) * h_tilde
  const newHidden = new Float32Array(batchSize * hiddenSize);
  for (let b = 0; b < batchSize; b++) {
    for (let h = 0; h < hiddenSize; h++) {
      const idx = b * hiddenSize + h;
      const z = updateGate[idx];
      newHidden[idx] = z * hidden[idx] + (1 - z) * candidateHidden[idx];
    }
  }

  newHidden.shape = [batchSize, hiddenSize];
  return newHidden;
}
|
|
292
|
+
|
|
293
|
+
/**
 * Concatenate forward and backward direction outputs feature-wise:
 * out[b, t] = [forward[b, t] ++ backward[b, t]].
 *
 * @param {Float32Array} forward - `.shape = [batch, seqLen, hiddenSize]`.
 * @param {Float32Array} backward - same shape as `forward`.
 * @returns {Float32Array} `.shape = [batch, seqLen, hiddenSize * 2]`.
 */
concatenateBidirectional(forward, backward) {
  const [batchSize, sequenceLength, hiddenSize] = forward.shape;
  const output = new Float32Array(batchSize * sequenceLength * hiddenSize * 2);

  for (let b = 0; b < batchSize; b++) {
    for (let t = 0; t < sequenceLength; t++) {
      // Forward features fill the first half of the feature axis.
      for (let h = 0; h < hiddenSize; h++) {
        output[b * sequenceLength * hiddenSize * 2 +
          t * hiddenSize * 2 + h] =
          forward[b * sequenceLength * hiddenSize +
            t * hiddenSize + h];
      }

      // Backward features fill the second half (offset by hiddenSize).
      for (let h = 0; h < hiddenSize; h++) {
        output[b * sequenceLength * hiddenSize * 2 +
          t * hiddenSize * 2 + hiddenSize + h] =
          backward[b * sequenceLength * hiddenSize +
            t * hiddenSize + h];
      }
    }
  }

  output.shape = [batchSize, sequenceLength, hiddenSize * 2];
  return output;
}
|
|
320
|
+
|
|
321
|
+
/**
 * Project the LAST time step of the sequence output through the linear
 * output layer (no activation — raw logits).
 *
 * Note: `hiddenSize` here is taken from the input's shape, so for a
 * bidirectional model it is already hiddenSize * 2.
 * NOTE(review): for bidirectional output, the backward half at the last
 * time step reflects only one processed input step — confirm this
 * last-step readout is the intended pooling strategy.
 *
 * @param {Float32Array} input - `.shape = [batch, seqLen, hiddenSize]`.
 * @returns {Float32Array} logits with `.shape = [batch, outputSize]`.
 */
applyOutputLayer(input) {
  const [batchSize, sequenceLength, hiddenSize] = input.shape;

  // Gather the final time step for every batch row.
  const lastTimeStep = new Float32Array(batchSize * hiddenSize);

  for (let b = 0; b < batchSize; b++) {
    for (let h = 0; h < hiddenSize; h++) {
      lastTimeStep[b * hiddenSize + h] =
        input[b * sequenceLength * hiddenSize +
          (sequenceLength - 1) * hiddenSize + h];
    }
  }

  lastTimeStep.shape = [batchSize, hiddenSize];

  // Affine transform: output = lastTimeStep @ W + b.
  const output = new Float32Array(batchSize * this.config.outputSize);

  for (let b = 0; b < batchSize; b++) {
    for (let o = 0; o < this.config.outputSize; o++) {
      let sum = this.outputLayer.bias[o];

      for (let h = 0; h < hiddenSize; h++) {
        sum += lastTimeStep[b * hiddenSize + h] *
          this.outputLayer.weight[h * this.config.outputSize + o];
      }

      output[b * this.config.outputSize + o] = sum;
    }
  }

  output.shape = [batchSize, this.config.outputSize];
  return output;
}
|
|
356
|
+
|
|
357
|
+
/**
 * Train the model with a simple epoch loop: shuffle, batch, forward,
 * loss, backward, then validate on a held-out tail split.
 *
 * @param {Array} trainingData - dataset; `evaluate()` treats each
 *   element as a pre-built `{inputs, targets}` batch.
 * @param {Object} [options]
 * @param {number} [options.epochs=10]
 * @param {number} [options.batchSize=32]
 * @param {number} [options.learningRate=0.001]
 * @param {number} [options.gradientClipping=5.0] - clip threshold passed
 *   to backward().
 * @param {number} [options.validationSplit=0.1] - tail fraction held out.
 * @returns {Promise<{history: Object[], finalLoss: number, modelType: string}>}
 */
async train(trainingData, options = {}) {
  const {
    epochs = 10,
    batchSize = 32,
    learningRate = 0.001,
    gradientClipping = 5.0,
    validationSplit = 0.1,
  } = options;

  const trainingHistory = [];

  // Hold out the last `validationSplit` fraction for validation.
  const splitIndex = Math.floor(trainingData.length * (1 - validationSplit));
  const trainData = trainingData.slice(0, splitIndex);
  const valData = trainingData.slice(splitIndex);

  for (let epoch = 0; epoch < epochs; epoch++) {
    let epochLoss = 0;
    let epochAccuracy = 0;
    let batchCount = 0;

    // Re-shuffle every epoch (shuffle() comes from the base class —
    // not visible in this file).
    const shuffled = this.shuffle(trainData);

    // Process batches
    for (let i = 0; i < shuffled.length; i += batchSize) {
      // FIXME(review): `slice` returns an Array, so `batch.inputs` /
      // `batch.targets` below are undefined — this conflicts with
      // evaluate(), which treats each dataset ELEMENT as a batch
      // object. Confirm the expected trainingData layout before use.
      const batch = shuffled.slice(i, Math.min(i + batchSize, shuffled.length));

      // Forward pass (training=true enables dropout).
      const predictions = await this.forward(batch.inputs, true);

      // Calculate loss
      const loss = this.crossEntropyLoss(predictions, batch.targets);
      epochLoss += loss;

      // Accuracy only makes sense for multi-class output.
      if (this.config.outputSize > 1) {
        const accuracy = this.calculateAccuracy(predictions, batch.targets);
        epochAccuracy += accuracy;
      }

      // Backward pass with gradient clipping
      await this.backward(loss, learningRate, gradientClipping);

      batchCount++;
    }

    // Validation
    const valMetrics = await this.evaluate(valData);

    const avgTrainLoss = epochLoss / batchCount;
    const avgTrainAccuracy = epochAccuracy / batchCount;

    const historyEntry = {
      epoch: epoch + 1,
      trainLoss: avgTrainLoss,
      valLoss: valMetrics.loss,
    };

    if (this.config.outputSize > 1) {
      historyEntry.trainAccuracy = avgTrainAccuracy;
      historyEntry.valAccuracy = valMetrics.accuracy;
    }

    trainingHistory.push(historyEntry);

    console.log(
      `Epoch ${epoch + 1}/${epochs} - ` +
      `Train Loss: ${avgTrainLoss.toFixed(4)}, ${
        this.config.outputSize > 1 ?
          `Train Acc: ${(avgTrainAccuracy * 100).toFixed(2)}%, ` : ''
      }Val Loss: ${valMetrics.loss.toFixed(4)}${
        this.config.outputSize > 1 ?
          `, Val Acc: ${(valMetrics.accuracy * 100).toFixed(2)}%` : ''}`,
    );

    this.updateMetrics(avgTrainLoss, avgTrainAccuracy);
  }

  return {
    history: trainingHistory,
    finalLoss: trainingHistory[trainingHistory.length - 1].trainLoss,
    modelType: 'gru',
  };
}
|
|
442
|
+
|
|
443
|
+
async evaluate(data) {
|
|
444
|
+
let totalLoss = 0;
|
|
445
|
+
let totalAccuracy = 0;
|
|
446
|
+
let batchCount = 0;
|
|
447
|
+
|
|
448
|
+
for (const batch of data) {
|
|
449
|
+
const predictions = await this.forward(batch.inputs, false);
|
|
450
|
+
const loss = this.crossEntropyLoss(predictions, batch.targets);
|
|
451
|
+
|
|
452
|
+
totalLoss += loss;
|
|
453
|
+
|
|
454
|
+
if (this.config.outputSize > 1) {
|
|
455
|
+
const accuracy = this.calculateAccuracy(predictions, batch.targets);
|
|
456
|
+
totalAccuracy += accuracy;
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
batchCount++;
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
const metrics = {
|
|
463
|
+
loss: totalLoss / batchCount,
|
|
464
|
+
};
|
|
465
|
+
|
|
466
|
+
if (this.config.outputSize > 1) {
|
|
467
|
+
metrics.accuracy = totalAccuracy / batchCount;
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
return metrics;
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
calculateAccuracy(predictions, targets) {
|
|
474
|
+
const batchSize = predictions.shape[0];
|
|
475
|
+
let correct = 0;
|
|
476
|
+
|
|
477
|
+
for (let b = 0; b < batchSize; b++) {
|
|
478
|
+
let maxIdx = 0;
|
|
479
|
+
let maxVal = -Infinity;
|
|
480
|
+
|
|
481
|
+
for (let i = 0; i < this.config.outputSize; i++) {
|
|
482
|
+
const val = predictions[b * this.config.outputSize + i];
|
|
483
|
+
if (val > maxVal) {
|
|
484
|
+
maxVal = val;
|
|
485
|
+
maxIdx = i;
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
if (targets[b * this.config.outputSize + maxIdx] === 1) {
|
|
490
|
+
correct++;
|
|
491
|
+
}
|
|
492
|
+
}
|
|
493
|
+
|
|
494
|
+
return correct / batchSize;
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
getConfig() {
|
|
498
|
+
return {
|
|
499
|
+
type: 'gru',
|
|
500
|
+
...this.config,
|
|
501
|
+
parameters: this.countParameters(),
|
|
502
|
+
};
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
countParameters() {
|
|
506
|
+
let count = 0;
|
|
507
|
+
|
|
508
|
+
// GRU gates parameters
|
|
509
|
+
for (const layer of this.gates) {
|
|
510
|
+
for (const gates of layer) {
|
|
511
|
+
// Reset gate
|
|
512
|
+
count += gates.resetInput.length;
|
|
513
|
+
count += gates.resetHidden.length;
|
|
514
|
+
count += gates.resetBias.length;
|
|
515
|
+
|
|
516
|
+
// Update gate
|
|
517
|
+
count += gates.updateInput.length;
|
|
518
|
+
count += gates.updateHidden.length;
|
|
519
|
+
count += gates.updateBias.length;
|
|
520
|
+
|
|
521
|
+
// Candidate
|
|
522
|
+
count += gates.candidateInput.length;
|
|
523
|
+
count += gates.candidateHidden.length;
|
|
524
|
+
count += gates.candidateBias.length;
|
|
525
|
+
}
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
// Output layer
|
|
529
|
+
count += this.outputLayer.weight.length;
|
|
530
|
+
count += this.outputLayer.bias.length;
|
|
531
|
+
|
|
532
|
+
return count;
|
|
533
|
+
}
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
export { GRUModel };
|