mini-jstorch 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -33
- package/{tests → demo}/fu_fun.js +12 -12
- package/demo/scheduler.js +69 -0
- package/package.json +1 -1
- package/src/jstorch.js +343 -62
- package/tests/scheduler.js +0 -23
- package/{tests → demo}/MakeModel.js +0 -0
package/README.md
CHANGED

@@ -4,17 +4,18 @@ A lightweight JavaScript neural network library for rapid frontend AI experiment

  ## Overview

- Mini-JSTorch is a high-performance, minimalist JavaScript library for building neural networks. It runs efficiently in
+ Mini-JSTorch is a high-performance, minimalist JavaScript library for building neural networks. It runs efficiently in frontend environments, including low-end devices. The library enables quick experimentation and learning in AI without compromising stability, accuracy, or training reliability.

- This release, **version 1.
- For Architecture Compability.
+ This release, **version 1.6.0**, adds the **LION** optimizer and the **ReduceLROnPlateau** scheduler, with enhanced stability and improved architecture compatibility.

  ---

  ## New Features Highlights

- - **
- - **
+ - **LION Optimizer:** state-of-the-art optimizer with superior stability and convergence.
+ - **ReduceLROnPlateau Scheduler:** adaptive learning-rate scheduling based on loss plateaus.
+ - **Enhanced Stability:** gradient clipping, better weight initialization, and NaN prevention.

  ---

@@ -23,8 +24,8 @@ For Architecture Compability.

  - **Layers:** Linear, Flatten, Conv2D
  - **Activations:** ReLU, Sigmoid, Tanh, LeakyReLU, GELU, Mish, SiLU, ELU
  - **Loss Functions:** MSELoss, CrossEntropyLoss
- - **Optimizers:** Adam, SGD
- - **Schedulers:** StepLR, LambdaLR
+ - **Optimizers:** Adam, SGD, **LION**
+ - **Schedulers:** StepLR, LambdaLR, **ReduceLROnPlateau**
  - **Regularization:** Dropout, BatchNorm2D
  - **Utilities:** zeros, randomMatrix, softmax, crossEntropy, dot, addMatrices, reshape, stack, flatten, eye, concat
  - **Model Container:** Sequential (for stacking layers with forward/backward passes)

@@ -43,7 +44,7 @@ npm install mini-jstorch

  ## Quick Start Example

  ```javascript
- import { Sequential, Linear, ReLU, Sigmoid, CrossEntropyLoss, Adam, StepLR } from './jstorch.js';
+ import { Sequential, Linear, ReLU, Sigmoid, CrossEntropyLoss, Adam, StepLR } from './src/jstorch.js';

  // Build model
  const model = new Sequential([

@@ -101,21 +102,23 @@ loadModel(model2, json);

  ## Demos & Testing

- Check the `
- - **
- - **
+ Check the `demo/` directory for ready-to-run demos:
+ - **demo/MakeModel.js:** build and run a simple neural network.
+ - **demo/scheduler.js:** experiment with learning rate schedulers.
+ - **demo/fu_fun.js:** tests all fu_ functions (for users).
  - Add your own scripts for quick prototyping!

  ```bash
- node
- node
+ node demo/MakeModel.js
+ node demo/scheduler.js
+ node demo/fu_fun.js
  ```

  ---

  ## Intended Use Cases

- - Rapid prototyping of neural networks in frontend
+ - Rapid prototyping of neural networks in the frontend.
  - Learning and teaching foundational neural network concepts.
  - Experimentation on low-end devices or mobile browsers.
  - Lightweight AI projects without GPU dependency.

@@ -126,22 +129,4 @@ node tests/scheduler.js

  `MIT License`

- **Copyright (c) 2025 rizal-editors**
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all
- copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
+ **Copyright (c) 2025 rizal-editors**
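To make the two 1.6.0 additions concrete, here is a minimal sketch of using them together, based on the constructor signatures in src/jstorch.js below and the `{param, grad}` parameter shape from demo/scheduler.js (the loss values are made up):

```javascript
import { LION, ReduceLROnPlateau } from './src/jstorch.js';

// One 2x2 weight matrix with its gradient buffer, as in demo/scheduler.js.
const param = { param: [[1, 2], [3, 4]], grad: [[0.5, -0.2], [0.1, 0.3]] };

const optimizer = new LION([param], { lr: 0.0001, weight_decay: 0.01 });
const scheduler = new ReduceLROnPlateau(optimizer, { patience: 3, factor: 0.5, verbose: true });

// Simulated plateauing losses; in real training these come from a loss function.
const losses = [0.9, 0.8, 0.75, 0.74, 0.74, 0.74, 0.74, 0.73];
for (const loss of losses) {
  optimizer.step();      // sign-based update: each weight moves by ±lr (plus weight decay)
  optimizer.zeroGrad();
  scheduler.step(loss);  // halves the LR after `patience` epochs without improvement
}
console.log('final LR =', scheduler.get_last_lr());
```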
package/{tests → demo}/fu_fun.js
RENAMED

@@ -5,68 +5,68 @@ import {
  } from '../src/jstorch.js';

  function testAllFuFunctions() {
-   console.log("
+   console.log("TESTING ALL FU_FUNCTIONS\n");

    // Test 1: fu_tensor
    console.log("1. fu_tensor");
    const t1 = fu_tensor([[1, 2], [3, 4]]);
-   console.log("
+   console.log("", t1.data);

    // Test 2: fu_add
    console.log("\n2. fu_add");
    const a = fu_tensor([[1, 2]]);
    const b = fu_tensor([[3, 4]]);
    const c = fu_add(a, b);
-   console.log("
+   console.log("", a.data, "+", b.data, "=", c.data);

    // Test 3: fu_mul
    console.log("\n3. fu_mul");
    const d = fu_mul(a, b);
-   console.log("
+   console.log("", a.data, "*", b.data, "=", d.data);

    // Test 4: fu_matmul
    console.log("\n4. fu_matmul");
    const e = fu_tensor([[1, 2]]);
    const f = fu_tensor([[3], [4]]);
    const g = fu_matmul(e, f);
-   console.log("
+   console.log("matmul =", g.data);

    // Test 5: fu_sum & fu_mean
    console.log("\n5. fu_sum & fu_mean");
    const h = fu_tensor([[1, 2], [3, 4]]);
    const sum = fu_sum(h);
    const mean = fu_mean(h);
-   console.log("
+   console.log("sum =", sum.data, "mean =", mean.data);

    // Test 6: fu_relu
    console.log("\n6. fu_relu");
    const i = fu_tensor([[-1, 0], [1, 2]]);
    const relu = fu_relu(i);
-   console.log("
+   console.log("relu =", relu.data);

    // Test 7: fu_sigmoid
    console.log("\n7. fu_sigmoid");
    const sigmoid = fu_sigmoid(i);
-   console.log("
+   console.log("sigmoid =", sigmoid.data);

    // Test 8: fu_tanh
    console.log("\n8. fu_tanh");
    const tanh = fu_tanh(i);
-   console.log("
+   console.log("tanh =", tanh.data);

    // Test 9: fu_softmax
    console.log("\n9. fu_softmax");
    const j = fu_tensor([[1, 2, 3]]);
    const softmax = fu_softmax(j);
-   console.log("
+   console.log("softmax =", softmax.data);

    // Test 10: fu_flatten & fu_reshape
    console.log("\n10. fu_flatten & fu_reshape");
    const k = fu_tensor([[1, 2], [3, 4]]);
    const flat = fu_flatten(k);
    const reshaped = fu_reshape(flat, 1, 4);
-   console.log("
-   console.log("
+   console.log("flatten =", flat.data);
+   console.log("reshape =", reshaped.data);
  }

  testAllFuFunctions();
package/demo/scheduler.js
ADDED

@@ -0,0 +1,69 @@
+ // Example: Test ALL learning rate schedulers with mini-jstorch optimizers
+
+ import { SGD, StepLR, LambdaLR, ReduceLROnPlateau, Tensor } from "../src/jstorch.js";
+
+ const param = { param: [[1, 2], [3, 4]], grad: [[0, 0], [0, 0]] };
+ const optimizer = new SGD([param], 0.1);
+
+ // --- Test StepLR ---
+ console.log("Testing StepLR...");
+ const stepScheduler = new StepLR(optimizer, 3, 0.5);
+ for (let epoch = 1; epoch <= 10; epoch++) {
+   stepScheduler.step();
+   console.log(`Epoch ${epoch}: LR = ${optimizer.lr.toFixed(4)}`);
+ }
+
+ // --- Test LambdaLR ---
+ console.log("\nTesting LambdaLR...");
+ optimizer.lr = 0.1; // Reset LR
+ const lambdaScheduler = new LambdaLR(optimizer, epoch => 1.0 / (1 + epoch));
+ for (let epoch = 1; epoch <= 5; epoch++) {
+   lambdaScheduler.step();
+   console.log(`Epoch ${epoch}: LR = ${optimizer.lr.toFixed(4)}`);
+ }
+
+ // --- Test ReduceLROnPlateau ---
+ console.log("\nTesting ReduceLROnPlateau...");
+ optimizer.lr = 0.1; // Reset LR
+ const plateauScheduler = new ReduceLROnPlateau(optimizer, {
+   patience: 2,
+   factor: 0.5,
+   min_lr: 0.01,
+   verbose: true
+ });
+
+ // Simulate training with plateauing loss
+ const losses = [0.9, 0.8, 0.7, 0.69, 0.68, 0.68, 0.68, 0.67, 0.67, 0.67];
+ console.log("Simulated training with plateauing loss:");
+ for (let epoch = 0; epoch < losses.length; epoch++) {
+   plateauScheduler.step(losses[epoch]);
+   console.log(`Epoch ${epoch + 1}: Loss = ${losses[epoch].toFixed(3)}, LR = ${optimizer.lr.toFixed(4)}, Wait = ${plateauScheduler.wait}`);
+ }
+
+ // --- Test ReduceLROnPlateau with Cooldown ---
+ console.log("\nTesting ReduceLROnPlateau with Cooldown...");
+ optimizer.lr = 0.1; // Reset LR
+ const plateauWithCooldown = new ReduceLROnPlateau(optimizer, {
+   patience: 2,
+   factor: 0.5,
+   min_lr: 0.01,
+   cooldown: 2,
+   verbose: true
+ });
+
+ // Simulate training with multiple plateaus
+ const losses2 = [0.9, 0.9, 0.9, 0.9, 0.8, 0.8, 0.8, 0.8, 0.7, 0.7];
+ console.log("Simulated training with cooldown:");
+ for (let epoch = 0; epoch < losses2.length; epoch++) {
+   plateauWithCooldown.step(losses2[epoch]);
+   console.log(`Epoch ${epoch + 1}: Loss = ${losses2[epoch].toFixed(3)}, LR = ${optimizer.lr.toFixed(4)}, Wait = ${plateauWithCooldown.wait}, Cooldown = ${plateauWithCooldown.cooldown_counter}`);
+ }
+
+ // --- Summary ---
+ console.log("\nSCHEDULER SUMMARY:");
+ console.log(`StepLR: ${stepScheduler.last_epoch} epochs processed`);
+ console.log(`LambdaLR: ${lambdaScheduler.last_epoch} epochs processed`);
+ console.log(`ReduceLROnPlateau: ${plateauScheduler.num_reductions} LR reductions`);
+ console.log(`ReduceLROnPlateau with Cooldown: ${plateauWithCooldown.num_reductions} LR reductions`);
+
+ console.log("\nAll schedulers tested successfully!");
package/package.json
CHANGED
package/src/jstorch.js
CHANGED

@@ -1,9 +1,9 @@
  /*!
   * Project: mini-jstorch
   * File: MainEngine.js
-  * Author:
+  * Author: Rizal-editors
   * License: MIT
-  * Copyright (C) 2025
+  * Copyright (C) 2025 Rizal-editors
   *
   * Permission is hereby granted, free of charge, to any person obtaining a copy
   * of this software and associated documentation files (the "Software"), to deal
@@ -34,10 +34,14 @@ export function ones(rows, cols) {
    return Array.from({length:rows},()=>Array(cols).fill(1));
  }

- export function randomMatrix(rows, cols, scale=
-
-
-
+ export function randomMatrix(rows, cols, scale=null){
+   // Auto-scale based on layer size (Xavier init)
+   if (scale === null){
+     scale = Math.sqrt(2.0 / (rows + cols));
+   }
+
+   return Array.from({length: rows}, () =>
+     Array.from({length: cols}, () => (Math.random() * 2 - 1) * scale));
  }

  export function transpose(matrix){
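For intuition about the new default (numbers below are mine, not from the diff): the auto-scale draws each weight uniformly from (-s, s) with s = sqrt(2 / (rows + cols)), so larger layers start with proportionally smaller weights.

```javascript
// Sketch of the new auto-scaling, assuming src/jstorch.js is importable from the repo root.
import { randomMatrix } from './src/jstorch.js';

// A 64-in, 32-out layer gets s = sqrt(2 / 96) ≈ 0.144, so weights start in (-0.144, 0.144).
const W = randomMatrix(64, 32);

// Passing an explicit scale still bypasses the auto-scaling, as before.
const Wfixed = randomMatrix(64, 32, 0.5);
```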
@@ -242,35 +246,75 @@ export class Tensor {

  // ---------------------- Layers ----------------------
  export class Linear {
-   constructor(inputDim,outputDim){
-     this.W=randomMatrix(inputDim,outputDim);
-     this.b=Array(outputDim).fill(0);
-     this.gradW=zeros(inputDim,outputDim);
-     this.gradb=Array(outputDim).fill(0);
-     this.x=null;
+   constructor(inputDim, outputDim){
+     this.W = randomMatrix(inputDim, outputDim);
+     this.b = Array(outputDim).fill(0);
+     this.gradW = zeros(inputDim, outputDim);
+     this.gradb = Array(outputDim).fill(0);
+     this.x = null;
+     this.originalShape = null; // Track input shape
    }

    forward(x){
-
-
-
+     // Handle both [batch, features] and [batch, 1, features]
+     this.originalShape = this._getShapeType(x);
+
+     if (this.originalShape === '3d') {
+       // Convert from [batch, 1, features] to [batch, features]
+       this.x = x.map(sample => sample[0]);
+     } else {
+       // Already in [batch, features] format
+       this.x = x;
+     }
+
+     const out = dot(this.x, this.W);
+     return out.map((row, i) => row.map((v, j) => v + this.b[j]));
    }

    backward(grad){
-
-
-
-
-
-
-
-
-
-
+     // Compute gradients
+     for(let i = 0; i < this.W.length; i++) {
+       for(let j = 0; j < this.W[0].length; j++) {
+         this.gradW[i][j] = this.x.reduce((sum, row, k) => sum + row[i] * grad[k][j], 0);
+       }
+     }
+
+     for(let j = 0; j < this.b.length; j++) {
+       this.gradb[j] = grad.reduce((sum, row) => sum + row[j], 0);
+     }
+
+     const gradInput = zeros(this.x.length, this.W.length);
+     for(let i = 0; i < this.x.length; i++) {
+       for(let j = 0; j < this.W.length; j++) {
+         for(let k = 0; k < this.W[0].length; k++) {
+           gradInput[i][j] += grad[i][k] * this.W[j][k];
+         }
+       }
+     }
+
+     // Convert back to original shape if needed
+     if (this.originalShape === '3d') {
+       return gradInput.map(row => [row]); // Back to [batch, 1, features]
+     }
      return gradInput;
    }

-
+   _getShapeType(x) {
+     if (Array.isArray(x[0]) && Array.isArray(x[0][0]) && !Array.isArray(x[0][0][0])) {
+       return '3d'; // [batch, 1, features]
+     } else if (Array.isArray(x[0]) && !Array.isArray(x[0][0])) {
+       return '2d'; // [batch, features]
+     } else {
+       throw new Error(`Unsupported input shape for Linear layer`);
+     }
+   }
+
+   parameters(){
+     return [
+       {param: this.W, grad: this.gradW},
+       {param: [this.b], grad: [this.gradb]}
+     ];
+   }
  }

  export class Flatten {
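A quick sanity check of the reworked Linear layer (example values mine): gradW comes out inputDim×outputDim (effectively xᵀ·grad), gradb is the column sums of the upstream gradient, and the returned gradient is grad·Wᵀ, restored to the input's original 2D or 3D shape.

```javascript
// Assumes the package is checked out locally so src/jstorch.js resolves.
import { Linear } from './src/jstorch.js';

const layer = new Linear(3, 2);
const x = [[1, 2, 3], [4, 5, 6]];                        // [batch=2, features=3]
const y = layer.forward(x);                               // -> [2, 2]
const gradIn = layer.backward([[1, 0], [0, 1]]);          // upstream grad, [2, 2]

console.log(y.length, y[0].length);                       // 2 2
console.log(gradIn.length, gradIn[0].length);             // 2 3 (grad · Wᵀ)
console.log(layer.gradW.length, layer.gradW[0].length);   // 3 2 (matches W)
```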
@@ -509,36 +553,43 @@ export class Sequential {

  // ---------------------- Activations ----------------------
  export class ReLU{
-   constructor(){ this.
+   constructor(){ this.mask = null; this.originalShape = null; }

    forward(x){
-
-
-
-
-
-
-       row.map(v => Math.max(0, v))
-     )
-   )
-   );
+     this.originalShape = this._getShapeType(x);
+
+     if (this.originalShape === '3d') {
+       // Handle [batch, 1, features]
+       this.mask = x.map(sample => sample[0].map(v => v > 0));
+       return x.map(sample => [sample[0].map(v => Math.max(0, v))]);
      } else {
-       // [batch, features]
-       this.
+       // Handle [batch, features]
+       this.mask = x.map(row => row.map(v => v > 0));
+       return x.map(row => row.map(v => Math.max(0, v)));
      }
-     return this.out;
    }

    backward(grad){
-
-
-
-
+     if (this.originalShape === '3d') {
+       return grad.map((sample, i) =>
+         [sample[0].map((v, j) => this.mask[i][j] ? v : 0)]
+       );
      } else {
-
-
+       return grad.map((row, i) =>
+         row.map((v, j) => this.mask[i][j] ? v : 0)
+       );
      }
-   }
+   }
+
+   _getShapeType(x) {
+     if (Array.isArray(x[0]) && Array.isArray(x[0][0]) && !Array.isArray(x[0][0][0])) {
+       return '3d';
+     } else if (Array.isArray(x[0]) && !Array.isArray(x[0][0])) {
+       return '2d';
+     } else {
+       throw new Error(`Unsupported input shape for ReLU`);
+     }
+   }
  }

  export class Sigmoid{ constructor(){ this.out=null; } forward(x){ const fn=v=>1/(1+Math.exp(-v)); this.out=x.map(r=>r.map(fn)); return this.out; } backward(grad){ return grad.map((r,i)=>r.map((v,j)=>v*this.out[i][j]*(1-this.out[i][j]))); } }
@@ -555,24 +606,172 @@ export class CrossEntropyLoss{ forward(pred,target){ this.pred=pred; this.target

  // ---------------------- Optimizers ----------------------
  export class Adam{
-   constructor(params,lr=0.001,b1=0.9,b2=0.999,eps=1e-8){
-
-
-
-
+   constructor(params, lr = 0.001, b1 = 0.9, b2 = 0.999, eps = 1e-8, max_grad_norm = 1.0){
+     // Handle both parameter styles: (params, lr) OR (params, {lr, ...})
+     if (typeof lr === 'object') {
+       // Options object provided
+       const options = lr;
+       this.lr = options.lr || 0.001;
+       this.beta1 = options.b1 || options.beta1 || 0.9;
+       this.beta2 = options.b2 || options.beta2 || 0.999;
+       this.eps = options.eps || 1e-8;
+       this.max_grad_norm = options.max_grad_norm || 1.0;
+     } else {
+       // Individual parameters provided
+       this.lr = lr;
+       this.beta1 = b1;
+       this.beta2 = b2;
+       this.eps = eps;
+       this.max_grad_norm = max_grad_norm;
+     }
+
+     this.params = params;
+     this.m = params.map(p => zeros(p.param.length, p.param[0].length || 1));
+     this.v = params.map(p => zeros(p.param.length, p.param[0].length || 1));
+     this.t = 0;
    }
+
    step(){
      this.t++;
-     this.params.forEach((p,idx)=>{
-       for
-
-
-
-
-       const vHat=this.v[idx][i][j]/(1-Math.pow(this.beta2,this.t));
-       p.param[i][j]-=this.lr*mHat/(Math.sqrt(vHat)+this.eps);
+     this.params.forEach((p, idx) => {
+       // Calculate gradient norm for clipping
+       let grad_norm_sq = 0;
+       for (let i = 0; i < p.param.length; i++){
+         for (let j = 0; j < (p.param[0].length || 1); j++){
+           const grad_val = p.grad[i] && p.grad[i][j] !== undefined ? p.grad[i][j] : 0;
+           grad_norm_sq += grad_val * grad_val;
          }
+       }
+
+       const grad_norm = Math.sqrt(grad_norm_sq);
+       const clip_scale = grad_norm > this.max_grad_norm ? this.max_grad_norm / grad_norm : 1.0;
+
+       // Update with clipped gradients
+       for (let i = 0; i < p.param.length; i++){
+         for(let j = 0; j < (p.param[0].length || 1); j++){
+           if (p.grad[i] && p.grad[i][j] !== undefined){
+             const g = p.grad[i][j] * clip_scale;
+             this.m[idx][i][j] = this.beta1 * this.m[idx][i][j] + (1 - this.beta1) * g;
+             this.v[idx][i][j] = this.beta2 * this.v[idx][i][j] + (1 - this.beta2) * g * g;
+             const mHat = this.m[idx][i][j] / (1 - Math.pow(this.beta1, this.t));
+             const vHat = this.v[idx][i][j] / (1 - Math.pow(this.beta2, this.t));
+             p.param[i][j] -= this.lr * mHat / (Math.sqrt(vHat) + this.eps);
+           }
+         }
+       }
+     });
+   }
+ }
+
+ export class SGD{
+   constructor(params, lr = 0.01, max_grad_norm = 1.0) {
+     this.params = params;
+     this.lr = lr;
+     this.max_grad_norm = max_grad_norm; // Gradient clipping
+   }
+
+   step() {
+     this.params.forEach(p => {
+       // Calculate gradient norm
+       let grad_norm_sq = 0;
+       let total_params = 0;
+
+       for (let i = 0; i < p.param.length; i++){
+         const row = p.param[i];
+         for (let j = 0; j < (row.length || 1); j++) {
+           const grad_val = p.grad[i] && p.grad[i][j] !== undefined ? p.grad[i][j] : 0;
+           grad_norm_sq += grad_val * grad_val;
+           total_params++;
+         }
+       }
+
+       const grad_norm = Math.sqrt(grad_norm_sq);
+
+       // Apply gradient clipping if needed
+       const clip_scale = grad_norm > this.max_grad_norm ? this.max_grad_norm / grad_norm : 1.0;
+
+       // Update parameters with clipped gradients
+       for (let i = 0; i < p.param.length; i++){
+         const row = p.param[i];
+         for (let j = 0; j < (row.length || 1); j++) {
+           if (p.grad[i] && p.grad[i][j] !== undefined){
+             p.param[i][j] -= this.lr * (p.grad[i][j] * clip_scale);
+           }
+         }
+       }
+     });
+   }
+ }
+
+
+ export class LION {
+   constructor(params, options = {}) {
+     this.params = params;
+
+     const {
+       lr = 0.0001,       // LION typically uses a smaller LR
+       beta1 = 0.9,       // First moment decay
+       beta2 = 0.99,      // Second moment decay
+       weight_decay = 0,  // L2 regularization
+       eps = 1e-8         // Numerical stability
+     } = options;
+
+     this.lr = lr;
+     this.beta1 = beta1;
+     this.beta2 = beta2;
+     this.weight_decay = weight_decay;
+     this.eps = eps;
+
+     // Initialize momentums
+     this.m = params.map(p => zeros(p.param.length, p.param[0].length || 1));
+     this.t = 0;
+   }
+
+   step() {
+     this.t++;
+
+     this.params.forEach((p, idx) => {
+       for (let i = 0; i < p.param.length; i++) {
+         for (let j = 0; j < (p.param[0].length || 1); j++) {
+           if (p.grad[i] && p.grad[i][j] !== undefined) {
+             const grad = p.grad[i][j];
+
+             // Update momentum: m_t = β1 * m_{t-1} + (1 - β1) * g_t
+             this.m[idx][i][j] = this.beta1 * this.m[idx][i][j] + (1 - this.beta1) * grad;
+
+             // LION update: param = param - η * sign(m_t + β2 * g_t)
+             const update_term = this.m[idx][i][j] + this.beta2 * grad;
+
+             // Get sign with epsilon for stability
+             let sign_val;
+             if (update_term > this.eps) sign_val = 1;
+             else if (update_term < -this.eps) sign_val = -1;
+             else sign_val = 0;
+
+             let update = sign_val * this.lr;
+
+             // Add weight decay if specified
+             if (this.weight_decay > 0) {
+               update += this.weight_decay * this.lr * p.param[i][j];
+             }
+
+             p.param[i][j] -= update;
+           }
+         }
+       }
+     });
+   }
+
+   zeroGrad() {
+     this.params.forEach(p => {
+       if (p.grad) {
+         for (let i = 0; i < p.grad.length; i++) {
+           for (let j = 0; j < p.grad[i].length; j++) {
+             p.grad[i][j] = 0;
+           }
+         }
+       }
      });
    }
  }
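For intuition, a worked example of the clipping shared by Adam and SGD above (numbers mine, not from the diff): the gradients are scaled down only when their overall L2 norm exceeds max_grad_norm, which shrinks the step while preserving its direction.

```javascript
// grad = [[3, 4]] has L2 norm sqrt(3*3 + 4*4) = 5.
const grad = [[3, 4]];
const max_grad_norm = 1.0;

const grad_norm = Math.sqrt(grad.flat().reduce((s, g) => s + g * g, 0));        // 5
const clip_scale = grad_norm > max_grad_norm ? max_grad_norm / grad_norm : 1.0; // 0.2

// Applied gradients keep their direction but shrink to norm 1: [[0.6, 0.8]].
const clipped = grad.map(row => row.map(g => g * clip_scale));
console.log(clipped);
```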
@@ -619,6 +818,89 @@ export class LambdaLR {
    }
  }

+ // ---------------------- ReduceLROnPlateau Scheduler ----------------------
+ export class ReduceLROnPlateau {
+   constructor(optimizer, options = {}) {
+     this.optimizer = optimizer;
+
+     // Destructure with defaults
+     const {
+       patience = 10,
+       factor = 0.5,
+       min_lr = 1e-6,
+       threshold = 1e-4,
+       cooldown = 0,
+       verbose = false
+     } = options;
+
+     this.patience = patience;
+     this.factor = factor;
+     this.min_lr = min_lr;
+     this.threshold = threshold;
+     this.cooldown = cooldown;
+     this.verbose = verbose;
+
+     // State tracking
+     this.bestLoss = Infinity;
+     this.wait = 0;
+     this.cooldown_counter = 0;
+     this.num_reductions = 0;
+   }
+
+   step(loss) {
+     // Handle cooldown
+     if (this.cooldown_counter > 0) {
+       this.cooldown_counter--;
+       return;
+     }
+
+     // Check if this is significant improvement (relative threshold)
+     const improvement_needed = this.bestLoss * (1 - this.threshold);
+     const is_better = loss < improvement_needed;
+
+     if (is_better) {
+       // Significant improvement - reset
+       this.bestLoss = loss;
+       this.wait = 0;
+     } else {
+       // No significant improvement
+       this.wait += 1;
+     }
+
+     // Check if we've waited long enough
+     if (this.wait >= this.patience) {
+       this._reduce_lr();
+       this.cooldown_counter = this.cooldown;
+       this.wait = 0;
+     }
+   }
+
+   _reduce_lr() {
+     const old_lr = this.optimizer.lr;
+     const new_lr = Math.max(old_lr * this.factor, this.min_lr);
+
+     if (new_lr < old_lr) {
+       this.optimizer.lr = new_lr;
+       this.num_reductions++;
+
+       if (this.verbose) {
+         console.log(`ReduceLROnPlateau: reducing LR from ${old_lr} to ${new_lr}`);
+       }
+     }
+   }
+
+   get_last_lr() {
+     return this.optimizer.lr;
+   }
+
+   reset() {
+     this.bestLoss = Infinity;
+     this.wait = 0;
+     this.cooldown_counter = 0;
+     this.num_reductions = 0;
+   }
+ }
+
  // ---------------------- ELU Activation ----------------------
  export class ELU {
    constructor(alpha=1.0) {
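A quick worked check of the relative threshold in step(loss) above (values mine, not from the diff): with a best loss of 0.70 and the default threshold of 1e-4, only a loss below 0.70 × (1 − 1e-4) = 0.69993 counts as an improvement.

```javascript
const bestLoss = 0.70;
const threshold = 1e-4;
const improvement_needed = bestLoss * (1 - threshold); // 0.69993

console.log(0.6995 < improvement_needed); // true  -> wait resets to 0
console.log(0.6999 < improvement_needed); // false -> wait increments toward patience
```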
@@ -708,7 +990,6 @@ export class SiLU {
    }
  }

- export class SGD{ constructor(params,lr=0.01){ this.params=params; this.lr=lr; } step(){ this.params.forEach(p=>{ for(let i=0;i<p.param.length;i++) for(let j=0;j<(p.param[0].length||1);j++) p.param[i][j]-=this.lr*p.grad[i][j]; }); } }

  // ---------------------- BatchNorm2D ----------------------
  export class BatchNorm2d {
package/tests/scheduler.js
DELETED

@@ -1,23 +0,0 @@
- // Example: Test learning rate schedulers (StepLR and LambdaLR) with mini-jstorch optimizers
-
- import { SGD, StepLR, LambdaLR, Tensor } from "../src/jstorch.js";
-
- const param = { param: [[1, 2], [3, 4]], grad: [[0, 0], [0, 0]] };
- const optimizer = new SGD([param], 0.1);
-
- // --- Test StepLR ---
- console.log("Testing StepLR...");
- const stepScheduler = new StepLR(optimizer, 3, 0.5);
- for (let epoch = 1; epoch <= 10; epoch++) {
-   stepScheduler.step();
-   console.log(`Epoch ${epoch}: LR = ${optimizer.lr.toFixed(4)}`);
- }
-
- // --- Test LambdaLR ---
- console.log("\nTesting LambdaLR...");
- optimizer.lr = 0.1; // Reset LR
- const lambdaScheduler = new LambdaLR(optimizer, epoch => 1.0 / (1 + epoch));
- for (let epoch = 1; epoch <= 5; epoch++) {
-   lambdaScheduler.step();
-   console.log(`Epoch ${epoch}: LR = ${optimizer.lr.toFixed(4)}`);
- }
package/{tests → demo}/MakeModel.js
RENAMED
File without changes