mini-jstorch 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -33
- package/{tests → demo}/fu_fun.js +12 -12
- package/demo/scheduler.js +69 -0
- package/package.json +1 -1
- package/src/jstorch.js +343 -62
- package/tests/scheduler.js +0 -23
- package/{tests → demo}/MakeModel.js +0 -0
package/README.md
CHANGED

@@ -4,17 +4,18 @@ A lightweight JavaScript neural network library for rapid frontend AI experiment

  ## Overview

- Mini-JSTorch is a high-performance, minimalist JavaScript library for building neural networks. It runs efficiently in
+ Mini-JSTorch is a high-performance, minimalist JavaScript library for building neural networks. It runs efficiently in frontend environments, including low-end devices. The library enables quick experimentation and learning in AI without compromising stability, accuracy, or training reliability.

- This release, **version 1.
- For Architecture Compability.
+ This release, **version 1.6.0**, adds the **LION** optimizer and the **ReduceLROnPlateau** scheduler, with enhanced stability and improved architecture compatibility.

  ---

  ## New Features Highlights

- - **
- - **
+ - **LION Optimizer:** state-of-the-art optimizer with superior stability and convergence.
+ - **ReduceLROnPlateau Scheduler:** adaptive learning-rate scheduling based on loss plateaus.
+ - **Enhanced Stability:** gradient clipping, better weight initialization, and NaN prevention.

  ---

@@ -23,8 +24,8 @@ For Architecture Compability.

  - **Layers:** Linear, Flatten, Conv2D
  - **Activations:** ReLU, Sigmoid, Tanh, LeakyReLU, GELU, Mish, SiLU, ELU
  - **Loss Functions:** MSELoss, CrossEntropyLoss
- - **Optimizers:** Adam, SGD
- - **Schedulers:** StepLR, LambdaLR
+ - **Optimizers:** Adam, SGD, **LION**
+ - **Schedulers:** StepLR, LambdaLR, **ReduceLROnPlateau**
  - **Regularization:** Dropout, BatchNorm2D
  - **Utilities:** zeros, randomMatrix, softmax, crossEntropy, dot, addMatrices, reshape, stack, flatten, eye, concat
  - **Model Container:** Sequential (for stacking layers with forward/backward passes)

@@ -43,7 +44,7 @@ npm install mini-jstorch

  ## Quick Start Example

  ```javascript
- import { Sequential, Linear, ReLU, Sigmoid, CrossEntropyLoss, Adam, StepLR } from './jstorch.js';
+ import { Sequential, Linear, ReLU, Sigmoid, CrossEntropyLoss, Adam, StepLR } from './src/jstorch.js';

  // Build model
  const model = new Sequential([

@@ -101,21 +102,23 @@ loadModel(model2, json);

  ## Demos & Testing

- Check the `
- - **
- - **
+ Check the `demo/` directory for ready-to-run demos:
+ - **demo/MakeModel.js:** build and run a simple neural network.
+ - **demo/scheduler.js:** experiment with learning rate schedulers.
+ - **demo/fu_fun.js:** tests all fu_ functions (for users).
  - Add your own scripts for quick prototyping!

  ```bash
- node
- node
+ node demo/MakeModel.js
+ node demo/scheduler.js
+ node demo/fu_fun.js
  ```

  ---

  ## Intended Use Cases

- - Rapid prototyping of neural networks in frontend
+ - Rapid prototyping of neural networks in the frontend.
  - Learning and teaching foundational neural network concepts.
  - Experimentation on low-end devices or mobile browsers.
  - Lightweight AI projects without GPU dependency.

@@ -126,22 +129,4 @@ node tests/scheduler.js

  `MIT License`

- **Copyright (c) 2025 rizal-editors**
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all
- copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
+ **Copyright (c) 2025 rizal-editors**
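To make the two 1.6.0 additions concrete, here is a minimal sketch of using them together, based on the constructor signatures in src/jstorch.js below and the `{param, grad}` parameter shape from demo/scheduler.js (the loss values are made up):

```javascript
import { LION, ReduceLROnPlateau } from './src/jstorch.js';

// One 2x2 weight matrix with its gradient buffer, as in demo/scheduler.js.
const param = { param: [[1, 2], [3, 4]], grad: [[0.5, -0.2], [0.1, 0.3]] };

const optimizer = new LION([param], { lr: 0.0001, weight_decay: 0.01 });
const scheduler = new ReduceLROnPlateau(optimizer, { patience: 3, factor: 0.5, verbose: true });

// Simulated plateauing losses; in real training these come from a loss function.
const losses = [0.9, 0.8, 0.75, 0.74, 0.74, 0.74, 0.74, 0.73];
for (const loss of losses) {
  optimizer.step();      // sign-based update: each weight moves by ±lr (plus weight decay)
  optimizer.zeroGrad();
  scheduler.step(loss);  // halves the LR after `patience` epochs without improvement
}
console.log('final LR =', scheduler.get_last_lr());
```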
package/{tests → demo}/fu_fun.js
RENAMED

@@ -5,68 +5,68 @@ import {
  } from '../src/jstorch.js';

  function testAllFuFunctions() {
-   console.log("
+   console.log("TESTING ALL FU_FUNCTIONS\n");

    // Test 1: fu_tensor
    console.log("1. fu_tensor");
    const t1 = fu_tensor([[1, 2], [3, 4]]);
-   console.log("
+   console.log("", t1.data);

    // Test 2: fu_add
    console.log("\n2. fu_add");
    const a = fu_tensor([[1, 2]]);
    const b = fu_tensor([[3, 4]]);
    const c = fu_add(a, b);
-   console.log("
+   console.log("", a.data, "+", b.data, "=", c.data);

    // Test 3: fu_mul
    console.log("\n3. fu_mul");
    const d = fu_mul(a, b);
-   console.log("
+   console.log("", a.data, "*", b.data, "=", d.data);

    // Test 4: fu_matmul
    console.log("\n4. fu_matmul");
    const e = fu_tensor([[1, 2]]);
    const f = fu_tensor([[3], [4]]);
    const g = fu_matmul(e, f);
-   console.log("
+   console.log("matmul =", g.data);

    // Test 5: fu_sum & fu_mean
    console.log("\n5. fu_sum & fu_mean");
    const h = fu_tensor([[1, 2], [3, 4]]);
    const sum = fu_sum(h);
    const mean = fu_mean(h);
-   console.log("
+   console.log("sum =", sum.data, "mean =", mean.data);

    // Test 6: fu_relu
    console.log("\n6. fu_relu");
    const i = fu_tensor([[-1, 0], [1, 2]]);
    const relu = fu_relu(i);
-   console.log("
+   console.log("relu =", relu.data);

    // Test 7: fu_sigmoid
    console.log("\n7. fu_sigmoid");
    const sigmoid = fu_sigmoid(i);
-   console.log("
+   console.log("sigmoid =", sigmoid.data);

    // Test 8: fu_tanh
    console.log("\n8. fu_tanh");
    const tanh = fu_tanh(i);
-   console.log("
+   console.log("tanh =", tanh.data);

    // Test 9: fu_softmax
    console.log("\n9. fu_softmax");
    const j = fu_tensor([[1, 2, 3]]);
    const softmax = fu_softmax(j);
-   console.log("
+   console.log("softmax =", softmax.data);

    // Test 10: fu_flatten & fu_reshape
    console.log("\n10. fu_flatten & fu_reshape");
    const k = fu_tensor([[1, 2], [3, 4]]);
    const flat = fu_flatten(k);
    const reshaped = fu_reshape(flat, 1, 4);
-   console.log("
-   console.log("
+   console.log("flatten =", flat.data);
+   console.log("reshape =", reshaped.data);
  }

  testAllFuFunctions();
package/demo/scheduler.js
ADDED

@@ -0,0 +1,69 @@
+ // Example: Test ALL learning rate schedulers with mini-jstorch optimizers
+
+ import { SGD, StepLR, LambdaLR, ReduceLROnPlateau, Tensor } from "../src/jstorch.js";
+
+ const param = { param: [[1, 2], [3, 4]], grad: [[0, 0], [0, 0]] };
+ const optimizer = new SGD([param], 0.1);
+
+ // --- Test StepLR ---
+ console.log("Testing StepLR...");
+ const stepScheduler = new StepLR(optimizer, 3, 0.5);
+ for (let epoch = 1; epoch <= 10; epoch++) {
+   stepScheduler.step();
+   console.log(`Epoch ${epoch}: LR = ${optimizer.lr.toFixed(4)}`);
+ }
+
+ // --- Test LambdaLR ---
+ console.log("\nTesting LambdaLR...");
+ optimizer.lr = 0.1; // Reset LR
+ const lambdaScheduler = new LambdaLR(optimizer, epoch => 1.0 / (1 + epoch));
+ for (let epoch = 1; epoch <= 5; epoch++) {
+   lambdaScheduler.step();
+   console.log(`Epoch ${epoch}: LR = ${optimizer.lr.toFixed(4)}`);
+ }
+
+ // --- Test ReduceLROnPlateau ---
+ console.log("\nTesting ReduceLROnPlateau...");
+ optimizer.lr = 0.1; // Reset LR
+ const plateauScheduler = new ReduceLROnPlateau(optimizer, {
+   patience: 2,
+   factor: 0.5,
+   min_lr: 0.01,
+   verbose: true
+ });
+
+ // Simulate training with plateauing loss
+ const losses = [0.9, 0.8, 0.7, 0.69, 0.68, 0.68, 0.68, 0.67, 0.67, 0.67];
+ console.log("Simulated training with plateauing loss:");
+ for (let epoch = 0; epoch < losses.length; epoch++) {
+   plateauScheduler.step(losses[epoch]);
+   console.log(`Epoch ${epoch + 1}: Loss = ${losses[epoch].toFixed(3)}, LR = ${optimizer.lr.toFixed(4)}, Wait = ${plateauScheduler.wait}`);
+ }
+
+ // --- Test ReduceLROnPlateau with Cooldown ---
+ console.log("\nTesting ReduceLROnPlateau with Cooldown...");
+ optimizer.lr = 0.1; // Reset LR
+ const plateauWithCooldown = new ReduceLROnPlateau(optimizer, {
+   patience: 2,
+   factor: 0.5,
+   min_lr: 0.01,
+   cooldown: 2,
+   verbose: true
+ });
+
+ // Simulate training with multiple plateaus
+ const losses2 = [0.9, 0.9, 0.9, 0.9, 0.8, 0.8, 0.8, 0.8, 0.7, 0.7];
+ console.log("Simulated training with cooldown:");
+ for (let epoch = 0; epoch < losses2.length; epoch++) {
+   plateauWithCooldown.step(losses2[epoch]);
+   console.log(`Epoch ${epoch + 1}: Loss = ${losses2[epoch].toFixed(3)}, LR = ${optimizer.lr.toFixed(4)}, Wait = ${plateauWithCooldown.wait}, Cooldown = ${plateauWithCooldown.cooldown_counter}`);
+ }
+
+ // --- Summary ---
+ console.log("\nSCHEDULER SUMMARY:");
+ console.log(`StepLR: ${stepScheduler.last_epoch} epochs processed`);
+ console.log(`LambdaLR: ${lambdaScheduler.last_epoch} epochs processed`);
+ console.log(`ReduceLROnPlateau: ${plateauScheduler.num_reductions} LR reductions`);
+ console.log(`ReduceLROnPlateau with Cooldown: ${plateauWithCooldown.num_reductions} LR reductions`);
+
+ console.log("\nAll schedulers tested successfully!");
package/package.json
CHANGED
package/src/jstorch.js
CHANGED

@@ -1,9 +1,9 @@
  /*!
   * Project: mini-jstorch
   * File: MainEngine.js
-  * Author:
+  * Author: Rizal-editors
   * License: MIT
-  * Copyright (C) 2025
+  * Copyright (C) 2025 Rizal-editors
   *
   * Permission is hereby granted, free of charge, to any person obtaining a copy
   * of this software and associated documentation files (the "Software"), to deal
@@ -34,10 +34,14 @@ export function ones(rows, cols) {
    return Array.from({length:rows},()=>Array(cols).fill(1));
  }

- export function randomMatrix(rows, cols, scale=
-
-
-
+ export function randomMatrix(rows, cols, scale=null){
+   // Auto-scale based on layer size (Xavier init)
+   if (scale === null){
+     scale = Math.sqrt(2.0 / (rows + cols));
+   }
+
+   return Array.from({length: rows}, () =>
+     Array.from({length: cols}, () => (Math.random() * 2 - 1) * scale));
  }

  export function transpose(matrix){
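For intuition about the new default (numbers below are mine, not from the diff): the auto-scale draws each weight uniformly from (-s, s) with s = sqrt(2 / (rows + cols)), so larger layers start with proportionally smaller weights.

```javascript
// Sketch of the new auto-scaling, assuming src/jstorch.js is importable from the repo root.
import { randomMatrix } from './src/jstorch.js';

// A 64-in, 32-out layer gets s = sqrt(2 / 96) ≈ 0.144, so weights start in (-0.144, 0.144).
const W = randomMatrix(64, 32);

// Passing an explicit scale still bypasses the auto-scaling, as before.
const Wfixed = randomMatrix(64, 32, 0.5);
```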
@@ -242,35 +246,75 @@ export class Tensor {

  // ---------------------- Layers ----------------------
  export class Linear {
-   constructor(inputDim,outputDim){
-     this.W=randomMatrix(inputDim,outputDim);
-     this.b=Array(outputDim).fill(0);
-     this.gradW=zeros(inputDim,outputDim);
-     this.gradb=Array(outputDim).fill(0);
-     this.x=null;
+   constructor(inputDim, outputDim){
+     this.W = randomMatrix(inputDim, outputDim);
+     this.b = Array(outputDim).fill(0);
+     this.gradW = zeros(inputDim, outputDim);
+     this.gradb = Array(outputDim).fill(0);
+     this.x = null;
+     this.originalShape = null; // Track input shape
    }

    forward(x){
-
-
-
+     // Handle both [batch, features] and [batch, 1, features]
+     this.originalShape = this._getShapeType(x);
+
+     if (this.originalShape === '3d') {
+       // Convert from [batch, 1, features] to [batch, features]
+       this.x = x.map(sample => sample[0]);
+     } else {
+       // Already in [batch, features] format
+       this.x = x;
+     }
+
+     const out = dot(this.x, this.W);
+     return out.map((row, i) => row.map((v, j) => v + this.b[j]));
    }

    backward(grad){
-
-
-
-
-
-
-
-
-
-
+     // Compute gradients
+     for(let i = 0; i < this.W.length; i++) {
+       for(let j = 0; j < this.W[0].length; j++) {
+         this.gradW[i][j] = this.x.reduce((sum, row, k) => sum + row[i] * grad[k][j], 0);
+       }
+     }
+
+     for(let j = 0; j < this.b.length; j++) {
+       this.gradb[j] = grad.reduce((sum, row) => sum + row[j], 0);
+     }
+
+     const gradInput = zeros(this.x.length, this.W.length);
+     for(let i = 0; i < this.x.length; i++) {
+       for(let j = 0; j < this.W.length; j++) {
+         for(let k = 0; k < this.W[0].length; k++) {
+           gradInput[i][j] += grad[i][k] * this.W[j][k];
+         }
+       }
+     }
+
+     // Convert back to original shape if needed
+     if (this.originalShape === '3d') {
+       return gradInput.map(row => [row]); // Back to [batch, 1, features]
+     }
      return gradInput;
    }

-
+   _getShapeType(x) {
+     if (Array.isArray(x[0]) && Array.isArray(x[0][0]) && !Array.isArray(x[0][0][0])) {
+       return '3d'; // [batch, 1, features]
+     } else if (Array.isArray(x[0]) && !Array.isArray(x[0][0])) {
+       return '2d'; // [batch, features]
+     } else {
+       throw new Error(`Unsupported input shape for Linear layer`);
+     }
+   }
+
+   parameters(){
+     return [
+       {param: this.W, grad: this.gradW},
+       {param: [this.b], grad: [this.gradb]}
+     ];
+   }
  }

  export class Flatten {
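A quick sanity check of the reworked Linear layer (example values mine): gradW comes out inputDim×outputDim (effectively xᵀ·grad), gradb is the column sums of the upstream gradient, and the returned gradient is grad·Wᵀ, restored to the input's original 2D or 3D shape.

```javascript
// Assumes the package is checked out locally so src/jstorch.js resolves.
import { Linear } from './src/jstorch.js';

const layer = new Linear(3, 2);
const x = [[1, 2, 3], [4, 5, 6]];                        // [batch=2, features=3]
const y = layer.forward(x);                               // -> [2, 2]
const gradIn = layer.backward([[1, 0], [0, 1]]);          // upstream grad, [2, 2]

console.log(y.length, y[0].length);                       // 2 2
console.log(gradIn.length, gradIn[0].length);             // 2 3 (grad · Wᵀ)
console.log(layer.gradW.length, layer.gradW[0].length);   // 3 2 (matches W)
```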
@@ -509,36 +553,43 @@ export class Sequential {

  // ---------------------- Activations ----------------------
  export class ReLU{
-   constructor(){ this.
+   constructor(){ this.mask = null; this.originalShape = null; }

    forward(x){
-
-
-
-
-
-
-       row.map(v => Math.max(0, v))
-     )
-   )
-   );
+     this.originalShape = this._getShapeType(x);
+
+     if (this.originalShape === '3d') {
+       // Handle [batch, 1, features]
+       this.mask = x.map(sample => sample[0].map(v => v > 0));
+       return x.map(sample => [sample[0].map(v => Math.max(0, v))]);
      } else {
-       // [batch, features]
-       this.
+       // Handle [batch, features]
+       this.mask = x.map(row => row.map(v => v > 0));
+       return x.map(row => row.map(v => Math.max(0, v)));
      }
-     return this.out;
    }

    backward(grad){
-
-
-
-
+     if (this.originalShape === '3d') {
+       return grad.map((sample, i) =>
+         [sample[0].map((v, j) => this.mask[i][j] ? v : 0)]
+       );
      } else {
-
-
+       return grad.map((row, i) =>
+         row.map((v, j) => this.mask[i][j] ? v : 0)
+       );
      }
-   }
+   }
+
+   _getShapeType(x) {
+     if (Array.isArray(x[0]) && Array.isArray(x[0][0]) && !Array.isArray(x[0][0][0])) {
+       return '3d';
+     } else if (Array.isArray(x[0]) && !Array.isArray(x[0][0])) {
+       return '2d';
+     } else {
+       throw new Error(`Unsupported input shape for ReLU`);
+     }
+   }
  }

  export class Sigmoid{ constructor(){ this.out=null; } forward(x){ const fn=v=>1/(1+Math.exp(-v)); this.out=x.map(r=>r.map(fn)); return this.out; } backward(grad){ return grad.map((r,i)=>r.map((v,j)=>v*this.out[i][j]*(1-this.out[i][j]))); } }
@@ -555,24 +606,172 @@ export class CrossEntropyLoss{ forward(pred,target){ this.pred=pred; this.target

  // ---------------------- Optimizers ----------------------
  export class Adam{
-   constructor(params,lr=0.001,b1=0.9,b2=0.999,eps=1e-8){
-
-
-
-
+   constructor(params, lr = 0.001, b1 = 0.9, b2 = 0.999, eps = 1e-8, max_grad_norm = 1.0){
+     // Handle both parameter styles: (params, lr) OR (params, {lr, ...})
+     if (typeof lr === 'object') {
+       // Options object provided
+       const options = lr;
+       this.lr = options.lr || 0.001;
+       this.beta1 = options.b1 || options.beta1 || 0.9;
+       this.beta2 = options.b2 || options.beta2 || 0.999;
+       this.eps = options.eps || 1e-8;
+       this.max_grad_norm = options.max_grad_norm || 1.0;
+     } else {
+       // Individual parameters provided
+       this.lr = lr;
+       this.beta1 = b1;
+       this.beta2 = b2;
+       this.eps = eps;
+       this.max_grad_norm = max_grad_norm;
+     }
+
+     this.params = params;
+     this.m = params.map(p => zeros(p.param.length, p.param[0].length || 1));
+     this.v = params.map(p => zeros(p.param.length, p.param[0].length || 1));
+     this.t = 0;
    }
+
    step(){
      this.t++;
-     this.params.forEach((p,idx)=>{
-       for
-
-
-
-
-       const vHat=this.v[idx][i][j]/(1-Math.pow(this.beta2,this.t));
-       p.param[i][j]-=this.lr*mHat/(Math.sqrt(vHat)+this.eps);
+     this.params.forEach((p, idx) => {
+       // Calculate gradient norm for clipping
+       let grad_norm_sq = 0;
+       for (let i = 0; i < p.param.length; i++){
+         for (let j = 0; j < (p.param[0].length || 1); j++){
+           const grad_val = p.grad[i] && p.grad[i][j] !== undefined ? p.grad[i][j] : 0;
+           grad_norm_sq += grad_val * grad_val;
          }
+       }
+
+       const grad_norm = Math.sqrt(grad_norm_sq);
+       const clip_scale = grad_norm > this.max_grad_norm ? this.max_grad_norm / grad_norm : 1.0;
+
+       // Update with clipped gradients
+       for (let i = 0; i < p.param.length; i++){
+         for(let j = 0; j < (p.param[0].length || 1); j++){
+           if (p.grad[i] && p.grad[i][j] !== undefined){
+             const g = p.grad[i][j] * clip_scale;
+             this.m[idx][i][j] = this.beta1 * this.m[idx][i][j] + (1 - this.beta1) * g;
+             this.v[idx][i][j] = this.beta2 * this.v[idx][i][j] + (1 - this.beta2) * g * g;
+             const mHat = this.m[idx][i][j] / (1 - Math.pow(this.beta1, this.t));
+             const vHat = this.v[idx][i][j] / (1 - Math.pow(this.beta2, this.t));
+             p.param[i][j] -= this.lr * mHat / (Math.sqrt(vHat) + this.eps);
+           }
+         }
+       }
+     });
+   }
+ }
+
+ export class SGD{
+   constructor(params, lr = 0.01, max_grad_norm = 1.0) {
+     this.params = params;
+     this.lr = lr;
+     this.max_grad_norm = max_grad_norm; // Gradient clipping
+   }
+
+   step() {
+     this.params.forEach(p => {
+       // Calculate gradient norm
+       let grad_norm_sq = 0;
+       let total_params = 0;
+
+       for (let i = 0; i < p.param.length; i++){
+         const row = p.param[i];
+         for (let j = 0; j < (row.length || 1); j++) {
+           const grad_val = p.grad[i] && p.grad[i][j] !== undefined ? p.grad[i][j] : 0;
+           grad_norm_sq += grad_val * grad_val;
+           total_params++;
+         }
+       }
+
+       const grad_norm = Math.sqrt(grad_norm_sq);
+
+       // Apply gradient clipping if needed
+       const clip_scale = grad_norm > this.max_grad_norm ? this.max_grad_norm / grad_norm : 1.0;
+
+       // Update parameters with clipped gradients
+       for (let i = 0; i < p.param.length; i++){
+         const row = p.param[i];
+         for (let j = 0; j < (row.length || 1); j++) {
+           if (p.grad[i] && p.grad[i][j] !== undefined){
+             p.param[i][j] -= this.lr * (p.grad[i][j] * clip_scale);
+           }
+         }
+       }
+     });
+   }
+ }
+
+
+ export class LION {
+   constructor(params, options = {}) {
+     this.params = params;
+
+     const {
+       lr = 0.0001,       // LION typically uses a smaller LR
+       beta1 = 0.9,       // First moment decay
+       beta2 = 0.99,      // Second moment decay
+       weight_decay = 0,  // L2 regularization
+       eps = 1e-8         // Numerical stability
+     } = options;
+
+     this.lr = lr;
+     this.beta1 = beta1;
+     this.beta2 = beta2;
+     this.weight_decay = weight_decay;
+     this.eps = eps;
+
+     // Initialize momentums
+     this.m = params.map(p => zeros(p.param.length, p.param[0].length || 1));
+     this.t = 0;
+   }
+
+   step() {
+     this.t++;
+
+     this.params.forEach((p, idx) => {
+       for (let i = 0; i < p.param.length; i++) {
+         for (let j = 0; j < (p.param[0].length || 1); j++) {
+           if (p.grad[i] && p.grad[i][j] !== undefined) {
+             const grad = p.grad[i][j];
+
+             // Update momentum: m_t = β1 * m_{t-1} + (1 - β1) * g_t
+             this.m[idx][i][j] = this.beta1 * this.m[idx][i][j] + (1 - this.beta1) * grad;
+
+             // LION update: param = param - η * sign(m_t + β2 * g_t)
+             const update_term = this.m[idx][i][j] + this.beta2 * grad;
+
+             // Get sign with epsilon for stability
+             let sign_val;
+             if (update_term > this.eps) sign_val = 1;
+             else if (update_term < -this.eps) sign_val = -1;
+             else sign_val = 0;
+
+             let update = sign_val * this.lr;
+
+             // Add weight decay if specified
+             if (this.weight_decay > 0) {
+               update += this.weight_decay * this.lr * p.param[i][j];
+             }
+
+             p.param[i][j] -= update;
+           }
+         }
+       }
+     });
+   }
+
+   zeroGrad() {
+     this.params.forEach(p => {
+       if (p.grad) {
+         for (let i = 0; i < p.grad.length; i++) {
+           for (let j = 0; j < p.grad[i].length; j++) {
+             p.grad[i][j] = 0;
+           }
+         }
+       }
      });
    }
  }
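For intuition, a worked example of the clipping shared by Adam and SGD above (numbers mine, not from the diff): the gradients are scaled down only when their overall L2 norm exceeds max_grad_norm, which shrinks the step while preserving its direction.

```javascript
// grad = [[3, 4]] has L2 norm sqrt(3*3 + 4*4) = 5.
const grad = [[3, 4]];
const max_grad_norm = 1.0;

const grad_norm = Math.sqrt(grad.flat().reduce((s, g) => s + g * g, 0));        // 5
const clip_scale = grad_norm > max_grad_norm ? max_grad_norm / grad_norm : 1.0; // 0.2

// Applied gradients keep their direction but shrink to norm 1: [[0.6, 0.8]].
const clipped = grad.map(row => row.map(g => g * clip_scale));
console.log(clipped);
```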
@@ -619,6 +818,89 @@ export class LambdaLR {
    }
  }

+ // ---------------------- ReduceLROnPlateau Scheduler ----------------------
+ export class ReduceLROnPlateau {
+   constructor(optimizer, options = {}) {
+     this.optimizer = optimizer;
+
+     // Destructure with defaults
+     const {
+       patience = 10,
+       factor = 0.5,
+       min_lr = 1e-6,
+       threshold = 1e-4,
+       cooldown = 0,
+       verbose = false
+     } = options;
+
+     this.patience = patience;
+     this.factor = factor;
+     this.min_lr = min_lr;
+     this.threshold = threshold;
+     this.cooldown = cooldown;
+     this.verbose = verbose;
+
+     // State tracking
+     this.bestLoss = Infinity;
+     this.wait = 0;
+     this.cooldown_counter = 0;
+     this.num_reductions = 0;
+   }
+
+   step(loss) {
+     // Handle cooldown
+     if (this.cooldown_counter > 0) {
+       this.cooldown_counter--;
+       return;
+     }
+
+     // Check if this is significant improvement (relative threshold)
+     const improvement_needed = this.bestLoss * (1 - this.threshold);
+     const is_better = loss < improvement_needed;
+
+     if (is_better) {
+       // Significant improvement - reset
+       this.bestLoss = loss;
+       this.wait = 0;
+     } else {
+       // No significant improvement
+       this.wait += 1;
+     }
+
+     // Check if we've waited long enough
+     if (this.wait >= this.patience) {
+       this._reduce_lr();
+       this.cooldown_counter = this.cooldown;
+       this.wait = 0;
+     }
+   }
+
+   _reduce_lr() {
+     const old_lr = this.optimizer.lr;
+     const new_lr = Math.max(old_lr * this.factor, this.min_lr);
+
+     if (new_lr < old_lr) {
+       this.optimizer.lr = new_lr;
+       this.num_reductions++;
+
+       if (this.verbose) {
+         console.log(`ReduceLROnPlateau: reducing LR from ${old_lr} to ${new_lr}`);
+       }
+     }
+   }
+
+   get_last_lr() {
+     return this.optimizer.lr;
+   }
+
+   reset() {
+     this.bestLoss = Infinity;
+     this.wait = 0;
+     this.cooldown_counter = 0;
+     this.num_reductions = 0;
+   }
+ }
+
  // ---------------------- ELU Activation ----------------------
  export class ELU {
    constructor(alpha=1.0) {
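A quick worked check of the relative threshold in step(loss) above (values mine, not from the diff): with a best loss of 0.70 and the default threshold of 1e-4, only a loss below 0.70 × (1 − 1e-4) = 0.69993 counts as an improvement.

```javascript
const bestLoss = 0.70;
const threshold = 1e-4;
const improvement_needed = bestLoss * (1 - threshold); // 0.69993

console.log(0.6995 < improvement_needed); // true  -> wait resets to 0
console.log(0.6999 < improvement_needed); // false -> wait increments toward patience
```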
@@ -708,7 +990,6 @@ export class SiLU {
    }
  }

- export class SGD{ constructor(params,lr=0.01){ this.params=params; this.lr=lr; } step(){ this.params.forEach(p=>{ for(let i=0;i<p.param.length;i++) for(let j=0;j<(p.param[0].length||1);j++) p.param[i][j]-=this.lr*p.grad[i][j]; }); } }

  // ---------------------- BatchNorm2D ----------------------
  export class BatchNorm2d {
package/tests/scheduler.js
DELETED

@@ -1,23 +0,0 @@
- // Example: Test learning rate schedulers (StepLR and LambdaLR) with mini-jstorch optimizers
-
- import { SGD, StepLR, LambdaLR, Tensor } from "../src/jstorch.js";
-
- const param = { param: [[1, 2], [3, 4]], grad: [[0, 0], [0, 0]] };
- const optimizer = new SGD([param], 0.1);
-
- // --- Test StepLR ---
- console.log("Testing StepLR...");
- const stepScheduler = new StepLR(optimizer, 3, 0.5);
- for (let epoch = 1; epoch <= 10; epoch++) {
-   stepScheduler.step();
-   console.log(`Epoch ${epoch}: LR = ${optimizer.lr.toFixed(4)}`);
- }
-
- // --- Test LambdaLR ---
- console.log("\nTesting LambdaLR...");
- optimizer.lr = 0.1; // Reset LR
- const lambdaScheduler = new LambdaLR(optimizer, epoch => 1.0 / (1 + epoch));
- for (let epoch = 1; epoch <= 5; epoch++) {
-   lambdaScheduler.step();
-   console.log(`Epoch ${epoch}: LR = ${optimizer.lr.toFixed(4)}`);
- }
package/{tests → demo}/MakeModel.js
RENAMED
File without changes