npm - mini-jstorch - Versions diffs - 2.0.0 → 2.0.2 - Mend

mini-jstorch 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/src/jstorch.js CHANGED

@@ -81,62 +81,26 @@ export function crossEntropy(pred,target){
 // ---------------------- USERS FRIENDLY UTILS (USE THIS FOR YOUR UTILS!) ----------------
 export function fu_tensor(data, requiresGrad = false) {
-    if (!Array.isArray(data) || !Array.isArray(data[0])) {
-        throw new Error("fu_tensor: Data must be 2D array");
-    }
-    const tensor = new Tensor(data);
-    tensor.requiresGrad = requiresGrad;
-    return tensor;
+    if (!Array.isArray(data) || !Array.isArray(data[0])){
+		throw new Error("fu_tensor: Data must be 2D array");
+	}
+	return new Tensor(data, requiresGrad);
 }
-// fu_add
-export function fu_add(a, b) {
-    if (!(a instanceof Tensor) && !(b instanceof Tensor)) {
-        throw new Error("fu_add: At least one operand must be Tensor");
-    }
-    if (!(a instanceof Tensor)) {
-        a = fu_tensor(Array(b.shape()[0]).fill().map(() =>
-            Array(b.shape()[1]).fill(a)
-        ));
-    }
-    if (!(b instanceof Tensor)) {
-        b = fu_tensor(Array(a.shape()[0]).fill().map(() =>
-            Array(a.shape()[1]).fill(b)
-        ));
-    }
-    if (a.shape()[0] !== b.shape()[0] || a.shape()[1] !== b.shape()[1]) {
-        throw new Error(`fu_add: Shape mismatch ${a.shape()} vs ${b.shape()}`);
-    }
-    return new Tensor(a.data.map((r, i) => r.map((v, j) => v + b.data[i][j])));
+// fu_add
+export function fu_add(a, b){
+	if (!(a instanceof Tensor)) a = fu_tensor(a);
+	if (!(b instanceof Tensor)) b = fu_tensor(b);
+	return a.add(b);
 }
 // fu_mul
-export function fu_mul(a, b) {
-    if (!(a instanceof Tensor) && !(b instanceof Tensor)) {
-        throw new Error("fu_mul: At least one operand must be Tensor");
-    }
-    if (!(a instanceof Tensor)) {
-        a = fu_tensor(Array(b.shape()[0]).fill().map(() =>
-            Array(b.shape()[1]).fill(a)
-        ));
-    }
-    if (!(b instanceof Tensor)) {
-        b = fu_tensor(Array(a.shape()[0]).fill().map(() =>
-            Array(a.shape()[1]).fill(b)
-        ));
-    }
-    if (a.shape()[0] !== b.shape()[0] || a.shape()[1] !== b.shape()[1]) {
-        throw new Error(`fu_mul: Shape mismatch ${a.shape()} vs ${b.shape()}`);
-    }
-    return new Tensor(a.data.map((r, i) => r.map((v, j) => v * b.data[i][j])));
+export function fu_mul(a, b){
+	if (!(a instanceof Tensor)) a = fu_tensor(a);
+	if (!(b instanceof Tensor)) b = fu_tensor(b);
+	return a.mul(b);
 }
 // fu_matmul
@@ -144,11 +108,7 @@ export function fu_matmul(a, b) {
     if (!(a instanceof Tensor)) a = fu_tensor(a);
     if (!(b instanceof Tensor)) b = fu_tensor(b);
-    if (a.shape()[1] !== b.shape()[0]) {
-        throw new Error(`fu_matmul: Inner dimension mismatch ${a.shape()[1]} vs ${b.shape()[0]}`);
-    }
-    return new Tensor(dot(a.data, b.data));
+    return a.matmul(b);
 }
 // fu_sum
@@ -344,276 +304,144 @@ export class Tensor {
 // ---------------------- Layers ----------------------
 export class Linear {
-    constructor(inputDim, outputDim){
-        this.W = randomMatrix(inputDim, outputDim);
-        this.b = Array(outputDim).fill(0);
-        this.gradW = zeros(inputDim, outputDim);
-        this.gradb = Array(outputDim).fill(0);
-        this.x = null;
-        this.originalShape = null;
-        this._WFlat = null;
-        this._bFlat = null;
+    constructor(inFeatures, outFeatures) {
+        this.inFeatures = inFeatures;
+        this.outFeatures = outFeatures;
+        // Weights: [inFeatures, outFeatures]
+        this.W = randomMatrix(inFeatures, outFeatures);
+        this.gradW = zeros(inFeatures, outFeatures);
+        // Bias: [1, outFeatures]
+        this.b = [Array(outFeatures).fill(0)];
+        this.gradB = [Array(outFeatures).fill(0)];
+        this.x = null; // cache input
     }
-	_updateCache() {
-		const rows = this.W.length;
-		const cols = this.W[0].length;
-		this._WFlat = new Float32Array(rows * cols);
-		for (let i = 0; i < rows; i++){
-			const offset = i * cols;
-			const row = this.W[i];
-			for (let j = 0; j < cols; j++){
-				this._WFlat[offset + j] = row[j];
-			}
-		}
-		this._bFlat = new Float32Array(this.b);
-	}
-    forward(x){
-        this.originalShape = this._getShapeType(x);
-        if (this.originalShape === '3d') {
-            this.x = x.map(sample => sample[0]);
-        } else {
-            this.x = x;
-        }
-		this._updateCache();
-		const m = this.x.length;
-		const k = this.x[0].length;
-		const n = this.W[0].length;
-        if (!this._WFlat) {
-            const rows = this.W.length;
-            const cols = this.W[0].length;
-            this._WFlat = new Float32Array(rows * cols);
-            for (let i = 0; i < rows; i++) {
-                const offset = i * cols;
-                const row = this.W[i];
-                for (let j = 0; j < cols; j++) {
-                    this._WFlat[offset + j] = row[j];
-                }
+    forward(x) {
+        // x: [batch, inFeatures]
+        this.x = x;
+        const out = dot(x, this.W); // [batch, outFeatures]
+        // add bias
+        for (let i = 0; i < out.length; i++) {
+            for (let j = 0; j < this.outFeatures; j++) {
+                out[i][j] += this.b[0][j];
             }
-            this._bFlat = new Float32Array(this.b);
         }
+        return out;
+    }
+    backward(grad) {
+        // grad: [batch, outFeatures]
+        const batchSize = grad.length;
-        // Flatten input x to Float32Array
-        const xFlat = new Float32Array(m * k);
-        for (let i = 0; i < m; i++) {
-            const row = this.x[i];
-            const offset = i * k;
-            for (let j = 0; j < k; j++) {
-                xFlat[offset + j] = row[j];
+        // mutate in place to preserve optimizer Reference
+        const xT = transpose(this.x);
+        const computedGradW = dot(xT, grad);
+        for (let i=0; i<this.inFeatures; i++){
+            for (let j=0; j<this.outFeatures; j++){
+                this.gradW[i][j] = computedGradW[i][j]
             }
         }
-        const outFlat = new Float32Array(m * n);
-        for (let i = 0; i < m; i++) {
-            const xOffset = i * k;
-            for (let j = 0; j < n; j++) {
-                let sum = 0;
-                for (let l = 0; l < k; l++) {
-                    sum += xFlat[xOffset + l] * this._WFlat[l * n + j];
-                }
-                outFlat[i * n + j] = sum + this._bFlat[j];
+        // gradB = sum over batch
+        for (let j=0; j<this.outFeatures; j++){
+            let sum = 0;
+            for (let i=0; i <batchSize; i++){
+                sum+=grad[i][j]
             }
+            this.gradB[0][j] = sum;
         }
-        const out = Array(m);
-        for (let i = 0; i < m; i++) {
-            const row = Array(n);
-            const offset = i * n;
-            for (let j = 0; j < n; j++) {
-                row[j] = outFlat[offset + j];
-            }
-            out[i] = row;
-        }
+        const WT = transpose(this.W);
+        const gradInput = dot(grad, WT);
-        return out;
+        return gradInput;
     }
-    backward(grad){
-		const m = this.x.length;
-		const k = this.W.length;      // input dim
-		const n = this.W[0].length;   // output dim
-		// Convert grad to Float32Array
-		const gradFlat = new Float32Array(m * n);
-		for (let i = 0; i < m; i++) {
-			const row = grad[i];
-			const offset = i * n;
-			for (let j = 0; j < n; j++) {
-				gradFlat[offset + j] = row[j];
-			}
-		}
-		// Convert x to Float32Array
-		const xFlat = new Float32Array(m * k);
-		for (let i = 0; i < m; i++) {
-			const row = this.x[i];
-			const offset = i * k;
-			for (let j = 0; j < k; j++) {
-				xFlat[offset + j] = row[j];
-			}
-		}
-		// Reset gradW
-		for (let i = 0; i < this.gradW.length; i++) {
-			for (let j = 0; j < this.gradW[0].length; j++) {
-				this.gradW[i][j] = 0;
-			}
-		}
-		// Compute gradW = x^T * grad
-		for (let i = 0; i < k; i++) {
-			for (let j = 0; j < n; j++) {
-				let sum = 0;
-				for (let batch = 0; batch < m; batch++) {
-					sum += xFlat[batch * k + i] * gradFlat[batch * n + j];
-				}
-				this.gradW[i][j] = sum;
-			}
-		}
-		// Compute gradb
-		for (let j = 0; j < n; j++) {
-			let sum = 0;
-			for (let batch = 0; batch < m; batch++) {
-				sum += gradFlat[batch * n + j];
-			}
-			this.gradb[j] = sum;
-		}
-		const gradInputFlat = new Float32Array(m * k);
-		for (let i = 0; i < m; i++) {
-			for (let j = 0; j < k; j++) {
-				let sum = 0;
-				for (let l = 0; l < n; l++) {
-					sum += gradFlat[i * n + l] * this.W[j][l];
-				}
-				gradInputFlat[i * k + j] = sum;
-			}
-		}
-		// Convert back to 2D array
-		const gradInput = Array(m);
-		for (let i = 0; i < m; i++) {
-			const row = Array(k);
-			const offset = i * k;
-			for (let j = 0; j < k; j++) {
-				row[j] = gradInputFlat[offset + j];
-			}
-			gradInput[i] = row;
-		}
-		if (this.originalShape === '3d') {
-			return gradInput.map(row => [row]);
-		}
-		return gradInput;
-	}
+    step(lr) {
+        for (let i = 0; i < this.inFeatures; i++) {
+            for (let j = 0; j < this.outFeatures; j++) {
+                this.W[i][j] -= lr * this.gradW[i][j];
+            }
+        }
-    _getShapeType(x) {
-        if (Array.isArray(x[0]) && Array.isArray(x[0][0]) && !Array.isArray(x[0][0][0])) {
-            return '3d';
-        } else if (Array.isArray(x[0]) && !Array.isArray(x[0][0])) {
-            return '2d';
-        } else {
-            throw new Error(`Unsupported input shape for Linear layer`);
+        for (let j = 0; j < this.outFeatures; j++) {
+            this.b[0][j] -= lr * this.gradB[0][j];
         }
     }
-    parameters(){
-        return [
-            {param: this.W, grad: this.gradW},
-            {param: [this.b], grad: [this.gradb]}
-        ];
+    parameters() {
+        return [
+            { param: this.W, grad: this.gradW },
+            { param: this.b, grad: this.gradB }
+        ];
     }
 }
 export class Flatten {
-    constructor() {
-        this.originalShape = null;
-    }
-    forward(x) {
-        // Always convert to [batch, features] format
-        this.originalShape = x.map(sample => this._getShape(sample));
-        return x.map(sample => {
-            const flat = this._flatten(sample);
-            return flat; // Return as 1D array for [batch, features] compatibility
-        });
-    }
-    backward(grad) {
-        // grad is [batch, features], reshape back to original shape
-        return grad.map((flatGrad, batchIdx) => {
-            const shape = this.originalShape[batchIdx];
-            return this._unflatten(flatGrad, shape);
-        });
-    }
-    _getShape(sample) {
-        if (Array.isArray(sample[0]) && Array.isArray(sample[0][0])) {
-            return {
-                type: '3d',
-                dims: [sample.length, sample[0].length, sample[0][0].length]
-            };
-        } else if (Array.isArray(sample[0])) {
-            return {
-                type: '2d',
-                dims: [sample.length, sample[0].length]
-            };
-        } else {
-            return {
-                type: '1d',
-                dims: [sample.length]
-            };
-        }
-    }
-    _flatten(sample) {
-        if (Array.isArray(sample[0]) && Array.isArray(sample[0][0])) {
-            return sample.flat(2); // [channels, height, width] -> flat
-        } else if (Array.isArray(sample[0])) {
-            return sample.flat(); // [height, width] -> flat
-        } else {
-            return sample; // already flat
-        }
-    }
-    _unflatten(flat, shape) {
-        if (shape.type === '3d') {
-            const [channels, height, width] = shape.dims;
-            const result = [];
-            let index = 0;
-            for (let c = 0; c < channels; c++) {
-                const channel = [];
-                for (let h = 0; h < height; h++) {
-                    const row = [];
-                    for (let w = 0; w < width; w++) {
-                        row.push(flat[index++]);
-                    }
-                    channel.push(row);
-                }
-                result.push(channel);
-            }
-            return result;
-        } else if (shape.type === '2d') {
-            const [height, width] = shape.dims;
-            const result = [];
-            for (let h = 0; h < height; h++) {
-                result.push(flat.slice(h * width, h * width + width));
-            }
-            return result;
-        } else {
-            return flat; // 1d
-        }
-    }
-    parameters() { return []; }
+	constructor(){
+		this.originalShape = null;
+	}
+	forward(x){
+		// Save full shape per sample
+		this.originalShape = x.map(sample => this._getDims(sample));
+		return x.map(sample => this._flattenDeep(sample));
+	}
+	backward(grad){
+		return grad.map((flat, i) =>
+			this._reshape(flat, this.originalShape[i])
+		);
+	}
+	// Get dimensions recursively
+	_getDims(arr){
+		const dims = [];
+		let current = arr;
+		while (Array.isArray(current)){
+			dims.push(current.length);
+			current = current[0];
+		}
+		return dims;
+	}
+	// Flatten ANY depth
+	_flattenDeep(arr){
+		return arr.flat(Infinity);
+	}
+	// Reshape back using saved dims
+	_reshape(flat, dims){
+		let index = 0;
+		function build(dimIdx){
+			const size = dims[dimIdx];
+			const result = [];
+			if (dimIdx === dims.length - 1){
+				for (let i=0; i<size; i++){
+					result.push(flat[index++]);
+				}
+			} else {
+				for (let i=0; i<size; i++){
+					result.push(build(dimIdx + 1));
+				}
+			}
+			return result;
+		}
+		return build(0);
+	}
+	parameters() { return []; }
 }
 // ---------------------- Conv2D (BETA) ----------------------
@@ -632,24 +460,25 @@ export class Conv2D {
         );
         this.x = null;
-        // Cache Float32Array untuk kernels
         this._WFlat = null;
-        this._cacheKernels();
+        this._updateCache();
     }
-    _cacheKernels() {
-        this._WFlat = this.W.map(oc =>
+    _updateCache(){
+        this._WFlat = this.W.map(oc =>
             oc.map(ic => {
                 const rows = ic.length;
                 const cols = ic[0].length;
                 const flat = new Float32Array(rows * cols);
-                for (let i = 0; i < rows; i++) {
+                for (let i=0; i<rows; i++){
                     const offset = i * cols;
                     const row = ic[i];
-                    for (let j = 0; j < cols; j++) {
-                        flat[offset + j] = row[j];
+                    for (let j=0; j<cols; j++){
+                        flat[offset+j] = row[j];
                     }
                 }
                 return flat;
             })
         );
@@ -697,6 +526,8 @@ export class Conv2D {
     }
     forward(batch) {
+        this._updateCache();
         this.x = batch;
         const kH = this.kernel;
         const kW = this.kernel;
@@ -727,43 +558,52 @@ export class Conv2D {
     backward(grad) {
         const batchSize = this.x.length;
-        const gradW = this.gradW.map(oc => oc.map(ic => zeros(this.kernel, this.kernel)));
-        const gradInput = this.x.map(sample =>
+        const gradInput = this.x.map(sample =>
             sample.map(chan => zeros(chan.length, chan[0].length))
         );
-        for (let b = 0; b < batchSize; b++) {
-            for (let oc = 0; oc < this.outC; oc++) {
-                for (let ic = 0; ic < this.inC; ic++) {
+        // Zero existing gradW in place
+        for (let oc=0; oc<this.outC; oc++){
+            for (let ic=0; ic<this.inC; ic++){
+                for (let i=0; i<this.kernel; i++){
+                    for (let j=0; j<this.kernel; j++){
+                        this.gradW[oc][ic][i][j] = 0;
+                    }
+                }
+            }
+        }
+        for (let b=0; b<batchSize; b++){
+            for (let oc=0; oc<this.outC; oc++){
+                for (let ic=0; ic<this.inC; ic++){
                     const outGrad = grad[b][oc];
-                    // Compute gradW
-                    for (let i = 0; i < this.kernel; i++) {
-                        for (let j = 0; j < this.kernel; j++) {
+                    // Accumulate gradW in place
+                    for (let i=0; i<this.kernel; i++){
+                        for (let j=0; j<this.kernel; j++){
                             let sum = 0;
-                            for (let y = 0; y < outGrad.length; y++) {
-                                for (let x = 0; x < outGrad[0].length; x++) {
+                            for (let y=0; y<outGrad.length; y++){
+                                for (let x=0; x<outGrad[0].length; x++){
                                     const inY = y * this.stride + i;
                                     const inX = x * this.stride + j;
-                                    if (inY < this.x[b][ic].length && inX < this.x[b][ic][0].length) {
-                                        sum += this.x[b][ic][inY][inX] * outGrad[y][x];
+                                    if (inY<this.x[b][ic].length && inX < this.x[b][ic][0].length){
+                                        sum+=this.x[b][ic][inY][inX] * outGrad[y][x];
                                     }
                                 }
                             }
-                            gradW[oc][ic][i][j] += sum;
+                            this.gradW[oc][ic][i][j] += sum;
                         }
                     }
-                    // Compute gradInput
-                    for (let y = 0; y < outGrad.length; y++) {
-                        for (let x = 0; x < outGrad[0].length; x++) {
-                            for (let ki = 0; ki < this.kernel; ki++) {
-                                for (let kj = 0; kj < this.kernel; kj++) {
+                    // Compute gradInput
+                    for (let y=0; y<outGrad.length; y++){
+                        for (let x=0; x<outGrad[0].length; x++){
+                            for (let ki=0; ki<this.kernel; ki++){
+                                for (let kj=0; kj<this.kernel; kj++){
                                     const inY = y * this.stride + ki;
                                     const inX = x * this.stride + kj;
-                                    if (inY < gradInput[b][ic].length && inX < gradInput[b][ic][0].length) {
-                                        gradInput[b][ic][inY][inX] +=
-                                            this.W[oc][ic][ki][kj] * outGrad[y][x];
+                                    if (inY<gradInput[b][ic].length && inX < gradInput[b][ic][0].length){
+                                        gradInput[b][ic][inY][inX] += this.W[oc][ic][ki][kj] * outGrad[y][x];
                                     }
                                 }
                             }
@@ -772,8 +612,7 @@ export class Conv2D {
                 }
             }
         }
-        this.gradW = gradW;
         return gradInput;
     }
@@ -781,7 +620,7 @@ export class Conv2D {
         return this.W.flatMap((w, oc) =>
             w.map((wc, ic) => ({
                 param: wc,
-                grad: this.gradW[oc][ic]
+                grad: this.gradW[oc][ic]  // Reference stays valid — gradW is mutated in-place now
             }))
         );
     }
@@ -872,6 +711,14 @@ export class Sequential {
 		return state;
 	}
+	step(lr){
+		this.layers.forEach(layer => {
+			if(typeof layer.step === "function"){
+				layer.step(lr);
+			}
+		})
+	}
 	/**
 	* Load state dict
 	*/
@@ -895,43 +742,35 @@ export class Sequential {
 }
 // ---------------------- Activations ----------------------
-export class ReLU{
-    constructor(){ this.mask = null; this.originalShape = null; }
-    forward(x){
-        this.originalShape = this._getShapeType(x);
-        if (this.originalShape === '3d') {
-            // Handle [batch, 1, features]
-            this.mask = x.map(sample => sample[0].map(v => v > 0));
-            return x.map(sample => [sample[0].map(v => Math.max(0, v))]);
-        } else {
-            // Handle [batch, features]
-            this.mask = x.map(row => row.map(v => v > 0));
-            return x.map(row => row.map(v => Math.max(0, v)));
-        }
-    }
-    backward(grad){
-        if (this.originalShape === '3d') {
-            return grad.map((sample, i) =>
-                [sample[0].map((v, j) => this.mask[i][j] ? v : 0)]
-            );
-        } else {
-            return grad.map((row, i) =>
-                row.map((v, j) => this.mask[i][j] ? v : 0)
-            );
+export class ReLU {
+    constructor() {
+        this.mask = null;
+    }
+    forward(x) {
+        this.mask = this._mapRecursive(x, v => v > 0);
+        return this._mapRecursive(x, v => Math.max(0, v));
+    }
+    backward(grad) {
+        return this._mapRecursiveWithMask(grad, this.mask, (g, m) => m ? g : 0);
+    }
+    // ===== helper =====
+    _mapRecursive(arr, fn) {
+        if (Array.isArray(arr)) {
+            return arr.map(v => this._mapRecursive(v, fn));
         }
+        return fn(arr);
     }
-    _getShapeType(x) {
-        if (Array.isArray(x[0]) && Array.isArray(x[0][0]) && !Array.isArray(x[0][0][0])) {
-            return '3d';
-        } else if (Array.isArray(x[0]) && !Array.isArray(x[0][0])) {
-            return '2d';
-        } else {
-            throw new Error(`Unsupported input shape for ReLU`);
+    _mapRecursiveWithMask(arr, mask, fn) {
+        if (Array.isArray(arr)) {
+            return arr.map((v, i) =>
+                this._mapRecursiveWithMask(v, mask[i], fn)
+            );
         }
+        return fn(arr, mask);
     }
 }
@@ -1156,7 +995,10 @@ export class Tokenizer {
             .replace(/\s+/g, ' ')
             .trim()
             .split(' ')
-            .filter(word => word.length > 0 && !/^[.!?;:,]+$/.test(word) || word.length > 1);
+            .filter(word =>
+				word.length > 0 &&
+				(!/^[.!?;:,]+$/.test(word) || word.length > 1)
+			);
     }
     /**
@@ -1812,23 +1654,25 @@ export class ReduceLROnPlateau {
 // ---------------------- ELU Activation ----------------------
 export class ELU {
-    constructor(alpha=1.0) {
+    constructor(alpha=1.0){
         this.alpha = alpha;
-        this.out = null;
+        this.x = null; // Cache input for correct derivative
     }
-    forward(x) {
-        this.out = x.map(row =>
-            row.map(v => v > 0 ? v : this.alpha * (Math.exp(v) - 1))
+    forward(x){
+        this.x=x; // Store original input for backward
+        return x.map(row =>
+            row.map(v=>v>0 ? v : this.alpha*(Math.exp(v) - 1))
         );
-        return this.out;
     }
-    backward(grad) {
-        return grad.map((row, i) =>
-            row.map((v, j) =>
-                v * (this.out[i][j] > 0 ? 1 : this.alpha * Math.exp(this.out[i][j]))
-            )
+    backward(grad){
+        return grad.map((row, i) =>
+            row.map((v, j) => {
+                const xVal = this.x[i][j];
+                // d/dx ELU: 1 if x > 0, else alpha * exp(x)
+                return v * (xVal > 0 ? 1 : this.alpha * Math.exp(xVal));
+            })
         );
     }
 }
@@ -1910,10 +1754,10 @@ export class BatchNorm2d {
         // Parameters
         if (affine) {
-            this.weight = Array(numFeatures).fill(1);
-            this.bias = Array(numFeatures).fill(0);
-            this.gradWeight = Array(numFeatures).fill(0);
-            this.gradBias = Array(numFeatures).fill(0);
+            this.weight = [Array(numFeatures).fill(1)];
+			this.bias = [Array(numFeatures).fill(0)];
+			this.gradWeight = [Array(numFeatures).fill(0)];
+			this.gradBias = [Array(numFeatures).fill(0)];
         }
         // Running statistics
@@ -1993,7 +1837,7 @@ export class BatchNorm2d {
                             // Apply affine transformation if enabled
                             if (this.affine) {
-                                channelOut[i][j] = channelOut[i][j] * this.weight[c] + this.bias[c];
+								channelOut[i][j] = channelOut[i][j] * this.weight[0][c] + this.bias[0][c];
                             }
                         }
                     }
@@ -2022,7 +1866,7 @@ export class BatchNorm2d {
                             // Apply affine transformation if enabled
                             if (this.affine) {
-                                channelOut[i][j] = channelOut[i][j] * this.weight[c] + this.bias[c];
+                                channelOut[i][j] = channelOut[i][j] * this.weight[0][c] + this.bias[0][c];
                             }
                         }
                     }
@@ -2052,8 +1896,10 @@ export class BatchNorm2d {
         );
         if (this.affine) {
-            this.gradWeight.fill(0);
-            this.gradBias.fill(0);
+            for (let c=0; c<channels; c++){
+                this.gradWeight[0][c] = 0;
+                this.gradBias[0][c] = 0;
+            }
         }
         for (let c = 0; c < channels; c++) {
@@ -2083,7 +1929,7 @@ export class BatchNorm2d {
                         let grad = channelGrad[i][j];
                         if (this.affine) {
-                            grad *= this.weight[c];
+                            grad *= this.weight[0][c];
                         }
                         grad *= stdInv;
@@ -2093,8 +1939,8 @@ export class BatchNorm2d {
             }
             if (this.affine) {
-                this.gradWeight[c] = sumGradWeight / batchSize;
-                this.gradBias[c] = sumGradBias / batchSize;
+                this.gradWeight[0][c] = sumGradWeight / batchSize;
+                this.gradBias[0][c] = sumGradBias / batchSize;
             }
         }
@@ -2104,9 +1950,9 @@ export class BatchNorm2d {
     parameters() {
         if (!this.affine) return [];
         return [
-            { param: [this.weight], grad: [this.gradWeight] },
-            { param: [this.bias], grad: [this.gradBias] }
-        ];
+			{ param: this.weight, grad: this.gradWeight },
+			{ param: this.bias, grad: this.gradBias }
+		]
     }
     train() { this.training = true; }
@@ -2115,20 +1961,143 @@ export class BatchNorm2d {
 // ---------------------- Model Save/Load (BETA) ----------------------
 export function saveModel(model){
-    if(!(model instanceof Sequential)) throw new Error("saveModel supports only Sequential");
-    const weights=model.layers.map(layer=>({weights:layer.W||null,biases:layer.b||null}));
-    return JSON.stringify(weights);
-	/* Didn't expect this to work */
+    if(!(model instanceof Sequential)){
+        throw new Error("saveModel supports only Sequential models");
+    }
+    const state = {
+        version: "2.0.0",
+        layers: model.layers.map((layer, idx) => {
+            const params = layer.parameters ? layer.parameters() : [];
+            if (params.length === 0){
+                return { type: layer.constructor.name, params: [] };
+            }
+            return {
+                type: layer.constructor.name,
+                params: params.map(p => ({
+                    // Deep clone parameter data
+                    data: p.param.map(row =>
+                        Array.isArray(row) ? [...row] : row
+                    ),
+                    // Preserve shape metadata for validation
+                    shape: Array.isArray(p.param[0])
+                        ? [p.param.length, p.param[0].length]
+                        : [p.param.length]
+                }))
+            };
+        })
+    };
+    return JSON.stringify(state);
 }
-export function loadModel(model,json){
-    if(!(model instanceof Sequential)) throw new Error("loadModel supports only Sequential");
-    const weights=JSON.parse(json);
-    model.layers.forEach((layer,i)=>{
-        if(layer.W && weights[i].weights) layer.W=weights[i].weights;
-        if(layer.b && weights[i].biases) layer.b=weights[i].biases;
-    });
-	/* Didn't expect this to work */
+export function loadModel(model, json){
+    if (!(model instanceof Sequential)){
+        throw new Error("loadModel supports only Sequential models");
+    }
+    const state = JSON.parse(json);
+    // Validate structure
+    if (!state.layers || !Array.isArray(state.layers)){
+        throw new Error("loadModel: invalid save format - missing 'layers' array");
+    }
+    if (state.layers.length !== model.layers.length){
+        console.warn(
+            `[JST WARN]: Layer count mismatch - saved ${state.layers.length},` +
+            `current model has ${model.layers.length}. Loading what matches.`
+        );
+    }
+    let loadedCount = 0;
+    let skippedCount = 0;
+    for(let i=0; i<Math.min(state.layers.length, model.layers.length); i++){
+        const savedLayer = state.layers[i];
+        const currentLayer = model.layers[i];
+        if(savedLayer.params.length === 0){
+            // Layer with no trainable params - skip
+            continue
+        }
+        // Validate layer type
+        if (savedLayer.type !== currentLayer.constructor.name){
+            console.warn(
+                `[JST WARN]: Layer ${i} type mismatch - ` +
+                `saved: ${savedLayer.type}, current: ${currentLayer.constructor.name}. Skipping.`
+            );
+            skippedCount++;
+            continue;
+        }
+        // Get current layer parameters
+        const currentParams = currentLayer.parameters ? currentLayer.parameters() : [];
+        if (currentParams.length !== savedLayer.params.length){
+            console.warn(
+                `[JST WARN]: Layer ${i} parameter count mismatch - ` +
+                `saved: ${savedLayer.params.length}, current: ${currentParams.length}. Skipping.`
+            );
+            skippedCount++;
+            continue;
+        }
+        // Load parameters wiht shape validation
+        for (let j=0; j<savedLayer.params.length; j++){
+            const savedParam = savedLayer.params[j];
+            const currentParam = currentParams[j].param;
+            // Validate shape
+            const currentRows = currentParam.length;
+            const currentCols = Array.isArray(currentParam[0])
+                ? currentParam[0].length
+                : 1;
+            const savedRows = savedParam.shape[0];
+            const savedCols = savedParam.shape[1] || 1;
+            if (currentRows !== savedRows || currentCols !== savedCols){
+                console.warn(
+                    `[JST WARN]: Layer ${i} param ${j} shape mismatch - ` +
+                    `saved: [${savedRows}, ${savedCols}],` +
+                    `current: [${currentRows}, ${currentCols}]. Skipping this parameter.`
+                );
+                continue
+            }
+            // Copy parameter data
+            if (Array.isArray(currentParam[0])){
+                // 2D Parameter
+                for (let r=0; r<currentRows; r++){
+                    for (let c=0; c<currentCols; c++){
+                        currentParam[r][c] = savedParam.data[r][c];
+                    }
+                }
+            } else {
+                // 1D parameter
+                for (let r=0; r<currentRows; r++){
+                    currentParam[r] = savedParam.data[r];
+                }
+            }
+        }
+        // Invalidate any cached flat representations
+        if (typeof currentLayer._updateCache === 'function'){
+            currentLayer._updateCache();
+        }
+        loadedCount++;
+    }
+    console.log(
+        `[JST]: Model loaded: ${loadedCount} layers restored, ${skippedCount} skipped.`
+    );
+    return model;
 }
 // ---------------------- Advanced Utils ----------------------