mini-jstorch 1.4.5 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -46
- package/demo/MakeModel.js +36 -0
- package/demo/fu_fun.js +72 -0
- package/demo/scheduler.js +69 -0
- package/index.js +1 -1
- package/package.json +1 -1
- package/src/jstorch.js +1237 -0
- package/src/MainEngine.js +0 -663
- package/tests/MakeModel.js +0 -38
- package/tests/scheduler.js +0 -23
package/src/MainEngine.js
DELETED
|
@@ -1,663 +0,0 @@
|
|
|
1
|
-
/*!
|
|
2
|
-
* Project: mini-jstorch
|
|
3
|
-
* File: MainEngine.js
|
|
4
|
-
* Author: M. Rizal H. (Actual Author Name)
|
|
5
|
-
* License: MIT
|
|
6
|
-
* Copyright (C) 2025 M. Rizal H.
|
|
7
|
-
*
|
|
8
|
-
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
9
|
-
* of this software and associated documentation files (the "Software"), to deal
|
|
10
|
-
* in the Software without restriction, including without limitation the rights
|
|
11
|
-
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
12
|
-
* copies of the Software, and to permit persons to whom the Software is
|
|
13
|
-
* furnished to do so, subject to the following conditions:
|
|
14
|
-
*
|
|
15
|
-
* The above copyright notice and this permission notice shall be included in all
|
|
16
|
-
* copies or substantial portions of the Software.
|
|
17
|
-
*
|
|
18
|
-
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
19
|
-
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
20
|
-
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
21
|
-
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
22
|
-
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
23
|
-
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
24
|
-
* SOFTWARE.
|
|
25
|
-
*/
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
// ---------------------- Utilities ----------------------
|
|
29
|
-
/**
 * Create a matrix (array of row arrays) in which every entry is 0.
 *
 * @param {number} rows - Number of rows.
 * @param {number} cols - Number of entries per row.
 * @returns {number[][]} A new matrix; each row is its own array.
 */
export function zeros(rows, cols) {
  const makeRow = () => new Array(cols).fill(0);
  return Array.from({ length: rows }, makeRow);
}
|
|
32
|
-
|
|
33
|
-
/**
 * Create a matrix (array of row arrays) in which every entry is 1.
 *
 * @param {number} rows - Number of rows.
 * @param {number} cols - Number of entries per row.
 * @returns {number[][]} A new matrix; each row is its own array.
 */
export function ones(rows, cols) {
  const makeRow = () => new Array(cols).fill(1);
  return Array.from({ length: rows }, makeRow);
}
|
|
36
|
-
|
|
37
|
-
/**
 * Create a matrix of pseudo-random values drawn with Math.random() and
 * mapped into the interval [-scale, scale).
 *
 * @param {number} rows - Number of rows.
 * @param {number} cols - Number of entries per row.
 * @param {number} [scale=0.1] - Half-width of the sampling interval.
 * @returns {number[][]} A new rows x cols matrix.
 */
export function randomMatrix(rows, cols, scale = 0.1) {
  const sample = () => (Math.random() * 2 - 1) * scale;
  const makeRow = () => Array.from({ length: cols }, sample);
  return Array.from({ length: rows }, makeRow);
}
|
|
42
|
-
|
|
43
|
-
/**
 * Transpose a matrix given as an array of row arrays.
 *
 * The column count is taken from the first row.
 *
 * @param {Array<Array<*>>} matrix - Matrix to transpose.
 * @returns {Array<Array<*>>} A new matrix whose rows are the columns of `matrix`.
 *   An empty input yields an empty array instead of throwing.
 */
export function transpose(matrix) {
  // Without this guard matrix[0] is undefined and .map throws a TypeError.
  if (matrix.length === 0) return [];
  return matrix[0].map((_, col) => matrix.map((row) => row[col]));
}
|
|
46
|
-
|
|
47
|
-
/**
 * Add matrix `b` onto matrix `a` element by element.
 *
 * The result always has the shape of `a`. Wherever `b` has no row, or the
 * entry in `b` is undefined, 0 is added instead, so a smaller `b` is accepted.
 *
 * @param {number[][]} a - Left operand; determines the output shape.
 * @param {number[][]} b - Right operand.
 * @returns {number[][]} A new matrix; neither input is modified.
 */
export function addMatrices(a, b) {
  return a.map((row, i) => {
    const other = b[i];
    return row.map((value, j) => {
      const addend = other && other[j] !== undefined ? other[j] : 0;
      return value + addend;
    });
  });
}
|
|
52
|
-
|
|
53
|
-
/**
 * Matrix product of `a` and `b`.
 *
 * The inner dimension is read from the first row of `a` and the output width
 * from the first row of `b`; no shape validation is performed.
 *
 * @param {number[][]} a - Left matrix.
 * @param {number[][]} b - Right matrix (must have at least one row).
 * @returns {number[][]} A new matrix with a.length rows and b[0].length columns.
 */
export function dot(a, b) {
  const outCols = b[0].length;
  return a.map((rowA) => {
    // Read lazily so that an empty `a` never touches a[0].
    const inner = a[0].length;
    const outRow = new Array(outCols).fill(0);
    for (let j = 0; j < outCols; j++) {
      let acc = 0;
      for (let k = 0; k < inner; k++) {
        acc += rowA[k] * b[k][j];
      }
      outRow[j] = acc;
    }
    return outRow;
  });
}
|
|
61
|
-
|
|
62
|
-
/**
 * Numerically stabilised softmax of a vector.
 *
 * The maximum is subtracted before exponentiation so large inputs do not
 * overflow. The maximum is found with a loop rather than Math.max(...x),
 * because spreading a very long array into a call can exceed the engine's
 * argument limit and throw a RangeError.
 *
 * @param {number[]} x - Input scores.
 * @returns {number[]} Values of the same length as `x` that sum to 1
 *   (an empty array for empty input).
 */
export function softmax(x) {
  let peak = -Infinity;
  for (const v of x) {
    if (v > peak) peak = v;
  }
  const exps = x.map((v) => Math.exp(v - peak));
  const total = exps.reduce((a, b) => a + b, 0);
  return exps.map((v) => v / total);
}
|
|
68
|
-
|
|
69
|
-
/**
 * Cross-entropy between a predicted distribution and a target vector:
 * -sum(target[i] * log(pred[i] + eps)).
 *
 * @param {number[]} pred - Predicted probabilities.
 * @param {number[]} target - Target weights (iterated to drive the sum).
 * @returns {number} The cross-entropy value.
 */
export function crossEntropy(pred, target) {
  const eps = 1e-12; // keeps Math.log away from log(0)
  let total = 0;
  target.forEach((t, i) => {
    total = total + t * Math.log(pred[i] + eps);
  });
  return -total;
}
|
|
73
|
-
/* No additional utilities have been added yet (this patch is not a MINOR release);
   the existing ones are simply exported so other files can use them. */
|
|
75
|
-
|
|
76
|
-
// ---------------------- Tensor ----------------------
|
|
77
|
-
/**
 * Thin wrapper around a 2D array (`data`) with a same-shaped gradient
 * buffer (`grad`) initialised to zeros.
 *
 * Note that the arithmetic methods return plain nested arrays, not Tensor
 * instances.
 */
export class Tensor {
  /**
   * @param {number[][]} data - Matrix values; must have at least one row.
   */
  constructor(data) {
    this.data = data;
    this.grad = zeros(data.length, data[0].length);
  }

  /** @returns {number[]} [rowCount, columnCount], with columns read from row 0. */
  shape() {
    const rowCount = this.data.length;
    const colCount = this.data[0].length;
    return [rowCount, colCount];
  }

  /**
   * Element-wise addition.
   * @param {Tensor|number} t - Tensor (matched entry by entry) or a scalar.
   * @returns {number[][]} Plain matrix with the sums.
   */
  add(t) {
    if (t instanceof Tensor) {
      return this.data.map((row, i) => row.map((v, j) => v + t.data[i][j]));
    }
    return this.data.map((row) => row.map((v) => v + t));
  }

  /**
   * Element-wise subtraction.
   * @param {Tensor|number} t - Tensor (matched entry by entry) or a scalar.
   * @returns {number[][]} Plain matrix with the differences.
   */
  sub(t) {
    if (t instanceof Tensor) {
      return this.data.map((row, i) => row.map((v, j) => v - t.data[i][j]));
    }
    return this.data.map((row) => row.map((v) => v - t));
  }

  /**
   * Element-wise multiplication.
   * @param {Tensor|number} t - Tensor (matched entry by entry) or a scalar.
   * @returns {number[][]} Plain matrix with the products.
   */
  mul(t) {
    if (t instanceof Tensor) {
      return this.data.map((row, i) => row.map((v, j) => v * t.data[i][j]));
    }
    return this.data.map((row) => row.map((v) => v * t));
  }

  /**
   * Matrix product with another Tensor.
   * @param {Tensor} t - Right operand.
   * @returns {number[][]} Plain matrix result of dot(this.data, t.data).
   * @throws {Error} If `t` is not a Tensor.
   */
  matmul(t) {
    if (!(t instanceof Tensor)) {
      throw new Error("matmul requires Tensor");
    }
    return dot(this.data, t.data);
  }

  /** @returns {number[][]} Transposed copy of the data as a plain matrix. */
  transpose() {
    return transpose(this.data);
  }

  /** @returns {number[]} The data flattened by one level. */
  flatten() {
    return this.data.flat();
  }

  /** @returns {Tensor} r x c Tensor of zeros. */
  static zeros(r, c) {
    return new Tensor(zeros(r, c));
  }

  /** @returns {Tensor} r x c Tensor of ones. */
  static ones(r, c) {
    return new Tensor(ones(r, c));
  }

  /** @returns {Tensor} r x c Tensor filled by randomMatrix(r, c, scale). */
  static random(r, c, scale = 0.1) {
    return new Tensor(randomMatrix(r, c, scale));
  }
}
|
|
90
|
-
|
|
91
|
-
// ---------------------- Layers ----------------------
|
|
92
|
-
/**
 * Fully connected layer computing x * W + b on plain nested arrays.
 *
 * W has inputDim rows and outputDim columns; b has outputDim entries.
 * Gradients are written into gradW / gradb in place by backward().
 */
export class Linear {
  /**
   * @param {number} inputDim - Number of input features (rows of W).
   * @param {number} outputDim - Number of output features (columns of W).
   */
  constructor(inputDim, outputDim) {
    this.W = randomMatrix(inputDim, outputDim);
    this.b = Array(outputDim).fill(0);
    this.gradW = zeros(inputDim, outputDim);
    this.gradb = Array(outputDim).fill(0);
    this.x = null; // input of the most recent forward(), needed by backward()
  }

  /**
   * @param {number[][]} x - One row per sample.
   * @returns {number[][]} dot(x, W) with the bias added to every row.
   */
  forward(x) {
    this.x = x;
    const projected = dot(x, this.W);
    return projected.map((row) => row.map((v, j) => v + this.b[j]));
  }

  /**
   * Store parameter gradients and return the gradient w.r.t. the input.
   *
   * @param {number[][]} grad - Gradient w.r.t. the output of forward().
   * @returns {number[][]} Gradient w.r.t. the cached input.
   */
  backward(grad) {
    // gradW[i][j] = sum over samples k of x[k][i] * grad[k][j]
    for (let i = 0; i < this.W.length; i++) {
      for (let j = 0; j < this.W[0].length; j++) {
        let acc = 0;
        for (let k = 0; k < this.x.length; k++) {
          acc += this.x[k][i] * grad[k][j];
        }
        this.gradW[i][j] = acc;
      }
    }

    // gradb[j] = column sum of grad
    for (let j = 0; j < this.b.length; j++) {
      let acc = 0;
      for (const row of grad) {
        acc += row[j];
      }
      this.gradb[j] = acc;
    }

    // gradInput = grad * W^T
    return this.x.map((_, i) => {
      const out = Array(this.W.length).fill(0);
      for (let j = 0; j < this.W.length; j++) {
        for (let k = 0; k < this.W[0].length; k++) {
          out[j] += grad[i][k] * this.W[j][k];
        }
      }
      return out;
    });
  }

  /**
   * @returns {Array<{param: number[][], grad: number[][]}>} Weight entry and
   *   bias entry; the bias vectors are wrapped in a one-row matrix that shares
   *   the underlying arrays, so in-place updates reach this.b.
   */
  parameters() {
    const weightEntry = { param: this.W, grad: this.gradW };
    const biasEntry = { param: [this.b], grad: [this.gradb] };
    return [weightEntry, biasEntry];
  }
}
|
|
123
|
-
|
|
124
|
-
// ---------------------- Conv2D ----------------------
|
|
125
|
-
/**
 * 2D cross-correlation layer (no bias) operating on nested arrays.
 *
 * Input layout, as indexed by forward(): batch -> channel -> row -> column.
 * W is indexed [outChannel][inChannel][kernelRow][kernelCol].
 *
 * Fixes relative to the previous implementation:
 *  - forward() now honours `stride` and returns feature maps of size
 *    floor((in + 2*padding - kernel) / stride) + 1 instead of an input-sized
 *    map padded with zeros;
 *  - the input gradient uses the kernel as-is (forward is a cross-correlation,
 *    so no flip is required);
 *  - weight gradients are written into the existing gradW arrays, so objects
 *    returned earlier by parameters() keep seeing the current gradients.
 */
export class Conv2D {
  /**
   * @param {number} inC - Number of input channels.
   * @param {number} outC - Number of output channels.
   * @param {number} kernel - Side length of the square kernel.
   * @param {number} [stride=1] - Step between kernel applications.
   * @param {number} [padding=0] - Zero padding added on every side.
   */
  constructor(inC, outC, kernel, stride = 1, padding = 0) {
    this.inC = inC;
    this.outC = outC;
    this.kernel = kernel;
    this.stride = stride;
    this.padding = padding;
    this.W = Array(outC).fill(0).map(() => Array(inC).fill(0).map(() => randomMatrix(kernel, kernel)));
    this.gradW = Array(outC).fill(0).map(() => Array(inC).fill(0).map(() => zeros(kernel, kernel)));
    this.x = null; // batch from the most recent forward()
  }

  /**
   * Zero-pad every channel of one sample.
   *
   * @param {number[][][]} input - Array of 2D channels.
   * @param {number} pad - Number of zeros added on each side.
   * @returns {number[][][]} New, padded channels.
   */
  pad2D(input, pad) {
    return input.map((channel) => {
      const rows = channel.length + 2 * pad;
      const cols = channel[0].length + 2 * pad;
      const out = Array.from({ length: rows }, () => Array(cols).fill(0));
      for (let i = 0; i < channel.length; i++) {
        for (let j = 0; j < channel[0].length; j++) {
          out[i + pad][j + pad] = channel[i][j];
        }
      }
      return out;
    });
  }

  /**
   * Valid cross-correlation of one channel with one kernel.
   *
   * @param {number[][]} input - Single (already padded) channel.
   * @param {number[][]} kernel - Kernel matrix.
   * @param {number} [stride=1] - Step between kernel positions.
   * @returns {number[][]} Output feature map.
   */
  conv2DSingle(input, kernel, stride = 1) {
    const kH = kernel.length;
    const kW = kernel[0].length;
    // Clamp so an oversized kernel yields an empty map instead of a RangeError.
    const rows = Math.max(0, Math.floor((input.length - kH) / stride) + 1);
    const cols = Math.max(0, Math.floor((input[0].length - kW) / stride) + 1);
    const out = zeros(rows, cols);
    for (let i = 0; i < rows; i++) {
      for (let j = 0; j < cols; j++) {
        let acc = 0;
        for (let ki = 0; ki < kH; ki++) {
          for (let kj = 0; kj < kW; kj++) {
            acc += input[i * stride + ki][j * stride + kj] * kernel[ki][kj];
          }
        }
        out[i][j] = acc;
      }
    }
    return out;
  }

  /**
   * @param {number[][][][]} batch - Samples, each an array of 2D channels.
   * @returns {number[][][][]} For every sample, `outC` feature maps, each the
   *   sum over input channels of the strided cross-correlation with W.
   */
  forward(batch) {
    this.x = batch;
    return batch.map((sample) => {
      // Pad once per sample rather than once per (output, input) channel pair.
      const padded = this.padding > 0 ? this.pad2D(sample, this.padding) : sample;
      const channelsOut = [];
      for (let oc = 0; oc < this.outC; oc++) {
        let outChan = null;
        for (let ic = 0; ic < this.inC; ic++) {
          const conv = this.conv2DSingle(padded[ic], this.W[oc][ic], this.stride);
          if (outChan === null) {
            outChan = conv;
          } else {
            for (let r = 0; r < conv.length; r++) {
              for (let c = 0; c < conv[r].length; c++) {
                outChan[r][c] += conv[r][c];
              }
            }
          }
        }
        channelsOut.push(outChan === null ? [] : outChan);
      }
      return channelsOut;
    });
  }

  /**
   * Accumulate weight gradients over the batch and return input gradients.
   *
   * @param {number[][][][]} grad - Gradient w.r.t. the output of forward(),
   *   indexed [sample][outChannel][row][col].
   * @returns {number[][][][]} Gradient w.r.t. the cached input, same layout
   *   as the input batch.
   */
  backward(grad) {
    const k = this.kernel;
    const s = this.stride;
    const gradWAcc = this.W.map((perOut) => perOut.map(() => zeros(k, k)));

    const gradInput = this.x.map((sample, b) => {
      const xPadded = this.pad2D(sample, this.padding);
      const gradPadded = xPadded.map((chan) => zeros(chan.length, chan[0].length));

      for (let oc = 0; oc < this.outC; oc++) {
        const outGrad = grad[b][oc];
        for (let ic = 0; ic < this.inC; ic++) {
          const inChan = xPadded[ic];
          const w = this.W[oc][ic];
          const gW = gradWAcc[oc][ic];
          const gIn = gradPadded[ic];
          for (let y = 0; y < outGrad.length; y++) {
            for (let x = 0; x < outGrad[0].length; x++) {
              const g = outGrad[y][x];
              for (let i = 0; i < k; i++) {
                const inY = y * s + i;
                // Defensive: skip positions outside the padded input.
                if (inY >= inChan.length) continue;
                for (let j = 0; j < k; j++) {
                  const inX = x * s + j;
                  if (inX >= inChan[0].length) continue;
                  // out[y][x] += in[inY][inX] * w[i][j] in forward, hence:
                  gW[i][j] += inChan[inY][inX] * g;
                  gIn[inY][inX] += w[i][j] * g;
                }
              }
            }
          }
        }
      }

      if (this.padding === 0) return gradPadded;
      // Drop the border that corresponds to the zero padding.
      const p = this.padding;
      return gradPadded.map((chan) =>
        chan.slice(p, chan.length - p).map((row) => row.slice(p, row.length - p))
      );
    });

    // Copy element-wise so references handed out by parameters() stay valid.
    for (let oc = 0; oc < gradWAcc.length; oc++) {
      for (let ic = 0; ic < gradWAcc[oc].length; ic++) {
        for (let i = 0; i < k; i++) {
          for (let j = 0; j < k; j++) {
            this.gradW[oc][ic][i][j] = gradWAcc[oc][ic][i][j];
          }
        }
      }
    }
    return gradInput;
  }

  /**
   * @returns {Array<{param: number[][], grad: number[][]}>} One entry per
   *   (outChannel, inChannel) kernel.
   */
  parameters() {
    return this.W.flatMap((w, oc) => w.map((wc, ic) => ({ param: wc, grad: this.gradW[oc][ic] })));
  }
}
|
|
240
|
-
|
|
241
|
-
// ---------------------- Sequential ----------------------
|
|
242
|
-
/**
 * Container that chains layers: forward() feeds each layer's output to the
 * next, backward() walks the layers in reverse order.
 */
export class Sequential {
  /**
   * @param {Array<object>} [layers=[]] - Objects exposing forward()/backward()
   *   and optionally parameters().
   */
  constructor(layers = []) {
    this.layers = layers;
  }

  /**
   * @param {*} x - Input handed to the first layer.
   * @returns {*} Output of the last layer (or `x` when there are no layers).
   */
  forward(x) {
    let activation = x;
    for (const layer of this.layers) {
      activation = layer.forward(activation);
    }
    return activation;
  }

  /**
   * @param {*} grad - Gradient w.r.t. the output of the last layer.
   * @returns {*} Gradient returned by the first layer's backward().
   */
  backward(grad) {
    let upstream = grad;
    for (let i = this.layers.length - 1; i >= 0; i--) {
      upstream = this.layers[i].backward(upstream);
    }
    return upstream;
  }

  /**
   * @returns {Array} Concatenation of parameters() of every layer that
   *   defines that method; layers without it contribute nothing.
   */
  parameters() {
    let collected = [];
    for (const layer of this.layers) {
      if (layer.parameters) {
        collected = collected.concat(layer.parameters());
      }
    }
    return collected;
  }
}
|
|
248
|
-
|
|
249
|
-
// ---------------------- Activations ----------------------
|
|
250
|
-
/**
 * Rectified linear unit: max(0, v) element-wise on a 2D array.
 * The output is cached and used as the gate in backward().
 */
export class ReLU {
  constructor() {
    this.out = null;
  }

  /**
   * @param {number[][]} x - Input matrix.
   * @returns {number[][]} New matrix with negatives replaced by 0.
   */
  forward(x) {
    const activated = [];
    for (const row of x) {
      activated.push(row.map((v) => Math.max(0, v)));
    }
    this.out = activated;
    return activated;
  }

  /**
   * @param {number[][]} grad - Upstream gradient.
   * @returns {number[][]} Gradient passed through where the cached output is > 0.
   */
  backward(grad) {
    return grad.map((row, i) =>
      row.map((g, j) => {
        const gate = this.out[i][j] > 0 ? 1 : 0;
        return g * gate;
      })
    );
  }
}
|
|
251
|
-
/**
 * Logistic sigmoid 1 / (1 + exp(-v)) applied element-wise to a 2D array.
 * The output is cached; backward() uses out * (1 - out).
 */
export class Sigmoid {
  constructor() {
    this.out = null;
  }

  /**
   * @param {number[][]} x - Input matrix.
   * @returns {number[][]} New matrix of sigmoid values.
   */
  forward(x) {
    const logistic = (v) => 1 / (1 + Math.exp(-v));
    const activated = x.map((row) => row.map(logistic));
    this.out = activated;
    return activated;
  }

  /**
   * @param {number[][]} grad - Upstream gradient.
   * @returns {number[][]} Upstream gradient times the local derivative.
   */
  backward(grad) {
    return grad.map((row, i) =>
      row.map((g, j) => {
        const s = this.out[i][j];
        return g * s * (1 - s);
      })
    );
  }
}
|
|
252
|
-
/**
 * Hyperbolic tangent applied element-wise to a 2D array.
 * The output is cached; backward() uses 1 - out^2.
 */
export class Tanh {
  constructor() {
    this.out = null;
  }

  /**
   * @param {number[][]} x - Input matrix.
   * @returns {number[][]} New matrix of tanh values.
   */
  forward(x) {
    const activated = [];
    for (const row of x) {
      activated.push(row.map((v) => Math.tanh(v)));
    }
    this.out = activated;
    return activated;
  }

  /**
   * @param {number[][]} grad - Upstream gradient.
   * @returns {number[][]} Upstream gradient times the local derivative.
   */
  backward(grad) {
    return grad.map((row, i) =>
      row.map((g, j) => {
        const slope = 1 - this.out[i][j] ** 2;
        return g * slope;
      })
    );
  }
}
|
|
253
|
-
/**
 * Leaky ReLU: v for v > 0, otherwise v * alpha, element-wise on a 2D array.
 * backward() gates on the sign of the cached output.
 */
export class LeakyReLU {
  /**
   * @param {number} [alpha=0.01] - Slope applied to non-positive inputs.
   */
  constructor(alpha = 0.01) {
    this.alpha = alpha;
    this.out = null;
  }

  /**
   * @param {number[][]} x - Input matrix.
   * @returns {number[][]} New activated matrix.
   */
  forward(x) {
    const leak = (v) => {
      if (v > 0) return v;
      return v * this.alpha;
    };
    this.out = x.map((row) => row.map(leak));
    return this.out;
  }

  /**
   * @param {number[][]} grad - Upstream gradient.
   * @returns {number[][]} Gradient scaled by 1 where the cached output is
   *   positive and by alpha elsewhere.
   */
  backward(grad) {
    return grad.map((row, i) =>
      row.map((g, j) => {
        const slope = this.out[i][j] > 0 ? 1 : this.alpha;
        return g * slope;
      })
    );
  }
}
|
|
254
|
-
/**
 * GELU activation using the tanh approximation
 *   0.5 * v * (1 + tanh(sqrt(2/pi) * (v + 0.044715 * v^3))).
 *
 * The previous backward() returned the upstream gradient unchanged; it now
 * multiplies by the derivative of the approximation above, evaluated at the
 * cached input.
 */
export class GELU {
  constructor() {
    this.x = null; // input of the most recent forward(), needed by backward()
    this.out = null;
  }

  /**
   * @param {number[][]} x - Input matrix.
   * @returns {number[][]} New activated matrix.
   */
  forward(x) {
    const c = Math.sqrt(2 / Math.PI);
    const fn = (v) => 0.5 * v * (1 + Math.tanh(c * (v + 0.044715 * v ** 3)));
    this.x = x;
    this.out = x.map((row) => row.map(fn));
    return this.out;
  }

  /**
   * @param {number[][]} grad - Upstream gradient.
   * @returns {number[][]} Upstream gradient times d(GELU)/dv.
   */
  backward(grad) {
    const c = Math.sqrt(2 / Math.PI);
    return grad.map((row, i) =>
      row.map((g, j) => {
        const v = this.x[i][j];
        const t = Math.tanh(c * (v + 0.044715 * v ** 3));
        // d/dv of the tanh argument
        const dInner = c * (1 + 3 * 0.044715 * v * v);
        const local = 0.5 * (1 + t) + 0.5 * v * (1 - t * t) * dInner;
        return g * local;
      })
    );
  }
}
|
|
255
|
-
|
|
256
|
-
// ---------------------- Dropout ----------------------
|
|
257
|
-
/**
 * Dropout on a 2D array.
 *
 * Fixes relative to the previous implementation:
 *  - the keep test was `v * Math.random() >= p`, which made the decision
 *    depend on the value itself (negative values were always dropped);
 *    each element is now dropped with probability p, independent of its value;
 *  - the mask drawn in forward() is stored and reused by backward(), so the
 *    gradient is zero exactly where the activation was dropped;
 *  - kept values are scaled by 1 / (1 - p) ("inverted dropout"), and
 *    train()/eval() switch the layer on and off.
 */
export class Dropout {
  /**
   * @param {number} [p=0.5] - Probability of zeroing an element.
   */
  constructor(p = 0.5) {
    this.p = p;
    this.training = true;
    this.mask = null; // per-element multiplier from the last training forward()
  }

  /**
   * @param {number[][]} x - Input matrix.
   * @returns {number[][]} New matrix; a copy of `x` in eval mode.
   */
  forward(x) {
    if (!this.training) {
      this.mask = null;
      return x.map((row) => row.slice());
    }
    const keep = 1 - this.p;
    const scale = keep > 0 ? 1 / keep : 0;
    this.mask = x.map((row) => row.map(() => (Math.random() >= this.p ? scale : 0)));
    // The ternary avoids producing -0 for dropped negative values.
    return x.map((row, i) => row.map((v, j) => (this.mask[i][j] > 0 ? v * this.mask[i][j] : 0)));
  }

  /**
   * @param {number[][]} grad - Upstream gradient.
   * @returns {number[][]} Gradient with the forward mask applied (a copy of
   *   `grad` when no mask is stored).
   */
  backward(grad) {
    if (this.mask === null) {
      return grad.map((row) => row.slice());
    }
    return grad.map((row, i) => row.map((g, j) => (this.mask[i][j] > 0 ? g * this.mask[i][j] : 0)));
  }

  /** Enable dropout. */
  train() {
    this.training = true;
  }

  /** Disable dropout (forward becomes the identity). */
  eval() {
    this.training = false;
  }
}
|
|
258
|
-
|
|
259
|
-
// ---------------------- Losses ----------------------
|
|
260
|
-
/**
 * Mean squared error over a batch of rows.
 *
 * forward() averages the squared error over each row and then over the rows.
 * backward() previously divided only by the row length, so its result was
 * `batch` times larger than the gradient of the value forward() returns; it
 * now divides by row length times batch size.
 */
export class MSELoss {
  /**
   * @param {number[][]} pred - Predictions, one row per sample.
   * @param {number[][]} target - Targets with the same layout.
   * @returns {number} Mean over rows of the per-row mean squared error.
   */
  forward(pred, target) {
    this.pred = pred;
    this.target = target;
    const losses = pred.map(
      (row, i) => row.reduce((sum, v, j) => sum + (v - target[i][j]) ** 2, 0) / row.length
    );
    return losses.reduce((a, b) => a + b, 0) / pred.length;
  }

  /**
   * @returns {number[][]} Gradient of the forward() value w.r.t. each
   *   prediction from the last forward() call.
   */
  backward() {
    const batch = this.pred.length;
    return this.pred.map((row, i) =>
      row.map((v, j) => (2 * (v - this.target[i][j])) / (row.length * batch))
    );
  }
}
|
|
261
|
-
/**
 * Softmax cross-entropy over a batch of score rows.
 *
 * forward() applies softmax to every row of `pred`, evaluates crossEntropy
 * against the matching target row and averages over the rows. backward()
 * returns (softmax(pred) - target) / batchSize.
 */
export class CrossEntropyLoss {
  /**
   * @param {number[][]} pred - Raw scores, one row per sample.
   * @param {number[][]} target - Target rows (same layout as `pred`).
   * @returns {number} Mean cross-entropy over the rows.
   */
  forward(pred, target) {
    this.pred = pred;
    this.target = target;
    let total = 0;
    pred.forEach((scores, i) => {
      total = total + crossEntropy(softmax(scores), target[i]);
    });
    return total / pred.length;
  }

  /**
   * @returns {number[][]} Gradient w.r.t. the scores from the last forward().
   */
  backward() {
    const batch = this.pred.length;
    return this.pred.map((scores, i) => {
      const probs = softmax(scores);
      const targetRow = this.target[i];
      return probs.map((p, j) => (p - targetRow[j]) / batch);
    });
  }
}
|
|
262
|
-
|
|
263
|
-
// ---------------------- Optimizers ----------------------
|
|
264
|
-
/**
 * Adam optimizer over a list of {param, grad} matrix pairs.
 *
 * Parameters are updated in place; `lr` is a plain property read on every
 * step(), so external code may change it between steps.
 */
export class Adam {
  /**
   * @param {Array<{param: number[][], grad: number[][]}>} params - Pairs to optimise.
   * @param {number} [lr=0.001] - Learning rate.
   * @param {number} [b1=0.9] - Decay rate of the first-moment estimate.
   * @param {number} [b2=0.999] - Decay rate of the second-moment estimate.
   * @param {number} [eps=1e-8] - Constant added to the denominator.
   */
  constructor(params, lr = 0.001, b1 = 0.9, b2 = 0.999, eps = 1e-8) {
    this.params = params;
    this.lr = lr;
    this.beta1 = b1;
    this.beta2 = b2;
    this.eps = eps;
    const stateLike = (p) => zeros(p.param.length, p.param[0].length || 1);
    this.m = params.map(stateLike); // first-moment estimates
    this.v = params.map(stateLike); // second-moment estimates
    this.t = 0; // number of steps taken
  }

  /** Apply one bias-corrected Adam update to every parameter entry. */
  step() {
    this.t++;
    const correction1 = 1 - Math.pow(this.beta1, this.t);
    const correction2 = 1 - Math.pow(this.beta2, this.t);

    this.params.forEach((entry, idx) => {
      const firstMoment = this.m[idx];
      const secondMoment = this.v[idx];
      for (let i = 0; i < entry.param.length; i++) {
        const width = entry.param[0].length || 1;
        for (let j = 0; j < width; j++) {
          const g = entry.grad[i][j];
          firstMoment[i][j] = this.beta1 * firstMoment[i][j] + (1 - this.beta1) * g;
          secondMoment[i][j] = this.beta2 * secondMoment[i][j] + (1 - this.beta2) * g * g;
          const mHat = firstMoment[i][j] / correction1;
          const vHat = secondMoment[i][j] / correction2;
          entry.param[i][j] -= this.lr * mHat / (Math.sqrt(vHat) + this.eps);
        }
      }
    });
  }
}
|
|
286
|
-
|
|
287
|
-
// ---------------------- Learning Rate Schedulers ----------------------
|
|
288
|
-
/**
 * Scheduler that multiplies the optimizer's `lr` by `gamma` every
 * `step_size` calls to step().
 */
export class StepLR {
  /**
   * @param {{lr: number}} optimizer - Object whose `lr` property is adjusted.
   * @param {number} step_size - Number of step() calls between decays.
   * @param {number} [gamma=1.0] - Multiplicative decay factor.
   */
  constructor(optimizer, step_size, gamma = 1.0) {
    this.optimizer = optimizer;
    this.step_size = step_size;
    this.gamma = gamma;
    this.last_epoch = 0;
    this.base_lr = optimizer.lr; // learning rate at construction time
  }

  /** Advance the epoch counter and decay the learning rate when due. */
  step() {
    this.last_epoch += 1;
    const decayDue = this.last_epoch % this.step_size === 0;
    if (!decayDue) return;
    this.optimizer.lr *= this.gamma;
  }

  /** @returns {number} The optimizer's current learning rate. */
  get_lr() {
    return this.optimizer.lr;
  }
}
|
|
309
|
-
|
|
310
|
-
/**
 * Scheduler that sets the optimizer's `lr` to
 * base_lr * lr_lambda(epoch) on every step().
 */
export class LambdaLR {
  /**
   * @param {{lr: number}} optimizer - Object whose `lr` property is adjusted.
   * @param {function(number): number} lr_lambda - Maps the epoch counter to a
   *   multiplier of the base learning rate.
   */
  constructor(optimizer, lr_lambda) {
    this.optimizer = optimizer;
    this.lr_lambda = lr_lambda;
    this.last_epoch = 0;
    this.base_lr = optimizer.lr; // learning rate at construction time
  }

  /** Advance the epoch counter and recompute the learning rate. */
  step() {
    this.last_epoch += 1;
    const factor = this.lr_lambda(this.last_epoch);
    this.optimizer.lr = this.base_lr * factor;
  }

  /** @returns {number} The optimizer's current learning rate. */
  get_lr() {
    return this.optimizer.lr;
  }
}
|
|
328
|
-
|
|
329
|
-
// ---------------------- ELU Activation ----------------------
|
|
330
|
-
/**
 * Exponential linear unit: v for v > 0, otherwise alpha * (exp(v) - 1),
 * applied element-wise to a 2D array.
 *
 * The previous backward() used alpha * exp(out) for the negative branch,
 * i.e. it exponentiated the activation instead of the input. The derivative
 * there is alpha * exp(v), which is now evaluated on the cached input.
 */
export class ELU {
  /**
   * @param {number} [alpha=1.0] - Scale of the negative branch.
   */
  constructor(alpha = 1.0) {
    this.alpha = alpha;
    this.x = null; // input of the most recent forward(), needed by backward()
    this.out = null;
  }

  /**
   * @param {number[][]} x - Input matrix.
   * @returns {number[][]} New activated matrix.
   */
  forward(x) {
    this.x = x;
    this.out = x.map((row) =>
      row.map((v) => (v > 0 ? v : this.alpha * (Math.exp(v) - 1)))
    );
    return this.out;
  }

  /**
   * @param {number[][]} grad - Upstream gradient.
   * @returns {number[][]} Upstream gradient times d(ELU)/dv.
   */
  backward(grad) {
    return grad.map((row, i) =>
      row.map((g, j) => {
        const v = this.x[i][j];
        const slope = v > 0 ? 1 : this.alpha * Math.exp(v);
        return g * slope;
      })
    );
  }
}
|
|
351
|
-
|
|
352
|
-
// ---------------------- Mish Activation ----------------------
|
|
353
|
-
/**
 * Mish activation: v * tanh(softplus(v)), with softplus(v) = ln(1 + e^v),
 * applied element-wise to a 2D array.
 *
 * The previous backward() multiplied sech^2(softplus(v)) by a rational
 * expression in e^v, which does not equal the derivative (it gave 0.384 at
 * v = 0 where the derivative is 0.6) and produced NaN once e^(3v) overflowed.
 * The derivative is now computed directly from the product rule:
 *   tanh(sp) + v * (1 - tanh(sp)^2) * sigmoid(v),  sp = softplus(v).
 */
export class Mish {
  constructor() {
    this.x = null; // input of the most recent forward(), needed by backward()
  }

  /**
   * @param {number[][]} x - Input matrix.
   * @returns {number[][]} New activated matrix.
   */
  forward(x) {
    this.x = x;
    return x.map((row) =>
      row.map((v) => {
        // Overflow-safe softplus: max(v, 0) + ln(1 + e^-|v|)
        const softplus = Math.max(v, 0) + Math.log1p(Math.exp(-Math.abs(v)));
        return v * Math.tanh(softplus);
      })
    );
  }

  /**
   * @param {number[][]} grad - Upstream gradient.
   * @returns {number[][]} Upstream gradient times d(Mish)/dv.
   */
  backward(grad) {
    return grad.map((row, i) =>
      row.map((g, j) => {
        const v = this.x[i][j];
        const softplus = Math.max(v, 0) + Math.log1p(Math.exp(-Math.abs(v)));
        const t = Math.tanh(softplus);
        // d softplus / dv
        const sigmoid = 1 / (1 + Math.exp(-v));
        const local = t + v * (1 - t * t) * sigmoid;
        return g * local;
      })
    );
  }
}
|
|
393
|
-
|
|
394
|
-
// ---------------------- SiLU Activation ----------------------
|
|
395
|
-
/**
 * SiLU activation: v * sigmoid(v), applied element-wise to a 2D array.
 * The input is cached for backward().
 */
export class SiLU {
  constructor() {
    this.x = null;
  }

  /**
   * @param {number[][]} x - Input matrix.
   * @returns {number[][]} New activated matrix.
   */
  forward(x) {
    this.x = x;
    const silu = (v) => v / (1 + Math.exp(-v)); // same as v * sigmoid(v)
    const activated = [];
    for (const row of x) {
      activated.push(row.map(silu));
    }
    return activated;
  }

  /**
   * @param {number[][]} grad - Upstream gradient.
   * @returns {number[][]} Upstream gradient times
   *   sigmoid(v) * (1 + v * (1 - sigmoid(v))).
   */
  backward(grad) {
    const localSlope = (v) => {
      const sigmoid = 1 / (1 + Math.exp(-v));
      return sigmoid * (1 + v * (1 - sigmoid));
    };
    return grad.map((row, i) => row.map((g, j) => g * localSlope(this.x[i][j])));
  }
}
|
|
417
|
-
|
|
418
|
-
/**
 * Plain stochastic gradient descent over a list of {param, grad} matrix
 * pairs. Parameters are updated in place.
 */
export class SGD {
  /**
   * @param {Array<{param: number[][], grad: number[][]}>} params - Pairs to optimise.
   * @param {number} [lr=0.01] - Learning rate.
   */
  constructor(params, lr = 0.01) {
    this.params = params;
    this.lr = lr;
  }

  /** Subtract lr * grad from every parameter entry. */
  step() {
    for (const entry of this.params) {
      const matrix = entry.param;
      for (let i = 0; i < matrix.length; i++) {
        const width = matrix[0].length || 1;
        for (let j = 0; j < width; j++) {
          matrix[i][j] -= this.lr * entry.grad[i][j];
        }
      }
    }
  }
}
|
|
419
|
-
|
|
420
|
-
// ---------------------- BatchNorm2D ----------------------
|
|
421
|
-
/**
 * Batch normalisation for inputs laid out as [batch][channel][row][col].
 *
 * In training mode each channel is normalised with the mean and (biased)
 * variance taken over every sample and spatial position, and the running
 * statistics are updated as momentum * batch + (1 - momentum) * running.
 * In eval mode the running statistics are used instead.
 *
 * Fixes relative to the previous implementation:
 *  - the centred activations are cached per sample (they used to be summed
 *    across the batch, so backward() read the wrong values);
 *  - backward() now includes the terms that arise because the batch mean and
 *    variance depend on the input;
 *  - gradWeight / gradBias are the sums over the batch and spatial positions
 *    (they used to be divided by the batch size).
 */
export class BatchNorm2d {
  /**
   * @param {number} numFeatures - Number of channels.
   * @param {number} [eps=1e-5] - Constant added to the variance before sqrt.
   * @param {number} [momentum=0.1] - Weight of the batch statistics in the
   *   running-statistics update.
   * @param {boolean} [affine=true] - Whether to apply a per-channel scale
   *   (weight) and shift (bias).
   */
  constructor(numFeatures, eps = 1e-5, momentum = 0.1, affine = true) {
    this.numFeatures = numFeatures;
    this.eps = eps;
    this.momentum = momentum;
    this.affine = affine;

    // Parameters
    if (affine) {
      this.weight = Array(numFeatures).fill(1);
      this.bias = Array(numFeatures).fill(0);
      this.gradWeight = Array(numFeatures).fill(0);
      this.gradBias = Array(numFeatures).fill(0);
    }

    // Running statistics
    this.runningMean = Array(numFeatures).fill(0);
    this.runningVar = Array(numFeatures).fill(1);

    // Training state
    this.training = true;
    this.x = null;
    this.xCentered = null; // [batch][channel][row][col] of x - mean
    this.std = null; // [channel][batch]; the same value for every sample
  }

  /**
   * @param {number[][][][]} x - Input, [batch][channel][row][col].
   * @returns {number[][][][]} Normalised (and, if affine, scaled and
   *   shifted) output with the same layout.
   */
  forward(x) {
    this.x = x;
    const batchSize = x.length;
    const channels = x[0].length;

    let means = this.runningMean;
    let variances = this.runningVar;

    if (this.training) {
      const counts = Array(channels).fill(0);
      const sums = Array(channels).fill(0);
      for (let b = 0; b < batchSize; b++) {
        for (let c = 0; c < channels; c++) {
          for (const row of x[b][c]) {
            for (const v of row) {
              sums[c] += v;
              counts[c] += 1;
            }
          }
        }
      }
      means = sums.map((s, c) => s / counts[c]);

      const squares = Array(channels).fill(0);
      for (let b = 0; b < batchSize; b++) {
        for (let c = 0; c < channels; c++) {
          for (const row of x[b][c]) {
            for (const v of row) {
              squares[c] += (v - means[c]) ** 2;
            }
          }
        }
      }
      variances = squares.map((s, c) => s / counts[c]);

      // Update running statistics
      for (let c = 0; c < channels; c++) {
        this.runningMean[c] = this.momentum * means[c] + (1 - this.momentum) * this.runningMean[c];
        this.runningVar[c] = this.momentum * variances[c] + (1 - this.momentum) * this.runningVar[c];
      }

      // Caches for backward()
      this.std = variances.map((v) => Array(batchSize).fill(Math.sqrt(v + this.eps)));
      this.xCentered = x.map((sample) =>
        sample.map((chan, c) => chan.map((row) => row.map((v) => v - means[c])))
      );
    }

    return x.map((sample) =>
      sample.map((chan, c) => {
        const channelStd = Math.sqrt(variances[c] + this.eps);
        return chan.map((row) =>
          row.map((v) => {
            const normalised = (v - means[c]) / channelStd;
            return this.affine ? normalised * this.weight[c] + this.bias[c] : normalised;
          })
        );
      })
    );
  }

  /**
   * @param {number[][][][]} gradOutput - Gradient w.r.t. the output of the
   *   last training-mode forward().
   * @returns {number[][][][]} Gradient w.r.t. the input.
   * @throws {Error} If the layer is in eval mode.
   */
  backward(gradOutput) {
    if (!this.training) {
      throw new Error("Backward should only be called in training mode");
    }

    const batchSize = gradOutput.length;
    const channels = gradOutput[0].length;

    const gradInput = this.x.map((sample) =>
      sample.map((chan) => chan.map((row) => row.map(() => 0)))
    );

    for (let c = 0; c < channels; c++) {
      const std = this.std[c][0];
      const gamma = this.affine ? this.weight[c] : 1;

      // First pass: channel-wide sums of dy and dy * xHat.
      let count = 0;
      let sumDy = 0;
      let sumDyXHat = 0;
      for (let b = 0; b < batchSize; b++) {
        const dy = gradOutput[b][c];
        const centred = this.xCentered[b][c];
        for (let i = 0; i < dy.length; i++) {
          for (let j = 0; j < dy[i].length; j++) {
            sumDy += dy[i][j];
            sumDyXHat += dy[i][j] * (centred[i][j] / std);
            count += 1;
          }
        }
      }

      if (this.affine) {
        // Element assignment keeps arrays handed out by parameters() in sync.
        this.gradWeight[c] = sumDyXHat;
        this.gradBias[c] = sumDy;
      }

      // Second pass: dx = gamma/std * (dy - mean(dy) - xHat * mean(dy * xHat))
      const meanDy = sumDy / count;
      const meanDyXHat = sumDyXHat / count;
      for (let b = 0; b < batchSize; b++) {
        const dy = gradOutput[b][c];
        const centred = this.xCentered[b][c];
        for (let i = 0; i < dy.length; i++) {
          for (let j = 0; j < dy[i].length; j++) {
            const xHat = centred[i][j] / std;
            gradInput[b][c][i][j] = (gamma / std) * (dy[i][j] - meanDy - xHat * meanDyXHat);
          }
        }
      }
    }

    return gradInput;
  }

  /**
   * @returns {Array<{param: number[][], grad: number[][]}>} Weight and bias
   *   entries wrapped as one-row matrices, or an empty array when the layer
   *   is not affine.
   */
  parameters() {
    if (!this.affine) return [];
    return [
      { param: [this.weight], grad: [this.gradWeight] },
      { param: [this.bias], grad: [this.gradBias] }
    ];
  }

  /** Switch to training mode (batch statistics). */
  train() { this.training = true; }

  /** Switch to eval mode (running statistics). */
  eval() { this.training = false; }
}
|
|
632
|
-
|
|
633
|
-
// ---------------------- Model Save/Load ----------------------
|
|
634
|
-
/**
 * Serialise the `W` and `b` properties of every layer of a Sequential model.
 *
 * Layers without (or with falsy) `W` / `b` are recorded as null; no other
 * layer state is written.
 *
 * @param {Sequential} model - Model to serialise.
 * @returns {string} JSON array with one {weights, biases} object per layer.
 * @throws {Error} If `model` is not a Sequential instance.
 */
export function saveModel(model) {
  const isSequential = model instanceof Sequential;
  if (!isSequential) {
    throw new Error("saveModel supports only Sequential");
  }
  const snapshot = [];
  for (const layer of model.layers) {
    snapshot.push({ weights: layer.W || null, biases: layer.b || null });
  }
  return JSON.stringify(snapshot);
}
|
|
640
|
-
|
|
641
|
-
/**
 * Restore `W` and `b` of the layers of a Sequential model from JSON produced
 * by saveModel().
 *
 * A layer property is replaced only when the layer already has a truthy
 * value for it and the saved entry provides one. The properties are
 * reassigned (not copied element-wise), so parameter lists obtained before
 * loading still reference the old arrays.
 *
 * @param {Sequential} model - Model to update in place.
 * @param {string} json - Serialised weights.
 * @returns {void}
 * @throws {Error} If `model` is not a Sequential instance, or if the parsed
 *   data is not an array with at least one entry per layer (previously this
 *   surfaced as an opaque TypeError).
 * @throws {SyntaxError} If `json` is not valid JSON.
 */
export function loadModel(model, json) {
  if (!(model instanceof Sequential)) throw new Error("loadModel supports only Sequential");
  const weights = JSON.parse(json);
  if (!Array.isArray(weights) || weights.length < model.layers.length) {
    throw new Error("loadModel: saved data does not cover every layer of the model");
  }
  model.layers.forEach((layer, i) => {
    const saved = weights[i];
    if (saved === null || typeof saved !== "object") {
      throw new Error(`loadModel: invalid entry for layer ${i}`);
    }
    if (layer.W && saved.weights) layer.W = saved.weights;
    if (layer.b && saved.biases) layer.b = saved.biases;
  });
}
|
|
650
|
-
|
|
651
|
-
// ---------------------- Advanced Utils ----------------------
|
|
652
|
-
/**
 * Flatten a nested array by two levels.
 *
 * @param {Array} batch - Nested array.
 * @returns {Array} New array with the two outermost nesting levels removed.
 */
export function flattenBatch(batch) {
  const oneLevel = batch.flat();
  return oneLevel.flat();
}
|
|
653
|
-
/**
 * Collect the `data` property of every element into a new array.
 *
 * @param {Array<{data: *}>} tensors - Objects exposing `data`.
 * @returns {Array} The `data` values in input order.
 */
export function stack(tensors) {
  return tensors.map(({ data }) => data);
}
|
|
654
|
-
/**
 * Build an n x n identity matrix.
 *
 * @param {number} n - Number of rows and columns.
 * @returns {number[][]} Matrix with 1 on the diagonal and 0 elsewhere.
 */
export function eye(n) {
  const makeRow = (_, i) => {
    const row = Array.from({ length: n }, () => 0);
    if (i < row.length) row[i] = 1;
    return row;
  };
  return Array.from({ length: n }, makeRow);
}
|
|
655
|
-
/**
 * Concatenate two matrices.
 *
 * @param {Array<Array<*>>} a - First matrix.
 * @param {Array<Array<*>>} b - Second matrix.
 * @param {number} [axis=0] - 0 appends the rows of `b` after those of `a`;
 *   1 appends each row of `b` to the row of `a` with the same index.
 * @returns {Array<Array<*>>} A new outer array (rows are shared for axis 0).
 * @throws {Error} If `axis` is neither 0 nor 1 (previously `undefined` was
 *   returned silently).
 */
export function concat(a, b, axis = 0) {
  if (axis === 0) return [...a, ...b];
  if (axis === 1) return a.map((row, i) => [...row, ...b[i]]);
  throw new Error(`concat: unsupported axis ${axis} (expected 0 or 1)`);
}
|
|
656
|
-
/**
 * Reshape the data of a tensor-like object into a rows x cols matrix.
 *
 * The data is flattened by one level and then cut into consecutive rows.
 *
 * @param {{data: Array}} tensor - Object whose `data` holds the values.
 * @param {number} rows - Number of output rows.
 * @param {number} cols - Number of entries per output row.
 * @returns {Array<Array<*>>} New matrix in row-major order.
 * @throws {Error} If the number of values differs from rows * cols. The
 *   previous check only rejected inputs that were too small and silently
 *   dropped surplus values.
 */
export function reshape(tensor, rows, cols) {
  const flat = tensor.data.flat();
  if (flat.length !== rows * cols) throw new Error("reshape size mismatch");
  return Array.from({ length: rows }, (_, i) => {
    const start = i * cols;
    return flat.slice(start, start + cols);
  });
}
|
package/tests/MakeModel.js
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
// Example: Build and run a simple neural network model using mini-jstorch
|
|
2
|
-
|
|
3
|
-
import { Sequential, Linear, ReLU, MSELoss, SGD, Tensor } from "../src/MainEngine.js";
|
|
4
|
-
|
|
5
|
-
// Toy dataset: two samples with two features each, and their targets.
const input = new Tensor([[0.5, -1.0], [1.5, 2.0]]); // shape: [2,2]
const target = new Tensor([[1.0, 0.0], [0.0, 1.0]]); // shape: [2,2]

// Network under test: Linear -> ReLU -> Linear
const layers = [new Linear(2, 4), new ReLU(), new Linear(4, 2)];
const model = new Sequential(layers);

const criterion = new MSELoss();
const optimizer = new SGD(model.parameters(), 0.01);

// Run the model on the toy input and score it against the target.
const evaluate = () => {
  const prediction = model.forward(input.data);
  const loss = criterion.forward(prediction, target.data);
  return { prediction, loss };
};

// Initial forward pass and loss
const before = evaluate();
console.log("Model output:", before.prediction);
console.log("Loss:", before.loss);

// Backward pass followed by a single optimizer step
model.backward(criterion.backward());
optimizer.step();
console.log("Parameters updated!");

// Evaluate again to show how the loss changed after the update
const after = evaluate();
console.log("New Loss:", after.loss);
|