mini-jstorch 1.7.1 → 1.8.0

package/README.md CHANGED
@@ -1,100 +1,367 @@
- # Mini-JSTorch
+ ## Mini-JSTorch

- A lightweight JavaScript neural network library for rapid frontend AI experimentation on low-resource devices, inspired by PyTorch.

- ## Overview
+ Mini-JSTorch is a lightweight, `dependency-free` JavaScript neural network library designed for `education`, `experimentation`, and `small-scale models`.
+ It runs in Node.js and modern browsers, with a simple API inspired by PyTorch-style workflows.

- Mini-JSTorch is a high-performance, minimalist JavaScript library for building neural networks. It runs efficiently in Frontend environments, including low-end devices. The library enables quick experimentation and learning in AI without compromising stability, accuracy, or training reliability.
+ This project prioritizes `clarity`, `numerical correctness`, and `accessibility` over performance or large-scale production use.
+
+ Version `1.8.0` introduces **SoftmaxCrossEntropyLoss** and **BCEWithLogitsLoss**.

- This release, **1.7.1** allows users to access the JST package from the `browser global scope` or `via HTML` (Sorry I was forgot this feature for a long time).

 ---

- ## New Features Highlights
+ # Overview
+
+ **Mini-JSTorch provides a minimal neural network engine implemented entirely in plain JavaScript.**
+
+ *It is intended for:*
+
+ - learning how neural networks work internally
+ - experimenting with small models
+ - running simple training loops in the browser
+ - environments where large frameworks are unnecessary or unavailable

- - **Softmax Layer:** Professional classification output with proper gradient computation
- - **Tokenizer:** Lightweight text preprocessing for NLP tasks
- - **AdamW Optimizer:** Modern optimizer with decoupled weight decay
+ `Mini-JSTorch is NOT a replacement for PyTorch, TensorFlow, or TensorFlow.js.`
+
+ `It is intentionally scoped to remain small, readable, and easy to debug.`
+
+ ---
+
+ # Key Characteristics
+
+ - Zero dependencies
+ - ESM-first (`type: module`)
+ - Works in Node.js and browser environments
+ - Explicit, manual forward and backward passes
+ - Focused on 2D training logic (`[batch][features]`)
+ - Designed for educational and experimental use
+
+ ---
+
+ # Browser Support
+
+ Mini-JSTorch can now be used directly in browsers:
+
+ - via ESM imports
+ - via CDN / `<script>` with a global `JST` object
+
+ This makes it suitable for:
+
+ - demos
+ - learning environments
+ - lightweight frontend experiments
+
+ Here is an example of building and training a simple model with Mini-JSTorch in the browser:
+
+ ```html
+ <!DOCTYPE html>
+ <html>
+ <body>
+   <div id="output">
+     <p>Status: <span id="status">Initializing...</span></p>
+     <div id="training-log"></div>
+     <div id="results" style="margin-top: 20px;"></div>
+   </div>
+
+   <script type="module">
+     import { Sequential, Linear, ReLU, MSELoss, Adam, StepLR, Tanh } from 'https://unpkg.com/jstorch'; // DO NOT CHANGE
+
+     const statusEl = document.getElementById('status');
+     const trainingLogEl = document.getElementById('training-log');
+     const resultsEl = document.getElementById('results');
+
+     async function trainModel() {
+       try {
+         statusEl.textContent = 'Creating model...';
+
+         const model = new Sequential([
+           new Linear(2, 16),
+           new Tanh(),
+           new Linear(16, 8),
+           new ReLU(),
+           new Linear(8, 1)
+         ]);
+
+         const X = [[0,0], [0,1], [1,0], [1,1]];
+         const y = [[0], [1], [1], [0]];
+
+         const criterion = new MSELoss();
+         const optimizer = new Adam(model.parameters(), 0.1);
+         const scheduler = new StepLR(optimizer, 25, 0.5);
+
+         trainingLogEl.innerHTML = '<h4>Training Progress:</h4>';
+         const logList = document.createElement('ul');
+         trainingLogEl.appendChild(logList);
+
+         statusEl.textContent = 'Training...';
+
+         for (let epoch = 0; epoch < 1000; epoch++) {
+           const pred = model.forward(X);
+           const loss = criterion.forward(pred, y);
+           const grad = criterion.backward();
+           model.backward(grad);
+           optimizer.step();
+           scheduler.step();
+
+           if (epoch % 100 === 0) {
+             const logItem = document.createElement('li');
+             logItem.textContent = `Epoch ${epoch}: Loss = ${loss.toFixed(6)}`;
+             logList.appendChild(logItem);
+
+             // Update status every 100 epochs
+             statusEl.textContent = `Training... Epoch ${epoch}/1000 (Loss: ${loss.toFixed(6)})`;
+
+             await new Promise(resolve => setTimeout(resolve, 10));
+           }
+         }
+
+         statusEl.textContent = 'Training completed!';
+         statusEl.style.color = 'green';
+
+         resultsEl.innerHTML = '<h4>XOR Predictions:</h4>';
+         const resultsTable = document.createElement('table');
+         resultsTable.style.border = '1px solid #ccc';
+         resultsTable.style.borderCollapse = 'collapse';
+         resultsTable.style.width = '300px';
+
+         // Table header
+         const headerRow = document.createElement('tr');
+         ['Input A', 'Input B', 'Prediction', 'Target'].forEach(text => {
+           const th = document.createElement('th');
+           th.textContent = text;
+           th.style.border = '1px solid #ccc';
+           th.style.padding = '8px';
+           headerRow.appendChild(th);
+         });
+         resultsTable.appendChild(headerRow);
+
+         const predictions = model.forward(X);
+         predictions.forEach((pred, i) => {
+           const row = document.createElement('tr');
+
+           const cell1 = document.createElement('td');
+           cell1.textContent = X[i][0];
+           cell1.style.border = '1px solid #ccc';
+           cell1.style.padding = '8px';
+           cell1.style.textAlign = 'center';
+
+           const cell2 = document.createElement('td');
+           cell2.textContent = X[i][1];
+           cell2.style.border = '1px solid #ccc';
+           cell2.style.padding = '8px';
+           cell2.style.textAlign = 'center';
+
+           const cell3 = document.createElement('td');
+           cell3.textContent = pred[0].toFixed(4);
+           cell3.style.border = '1px solid #ccc';
+           cell3.style.padding = '8px';
+           cell3.style.textAlign = 'center';
+           cell3.style.fontWeight = 'bold';
+           cell3.style.color = Math.abs(pred[0] - y[i][0]) < 0.1 ? 'green' : 'red';
+
+           const cell4 = document.createElement('td');
+           cell4.textContent = y[i][0];
+           cell4.style.border = '1px solid #ccc';
+           cell4.style.padding = '8px';
+           cell4.style.textAlign = 'center';
+
+           row.appendChild(cell1);
+           row.appendChild(cell2);
+           row.appendChild(cell3);
+           row.appendChild(cell4);
+           resultsTable.appendChild(row);
+         });
+
+         resultsEl.appendChild(resultsTable);
+
+         const summary = document.createElement('div');
+         summary.style.marginTop = '20px';
+         summary.style.padding = '10px';
+         summary.style.backgroundColor = '#f0f0f0';
+         summary.style.borderRadius = '5px';
+         summary.innerHTML = `
+           <p><strong>Model Architecture:</strong> 2 → 16 → 8 → 1</p>
+           <p><strong>Activation:</strong> Tanh → ReLU</p>
+           <p><strong>Loss Function:</strong> MSE</p>
+           <p><strong>Optimizer:</strong> Adam (LR: 0.1)</p>
+           <p><strong>Epochs:</strong> 1000</p>
+         `;
+         resultsEl.appendChild(summary);
+
+       } catch (error) {
+         statusEl.textContent = `Error: ${error.message}`;
+         statusEl.style.color = 'red';
+         console.error(error);
+       }
+     }
+
+     trainModel();
+   </script>
+ </body>
+ </html>
+ ```
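+
+ If you prefer the global build over ESM imports, the same exports are attached to a global `JST` object when the package entry is loaded as a module script (see `package/index.js` below). A minimal sketch; the exact CDN bundle path is an assumption, so verify it against the published package:
+
+ ```html
+ <script type="module" src="https://unpkg.com/mini-jstorch/index.js"></script>
+ <script type="module">
+   // module scripts execute in document order, so window.JST is set by now
+   const { Sequential, Linear, ReLU } = window.JST;
+   const model = new Sequential([new Linear(2, 4), new ReLU(), new Linear(4, 1)]);
+   console.log(model.forward([[0, 1]])); // a single untrained prediction
+ </script>
+ ```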

 ---

- ## Core Features
+ # Core Features

- - **Layers:** Linear, Flatten, Conv2D
- - **Activations:** ReLU, Sigmoid, Tanh, LeakyReLU, GELU, Mish, SiLU, ELU
- - **Loss Functions:** MSELoss, CrossEntropyLoss
- - **Optimizers:** Adam, SGD, LION, AdamW
- - **Schedulers:** StepLR, LambdaLR, ReduceLROnPlateau
- - **Regularization:** Dropout, BatchNorm2D
- - **Utilities:** zeros, randomMatrix, softmax, crossEntropy, dot, addMatrices, reshape, stack, flatten, eye, concat
- - **Model Container:** Sequential (for stacking layers with forward/backward passes)
+ # Layers

- # Others
+ - Linear
+ - Flatten
+ - Conv2D (*experimental*)

- - **Tokenizer**
- - **Softmax Layer**
+ # Activations
+
+ - ReLU
+ - Sigmoid
+ - Tanh
+ - LeakyReLU
+ - GELU
+ - Mish
+ - SiLU
+ - ELU
+
+ # Loss Functions
+
+ - MSELoss
+ - CrossEntropyLoss (*legacy*)
+ - SoftmaxCrossEntropyLoss (**recommended**)
+ - BCEWithLogitsLoss (**recommended**)
+
+ # Optimizers
+
+ - SGD
+ - Adam
+ - AdamW
+ - Lion
+
+ # Learning Rate Schedulers
+
+ - StepLR
+ - LambdaLR
+ - ReduceLROnPlateau
+
+ # Regularization
+
+ - Dropout (*basic*, *educational*)
+ - BatchNorm2D (*experimental*)
+
+ # Utilities
+
+ - zeros
+ - randomMatrix
+ - dot
+ - addMatrices
+ - reshape
+ - stack
+ - flatten
+ - concat
+ - softmax
+ - crossEntropy
+
+ # Model Container
+
+ - Sequential

 ---
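+ The utilities listed above are plain functions over nested JavaScript arrays. A minimal sketch of three of them, with signatures taken from the source shown later in this diff (the bare package import assumes the root re-exports them, as the Save & Load example below does):
+
+ ```javascript
+ import { zeros, softmax, crossEntropy } from "mini-jstorch";
+
+ // zeros(rows, cols) builds a rows x cols matrix filled with 0
+ const m = zeros(2, 3); // [[0,0,0],[0,0,0]]
+
+ // softmax over one row of raw scores -> probabilities summing to 1
+ const probs = softmax([1, 2, 3]);
+
+ // cross-entropy of predicted probabilities against a one-hot target
+ console.log(m, probs, crossEntropy(probs, [0, 0, 1]));
+ ```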

- ## Installation
+ # Installation

 ```bash
 npm install mini-jstorch
- # Node.js v20+ recommended for best performance
 ```
+ Node.js v18+ or any modern browser with ES module support is recommended.

 ---

- ## Quick Start Example
+ # Quick Start (Recommended Losses)
+
+ # Multi-class Classification (SoftmaxCrossEntropyLoss)

 ```javascript
- import { Sequential, Linear, ReLU, Sigmoid, CrossEntropyLoss, Adam, StepLR } from './src/jstorch.js';
+ import {
+   Sequential,
+   Linear,
+   ReLU,
+   SoftmaxCrossEntropyLoss,
+   Adam
+ } from "./src/jstorch.js";

- // Build model
 const model = new Sequential([
-   new Linear(2,4),
+   new Linear(2, 4),
   new ReLU(),
-   new Linear(4,2),
-   new Sigmoid()
+   new Linear(4, 2) // logits output
 ]);

- // Sample XOR dataset
 const X = [
   [0,0], [0,1], [1,0], [1,1]
 ];
+
 const Y = [
   [1,0], [0,1], [0,1], [1,0]
 ];

- // Loss & optimizer
- const lossFn = new CrossEntropyLoss();
+ const lossFn = new SoftmaxCrossEntropyLoss();
 const optimizer = new Adam(model.parameters(), 0.1);
- const scheduler = new StepLR(optimizer, 20, 0.5); // Halve LR every 20 epochs
-
- // Training loop
- for (let epoch = 1; epoch <= 100; epoch++) {
-   const pred = model.forward(X);
-   const loss = lossFn.forward(pred, Y);
-   const gradLoss = lossFn.backward();
-   model.backward(gradLoss);
+
+ for (let epoch = 1; epoch <= 300; epoch++) {
+   const logits = model.forward(X);
+   const loss = lossFn.forward(logits, Y);
+   const grad = lossFn.backward();
+   model.backward(grad);
   optimizer.step();
-   scheduler.step();
-   if (epoch % 20 === 0) console.log(`Epoch ${epoch}, Loss: ${loss.toFixed(4)}, LR: ${optimizer.lr.toFixed(4)}`);
+
+   if (epoch % 50 === 0) {
+     console.log(`Epoch ${epoch}, Loss: ${loss.toFixed(4)}`);
+   }
 }
+ ```
+ Do not combine `SoftmaxCrossEntropyLoss` with a `Softmax` layer; the loss applies softmax internally.
+
+ # Binary Classification (BCEWithLogitsLoss)

- // Prediction
- const predTest = model.forward(X);
- predTest.forEach((p,i) => {
-   const predictedClass = p.indexOf(Math.max(...p));
-   console.log(`Input: ${X[i]}, Predicted class: ${predictedClass}, Raw output: ${p.map(v => v.toFixed(3))}`);
- });
+ ```javascript
+ import {
+   Sequential,
+   Linear,
+   ReLU,
+   BCEWithLogitsLoss,
+   Adam
+ } from "./src/jstorch.js";
+
+ const model = new Sequential([
+   new Linear(2, 4),
+   new ReLU(),
+   new Linear(4, 1) // logit
+ ]);
+
+ const X = [
+   [0,0], [0,1], [1,0], [1,1]
+ ];
+
+ const Y = [
+   [0], [1], [1], [0]
+ ];
+
+ const lossFn = new BCEWithLogitsLoss();
+ const optimizer = new Adam(model.parameters(), 0.1);
+
+ for (let epoch = 1; epoch <= 300; epoch++) {
+   const logits = model.forward(X);
+   const loss = lossFn.forward(logits, Y);
+   const grad = lossFn.backward();
+   model.backward(grad);
+   optimizer.step();
+ }
 ```
+ Do not combine `BCEWithLogitsLoss` with a `Sigmoid` layer; the loss applies the sigmoid internally.

 ---

- ## Save & Load Models
+ # Save & Load Models

 ```javascript
- import { saveModel, loadModel, Sequential } from '.jstorch.js';
+ import { saveModel, loadModel, Sequential } from "mini-jstorch";

 const json = saveModel(model);
 const model2 = new Sequential([...]); // same architecture
@@ -103,13 +370,12 @@ loadModel(model2, json);

 ---
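+ A complete save/load round trip, as a sketch (the layer sizes are illustrative; `loadModel` expects a model built with the same architecture the weights were saved from):
+
+ ```javascript
+ import { saveModel, loadModel, Sequential, Linear, ReLU } from "mini-jstorch";
+
+ const model = new Sequential([new Linear(2, 4), new ReLU(), new Linear(4, 1)]);
+
+ // serialize the model's weights to JSON
+ const json = saveModel(model);
+
+ // rebuild an identical architecture, then restore the weights into it
+ const model2 = new Sequential([new Linear(2, 4), new ReLU(), new Linear(4, 1)]);
+ loadModel(model2, json);
+ ```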

- ## Demos & Testing
+ # Demos

- Check the `demo/` directory for ready-to-run demos:
- - **demo/MakeModel.js:** Build and run a simple neural network.
- - **demo/scheduler.js:** Experiment with learning rate schedulers.
- - **demo/fu_fun.js:** Test all user-friendly (fu or For Users/Friendly Users) functions
- - Add your own scripts for quick prototyping!
+ See the `demo/` directory for runnable examples:
+ - `demo/MakeModel.js`: simple training loop
+ - `demo/scheduler.js`: learning rate schedulers
+ - `demo/fu_fun.js`: utility functions

 ```bash
 node demo/MakeModel.js
@@ -119,17 +385,30 @@ node demo/fu_fun.js

 ---

- ## Intended Use Cases
+ # Design Notes & Limitations

- - Rapid prototyping of neural networks in frontend.
- - Learning and teaching foundational neural network concepts.
- - Experimentation on low-end devices or mobile browsers.
- - Lightweight AI projects without GPU dependency.
+ - Training logic is 2D-first: `[batch][features]`
+ - Higher-dimensional data is reshaped internally by specific layers (e.g. Conv2D, Flatten)
+ - No automatic broadcasting or autograd graph
+ - Some components (Conv2D, BatchNorm2D, Dropout) are educational / experimental
+ - Not intended for large-scale or production ML workloads

 ---

+ # Intended Use Cases
+
+ - Learning how neural networks work internally
+ - Teaching ML fundamentals
+ - Small experiments in Node.js or the browser
+ - Lightweight AI demos without GPU or large frameworks
+
+ ---
+
 # License

- `MIT License`
+ MIT License
+
+ Copyright (c) 2024 rizal-editors

- **Copyright (c) 2024 rizal-editors**
+ ---
package/index.js CHANGED
@@ -1,7 +1,6 @@
 // package root
- // ugh, i forgot provide JST can use in browser global scope...

- // now we provided JST use in browser global scope
+ // provide JST in browser global scope
 import * as JST from './src/jstorch.js';

 if (typeof window !== 'undefined') {
package/package.json CHANGED
@@ -1,29 +1,19 @@
 {
   "name": "mini-jstorch",
-   "version": "1.7.1",
+   "version": "1.8.0",
   "type": "module",
   "description": "A lightweight JavaScript neural network library for learning AI concepts and rapid Frontend experimentation. PyTorch-inspired, zero dependencies, perfect for educational use.",
   "main": "index.js",
   "keywords": [
-     "neural-network",
-     "JST",
-     "javascript",
-     "lightweight-torch",
-     "lightweight",
-     "small-torch",
+     "lightweight-ml",
     "javascript-torch",
-     "jstorch",
     "front-end-torch",
-     "machine-learning",
     "tiny-ml",
-     "frontend-nn",
-     "frontend-ai",
-     "mini-neural-network"
+     "mini-neural-network",
+     "mini-ml-library",
+     "mini-js-ml",
+     "educational-ml"
   ],
   "author": "Rizal",
-   "license": "MIT",
-   "repository": {
-     "type": "git",
-     "url": "https://github.com/rizal-editors/mini-jstorch.git"
-   }
+   "license": "MIT"
 }
package/src/jstorch.js CHANGED
@@ -1,9 +1,8 @@
 /*!
- * Project: mini-jstorch
 * File: jstorch.js
- * Author: Rizal-editors
+ * Author: rizal-editors
 * License: MIT
- * Copyright (C) 2025 Rizal-editors
+ * Copyright (C) 2025 rizal-editors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -30,7 +29,7 @@
 // See the Documentation for more details.
 // --------------------------------------------------------------

- // ---------------------- DONOT USE THESE (ENGINE INTERNALS) ----------------------
+ // ---------------------- DO NOT USE THESE (ENGINE INTERNALS); ERRORS/BUGS ARE EXPECTED ----------------------
 export function zeros(rows, cols) {
   return Array.from({length:rows},()=>Array(cols).fill(0));
 }
@@ -80,7 +79,7 @@ export function crossEntropy(pred,target){
   return -target.reduce((sum,t,i)=>sum+t*Math.log(pred[i]+eps),0);
 }

- // ---------------------- USERS FRIENDLY UTILS (USE THIS!) ----------------
+ // ---------------------- USER-FRIENDLY UTILS (USE THESE!) ----------------
 export function fu_tensor(data, requiresGrad = false) {
   if (!Array.isArray(data) || !Array.isArray(data[0])) {
     throw new Error("fu_tensor: Data must be 2D array");
@@ -751,6 +750,69 @@ export class Dropout{ constructor(p=0.5){ this.p=p; } forward(x){ return x.map(r
 export class MSELoss{ forward(pred,target){ this.pred=pred; this.target=target; const losses=pred.map((row,i)=>row.reduce((sum,v,j)=>sum+(v-target[i][j])**2,0)/row.length); return losses.reduce((a,b)=>a+b,0)/pred.length; } backward(){ return this.pred.map((row,i)=>row.map((v,j)=>2*(v-this.target[i][j])/row.length)); } }
 export class CrossEntropyLoss{ forward(pred,target){ this.pred=pred; this.target=target; const losses=pred.map((p,i)=>crossEntropy(softmax(p),target[i])); return losses.reduce((a,b)=>a+b,0)/pred.length; } backward(){ return this.pred.map((p,i)=>{ const s=softmax(p); return s.map((v,j)=>(v-this.target[i][j])/this.pred.length); }); } }

+ export class SoftmaxCrossEntropyLoss {
+   forward(logits, targets) {
+     this.targets = targets;
+     const batch = logits.length;
+
+     // stable softmax: subtract the row max before exponentiating
+     this.probs = logits.map(row => {
+       const max = Math.max(...row);
+       const exps = row.map(v => Math.exp(v - max));
+       const sum = exps.reduce((a,b)=>a+b, 0);
+       return exps.map(v => v / sum);
+     });
+
+     let loss = 0;
+     for (let i = 0; i < batch; i++) {
+       for (let j = 0; j < this.probs[i].length; j++) {
+         if (targets[i][j] === 1) {
+           loss -= Math.log(this.probs[i][j] + 1e-12);
+         }
+       }
+     }
+
+     return loss / batch;
+   }
+
+   backward() {
+     const batch = this.targets.length;
+     return this.probs.map((row,i) =>
+       row.map((p,j) => (p - this.targets[i][j]) / batch)
+     );
+   }
+ }
+
+ export class BCEWithLogitsLoss {
+   forward(logits, targets) {
+     this.logits = logits;
+     this.targets = targets;
+     const batch = logits.length;
+     let loss = 0;
+
+     for (let i = 0; i < batch; i++) {
+       for (let j = 0; j < logits[i].length; j++) {
+         const x = logits[i][j];
+         const y = targets[i][j];
+         // stable BCE: max(x,0) - x*y + log(1 + exp(-|x|))
+         loss += Math.max(x, 0) - x*y + Math.log(1 + Math.exp(-Math.abs(x)));
+       }
+     }
+
+     return loss / batch;
+   }
+
+   backward() {
+     const batch = this.logits.length;
+     return this.logits.map((row,i) =>
+       row.map((x,j) => {
+         const sigmoid = 1 / (1 + Math.exp(-x));
+         return (sigmoid - this.targets[i][j]) / batch;
+       })
+     );
+   }
+ }
+

 // ---------------------- Optimizers ----------------------
 export class Adam{
   constructor(params, lr = 0.001, b1 = 0.9, b2 = 0.999, eps = 1e-8, max_grad_norm = 1.0){
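
For reference, a minimal sketch exercising the two new loss classes above directly in Node (the bare package import assumes the root re-exports them, as the README examples do). The BCE forward uses the standard stable form `max(x, 0) - x*y + log(1 + exp(-|x|))`, so large-magnitude logits cannot overflow `Math.exp`:

```javascript
import { SoftmaxCrossEntropyLoss, BCEWithLogitsLoss } from "mini-jstorch";

// multi-class: raw logits in, one-hot targets, scalar mean loss out
const sce = new SoftmaxCrossEntropyLoss();
const logits = [[2.0, 0.5], [0.1, 1.5]];
const targets = [[1, 0], [0, 1]];
console.log(sce.forward(logits, targets)); // mean negative log-likelihood
console.log(sce.backward());               // (probs - targets) / batch, same shape as logits

// binary: one logit per output, targets in {0, 1}
const bce = new BCEWithLogitsLoss();
console.log(bce.forward([[0.8], [-1.2]], [[1], [0]]));
console.log(bce.backward());               // (sigmoid(x) - y) / batch
```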