@genai-fi/nanogpt 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/TeachableLLM.d.ts +4 -3
- package/dist/Trainer.js +20 -13
- package/dist/training/FullTrainer.js +40 -32
- package/dist/training/LayerTrainer.js +32 -27
- package/dist/training/Trainer.d.ts +6 -1
- package/package.json +1 -1
package/dist/TeachableLLM.d.ts
CHANGED

@@ -1,10 +1,11 @@
  import { GPTConfig } from './config';
  import { ITokeniser } from './tokeniser/type';
- import { default as NanoGPT } from './NanoGPTModel';
+ import { default as NanoGPT, TrainingLogEntry } from './NanoGPTModel';
  import { SaveOptions } from './utilities/save';
  import { default as Generator, IGenerateOptions } from './Generator';
  import { default as Trainer, ITrainerOptions } from './Trainer';
  import { default as MemoryProfiler } from './utilities/profile';
+ import { TrainingProgress } from './training/Trainer';
  type TeachableLLMStatus = 'warmup' | 'awaitingTokens' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
  export default class TeachableLLM {
      private ee;
@@ -36,11 +37,11 @@ export default class TeachableLLM {
      dispose(): void;
      on(event: 'status', listener: (status: TeachableLLMStatus) => void): void;
      on(event: 'error', listener: (error: Error) => void): void;
-     on(event: 'trainStep', listener: (step:
+     on(event: 'trainStep', listener: (step: TrainingLogEntry, progress: TrainingProgress) => void): void;
      on(event: 'loaded', listener: () => void): void;
      off(event: 'status', listener: (status: TeachableLLMStatus) => void): void;
      off(event: 'error', listener: (error: Error) => void): void;
-     off(event: 'trainStep', listener: (step:
+     off(event: 'trainStep', listener: (step: TrainingLogEntry, progress: TrainingProgress) => void): void;
      off(event: 'loaded', listener: () => void): void;
  }
  export {};
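The practical effect of this change is that `trainStep` listeners now receive the per-step log entry together with a `TrainingProgress` summary. A minimal sketch of the new listener shape, using structural stand-ins for the two types (field names are inferred from the trainer code and Trainer.d.ts later in this diff, not taken from NanoGPTModel directly):

    // Sketch only: structural stand-ins for TrainingLogEntry and TrainingProgress.
    interface StepEntry { step: number; loss: number; time: number; batchSize: number; }
    interface Progress { duration: number; totalSamples: number; samplesPerSecond: number; }

    function onTrainStep(step: StepEntry, progress: Progress): void {
      console.log(
        `step ${step.step}: loss=${step.loss.toFixed(3)}`,
        `(${progress.samplesPerSecond.toFixed(1)} samples/s, ${progress.totalSamples} samples seen)`
      );
    }

    // llm.on('trainStep', onTrainStep);  // where `llm` is a TeachableLLM instance created elsewhere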
package/dist/Trainer.js
CHANGED

@@ -1,10 +1,10 @@
- import { E as
- import
- class
+ import { E as m } from "./index-Dwqa6Zy2.js";
+ import d from "./training/FullTrainer.js";
+ class S extends m {
    trainer;
    hasTrained = !1;
    constructor(e, t) {
-     super(), this.trainer = new
+     super(), this.trainer = new d(e, t, 1e-3);
    }
    stop() {
      this.trainer.stop();
@@ -13,28 +13,35 @@ class c extends l {
      this.hasTrained = !1, this.trainer.reset();
    }
    async train(e, t) {
-     const { trainDataset:
+     const { trainDataset: s, validationDataset: n } = await this.trainer.createTrainValidationSplit(
        e,
        t?.batchSize || 32,
        t?.validationSplit || 0.1
-     );
+     ), r = e.reduce((i, a) => i + a.length, 0) * (1 - (t?.validationSplit || 0));
      this.hasTrained || this.trainer.setLearningRate(t?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start"), await this.trainer.trainOnDataset(
-
+       s,
        {
          prompt: t?.prompt,
          logInterval: t?.logInterval || 10,
          desiredLoss: t?.desiredLoss || 0.01,
          maxSteps: t?.maxSteps || 1e3,
-         onStep: async (i) => {
-           const
-           for (const
-             await
+         onStep: async (i, a) => {
+           const l = this.listeners("log");
+           for (const h of l)
+             await h(i, {
+               ...a,
+               progress: a.totalSamples / r,
+               remaining: Math.max(
+                 0,
+                 (r - a.totalSamples) / a.totalSamples * a.duration
+               )
+             });
          }
        },
-
+       n
      ), this.emit("stop");
    }
  }
  export {
-
+   S as default
  };
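Here `r` estimates the number of training samples (the summed length of the inputs scaled by the non-validation fraction), and each emitted step now carries a completion fraction plus a remaining-time estimate extrapolated from the rate observed so far. A readable restatement of that arithmetic, with illustrative names rather than the minified identifiers above:

    // Illustrative restatement of the progress/remaining calculation added above.
    function describeProgress(
      expectedTrainingSamples: number, // `r` above: total input length × (1 − validationSplit)
      samplesSeen: number,             // progress.totalSamples reported by the trainer
      elapsedMs: number                // progress.duration reported by the trainer
    ): { progress: number; remainingMs: number } {
      const progress = samplesSeen / expectedTrainingSamples;
      // The ETA assumes the throughput observed so far continues unchanged.
      const remainingMs = Math.max(0, ((expectedTrainingSamples - samplesSeen) / samplesSeen) * elapsedMs);
      return { progress, remainingMs };
    }

    // Example: 8000 of 10000 samples after 40000 ms → progress 0.8, remainingMs 10000.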
package/dist/training/FullTrainer.js
CHANGED

@@ -1,69 +1,77 @@
- import { generateText as
- import
- import
+ import { generateText as v } from "../utilities/generate.js";
+ import L from "./Trainer.js";
+ import x from "./Evaluator.js";
  import { a as h } from "../index-pWA4_lUh.js";
- const
+ const D = {
    desiredLoss: 0.01,
    logInterval: 1,
    maxSteps: 1e3
  };
- class
+ class E extends L {
    constructor(r, i, o = 3e-4) {
      super(r, i, o);
    }
    // Train for multiple epochs using Dataset API - FIXED memory leaks
    async trainOnDataset(r, i, o) {
-     const { desiredLoss:
-     ...
+     const { desiredLoss: u, logInterval: d, onStep: l, prompt: c, maxSteps: g } = {
+       ...D,
        ...i
-     }, t = {
+     }, n = Date.now(), t = {
        step: 0,
        lastLoss: 1e6,
        totalSteps: 0,
        losses: [],
        validationLosses: [],
+       logStartTime: n,
+       trainingDuration: 0,
        ...this.lastState || {}
      };
-     this.lastState = t, this.dummyPass(), this.model.trainable = !0;
-     const
-     this.running = !0;
-     const c = o ? new g(this.model, o) : void 0, v = await r.iterator();
+     this.lastState = t, this.dummyPass(), this.model.trainable = !0, this.running = !0, t.logStartTime = n;
+     const m = o ? new x(this.model, o) : void 0, S = await r.iterator();
      try {
-       for (; this.running && !(t.lastLoss <
-         const
-         if (
-         const p =
+       for (; this.running && !(t.lastLoss < u); ) {
+         const a = await S.next();
+         if (a.done) break;
+         const p = a.value, f = this.trainBatch(t, p), s = {
            loss: t.lastLoss,
            step: t.step,
-           time: Date.now() -
+           time: Date.now() - n,
            batchSize: p.xs.shape[0]
          };
-         if (this.model.log.push(
-
+         if (this.model.log.push(s), t.step % d === 0) {
+           await f;
+           const w = Date.now();
+           if (t.trainingDuration += w - t.logStartTime, m)
             try {
-              const
-              t.validationLosses.push(
-            } catch (
-              console.error("Validation error:",
+              const e = await m.evaluate(5);
+              t.validationLosses.push(e), s.valLoss = e;
+            } catch (e) {
+              console.error("Validation error:", e);
             }
-           if (
-             if (
-               const
+           if (l) {
+             if (c) {
+               const T = await v(this.tokenizer, this.model, c, 100, {
                  temperature: 0.8
                });
-
+               s.example = T;
             }
-
+             const e = {
+               duration: t.trainingDuration,
+               totalSamples: t.totalSteps * s.batchSize,
+               samplesPerSecond: t.totalSteps * s.batchSize / (t.trainingDuration / 1e3)
+             };
+             await l(s, e);
           }
+           t.logStartTime = Date.now();
         }
-        t.step >=
+        t.step >= g && this.stop();
       }
-     } catch (
-       throw console.error("Training error:",
+     } catch (a) {
+       throw console.error("Training error:", a), h(), a;
     }
     return h(), this.running = !1, { losses: t.losses, validationLosses: t.validationLosses };
   }
 }
 export {
-
+  E as default
 };
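The key addition here is the `logStartTime`/`trainingDuration` pair: the duration counter is advanced only up to the moment a log interval begins and is restarted after logging, so validation and sample generation are excluded from the throughput figure passed to `onStep`. A sketch of how that payload follows from the accumulated state (illustrative names; the shipped code does this inline with minified identifiers):

    // How the new TrainingProgress argument is derived from the training state.
    interface TrainingProgress { duration: number; totalSamples: number; samplesPerSecond: number; }

    function buildProgress(totalSteps: number, batchSize: number, trainingDurationMs: number): TrainingProgress {
      const totalSamples = totalSteps * batchSize;          // steps so far × samples per batch
      return {
        duration: trainingDurationMs,                       // training time only, logging pauses excluded
        totalSamples,
        samplesPerSecond: totalSamples / (trainingDurationMs / 1e3),
      };
    }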
package/dist/training/LayerTrainer.js
CHANGED

@@ -14,19 +14,19 @@ class E extends v {
    trainingPattern = [];
    startPass = 0;
    startLayer = 0;
-   constructor(
-     if (super(
-       const i =
+   constructor(a, r, e = 3e-4) {
+     if (super(a, r, e), this.trainingPattern = w[a.config.nLayer - 1] || [], a.log.length > 0) {
+       const i = a.log[a.log.length - 1];
        i.pass !== void 0 && i.layer !== void 0 && (this.startPass = i.pass, this.startLayer = i.layer, console.log(`Resuming training from pass ${this.startPass}, layer ${this.startLayer}`));
      }
    }
-   applyTrainingPattern(
-     const r =
+   applyTrainingPattern(a) {
+     const r = a < this.trainingPattern.length ? a : this.trainingPattern.length - 1, e = this.trainingPattern[r];
      this.model.setSkipMask(e.skip), this.model.setTrainableMask(e.trainable), this.resetOptimizer(e.adam), console.log("Applied training pattern:", r, e);
    }
    // Train for multiple epochs using Dataset API - FIXED memory leaks
-   async trainOnDataset(
-     const { desiredLoss: i, logInterval: L, stepsPerLayer:
+   async trainOnDataset(a, r, e) {
+     const { desiredLoss: i, logInterval: L, stepsPerLayer: d, onLayerChange: l, onPassComplete: p, onStep: h, prompt: c } = {
        ...x,
        ...r
      }, t = {
@@ -37,50 +37,55 @@ class E extends v {
        lastLoss: 1e6,
        totalSteps: 0,
        losses: [],
-       validationLosses: []
+       validationLosses: [],
+       trainingDuration: 0
      };
      this.dummyPass();
-     const
+     const S = Date.now();
      this.startPass = 0, this.startLayer = 0;
-     const g = e ? new T(this.model, e) : void 0,
+     const g = e ? new T(this.model, e) : void 0, f = await a.iterator();
      this.applyTrainingPattern(t.layerStep % this.trainingPattern.length);
      try {
        for (; !(t.lastLoss < i); ) {
-         const
-         if (
-         const y =
+         const o = await f.next();
+         if (o.done) break;
+         const y = o.value, P = this.trainBatch(t, y);
          t.stepSinceLayerChange++;
-         const
+         const n = {
            loss: t.lastLoss,
            step: t.step,
-           time: Date.now() -
+           time: Date.now() - S,
            batchSize: y.xs.shape[0],
            pass: t.pass,
            layer: t.layerStep % this.model.config.nLayer
          };
-         if (this.model.log.push(
-           if (await
+         if (this.model.log.push(n), t.step % L === 0) {
+           if (await P, g)
             try {
-              const
-              t.validationLosses.push(
-            } catch (
-              console.error("Validation error:",
+              const s = await g.evaluate(5);
+              t.validationLosses.push(s), n.valLoss = s;
+            } catch (s) {
+              console.error("Validation error:", s);
             }
            if (h) {
              if (c) {
-               const
+               const s = await u(this.tokenizer, this.model, c, 100, {
                  temperature: 0.8,
                  topK: 10
                });
-
+               n.example = s;
             }
-             await h(
+             await h(n, {
+               duration: t.trainingDuration,
+               totalSamples: t.totalSteps * n.batchSize,
+               samplesPerSecond: t.totalSteps * n.batchSize / (t.trainingDuration / 1e3)
+             });
           }
         }
-        t.stepSinceLayerChange >=
+        t.stepSinceLayerChange >= d && (t.layerStep++, t.layerStep % this.model.config.nLayer === 0 ? (l && await l(t.layerStep, t.pass), p && await p(t.pass), t.pass++) : l && await l(t.layerStep, t.pass), t.stepSinceLayerChange = 0, this.applyTrainingPattern(t.layerStep % this.trainingPattern.length));
       }
-     } catch (
-       throw console.error("Training error:",
+     } catch (o) {
+       throw console.error("Training error:", o), m(), o;
     }
     return m(), { losses: t.losses, validationLosses: t.validationLosses };
   }
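For reference, the destructured options above imply the following shape for a layer-trainer `trainOnDataset` call; the option names come from the diff, while the values and callback bodies are purely illustrative:

    // Sketch of the layer-trainer options implied by the destructuring above.
    const layerTrainingOptions = {
      desiredLoss: 0.01,
      logInterval: 10,
      stepsPerLayer: 200,                         // steps before rotating to the next layer pattern
      onLayerChange: async (layerStep: number, pass: number) => {
        console.log(`layer step ${layerStep}, pass ${pass}`);
      },
      onPassComplete: async (pass: number) => {
        console.log(`pass ${pass} complete`);
      },
      onStep: async (
        log: { step: number; loss: number; pass?: number; layer?: number },
        progress: { duration: number; totalSamples: number; samplesPerSecond: number }
      ) => {
        console.log(`step ${log.step}: loss ${log.loss.toFixed(3)} (${progress.samplesPerSecond.toFixed(1)} samples/s)`);
      },
      prompt: 'Once upon a time',                 // optional: generate a sample at each log interval
    };
    // await layerTrainer.trainOnDataset(dataset, layerTrainingOptions, validationData);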
package/dist/training/Trainer.d.ts
CHANGED

@@ -12,6 +12,11 @@ export interface TrainingState {
      losses: number[];
      validationLosses: number[];
  }
+ export interface TrainingProgress {
+     duration: number;
+     totalSamples: number;
+     samplesPerSecond: number;
+ }
  export interface AdamConfig {
      learningRateFactor: number;
      beta1: number;
@@ -23,7 +28,7 @@ export interface TrainingOptions {
      logInterval: number;
      prompt?: string;
      maxSteps: number;
-     onStep?: (log: TrainingLogEntry) => Promise<void> | void;
+     onStep?: (log: TrainingLogEntry, progress: TrainingProgress) => Promise<void> | void;
  }
  export default abstract class GPTTrainer {
      protected tokenizer: ITokeniser;