@genai-fi/nanogpt 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.d.ts +11 -2
- package/dist/Generator.js +76 -63
- package/dist/TeachableLLM.js +28 -27
- package/dist/Trainer.d.ts +6 -1
- package/dist/Trainer.js +53 -19
- package/dist/training/FullTrainer.d.ts +15 -2
- package/dist/training/FullTrainer.js +97 -51
- package/dist/training/Trainer.d.ts +10 -0
- package/package.json +1 -1
package/dist/Generator.d.ts
CHANGED
@@ -9,11 +9,20 @@ export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
     private readonly model;
     private readonly tokeniser;
     private active;
+    private cache;
+    private initialPrompt;
+    private outputText;
+    private actualTokeniser;
+    private lastToken;
     constructor(model: NanoGPT, tokeniser: ITokeniser);
     private tokenisePrompt;
-    private generateNoCache;
     private processResponse;
-    private
+    private _generate;
+    reset(): void;
+    dispose(): void;
+    private initialise;
+    step(prompt?: string, options?: IGenerateOptions): Promise<string>;
     generate(prompt?: string, options?: IGenerateOptions): Promise<string>;
     stop(): void;
+    getText(): string;
 }
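
The declaration changes above turn the Generator into a stateful object: the new private fields hold a KV cache, the accumulated output text, and the last emitted token, and the public surface gains single-token stepping plus explicit lifecycle control. A minimal usage sketch of that surface, assuming `llm` is a loaded TeachableLLM (its `generator()` factory appears in the TeachableLLM.js diff below):

    const gen = llm.generator();
    await gen.generate("Once upon a time", { maxLength: 50 }); // multi-token generation
    await gen.step();           // emits exactly one further token (maxLength: 1 internally)
    console.log(gen.getText()); // full accumulated output, prompt included
    gen.reset();                // frees the KV cache and clears accumulated text
    gen.dispose();              // currently equivalent to reset()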
package/dist/Generator.js
CHANGED
@@ -1,4 +1,4 @@
-import { E as
+import { E as l } from "./index-Dwqa6Zy2.js";
 import "./index-BoWRt-10.js";
 import "./ops/cpu/attentionMask.js";
 import "./ops/webgl/attentionMask.js";
@@ -29,7 +29,7 @@ import "./ops/webgl/gatherSub.js";
 import "./ops/cpu/scatterSub.js";
 import "./ops/webgl/scatterSub.js";
 import "./jszip.min-CjP2V1VV.js";
-import
+import u from "./tokeniser/CharTokeniser.js";
 import "./ops/cpu/adamAdjust.js";
 import "./ops/webgl/adamAdjust.js";
 import "./ops/cpu/adamMoments.js";
@@ -39,10 +39,10 @@ import "./ops/cpu/gelu.js";
 import "./ops/webgl/gelu.js";
 import "./gelu-C-dPj6Ku.js";
 import "./ops/webgl/log.js";
-import { t as
-import { c as
+import { t as p } from "./tensor2d-wxPAnDQy.js";
+import { c as f } from "./concat-CsxrgovM.js";
 const k = [
-  ...Array.from({ length: 95 }, (
+  ...Array.from({ length: 95 }, (r, t) => String.fromCharCode(t + 32)),
   // ASCII
   // Spanish accented letters and punctuation
   ..."áéíóúüñ¿¡",
@@ -53,80 +53,93 @@ const k = [
   // Cyrillic letters
   ..."абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ"
 ];
-function
-  return
+function d(r, t) {
+  return r.length === t ? r : r.length > t ? r.slice(0, t) : r.concat(Array(t - r.length).fill(""));
 }
-class
-  constructor(t,
-    super(), this.model = t, this.tokeniser =
+class nt extends l {
+  constructor(t, i) {
+    super(), this.model = t, this.tokeniser = i, this.actualTokeniser = i;
   }
   active = !1;
-
-
-
+  cache = null;
+  initialPrompt = null;
+  outputText = "";
+  actualTokeniser;
+  lastToken = -1;
+  async tokenisePrompt(t, i) {
+    const e = i ? await t.tokenise([i], !0) : [[t.eosToken]];
+    return p(e, [1, e[0].length], "int32");
   }
-  async
-
-
-    for (let m = 0; m < n && this.active; m++) {
-      const {
-        output: e,
-        attention: p,
-        probabilities: c
-      } = await this.model.generate(i, void 0, r), h = i;
-      i = g([i, e], 1), h.dispose();
-      const l = await this.processResponse(t, e, p, c);
-      if (e.dispose(), l === null)
-        break;
-      s += l;
-    }
-    return i.dispose(), s;
-  }
-  async processResponse(t, o, r, i) {
-    const s = (await o.array())[0][0];
-    if (s === this.tokeniser.eosToken)
+  async processResponse(t, i, e, o) {
+    const s = (await i.array())[0][0];
+    if (this.lastToken = s, s === this.tokeniser.eosToken)
       return null;
     const n = await t.decode([s]);
-    let
-
-    let
-    return
+    let c;
+    e && (c = await Promise.all(e.map((h) => h.array().then((m) => m))), e.forEach((h) => h.dispose()));
+    let a;
+    return o && (a = await o.array(), o.dispose()), this.emit("tokens", [s], n, c, a), n;
   }
-  async
-    let i =
-    const
-    for (let
-      n[e] = { k: void 0, v: void 0, length: 0, cumulativeLength: 0 };
-    const m = r?.maxLength ?? 1e3;
-    for (let e = 0; e < m && this.active; e++) {
+  async _generate(t) {
+    let i = this.lastToken >= 0 && this.cache ? p([this.lastToken], [1, 1], "int32") : await this.tokenisePrompt(this.actualTokeniser, this.outputText);
+    const e = t?.maxLength ?? 1e3;
+    for (let o = 0; o < e && this.active; o++) {
       const {
-        output:
-        probabilities:
-        attention:
-      } = await this.model.generate(i,
-        ...
-        usePadding: !
+        output: s,
+        probabilities: n,
+        attention: c
+      } = await this.model.generate(i, this.cache ? this.cache : void 0, {
+        ...t,
+        usePadding: !this.cache
       });
-
-
-
+      if (this.cache)
+        i.dispose(), i = s;
+      else {
+        const h = i;
+        i = f([i, s], 1), h.dispose();
+      }
+      const a = await this.processResponse(this.actualTokeniser, s, c, n);
+      if (this.cache || s.dispose(), a === null)
        break;
-
+      this.outputText += a;
+    }
+    return i.dispose(), this.outputText;
+  }
+  reset() {
+    this.cache && (this.cache.forEach((t) => {
+      t && (t.k && t.k.dispose(), t.v && t.v.dispose());
+    }), this.cache = null), this.outputText = "", this.initialPrompt = null, this.lastToken = -1;
+  }
+  dispose() {
+    this.reset();
+  }
+  initialise(t, i) {
+    const e = t && t.length > this.model.config.gpt.blockSize ? t.slice(-this.model.config.gpt.blockSize) : t ?? null;
+    if (this.cache && i?.noCache && this.reset(), this.initialPrompt = e || null, this.lastToken === -1 && (this.outputText = this.initialPrompt || ""), !this.cache && !i?.noCache && this.model.config.gpt.useRope) {
+      const s = new Array(this.model.config.gpt.nLayer);
+      for (let n = 0; n < this.model.config.gpt.nLayer; n++)
+        s[n] = { k: void 0, v: void 0, length: 0, cumulativeLength: 0 };
+      this.cache = s, this.lastToken = -1;
    }
-
-
-    }), i.dispose(), s;
+    const o = this.tokeniser.trained ? this.tokeniser : new u(d(k, this.tokeniser.vocabSize));
+    this.actualTokeniser = o;
   }
-  async
-    const
-    this.
-
-
+  async step(t, i) {
+    const e = { ...i, maxLength: 1 };
+    return this.generate(t, e);
+  }
+  async generate(t, i) {
+    this.initialise(t, i), this.active = !0, this.emit("start");
+    const o = await this._generate(i);
+    return this.active = !1, this.emit("stop"), o;
   }
   stop() {
     this.active = !1;
   }
+  getText() {
+    return this.outputText;
+  }
 }
 export {
-
+  nt as default
 };
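
The implementation shows how the cache is used: once a cache exists, `_generate` feeds back only the last emitted token as a 1×1 int32 tensor and disables padding, instead of re-concatenating the whole sequence each step; `initialise` builds one cache slot per transformer layer, but only when `gpt.useRope` is enabled. A sketch of the per-layer slot shape as created above (the interface name is ours, for clarity only):

    import { Tensor } from "@tensorflow/tfjs-core";

    interface LayerKVSlot {
      k?: Tensor;               // cached keys, filled in by model.generate
      v?: Tensor;               // cached values
      length: number;           // tokens currently held for this layer
      cumulativeLength: number; // total tokens processed so far
    }
    // The Generator keeps an array of nLayer such slots and disposes
    // the k/v tensors again in reset().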
package/dist/TeachableLLM.js
CHANGED
@@ -1,12 +1,12 @@
 import { defaultConfig as _ } from "./config.js";
 import f from "./NanoGPTModel.js";
-import { saveModel as
-import { loadModel as
-import
+import { saveModel as d } from "./utilities/save.js";
+import { loadModel as l } from "./loader/load.js";
+import u from "./Generator.js";
 import p from "./Trainer.js";
-import { E as
+import { E as c } from "./index-Dwqa6Zy2.js";
 import { dummyPassTrainAsync as m } from "./utilities/dummy.js";
-import
+import g from "./tokeniser/CharTokeniser.js";
 import k from "./tokeniser/bpe.js";
 import "./papaparse.min-C8l2Kvo1.js";
 import "./index-Tf7vU29b.js";
@@ -49,7 +49,7 @@ import "./ops/cpu/adamAdjust.js";
 import "./ops/webgl/adamAdjust.js";
 import w from "./utilities/profile.js";
 class a {
-  ee = new
+  ee = new c();
   _config;
   _model;
   _tokeniser;
@@ -92,8 +92,8 @@ class a {
     return this._status === "busy" || this._status === "training";
   }
   estimateTrainingMemoryUsage(t) {
-    const e = this._memoryRequirements ?? { perBatch: 0, gradients: 0 },
-    return
+    const e = this._memoryRequirements ?? { perBatch: 0, gradients: 0 }, r = e.perBatch * t, o = e.gradients;
+    return r * 0.66 + o * 4;
   }
   setStatus(t) {
     this._status !== t && (this._status = t, this.ee.emit("status", t));
@@ -101,32 +101,32 @@ class a {
   saveModel(t) {
     if (!this._model || !this._tokeniser)
       throw new Error("model_or_tokeniser_not_initialized.");
-    return
+    return d(this._model, this._tokeniser, {
       ...t,
       name: t?.name || this.meta.name
     });
   }
   static loadModel(t) {
     const e = new a();
-    return
-      e._model =
-        e._memoryRequirements =
-      }).catch((
-        e.setStatus("error"), e.ee.emit("error",
+    return l(t).then(({ model: r, tokeniser: o, name: s }) => {
+      e._model = r, e._tokeniser = o, e._config = r.config, s && (e.meta.name = s), e.setStatus("warmup"), m(r).then((i) => {
+        e._memoryRequirements = i, e.setStatus("ready"), e.ee.emit("loaded");
+      }).catch((i) => {
+        e.setStatus("error"), e.ee.emit("error", i);
      });
-    }).catch((
-      e.setStatus("error"), e.ee.emit("error",
+    }).catch((r) => {
+      e.setStatus("error"), e.ee.emit("error", r);
    }), e;
  }
  static create(t, e = {}) {
-    const
-    return
-
-      h === "trained" &&
+    const r = { ..._, ...e }, o = t === "char" ? new g(r.vocabSize) : new k(r.vocabSize), s = new f(r), i = new a(o, s);
+    return i.setStatus("warmup"), m(s).then((n) => {
+      i._memoryRequirements = n, i.tokeniser.trained ? (i.setStatus("ready"), i.ee.emit("loaded")) : (i.setStatus("awaitingTokens"), i.ee.emit("loaded"), i.tokeniser.once("trainStatus", (h) => {
+        h === "trained" && i.setStatus("ready");
      }));
    }).catch((n) => {
-
-    }),
+      i.setStatus("error"), i.ee.emit("error", n);
+    }), i;
  }
  getProfiler() {
    return this._model?.getProfiler();
@@ -149,14 +149,15 @@ class a {
    if (!this._model || !this._tokeniser)
      throw new Error("model_or_tokeniser_not_initialized.");
    const t = new p(this._model, this._tokeniser);
-    return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e,
+    return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e, r) => {
      const o = this.ee.listeners("trainStep");
      for (const s of o)
-        await s(e,
+        await s(e, r);
    }), t;
  }
-  train(t, e) {
-
+  async train(t, e) {
+    const r = this.trainer();
+    await r.prepare(t, e), await r.train(e);
  }
  async trainTokeniser(t) {
    if (!this._tokeniser)
@@ -167,7 +168,7 @@ class a {
  generator() {
    if (!this._model || !this._tokeniser)
      throw new Error("model_or_tokeniser_not_initialized.");
-    const t = new
+    const t = new u(this._model, this._tokeniser);
    return t.on("start", () => {
      this.status === "ready" && this.setStatus("busy");
    }), t.on("stop", () => {
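
`train(t, e)` is now async and composes the new two-phase Trainer API, so callers can await completion instead of firing and forgetting. A sketch of both forms, assuming the class `a` above is the default TeachableLLM export and that `trainer()` is reachable as `train()` itself calls it:

    const llm = TeachableLLM.create("char");     // or TeachableLLM.loadModel(...)
    await llm.train(["some text", "more text"]); // prepare + train in one call

    // Long-hand equivalent, matching the new train() body:
    const trainer = llm.trainer();
    await trainer.prepare(["some text", "more text"], { batchSize: 32 });
    await trainer.train({ maxSteps: 1e3 });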
package/dist/Trainer.d.ts
CHANGED
@@ -14,8 +14,13 @@ export interface ITrainerOptions {
 export default class Trainer extends EE<'start' | 'stop' | 'log'> {
     private trainer;
     private hasTrained;
+    private trainDataset?;
+    private validationDataset?;
+    private totalSamples;
     constructor(model: NanoGPT, tokeniser: ITokeniser);
     stop(): void;
     reset(): void;
-
+    prepare(text: string[], options?: ITrainerOptions): Promise<void>;
+    train(options?: ITrainerOptions): Promise<void>;
+    step(options?: ITrainerOptions): Promise<void>;
 }
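
The Trainer now separates dataset preparation from the training loop and adds a resumable `step()`. A sketch of the step-wise loop this declaration enables (option names from ITrainerOptions as used in Trainer.js below; the loop condition is caller-defined and hypothetical):

    await trainer.prepare(corpus, { batchSize: 32, validationSplit: 0.1 });
    trainer.on("log", (entry, progress) => console.log(entry, progress));
    while (stillTraining) {
      await trainer.step(); // advances training to the next logged step
    }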
package/dist/Trainer.js
CHANGED
@@ -1,10 +1,13 @@
-import { E as
-import
-class p extends
+import { E as l } from "./index-Dwqa6Zy2.js";
+import h from "./training/FullTrainer.js";
+class p extends l {
   trainer;
   hasTrained = !1;
-
-
+  trainDataset;
+  validationDataset;
+  totalSamples = 0;
+  constructor(t, e) {
+    super(), this.trainer = new h(t, e, 1e-3);
   }
   stop() {
     this.trainer.stop();
@@ -12,36 +15,67 @@ class p extends h {
   reset() {
     this.hasTrained = !1, this.trainer.reset();
   }
-  async
-    const { trainDataset:
-
-
-
-    ),
+  async prepare(t, e) {
+    const { trainDataset: a, validationDataset: s } = await this.trainer.createTrainValidationSplit(
+      t,
+      e?.batchSize || 32,
+      e?.validationSplit || 0.1
+    ), i = t.reduce((r, n) => r + n.length, 0) * (1 - (e?.validationSplit || 0));
+    this.trainDataset = a, this.validationDataset = s, this.totalSamples = i;
+  }
+  async train(t) {
+    if (!this.trainDataset || !this.validationDataset)
+      throw new Error("Datasets not prepared");
     this.hasTrained || this.trainer.setLearningRate(t?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start"), await this.trainer.trainOnDataset(
-
+      this.trainDataset,
       {
         prompt: t?.prompt,
         logInterval: t?.logInterval || 10,
         desiredLoss: t?.desiredLoss || 0.01,
         maxSteps: t?.maxSteps || 1e3,
         advancedMetrics: t?.advancedMetrics || !1,
-        onStep: async (
-          const
-          for (const
-            await
+        onStep: async (e, a) => {
+          const s = this.listeners("log");
+          for (const i of s)
+            await i(e, {
              ...a,
-              progress: a.totalSamples /
+              progress: a.totalSamples / this.totalSamples,
              remaining: Math.max(
                0,
-                (
+                (this.totalSamples - a.totalSamples) / a.totalSamples * a.duration
              )
            });
        }
      },
-
+      this.validationDataset
    ), this.emit("stop");
  }
+  async step(t) {
+    if (!this.trainDataset || !this.validationDataset)
+      throw new Error("Datasets not prepared");
+    this.hasTrained || this.trainer.setLearningRate(t?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start");
+    const { log: e, progress: a } = await this.trainer.stepDataset(
+      this.trainDataset,
+      {
+        prompt: t?.prompt,
+        logInterval: t?.logInterval || 10,
+        desiredLoss: t?.desiredLoss || 0.01,
+        maxSteps: t?.maxSteps || 1e3,
+        advancedMetrics: t?.advancedMetrics || !1
+      },
+      this.validationDataset
+    ), s = this.listeners("log");
+    for (const i of s)
+      await i(e, {
+        ...a,
+        progress: a.totalSamples / this.totalSamples,
+        remaining: Math.max(
+          0,
+          (this.totalSamples - a.totalSamples) / a.totalSamples * a.duration
+        )
+      });
+    this.emit("stop");
+  }
 }
 export {
   p as default
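
Both `train()` and `step()` report the same derived figures: progress is the fraction of samples consumed, and the remaining-time estimate is a linear extrapolation of elapsed time over the samples still to process. Restated as a hypothetical helper (not part of the package API):

    function remainingMs(totalSamples: number, samplesSeen: number, elapsedMs: number): number {
      // samples still to process, scaled by the observed time per sample so far
      return Math.max(0, ((totalSamples - samplesSeen) / samplesSeen) * elapsedMs);
    }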
package/dist/training/FullTrainer.d.ts
CHANGED
@@ -1,10 +1,23 @@
 import { ITokeniser } from '../tokeniser/type';
-import { default as NanoGPT } from '../NanoGPTModel';
-import { default as GPTTrainer, TrainingOptions } from './Trainer';
+import { default as NanoGPT, TrainingLogEntry } from '../NanoGPTModel';
+import { default as GPTTrainer, TrainingOptions, TrainingProgress } from './Trainer';
 import { Tensor } from '@tensorflow/tfjs-core';
 import { Dataset } from '@tensorflow/tfjs-data';
 export default class FullTrainer extends GPTTrainer {
     constructor(model: NanoGPT, tokenizer: ITokeniser, learningRate?: number);
+    private createEmptyState;
+    private createLogEntry;
+    private createProgress;
+    stepDataset(dataset: Dataset<{
+        xs: Tensor;
+        ys: Tensor;
+    }>, options: Partial<TrainingOptions>, validationDataset?: Dataset<{
+        xs: Tensor;
+        ys: Tensor;
+    }>): Promise<{
+        log: TrainingLogEntry;
+        progress: TrainingProgress;
+    }>;
     trainOnDataset(dataset: Dataset<{
         xs: Tensor;
         ys: Tensor;
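
`stepDataset` is the new primitive underpinning Trainer.step(): it resolves with a single log/progress pair rather than running to completion. A direct-use sketch (the dataset pair comes from `createTrainValidationSplit`, declared on GPTTrainer in the training/Trainer.d.ts diff below):

    const { trainDataset, validationDataset } =
      await fullTrainer.createTrainValidationSplit(corpus, 32, 0.1);
    const { log, progress } = await fullTrainer.stepDataset(
      trainDataset,
      { logInterval: 10 },
      validationDataset
    );
    console.log(log.loss, progress.totalSamples);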
package/dist/training/FullTrainer.js
CHANGED
@@ -1,81 +1,127 @@
-import { generateText as
-import
-import
-import { d as
-import
-const
+import { generateText as v } from "../utilities/generate.js";
+import x from "./Trainer.js";
+import S from "./Evaluator.js";
+import { d as w } from "../index-BoWRt-10.js";
+import y from "../utilities/profile.js";
+const T = {
   desiredLoss: 0.01,
   logInterval: 1,
   maxSteps: 1e3
 };
-class
-  constructor(
-    super(
+class z extends x {
+  constructor(r, t, s = 3e-4) {
+    super(r, t, s);
   }
-
-
-    const { logInterval: g, onStep: l, prompt: c, maxSteps: u } = {
-      ...y,
-      ...e
-    }, n = Date.now(), t = {
+  createEmptyState() {
+    return {
      step: 0,
      lastLoss: 1e6,
      totalSteps: 0,
      losses: [],
      validationLosses: [],
-      logStartTime:
+      logStartTime: 0,
      trainingDuration: 0,
      ...this.lastState || {}
    };
-
-
+  }
+  createLogEntry(r, t, s, h) {
+    return {
+      loss: r.lastLoss,
+      step: r.step,
+      time: Date.now() - t,
+      batchSize: s,
+      learningRate: h ? this.optimizer.lr : void 0
+    };
+  }
+  createProgress(r, t, s) {
+    return {
+      duration: r.trainingDuration,
+      totalSamples: r.totalSteps * t.batchSize,
+      samplesPerSecond: r.totalSteps * t.batchSize / (r.trainingDuration / 1e3),
+      memory: s ? this.model.getProfiler()?.getPeakMemory() || 0 : void 0
+    };
+  }
+  async stepDataset(r, t, s) {
+    const { logInterval: h, prompt: m } = {
+      ...T,
+      ...t
+    }, g = Date.now(), a = this.createEmptyState();
+    this.lastState = a, await this.dummyPass(), this.model.trainable = !0, t?.advancedMetrics && (this.model.getProfiler() || (this.model.config.layerConfig.profiler = new y())), this.running = !0, a.logStartTime = g;
+    const p = s ? new S(this.model, s) : void 0, e = await r.iterator();
+    try {
+      for (; this.running; ) {
+        const i = await e.next();
+        if (i.done) break;
+        const u = i.value, o = this.trainBatch(a, u), n = this.createLogEntry(a, g, u.xs.shape[0], t?.advancedMetrics);
+        if (this.model.log.push(n), a.step % h === 0) {
+          await o.data();
+          const f = Date.now();
+          if (a.trainingDuration += f - a.logStartTime, p)
+            try {
+              const l = await p.evaluate(5);
+              a.validationLosses.push(l), n.valLoss = l;
+            } catch (l) {
+              console.error("Validation error:", l);
+            }
+          if (m) {
+            const l = await v(this.tokenizer, this.model, m, 100, {
+              temperature: 0.8
+            });
+            n.example = l;
+          }
+          const c = this.createProgress(a, n, t?.advancedMetrics);
+          return o.dispose(), this.stop(), { log: n, progress: c };
+        }
+        o.dispose();
+      }
+    } catch (i) {
+      throw console.error("Training error:", i), w(), i;
+    }
+    throw w(), this.running = !1, new Error("No log returned before training stopped.");
+  }
+  // Train for multiple epochs using Dataset API - FIXED memory leaks
+  async trainOnDataset(r, t, s) {
+    const { logInterval: h, onStep: m, prompt: g, maxSteps: a } = {
+      ...T,
+      ...t
+    }, p = Date.now(), e = this.createEmptyState();
+    this.lastState = e, await this.dummyPass(), this.model.trainable = !0, t?.advancedMetrics && (this.model.getProfiler() || (this.model.config.layerConfig.profiler = new y())), this.running = !0, e.logStartTime = p;
+    const i = s ? new S(this.model, s) : void 0, u = await r.iterator();
    try {
      for (; this.running; ) {
-        const o = await
+        const o = await u.next();
        if (o.done) break;
-        const
-
-
-
-
-          learningRate: e?.advancedMetrics ? this.optimizer.lr : void 0
-          //gradientNorm: options?.advancedMetrics ? await state.gradientNorm : undefined,
-        };
-        if (this.model.log.push(s), t.step % g === 0) {
-          await p.data();
-          const S = Date.now();
-          if (t.trainingDuration += S - t.logStartTime, m)
+        const n = o.value, f = this.trainBatch(e, n), c = this.createLogEntry(e, p, n.xs.shape[0], t?.advancedMetrics);
+        if (this.model.log.push(c), e.step % h === 0) {
+          await f.data();
+          const l = Date.now();
+          if (e.trainingDuration += l - e.logStartTime, i)
            try {
-              const
-
-            } catch (
-              console.error("Validation error:",
+              const d = await i.evaluate(5);
+              e.validationLosses.push(d), c.valLoss = d;
+            } catch (d) {
+              console.error("Validation error:", d);
            }
-          if (
-            if (
-              const
+          if (m) {
+            if (g) {
+              const L = await v(this.tokenizer, this.model, g, 100, {
                temperature: 0.8
              });
-
+              c.example = L;
            }
-            const
-
-              totalSamples: t.totalSteps * s.batchSize,
-              samplesPerSecond: t.totalSteps * s.batchSize / (t.trainingDuration / 1e3),
-              memory: e.advancedMetrics ? this.model.getProfiler()?.getPeakMemory() || 0 : void 0
-            };
-            await l(s, a);
+            const d = this.createProgress(e, c, t?.advancedMetrics);
+            await m(c, d);
          }
-
+          e.logStartTime = Date.now();
        }
-
+        f.dispose(), e.step >= a && this.stop();
      }
    } catch (o) {
-      throw console.error("Training error:", o),
+      throw console.error("Training error:", o), w(), o;
    }
-    return
+    return w(), this.running = !1, { losses: e.losses, validationLosses: e.validationLosses };
  }
 }
 export {
-
+  z as default
 };
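
Two implementation details worth noting in `stepDataset`: `createEmptyState()` spreads `this.lastState`, so successive calls resume step counters and loss history rather than starting over, and the method throws if the dataset is exhausted before a `logInterval` boundary produces a log. A sketch of an interactive loop built on that resumability (loop condition hypothetical):

    let converged = false;
    while (!converged) {
      const { log } = await fullTrainer.stepDataset(trainDataset, {}, validationDataset);
      converged = log.loss < 0.01; // same threshold as the desiredLoss default above
    }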
package/dist/training/Trainer.d.ts
CHANGED
@@ -66,6 +66,16 @@ export default abstract class GPTTrainer {
         losses: number[];
         validationLosses: number[];
     }>;
+    abstract stepDataset(dataset: Dataset<{
+        xs: Tensor;
+        ys: Tensor;
+    }>, options: Partial<TrainingOptions>, validationDataset?: Dataset<{
+        xs: Tensor;
+        ys: Tensor;
+    }>): Promise<{
+        log: TrainingLogEntry;
+        progress: TrainingProgress;
+    }>;
     createTrainValidationSplit(textData: string[], batchSize?: number, validationSplit?: number): Promise<{
         trainDataset: Dataset<{
             xs: Tensor;