npm - @genai-fi/nanogpt - Versions diffs - 0.15.13 → 0.15.14 - Mend

@genai-fi/nanogpt 0.15.13 → 0.15.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/dist/Trainer.js +10 -5
package/dist/data/textLoader.js +47 -41
package/dist/training/BasicTrainer.js +62 -62
package/dist/training/Evaluator.d.ts +2 -1
package/dist/training/Evaluator.js +19 -18
package/dist/training/SFTDatasetBuilder.js +43 -36
package/dist/training/tasks/ConversationTask.d.ts +2 -2
package/dist/training/tasks/ConversationTask.js +13 -11
package/dist/training/tasks/PretrainingTask.d.ts +1 -2
package/dist/training/tasks/PretrainingTask.js +4 -14
package/dist/training/tasks/StartSentenceTask.d.ts +1 -2
package/dist/training/tasks/StartSentenceTask.js +2 -7
package/dist/training/tasks/Task.d.ts +1 -2
package/dist/training/tasks/splitter.d.ts +5 -0
package/dist/training/tasks/splitter.js +21 -0
package/dist/training/validation.js +1 -1
package/package.json +1 -1

package/dist/Trainer.js CHANGED Viewed

@@ -1,7 +1,8 @@
 import { E as g } from "./index-DvYrXKkX.js";
 import o from "./training/PreTrainer.js";
-import { createTrainValidationSplit as p } from "./training/validation.js";
+import { createTrainValidationSplit as d } from "./training/validation.js";
 import h from "./training/SFTTrainer.js";
+import p from "./training/tasks/splitter.js";
 class l extends g {
   trainer;
   trainingType = "pretraining";
@@ -81,7 +82,7 @@ class l extends g {
   async prepare(t = []) {
     const i = this.options;
     if (this.trainingType === "pretraining" && this.trainer instanceof o) {
-      const { trainDataset: e, validationDataset: a, size: r, trainState: n } = await p(
+      const { trainDataset: e, validationDataset: a, size: r, trainState: n } = await d(
         t,
         this.trainer.tokenizer,
         this.trainer.datasetBuilder,
@@ -92,12 +93,16 @@ class l extends g {
     } else if (this.trainingType === "sft" && this.trainer instanceof h) {
       if (t instanceof Uint16Array)
         throw new Error("SFT training requires Task[] input");
-      const e = await this.trainer.datasetBuilder.createSFTDataset(
-        t,
+      const e = p(t, i?.validationSplit || 0.1), a = await this.trainer.datasetBuilder.createSFTDataset(
+        [e.training],
+        i?.batchSize || 32,
+        -100
+      ), r = await this.trainer.datasetBuilder.createSFTDataset(
+        [e.validation],
         i?.batchSize || 32,
         -100
       );
-      this.trainDataset = e, this.totalSamples = t.reduce((a, r) => a + r.length, 0), this.options.epochSteps = Math.ceil(this.totalSamples / (i?.batchSize || 32)), this.trainer.updateOptimizer(this.options);
+      this.validationDataset = r, this.trainDataset = a, this.totalSamples = t.reduce((n, s) => n + s.length, 0), this.options.epochSteps = Math.ceil(this.totalSamples / (i?.batchSize || 32)), this.trainer.updateOptimizer(this.options);
     }
   }
   configureModel(t) {

package/dist/data/textLoader.js CHANGED Viewed

@@ -1,14 +1,14 @@
 import { p as u } from "../papaparse.min-C0cScC2i.js";
-import { loadParquet as d } from "./parquet.js";
-import { loadPDF as f } from "./pdf.js";
+import { loadParquet as f } from "./parquet.js";
+import { loadPDF as d } from "./pdf.js";
 import { loadDOCX as m } from "./docx.js";
 import { z as x } from "../jszip.min-BZhlzntC.js";
-function w(t, n) {
-  const r = t.findIndex((i) => i.toLowerCase() === n.toLowerCase());
-  return r === -1 ? 0 : r;
+function y(t, r) {
+  const a = t.findIndex((i) => i.toLowerCase() === r.toLowerCase());
+  return a === -1 ? 0 : a;
 }
-function y(t) {
-  return t.every((n) => n.length < 64);
+function w(t) {
+  return t.every((r) => r.length < 64);
 }
 function h(t) {
   return t.split(".").pop() || "";
@@ -35,66 +35,72 @@ function g(t) {
       return "unknown";
   }
 }
-async function z(t, n) {
-  const r = t.type !== "" ? t.type : g(t.name);
-  if (r === "application/parquet")
-    return d(t, n?.maxSize, n?.column);
-  if (r === "application/pdf")
-    return f(t, n?.maxSize);
-  if (r === "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
+function j(t) {
+  if (!Array.isArray(t)) return !1;
+  const r = t[0];
+  return typeof r == "object" && r !== null && "role" in r && "content" in r && typeof r.role == "string" && typeof r.content == "string";
+}
+async function z(t, r) {
+  const a = t.type !== "" ? t.type : g(t.name);
+  if (a === "application/parquet")
+    return f(t, r?.maxSize, r?.column);
+  if (a === "application/pdf")
+    return d(t, r?.maxSize);
+  if (a === "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
     return m(t);
-  if (r === "application/json") {
-    const i = await t.text(), a = JSON.parse(i);
-    if (Array.isArray(a))
-      return a.map(
+  if (a === "application/json") {
+    const i = await t.text(), o = JSON.parse(i);
+    if (Array.isArray(o))
+      return o.map(
         (e) => typeof e == "string" ? e : "text" in e ? e.text : JSON.stringify(e)
       );
     throw new Error("Expected JSON array");
   }
-  if (r === "application/jsonl")
+  if (a === "application/jsonl")
     return (await t.text()).split(`
-`).filter((a) => a.trim() !== "").map((a) => {
+`).filter((o) => o.trim() !== "").map((o) => {
       try {
-        const e = JSON.parse(a);
-        return typeof e == "string" ? e : "text" in e ? e.text : JSON.stringify(e);
+        const e = JSON.parse(o);
+        return j(e) ? e.map((n) => `${n.content}`).join(`
+`) : typeof e == "string" ? e : "text" in e ? e.text : JSON.stringify(e);
       } catch {
-        return a;
+        return o;
       }
     });
-  if (r === "application/zip") {
-    const i = await x.loadAsync(t), a = [];
+  if (a === "application/zip") {
+    const i = await x.loadAsync(t), o = [];
     for (const e of Object.keys(i.files)) {
-      const o = i.file(e);
-      if (o) {
-        const c = await o.async("blob"), p = await z(new File([c], e), n);
-        a.push(...p);
+      const n = i.file(e);
+      if (n) {
+        const s = await n.async("blob"), c = await z(new File([s], e), r);
+        o.push(...c);
       }
     }
-    return a;
+    return o;
   }
-  if (r === "text/csv") {
+  if (a === "text/csv") {
     const i = await t.text();
-    return new Promise((a, e) => {
+    return new Promise((o, e) => {
       u.parse(i, {
         header: !1,
         skipEmptyLines: !0,
         delimiter: ",",
-        complete: (o) => {
-          if (o.errors.length > 0)
-            console.error(o.errors), e(new Error("Error parsing file"));
+        complete: (n) => {
+          if (n.errors.length > 0)
+            console.error(n.errors), e(new Error("Error parsing file"));
           else {
-            const c = w(o.data[0], n?.column || "text"), s = n?.hasHeader ?? y(o.data[0]) ? o.data.slice(1) : o.data;
-            a(s.map((l) => l[c]));
+            const s = y(n.data[0], r?.column || "text"), p = r?.hasHeader ?? w(n.data[0]) ? n.data.slice(1) : n.data;
+            o(p.map((l) => l[s]));
           }
         },
-        error: (o) => {
-          e(o);
+        error: (n) => {
+          e(n);
         }
       });
     });
-  } else if (r === "text/plain")
+  } else if (a === "text/plain")
     return [await t.text()];
-  throw new Error(`Unsupported file type: ${r}`);
+  throw new Error(`Unsupported file type: ${a}`);
 }
 export {
   z as default

package/dist/training/BasicTrainer.js CHANGED Viewed

@@ -1,8 +1,8 @@
 import u from "./Evaluator.js";
-import { t as L, v as P, k as h, d as g, a as y } from "../index-CUXkjxiT.js";
+import { t as z, v as P, k as g, d as p, a as y } from "../index-CUXkjxiT.js";
 import S from "../utilities/profile.js";
-import { createTensorStatistics as x } from "../checks/weights.js";
-import { calculateLoss as k, calculateAccuracy as T } from "./loss.js";
+import { createTensorStatistics as k } from "../checks/weights.js";
+import { calculateLoss as x, calculateAccuracy as T } from "./loss.js";
 import { AdamWOptimizer as N } from "./AdamW.js";
 import { z as w } from "../zeros-DvZpK8s6.js";
 const v = {
@@ -23,11 +23,11 @@ const v = {
   lossScaling: 1
 };
 class G {
-  constructor(e, i, o, c) {
-    this.tokenizer = i, this.model = e, this.optimizerConfig = {
+  constructor(s, i, o, c) {
+    this.tokenizer = i, this.model = s, this.optimizerConfig = {
       ...b,
       ...o,
-      lossScaling: e.lossScaling
+      lossScaling: s.lossScaling
     };
     const l = c || new N(this.optimizerConfig);
     c && c.updateConfig(this.optimizerConfig), this.optimizer = l;
@@ -44,26 +44,26 @@ class G {
   _labelSmoothing = 0;
   _layerDrop = 0;
   _dropout = 0;
-  setGradientCheckpointing(e) {
-    this._gradientCheckpointing = e;
+  setGradientCheckpointing(s) {
+    this._gradientCheckpointing = s;
   }
-  setMixedPrecision(e) {
-    this._mixedPrecision = e;
+  setMixedPrecision(s) {
+    this._mixedPrecision = s;
   }
-  setLabelSmoothing(e) {
-    this._labelSmoothing = e;
+  setLabelSmoothing(s) {
+    this._labelSmoothing = s;
   }
-  setDropout(e) {
-    this._dropout = e;
+  setDropout(s) {
+    this._dropout = s;
   }
-  setLayerDrop(e) {
-    this._layerDrop = e;
+  setLayerDrop(s) {
+    this._layerDrop = s;
   }
-  setLearningRate(e) {
-    this.optimizerConfig.learningRate = e, this.updateOptimizer();
+  setLearningRate(s) {
+    this.optimizerConfig.learningRate = s, this.updateOptimizer();
   }
-  setMetrics(e) {
-    this.metrics = new Set(e);
+  setMetrics(s) {
+    this.metrics = new Set(s);
   }
   reset() {
     this.lastState = void 0, this.running = !1;
@@ -77,12 +77,12 @@ class G {
   getOptimizer() {
     return this.optimizer;
   }
-  updateOptimizer(e) {
-    e && (this.optimizerConfig = { ...this.optimizerConfig, ...e }), this.optimizer.updateConfig(this.optimizerConfig);
+  updateOptimizer(s) {
+    s && (this.optimizerConfig = { ...this.optimizerConfig, ...s }), this.optimizer.updateConfig(this.optimizerConfig);
   }
   // A single forward pass, backward pass, and optimizer step
-  trainStep(e, i, o = !1, c = !1) {
-    return L(() => {
+  trainStep(s, i, o = !1, c = !1) {
+    return z(() => {
       this.model.getProfiler()?.startMemory();
       const { xs: l, ys: a } = i, d = () => {
         const r = this.model.forward(
@@ -94,31 +94,31 @@ class G {
             layerDrop: this._layerDrop
           },
           l
-        ), s = k(r, a, this.maskedLoss, !1, this._labelSmoothing);
-        this.metrics.has("accuracy") && (e.accuracy = T(r, a), h(e.accuracy)), r.dispose();
-        const m = s.mul(y(this.optimizerConfig.lossScaling));
-        return s.dispose(), m;
+        ), e = x(r, a, this.maskedLoss, !1, this._labelSmoothing);
+        this.metrics.has("accuracy") && (s.accuracy = T(r, a), g(s.accuracy)), r.dispose();
+        const m = e.mul(y(this.optimizerConfig.lossScaling));
+        return e.dispose(), m;
       }, { value: t, grads: n } = P(d);
       if (o)
         this.model.getProfiler()?.endMemory("Training");
       else {
         const r = this.optimizer.applyGradients(n);
-        this.metrics.has("gradientNorm") ? (e.gradientNorm = r, h(r)) : (e.gradientNorm = void 0, r.dispose());
-        const s = Object.keys(n);
-        this.model.weightStore.touchVariables(s), this.model.getProfiler()?.endMemory("Training"), c ? (e.gradients = n, Object.values(n).forEach((m) => h(m))) : g(n);
+        this.metrics.has("gradientNorm") ? (s.gradientNorm = r, g(r)) : (s.gradientNorm = void 0, r.dispose());
+        const e = Object.keys(n);
+        this.model.weightStore.touchVariables(e), this.model.getProfiler()?.endMemory("Training"), c ? (s.gradients = n, Object.values(n).forEach((m) => g(m))) : p(n);
       }
       return t.mul(y(1 / this.optimizerConfig.lossScaling));
     });
   }
   async dummyPass() {
-    const e = w([1, this.model.config.blockSize], "int32"), i = w([1, this.model.config.blockSize], "int32");
+    const s = w([1, this.model.config.blockSize], "int32"), i = w([1, this.model.config.blockSize], "int32");
     try {
-      const o = this.trainStep({}, { xs: e, ys: i }, !0);
+      const o = this.trainStep({}, { xs: s, ys: i }, !0);
       await o.data(), o.dispose();
     } catch (o) {
       console.error("Error during dummy pass:", o);
     } finally {
-      e.dispose(), i.dispose();
+      s.dispose(), i.dispose();
     }
   }
   dispose() {
@@ -136,7 +136,7 @@ class G {
       ...this.lastState || {}
     };
   }
-  async stepDataset(e, i, o) {
+  async stepDataset(s, i, o) {
     const { logInterval: c = 10 } = {
       ...v,
       ...i
@@ -144,21 +144,21 @@ class G {
     i.metrics && this.setMetrics(i.metrics);
     const l = Date.now(), a = this.createEmptyState();
     this.lastState = a, await this.dummyPass(), this.metrics.has("memoryUsage") && (this.model.getProfiler() || this.model.setProfiler(new S())), this.running = !0, a.logStartTime = l;
-    const d = o ? new u(this.model, o) : void 0, t = await e.iterator();
+    const d = o ? new u(this.model, o, void 0, this.maskedLoss) : void 0, t = await s.iterator();
     try {
       for (; this.running; ) {
         const n = await t.next();
         if (n.done) break;
-        const r = n.value, s = this.trainStep(a, r, !1);
-        r.xs.dispose(), r.ys.dispose(), a.step++, a.totalSteps++, a.step % c === 0 ? await this.performLogging(s, r.xs.shape[0], i, d) : (a.gradientNorm && (a.gradientNorm.dispose(), a.gradientNorm = void 0), a.accuracy && (a.accuracy.dispose(), a.accuracy = void 0)), s.dispose();
+        const r = n.value, e = this.trainStep(a, r, !1);
+        r.xs.dispose(), r.ys.dispose(), a.step++, a.totalSteps++, a.step % c === 0 ? await this.performLogging(e, r.xs.shape[0], i, d) : (a.gradientNorm && (a.gradientNorm.dispose(), a.gradientNorm = void 0), a.accuracy && (a.accuracy.dispose(), a.accuracy = void 0)), e.dispose();
       }
     } catch (n) {
-      throw console.error("Training error:", n), g(), n;
+      throw console.error("Training error:", n), p(), n;
     }
-    throw g(), this.running = !1, new Error("No log returned before training stopped.");
+    throw p(), this.running = !1, new Error("No log returned before training stopped.");
   }
-  async performLogging(e, i, o, c) {
-    const l = o?.onStep, a = this.metrics.has("gradientStatistics"), d = (await e.data())[0], t = this.lastState;
+  async performLogging(s, i, o, c) {
+    const l = o?.onStep, a = this.metrics.has("gradientStatistics"), d = (await s.data())[0], t = this.lastState;
     t.lastLoss = d;
     const n = Date.now();
     t.trainingDuration += n - t.logStartTime;
@@ -184,25 +184,25 @@ class G {
       batchSize: i,
       loss: t.lastLoss
     }, a && t.gradients) {
-      const s = /* @__PURE__ */ new Map();
-      for (const [m, p] of Object.entries(t.gradients))
-        s.set(m, await x(p)), p.dispose();
-      r.gradientMetrics = s;
+      const e = /* @__PURE__ */ new Map();
+      for (const [m, h] of Object.entries(t.gradients))
+        e.set(m, await k(h)), h.dispose();
+      r.gradientMetrics = e;
     }
     if (c)
       try {
-        const s = await c.evaluate(5);
-        Array.isArray(s) ? r.validationMetrics = { loss: s[0].loss, accuracy: s[0].accuracy } : (t.validationLosses.push(s.loss), r.validationMetrics = {
-          accuracy: s.accuracy,
-          loss: s.loss,
-          perplexity: this.metrics.has("perplexity") ? Math.exp(s.loss) : void 0
+        const e = await c.evaluate(5);
+        Array.isArray(e) ? r.validationMetrics = { loss: e[0].loss, accuracy: e[0].accuracy } : (t.validationLosses.push(e.loss), r.validationMetrics = {
+          accuracy: e.accuracy,
+          loss: e.loss,
+          perplexity: this.metrics.has("perplexity") ? Math.exp(e.loss) : void 0
         });
-      } catch (s) {
-        console.error("Validation error:", s);
+      } catch (e) {
+        console.error("Validation error:", e);
       }
     l && await l(r), t.logStartTime = Date.now();
   }
-  async trainOnDataset(e, i, o) {
+  async trainOnDataset(s, i, o) {
     const { logInterval: c = 10, maxEpochs: l = 1 / 0 } = {
       ...v,
       ...i
@@ -210,18 +210,18 @@ class G {
     i.metrics && this.setMetrics(i.metrics);
     const d = Date.now(), t = this.createEmptyState();
     this.lastState = t, await this.dummyPass(), i?.metrics?.includes("memoryUsage") && (this.model.getProfiler() || this.model.setProfiler(new S())), this.running = !0, t.logStartTime = d;
-    const n = o ? new u(this.model, o) : void 0, r = await e.iterator();
+    const n = o ? new u(this.model, o, void 0, this.maskedLoss) : void 0, r = await s.iterator();
     try {
       for (; this.running; ) {
-        const s = await r.next();
-        if (s.done) break;
-        const m = s.value, p = t.step % c === 0, z = (i?.metrics?.includes("gradientStatistics") || !1) && p, f = this.trainStep(t, m, !1, z);
-        m.xs.dispose(), m.ys.dispose(), t.step++, t.totalSteps++, p ? await this.performLogging(f, m.xs.shape[0], i, n) : (t.gradientNorm && (t.gradientNorm.dispose(), t.gradientNorm = void 0), t.accuracy && (t.accuracy.dispose(), t.accuracy = void 0)), f.dispose(), t.step >= a && this.stop();
+        const e = await r.next();
+        if (e.done) break;
+        const m = e.value, h = t.step % c === 0, L = (i?.metrics?.includes("gradientStatistics") || !1) && h, f = this.trainStep(t, m, !1, L);
+        m.xs.dispose(), m.ys.dispose(), t.step++, t.totalSteps++, h ? await this.performLogging(f, m.xs.shape[0], i, n) : (t.gradientNorm && (t.gradientNorm.dispose(), t.gradientNorm = void 0), t.accuracy && (t.accuracy.dispose(), t.accuracy = void 0)), f.dispose(), t.step >= a && this.stop();
       }
-    } catch (s) {
-      throw console.error("Training error:", s), g(), s;
+    } catch (e) {
+      throw console.error("Training error:", e), p(), e;
     }
-    return g(), this.running = !1, { losses: t.losses, validationLosses: t.validationLosses };
+    return p(), this.running = !1, { losses: t.losses, validationLosses: t.validationLosses };
   }
 }
 export {

package/dist/training/Evaluator.d.ts CHANGED Viewed

@@ -11,7 +11,8 @@ export default class Evaluator {
     private iterator?;
     private xs?;
     private ys?;
-    constructor(model: Model<ModelForwardAttributes>, dataset: Dataset<TensorContainer> | Conversation[][], tokeniser?: ITokeniser);
+    private masked;
+    constructor(model: Model<ModelForwardAttributes>, dataset: Dataset<TensorContainer> | Conversation[][], tokeniser?: ITokeniser, masked?: boolean);
     dispose(): void;
     private calculateBatchLoss;
     evaluate(maxBatches?: number): Promise<Result | Result[]>;

package/dist/training/Evaluator.js CHANGED Viewed

@@ -2,12 +2,12 @@ import { t as p } from "../index-CUXkjxiT.js";
 import { calculateLoss as d, calculateAccuracy as m } from "./loss.js";
 import { buildSFTExample as x } from "./SFTDatasetBuilder.js";
 import { t as h } from "../tensor-BWFldCso.js";
-class g {
-  constructor(c, t, o) {
-    if (this.model = c, Array.isArray(t)) {
+class k {
+  constructor(i, t, o, c) {
+    if (this.model = i, this.masked = !!c, Array.isArray(t)) {
       if (!o)
         throw new Error("Tokeniser is required when dataset is an array of conversations");
-      const a = t.map((s) => x(s, -100, o, c.config.blockSize)).filter((s) => s !== null);
+      const a = t.map((s) => x(s, -100, o, i.config.blockSize)).filter((s) => s !== null);
       if (a.length === 0)
         return;
       this.xs = h(a.map((s) => s.xs)), this.ys = h(a.map((s) => s.ys));
@@ -17,32 +17,33 @@ class g {
   iterator;
   xs;
   ys;
+  masked = !1;
   dispose() {
     this.xs && this.xs.dispose(), this.ys && this.ys.dispose();
   }
-  async calculateBatchLoss(c, t, o, a) {
-    const [s, e] = p(() => {
-      const r = this.model.forward({ training: !1 }, c), y = d(r, t, a, o), f = m(r, t);
+  async calculateBatchLoss(i, t, o, c) {
+    const [a, s] = p(() => {
+      const r = this.model.forward({ training: !1 }, i), y = d(r, t, c, o), f = m(r, t);
       return r.dispose(), [y, f];
-    }), n = await s.array(), u = await e.array(), i = n, l = u;
-    return e.dispose(), s.dispose(), Array.isArray(i) ? i.map((r) => ({ loss: r, accuracy: l })) : { loss: i, accuracy: l };
+    }), n = await a.array(), u = await s.array(), e = n, l = u;
+    return s.dispose(), a.dispose(), Array.isArray(e) ? e.map((r) => ({ loss: r, accuracy: l })) : { loss: e, accuracy: l };
   }
-  async evaluate(c = 100) {
-    let t = 0, o = 0, a = 0;
+  async evaluate(i = 100) {
+    let t = 0, o = 0, c = 0;
     if (this.iterator) {
-      const s = await this.iterator;
-      for (let e = 0; e < c; e++) {
-        const n = await s.next();
+      const a = await this.iterator;
+      for (let s = 0; s < i; s++) {
+        const n = await a.next();
         if (n.done) break;
-        const u = n.value, { xs: i, ys: l } = u, r = await this.calculateBatchLoss(i, l, !1, !1);
-        i.dispose(), l.dispose(), t += r.loss, o += r.accuracy, a++;
+        const u = n.value, { xs: e, ys: l } = u, r = await this.calculateBatchLoss(e, l, !1, this.masked);
+        e.dispose(), l.dispose(), t += r.loss, o += r.accuracy, c++;
       }
-      return { loss: t / a, accuracy: o / a };
+      return { loss: t / c, accuracy: o / c };
     } else if (this.xs && this.ys)
       return this.calculateBatchLoss(this.xs, this.ys, !0, !0);
     throw new Error("No data available for evaluation");
   }
 }
 export {
-  g as default
+  k as default
 };

package/dist/training/SFTDatasetBuilder.js CHANGED Viewed

@@ -1,50 +1,50 @@
-import { t as x } from "../index-CUXkjxiT.js";
+import { t as y } from "../index-CUXkjxiT.js";
 import "../dataset-CGGp1z9P.js";
 import { g as I } from "../readers-iz5u3HBo.js";
 import "../index-Cp39cXWe.js";
-function w(u, a, t, r) {
-  const s = [t.bosToken], n = [!1], f = {
+function w(p, o, t, l) {
+  const s = [t.bosToken], a = [!1], u = {
     user: t.getSpecialTokenIndex("<|user_start|>"),
     assistant: t.getSpecialTokenIndex("<|assistant_start|>"),
     system: t.getSpecialTokenIndex("<|system_start|>")
-  }, i = {
+  }, c = {
     user: t.getSpecialTokenIndex("<|user_end|>"),
     assistant: t.getSpecialTokenIndex("<|assistant_end|>"),
     system: t.getSpecialTokenIndex("<|system_end|>")
   };
-  for (const e of u) {
-    const c = f[e.role], h = i[e.role];
-    if (!c || !h)
+  for (const e of p) {
+    const r = u[e.role], h = c[e.role];
+    if (!r || !h)
       throw new Error(`Missing special tokens for role: ${e.role}`);
-    s.push(c), n.push(!1);
+    s.push(r), a.push(!1);
     const m = e.role === "assistant", S = t.encode(e.content);
     for (const T of S) {
       s.push(T);
-      const y = t.isSpecialToken(T);
-      n.push(m && !y);
+      const x = t.isSpecialToken(T);
+      a.push(m && !x);
     }
-    s.push(h), n.push(m);
+    s.push(h), a.push(m);
   }
-  s.push(t.eosToken), n.push(!1);
-  const o = r + 1;
-  if (s.length < o) {
-    const e = o - s.length, c = t.getSpecialTokenIndex("<pad>");
+  s.push(t.eosToken), a.push(!1);
+  const n = l + 1;
+  if (s.length < n) {
+    const e = n - s.length, r = t.getSpecialTokenIndex("<pad>");
     for (let h = 0; h < e; h++)
-      s.push(c), n.push(!1);
-  } else s.length > o && (s.length = o, n.length = o);
-  const p = new Int32Array(s.slice(0, r)), l = s.slice(1, r + 1), k = n.slice(1, r + 1), d = new Int32Array(l.length);
+      s.push(r), a.push(!1);
+  } else s.length > n && (s.length = n, a.length = n);
+  const f = new Int32Array(s.slice(0, l)), i = s.slice(1, l + 1), k = a.slice(1, l + 1), d = new Int32Array(i.length);
   let g = !1;
-  for (let e = 0; e < l.length; e++) {
-    const c = k[e] ? l[e] : a;
-    d[e] = c, c !== a && (g = !0);
+  for (let e = 0; e < i.length; e++) {
+    const r = k[e] ? i[e] : o;
+    d[e] = r, r !== o && (g = !0);
   }
-  return g ? { xs: p, ys: d } : null;
+  return g ? { xs: f, ys: d } : null;
 }
-class A {
+class D {
   tokenizer;
   blockSize;
-  constructor(a, t = 128) {
-    this.tokenizer = a, this.blockSize = t;
+  constructor(o, t = 128) {
+    this.tokenizer = o, this.blockSize = t;
   }
   /**
    * Create SFT dataset from structured conversations.
@@ -52,25 +52,32 @@ class A {
    * - Pads with eosToken and masks padding.
    * - Masks non-assistant tokens in labels with ignoreIndex (default -100).
    */
-  async createSFTDataset(a, t = 32, r = -100) {
-    if (!a.length)
+  async createSFTDataset(o, t = 32, l = -100) {
+    if (!o.length)
       throw new Error("No conversations provided.");
-    const s = this.tokenizer, n = this.blockSize;
+    const s = this.tokenizer, a = this.blockSize;
+    for (const c of o)
+      c.shuffle();
     return I(function* () {
       for (; ; ) {
-        const i = Math.floor(Math.random() * a.length), p = a[i].getRandomConversation(), l = w(p, r, s, n);
-        l && (yield l);
+        const c = Math.floor(Math.random() * o.length), n = o[c], f = n.nextConversation();
+        if (!f) {
+          n.shuffle();
+          continue;
+        }
+        const i = w(f, l, s, a);
+        i && (yield i);
       }
-    }).batch(t).map((i) => {
-      const o = i;
-      return x(() => ({
-        xs: o.xs.cast("int32"),
-        ys: o.ys.cast("int32")
+    }).batch(t).map((c) => {
+      const n = c;
+      return y(() => ({
+        xs: n.xs.cast("int32"),
+        ys: n.ys.cast("int32")
       }));
     }).prefetch(2);
   }
 }
 export {
-  A as SFTDatasetBuilder,
+  D as SFTDatasetBuilder,
   w as buildSFTExample
 };

package/dist/training/tasks/ConversationTask.d.ts CHANGED Viewed

@@ -2,13 +2,13 @@ import { Conversation, ITokeniser } from '../../main';
 import { Task } from './Task';
 export default class ConversationTask extends Task {
     private rawConvo;
+    private shuffledIndices;
     private index;
     get length(): number;
     constructor(conversations: Conversation[][]);
     hasMoreConversations(): boolean;
     nextConversation(): Conversation[] | null;
     nextTokens(tokeniser: ITokeniser): number[] | null;
-    getRandomConversation(): Conversation[];
-    getRandomTokens(tokeniser: ITokeniser): number[];
+    shuffle(): void;
     estimateTokens(tokeniser: ITokeniser): Promise<number>;
 }

package/dist/training/tasks/ConversationTask.js CHANGED Viewed

@@ -1,6 +1,8 @@
 import { Task as t } from "./Task.js";
+import { shuffle as s } from "../DatasetBuilder.js";
 class a extends t {
   rawConvo;
+  shuffledIndices = null;
   index = 0;
   get length() {
     return this.rawConvo.length;
@@ -14,20 +16,20 @@ class a extends t {
   nextConversation() {
     if (this.index >= this.rawConvo.length)
       return null;
-    const n = this.rawConvo[this.index];
+    const n = this.rawConvo[this.shuffledIndices ? this.shuffledIndices[this.index] : this.index];
     return this.index++, n;
   }
   nextTokens(n) {
-    const o = this.nextConversation();
-    return o ? n.encodeConversation(o) : null;
-  }
-  getRandomConversation() {
-    const n = Math.floor(Math.random() * this.rawConvo.length);
-    return this.rawConvo[n];
-  }
-  getRandomTokens(n) {
-    const o = Math.floor(Math.random() * this.rawConvo.length);
-    return n.encodeConversation(this.rawConvo[o]);
+    const e = this.nextConversation();
+    return e ? n.encodeConversation(e) : null;
+  }
+  shuffle() {
+    if (!this.shuffledIndices) {
+      this.shuffledIndices = new Uint32Array(this.rawConvo.length);
+      for (let n = 0; n < this.rawConvo.length; n++)
+        this.shuffledIndices[n] = n;
+    }
+    s(this.shuffledIndices), this.index = 0;
   }
   async estimateTokens(n) {
     return (await n.encodeConversation(this.rawConvo[0])).length * this.length;

package/dist/training/tasks/PretrainingTask.d.ts CHANGED Viewed

@@ -8,7 +8,6 @@ export default class PretrainingTask extends Task {
     hasMoreConversations(): boolean;
     nextConversation(): Conversation[] | null;
     nextTokens(tokeniser: ITokeniser): number[] | null;
-    getRandomConversation(): Conversation[];
-    getRandomTokens(tokeniser: ITokeniser): number[];
+    shuffle(): void;
     estimateTokens(tokeniser: ITokeniser): Promise<number>;
 }

package/dist/training/tasks/PretrainingTask.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { Task as n } from "./Task.js";
-class i extends n {
+class r extends n {
   rawText;
   index = 0;
   get length() {
@@ -26,18 +26,8 @@ class i extends n {
     const e = t.encodeSequence(this.rawText[this.index]);
     return this.index++, e;
   }
-  getRandomConversation() {
-    const t = Math.floor(Math.random() * this.rawText.length);
-    return [
-      {
-        role: "assistant",
-        content: this.rawText[t]
-      }
-    ];
-  }
-  getRandomTokens(t) {
-    const e = Math.floor(Math.random() * this.rawText.length);
-    return t.encodeSequence(this.rawText[e]);
+  shuffle() {
+    this.index = 0;
   }
   async estimateTokens(t) {
     return (await t.encodeConversation([
@@ -49,5 +39,5 @@ class i extends n {
   }
 }
 export {
-  i as default
+  r as default
 };

package/dist/training/tasks/StartSentenceTask.d.ts CHANGED Viewed

@@ -8,8 +8,7 @@ export default class StartSentenceTask extends Task {
     hasMoreConversations(): boolean;
     nextConversation(): Conversation[] | null;
     nextTokens(tokeniser: ITokeniser): number[] | null;
-    getRandomConversation(): Conversation[];
-    getRandomTokens(tokeniser: ITokeniser): number[];
+    shuffle(): void;
     private conversationFromString;
     estimateTokens(tokeniser: ITokeniser): Promise<number>;
 }

package/dist/training/tasks/StartSentenceTask.js CHANGED Viewed

@@ -21,13 +21,8 @@ class a extends e {
     const n = this.nextConversation();
     return n ? t.encodeConversation(n) : null;
   }
-  getRandomConversation() {
-    const t = Math.floor(Math.random() * this.rawText.length);
-    return this.conversationFromString(this.rawText[t]);
-  }
-  getRandomTokens(t) {
-    const n = this.getRandomConversation();
-    return t.encodeConversation(n);
+  shuffle() {
+    this.index = 0;
   }
   conversationFromString(t) {
     const n = t.indexOf(".");

package/dist/training/tasks/Task.d.ts CHANGED Viewed

@@ -5,7 +5,6 @@ export declare abstract class Task {
     abstract nextConversation(): Conversation[] | null;
     abstract nextTokens(tokeniser: ITokeniser): number[] | null;
     abstract estimateTokens(tokeniser: ITokeniser): Promise<number>;
-    abstract getRandomConversation(): Conversation[];
-    abstract getRandomTokens(tokeniser: ITokeniser): number[];
+    abstract shuffle(): void;
 }
 export declare function tokensFromTasks(tasks: Task[], tokenizer: ITokeniser, cb?: (tokens: number) => void): Promise<Uint16Array>;

package/dist/training/tasks/splitter.d.ts ADDED Viewed

@@ -0,0 +1,5 @@
+import { Task } from './Task';
+export default function splitValidation(tasks: Task[], validationSplit: number): {
+    training: Task;
+    validation: Task;
+};

package/dist/training/tasks/splitter.js ADDED Viewed

@@ -0,0 +1,21 @@
+import s from "./ConversationTask.js";
+function f(e, o) {
+  if (o <= 0 || o >= 1)
+    throw new Error("validationSplit must be between 0 and 1");
+  e.forEach((n) => n.shuffle());
+  const r = [], a = [];
+  for (const n of e)
+    for (; n.hasMoreConversations(); ) {
+      const t = n.nextConversation();
+      if (!t)
+        break;
+      Math.random() < o ? a.push(t) : r.push(t);
+    }
+  return {
+    training: new s(r),
+    validation: new s(a)
+  };
+}
+export {
+  f as default
+};

package/dist/training/validation.js CHANGED Viewed

@@ -39,8 +39,8 @@ import "../ops/webgl/adamAdjust.js";
 import "../ops/cpu/adamMoments.js";
 import "../ops/webgl/adamMoments.js";
 import { PAGE_FACTOR as m, shuffle as h } from "./DatasetBuilder.js";
-import "../papaparse.min-C0cScC2i.js";
 import { tokensFromTasks as k } from "./tasks/Task.js";
+import "../papaparse.min-C0cScC2i.js";
 import "../ops/cpu/matMulGelu.js";
 import "../matMulGelu-JNLZqKQp.js";
 import "../ops/grads/matMulGelu.js";

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@genai-fi/nanogpt",
-    "version": "0.15.13",
+    "version": "0.15.14",
     "type": "module",
     "main": "dist/main.js",
     "types": "dist/main.d.ts",