@genai-fi/nanogpt 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +95 -46
- package/dist/NanoGPTModel.d.ts +3 -2
- package/dist/NanoGPTModel.js +91 -76
- package/dist/{Reshape-BE5rA4rT.js → Reshape-Bt_t7RNz.js} +4 -4
- package/dist/TeachableLLM.js +1 -1
- package/dist/TiedEmbedding-DORsPlNL.js +44 -0
- package/dist/{axis_util-97KkkyRQ.js → axis_util-CVbf1vmL.js} +3 -3
- package/dist/{broadcast_to-CMlkG8NS.js → broadcast_to-BBoMQXbL.js} +4 -4
- package/dist/{concat-Cxbo2sOz.js → concat-BRRtq4S2.js} +1 -1
- package/dist/dataset-ZHEPJmED.js +1226 -0
- package/dist/{dropout-kbDY39Ci.js → dropout-lQm_YyX3.js} +1 -1
- package/dist/{gather-Bxe1Qip8.js → gather-BWyutxwi.js} +3 -3
- package/dist/{gpgpu_math-C0zyxKFi.js → gpgpu_math-Df7gzJWH.js} +1 -1
- package/dist/{index-iNhkcAEQ.js → index-CnHyhpKc.js} +32 -32
- package/dist/{kernel_funcs_utils-C4eIk4fE.js → kernel_funcs_utils-Dqo82NH4.js} +25 -25
- package/dist/layers/BaseLayer.js +114 -3
- package/dist/layers/CausalSelfAttention.d.ts +2 -3
- package/dist/layers/CausalSelfAttention.js +31 -30
- package/dist/layers/MLP.js +10 -9
- package/dist/layers/RMSNorm.js +12 -11
- package/dist/layers/RoPECache.js +3 -3
- package/dist/layers/TiedEmbedding.js +8 -6
- package/dist/layers/TransformerBlock.js +2 -2
- package/dist/{log_sum_exp-CkumwesB.js → log_sum_exp-CRH7Np9v.js} +12 -12
- package/dist/main.js +1 -1
- package/dist/{mat_mul-D0SifYfJ.js → mat_mul-DeGU1U_C.js} +3 -3
- package/dist/{max-CYaAjEEp.js → max-CcnEArWK.js} +3 -3
- package/dist/{moments-B06NlR_V.js → moments-DLTE6-1p.js} +4 -4
- package/dist/{norm-D3676xIo.js → norm-BpWsOapl.js} +5 -5
- package/dist/{ones-BIeFnPHR.js → ones-CDWGzVnm.js} +6 -6
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +5 -5
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +5 -5
- package/dist/ops/cpu/gelu.js +1 -1
- package/dist/ops/cpu/matMulGelu.js +1 -1
- package/dist/ops/cpu/matMulMul.js +1 -1
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +27 -27
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +1 -1
- package/dist/ops/grads/attentionMask.js +1 -1
- package/dist/ops/grads/fusedSoftmax.js +2 -2
- package/dist/ops/grads/gelu.js +1 -1
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/normRMS.js +1 -1
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/matMulGelu.js +1 -1
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +36 -36
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/matMulGelu.js +22 -22
- package/dist/ops/webgl/matMulMul.js +1 -1
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/{ops-ObfXLHYQ.js → ops-DzQTmLIl.js} +60 -60
- package/dist/{TiedEmbedding-DsDRvLB0.js → random_width-DI2h9CMs.js} +1215 -1250
- package/dist/{range-BsFU-SNG.js → range-CkOJ7090.js} +1 -1
- package/dist/{reshape-DxTPgnwL.js → reshape-CTIbqjwm.js} +1 -1
- package/dist/{sin-BOX-JVAj.js → sin-HzioENy_.js} +5 -5
- package/dist/{slice_util-D-kaD4ZV.js → slice_util-n4wHKmex.js} +1 -1
- package/dist/{softmax-BjsptB07.js → softmax-DX6qXAbm.js} +2 -2
- package/dist/{split-BCbrzthj.js → split-CVwhL8Oe.js} +3 -3
- package/dist/{stack--cqr9Dgc.js → stack-S2-D2JAQ.js} +1 -1
- package/dist/{sum-B_92TaHD.js → sum-UdfvaNhB.js} +4 -4
- package/dist/{tensor-CfiPXsW4.js → tensor-IZex6Bwp.js} +1 -1
- package/dist/{tensor2d-tSxWdFMH.js → tensor2d-CqtBzOKq.js} +1 -1
- package/dist/{tfjs_backend-NucKez4s.js → tfjs_backend-DX9yVvwk.js} +41 -41
- package/dist/tokeniser/CharTokeniser.js +27 -27
- package/dist/tokeniser/bpe.d.ts +1 -0
- package/dist/tokeniser/bpe.js +38 -35
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +22 -1242
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +5 -5
- package/dist/training/sparseCrossEntropy.js +4 -4
- package/dist/utilities/dummy.js +2 -2
- package/dist/utilities/generate.js +3 -3
- package/dist/utilities/load.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/save.js +5 -5
- package/dist/utilities/weights.js +2 -2
- package/dist/variable-BGvK-VN3.js +23 -0
- package/dist/{zeros-NMYTayy7.js → zeros-CYMicyqz.js} +3 -3
- package/package.json +1 -1
- package/dist/BaseLayer-BhrMN8JO.js +0 -135
@@ -1,4 +1,4 @@
-import { o,
+import { o, j as t, E as c, _ as a, $ as e } from "./index-CnHyhpKc.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -15,11 +15,11 @@ import { o, i as t, E as c, _ as i, $ as a } from "./index-iNhkcAEQ.js";
  * limitations under the License.
  * =============================================================================
  */
-function
+function i(n) {
 const s = { x: t(n, "x", "cos", "float32") };
-return c.runKernel(
+return c.runKernel(a, s);
 }
-const f = /* @__PURE__ */ o({ cos_:
+const f = /* @__PURE__ */ o({ cos_: i });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -38,7 +38,7 @@ const f = /* @__PURE__ */ o({ cos_: e });
  */
 function x(n) {
 const s = { x: t(n, "x", "sin", "float32") };
-return c.runKernel(
+return c.runKernel(e, s);
 }
 const p = /* @__PURE__ */ o({ sin_: x });
 export {
@@ -1,4 +1,4 @@
-import { o as r,
+import { o as r, j as f, E as e, S as i } from "./index-CnHyhpKc.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -20,7 +20,7 @@ function l(s, o = -1) {
 if (o === -1 && (o = t.rank - 1), o !== t.rank - 1)
 throw Error(`Softmax along a non-last dimension is not yet supported. Logits was rank ${t.rank} and dim was ${o}`);
 const n = { logits: t }, a = { dim: o };
-return
+return e.runKernel(i, n, a);
 }
 const p = /* @__PURE__ */ r({ softmax_: l });
 export {
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { o as
|
|
1
|
+
import { o as p, j as i, E as a, x as c } from "./index-CnHyhpKc.js";
|
|
2
2
|
/**
|
|
3
3
|
* @license
|
|
4
4
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -16,10 +16,10 @@ import { o as i, i as p, E as a, w as c } from "./index-iNhkcAEQ.js";
|
|
|
16
16
|
* =============================================================================
|
|
17
17
|
*/
|
|
18
18
|
function e(t, s, o = 0) {
|
|
19
|
-
const n = { x:
|
|
19
|
+
const n = { x: i(t, "x", "split") }, r = { numOrSizeSplits: s, axis: o };
|
|
20
20
|
return a.runKernel(c, n, r);
|
|
21
21
|
}
|
|
22
|
-
const u = /* @__PURE__ */
|
|
22
|
+
const u = /* @__PURE__ */ p({ split_: e });
|
|
23
23
|
export {
|
|
24
24
|
u as s
|
|
25
25
|
};
|
|
@@ -1,4 +1,4 @@
-import { o as e,
+import { o as e, j as u, D as c, E as l, F as m } from "./index-CnHyhpKc.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -15,13 +15,13 @@ import { o as e, i as u, y as c, E as i, D as l } from "./index-iNhkcAEQ.js";
  * limitations under the License.
  * =============================================================================
  */
-function
+function i(t, o = null, n = !1) {
 let s = u(t, "x", "sum");
 s.dtype === "bool" && (s = c(s, "int32"));
 const r = { x: s }, a = { axis: o, keepDims: n };
-return
+return l.runKernel(m, r, a);
 }
-const f = /* @__PURE__ */ e({ sum_:
+const f = /* @__PURE__ */ e({ sum_: i });
 export {
 f as s
 };
@@ -1,11 +1,11 @@
-import { o as h,
-import { s as C, t as Ke, a as Ue, b as ve } from "./ops-
-import { r as Re, d as Ve } from "./dropout-
-import { r as u } from "./reshape-
-import { g as qe } from "./gather-
-import { s as Ge } from "./sum-
-import { m as A } from "./mat_mul-
-import { c as M } from "./concat-
+import { o as h, j as f, E as $, ap as Te, l as _, g as Ee, aq as xe, ar as Ie, as as Le, at as be, au as Ne, av as Ce, aw as Pe, b as H, ax as Fe, a9 as U, u as ae, q as ie, Q as le, c as fe, ay as he, aj as pe, az as je, t as S, D as $e, am as Me, a4 as Be } from "./index-CnHyhpKc.js";
+import { s as C, t as Ke, a as Ue, b as ve } from "./ops-DzQTmLIl.js";
+import { r as Re, d as Ve } from "./dropout-lQm_YyX3.js";
+import { r as u } from "./reshape-CTIbqjwm.js";
+import { g as qe } from "./gather-BWyutxwi.js";
+import { s as Ge } from "./sum-UdfvaNhB.js";
+import { m as A } from "./mat_mul-DeGU1U_C.js";
+import { c as M } from "./concat-BRRtq4S2.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -51,14 +51,14 @@ function We(e, n, t) {
 return $.runKernel(xe, s, o);
 }
 const Ye = /* @__PURE__ */ h({ clipByValue_: We });
-function
+function Qe(e) {
 return M(
 e,
 0
 /* axis */
 );
 }
-const
+const He = /* @__PURE__ */ h({ concat1d_: Qe });
 function Xe(e, n) {
 return M(e, n);
 }
@@ -131,7 +131,7 @@ const un = /* @__PURE__ */ h({ leakyRelu_: on });
  */
 function cn(e, n) {
 const t = f(e, "x", "prelu"), r = f(n, "alpha", "prelu"), s = { x: t, alpha: r };
-return $.runKernel(
+return $.runKernel(be, s);
 }
 const an = /* @__PURE__ */ h({ prelu_: cn });
 /**
@@ -152,7 +152,7 @@ const an = /* @__PURE__ */ h({ prelu_: cn });
  */
 function ln(e) {
 const t = { x: f(e, "x", "relu") };
-return $.runKernel(
+return $.runKernel(Ne, t);
 }
 const fn = /* @__PURE__ */ h({ relu_: ln });
 /**
@@ -301,7 +301,7 @@ function An(e, n, t) {
 if (t == null || t === "linear")
 return e;
 if (t === "relu")
-return
+return H(e, wn(n));
 throw new Error(`Cannot compute gradient for fused activation ${t}.`);
 }
 function Sn(e, n) {
@@ -359,12 +359,12 @@ function _n({ a: e, b: n, transposeA: t = !1, transposeB: r = !1, bias: s, activ
 i != null && (se = f(i, "prelu weights", "fused matMul"));
 const oe = (D, P) => {
 const [y, O, T, B] = P, w = An(u(D, T.shape), T, o);
-let L,
-if (!t && !r ? (L = A(w, O, !1, !0),
+let L, b;
+if (!t && !r ? (L = A(w, O, !1, !0), b = A(y, w, !0, !1)) : !t && r ? (L = A(w, O, !1, !1), b = A(w, y, !0, !1)) : t && !r ? (L = A(O, w, !1, !0), b = A(y, w, !1, !1)) : (L = A(O, w, !0, !0), b = A(w, y, !0, !0)), s != null) {
 const De = Sn(B, w);
-return [L,
+return [L, b, De];
 } else
-return [L,
+return [L, b];
 }, ue = {
 a: V,
 b: q,
@@ -466,11 +466,11 @@ function Wn(e) {
 function Yn(e) {
 return Array.isArray(e) ? e : [e];
 }
-function
+function Qn(e) {
 const t = e.replace(/(.)([A-Z][a-z0-9]+)/g, "$1_$2").replace(/([a-z])([A-Z])/g, "$1_$2").toLowerCase();
 return t[0] !== "_" ? t : "private" + t;
 }
-function
+function Hn(e) {
 return e.length <= 1 || e.indexOf("_") === -1 ? e : e.replace(/[_]+(\w|$)/g, (n, t) => t.toUpperCase());
 }
 let m = {};
@@ -593,8 +593,8 @@ function ot(e) {
  * https://opensource.org/licenses/MIT.
  * =============================================================================
  */
-const
-function
+const N = /* @__PURE__ */ new Map();
+function bn(e) {
 v(Dn, "DataFormat", e);
 }
 function ut(e) {
@@ -616,22 +616,22 @@ function it(e, n) {
 throw F.pop(), t;
 }
 }
-function
+function Nn() {
 return F.length === 0 ? "" : F.join(ge) + ge;
 }
 function lt(e) {
 if (!Oe(e))
 throw new Error("Not a valid tensor name: '" + e + "'");
-return
+return Nn() + e;
 }
 function ft(e) {
 if (!Oe(e))
 throw new Error("Not a valid tensor name: '" + e + "'");
-
-const n =
-if (
+N.has(e) || N.set(e, 0);
+const n = N.get(e);
+if (N.set(e, N.get(e) + 1), n > 0) {
 const t = `${e}_${n}`;
-return
+return N.set(t, 1), t;
 } else
 return e;
 }
@@ -835,7 +835,7 @@ function yt(e, n = -1) {
 function Ot(e, n) {
 switch (e.rank) {
 case 1:
-return
+return He([e, n]);
 case 2:
 return ze([e, n], 0);
 case 3:
@@ -868,7 +868,7 @@ function Dt(e, n, t, r) {
 b: n,
 transposeA: !1,
 transposeB: !1,
-bias: r ?
+bias: r ? Q(e.rank, r, Y()) : null,
 activation: t
 });
 {
@@ -882,7 +882,7 @@ function Dt(e, n, t, r) {
 b: n,
 transposeA: !1,
 transposeB: !1,
-bias: r ?
+bias: r ? Q(e.rank, r, Y()) : null,
 activation: t
 }), g);
 }
@@ -891,9 +891,9 @@ function Tt(e, n, t) {
 return S(() => (Array.isArray(n) ? n = Ke(n, "int32") : n = $e(n, "int32"), qe(e, n, t)));
 }
 function Et(e) {
-return
+return H(e, e);
 }
-function
+function Q(e, n, t) {
 const r = n.shape;
 if (n.rank !== 1 && n.rank !== e)
 throw new l(`Unexpected bias dimensions: ${n.rank}; expected it to be 1 or ${e}`);
@@ -917,7 +917,7 @@ function H(e, n, t) {
 throw new l(`Unsupported input rank by biasAdd: ${n.rank}`);
 }
 function xt(e, n, t) {
-return S(() => (t == null && (t = Y()),
+return S(() => (t == null && (t = Y()), bn(t), U(e, Q(e.rank, n, t))));
 }
 function It(e, n = 1) {
 if (n !== 1)
@@ -927,12 +927,12 @@ function It(e, n = 1) {
 function Lt(e) {
 return S(() => Me(e, U(Be(e), 1)));
 }
-function
+function bt(e, n, t, r) {
 return S(() => Ve(e, n, t, r));
 }
-function
+function Nt(e) {
 return S(() => {
-const n = U(0.5,
+const n = U(0.5, H(0.2, e));
 return Ye(n, 0, 1);
 });
 }
@@ -943,7 +943,7 @@ export {
 Ln as $,
 Ae as A,
 $t as B,
-
+Hn as C,
 nt as D,
 et as E,
 Jn as F,
@@ -955,7 +955,7 @@ export {
 Zn as L,
 It as M,
 j as N,
-
+Nt as O,
 Lt as P,
 un as Q,
 Se as R,
@@ -976,7 +976,7 @@ export {
 pt as a4,
 Ot as a5,
 Ct as a6,
-
+bt as a7,
 yt as a8,
 At as a9,
 kt as aa,
@@ -985,7 +985,7 @@ export {
 Sn as b,
 v as c,
 Dt as d,
-
+bn as e,
 _e as f,
 An as g,
 zn as h,
@@ -1000,7 +1000,7 @@ export {
 Ye as q,
 _t as r,
 On as s,
-
+Qn as t,
 gt as u,
 fn as v,
 st as w,
@@ -10,14 +10,14 @@ class b extends k {
 constructor(t) {
 if (super(), Array.isArray(t)) {
 if (this.vocab = t, this.vocab.length > 0)
-this.vocabSize = this.vocab.length, this.eosToken = this.vocab.indexOf("<eos>"), this.unkToken = this.vocab.indexOf("<unk>"), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf("<pad>")), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf("_")), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf(" ")), this.unkToken === -1 && (this.unkToken = this.eosToken), this.vocab.forEach((
-this.cache.set(
+this.vocabSize = this.vocab.length, this.eosToken = this.vocab.indexOf("<eos>"), this.unkToken = this.vocab.indexOf(""), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf("<unk>")), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf("<pad>")), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf("_")), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf(" ")), this.unkToken === -1 && (this.unkToken = this.eosToken), this.vocab = this.vocab.map((e) => e === "<pad>" ? "" : e), this.vocab.forEach((e, n) => {
+this.cache.set(e, n);
 });
 else
 throw new Error("Vocab cannot be empty");
 this._trained = !0;
 } else
-this.vocabSize = t, this.vocab = new Array(this.vocabSize).fill("
+this.vocabSize = t, this.vocab = new Array(this.vocabSize).fill(""), this.vocab[0] = "<eos>", this.vocab[1] = "", this.eosToken = 0, this.unkToken = 1, this.cache.set("<eos>", 0), this.cache.set("", 1);
 }
 get trained() {
 return this.vocab.length === this.vocabSize && this._trained;
@@ -25,36 +25,36 @@ class b extends k {
 destroy() {
 }
 async train(t) {
-const
-if (
+const e = t.map((i) => i.split("")).flat(), n = new Set(e), s = Array.from(n), h = this.vocab.indexOf("", this.unkToken + 1), o = this.vocabSize - u.length;
+if (h === -1)
 return this.vocabSize;
-if (this._trained = !0,
-const
-
-
-}),
+if (this._trained = !0, s.length > o) {
+const i = /* @__PURE__ */ new Map();
+e.forEach((a) => {
+i.set(a, (i.get(a) || 0) + 1);
+}), s.sort((a, r) => (i.get(a) || 0) - (i.get(r) || 0)), s.splice(0, s.length - o);
 }
-let
-if (
-const
-for (const
-if (!
+let c = h;
+if (c !== -1) {
+const i = new Set(this.vocab);
+for (const a of s)
+if (!i.has(a) && (this.vocab[c] = a, i.add(a), c = this.vocab.indexOf("", c + 1), c === -1))
 break;
 }
-return this.cache.clear(), this.vocab.forEach((
-this.cache.set(
+return this.cache.clear(), this.vocab.forEach((i, a) => {
+this.cache.set(i, a);
 }), this.emit("trainStatus", "trained"), this.vocabSize;
 }
-async tokenise(t,
+async tokenise(t, e) {
 if (!this.trained)
 throw new Error("Tokeniser not trained");
-return t.map((
-const
-return
+return t.map((s) => e ? s.split("").map((h) => this.cache.get(h) ?? this.unkToken) : s.split("").map((h) => {
+const o = this.cache.get(h);
+return o !== void 0 ? this.vocab[o] : "";
 }));
 }
 async detokenise(t) {
-return t.map((n) => n.map((
+return t.map((n) => n.map((s) => this.vocab[s]).join(""));
 }
 async encode(t) {
 return (await this.tokenise([t], !0))[0];
@@ -68,11 +68,11 @@ class b extends k {
 async getMerges() {
 return [];
 }
-async createTrainingData(t,
-const n = await this.tokenise(t, !0),
-for (let
-
-return [
+async createTrainingData(t, e = 5) {
+const n = await this.tokenise(t, !0), s = [], h = [];
+for (let o = 0; o < n.length - e; o++)
+s.push(...n[o].slice(0, e)), h.push(n[o + 1][0]);
+return [s, h];
 }
 }
 export {
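The CharTokeniser hunks above switch the unknown token from `<unk>` to the empty string and rewrite any `<pad>` entries to `""` before the lookup cache is built. A minimal sketch of the fallback order the new constructor appears to use; `resolveUnkToken` is a hypothetical helper for illustration, not part of the package:

```ts
// Sketch only: mirrors the unknown-token fallback visible in the 0.5.2 constructor.
// `resolveUnkToken` is a hypothetical helper, not an export of @genai-fi/nanogpt.
function resolveUnkToken(vocab: string[], eosToken: number): number {
  for (const candidate of ["", "<unk>", "<pad>", "_", " "]) {
    const index = vocab.indexOf(candidate);
    if (index !== -1) return index;
  }
  return eosToken; // last resort: reuse the <eos> slot, as the constructor does
}

const vocab = ["<eos>", "<pad>", "a", "b", "c"];
const unkToken = resolveUnkToken(vocab, vocab.indexOf("<eos>")); // 1 (the old <pad> slot)
const cleaned = vocab.map((entry) => (entry === "<pad>" ? "" : entry)); // "<pad>" becomes ""
console.log({ unkToken, cleaned });
```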
package/dist/tokeniser/bpe.d.ts CHANGED
@@ -12,6 +12,7 @@ export default class BPETokeniser extends EE<'trainStatus'> implements ITokenise
 get trained(): boolean;
 get vocabSize(): number;
 get eosToken(): number;
+get unkToken(): number;
 train(text: string[]): Promise<number>;
 getVocab(): string[];
 getMerges(): Promise<[string, string][]>;
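The only declaration change here is the new public `unkToken` getter. A hedged usage sketch, assuming the import path below and the numeric (target-vocab-size) constructor form visible in `bpe.js`; tokens missing from the learned vocabulary encode to this id:

```ts
// Assumed import path; adjust to however your bundler resolves the package.
import BPETokeniser from "@genai-fi/nanogpt/dist/tokeniser/bpe.js";

async function demo(): Promise<void> {
  const tokeniser = new BPETokeniser(64); // numeric argument = target vocab size
  await tokeniser.train(["hello world", "hello there"]);

  const ids = await tokeniser.encode("hello zebra");
  const unknownCount = ids.filter((id) => id === tokeniser.unkToken).length;
  console.log({ unkToken: tokeniser.unkToken, unknownCount });
}

demo().catch(console.error);
```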
package/dist/tokeniser/bpe.js CHANGED
@@ -1,5 +1,5 @@
-import
-import { E as
+import l from "../utilities/tokenParse.js";
+import { E as f } from "../index-Dwqa6Zy2.js";
 function u(o, e) {
 return `${o}-::-${e}`;
 }
@@ -7,25 +7,25 @@ function k(o) {
 const e = /* @__PURE__ */ new Map();
 for (let s = 0; s < o.length; s++) {
 const t = o[s];
-for (let
-const
-a: t[
-b: t[
+for (let r = 0; r < t.length - 1; r++) {
+const n = u(t[r], t[r + 1]), a = e.get(n) || {
+a: t[r],
+b: t[r + 1],
 count: 0,
 instances: /* @__PURE__ */ new Set()
 };
-a.count += 1, a.instances.add(s), e.set(
+a.count += 1, a.instances.add(s), e.set(n, a);
 }
 }
 return { pairs: e, tokens: o };
 }
-function h(o, e, s, t,
-const
-if (o.pairs.has(
-const a = o.pairs.get(
-a.count +=
+function h(o, e, s, t, r) {
+const n = u(e, s);
+if (o.pairs.has(n)) {
+const a = o.pairs.get(n);
+a.count += r, r > 0 ? a.instances.add(t) : a.count <= 0 ? o.pairs.delete(n) : a.instances.delete(t);
 } else
-o.pairs.set(
+o.pairs.set(n, { a: e, b: s, count: r, instances: /* @__PURE__ */ new Set([t]) });
 }
 function b(o) {
 let e = null, s = 0;
@@ -36,33 +36,33 @@ function b(o) {
 function d(o, e) {
 return o.map((s) => {
 const t = [];
-for (let
-
+for (let r = 0; r < s.length; r++)
+r < s.length - 1 && s[r] === e[0] && s[r + 1] === e[1] ? (t.push(e[0] + e[1]), r++) : t.push(s[r]);
 return t;
 });
 }
 function m(o, e) {
 e.instances.forEach((s) => {
-const t = o.tokens[s],
-for (let
-if (
+const t = o.tokens[s], r = [];
+for (let n = 0; n < t.length; n++)
+if (n < t.length - 1 && t[n] === e.a && t[n + 1] === e.b) {
 const a = e.a + e.b;
-
+r.push(a), n > 0 && (h(o, t[n - 1], e.a, s, -1), h(o, t[n - 1], a, s, 1)), n++, n < t.length - 1 && (h(o, e.b, t[n + 1], s, -1), h(o, a, t[n + 1], s, 1));
 } else
-
-o.tokens[s] =
+r.push(t[n]);
+o.tokens[s] = r;
 }), o.pairs.delete(u(e.a, e.b));
 }
-class S extends
+class S extends f {
 targetSize;
 vocab = /* @__PURE__ */ new Set();
 vocabIndex = /* @__PURE__ */ new Map();
 merges = [];
 pretokenMap = /* @__PURE__ */ new Map();
 constructor(e, s) {
-super(), Array.isArray(e) ? (e.forEach((t,
-this.vocab.add(t), this.vocabIndex.set(t,
-}), s && (this.merges = s), this.targetSize = e.length) : (this.vocab.add("<eos>"), this.vocab.add("
+super(), Array.isArray(e) ? (e.forEach((t, r) => {
+this.vocab.add(t), this.vocabIndex.set(t, r);
+}), s && (this.merges = s), this.targetSize = e.length) : (this.vocab.add("<eos>"), this.vocab.add(""), this.targetSize = e);
 }
 destroy() {
 this.vocab.clear(), this.vocabIndex.clear(), this.merges = [], this.pretokenMap.clear();
@@ -76,23 +76,26 @@ class S extends g {
 get eosToken() {
 return this.vocabIndex.get("<eos>") ?? 0;
 }
+get unkToken() {
+return this.vocabIndex.get("") ?? 1;
+}
 async train(e) {
-const s = e.map((i) =>
-this.vocab = /* @__PURE__ */ new Set(), this.pretokenMap.clear(), this.merges = [], this.vocab.add("<eos>");
-const
+const s = e.map((i) => l(i)).flat(1), t = new Set(s);
+this.vocab = /* @__PURE__ */ new Set(), this.pretokenMap.clear(), this.merges = [], this.vocab.add("<eos>"), this.vocab.add("");
+const r = Array.from(t), n = r.map((i) => Array.from(i).map((c) => (this.vocab.add(c), c))), a = k(n);
 for (; this.vocab.size < this.targetSize && this.merges.length < this.targetSize; ) {
 const i = b(a);
 if (!i)
 break;
 this.merges.push([i.a, i.b]), this.vocab.add(i.a + i.b), m(a, i);
 }
-
-const c =
+r.forEach((i, p) => {
+const c = n[p];
 this.pretokenMap.set(i, c);
 }), this.vocabIndex.clear();
-let
+let g = 0;
 for (const i of this.vocab.keys())
-this.vocabIndex.set(i,
+this.vocabIndex.set(i, g++);
 return this.emit("trainStatus", "trained"), this.vocab.size;
 }
 getVocab() {
@@ -108,15 +111,15 @@ class S extends g {
 }), this.pretokenMap.set(e, s), s;
 }
 tokeniseStrings(e) {
-return e.map((s) =>
+return e.map((s) => l(s).map((n) => this.pretokenMap.has(n) ? this.pretokenMap.get(n) : this.tokeniseWord(n)).flat(1));
 }
 async tokenise(e, s) {
 const t = this.tokeniseStrings(e);
-return s ? t.map((
+return s ? t.map((r) => r.map((n) => this.vocabIndex.get(n) ?? this.unkToken)) : t.map((r) => r.map((n) => this.vocab.has(n) ? n : ""));
 }
 async detokenise(e) {
 const s = this.getVocab();
-return e.map((
+return e.map((r) => r.map((n) => s[n]).join(""));
 }
 async encode(e) {
 return (await this.tokenise([e], !0))[0];
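Most of the `bpe.js` hunks are variable renames from re-minification, but the pair bookkeeping is worth noting: when a merge is applied, only the words recorded for that pair are revisited, and the counts of neighbouring pairs are adjusted by ±1 instead of recounting the corpus. A standalone sketch of that bookkeeping, in the style of the minified `h` helper above, under assumed names (`PairEntry`, `bumpPair`); the package's internal shapes may differ:

```ts
// Incremental pair-count bookkeeping, as used while applying BPE merges.
// Names here are illustrative, not the package's internals.
interface PairEntry {
  a: string;
  b: string;
  count: number;
  instances: Set<number>; // indices of the words that currently contain this pair
}

type PairStats = { pairs: Map<string, PairEntry>; tokens: string[][] };

const pairKey = (a: string, b: string): string => `${a}-::-${b}`;

function bumpPair(stats: PairStats, a: string, b: string, word: number, delta: number): void {
  const key = pairKey(a, b);
  const entry = stats.pairs.get(key);
  if (entry) {
    entry.count += delta;
    if (delta > 0) entry.instances.add(word);
    else if (entry.count <= 0) stats.pairs.delete(key); // pair no longer occurs anywhere
    else entry.instances.delete(word);
  } else {
    stats.pairs.set(key, { a, b, count: delta, instances: new Set([word]) });
  }
}
```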
package/dist/training/AdamExt.js CHANGED
@@ -1,4 +1,4 @@
-import { A as r, b as c, f as h, s as g, e as o } from "../index-
+import { A as r, b as c, f as h, s as g, e as o } from "../index-CnHyhpKc.js";
 class u extends r {
 constructor(t, e, s, a, i) {
 super(t, e, s, a), this.config = i, this.startLearningRate = t;