@genai-fi/nanogpt 0.12.1 → 0.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/tokeniser/bpe.js +11 -11
- package/package.json +1 -1
package/dist/tokeniser/bpe.js
CHANGED
@@ -1,5 +1,5 @@
-import
-import
+import p from "../utilities/tokenParse.js";
+import f, { SPECIALS as g } from "./BaseTokeniser.js";
 function u(o, e) {
   return `${o}-::-${e}`;
 }
@@ -53,7 +53,7 @@ function v(o, e) {
     o.tokens[s] = n;
   }), o.pairs.delete(u(e.a, e.b));
 }
-class x extends d {
+class x extends f {
   targetSize;
   vocab = /* @__PURE__ */ new Set();
   vocabIndex = /* @__PURE__ */ new Map();
@@ -62,7 +62,7 @@ class x extends d {
   constructor(e, s) {
     super(), Array.isArray(e) ? (e.forEach((t, n) => {
       this.vocab.add(t), this.vocabIndex.set(t, n);
-    }), s && (this.merges = s), this.targetSize = e.length,
+    }), s && (this.merges = s), this.targetSize = e.length, g.forEach((t) => {
       const n = e.indexOf(t);
       n !== -1 && this.addSpecialToken(t, n);
     })) : (this.addSpecialTokens(), this.targetSize = e);
@@ -80,7 +80,7 @@ class x extends d {
     this.vocab.clear(), this.vocabIndex.clear(), this.merges = [], this.pretokenMap.clear();
   }
   get trained() {
-    return this.vocab.size
+    return this.vocab.size > g.length && this.vocab.size <= this.targetSize && this.merges.length > 0;
   }
   get vocabSize() {
     return this.vocab.size;
@@ -95,7 +95,7 @@ class x extends d {
     return this.vocabIndex.get("") ?? 1;
   }
   async train(e) {
-    const s = e.map((a) =>
+    const s = e.map((a) => p(a)).flat(1), t = new Set(s);
     this.vocab = /* @__PURE__ */ new Set(), this.pretokenMap.clear(), this.merges = [], this.addSpecialTokens();
     const n = Array.from(t), r = n.map((a) => Array.from(a).map((c) => (this.vocab.add(c), c))), i = b(r);
     for (; this.vocab.size < this.targetSize && this.merges.length < this.targetSize; ) {
@@ -104,13 +104,13 @@ class x extends d {
         break;
       this.merges.push([a.a, a.b]), this.vocab.add(a.a + a.b), v(i, a);
     }
-    n.forEach((a,
-      const c = r[
+    n.forEach((a, l) => {
+      const c = r[l];
       this.pretokenMap.set(a, c);
     }), this.vocabIndex.clear();
-    let
+    let d = 0;
     for (const a of this.vocab.keys())
-      this.vocabIndex.set(a,
+      this.vocabIndex.set(a, d++);
     return this.emit("trainStatus", "trained"), this.vocab.size;
   }
   getVocab() {
@@ -126,7 +126,7 @@ class x extends d {
     }), this.pretokenMap.set(e, s), s;
   }
   tokeniseStrings(e) {
-    return e.map((s) =>
+    return e.map((s) => p(s).map((r) => this.pretokenMap.has(r) ? this.pretokenMap.get(r) : this.tokeniseWord(r)).flat(1));
   }
   tokenise(e, s) {
     const t = this.tokeniseStrings(e);
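
On the 0.12.2 side of the diff, both train and tokeniseStrings now run their input through the pre-tokeniser imported as p from ../utilities/tokenParse.js, the constructor registers any of the SPECIALS exported by BaseTokeniser that appear in a supplied vocab array, and the trained getter requires the vocab to hold more than the special tokens alone, to stay within targetSize, and to have at least one learned merge. A minimal de-minified sketch of the two changed members, assuming readable stand-in names (BPETokeniser for the minified x, pretokenise for p); the rest of the class is elided:

import pretokenise from "../utilities/tokenParse.js";
import BaseTokeniser, { SPECIALS } from "./BaseTokeniser.js";

class BPETokeniser extends BaseTokeniser {
  // Fields and the remaining methods (train, tokeniseWord, ...) elided;
  // see the diff above for the minified originals.
  targetSize;
  vocab = new Set();

  get trained() {
    // Trained once the vocab holds more than the special tokens alone,
    // has not overshot the target size, and at least one merge was learned.
    return this.vocab.size > SPECIALS.length
      && this.vocab.size <= this.targetSize
      && this.merges.length > 0;
  }

  tokeniseStrings(strings) {
    // Pre-tokenise each string, reuse pretokens cached during training,
    // and fall back to tokeniseWord for anything unseen.
    return strings.map((s) => pretokenise(s)
      .map((w) => this.pretokenMap.has(w) ? this.pretokenMap.get(w) : this.tokeniseWord(w))
      .flat(1));
  }
}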
|