npm - @genai-fi/nanogpt - Versions diffs - 0.6.2 → 0.7.0 - Mend

@genai-fi/nanogpt 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (142) hide show

package/dist/Generator.js +11 -11
package/dist/NanoGPTModel.d.ts +2 -2
package/dist/NanoGPTModel.js +104 -136
package/dist/{RealDiv-BYViZwhN.js → RealDiv-C4hOvYOZ.js} +26 -25
package/dist/{Reshape-t7Kcikjk.js → Reshape-BLijOA8h.js} +5 -5
package/dist/TeachableLLM.d.ts +3 -0
package/dist/TeachableLLM.js +50 -47
package/dist/{TiedEmbedding-9WeDwvjO.js → TiedEmbedding-BLltddza.js} +4 -4
package/dist/{axis_util-Bu4h7XWV.js → axis_util-DaAl5MER.js} +3 -3
package/dist/backend.d.ts +1 -0
package/dist/backend.js +7 -0
package/dist/backend_util-DWiwsi2N.js +749 -0
package/dist/{broadcast_to-DARN-DBD.js → broadcast_to-C4v-j9yA.js} +2 -2
package/dist/{concat-5aPGqw3Z.js → concat-CsHeR4zV.js} +8 -8
package/dist/{dataset-pgqp-YfL.js → dataset-JDyjG3QR.js} +3 -3
package/dist/{dropout-Bciw46HT.js → dropout-hpDwECTe.js} +7 -7
package/dist/{gather-DjyCjmOD.js → gather-D0_gPiBz.js} +4 -4
package/dist/gelu-uyHP1x1f.js +26 -0
package/dist/gpgpu_math-DJm3ZTAf.js +2371 -0
package/dist/index-BPPzKVdR.js +12099 -0
package/dist/{index-BAzbokzv.js → index-C0dhsYom.js} +405 -389
package/dist/{kernel_funcs_utils-CUxJCg0g.js → kernel_funcs_utils-CwRTFqrc.js} +31 -30
package/dist/layers/BaseLayer.js +2 -2
package/dist/layers/CausalSelfAttention.js +6 -6
package/dist/layers/MLP.js +5 -5
package/dist/layers/RMSNorm.js +3 -3
package/dist/layers/RoPECache.js +4 -4
package/dist/layers/TiedEmbedding.js +5 -5
package/dist/layers/TransformerBlock.js +1 -1
package/dist/loader/loadTransformers.js +1 -1
package/dist/loader/oldZipLoad.js +5 -5
package/dist/{log_sum_exp-YEo2h3gb.js → log_sum_exp-D086OgZJ.js} +15 -15
package/dist/main.d.ts +2 -0
package/dist/main.js +9 -5
package/dist/{mat_mul-7121rsJk.js → mat_mul-1nwdPkQ_.js} +4 -4
package/dist/{max-DtlIuVeW.js → max-BQc2Aj-I.js} +4 -4
package/dist/{mulmat_packed_gpu-D4nKF7Je.js → mulmat_packed_gpu-Gzf3I9UV.js} +1 -1
package/dist/non_max_suppression_impl-CsEgBuMA.js +134 -0
package/dist/{ones-BBlSRqn1.js → ones-D63HpSF_.js} +2 -2
package/dist/ops/appendCache.js +3 -3
package/dist/ops/attentionMask.js +1 -1
package/dist/ops/cpu/appendCache.js +8 -8
package/dist/ops/cpu/attentionMask.js +9 -9
package/dist/ops/cpu/fusedSoftmax.js +17 -11
package/dist/ops/cpu/gatherSub.js +7 -7
package/dist/ops/cpu/gelu.js +13 -13
package/dist/ops/cpu/matMulGelu.js +36 -24
package/dist/ops/cpu/matMulMul.js +14 -8
package/dist/ops/cpu/mulDropout.js +9 -3
package/dist/ops/cpu/normRMS.js +5 -5
package/dist/ops/cpu/qkv.js +3 -3
package/dist/ops/cpu/rope.js +5 -5
package/dist/ops/cpu/scatterSub.js +11 -11
package/dist/ops/fusedSoftmax.js +1 -1
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +2 -2
package/dist/ops/grads/attentionMask.js +1 -1
package/dist/ops/grads/fusedSoftmax.js +2 -2
package/dist/ops/grads/gelu.js +3 -24
package/dist/ops/grads/matMulGelu.js +5 -5
package/dist/ops/grads/normRMS.js +6 -6
package/dist/ops/grads/qkv.js +1 -1
package/dist/ops/grads/rope.js +3 -3
package/dist/ops/matMulGelu.js +1 -1
package/dist/ops/matMulMul.js +1 -1
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/normRMS.js +1 -1
package/dist/ops/qkv.js +1 -1
package/dist/ops/rope.js +4 -4
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/webgl/appendCache.js +1 -1
package/dist/ops/webgl/attentionMask.js +1 -1
package/dist/ops/webgl/fusedSoftmax.js +4 -4
package/dist/ops/webgl/gatherSub.js +1 -1
package/dist/ops/webgl/gelu.js +2 -2
package/dist/ops/webgl/log.js +5 -5
package/dist/ops/webgl/matMulGelu.js +17 -17
package/dist/ops/webgl/matMulMul.js +1 -1
package/dist/ops/webgl/mulDropout.js +4 -4
package/dist/ops/webgl/normRMS.js +2 -2
package/dist/ops/webgl/qkv.js +1 -1
package/dist/ops/webgl/rope.js +1 -1
package/dist/ops/webgl/scatterSub.js +1 -1
package/dist/ops/webgpu/appendCache.js +56 -0
package/dist/ops/webgpu/attentionMask.d.ts +1 -0
package/dist/ops/webgpu/attentionMask.js +64 -0
package/dist/ops/webgpu/gatherSub.d.ts +1 -0
package/dist/ops/webgpu/gatherSub.js +37 -0
package/dist/ops/webgpu/gelu.d.ts +14 -0
package/dist/ops/webgpu/gelu.js +86 -0
package/dist/ops/webgpu/index.d.ts +0 -0
package/dist/ops/webgpu/index.js +8 -0
package/dist/ops/webgpu/normRMS.d.ts +1 -0
package/dist/ops/webgpu/normRMS.js +115 -0
package/dist/ops/webgpu/qkv.d.ts +1 -0
package/dist/ops/webgpu/qkv.js +56 -0
package/dist/ops/webgpu/rope.d.ts +1 -0
package/dist/ops/webgpu/rope.js +68 -0
package/dist/ops/webgpu/scatterSub.d.ts +1 -0
package/dist/ops/webgpu/scatterSub.js +37 -0
package/dist/{ops-C0sQEcPw.js → ops-CIQLNshk.js} +452 -503
package/dist/{random_width-DWzaOgrn.js → random_width-DkYP8W8N.js} +143 -144
package/dist/{range-DYsrnfiy.js → range-CYzpQY53.js} +1 -1
package/dist/{reciprocal-CJQeasVa.js → reciprocal-_A9yv27J.js} +1 -1
package/dist/{register_all_kernels-BfFCQAqs.js → register_all_kernels-guvSxp7M.js} +202 -200
package/dist/{reshape-krWGKraP.js → reshape-BMUzc1UY.js} +3 -3
package/dist/{scatter_nd_util-93ln7Hut.js → scatter_nd_util-IRBqKz_b.js} +3 -3
package/dist/{selu_util-sntGesxr.js → selu_util-Dt_iuXaq.js} +6 -6
package/dist/shared-BNa2q6jD.js +69 -0
package/dist/{shared-Ca6iDobD.js → shared-CDu9S76h.js} +541 -606
package/dist/{sin-D_h-qCSx.js → sin-Cocju-BY.js} +6 -6
package/dist/{softmax-fsdtf6JC.js → softmax-GPNK3o-U.js} +3 -3
package/dist/{split-eiktj-6L.js → split-CHzJjxDv.js} +4 -4
package/dist/{stack-dfEEz2OY.js → stack-Dpgg_1W1.js} +2 -2
package/dist/{sum-BE_Irnim.js → sum-B8wEpKsg.js} +5 -5
package/dist/{tensor-Xyi595sG.js → tensor-RvZVNmg0.js} +1 -1
package/dist/{tensor2d-CPEkynbH.js → tensor2d-B_kyod7_.js} +1 -1
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.js +2 -2
package/dist/training/Evaluator.js +1 -1
package/dist/training/FullTrainer.js +20 -20
package/dist/training/Trainer.d.ts +5 -6
package/dist/training/Trainer.js +59 -60
package/dist/training/sparseCrossEntropy.js +19 -26
package/dist/utilities/dummy.js +19 -19
package/dist/utilities/generate.js +15 -16
package/dist/utilities/multinomialCPU.d.ts +2 -0
package/dist/utilities/multinomialCPU.js +13 -0
package/dist/utilities/performance.d.ts +2 -0
package/dist/utilities/performance.js +16 -0
package/dist/utilities/profile.d.ts +1 -0
package/dist/utilities/profile.js +9 -6
package/dist/utilities/safetensors.js +2 -2
package/dist/utilities/weights.js +2 -2
package/dist/{variable-wSS22xj5.js → variable-DXEUOwew.js} +1 -1
package/dist/webgpu_util-g13LvDIv.js +625 -0
package/dist/{zeros-YJDE7oRb.js → zeros-DCPCdFGq.js} +8 -8
package/package.json +2 -1
package/dist/gpgpu_math-CNslybmD.js +0 -3115
package/dist/norm-CzltS9Fz.js +0 -86
package/dist/ops/node/sparseCrossEntropy.js +0 -11
/package/dist/ops/{node/sparseCrossEntropy.d.ts → webgpu/appendCache.d.ts} +0 -0

package/dist/Generator.js CHANGED Viewed

@@ -1,15 +1,15 @@
 import { E as u } from "./index-Dwqa6Zy2.js";
-import "./index-BAzbokzv.js";
+import "./index-C0dhsYom.js";
 import "./ops/cpu/attentionMask.js";
 import "./ops/webgl/attentionMask.js";
 import "./ops/grads/attentionMask.js";
 import "./ops/cpu/qkv.js";
 import "./ops/webgl/qkv.js";
 import "./ops/grads/qkv.js";
-import "./random_width-DWzaOgrn.js";
-import "./register_all_kernels-BfFCQAqs.js";
+import "./random_width-DkYP8W8N.js";
+import "./register_all_kernels-guvSxp7M.js";
 import "./index-Tf7vU29b.js";
-import "./dataset-pgqp-YfL.js";
+import "./dataset-JDyjG3QR.js";
 import "./ops/cpu/rope.js";
 import "./ops/webgl/rope.js";
 import "./ops/grads/rope.js";
@@ -33,10 +33,10 @@ import f from "./tokeniser/CharTokeniser.js";
 import "./papaparse.min-C8l2Kvo1.js";
 import "./ops/cpu/gelu.js";
 import "./ops/webgl/gelu.js";
-import "./ops/grads/gelu.js";
+import "./gelu-uyHP1x1f.js";
 import "./ops/webgl/log.js";
-import { t as d } from "./tensor2d-CPEkynbH.js";
-import { c as g } from "./concat-5aPGqw3Z.js";
+import { t as d } from "./tensor2d-B_kyod7_.js";
+import { c as g } from "./concat-CsHeR4zV.js";
 const k = [
   ...Array.from({ length: 95 }, (a, t) => String.fromCharCode(t + 32)),
   // ASCII
@@ -49,7 +49,7 @@ const k = [
   // Cyrillic letters
   ..."абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ"
 ];
-function T(a, t) {
+function w(a, t) {
   return a.length === t ? a : a.length > t ? a.slice(0, t) : a.concat(Array(t - a.length).fill(""));
 }
 class rt extends u {
@@ -69,7 +69,7 @@ class rt extends u {
         output: e,
         attention: p,
         probabilities: c
-      } = this.model.generate(i, void 0, r), h = i;
+      } = await this.model.generate(i, void 0, r), h = i;
       i = g([i, e], 1), h.dispose();
       const l = await this.processResponse(t, e, p, c);
       if (e.dispose(), l === null)
@@ -99,7 +99,7 @@ class rt extends u {
         output: p,
         probabilities: c,
         attention: h
-      } = this.model.generate(i, n, {
+      } = await this.model.generate(i, n, {
         ...r,
         usePadding: !1
       });
@@ -116,7 +116,7 @@ class rt extends u {
   async generate(t, o) {
     const r = t && t.length > this.model.config.gpt.blockSize ? t.slice(-this.model.config.gpt.blockSize) : t;
     this.active = !0, this.emit("start");
-    const i = this.tokeniser.trained ? this.tokeniser : new f(T(k, this.tokeniser.vocabSize)), n = await (this.model.config.gpt.useRope && !o?.noCache ? this.generateCache(i, r, o) : this.generateNoCache(i, r, o));
+    const i = this.tokeniser.trained ? this.tokeniser : new f(w(k, this.tokeniser.vocabSize)), n = await (this.model.config.gpt.useRope && !o?.noCache ? this.generateCache(i, r, o) : this.generateNoCache(i, r, o));
     return this.active = !1, this.emit("stop"), n;
   }
   stop() {

package/dist/NanoGPTModel.d.ts CHANGED Viewed

@@ -42,11 +42,11 @@ export default class NanoGPT extends BaseLayer<ModelForwardAttributes> {
     private validateInput;
     private calculateLoss;
     forward(attrs: ModelForwardAttributes, idx: Tensor, targets?: Tensor): Tensor[];
-    generate(idx: Tensor, cache?: KVCache[], options?: GenerateOptions): {
+    generate(idx: Tensor, cache?: KVCache[], options?: GenerateOptions): Promise<{
         output: Tensor;
         probabilities?: Tensor;
         attention?: Tensor[];
-    };
+    }>;
     getNumParams(): number;
     dispose(): void;
 }

package/dist/NanoGPTModel.js CHANGED Viewed

@@ -1,19 +1,19 @@
-import { defaultConfig as F } from "./config.js";
-import O from "./layers/TransformerBlock.js";
-import { T as _, r as D } from "./TiedEmbedding-9WeDwvjO.js";
-import K from "./layers/RoPECache.js";
-import N from "./layers/RMSNorm.js";
-import { estimateParameterCount as R } from "./utilities/parameters.js";
-import { createSoftmaxCrossEntropyWithGrad as A } from "./training/sparseCrossEntropy.js";
-import G from "./layers/BaseLayer.js";
-import { E as B, D as V, p as j } from "./random_width-DWzaOgrn.js";
-import { o as W, q as H, E as J, a6 as Q, t as z, a7 as U, s as v, k as X } from "./index-BAzbokzv.js";
-import { m as Y, t as Z } from "./register_all_kernels-BfFCQAqs.js";
-import { r as L } from "./reshape-krWGKraP.js";
-import { r as tt } from "./range-DYsrnfiy.js";
-import { s as M } from "./softmax-fsdtf6JC.js";
-import { t as et } from "./ops-C0sQEcPw.js";
-import { g as ot } from "./gather-DjyCjmOD.js";
+import { defaultConfig as M } from "./config.js";
+import v from "./layers/TransformerBlock.js";
+import { T as x, r as T } from "./TiedEmbedding-BLltddza.js";
+import F from "./layers/RoPECache.js";
+import O from "./layers/RMSNorm.js";
+import { estimateParameterCount as _ } from "./utilities/parameters.js";
+import { createSoftmaxCrossEntropyWithGrad as D } from "./training/sparseCrossEntropy.js";
+import K from "./layers/BaseLayer.js";
+import { E as N, D as R, p as q } from "./random_width-DkYP8W8N.js";
+import { x as A, y as G, E as B, a5 as V, t as C, a6 as j, b as z, o as U } from "./index-C0dhsYom.js";
+import W from "./utilities/multinomialCPU.js";
+import { m as H, t as J } from "./register_all_kernels-guvSxp7M.js";
+import { r as P } from "./reshape-BMUzc1UY.js";
+import { r as Q } from "./range-CYzpQY53.js";
+import { s as $ } from "./softmax-GPNK3o-U.js";
+import { g as X } from "./gather-D0_gPiBz.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -30,17 +30,17 @@ import { g as ot } from "./gather-DjyCjmOD.js";
  * limitations under the License.
  * =============================================================================
  */
-function st(u, t, e, o = !1) {
-  const r = H(u, "logits", "multinomial"), s = r.size, n = r.rank;
+function Y(u, t, o, e = !1) {
+  const l = G(u, "logits", "multinomial"), s = l.size, r = l.rank;
   if (s < 2)
     throw new Error(`Error in multinomial: you need at least 2 outcomes, but got ${s}.`);
-  if (n > 2)
-    throw new Error(`Rank of probabilities must be 1 or 2, but is ${n}`);
-  e = e || Math.random();
-  const i = { logits: n === 1 ? L(r, [1, -1]) : r }, h = { numSamples: t, seed: e, normalized: o }, c = J.runKernel(Q, i, h);
-  return n === 1 ? L(c, [c.size]) : c;
+  if (r > 2)
+    throw new Error(`Rank of probabilities must be 1 or 2, but is ${r}`);
+  o = o || Math.random();
+  const n = { logits: r === 1 ? P(l, [1, -1]) : l }, a = { numSamples: t, seed: o, normalized: e }, i = B.runKernel(V, n, a);
+  return r === 1 ? P(i, [i.size]) : i;
 }
-const C = /* @__PURE__ */ W({ multinomial_: st });
+const I = /* @__PURE__ */ A({ multinomial_: Y });
 /**
  * @license
  * Copyright 2018 Google LLC
@@ -50,13 +50,13 @@ const C = /* @__PURE__ */ W({ multinomial_: st });
  * https://opensource.org/licenses/MIT.
  * =============================================================================
  */
-function nt(u) {
-  return new V(u);
+function Z(u) {
+  return new R(u);
 }
-function it(u) {
-  return new B(u);
+function tt(u) {
+  return new N(u);
 }
-class St extends G {
+class bt extends K {
   wte;
   // Token embeddings
   wpe;
@@ -70,15 +70,15 @@ class St extends G {
   log = [];
   // Training log
   constructor(t = {}) {
-    super({ gpt: { ...F, ...t }, layerConfig: {} }), this.wte = new _(this.config, "token_embedding", this), this.config.gpt.useRope === !1 ? this.wpe = it({
+    super({ gpt: { ...M, ...t }, layerConfig: {} }), this.wte = new x(this.config, "token_embedding", this), this.config.gpt.useRope === !1 ? this.wpe = tt({
       inputDim: this.config.gpt.blockSize,
       outputDim: this.config.gpt.nEmbed,
       name: "positional_embedding",
-      embeddingsInitializer: D({ mean: 0, stddev: 0.02 })
-    }) : (this.ropeCache = new K(this.config.gpt), this.config.layerConfig.ropeCache = this.ropeCache), this.drop = nt({ rate: this.config.gpt.dropout }), this.blocks = [];
-    for (let e = 0; e < this.config.gpt.nLayer; e++)
-      this.blocks.push(new O(e, this.config, this));
-    this.lnF = new N(this.config, "final_rms_norm", this);
+      embeddingsInitializer: T({ mean: 0, stddev: 0.02 })
+    }) : (this.ropeCache = new F(this.config.gpt), this.config.layerConfig.ropeCache = this.ropeCache), this.drop = Z({ rate: this.config.gpt.dropout }), this.blocks = [];
+    for (let o = 0; o < this.config.gpt.nLayer; o++)
+      this.blocks.push(new v(o, this.config, this));
+    this.lnF = new O(this.config, "final_rms_norm", this);
   }
   get checkpointing() {
     return this.config.layerConfig.checkpointing === !0;
@@ -86,27 +86,27 @@ class St extends G {
   set checkpointing(t) {
     this.config.layerConfig.checkpointing = t;
   }
-  inputPhase(t, e, o = !1) {
-    return z(() => {
-      const r = this.wte.embed(t);
+  inputPhase(t, o, e = !1) {
+    return C(() => {
+      const l = this.wte.embed(t);
       if (this.config.gpt.useRope === !1) {
-        const [, s] = t.shape, n = this.config.gpt.blockSize, p = tt(0, s, 1, "int32"), i = Y(U(p, v(e, "int32")), v(n, "int32")), h = this.wpe.apply(i), c = r.add(h);
-        return this.drop.apply(c, { training: o });
+        const [, s] = t.shape, r = this.config.gpt.blockSize, g = Q(0, s, 1, "int32"), n = H(j(g, z(o, "int32")), z(r, "int32")), a = this.wpe.apply(n), i = l.add(a);
+        return this.drop.apply(i, { training: e });
       } else
-        return this.drop.apply(r, { training: o });
+        return this.drop.apply(l, { training: e });
     });
   }
   setSkipMask(t) {
     if (t.length !== this.blocks.length)
       throw new Error(`Mask length ${t.length} does not match number of blocks ${this.blocks.length}`);
-    for (let e = 0; e < this.blocks.length; e++)
-      this.blocks[e].skipped = t[e];
+    for (let o = 0; o < this.blocks.length; o++)
+      this.blocks[o].skipped = t[o];
   }
   setTrainableMask(t) {
     if (t.length !== this.blocks.length)
       throw new Error(`Mask length ${t.length} does not match number of blocks ${this.blocks.length}`);
-    for (let e = 0; e < this.blocks.length; e++)
-      this.blocks[e].trainable = t[e];
+    for (let o = 0; o < this.blocks.length; o++)
+      this.blocks[o].trainable = t[o];
   }
   validateInput(t) {
     if (t.shape.length !== 2)
@@ -116,120 +116,88 @@ class St extends G {
     if (t.dtype !== "int32")
       throw new Error(`Input tensor must be of type int32, got ${t.dtype}`);
   }
-  calculateLoss(t, e) {
+  calculateLoss(t, o) {
     try {
-      return A()(t, e).mean();
-    } catch (o) {
-      throw console.error("Error computing loss:", o), new Error(`Loss computation failed: ${o}`);
+      return D()(t, o).mean();
+    } catch (e) {
+      throw console.error("Error computing loss:", e), new Error(`Loss computation failed: ${e}`);
     }
   }
-  // Attention rollout per Abnar & Zuidema (2020)
-  // Expects list of (B, T, T) attention matrices already averaged over heads.
-  /*private computeAttentionRollout(attentions: Tensor[]): Tensor {
-          return tidy(() => {
-              if (attentions.length === 0) {
-                  throw new Error('No attentions for rollout');
-              }
-              const [B, Q, K] = attentions[0].shape as number[];
-              // Validate shapes are consistent
-              for (const a of attentions) {
-                  const [b2, q2, k2] = a.shape as number[];
-                  if (b2 !== B || q2 !== Q || k2 !== K) {
-                      throw new Error(
-                          `Inconsistent attention shapes in rollout: expected [${B},${Q},${K}] got [${b2},${q2},${k2}]`
-                      );
-                  }
-              }
-              // Always slice to [B, Q, Q] for rollout
-              const attentionsSliced = attentions.map((att) => att.slice([0, 0, 0], [B, Q, Q]));
-              const ey = eye(Q, Q).expandDims(0); // (1,Q,Q)
-              let rollout = ey.tile([B, 1, 1]); // (B,Q,Q)
-              for (const att of attentionsSliced) {
-                  const a = att.add(ey);
-                  const aNorm = a.div(a.sum(-1, true)); // (B,Q,Q)
-                  rollout = aNorm.matMul(rollout); // (B,Q,Q)
-              }
-              return rollout;
-          });
-      }*/
-  forward(t, e, o) {
-    return this.validateInput(e), z(() => {
+  forward(t, o, e) {
+    return this.validateInput(o), C(() => {
       this.startMemory();
-      const r = t.cache?.[0]?.length ?? 0;
-      let s = this.inputPhase(e, r, t.training);
+      const l = t.cache?.[0]?.length ?? 0;
+      let s = this.inputPhase(o, l, t.training);
       if (t.cache && t.cache.length !== this.blocks.length)
         throw console.error("Cache", t.cache), new Error(
           `Cache length ${t.cache.length} does not match number of blocks ${this.blocks.length}`
         );
-      for (let i = 0; i < this.blocks.length; i++) {
-        const h = this.blocks[i], c = Math.random() * 1e9, g = {
+      for (let n = 0; n < this.blocks.length; n++) {
+        const a = this.blocks[n], i = Math.random() * 1e9, d = {
           training: t.training,
-          seed: c,
+          seed: i,
           attentionScores: t.attentionScores,
-          pastKV: t.cache ? t.cache[i] : void 0
-        }, E = this.config.layerConfig.checkpointing && t.training ? h.callCheckpoint(g, s) : h.call(g, s);
-        s.dispose(), s = E;
+          pastKV: t.cache ? t.cache[n] : void 0
+        }, S = this.config.layerConfig.checkpointing && t.training ? a.callCheckpoint(d, s) : a.call(d, s);
+        s.dispose(), s = S;
       }
       s = this.lnF.call(t, s);
-      const n = this.wte.project(s);
+      const r = this.wte.project(s);
       s.dispose();
-      let p;
-      return o && (p = this.calculateLoss(n, o)), this.endMemory("Forward"), p ? [n, p] : [n];
+      let g;
+      return e && (g = this.calculateLoss(r, e)), this.endMemory("Forward"), g ? [r, g] : [r];
     });
   }
-  generate(t, e, o) {
-    const r = o?.temperature ?? 1, s = o?.topK, n = o?.topP, p = o?.usePadding ?? !1;
-    return z(() => {
-      const i = t, h = i.shape[1], c = h <= this.config.gpt.blockSize ? i : i.slice(
-        [0, h - this.config.gpt.blockSize],
-        [i.shape[0], this.config.gpt.blockSize]
-      ), g = p ? this.config.gpt.blockSize - c.shape[1] : 0, E = g > 0 ? j(c, [
+  async generate(t, o, e) {
+    const l = e?.temperature ?? 1, s = e?.topK, r = e?.topP, g = e?.usePadding ?? !1, n = {
+      training: !1,
+      attentionScores: e?.attentionScores ? {
+        attentionOut: []
+      } : void 0,
+      cache: o
+    }, a = C(() => {
+      const p = t, m = p.shape[1], h = m <= this.config.gpt.blockSize ? p : p.slice(
+        [0, m - this.config.gpt.blockSize],
+        [p.shape[0], this.config.gpt.blockSize]
+      ), b = g ? this.config.gpt.blockSize - h.shape[1] : 0, w = b > 0 ? q(h, [
         [0, 0],
-        [0, g]
-      ]) : c, f = {
-        training: !1,
-        attentionScores: o?.attentionScores ? {
-          attentionOut: []
-        } : void 0,
-        cache: e
-      }, [d] = this.forward(f, E), $ = d.shape[1] - 1 - g, q = d.slice([0, $, 0], [d.shape[0], 1, d.shape[2]]);
-      f.attentionScores?.attentionOut && f.attentionScores.attentionOut.forEach((l, b) => {
-        l.shape[1] !== 1 && (f.attentionScores.attentionOut[b] = X(
-          l.slice([0, $, 0], [l.shape[0], 1, l.shape[2]])
-        ), l.dispose());
-      }), d.dispose();
-      const w = q.div(r);
-      let m;
-      if (n) {
-        const l = M(w.squeeze([1])), b = l.arraySync()[0];
-        l.dispose();
-        const y = b.map((a, k) => ({ prob: a, index: k })).sort((a, k) => k.prob - a.prob);
-        let P = 0;
-        const S = new Array(y.length).fill(0);
-        for (const a of y)
-          if (P += a.prob, S[a.index] = a.prob, P >= n)
-            break;
-        const x = S.reduce((a, k) => a + k, 0), T = S.map((a) => a / x);
-        m = C(et(T), 1, void 0, !0);
-      } else if (s) {
-        const { values: l, indices: b } = Z(w, s), y = C(l.squeeze([1]), 1);
-        m = ot(b.squeeze([1]), y, 1);
-      } else
-        m = C(w.squeeze([1]), 1);
-      let I;
-      return o?.includeProbabilities && (I = M(w.squeeze([1]))), m = m.reshape([1, 1]), { output: m, probabilities: I, attention: f.attentionScores?.attentionOut };
+        [0, b]
+      ]) : h, [f] = this.forward(n, w), E = f.shape[1] - 1 - b, c = f.slice([0, E, 0], [f.shape[0], 1, f.shape[2]]);
+      return n.attentionScores?.attentionOut && n.attentionScores.attentionOut.forEach((y, L) => {
+        y.shape[1] !== 1 && (n.attentionScores.attentionOut[L] = U(
+          y.slice([0, E, 0], [y.shape[0], 1, y.shape[2]])
+        ), y.dispose());
+      }), f.dispose(), c.div(l).squeeze([1]);
     });
+    let i;
+    if (r) {
+      const p = $(a), m = await p.array();
+      p.dispose();
+      const h = m[0].map((c, k) => ({ prob: c, index: k })).sort((c, k) => k.prob - c.prob);
+      let b = 0;
+      const w = new Array(h.length).fill(0);
+      for (const c of h)
+        if (b += c.prob, w[c.index] = c.prob, b >= r)
+          break;
+      const f = w.reduce((c, k) => c + k, 0), E = w.map((c) => c / f);
+      i = W(E);
+    } else if (s) {
+      const { values: p, indices: m } = J(a, s), h = I(p, 1);
+      i = X(m, h, 1), p.dispose(), m.dispose(), h.dispose();
+    } else
+      i = I(a, 1);
+    let d;
+    e?.includeProbabilities && (d = $(a));
+    const S = i.reshape([1, 1]);
+    return i.dispose(), i = S, a.dispose(), { output: i, probabilities: d, attention: n.attentionScores?.attentionOut };
   }
   getNumParams() {
-    return R(this.config.gpt);
+    return _(this.config.gpt);
   }
   dispose() {
     this.wte.dispose(), this.wpe && this.wpe.dispose(), this.drop.dispose(), this.blocks.forEach((t) => t.dispose()), this.lnF.dispose();
   }
 }
 export {
-  St as default
+  bt as default
 };

package/dist/{RealDiv-BYViZwhN.js → RealDiv-C4hOvYOZ.js} RENAMED Viewed

@@ -1,9 +1,10 @@
-import { ao as T, ac as E, p as O, g as V, aw as B, N as F, M as j, ax as K } from "./index-BAzbokzv.js";
-import { r as $ } from "./Reshape-t7Kcikjk.js";
-import { g as A, a as k, b as C, c as N, e as R } from "./axis_util-Bu4h7XWV.js";
-import { t as U, m as W } from "./shared-Ca6iDobD.js";
-import { j as _, f as y } from "./gpgpu_math-CNslybmD.js";
-import { g as G, b as L } from "./kernel_funcs_utils-CUxJCg0g.js";
+import { ao as T, ac as E, p as O, j as V, aw as B, U as F, N as U, ax as j } from "./index-C0dhsYom.js";
+import { r as $ } from "./Reshape-BLijOA8h.js";
+import { g as A, a as k, b as C, c as N, e as R } from "./axis_util-DaAl5MER.js";
+import { t as K, m as W } from "./shared-BNa2q6jD.js";
+import { c as _ } from "./backend_util-DWiwsi2N.js";
+import { f as y } from "./gpgpu_math-DJm3ZTAf.js";
+import { g as G, b as L } from "./kernel_funcs_utils-CwRTFqrc.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -240,7 +241,7 @@ function q(a) {
   }
   return s;
 }
-function M(a, s, e, t) {
+function P(a, s, e, t) {
   const n = q(a.shape);
   let l = a;
   for (let r = 0; r < n.length; r++) {
@@ -355,7 +356,7 @@ class J {
  * limitations under the License.
  * =============================================================================
  */
-function P(a, s, e) {
+function D(a, s, e) {
   const t = E().getBool("WEBGL_PACK_ARRAY_OPERATIONS") ? new J(a.shape, s) : new Y(a.shape, s);
   return e.runWebGLProgram(t, [a], a.dtype);
 }
@@ -380,11 +381,11 @@ function Q(a, s, e, t) {
   let i = r;
   const c = A(i, l), o = c != null;
   let u = a;
-  o && (u = P(a, c, t), i = k(i.length, l)), C("sum", i, l);
+  o && (u = D(a, c, t), i = k(i.length, l)), C("sum", i, l);
   const [p, h] = N(u.shape, i);
   let d = p;
   e && (d = R(p, r));
-  const f = V(h), g = V(a.shape) / f, x = $({ inputs: { x: u }, attrs: { shape: [g, f] }, backend: t }), b = B(a.dtype), I = M(x, b, "sum", t), m = $({ inputs: { x: I }, attrs: { shape: d }, backend: t });
+  const f = V(h), g = V(a.shape) / f, x = $({ inputs: { x: u }, attrs: { shape: [g, f] }, backend: t }), b = B(a.dtype), I = P(x, b, "sum", t), m = $({ inputs: { x: I }, attrs: { shape: d }, backend: t });
   return t.disposeIntermediateTensorInfo(x), t.disposeIntermediateTensorInfo(I), o && t.disposeIntermediateTensorInfo(u), m;
 }
 /**
@@ -407,7 +408,7 @@ function Z(a) {
   const { inputs: s, backend: e, attrs: t } = a, { x: n } = s, { axis: l, keepDims: r } = t;
   return Q(n, l, r, e);
 }
-const de = {
+const pe = {
   kernelName: F,
   backendName: "webgl",
   kernelFunc: Z
@@ -429,7 +430,7 @@ const de = {
  * =============================================================================
  */
 function ee(a, s, e, t) {
-  const n = V(s), r = V(a.shape) / n, i = $({ inputs: { x: a }, attrs: { shape: [r, n] }, backend: t }), c = M(i, a.dtype, "max", t), o = $({ inputs: { x: c }, attrs: { shape: e }, backend: t });
+  const n = V(s), r = V(a.shape) / n, i = $({ inputs: { x: a }, attrs: { shape: [r, n] }, backend: t }), c = P(i, a.dtype, "max", t), o = $({ inputs: { x: c }, attrs: { shape: e }, backend: t });
   return t.disposeIntermediateTensorInfo(i), t.disposeIntermediateTensorInfo(c), o;
 }
 /**
@@ -458,12 +459,12 @@ function te(a) {
       const I = e.texData.get(d.dataId).values, m = new Array(i);
       for (let v = 0; v < m.length; v++)
         m[v] = n.shape[u[v]];
-      const z = U(I, n.shape, n.dtype, u, m);
+      const z = K(I, n.shape, n.dtype, u, m);
       d = e.makeTensorInfo(m, n.dtype);
-      const D = e.texData.get(d.dataId);
-      D.values = z;
+      const M = e.texData.get(d.dataId);
+      M.values = z;
     } else
-      d = P(n, u, e);
+      d = D(n, u, e);
     o = k(o.length, i);
   }
   C("max", o, i);
@@ -480,8 +481,8 @@ function te(a) {
     x = ee(d, S, g, e);
   return p && e.disposeIntermediateTensorInfo(d), x;
 }
-const pe = {
-  kernelName: j,
+const he = {
+  kernelName: U,
   backendName: "webgl",
   kernelFunc: te
 };
@@ -523,18 +524,18 @@ return a / b;`, se = `
   }
   return result;
-`, ne = L({ opSnippet: ae, packedOpSnippet: se, checkOutOfBounds: !0 }), he = {
-  kernelName: K,
+`, ne = L({ opSnippet: ae, packedOpSnippet: se, checkOutOfBounds: !0 }), fe = {
+  kernelName: j,
   backendName: "webgl",
   kernelFunc: ne
 };
 export {
-  M as a,
-  pe as b,
-  he as c,
-  de as d,
+  P as a,
+  he as b,
+  fe as c,
+  pe as d,
   te as m,
   ne as r,
   Z as s,
-  P as t
+  D as t
 };

package/dist/{Reshape-t7Kcikjk.js → Reshape-BLijOA8h.js} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { g as c, aa as C, i as f, D as R } from "./index-BAzbokzv.js";
-import { u as g, g as I, a as x, b as F, c as $, d as u, e as m, i as l } from "./gpgpu_math-CNslybmD.js";
+import { j as c, a9 as C, l as f, I as R } from "./index-C0dhsYom.js";
+import { u as g, g as I, a as x, b as F, c as $, d as u, e as l, i as m } from "./gpgpu_math-DJm3ZTAf.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -82,14 +82,14 @@ function v(s, t) {
 function b(s, t, i) {
   const a = [
     u(s.shape),
-    ...m(s.shape)
+    ...l(s.shape)
   ], e = {
     dtype: s.dtype,
     shape: a,
     dataId: s.dataId
   }, o = [
     u(t),
-    ...m(t)
+    ...l(t)
   ], r = new S(o, a), p = !0, n = [a], h = i.runWebGLProgram(r, [e], s.dtype, n, p);
   return { dataId: h.dataId, shape: t, dtype: h.dtype };
 }
@@ -113,7 +113,7 @@ function y(s) {
   const { inputs: t, backend: i, attrs: a } = s, { x: e } = t, { shape: o } = a, r = i, p = c(e.shape), n = C(o, p), h = c(n);
   f(p === h, () => `The new shape (${n}) has ${h} elements and the old shape (${e.shape}) has ${p} elements. The new shape and old shape must have the same number of elements.`);
   const d = r.texData.get(e.dataId);
-  return d.isPacked && !l(e.shape, n) && !(d.texture !== null && l(d.shape, n)) ? b(e, n, r) : (r.incRef(e.dataId), { dataId: e.dataId, shape: n, dtype: e.dtype });
+  return d.isPacked && !m(e.shape, n) && !(d.texture !== null && m(d.shape, n)) ? b(e, n, r) : (r.incRef(e.dataId), { dataId: e.dataId, shape: n, dtype: e.dtype });
 }
 const U = {
   kernelName: R,

package/dist/TeachableLLM.d.ts CHANGED Viewed

@@ -22,12 +22,15 @@ export default class TeachableLLM {
     meta: TeachableLLMMeta;
     constructor(tokeniser?: ITokeniser, model?: NanoGPT);
     get vocab(): string[];
+    /** Model is fully loaded */
     get loaded(): boolean;
     get config(): GPTConfig;
     get model(): NanoGPT;
     get tokeniser(): ITokeniser;
     get status(): TeachableLLMStatus;
+    /** Model is both ready and not busy */
     get ready(): boolean;
+    get busy(): boolean;
     estimateTrainingMemoryUsage(batchSize: number): number;
     private setStatus;
     saveModel(options?: SaveOptions): Promise<Blob>;