npm - @genai-fi/nanogpt - Versions diffs - 0.10.3 → 0.12.0 - Mend

@genai-fi/nanogpt 0.10.3 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (238) hide show

package/dist/Generator.d.ts +10 -5
package/dist/Generator.js +1789 -1765
package/dist/{RealDiv-KAPDe8zB.js → RealDiv-C8neBwFi.js} +15 -15
package/dist/{Reshape-BYkmUnAv.js → Reshape-Bd4V_4X7.js} +1 -1
package/dist/{Reshape-Zt6eb7yh.js → Reshape-Ck29jQSY.js} +5 -5
package/dist/TeachableLLM.d.ts +5 -3
package/dist/TeachableLLM.js +14 -14
package/dist/Trainer.d.ts +3 -1
package/dist/Trainer.js +11 -8
package/dist/{axis_util-BaG7mf5A.js → axis_util-DGqbT-FX.js} +3 -3
package/dist/backend.js +2 -2
package/dist/{backend_util-RCe-rHaj.js → backend_util-DC3rBo_H.js} +18 -18
package/dist/{backend_webgpu-DE3ACOLx.js → backend_webgpu-mbhNnlx9.js} +3 -3
package/dist/{broadcast_to-B3eYlZm7.js → broadcast_to-D1Dmg2Oz.js} +2 -2
package/dist/checks/appendCache.js +2 -2
package/dist/checks/attentionMask.js +3 -3
package/dist/checks/gelu.js +2 -2
package/dist/checks/matMulGelu.js +2 -2
package/dist/checks/normRMS.js +4 -4
package/dist/checks/normRMSGrad.js +3 -3
package/dist/checks/packUnpack.js +2 -2
package/dist/checks/qkv.js +4 -4
package/dist/checks/rope.js +2 -2
package/dist/{clip_by_value-BnO7-a88.js → clip_by_value-fg2aKzUy.js} +5 -5
package/dist/complex-Cyg-eQeZ.js +11 -0
package/dist/concat-CSm2rMwe.js +17 -0
package/dist/{concat_util-DpW8mL_l.js → concat_util-D0je5Ppu.js} +1 -1
package/dist/{dataset-BcwmTGYc.js → dataset-CVIJu7Xa.js} +7 -7
package/dist/{dropout-BcvN9JYi.js → dropout-DLhSMNTZ.js} +9 -9
package/dist/expand_dims-ChkuOp6I.js +11 -0
package/dist/{exports_initializers-Hta_rEnm.js → exports_initializers-1KWPiStI.js} +1 -1
package/dist/{floor-D5QdR_le.js → floor-BRMPgeIs.js} +1 -1
package/dist/{gather-D3JcZUaI.js → gather-BSULDalH.js} +1 -1
package/dist/{gelu-CjNPL4OH.js → gelu-BK1k-n1i.js} +1 -1
package/dist/{gpgpu_math-DAOmgtXR.js → gpgpu_math-BJSTk_mW.js} +25 -25
package/dist/{index-BwexR4lA.js → index-BBVLAXZD.js} +89 -89
package/dist/{index-DOvlwCh-.js → index-Duu1Lvvv.js} +53 -53
package/dist/{kernel_funcs_utils-CCzYdUZg.js → kernel_funcs_utils-BtYrPoJu.js} +6 -6
package/dist/layers/BaseLayer.js +2 -2
package/dist/layers/CausalSelfAttention.js +6 -6
package/dist/layers/MLP.js +4 -4
package/dist/layers/PositionEmbedding.js +5 -5
package/dist/layers/RMSNorm.js +3 -3
package/dist/layers/RoPECache.js +4 -4
package/dist/layers/TiedEmbedding.js +6 -6
package/dist/layers/TransformerBlock.js +1 -1
package/dist/loader/loadTransformers.js +1 -1
package/dist/loader/oldZipLoad.js +9 -9
package/dist/log_sum_exp-CVqLsVLl.js +39 -0
package/dist/main.d.ts +10 -1
package/dist/main.js +68 -58
package/dist/{matMul16-BWRSOCWB.js → matMul16-xswmhSuF.js} +3 -3
package/dist/{matMulGelu-CzfgT6Wq.js → matMulGelu-BpvgnYG8.js} +14 -14
package/dist/mat_mul-Bn2BDpT4.js +11 -0
package/dist/{mod-AnXEvvpo.js → mod-B4AUd1Np.js} +1 -1
package/dist/models/NanoGPTV1.js +2 -2
package/dist/models/model.js +9 -9
package/dist/{ones-D2rT0xk2.js → ones-CBI1AQjb.js} +3 -3
package/dist/ops/adamAdjust.js +1 -1
package/dist/ops/adamMoments.js +1 -1
package/dist/ops/add16.js +1 -1
package/dist/ops/appendCache.js +3 -3
package/dist/ops/attentionMask.js +1 -1
package/dist/ops/concat16.js +2 -2
package/dist/ops/cpu/adamAdjust.js +9 -9
package/dist/ops/cpu/adamMoments.js +5 -5
package/dist/ops/cpu/appendCache.js +6 -6
package/dist/ops/cpu/attentionMask.js +10 -10
package/dist/ops/cpu/fusedSoftmax.js +5 -5
package/dist/ops/cpu/gatherSub.js +9 -9
package/dist/ops/cpu/gelu.js +5 -5
package/dist/ops/cpu/matMul16.js +2 -2
package/dist/ops/cpu/matMulGelu.js +3 -3
package/dist/ops/cpu/matMulMul.js +5 -5
package/dist/ops/cpu/mulDropout.js +1 -1
package/dist/ops/cpu/normRMS.js +7 -7
package/dist/ops/cpu/qkv.js +3 -3
package/dist/ops/cpu/rope.js +5 -5
package/dist/ops/cpu/scatterSub.js +11 -11
package/dist/ops/dot16.js +2 -2
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +2 -2
package/dist/ops/grads/add16.js +4 -4
package/dist/ops/grads/attentionMask.js +2 -2
package/dist/ops/grads/gelu.js +2 -2
package/dist/ops/grads/matMul16.js +3 -3
package/dist/ops/grads/matMulGelu.js +6 -6
package/dist/ops/grads/normRMS.js +4 -4
package/dist/ops/grads/pack16.js +3 -3
package/dist/ops/grads/qkv.js +10 -10
package/dist/ops/grads/rope.js +2 -2
package/dist/ops/grads/softmax16.js +1 -1
package/dist/ops/grads/unpack16.js +2 -2
package/dist/ops/matMul16.js +3 -3
package/dist/ops/matMulGelu.js +2 -2
package/dist/ops/matMulMul.js +1 -1
package/dist/ops/mul16.js +1 -1
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/normRMS.js +1 -1
package/dist/ops/pack16.js +2 -2
package/dist/ops/qkv.js +1 -1
package/dist/ops/reshape16.js +2 -2
package/dist/ops/rope.js +2 -2
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/slice16.js +2 -2
package/dist/ops/softmax16.js +1 -1
package/dist/ops/sub16.js +1 -1
package/dist/ops/sum16.js +2 -2
package/dist/ops/transpose16.js +6 -6
package/dist/ops/unpack16.js +2 -2
package/dist/ops/webgl/adamAdjust.js +2 -2
package/dist/ops/webgl/adamMoments.js +1 -1
package/dist/ops/webgl/appendCache.js +1 -1
package/dist/ops/webgl/attentionMask.js +1 -1
package/dist/ops/webgl/fusedSoftmax.js +4 -4
package/dist/ops/webgl/gatherSub.js +1 -1
package/dist/ops/webgl/gelu.js +2 -2
package/dist/ops/webgl/log.js +3 -3
package/dist/ops/webgl/matMul16.js +8 -8
package/dist/ops/webgl/matMulGelu.js +4 -4
package/dist/ops/webgl/matMulMul.js +7 -7
package/dist/ops/webgl/mulDropout.js +1 -1
package/dist/ops/webgl/normRMS.js +7 -7
package/dist/ops/webgl/qkv.js +1 -1
package/dist/ops/webgl/rope.js +1 -1
package/dist/ops/webgl/scatterSub.js +1 -1
package/dist/ops/webgpu/adamAdjust.js +3 -3
package/dist/ops/webgpu/adamMoments.js +5 -5
package/dist/ops/webgpu/add16.js +1 -1
package/dist/ops/webgpu/appendCache.js +3 -3
package/dist/ops/webgpu/attentionMask.js +2 -2
package/dist/ops/webgpu/attentionMask32_program.js +2 -2
package/dist/ops/webgpu/concat16.js +5 -5
package/dist/ops/webgpu/gatherSub.js +5 -5
package/dist/ops/webgpu/gelu.js +3 -3
package/dist/ops/webgpu/matMul16.js +19 -19
package/dist/ops/webgpu/matMul16_program.js +2 -2
package/dist/ops/webgpu/mul16.js +4 -4
package/dist/ops/webgpu/normRMS.js +6 -6
package/dist/ops/webgpu/normRMSGrad.js +4 -4
package/dist/ops/webgpu/pack16.js +3 -3
package/dist/ops/webgpu/pack16_program.js +2 -2
package/dist/ops/webgpu/qkv.js +8 -8
package/dist/ops/webgpu/rope.js +3 -3
package/dist/ops/webgpu/scatterSub.js +3 -3
package/dist/ops/webgpu/slice16.js +4 -4
package/dist/ops/webgpu/softmax16.js +4 -4
package/dist/ops/webgpu/softmax16_program.js +2 -2
package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
package/dist/ops/webgpu/softmax16grad.js +1 -1
package/dist/ops/webgpu/sub16.js +4 -4
package/dist/ops/webgpu/sum16.js +5 -5
package/dist/ops/webgpu/transpose16.js +2 -2
package/dist/ops/webgpu/transpose16_program.js +2 -2
package/dist/ops/webgpu/transpose16_shared_program.js +3 -3
package/dist/ops/webgpu/unpack16.js +5 -5
package/dist/ops/webgpu/utils/binary_op.js +3 -3
package/dist/ops/webgpu/utils/reductions.js +4 -4
package/dist/{ops-B5yanEdW.js → ops-C2_OXuZ4.js} +69 -69
package/dist/{pack16-nQ6JaLo-.js → pack16-atD0eYRm.js} +9 -9
package/dist/patches/webgpu_backend.js +6 -6
package/dist/patches/webgpu_base.js +1 -1
package/dist/patches/webgpu_program.js +8 -8
package/dist/{random_width-or-CEftb.js → random_width-BN4wGJaW.js} +33 -33
package/dist/range-DKmP1-OQ.js +10 -0
package/dist/relu-BsXmGzzu.js +9 -0
package/dist/{reshape-ByE68wS9.js → reshape-BI0yzp1T.js} +1 -1
package/dist/{resize_nearest_neighbor-B19mCEg2.js → resize_nearest_neighbor-BA_BX-ub.js} +26 -26
package/dist/{rope-Ir4mTyD1.js → rope-DJ7Y7c-u.js} +1 -1
package/dist/{scatter_nd_util-lvSiX8q4.js → scatter_nd_util-k9MUVUkn.js} +1 -1
package/dist/{selu_util-kbhpTdYD.js → selu_util-DyW0X1WG.js} +5 -5
package/dist/{shared-DT1TkE6w.js → shared-Q3BS6T03.js} +1 -1
package/dist/{shared-dntlHIDQ.js → shared-nnSWpC3u.js} +86 -86
package/dist/{slice-BfEGSH82.js → slice-wBNvzVyz.js} +1 -1
package/dist/{slice_util-uTKwiEpW.js → slice_util-zN8KFC5I.js} +1 -1
package/dist/{softmax-CA5jFsLR.js → softmax-DfuYyjMh.js} +1 -1
package/dist/split-BYrLboMq.js +9 -0
package/dist/squeeze-Bk8Brcct.js +10 -0
package/dist/{stack-Cf4n9h0N.js → stack-CDWShFHF.js} +1 -1
package/dist/{step-CINUs5QB.js → step-BS5JXRR6.js} +23 -23
package/dist/{sum-DWAtNGez.js → sum-BPUfDB2X.js} +3 -3
package/dist/tensor-CEt9Nm2s.js +8 -0
package/dist/tensor1d-Cc_KCIDg.js +11 -0
package/dist/{tensor2d-Bs9wZRc7.js → tensor2d-BN97fF71.js} +3 -3
package/dist/{tensor4d-BARPdTaS.js → tensor4d-vuDDgdUI.js} +1 -1
package/dist/{tfjs_backend-y1cvNhLA.js → tfjs_backend-806hyYve.js} +49 -49
package/dist/{tile-mbfagpsB.js → tile-OWUvpIVt.js} +3 -3
package/dist/tokeniser/BaseTokeniser.d.ts +25 -0
package/dist/tokeniser/BaseTokeniser.js +94 -0
package/dist/tokeniser/CharTokeniser.d.ts +10 -9
package/dist/tokeniser/CharTokeniser.js +44 -30
package/dist/tokeniser/bpe.d.ts +10 -9
package/dist/tokeniser/bpe.js +67 -52
package/dist/tokeniser/type.d.ts +14 -5
package/dist/training/Adam.js +2 -2
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.d.ts +3 -3
package/dist/training/DatasetBuilder.js +34 -38
package/dist/training/FullTrainer.js +1 -1
package/dist/training/Trainer.d.ts +4 -3
package/dist/training/Trainer.js +22 -25
package/dist/training/sparseCrossEntropy.js +3 -3
package/dist/training/tasks/ConversationTask.d.ts +11 -0
package/dist/training/tasks/ConversationTask.js +26 -0
package/dist/training/tasks/PretrainingTask.d.ts +11 -0
package/dist/training/tasks/PretrainingTask.js +34 -0
package/dist/training/tasks/StartSentenceTask.d.ts +12 -0
package/dist/training/tasks/StartSentenceTask.js +42 -0
package/dist/training/tasks/Task.d.ts +8 -0
package/dist/training/tasks/Task.js +41 -0
package/dist/{transpose-ClWiBS_b.js → transpose-BUkQCJp9.js} +6 -6
package/dist/{unsorted_segment_sum-BDDhB_E6.js → unsorted_segment_sum-BljxHhCY.js} +5 -5
package/dist/utilities/dummy.js +3 -3
package/dist/utilities/multinomialCPU.js +2 -2
package/dist/utilities/packed.js +1 -1
package/dist/utilities/performance.js +1 -1
package/dist/utilities/profile.js +1 -1
package/dist/utilities/safetensors.js +2 -2
package/dist/utilities/sentences.d.ts +1 -1
package/dist/utilities/sentences.js +11 -11
package/dist/utilities/weights.js +2 -2
package/dist/{variable-WawDEaAb.js → variable-DPt_Iuog.js} +1 -1
package/dist/{webgpu_program-DuOXPQol.js → webgpu_program-BpWRlghH.js} +3 -3
package/dist/{webgpu_util-RxEF33Rj.js → webgpu_util-DMiKzzQM.js} +7 -7
package/dist/{zeros-KnWaWf-X.js → zeros-5YROwwUH.js} +2 -2
package/dist/{zeros_like-DvE73F4e.js → zeros_like-De4n1C3m.js} +71 -71
package/package.json +1 -1
package/dist/complex-DjxcVmoX.js +0 -11
package/dist/concat-BV8bt5H-.js +0 -17
package/dist/expand_dims-DT4tEPwA.js +0 -11
package/dist/log_sum_exp-ngO0-4pK.js +0 -39
package/dist/mat_mul-SjpJRLyL.js +0 -11
package/dist/range-BklejeeW.js +0 -10
package/dist/relu-CP0ZcxWO.js +0 -9
package/dist/split-CVLc0w--.js +0 -9
package/dist/squeeze-C7Z2srUo.js +0 -10
package/dist/tensor-DJoc7gJU.js +0 -8
package/dist/tensor1d-D11P_7Dp.js +0 -11

package/dist/training/Trainer.js CHANGED Viewed

@@ -1,10 +1,11 @@
-import { DatasetBuilder as f, flattenTokens as h, PAGE_FACTOR as y } from "./DatasetBuilder.js";
+import { DatasetBuilder as u, PAGE_FACTOR as f } from "./DatasetBuilder.js";
 import z from "./AdamExt.js";
-import { t as S, v as k, k as x, d as p, b as m } from "../index-DOvlwCh-.js";
-import { z as g } from "../zeros-KnWaWf-X.js";
-class M {
+import { t as S, v as y, k, d as h, b as p } from "../index-Duu1Lvvv.js";
+import { tokensFromTasks as x } from "./tasks/Task.js";
+import { z as m } from "../zeros-5YROwwUH.js";
+class B {
   constructor(t, e, s = 1e-3) {
-    this.tokenizer = e, this.model = t, this.lossScaling = t.lossScaling, this.learningRate = s, this.resetOptimizer(), this.datasetBuilder = new f(e, t.config.blockSize);
+    this.tokenizer = e, this.model = t, this.lossScaling = t.lossScaling, this.learningRate = s, this.resetOptimizer(), this.datasetBuilder = new u(e, t.config.blockSize);
   }
   model;
   optimizer;
@@ -53,8 +54,8 @@ class M {
   trainStep(t, e, s = !1, i = !1) {
     return S(() => {
       this.model.getProfiler()?.startMemory();
-      const { xs: a, ys: l } = e, c = () => {
-        const [n, d] = this.model.forward(
+      const { xs: a, ys: l } = e, d = () => {
+        const [o, c] = this.model.forward(
           {
             training: !0,
             checkpointing: this._gradientCheckpointing,
@@ -63,15 +64,15 @@ class M {
           a,
           l
         );
-        n.dispose();
-        const u = d.mul(m(this.lossScaling));
-        return d.dispose(), u;
-      }, { value: o, grads: r } = k(c);
-      return s ? this.model.getProfiler()?.endMemory("Training") : (this.optimizer.applyGradients(r), this.model.getProfiler()?.endMemory("Training"), i ? (t.gradients = r, Object.values(r).forEach((n) => x(n))) : p(r)), o.mul(m(1 / this.lossScaling));
+        o.dispose();
+        const g = c.mul(p(this.lossScaling));
+        return c.dispose(), g;
+      }, { value: n, grads: r } = y(d);
+      return s ? this.model.getProfiler()?.endMemory("Training") : (this.optimizer.applyGradients(r), this.model.getProfiler()?.endMemory("Training"), i ? (t.gradients = r, Object.values(r).forEach((o) => k(o))) : h(r)), n.mul(p(1 / this.lossScaling));
     });
   }
   async dummyPass() {
-    const t = g([1, this.model.config.blockSize], "int32"), e = g([1, this.model.config.blockSize], "int32");
+    const t = m([1, this.model.config.blockSize], "int32"), e = m([1, this.model.config.blockSize], "int32");
     try {
       const s = this.trainStep({}, { xs: t, ys: e }, !0);
       await s.data(), s.dispose();
@@ -86,34 +87,30 @@ class M {
       const i = this.trainStep(t, e, !1, s);
       return e.xs.dispose(), e.ys.dispose(), t.step++, t.totalSteps++, i;
     } catch (i) {
-      throw console.error(`Error processing batch at step ${t.step}:`, i), p(), i;
+      throw console.error(`Error processing batch at step ${t.step}:`, i), h(), i;
     }
   }
   async createTrainValidationSplit(t, e = 32, s = 0.1) {
-    const i = await h(t, this.tokenizer), a = /* @__PURE__ */ new Set();
+    const i = await x(t, this.tokenizer), a = /* @__PURE__ */ new Set();
     if (s > 0) {
-      const o = Math.floor(i.length / (this.datasetBuilder.blockSize * y)), r = Math.max(1, Math.floor(o * s));
+      const n = Math.floor(i.length / (this.datasetBuilder.blockSize * f)), r = Math.max(1, Math.floor(n * s));
       for (; a.size < r; ) {
-        const n = Math.floor(Math.random() * o);
-        a.add(n);
+        const o = Math.floor(Math.random() * n);
+        a.add(o);
       }
     }
-    const l = await this.datasetBuilder.createTextDataset(i, e, a, !1), c = await this.datasetBuilder.createTextDataset(
+    const l = await this.datasetBuilder.createTextDataset(i, e, a, !1), d = await this.datasetBuilder.createTextDataset(
       i,
       e,
       a,
       !0
     );
-    return { trainDataset: l, validationDataset: c };
-  }
-  async createDataset(t, e = 32) {
-    const s = await h(t, this.tokenizer);
-    return await this.datasetBuilder.createTextDataset(s, e);
+    return { trainDataset: l, validationDataset: d, size: i.length };
   }
   dispose() {
     this.optimizer && this.optimizer.dispose();
   }
 }
 export {
-  M as default
+  B as default
 };

package/dist/training/sparseCrossEntropy.js CHANGED Viewed

@@ -1,8 +1,8 @@
 import { gatherSub as x } from "../ops/gatherSub.js";
 import { scatterSub as L } from "../ops/scatterSub.js";
-import { a6 as C, t as u, a7 as E, c as G } from "../index-DOvlwCh-.js";
-import { s as y } from "../softmax-CA5jFsLR.js";
-import { m as z, l as v } from "../log_sum_exp-ngO0-4pK.js";
+import { a1 as C, t as u, a2 as E, c as G } from "../index-Duu1Lvvv.js";
+import { s as y } from "../softmax-DfuYyjMh.js";
+import { m as z, l as v } from "../log_sum_exp-CVqLsVLl.js";
 function k(t, s) {
   return u(() => {
     const n = t.shape[t.shape.length - 1], c = t.shape.slice(0, -1).reduce((o, e) => o * e, 1), h = t.shape.length > 2 ? t.reshape([c, n]) : t, p = s.shape.length > 1 ? s.reshape([c]).cast("int32") : s.cast("int32"), r = z(h, -1, !0), a = G(h, r), d = v(a, -1);

package/dist/training/tasks/ConversationTask.d.ts ADDED Viewed

@@ -0,0 +1,11 @@
+import { Conversation, ITokeniser } from '../../main';
+import { Task } from './Task';
+export default class ConversationTask extends Task {
+    private rawConvo;
+    private index;
+    get length(): number;
+    constructor(conversations: Conversation[][]);
+    hasMoreConversations(): boolean;
+    nextConversation(): Conversation[] | null;
+    estimateTokens(tokeniser: ITokeniser): Promise<number>;
+}

package/dist/training/tasks/ConversationTask.js ADDED Viewed

@@ -0,0 +1,26 @@
+import { Task as t } from "./Task.js";
+class s extends t {
+  rawConvo;
+  index = 0;
+  get length() {
+    return this.rawConvo.length;
+  }
+  constructor(n) {
+    super(), this.rawConvo = n;
+  }
+  hasMoreConversations() {
+    return this.index < this.rawConvo.length;
+  }
+  nextConversation() {
+    if (this.index >= this.rawConvo.length)
+      return null;
+    const n = this.rawConvo[this.index];
+    return this.index++, n;
+  }
+  async estimateTokens(n) {
+    return (await n.encodeConversation(this.rawConvo[0])).length * this.length;
+  }
+}
+export {
+  s as default
+};

package/dist/training/tasks/PretrainingTask.d.ts ADDED Viewed

@@ -0,0 +1,11 @@
+import { Conversation, ITokeniser } from '../../main';
+import { Task } from './Task';
+export default class PretrainingTask extends Task {
+    private rawText;
+    private index;
+    get length(): number;
+    constructor(texts: string[]);
+    hasMoreConversations(): boolean;
+    nextConversation(): Conversation[] | null;
+    estimateTokens(tokeniser: ITokeniser): Promise<number>;
+}

package/dist/training/tasks/PretrainingTask.js ADDED Viewed

@@ -0,0 +1,34 @@
+import { Task as e } from "./Task.js";
+class r extends e {
+  rawText;
+  index = 0;
+  get length() {
+    return this.rawText.length;
+  }
+  constructor(t) {
+    super(), this.rawText = t;
+  }
+  hasMoreConversations() {
+    return this.index < this.rawText.length;
+  }
+  nextConversation() {
+    if (this.index >= this.rawText.length)
+      return null;
+    const t = {
+      role: "assistant",
+      content: this.rawText[this.index]
+    };
+    return this.index++, [t];
+  }
+  async estimateTokens(t) {
+    return (await t.encodeConversation([
+      {
+        role: "assistant",
+        content: this.rawText[0]
+      }
+    ])).length * this.length;
+  }
+}
+export {
+  r as default
+};

package/dist/training/tasks/StartSentenceTask.d.ts ADDED Viewed

@@ -0,0 +1,12 @@
+import { Conversation, ITokeniser } from '../../main';
+import { Task } from './Task';
+export default class StartSentenceTask extends Task {
+    private rawText;
+    private index;
+    get length(): number;
+    constructor(texts: string[]);
+    hasMoreConversations(): boolean;
+    nextConversation(): Conversation[] | null;
+    private conversationFromString;
+    estimateTokens(tokeniser: ITokeniser): Promise<number>;
+}

package/dist/training/tasks/StartSentenceTask.js ADDED Viewed

@@ -0,0 +1,42 @@
+import { Task as e } from "./Task.js";
+class a extends e {
+  rawText;
+  index = 0;
+  get length() {
+    return this.rawText.length;
+  }
+  constructor(t) {
+    super(), this.rawText = t;
+  }
+  hasMoreConversations() {
+    return this.index < this.rawText.length;
+  }
+  nextConversation() {
+    if (this.index >= this.rawText.length)
+      return null;
+    const t = this.rawText[this.index];
+    return this.index++, this.conversationFromString(t);
+  }
+  conversationFromString(t) {
+    const n = t.indexOf(".");
+    return n === -1 ? [{
+      role: "assistant",
+      content: this.rawText[this.index]
+    }] : [
+      {
+        role: "user",
+        content: t.slice(0, n + 1).trim()
+      },
+      {
+        role: "assistant",
+        content: t.slice(n + 1).trim()
+      }
+    ];
+  }
+  async estimateTokens(t) {
+    return (await t.encodeConversation(this.conversationFromString(this.rawText[0]))).length * this.length;
+  }
+}
+export {
+  a as default
+};

package/dist/training/tasks/Task.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+import { Conversation, ITokeniser } from '../../main';
+export declare abstract class Task {
+    abstract get length(): number;
+    abstract hasMoreConversations(): boolean;
+    abstract nextConversation(): Conversation[] | null;
+    abstract estimateTokens(tokeniser: ITokeniser): Promise<number>;
+}
+export declare function tokensFromTasks(tasks: Task[], tokenizer: ITokeniser): Promise<Uint16Array>;

package/dist/training/tasks/Task.js ADDED Viewed

@@ -0,0 +1,41 @@
+class g {
+}
+function h(f, a, l, e, r) {
+  for (let i = 0; i < f.length; i++) {
+    const c = f[i].nextConversation();
+    if (c) {
+      const o = l.encodeConversation(c), s = a[a.length - 1];
+      if (e.offset + o.length > s.length) {
+        const n = s.length - e.offset;
+        s.set(o.slice(0, n), e.offset);
+        const t = new Uint16Array(Math.floor(r * 0.1) + 100);
+        t.set(o.slice(n), 0), a.push(t), e.offset = o.length - n;
+      } else
+        s.set(o, e.offset), e.offset += o.length;
+    }
+  }
+}
+async function w(f, a) {
+  const l = (await Promise.all(f.map((n) => n.estimateTokens(a)))).reduce(
+    (n, t) => n + t,
+    0
+  ), e = [new Uint16Array(l)], r = {
+    offset: 0
+  };
+  let i = performance.now();
+  for (; r.offset < l && (h(f, e, a, r, l), !f.every((t) => !t.hasMoreConversations())); )
+    performance.now() - i > 40 && (await new Promise(requestAnimationFrame), i = performance.now());
+  if (e.length === 1)
+    return e[0].subarray(0, r.offset);
+  const c = e.reduce((n, t) => n + t.length, 0) - (e[e.length - 1].length - r.offset), o = new Uint16Array(c);
+  let s = 0;
+  for (let n = 0; n < e.length; n++) {
+    const t = e[n];
+    n === e.length - 1 ? (o.set(t.subarray(0, r.offset), s), s += r.offset) : (o.set(t, s), s += t.length);
+  }
+  return o;
+}
+export {
+  g as Task,
+  w as tokensFromTasks
+};

package/dist/{transpose-ClWiBS_b.js → transpose-BUkQCJp9.js} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { A as u, B as i, E as o, ap as $, aq as g, ar as m, l, t as x, as as p } from "./index-DOvlwCh-.js";
-import { c as k } from "./complex-DjxcVmoX.js";
+import { o as u, q as i, E as o, ap as $, aq as g, ar as x, x as l, t as m, as as p } from "./index-Duu1Lvvv.js";
+import { c as k } from "./complex-Cyg-eQeZ.js";
 function K(r) {
   const e = { input: i(r, "input", "imag") };
   return o.runKernel($, e);
@@ -12,7 +12,7 @@ function E(r) {
 const _ = /* @__PURE__ */ u({ neg_: E });
 function b(r) {
   const e = { input: i(r, "input", "real") };
-  return o.runKernel(m, e);
+  return o.runKernel(x, e);
 }
 const d = /* @__PURE__ */ u({ real_: b });
 function N(r, t, e) {
@@ -22,15 +22,15 @@ function N(r, t, e) {
   }), n.rank <= 1)
     return n.clone();
   const f = { x: n }, c = { perm: t };
-  return n.dtype === "complex64" ? x(() => {
+  return n.dtype === "complex64" ? m(() => {
     let s = d(n), a = h(n);
     return s = o.runKernel(p, { x: s }, c), a = o.runKernel(p, { x: a }, c), e && (a = _(a)), k(s, a);
   }) : o.runKernel(p, f, c);
 }
-const y = /* @__PURE__ */ u({ transpose_: N });
+const v = /* @__PURE__ */ u({ transpose_: N });
 export {
   h as i,
   _ as n,
   d as r,
-  y as t
+  v as t
 };

package/dist/{unsorted_segment_sum-BDDhB_E6.js → unsorted_segment_sum-BljxHhCY.js} RENAMED Viewed

@@ -1,8 +1,8 @@
-import { A as h, B as c, E as d, bo as T, bp as q, bq as H, l, br as P, X as _, bs as y, bt as B, bu as I, bv as W, bw as A, bx as G, by as L, bz as O, bA as z, bB as F, L as M, a3 as j, bC as J, bD as Q, bE as U, a6 as V, c as N, m as X, bF as Y, bG as Z, bH as R, bI as nn, bJ as tn, bK as sn, bL as en, bM as rn, bN as on, bO as an, bP as un, aG as cn, bQ as ln } from "./index-DOvlwCh-.js";
-import { k as C, c as g, m as D } from "./step-CINUs5QB.js";
-import { r as b } from "./reshape-ByE68wS9.js";
-import { m as pn, a as hn, e as w } from "./log_sum_exp-ngO0-4pK.js";
-import { s as K } from "./sum-DWAtNGez.js";
+import { o as h, q as c, E as d, bo as T, bp as q, bq as H, x as l, br as P, L as _, bs as y, bt as B, bu as I, bv as W, bw as A, bx as G, by as L, bz as O, bA as z, bB as F, B as M, _ as j, bC as J, bD as Q, bE as U, a1 as V, c as N, m as X, bF as Y, bG as Z, bH as R, bI as nn, bJ as tn, bK as sn, bL as en, bM as rn, bN as on, bO as an, bP as un, aG as cn, bQ as ln } from "./index-Duu1Lvvv.js";
+import { k as C, c as g, m as D } from "./step-BS5JXRR6.js";
+import { r as b } from "./reshape-BI0yzp1T.js";
+import { m as pn, a as hn, e as w } from "./log_sum_exp-CVqLsVLl.js";
+import { s as K } from "./sum-BPUfDB2X.js";
 function fn(s, n = null, t = !1) {
   const i = { x: c(s, "x", "all", "bool") }, o = { axis: n, keepDims: t };
   return d.runKernel(T, i, o);

package/dist/utilities/dummy.js CHANGED Viewed

@@ -1,6 +1,6 @@
-import { a as y, e as S, v as w } from "../index-DOvlwCh-.js";
-import { z as m } from "../zeros-KnWaWf-X.js";
-import { o as P } from "../ones-D2rT0xk2.js";
+import { a as y, e as S, v as w } from "../index-Duu1Lvvv.js";
+import { z as m } from "../zeros-5YROwwUH.js";
+import { o as P } from "../ones-CBI1AQjb.js";
 async function b(s) {
   const t = m([1, s.config.blockSize], "int32"), [n, o] = s.forward({ training: !1 }, t);
   await n.data(), n.dispose(), o && o.dispose(), t.dispose();

package/dist/utilities/multinomialCPU.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import "../index-DOvlwCh-.js";
-import { t as e } from "../tensor2d-Bs9wZRc7.js";
+import "../index-Duu1Lvvv.js";
+import { t as e } from "../tensor2d-BN97fF71.js";
 function l(n) {
   let r = 0;
   const i = Math.random();

package/dist/utilities/packed.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { e as n } from "../index-DOvlwCh-.js";
+import { e as n } from "../index-Duu1Lvvv.js";
 function o() {
   return n().backendName === "webgpu";
 }

package/dist/utilities/performance.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { t as s } from "../index-DOvlwCh-.js";
+import { t as s } from "../index-Duu1Lvvv.js";
 async function f(e, o = 10, r = !1) {
   for (let t = 0; t < 100; t++) {
     const a = r ? await e() : s(e);

package/dist/utilities/profile.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { a } from "../index-DOvlwCh-.js";
+import { a } from "../index-Duu1Lvvv.js";
 const s = 1024 * 1024;
 class l {
   log = /* @__PURE__ */ new Map();

package/dist/utilities/safetensors.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import "../index-DOvlwCh-.js";
-import { t as y } from "../tensor-DJoc7gJU.js";
+import "../index-Duu1Lvvv.js";
+import { t as y } from "../tensor-CEt9Nm2s.js";
 function l(t) {
   if (t === "float32") return "F32";
   if (t === "int32") return "I32";

package/dist/utilities/sentences.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { default as TeachableLLM } from '../TeachableLLM';
 import { Tensor2D, Tensor3D } from '@tensorflow/tfjs-core';
 export declare function meanPooling(embeddings: Tensor3D, attentionMask?: Tensor2D): Tensor2D;
-export declare function sentenceEmbeddingsTensor(model: TeachableLLM, sentences: string[], batchSize?: number): Promise<Tensor2D>;
+export declare function sentenceEmbeddingsTensor(model: TeachableLLM, sentences: string[], batchSize?: number): Tensor2D;
 export declare function sentenceEmbeddings(model: TeachableLLM, sentences: string[], batchSize?: number): Promise<number[][]>;

package/dist/utilities/sentences.js CHANGED Viewed

@@ -1,26 +1,26 @@
-import { m as w } from "../index-DOvlwCh-.js";
-import { t as g } from "../tensor2d-Bs9wZRc7.js";
-import { e as y } from "../expand_dims-DT4tEPwA.js";
-import { s as h } from "../sum-DWAtNGez.js";
-import { c as T } from "../concat-BV8bt5H-.js";
+import { m as w } from "../index-Duu1Lvvv.js";
+import { t as f } from "../tensor2d-BN97fF71.js";
+import { e as y } from "../expand_dims-ChkuOp6I.js";
+import { s as g } from "../sum-BPUfDB2X.js";
+import { c as T } from "../concat-CSm2rMwe.js";
 const p = 16;
 function A(o, t) {
   if (!t)
     return o.mean(1);
-  const r = y(t, 2), i = w(o, r), e = h(i, 1), s = h(t, 1, !0), c = e.div(s.maximum(1e-9));
+  const r = y(t, 2), i = w(o, r), e = g(i, 1), s = g(t, 1, !0), c = e.div(s.maximum(1e-9));
   return r.dispose(), i.dispose(), e.dispose(), s.dispose(), c;
 }
-async function E(o, t, r = p) {
+function E(o, t, r = p) {
   const i = o.tokeniser, e = o.config.blockSize;
   let s = null, c = 0;
   for (; c < t.length; ) {
-    const m = t.slice(c, c + p), k = await i.tokenise(m, !0), l = [], d = [];
-    for (const n of k)
+    const b = t.slice(c, c + p).map((n) => i.encode(n)), l = [], d = [];
+    for (const n of b)
       n.length > e ? (l.push(n.slice(n.length - e, n.length)), d.push(new Array(e).fill(1))) : n.length < e ? (l.push(n.concat(new Array(e - n.length).fill(0))), d.push(
         new Array(n.length).fill(1).concat(new Array(e - n.length).fill(0))
       )) : (l.push(n), d.push(new Array(e).fill(1)));
-    const b = g(l, [l.length, e], "int32"), u = g(d, [d.length, e], "float32"), f = o.model.forward({ skipLogits: !0, training: !1 }, b)[0], a = A(f, u);
-    if (u.dispose(), f.dispose(), s === null)
+    const k = f(l, [l.length, e], "int32"), m = f(d, [d.length, e], "float32"), u = o.model.forward({ skipLogits: !0, training: !1 }, k)[0], a = A(u, m);
+    if (m.dispose(), u.dispose(), s === null)
       s = a;
     else {
       const n = s;

package/dist/utilities/weights.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import "../index-DOvlwCh-.js";
-import { t as p } from "../tensor-DJoc7gJU.js";
+import "../index-Duu1Lvvv.js";
+import { t as p } from "../tensor-CEt9Nm2s.js";
 function h(n) {
   const e = n.reduce((s, o) => s + o.length, 0), a = new Float32Array(e);
   let t = 0;

package/dist/{variable-WawDEaAb.js → variable-DPt_Iuog.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { E as i } from "./index-DOvlwCh-.js";
+import { E as i } from "./index-Duu1Lvvv.js";
 function m(r, a = !0, e, t) {
   return i.makeVariable(r, a, e, t);
 }

package/dist/{webgpu_program-DuOXPQol.js → webgpu_program-BpWRlghH.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as z, n as F, j, ad as E, l as A } from "./index-DOvlwCh-.js";
+import { ac as z, ab as F, aa as E, a8 as j, x as A } from "./index-Duu1Lvvv.js";
 function L(t, s) {
   if (Math.max(...t) > 5)
     throw new Error("Cannot symbolically compute strides for rank > 6 tensor.");
@@ -27,7 +27,7 @@ var w;
 })(w || (w = {}));
 const H = (t, s, e, o, i) => {
   const u = { dtype: o.dtype, shape: o.shape }, n = D(e, u, s), r = t.createShaderModule({ code: n, label: s.constructor.name });
-  let d = j().get("WEBGPU_PRINT_SHADER");
+  let d = E().get("WEBGPU_PRINT_SHADER");
   if (d !== "") {
     d = d.toLowerCase();
     const p = d.split(",");
@@ -281,7 +281,7 @@ function y(t, s = "") {
   const e = t.length, o = s !== "" ? `get${s.charAt(0).toUpperCase() + s.slice(1)}CoordsFromIndex` : "getCoordsFromIndex", i = s !== "" ? `${s.charAt(0).toLowerCase() + s.slice(1)}ShapeStrides` : "outShapeStrides";
   if (e <= 1)
     return `fn ${o}(index : i32) -> i32 { return index; }`;
-  const u = E(t), n = g(e), r = [];
+  const u = j(t), n = g(e), r = [];
   for (let p = 0; p < e; p++)
     r.push(`d${p}`);
   if (u.length === 1)

package/dist/{webgpu_util-RxEF33Rj.js → webgpu_util-DMiKzzQM.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { l as u } from "./index-DOvlwCh-.js";
+import { x as u } from "./index-Duu1Lvvv.js";
 const c = (r) => {
   let t = 1;
   for (let n = 0; n < r.length; n++)
@@ -23,16 +23,16 @@ function p(r, t, n = !1) {
   const a = c(r.x.map((i) => t[i])), o = c(r.y.map((i) => t[i]));
   return a <= 4 ? [4, 16, 1] : o <= 4 ? [16, 4, 1] : [16, 16, 1];
 }
-function M(r, t, n = !1) {
+function x(r, t, n = !1) {
   if (n)
     return [4, 4, 1];
   const a = c(r.x.map((i) => t[i])), o = c(r.y.map((i) => t[i]));
   return a <= 4 ? [1, 2, 1] : o <= 4 ? [2, 1, 1] : [2, 2, 1];
 }
-function h(r) {
+function M(r) {
   return { x: r.map((t, n) => n) };
 }
-function x(r) {
+function h(r) {
   if (r === "float32" || r === "int32" || r === "bool" || r === "string" || r === "packedF16")
     return 4;
   if (r === "complex64")
@@ -52,13 +52,13 @@ var s;
   r[r.MatMulReduceProgram = 0] = "MatMulReduceProgram", r[r.MatMulSplitKProgram = 1] = "MatMulSplitKProgram", r[r.MatMulSmallOutputSizeProgram = 2] = "MatMulSmallOutputSizeProgram", r[r.MatMulPackedProgram = 3] = "MatMulPackedProgram", r[r.MatMulMax = 4] = "MatMulMax";
 })(s || (s = {}));
 export {
-  x as G,
+  h as G,
   s as M,
   d as a,
   b,
   m as c,
   p as d,
-  M as e,
-  h as f,
+  x as e,
+  M as f,
   g as i
 };

package/dist/{zeros-KnWaWf-X.js → zeros-5YROwwUH.js} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { C as n, Z as m, h as i, E as c } from "./index-DOvlwCh-.js";
-import { c as f } from "./complex-DjxcVmoX.js";
+import { u as n, Q as m, U as i, E as c } from "./index-Duu1Lvvv.js";
+import { c as f } from "./complex-Cyg-eQeZ.js";
 function e(o, r = "float32") {
   if (n(o), r === "complex64") {
     const s = e(o, "float32"), t = e(o, "float32");