npm - @genai-fi/nanogpt - Versions diffs - 0.2.12 → 0.3.1 - Mend

@genai-fi/nanogpt 0.2.12 → 0.3.1

This diff shows the differences between the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (115)

package/dist/Generator.js +30 -25
package/dist/NanoGPTModel.d.ts +13 -14
package/dist/NanoGPTModel.js +142 -70
package/dist/TeachableLLM.d.ts +16 -7
package/dist/TeachableLLM.js +81 -44
package/dist/Trainer.js +8 -8
package/dist/concat-BIZS_td9.js +33 -0
package/dist/data/parquet.js +1 -1
package/dist/exports_layers-tbTBcwMM.js +25 -0
package/dist/{sum-D7fu15XL.js → gather-BPGW8RsB.js} +6 -8
package/dist/index-C4L8Cm77.js +349 -0
package/dist/{index-YPKosni4.js → index-pWA4_lUh.js} +1020 -782
package/dist/layers/CausalSelfAttention.d.ts +11 -11
package/dist/layers/CausalSelfAttention.js +71 -63
package/dist/layers/MLP.d.ts +6 -7
package/dist/layers/MLP.js +18 -16
package/dist/layers/RMSNorm.d.ts +6 -7
package/dist/layers/RMSNorm.js +15 -13
package/dist/layers/RoPECache.d.ts +4 -5
package/dist/layers/RoPECache.js +36 -12
package/dist/layers/TiedEmbedding.d.ts +7 -8
package/dist/layers/TiedEmbedding.js +16 -418
package/dist/layers/TransformerBlock.d.ts +8 -9
package/dist/layers/TransformerBlock.js +12 -12
package/dist/main.d.ts +2 -0
package/dist/main.js +35 -21
package/dist/{mat_mul-Bu7bhLms.js → mat_mul-D7_a4KJn.js} +5 -5
package/dist/moments-DfcpfwKi.js +132 -0
package/dist/ones-Cog-G2ag.js +29 -0
package/dist/ops/appendCache.d.ts +2 -0
package/dist/ops/appendCache.js +9 -0
package/dist/ops/attentionMask.d.ts +1 -1
package/dist/ops/attentionMask.js +7 -85
package/dist/ops/cpu/appendCache.d.ts +2 -0
package/dist/ops/cpu/appendCache.js +28 -0
package/dist/ops/cpu/attentionMask.js +18 -0
package/dist/ops/cpu/gatherSub.d.ts +1 -0
package/dist/ops/cpu/gatherSub.js +34 -0
package/dist/ops/cpu/qkv.d.ts +5 -0
package/dist/ops/cpu/qkv.js +38 -0
package/dist/ops/cpu/rope.d.ts +6 -0
package/dist/ops/cpu/rope.js +38 -0
package/dist/ops/cpu/scatterSub.d.ts +1 -0
package/dist/ops/cpu/scatterSub.js +70 -0
package/dist/ops/gatherSub.d.ts +1 -1
package/dist/ops/gatherSub.js +6 -63
package/dist/ops/grads/attentionMask.d.ts +1 -0
package/dist/ops/grads/attentionMask.js +21 -0
package/dist/ops/grads/qkv.d.ts +1 -0
package/dist/ops/grads/qkv.js +20 -0
package/dist/ops/grads/rope.d.ts +1 -0
package/dist/ops/grads/rope.js +14 -0
package/dist/ops/node/sparseCrossEntropy.js +1 -1
package/dist/ops/qkv.d.ts +1 -6
package/dist/ops/qkv.js +7 -124
package/dist/ops/rope.d.ts +0 -5
package/dist/ops/rope.js +7 -151
package/dist/ops/scatterSub.d.ts +1 -1
package/dist/ops/scatterSub.js +6 -147
package/dist/ops/webgl/appendCache.d.ts +1 -0
package/dist/ops/webgl/appendCache.js +43 -0
package/dist/ops/webgl/attentionMask.d.ts +1 -0
package/dist/ops/webgl/attentionMask.js +43 -0
package/dist/ops/webgl/gatherSub.d.ts +1 -0
package/dist/ops/webgl/gatherSub.js +27 -0
package/dist/ops/webgl/qkv.d.ts +1 -0
package/dist/ops/webgl/qkv.js +46 -0
package/dist/ops/webgl/rope.d.ts +1 -0
package/dist/ops/webgl/rope.js +56 -0
package/dist/ops/webgl/scatterSub.d.ts +1 -0
package/dist/ops/webgl/scatterSub.js +27 -0
package/dist/{parquet-BRl5lE_I.js → parquet-C0Tlmv9c.js} +3045 -3048
package/dist/random_width-oeUIlUZj.js +15487 -0
package/dist/range-CcDl05lo.js +26 -0
package/dist/{reshape-DmnmKT6r.js → reshape-C8CR_Bad.js} +3 -3
package/dist/sin-BJIrfnj7.js +47 -0
package/dist/softmax-Be_lsqUc.js +105 -0
package/dist/{complex-CJ-qCcLB.js → split-DZbvruEP.js} +6 -8
package/dist/stack-BMm-efee.js +27 -0
package/dist/sum-C7Mgy9Bw.js +104 -0
package/dist/tensor-DJVbYhh1.js +24 -0
package/dist/tensor2d-ZuQSh2D-.js +30 -0
package/dist/tokeniser/bpe.d.ts +17 -6
package/dist/tokeniser/bpe.js +89 -61
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.d.ts +6 -6
package/dist/training/DatasetBuilder.js +1262 -17
package/dist/training/Evaluator.d.ts +3 -2
package/dist/training/FullTrainer.d.ts +9 -8
package/dist/training/FullTrainer.js +26 -25
package/dist/training/LayerTrainer.d.ts +9 -8
package/dist/training/LayerTrainer.js +34 -33
package/dist/training/Trainer.d.ts +22 -21
package/dist/training/Trainer.js +21 -18
package/dist/training/sparseCrossEntropy.js +22 -166
package/dist/utilities/dummy.js +10 -8
package/dist/utilities/generate.js +14 -11
package/dist/utilities/load.d.ts +1 -2
package/dist/utilities/load.js +37 -35
package/dist/utilities/profile.js +1 -1
package/dist/utilities/save.js +14 -9
package/dist/utilities/tokenParse.d.ts +1 -1
package/dist/utilities/tokenParse.js +7 -61
package/dist/utilities/weights.d.ts +3 -3
package/dist/utilities/weights.js +21 -19
package/dist/variable-Dl_ub3pk.js +23 -0
package/dist/{stack-BtKpB0Ry.js → zeros-CCy9C3uU.js} +18 -16
package/package.json +2 -1
package/dist/assets/worker-BYeSPNkq.js +0 -1
package/dist/tokeniser/NodeTokeniser.d.ts +0 -20
package/dist/tokeniser/NodeTokeniser.js +0 -46
package/dist/tokeniser/WebTokeniser.d.ts +0 -18
package/dist/tokeniser/WebTokeniser.js +0 -96
package/dist/tokeniser/worker.js +0 -53
package/dist/{tokeniser/worker.d.ts → ops/cpu/attentionMask.d.ts} +0 -0

package/dist/layers/TiedEmbedding.js CHANGED Viewed

@@ -1,438 +1,36 @@
-import { o as h, d as i, E as o, K as X, N as Y, O as Z, Q as J, T as ee, U as te, V as se, W as ne, X as re, Y as ue, l as L, I as ae, Z as A, a as ie, _ as oe, D as le, f as q, v as C, $ as P, H as U, a0 as H } from "../index-YPKosni4.js";
-import { r as f } from "../reshape-DmnmKT6r.js";
-import { s as ce } from "../sum-D7fu15XL.js";
-import { m } from "../mat_mul-Bu7bhLms.js";
-import { c as pe } from "../complex-CJ-qCcLB.js";
-/**
- * @license
- * Copyright 2018 Google LLC. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
-function he(t) {
-  const s = { x: i(t, "x", "sigmoid", "float32") };
-  return o.runKernel(X, s);
-}
-const fe = /* @__PURE__ */ h({ sigmoid_: he });
-/**
- * @license
- * Copyright 2020 Google LLC. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
-function de(t) {
-  const s = { x: i(t, "x", "elu", "float32") };
-  return o.runKernel(Y, s);
-}
-const me = /* @__PURE__ */ h({ elu_: de });
-/**
- * @license
- * Copyright 2020 Google LLC. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
-function ge(t) {
-  const s = { input: i(t, "input", "imag") };
-  return o.runKernel(Z, s);
-}
-const $e = /* @__PURE__ */ h({ imag_: ge });
-/**
- * @license
- * Copyright 2020 Google LLC. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
-function xe(t, e = 0.2) {
-  const n = { x: i(t, "x", "leakyRelu") }, r = { alpha: e };
-  return o.runKernel(J, n, r);
-}
-const ke = /* @__PURE__ */ h({ leakyRelu_: xe });
-/**
- * @license
- * Copyright 2018 Google LLC. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
-function De(t) {
-  const s = { x: i(t, "x", "neg") };
-  return o.runKernel(ee, s);
-}
-const be = /* @__PURE__ */ h({ neg_: De });
-/**
- * @license
- * Copyright 2020 Google LLC. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
-function ye(t, e) {
-  const s = i(t, "x", "prelu"), n = i(e, "alpha", "prelu"), r = { x: s, alpha: n };
-  return o.runKernel(te, r);
-}
-const Se = /* @__PURE__ */ h({ prelu_: ye });
-/**
- * @license
- * Copyright 2020 Google LLC. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
-function Ke(t) {
-  const s = { input: i(t, "input", "real") };
-  return o.runKernel(se, s);
-}
-const _e = /* @__PURE__ */ h({ real_: Ke });
-/**
- * @license
- * Copyright 2020 Google LLC. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
-function Me(t) {
-  const s = { x: i(t, "x", "relu") };
-  return o.runKernel(ne, s);
-}
-const We = /* @__PURE__ */ h({ relu_: Me });
-/**
- * @license
- * Copyright 2020 Google LLC. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
-function we(t) {
-  const s = { x: i(t, "x", "relu6") };
-  return o.runKernel(re, s);
-}
-const ze = /* @__PURE__ */ h({ relu6_: we });
-/**
- * @license
- * Copyright 2018 Google LLC. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
-function Ee(t, e = 0) {
-  const n = { x: i(t, "x", "step") }, r = { alpha: e };
-  return o.runKernel(ue, n, r);
-}
-const Oe = /* @__PURE__ */ h({ step_: Ee });
-/**
- * @license
- * Copyright 2018 Google LLC. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
-function Fe(t, e, s) {
-  const n = i(t, "x", "transpose");
-  if (e == null && (e = n.shape.map((l, p) => p).reverse()), L(n.rank === e.length, () => `Error in transpose: rank of input ${n.rank} must match length of perm ${e}.`), e.forEach((l) => {
-    L(l >= 0 && l < n.rank, () => `All entries in 'perm' must be between 0 and ${n.rank - 1} but got ${e}`);
-  }), n.rank <= 1)
-    return n.clone();
-  const r = { x: n }, c = { perm: e };
-  return n.dtype === "complex64" ? ae(() => {
-    let l = _e(n), p = $e(n);
-    return l = o.runKernel(A, { x: l }, c), p = o.runKernel(A, { x: p }, c), s && (p = be(p)), pe(l, p);
-  }) : o.runKernel(A, r, c);
-}
-const Re = /* @__PURE__ */ h({ transpose_: Fe });
-/**
- * @license
- * Copyright 2019 Google LLC. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
-function Ae(t, e, s) {
-  if (s == null || s === "linear")
-    return t;
-  if (s === "relu")
-    return ie(t, Oe(e));
-  throw new Error(`Cannot compute gradient for fused activation ${s}.`);
-}
-function Le(t, e) {
-  let s = e;
-  const n = oe(t.shape, e.shape);
-  return n.length > 0 && (s = ce(s, n)), f(s, t.shape);
-}
-function Te(t, e, s, n) {
-  if (e === "linear")
-    return t;
-  if (e === "relu")
-    return We(t);
-  if (e === "elu")
-    return me(t);
-  if (e === "relu6")
-    return ze(t);
-  if (e === "prelu")
-    return Se(t, s);
-  if (e === "leakyrelu")
-    return ke(t, n);
-  if (e === "sigmoid")
-    return fe(t);
-  throw new Error(`Unknown fused activation ${e}.`);
-}
-const Be = (t, e) => !(t > 0) || e === "linear";
-/**
- * @license
- * Copyright 2019 Google LLC. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
-function Ne({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activation: c = "linear", preluActivationWeights: l, leakyreluAlpha: p = 0.2 }) {
-  if (Be(o.state.gradientDepth, c) === !1) {
-    let x = m(t, e, s, n);
-    return r != null && (x = le(x, r)), Te(x, c, l, p);
-  }
-  let u = i(t, "a", "fused matMul"), a = i(e, "b", "fused matMul");
-  [u, a] = q(u, a);
-  const D = s ? u.shape[u.rank - 2] : u.shape[u.rank - 1], b = n ? a.shape[a.rank - 1] : a.shape[a.rank - 2], W = s ? u.shape[u.rank - 1] : u.shape[u.rank - 2], w = n ? a.shape[a.rank - 2] : a.shape[a.rank - 1], T = u.shape.slice(0, -2), y = a.shape.slice(0, -2), B = C(T), N = C(y);
-  L(D === b, () => `Error in fused matMul: inner shapes (${D}) and (${b}) of Tensors with shapes ${u.shape} and ${a.shape} and transposeA=${s} and transposeB=${n} must match.`);
-  const O = P(u.shape.slice(0, -2), a.shape.slice(0, -2)).concat([W, w]), F = s ? f(u, [B, D, W]) : f(u, [B, W, D]), R = n ? f(a, [N, w, b]) : f(a, [N, b, w]);
-  let S;
-  r != null && (S = i(r, "bias", "fused matMul"), [S] = q(S, u), P(O, S.shape));
-  let v;
-  l != null && (v = i(l, "prelu weights", "fused matMul"));
-  const G = (x, M) => {
-    const [g, $, k, z] = M, d = Ae(f(x, k.shape), k, c);
-    let K, _;
-    if (!s && !n ? (K = m(d, $, !1, !0), _ = m(g, d, !0, !1)) : !s && n ? (K = m(d, $, !1, !1), _ = m(d, g, !0, !1)) : s && !n ? (K = m($, d, !1, !0), _ = m(g, d, !1, !1)) : (K = m($, d, !0, !0), _ = m(d, g, !0, !0)), r != null) {
-      const V = Le(z, d);
-      return [K, _, V];
-    } else
-      return [K, _];
-  }, I = {
-    a: F,
-    b: R,
-    bias: S,
-    preluActivationWeights: v
-  }, j = { transposeA: s, transposeB: n, activation: c, leakyreluAlpha: p };
-  return r == null ? U((M, g, $) => {
-    const k = (
-      // tslint:disable-next-line: no-unnecessary-type-assertion
-      o.runKernel(H, I, j)
-    );
-    return $([M, g, k]), { value: f(k, O), gradFunc: G };
-  })(F, R) : U((M, g, $, k) => {
-    const z = (
-      // tslint:disable-next-line: no-unnecessary-type-assertion
-      o.runKernel(H, I, j)
-    );
-    return k([M, g, z, $]), { value: f(z, O), gradFunc: G };
-  })(F, R, S);
-}
-const Q = /* @__PURE__ */ h({ fusedMatMul_: Ne });
-/**
- * @license
- * Copyright 2018 Google LLC
- *
- * Use of this source code is governed by an MIT-style
- * license that can be found in the LICENSE file or at
- * https://opensource.org/licenses/MIT.
- * =============================================================================
- */
-class E extends Error {
-  constructor(e) {
-    super(e), Object.setPrototypeOf(this, E.prototype);
-  }
-}
-/**
- * @license
- * Copyright 2018 Google LLC
- *
- * Use of this source code is governed by an MIT-style
- * license that can be found in the LICENSE file or at
- * https://opensource.org/licenses/MIT.
- * =============================================================================
- */
-function ve(t, e, s, n) {
-  if (t.rank < 2 || e.rank < 2)
-    throw new E(`dot requires both inputs to be rank >= 2 but got x shape = ${t.shape} and y shape = ${e.shape}`);
-  if (e.rank >= 3) {
-    const r = t.shape.slice(-1)[0], c = e.shape.slice(-2)[0];
-    if (r !== c)
-      throw new E(`If rank y >= 3, then the second last dim of y must equal the last dim of x but got x shape = ${t.shape} and  y shape = ${e.shape}`);
-  }
-  if (t.rank === 2 && e.rank === 2)
-    return Q({
-      a: t,
-      b: e,
-      transposeA: !1,
-      transposeB: !1,
-      bias: null,
-      activation: s
-    });
-  {
-    const r = t.shape.slice(), c = r.pop();
-    t = f(t, [-1, c]);
-    const l = e.shape.slice(), p = l.pop(), u = l.pop(), a = [...l, p], D = Array.from({ length: e.rank }, (T, y) => y === 0 ? e.rank - 2 : y <= e.rank - 2 ? y - 1 : y);
-    e = f(Re(e, D), [u, -1]);
-    const b = [...r, ...a];
-    return f(Q({
-      a: t,
-      b: e,
-      transposeA: !1,
-      transposeB: !1,
-      bias: null,
-      activation: s
-    }), b);
-  }
-}
-class Ue {
+import { r as t, d as s } from "../random_width-oeUIlUZj.js";
+import "../index-pWA4_lUh.js";
+import { v as r } from "../variable-Dl_ub3pk.js";
+import { g as d } from "../gather-BPGW8RsB.js";
+class b {
   vocabSize;
   embedDim;
-  tf;
   tiedWeights;
   initializer;
-  constructor(e, s, n) {
-    this.vocabSize = s.vocabSize, this.embedDim = s.embedDim, this.tf = e, this.initializer = this.tf.initializers.randomNormal({
+  constructor(i, e) {
+    this.vocabSize = i.vocabSize, this.embedDim = i.embedDim, this.initializer = t({
       mean: 0,
       stddev: 0.02
-    }), this.tiedWeights = this.tf.variable(
+    }), this.tiedWeights = r(
       this.initializer.apply([this.vocabSize, this.embedDim]),
       !0,
-      n || "tied_embedding"
+      e || "tied_embedding"
     );
   }
   get variables() {
     return [this.tiedWeights];
   }
-  embed(e) {
-    return this.tf.gather(this.tiedWeights, e, 0);
+  embed(i) {
+    return d(this.tiedWeights, i, 0);
   }
-  project(e) {
-    return ve(e, this.tiedWeights.transpose());
+  project(i) {
+    return s(i, this.tiedWeights.transpose());
   }
   getWeights() {
     return [this.tiedWeights];
   }
-  setWeights(e) {
-    this.tiedWeights.assign(e[0]);
+  setWeights(i) {
+    this.tiedWeights.assign(i[0]);
   }
   getConfig() {
     return {
@@ -445,5 +43,5 @@ class Ue {
   }
 }
 export {
-  Ue as default
+  b as default
 };

package/dist/layers/TransformerBlock.d.ts CHANGED Viewed

@@ -1,29 +1,28 @@
-import { default as TF } from '@tensorflow/tfjs';
 import { GPTConfig } from '../config';
 import { KVCache } from './CausalSelfAttention';
 import { default as RoPECache } from './RoPECache';
 import { default as MemoryProfiler } from '../utilities/profile';
 import { default as BaseLayer } from './BaseLayer';
+import { Tensor, Variable } from '@tensorflow/tfjs-core';
 export default class Block extends BaseLayer {
     private ln1;
     private attn;
     private ln2;
     private mlp;
-    private tf;
     private index;
     private _trainable;
     skipped: boolean;
-    constructor(tf: typeof TF, index: number, config: GPTConfig, ropeCache?: RoPECache);
+    constructor(index: number, config: GPTConfig, ropeCache?: RoPECache);
     setProfiler(value: MemoryProfiler | undefined): void;
-    get variables(): TF.Variable[];
+    get variables(): Variable[];
     get trainable(): boolean;
     set trainable(value: boolean);
-    saveWeights(map: Map<string, TF.Tensor[]>): void;
-    loadWeights(weights: Map<string, TF.Tensor[]>): void;
+    saveWeights(map: Map<string, Tensor[]>): void;
+    loadWeights(weights: Map<string, Tensor[]>): void;
     private getMLPOutput;
-    call(x: TF.Tensor, training?: boolean, includeAttention?: boolean, cache?: KVCache): {
-        output: TF.Tensor;
-        attention?: TF.Tensor;
+    call(x: Tensor, training?: boolean, includeAttention?: boolean, cache?: KVCache): {
+        output: Tensor;
+        attention?: Tensor;
         cache?: KVCache;
     };
     dispose(): void;

package/dist/layers/TransformerBlock.js CHANGED Viewed

@@ -1,18 +1,18 @@
-import a from "./CausalSelfAttention.js";
+import h from "./CausalSelfAttention.js";
 import o from "./MLP.js";
 import r from "./RMSNorm.js";
 import p from "./BaseLayer.js";
-class f extends p {
+import { t as d } from "../index-pWA4_lUh.js";
+class g extends p {
   ln1;
   attn;
   ln2;
   mlp;
-  tf;
   index;
   _trainable = !0;
   skipped = !1;
-  constructor(t, i, s, e) {
-    super(), this.tf = t, this.index = i, this.ln1 = new r(t, [s.nEmbed], 1e-8, `block_${this.index}_rms1`), this.attn = new a(this.tf, this.index, s, e), this.ln2 = new r(t, [s.nEmbed], 1e-8, `block_${this.index}_rms2`), this.mlp = new o(this.tf, this.index, s);
+  constructor(t, s, i) {
+    super(), this.index = t, this.ln1 = new r([s.nEmbed], 1e-8, `block_${this.index}_rms1`), this.attn = new h(this.index, s, i), this.ln2 = new r([s.nEmbed], 1e-8, `block_${this.index}_rms2`), this.mlp = new o(this.index, s);
   }
   setProfiler(t) {
     this._profiler = t, this.attn.setProfiler(t), this.mlp.setProfiler(t), this.ln1.setProfiler(t), this.ln2.setProfiler(t);
@@ -37,17 +37,17 @@ class f extends p {
   loadWeights(t) {
     this.attn.loadWeights(t), this.mlp.loadWeights(t), this.ln1.setWeights(t.get(`block_${this.index}_rms1`) || []), this.ln2.setWeights(t.get(`block_${this.index}_rms2`) || []);
   }
-  getMLPOutput(t, i) {
-    const s = this.ln2.apply(t), e = this.mlp.call(s, i);
+  getMLPOutput(t, s) {
+    const i = this.ln2.apply(t), e = this.mlp.call(i, s);
     return t.add(e);
   }
-  call(t, i = !1, s = !1, e) {
-    return this.tf.tidy(() => {
+  call(t, s = !1, i = !1, e) {
+    return d(() => {
       if (this.skipped)
         return { output: t };
-      const l = this.ln1.apply(t), n = this.attn.call(l, i, s, e), h = t.add(n.output);
+      const l = this.ln1.apply(t), n = this.attn.call(l, s, i, e), a = t.add(n.output);
       return {
-        output: this.getMLPOutput(h, i),
+        output: this.getMLPOutput(a, s),
         attention: n.attention,
         cache: n.presentKV
       };
@@ -58,5 +58,5 @@ class f extends p {
   }
 }
 export {
-  f as default
+  g as default
 };

package/dist/main.d.ts CHANGED Viewed

@@ -1,10 +1,12 @@
 export { default as NanoGPT } from './NanoGPTModel';
 export { default as TeachableLLM } from './TeachableLLM';
 export { default as CharTokeniser } from './tokeniser/CharTokeniser';
+export { default as BPETokeniser } from './tokeniser/bpe';
 export { default as waitForModel } from './utilities/waitForModel';
 export { default as loadTextData } from './data/textLoader';
 export type { ITrainerOptions } from './Trainer';
 export type { IGenerateOptions } from './Generator';
 export type { TrainingLogEntry } from './NanoGPTModel';
+export type { ITokeniser } from './tokeniser/type';
 export type { GPTConfig } from './config';
 export { estimateParameterCount, estimateMemoryUsage, estimateTrainingMemoryUsage, estimateResources, validateConfig, } from './utilities/parameters';

package/dist/main.js CHANGED Viewed

@@ -1,23 +1,37 @@
-import { default as s } from "./NanoGPTModel.js";
-import { default as p } from "./TeachableLLM.js";
-import { default as d } from "./tokeniser/CharTokeniser.js";
-import { default as x } from "./utilities/waitForModel.js";
-import { default as T } from "./data/textLoader.js";
-import { estimateMemoryUsage as M, estimateParameterCount as C, estimateResources as c, estimateTrainingMemoryUsage as h, validateConfig as y } from "./utilities/parameters.js";
-import "./ops/scatterSub.js";
-import "./ops/gatherSub.js";
-import "./ops/attentionMask.js";
-import "./ops/qkv.js";
-import "./ops/rope.js";
+import { default as P } from "./NanoGPTModel.js";
+import { default as h } from "./TeachableLLM.js";
+import { default as y } from "./tokeniser/CharTokeniser.js";
+import { default as U } from "./tokeniser/bpe.js";
+import { default as v } from "./utilities/waitForModel.js";
+import { default as B } from "./data/textLoader.js";
+import { estimateMemoryUsage as E, estimateParameterCount as F, estimateResources as G, estimateTrainingMemoryUsage as N, validateConfig as R } from "./utilities/parameters.js";
+import "./index-pWA4_lUh.js";
+import "./ops/cpu/scatterSub.js";
+import "./ops/webgl/scatterSub.js";
+import "./ops/cpu/gatherSub.js";
+import "./ops/webgl/gatherSub.js";
+import "./ops/cpu/attentionMask.js";
+import "./ops/webgl/attentionMask.js";
+import "./ops/grads/attentionMask.js";
+import "./ops/cpu/qkv.js";
+import "./ops/webgl/qkv.js";
+import "./ops/grads/qkv.js";
+import "@tensorflow/tfjs";
+import "./ops/cpu/rope.js";
+import "./ops/webgl/rope.js";
+import "./ops/grads/rope.js";
+import "./ops/cpu/appendCache.js";
+import "./ops/webgl/appendCache.js";
 export {
-  d as CharTokeniser,
-  s as NanoGPT,
-  p as TeachableLLM,
-  M as estimateMemoryUsage,
-  C as estimateParameterCount,
-  c as estimateResources,
-  h as estimateTrainingMemoryUsage,
-  T as loadTextData,
-  y as validateConfig,
-  x as waitForModel
+  U as BPETokeniser,
+  y as CharTokeniser,
+  P as NanoGPT,
+  h as TeachableLLM,
+  E as estimateMemoryUsage,
+  F as estimateParameterCount,
+  G as estimateResources,
+  N as estimateTrainingMemoryUsage,
+  B as loadTextData,
+  R as validateConfig,
+  v as waitForModel
 };

package/dist/{mat_mul-Bu7bhLms.js → mat_mul-D7_a4KJn.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as m, d as s, f as c, E as M, B as f } from "./index-YPKosni4.js";
+import { o as m, h as s, p as c, E as M, B as p } from "./index-pWA4_lUh.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -15,13 +15,13 @@ import { o as m, d as s, f as c, E as M, B as f } from "./index-YPKosni4.js";
  * limitations under the License.
  * =============================================================================
  */
-function p(e, o, n = !1, l = !1) {
+function f(e, o, n = !1, l = !1) {
   let a = s(e, "a", "matMul"), t = s(o, "b", "matMul");
   [a, t] = c(a, t);
   const r = { a, b: t }, u = { transposeA: n, transposeB: l };
-  return M.runKernel(f, r, u);
+  return M.runKernel(p, r, u);
 }
-const i = /* @__PURE__ */ m({ matMul_: p });
+const h = /* @__PURE__ */ m({ matMul_: f });
 export {
-  i as m
+  h as m
 };