npm - @genai-fi/nanogpt - Versions diffs - 0.4.2 → 0.4.4 - Mend

@genai-fi/nanogpt 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (94) hide show

package/dist/Generator.js +3 -3
package/dist/NanoGPTModel.js +73 -76
package/dist/Reshape-CiAY8ltP.js +212 -0
package/dist/TeachableLLM.js +7 -1
package/dist/{TiedEmbedding-CnJ1bx4q.js → TiedEmbedding-DznFwzcB.js} +244 -244
package/dist/{axis_util-BgTGy5w8.js → axis_util-QP0LdI1v.js} +1 -1
package/dist/{concat-CuRsVY-K.js → concat-DvWM7HGZ.js} +1 -1
package/dist/data/parquet.js +9 -6
package/dist/data/textLoader.js +6 -5
package/dist/{dropout-DfDdklfL.js → dropout-DFEXTPV0.js} +4 -4
package/dist/{gather-ZYRWhmXR.js → gather-C5D8PxwA.js} +1 -1
package/dist/gpgpu_math-CUzjlO9A.js +23 -0
package/dist/{index-C4JCoBvj.js → index--6vO-cOz.js} +87 -87
package/dist/{kernel_funcs_utils-CAd1h9X1.js → kernel_funcs_utils-C6YBCuOt.js} +72 -91
package/dist/layers/CausalSelfAttention.js +44 -44
package/dist/layers/MLP.js +31 -33
package/dist/layers/RMSNorm.js +3 -3
package/dist/layers/RoPECache.js +3 -3
package/dist/layers/TiedEmbedding.js +5 -5
package/dist/layers/TransformerBlock.js +1 -1
package/dist/{log_sum_exp-BswFnwOb.js → log_sum_exp-CiEy1aUe.js} +7 -7
package/dist/main.js +25 -19
package/dist/{mat_mul-415y5Qn2.js → mat_mul-BEHRPMh0.js} +1 -1
package/dist/{max-CP_9O2Yd.js → max-BUShNgfh.js} +1 -1
package/dist/{moments-CjeIaVdp.js → moments-DYOHXoRV.js} +5 -5
package/dist/{norm-CZM380I3.js → norm-DSva3hI3.js} +13 -13
package/dist/{ones-Bf3YR48P.js → ones-D6kB8bdY.js} +2 -2
package/dist/ops/appendCache.d.ts +1 -1
package/dist/ops/appendCache.js +10 -4
package/dist/ops/attentionMask.js +1 -1
package/dist/ops/cpu/appendCache.d.ts +1 -2
package/dist/ops/cpu/appendCache.js +15 -20
package/dist/ops/cpu/attentionMask.js +10 -10
package/dist/ops/cpu/fusedSoftmax.js +2 -2
package/dist/ops/cpu/gatherSub.js +4 -4
package/dist/ops/cpu/gelu.js +1 -1
package/dist/ops/cpu/matMulGelu.d.ts +1 -0
package/dist/ops/cpu/matMulGelu.js +40 -0
package/dist/ops/cpu/mulDropout.js +1 -1
package/dist/ops/cpu/qkv.js +3 -3
package/dist/ops/cpu/rope.js +5 -5
package/dist/ops/cpu/scatterSub.js +4 -4
package/dist/ops/fusedSoftmax.js +1 -1
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +2 -2
package/dist/ops/grads/attentionMask.js +1 -1
package/dist/ops/grads/fusedSoftmax.js +2 -2
package/dist/ops/grads/gelu.js +24 -3
package/dist/ops/grads/matMulGelu.d.ts +1 -0
package/dist/ops/grads/matMulGelu.js +17 -0
package/dist/ops/grads/qkv.js +1 -1
package/dist/ops/grads/rope.js +1 -1
package/dist/ops/matMulGelu.d.ts +3 -0
package/dist/ops/matMulGelu.js +14 -0
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/node/sparseCrossEntropy.js +1 -1
package/dist/ops/qkv.js +1 -1
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/webgl/appendCache.js +14 -13
package/dist/ops/webgl/attentionMask.js +1 -1
package/dist/ops/webgl/fusedSoftmax.js +689 -895
package/dist/ops/webgl/gatherSub.js +1 -1
package/dist/ops/webgl/gelu.js +2 -2
package/dist/ops/webgl/matMulGelu.d.ts +20 -0
package/dist/ops/webgl/matMulGelu.js +166 -0
package/dist/ops/webgl/mulDropout.js +1 -1
package/dist/ops/webgl/qkv.js +1 -1
package/dist/ops/webgl/rope.js +1 -1
package/dist/ops/webgl/scatterSub.js +1 -1
package/dist/{range-9AzeApCc.js → range-C_vpUjBu.js} +1 -1
package/dist/{reshape-Boe4DuIO.js → reshape-z51Eu-re.js} +1 -1
package/dist/{sin-KmhiDuMa.js → sin-H567uayl.js} +1 -1
package/dist/{slice_util-19zDNNSn.js → slice_util-BdhYwFY_.js} +2 -2
package/dist/{softmax-Cujsg4ay.js → softmax-Dsxflvdl.js} +1 -1
package/dist/{split-DbcNm1-i.js → split-B_k_jwud.js} +1 -1
package/dist/{stack-D1YjmgKN.js → stack-CmqSdsfs.js} +1 -1
package/dist/{sum-R28pucR5.js → sum-DdkDf2MG.js} +1 -1
package/dist/{tensor-BVeHdl7V.js → tensor-BGYi41cj.js} +1 -1
package/dist/{tensor2d-DqFGNs_K.js → tensor2d-DUr_htjt.js} +1 -1
package/dist/{tfjs_backend-Cug-PH75.js → tfjs_backend-DuKis_xG.js} +46 -46
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.js +18 -18
package/dist/training/FullTrainer.js +1 -1
package/dist/training/Trainer.js +5 -5
package/dist/training/sparseCrossEntropy.js +4 -4
package/dist/utilities/dummy.js +2 -2
package/dist/utilities/generate.js +3 -3
package/dist/utilities/load.js +1 -1
package/dist/utilities/profile.js +1 -1
package/dist/utilities/weights.js +2 -2
package/dist/{variable-LJT9Ld63.js → variable-BJTZ3jOy.js} +1 -1
package/dist/{zeros-dnQxFgAD.js → zeros-8xl-W2DC.js} +1 -1
package/package.json +1 -1
package/dist/gelu-CnCt17Lk.js +0 -26

package/dist/ops/webgl/gatherSub.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as l } from "../../index-C4JCoBvj.js";
+import { r as l } from "../../index--6vO-cOz.js";
 class u {
   variableNames = ["labels", "logits", "values"];
   outputShape;

package/dist/ops/webgl/gelu.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import { r as a } from "../../index-C4JCoBvj.js";
-import { u as s, C as x } from "../../kernel_funcs_utils-CAd1h9X1.js";
+import { r as a } from "../../index--6vO-cOz.js";
+import { u as s, C as x } from "../../kernel_funcs_utils-C6YBCuOt.js";
 const t = 0.7978845608028654, r = 0.044715, c = x + `
     float x3 = x * x * x;
     float inner = x + ${r} * x3;

package/dist/ops/webgl/matMulGelu.d.ts ADDED Viewed

@@ -0,0 +1,20 @@
+import { TensorInfo } from '@tensorflow/tfjs-core';
+import { MathBackendWebGL } from '@tensorflow/tfjs-backend-webgl';
+export declare const MATMUL_SHARED_DIM_THRESHOLD = 1000;
+type BatchMatMulConfig = {
+    a: TensorInfo;
+    b: TensorInfo;
+    transposeA: boolean;
+    transposeB: boolean;
+    backend: MathBackendWebGL;
+    activationSnippet: string;
+};
+export declare function batchMatMulGeluImpl({ a, b, transposeA, transposeB, backend, activationSnippet, }: BatchMatMulConfig): TensorInfo;
+export declare function batchMatMulKernel(args: {
+    inputs: {
+        x: TensorInfo;
+        kernel: TensorInfo;
+    };
+    backend: MathBackendWebGL;
+}): TensorInfo;
+export {};

package/dist/ops/webgl/matMulGelu.js ADDED Viewed

@@ -0,0 +1,166 @@
+import { r as G, t as P, e as R, b as I, n as k, O as L, j as F, Q as U } from "../../index--6vO-cOz.js";
+import { r as g } from "../../Reshape-CiAY8ltP.js";
+import { u as H } from "../../gpgpu_math-CUzjlO9A.js";
+import { m as z } from "../../mat_mul-BEHRPMh0.js";
+/**
+ * @license
+ * Copyright 2018 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+class W {
+  constructor(e, s, a, n = !1, c = !1, o = !1, r = null, i = !1, u = !1) {
+    this.variableNames = ["matrixA", "matrixB"], this.packedInputs = !0, this.packedOutput = !0, this.outputShape = a, this.enableShapeUniforms = H(this.outputShape.length);
+    const p = n ? e[1] : e[2], l = Math.ceil(p / 2), b = n ? "i * 2, rc.y" : "rc.y, i * 2", M = c ? "rc.z, i * 2" : "i * 2, rc.z", h = n ? ["a.xxyy", "a.zzww"] : ["a.xxzz", "a.yyww"], d = c ? ["b.xzxz", "b.ywyw"] : ["b.xyxy", "b.zwzw"];
+    let m = "", v = "";
+    r && (i ? m = `vec4 activation(vec4 a) {
+          vec4 b = getPreluActivationWeightsAtOutCoords();
+          ${r}
+        }` : u ? m = `vec4 activation(vec4 a) {
+          vec4 b = getLeakyreluAlphaAtOutCoords();
+          ${r}
+        }` : m = `vec4 activation(vec4 x) {
+          ${r}
+        }`, v = "result = activation(result);");
+    const $ = o ? "result += getBiasAtOutCoords();" : "";
+    o && this.variableNames.push("bias"), i && this.variableNames.push("preluActivationWeights"), u && this.variableNames.push("leakyreluAlpha");
+    let f = "rc.x", x = "rc.x";
+    e[0] < s[0] ? f = `imod(rc.x, ${e[0]})` : s[0] < e[0] && (x = `imod(rc.x, ${s[0]})`), this.userCode = `
+      ${m}
+      // Don't use uniform for sharedDimensionPacked for performance.
+      const float sharedDimension = ${l}.0;
+      vec4 dot2x2ARowBCol(ivec3 rc) {
+        vec4 result = vec4(0);
+        int batchA = ${f};
+        int batchB = ${x};
+        for (int i = 0; i < ${l}; i++) {
+          vec4 a = getMatrixA(batchA, ${b});
+          vec4 b = getMatrixB(batchB, ${M});
+          // These swizzled products need to be separately added.
+          // See: https://github.com/tensorflow/tfjs/issues/1735
+          result += (${h[0]} * ${d[0]});
+          result += (${h[1]} * ${d[1]});
+        }
+        return result;
+      }
+      void main() {
+        ivec3 rc = getOutputCoords();
+        vec4 result = dot2x2ARowBCol(rc);
+        ${$}
+        ${v}
+        setOutput(result);
+      }
+    `;
+  }
+}
+const S = 0.7978845608028654, w = 0.044715, j = `
+    vec4 x3 = x * x * x;
+    vec4 inner = x + ${w} * x3;
+    inner = ${S} * inner;
+    inner = tanh(inner);
+    inner = 0.5 * (1.0 + inner);
+    vec4 result = x * inner;
+    return result;
+`, q = `
+    vec4 x2 = x * x;
+    vec4 x3 = x2 * x;
+    vec4 u  = ${S} * (x + ${w} * x3);
+    vec4 t  = tanh(u);
+    vec4 sech2 = 1.0 - t * t;
+    vec4 du_dx = ${S} * (1.0 + 3.0 * ${w} * x2);
+    vec4 dgelu = 0.5 * (1.0 + t) + 0.5 * x * sech2 * du_dx;
+    return dgelu;
+`, se = 1e3;
+function B({
+  a: t,
+  b: e,
+  transposeA: s,
+  transposeB: a,
+  backend: n,
+  activationSnippet: c
+}) {
+  const o = t.shape.length, r = e.shape.length, i = s ? t.shape[o - 2] : t.shape[o - 1], u = a ? e.shape[r - 1] : e.shape[r - 2], p = s ? t.shape[o - 1] : t.shape[o - 2], l = a ? e.shape[r - 2] : e.shape[r - 1], b = t.shape.slice(0, -2), M = e.shape.slice(0, -2), h = k(b), d = k(M), v = L(t.shape.slice(0, -2), e.shape.slice(0, -2)).concat([p, l]);
+  F(
+    i === u,
+    () => `Error in matMul: inner shapes (${i}) and (${u}) of Tensors with shapes ${t.shape} and ${e.shape} and transposeA=${s} and transposeB=${a} must match.`
+  );
+  const $ = s ? [h, i, p] : [h, p, i], f = a ? [d, l, u] : [d, u, l], x = g({ inputs: { x: t }, backend: n, attrs: { shape: $ } }), A = g({ inputs: { x: e }, backend: n, attrs: { shape: f } }), y = [x, A], C = Math.max(h, d), O = c, E = U(t.dtype, e.dtype), N = new W(
+    $,
+    f,
+    [C, p, l],
+    s,
+    a,
+    !1,
+    O,
+    !1,
+    !1
+  ), T = [x, A], D = n.runWebGLProgram(N, T, E), _ = g({ inputs: { x: D }, backend: n, attrs: { shape: v } });
+  y.push(D);
+  for (const K of y)
+    n.disposeIntermediateTensorInfo(K);
+  return _;
+}
+function Q(t) {
+  const { inputs: e, backend: s } = t, { x: a, kernel: n } = e;
+  if (a === void 0 || n === void 0)
+    throw new Error("BatchMatMul requires two input tensors.");
+  return B({
+    a,
+    b: n,
+    transposeA: !1,
+    transposeB: !1,
+    backend: s,
+    activationSnippet: j
+  });
+}
+const J = {
+  kernelName: "MatMulGelu",
+  backendName: "webgl",
+  kernelFunc: Q
+};
+G(J);
+function V(t) {
+  const { dy: e, x: s, kernel: a } = t.inputs, n = t.backend;
+  return P(() => {
+    const c = R().makeTensorFromTensorInfo(
+      B({
+        a: s,
+        b: a,
+        transposeA: !1,
+        transposeB: !1,
+        backend: n,
+        activationSnippet: q
+      })
+    ), o = I(e, c);
+    c.dispose();
+    const r = z(o, a, !1, !0), i = z(s, o, !0, !1);
+    return [r, i];
+  });
+}
+const X = {
+  kernelName: "MatMulGeluGrad",
+  backendName: "webgl",
+  kernelFunc: V
+};
+G(X);
+export {
+  se as MATMUL_SHARED_DIM_THRESHOLD,
+  B as batchMatMulGeluImpl,
+  Q as batchMatMulKernel
+};

package/dist/ops/webgl/mulDropout.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as m } from "../../index-C4JCoBvj.js";
+import { r as m } from "../../index--6vO-cOz.js";
 class f {
   variableNames = ["a", "b"];
   outputShape;

package/dist/ops/webgl/qkv.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as i } from "../../index-C4JCoBvj.js";
+import { r as i } from "../../index--6vO-cOz.js";
 class l {
   variableNames = ["x", "kernel"];
   outputShape;

package/dist/ops/webgl/rope.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as u } from "../../index-C4JCoBvj.js";
+import { r as u } from "../../index--6vO-cOz.js";
 class l {
   variableNames = ["x", "sin", "cos"];
   outputShape;

package/dist/ops/webgl/scatterSub.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { r as i } from "../../index-C4JCoBvj.js";
+import { r as i } from "../../index--6vO-cOz.js";
 class u {
   variableNames = ["labels", "softmaxProbs", "dy"];
   outputShape;

package/dist/{range-9AzeApCc.js → range-C_vpUjBu.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { E as e, R as f } from "./index-C4JCoBvj.js";
+import { E as e, R as f } from "./index--6vO-cOz.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.

package/dist/{reshape-Boe4DuIO.js → reshape-z51Eu-re.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o, h as t, E as a, q as p } from "./index-C4JCoBvj.js";
+import { o, h as t, E as a, q as p } from "./index--6vO-cOz.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.

package/dist/{sin-KmhiDuMa.js → sin-H567uayl.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o, h as t, E as c, Q as a, T as e } from "./index-C4JCoBvj.js";
+import { o, h as t, E as c, V as a, W as e } from "./index--6vO-cOz.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.

package/dist/{slice_util-19zDNNSn.js → slice_util-BdhYwFY_.js} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { o as u, h as p, k as g, w as m, E as w, a1 as x, j as i } from "./index-C4JCoBvj.js";
-import { r as y } from "./reshape-Boe4DuIO.js";
+import { o as u, h as p, k as g, w as m, E as w, a4 as x, j as i } from "./index--6vO-cOz.js";
+import { r as y } from "./reshape-z51Eu-re.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.

package/dist/{softmax-Cujsg4ay.js → softmax-Dsxflvdl.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as r, h as f, E as e, S as i } from "./index-C4JCoBvj.js";
+import { o as r, h as f, E as e, S as i } from "./index--6vO-cOz.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.

package/dist/{split-DbcNm1-i.js → split-B_k_jwud.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as p, h as i, E as a, u as c } from "./index-C4JCoBvj.js";
+import { o as p, h as i, E as a, u as c } from "./index--6vO-cOz.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.

package/dist/{stack-D1YjmgKN.js → stack-CmqSdsfs.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as e, i as c, j as n, E as i, P as k } from "./index-C4JCoBvj.js";
+import { o as e, i as c, j as n, E as i, P as k } from "./index--6vO-cOz.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.

package/dist/{sum-R28pucR5.js → sum-DdkDf2MG.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { o as e, h as u, x as c, E as l, y as m } from "./index-C4JCoBvj.js";
+import { o as e, h as u, x as c, E as l, y as m } from "./index--6vO-cOz.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.

package/dist/{tensor-BVeHdl7V.js → tensor-BGYi41cj.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { J as t, K as a } from "./index-C4JCoBvj.js";
+import { J as t, K as a } from "./index--6vO-cOz.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.

package/dist/{tensor2d-DqFGNs_K.js → tensor2d-DUr_htjt.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { I as t, J as s, K as a } from "./index-C4JCoBvj.js";
+import { I as t, J as s, K as a } from "./index--6vO-cOz.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.

package/dist/{tfjs_backend-Cug-PH75.js → tfjs_backend-DuKis_xG.js} RENAMED Viewed

@@ -1,18 +1,18 @@
-import { o as f, h as l, E as d, an as nn, ao as tn, j as p, ap as sn, D as Ve, aq as rn, al as P, ar as on, as as an, at as cn, au as un, a1 as ln, aa as pn, p as Y, av as fn, aw as hn, ax as dn, ay as mn, az as gn, aA as $n, aB as bn, aC as kn, x as C, aD as xn, aE as wn, aF as An, aG as yn, aH as _n, aI as In, aJ as En, aK as Sn, I as Tn, J as Nn, K as Dn, aL as Mn, t as K, aM as he, b as I, aN as On, a5 as U, n as Me, c as Oe, aO as Be, aP as Bn, aQ as Pn, aR as Kn, aS as Rn, aT as qn, aU as jn, f as Fn, aV as Gn, aW as Ln, ad as L, s as q, aX as Vn, w as de, a as Cn, ai as Pe, aY as vn, $ as zn } from "./index-C4JCoBvj.js";
-import { r as $ } from "./reshape-Boe4DuIO.js";
-import { s as Ce } from "./split-DbcNm1-i.js";
-import { s as G } from "./sum-R28pucR5.js";
-import { b as me } from "./slice_util-19zDNNSn.js";
-import { r as ue } from "./range-9AzeApCc.js";
-import { t as Wn } from "./tensor-BVeHdl7V.js";
-import { s as le } from "./stack-D1YjmgKN.js";
-import { c as Jn, z as Yn } from "./zeros-dnQxFgAD.js";
-import { n as ve } from "./norm-CZM380I3.js";
-import { c as V } from "./concat-CuRsVY-K.js";
-import { m as y } from "./mat_mul-415y5Qn2.js";
-import { t as ge } from "./tensor2d-DqFGNs_K.js";
-import { r as Un, d as Xn } from "./dropout-DfDdklfL.js";
-import { g as Zn } from "./gather-ZYRWhmXR.js";
+import { o as f, h as l, E as d, ap as nn, aq as tn, j as p, ar as sn, D as Ve, as as rn, O as P, at as on, au as an, av as cn, aw as un, a4 as ln, ag as pn, p as Y, ax as fn, ay as hn, az as dn, aA as mn, aB as gn, aC as $n, aD as bn, aE as kn, x as C, aF as xn, aG as wn, aH as An, aI as yn, aJ as _n, aK as In, aL as En, aM as Sn, I as Tn, J as Nn, K as Dn, aN as Mn, t as K, aO as he, b as I, aP as On, a8 as U, n as Me, c as Oe, aQ as Be, aR as Bn, aS as Pn, aT as Kn, aU as Rn, aV as qn, aW as jn, f as Fn, aX as Gn, aY as Ln, ai as L, s as q, aZ as Vn, w as de, a as Cn, am as Pe, a_ as vn, a2 as zn } from "./index--6vO-cOz.js";
+import { r as $ } from "./reshape-z51Eu-re.js";
+import { s as Ce } from "./split-B_k_jwud.js";
+import { s as G } from "./sum-DdkDf2MG.js";
+import { b as me } from "./slice_util-BdhYwFY_.js";
+import { r as ue } from "./range-C_vpUjBu.js";
+import { t as Wn } from "./tensor-BGYi41cj.js";
+import { s as le } from "./stack-CmqSdsfs.js";
+import { c as Jn, z as Yn } from "./zeros-8xl-W2DC.js";
+import { n as ve } from "./norm-DSva3hI3.js";
+import { c as V } from "./concat-DvWM7HGZ.js";
+import { m as y } from "./mat_mul-BEHRPMh0.js";
+import { t as ge } from "./tensor2d-DUr_htjt.js";
+import { r as Un, d as Zn } from "./dropout-DFEXTPV0.js";
+import { g as Xn } from "./gather-C5D8PxwA.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -708,7 +708,7 @@ function Ut(e, n = 0) {
   const s = { x: l(e, "x", "step") }, r = { alpha: n };
   return d.runKernel(Sn, s, r);
 }
-const Xt = /* @__PURE__ */ f({ step_: Ut });
+const Zt = /* @__PURE__ */ f({ step_: Ut });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -748,13 +748,13 @@ function B(e, n) {
  * limitations under the License.
  * =============================================================================
  */
-function Zt(e, n = 0) {
+function Xt(e, n = 0) {
   const t = l(e, "x", "unstack", "string_or_numeric");
   p(n >= -t.shape.length && n < t.shape.length, () => `Axis = ${n} is not in [-${t.shape.length}, ${t.shape.length})`);
   const s = { value: t }, r = { axis: n };
   return d.runKernel(Mn, s, r);
 }
-const Ye = /* @__PURE__ */ f({ unstack_: Zt });
+const Ye = /* @__PURE__ */ f({ unstack_: Xt });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -804,7 +804,7 @@ function Qt(e, n, t) {
   if (t == null || t === "linear")
     return e;
   if (t === "relu")
-    return I(e, Xt(n));
+    return I(e, Zt(n));
   throw new Error(`Cannot compute gradient for fused activation ${t}.`);
 }
 function es(e, n) {
@@ -862,13 +862,13 @@ function ss({ a: e, b: n, transposeA: t = !1, transposeB: s = !1, bias: r, activ
   a != null && (te = l(a, "prelu weights", "fused matMul"));
   const v = (z, se) => {
     const [j, F, W, ce] = se, R = Qt($(z, W.shape), W, o);
-    let Z, H;
-    if (!t && !s ? (Z = y(R, F, !1, !0), H = y(j, R, !0, !1)) : !t && s ? (Z = y(R, F, !1, !1), H = y(R, j, !0, !1)) : t && !s ? (Z = y(F, R, !1, !0), H = y(j, R, !1, !1)) : (Z = y(F, R, !0, !0), H = y(R, j, !0, !0)), r != null) {
+    let X, H;
+    if (!t && !s ? (X = y(R, F, !1, !0), H = y(j, R, !0, !1)) : !t && s ? (X = y(R, F, !1, !1), H = y(R, j, !0, !1)) : t && !s ? (X = y(F, R, !1, !0), H = y(j, R, !1, !1)) : (X = y(F, R, !0, !0), H = y(R, j, !0, !0)), r != null) {
       const en = es(ce, R);
-      return [Z, H, en];
+      return [X, H, en];
     } else
-      return [Z, H];
-  }, X = {
+      return [X, H];
+  }, Z = {
     a: M,
     b: O,
     bias: N,
@@ -877,13 +877,13 @@ function ss({ a: e, b: n, transposeA: t = !1, transposeB: s = !1, bias: r, activ
   return r == null ? Oe((se, j, F) => {
     const W = (
       // tslint:disable-next-line: no-unnecessary-type-assertion
-      d.runKernel(Be, X, De)
+      d.runKernel(Be, Z, De)
     );
     return F([se, j, W]), { value: $(W, T), gradFunc: v };
   })(M, O) : Oe((se, j, F, W) => {
     const ce = (
       // tslint:disable-next-line: no-unnecessary-type-assertion
-      d.runKernel(Be, X, De)
+      d.runKernel(Be, Z, De)
     );
     return W([se, j, ce, F]), { value: $(ce, T), gradFunc: v };
   })(M, O, N);
@@ -1575,15 +1575,15 @@ function Fe(e, n = !1) {
         if (c === 0)
           o = q(M, y(O, y(N, M)));
         else {
-          const X = q(M, y(O, y(N, M)));
-          o = V([_(o, [0, 0], [c, s]), X], 0);
+          const Z = q(M, y(O, y(N, M)));
+          o = V([_(o, [0, 0], [c, s]), Z], 0);
         }
         const te = xe(O), v = _(r, [0, c], [t, r.shape[1] - c]);
         if (c === 0)
           r = q(v, y(y(v, i), te));
         else {
-          const X = q(v, y(y(v, i), te));
-          r = V([_(r, [0, 0], [t, c]), X], 1);
+          const Z = q(v, y(y(v, i), te));
+          r = V([_(r, [0, 0], [t, c]), Z], 1);
         }
         return [i, o, r];
       }), Cn([m, h, b]);
@@ -1638,7 +1638,7 @@ const Ar = {
  * https://opensource.org/licenses/MIT.
  * =============================================================================
  */
-const Xs = ["channelsFirst", "channelsLast"], Zs = ["nearest", "bilinear"], Hs = ["valid", "same", "causal"], Qs = ["max", "avg"], _r = ["sum", "mul", "concat", "ave"];
+const Zs = ["channelsFirst", "channelsLast"], Xs = ["nearest", "bilinear"], Hs = ["valid", "same", "causal"], Qs = ["max", "avg"], _r = ["sum", "mul", "concat", "ave"];
 /**
  * @license
  * Copyright 2018 Google LLC
@@ -1653,9 +1653,9 @@ class Ue extends Error {
     super(n), Object.setPrototypeOf(this, Ue.prototype);
   }
 }
-class Xe extends Error {
+class Ze extends Error {
   constructor(n) {
-    super(n), Object.setPrototypeOf(this, Xe.prototype);
+    super(n), Object.setPrototypeOf(this, Ze.prototype);
   }
 }
 class A extends Error {
@@ -1812,10 +1812,10 @@ function Rr(e, n, t = 0, s = 1 / 0) {
   return Ge(t >= 0), Ge(s >= t), Array.isArray(e) && e.length >= t && e.length <= s && e.every((r) => typeof r === n);
 }
 function nr(e, n) {
-  Array.isArray(e) ? (p(e.length > 0, () => `${n} is unexpectedly an empty array.`), e.forEach((t, s) => nr(t, `element ${s + 1} of ${n}`))) : p(Number.isInteger(e) && e > 0, () => `Expected ${n} to be a positive integer, but got ${Ze(e)}.`);
+  Array.isArray(e) ? (p(e.length > 0, () => `${n} is unexpectedly an empty array.`), e.forEach((t, s) => nr(t, `element ${s + 1} of ${n}`))) : p(Number.isInteger(e) && e > 0, () => `Expected ${n} to be a positive integer, but got ${Xe(e)}.`);
 }
-function Ze(e) {
-  return e === null ? "null" : Array.isArray(e) ? "[" + e.map((n) => Ze(n)).join(",") + "]" : typeof e == "string" ? `"${e}"` : `${e}`;
+function Xe(e) {
+  return e === null ? "null" : Array.isArray(e) ? "[" + e.map((n) => Xe(n)).join(",") + "]" : typeof e == "string" ? `"${e}"` : `${e}`;
 }
 function qr(e, n, t) {
   let s = t != null ? t() : Pe(), r;
@@ -1838,10 +1838,10 @@ function jr(e) {
  */
 const Q = /* @__PURE__ */ new Map();
 function tr(e) {
-  fe(Xs, "DataFormat", e);
+  fe(Zs, "DataFormat", e);
 }
 function Fr(e) {
-  fe(Zs, "InterpolationFormat", e);
+  fe(Xs, "InterpolationFormat", e);
 }
 function Gr(e) {
   fe(Hs, "PaddingMode", e);
@@ -1954,14 +1954,14 @@ function Ae() {
  * https://opensource.org/licenses/MIT.
  * =============================================================================
  */
-function Xr(e, n) {
+function Zr(e, n) {
   return C(e, n);
 }
 function or(e, n = -1) {
   const t = e.shape.slice();
   return n < 0 && (n = t.length + n + 1), t.splice(n, 0, 1), $(e, t);
 }
-function Zr(e, n) {
+function Xr(e, n) {
   return K(() => {
     if (e.shape.length !== 2)
       throw new A(`repeat() expects a rank-2 tensor, but received a rank-${e.shape.length} tensor.`);
@@ -2131,7 +2131,7 @@ function ro(e, n, t, s) {
   }
 }
 function oo(e, n, t) {
-  return K(() => (Array.isArray(n) ? n = B(n, "int32") : n = C(n, "int32"), Zn(e, n, t)));
+  return K(() => (Array.isArray(n) ? n = B(n, "int32") : n = C(n, "int32"), Xn(e, n, t)));
 }
 function ao(e) {
   return I(e, e);
@@ -2171,7 +2171,7 @@ function uo(e) {
   return K(() => L(e, U(zn(e), 1)));
 }
 function lo(e, n, t, s) {
-  return K(() => Xn(e, n, t, s));
+  return K(() => Zn(e, n, t, s));
 }
 function po(e) {
   return K(() => {
@@ -2200,8 +2200,8 @@ export {
   ie as N,
   ao as O,
   Ge as P,
-  Xr as Q,
-  Xe as R,
+  Zr as Q,
+  Ze as R,
   Yt as S,
   Dr as T,
   Pr as U,
@@ -2240,7 +2240,7 @@ export {
   lo as ap,
   no as aq,
   Qr as ar,
-  Zr as as,
+  Xr as as,
   Lr as at,
   _r as au,
   es as b,
@@ -2258,7 +2258,7 @@ export {
   Sr as n,
   Tr as o,
   Vr as p,
-  Xt as q,
+  Zt as q,
   so as r,
   ts as s,
   Nr as t,

package/dist/training/AdamExt.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { A as r, b as c, f as h, s as g, e as o } from "../index-C4JCoBvj.js";
+import { A as r, b as c, f as h, s as g, e as o } from "../index--6vO-cOz.js";
 class u extends r {
   constructor(t, e, s, a, i) {
     super(t, e, s, a), this.config = i, this.startLearningRate = t;

package/dist/training/DatasetBuilder.js CHANGED Viewed

@@ -1,7 +1,7 @@
-import { ae as $, ac as m, af as M, a as R, ag as f, ah as v, ai as z, j as _, t as x } from "../index-C4JCoBvj.js";
+import { aj as $, ah as d, L as M, a as R, ak as f, al as v, am as z, j as _, t as x } from "../index--6vO-cOz.js";
 import { s as E } from "../index-C4L8Cm77.js";
-import { s as P } from "../stack-D1YjmgKN.js";
-import { t as D } from "../tensor-BVeHdl7V.js";
+import { s as P } from "../stack-CmqSdsfs.js";
+import { t as D } from "../tensor-BGYi41cj.js";
 import "../index-Tf7vU29b.js";
 /**
  * @license
@@ -82,10 +82,10 @@ function p(s) {
     const { StringDecoder: e } = require("string_decoder");
     t = s instanceof e;
   }
-  return s != null && !ArrayBuffer.isView(s) && (Array.isArray(s) || typeof s == "object" && !(s instanceof m) && !(s instanceof Promise) && !t);
+  return s != null && !ArrayBuffer.isView(s) && (Array.isArray(s) || typeof s == "object" && !(s instanceof d) && !(s instanceof Promise) && !t);
 }
 function H(s) {
-  return s == null || q(s) || Array.isArray(s) || typeof s == "object" && s instanceof m || $(s);
+  return s == null || q(s) || Array.isArray(s) || typeof s == "object" && s instanceof d || $(s);
 }
 function q(s) {
   return s === null || typeof s != "object" && typeof s != "function";
@@ -111,7 +111,7 @@ function Q(s) {
   return L(s, G);
 }
 function G(s) {
-  return s instanceof m ? { value: s.clone(), recurse: !1 } : p(s) ? { value: null, recurse: !0 } : { value: s, recurse: !1 };
+  return s instanceof d ? { value: s.clone(), recurse: !1 } : p(s) ? { value: null, recurse: !0 } : { value: s, recurse: !1 };
 }
 /**
  * @license
@@ -477,7 +477,7 @@ class i {
    *   of the original element type.
    */
   rowMajorBatch(t, e = !0) {
-    return new j(this, t, e);
+    return new K(this, t, e);
   }
   /**
    * Groups elements into batches, represented in column-major form.
@@ -535,7 +535,7 @@ class i {
    *   unaltered.
    */
   take(t) {
-    return t < 0 || t == null ? this : new K(this, t);
+    return t < 0 || t == null ? this : new j(this, t);
   }
   /**
    * Skips the first `count` items in this stream.
@@ -641,7 +641,7 @@ class X extends i {
     return this.upstream.next();
   }
 }
-class K extends i {
+class j extends i {
   constructor(t, e) {
     super(), this.upstream = t, this.maxCount = e, this.count = 0;
   }
@@ -652,7 +652,7 @@ class K extends i {
     return this.count++ >= this.maxCount ? { value: null, done: !0 } : this.upstream.next();
   }
 }
-class j extends i {
+class K extends i {
   constructor(t, e, r = !0) {
     super(), this.upstream = t, this.batchSize = e, this.enableSmallLastBatch = r, this.lastRead = Promise.resolve({ value: null, done: !1 });
   }
@@ -1219,7 +1219,7 @@ function at(s) {
 function it(s) {
   if (s.length === 0)
     throw new Error("Can't make a batch of zero elements.");
-  return s[0] instanceof m ? P(s) : D(s);
+  return s[0] instanceof d ? P(s) : D(s);
 }
 /**
  * @license
@@ -1244,7 +1244,7 @@ function ut(s) {
     return k(() => t.next());
   });
 }
-class dt {
+class mt {
   tokenizer;
   blockSize;
   constructor(t, e = 128) {
@@ -1257,20 +1257,20 @@ class dt {
       n === 1 ? void 0 : Math.floor(n * h.length)
     ), w = (function* () {
       for (; ; ) {
-        const u = Math.floor(Math.random() * (c.length - this.blockSize - 1)), d = c.slice(u, u + this.blockSize), B = c.slice(u + 1, u + this.blockSize + 1);
-        yield { xs: d, ys: B };
+        const u = Math.floor(Math.random() * (c.length - this.blockSize - 1)), m = c.slice(u, u + this.blockSize), B = c.slice(u + 1, u + this.blockSize + 1);
+        yield { xs: m, ys: B };
       }
     }).bind(this);
     return ut(w).batch(e).map((u) => {
-      const d = u;
+      const m = u;
       return x(() => ({
-        xs: d.xs.cast("int32"),
-        ys: d.ys.cast("int32")
+        xs: m.xs.cast("int32"),
+        ys: m.ys.cast("int32")
         // this.tf.oneHot(batchData.ys.cast('int32'), this.tokenizer.vocabSize),
       }));
     }).prefetch(2);
   }
 }
 export {
-  dt as DatasetBuilder
+  mt as DatasetBuilder
 };