@genai-fi/nanogpt 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/dist/Generator.js +3 -3
  2. package/dist/NanoGPTModel.js +73 -76
  3. package/dist/Reshape-CiAY8ltP.js +212 -0
  4. package/dist/TeachableLLM.js +7 -1
  5. package/dist/{TiedEmbedding-CnJ1bx4q.js → TiedEmbedding-DznFwzcB.js} +244 -244
  6. package/dist/{axis_util-BgTGy5w8.js → axis_util-QP0LdI1v.js} +1 -1
  7. package/dist/{concat-CuRsVY-K.js → concat-DvWM7HGZ.js} +1 -1
  8. package/dist/data/parquet.js +9 -6
  9. package/dist/data/textLoader.js +6 -5
  10. package/dist/{dropout-DfDdklfL.js → dropout-DFEXTPV0.js} +4 -4
  11. package/dist/{gather-ZYRWhmXR.js → gather-C5D8PxwA.js} +1 -1
  12. package/dist/gpgpu_math-CUzjlO9A.js +23 -0
  13. package/dist/{index-C4JCoBvj.js → index--6vO-cOz.js} +87 -87
  14. package/dist/{kernel_funcs_utils-CAd1h9X1.js → kernel_funcs_utils-C6YBCuOt.js} +72 -91
  15. package/dist/layers/CausalSelfAttention.js +44 -44
  16. package/dist/layers/MLP.js +31 -33
  17. package/dist/layers/RMSNorm.js +3 -3
  18. package/dist/layers/RoPECache.js +3 -3
  19. package/dist/layers/TiedEmbedding.js +5 -5
  20. package/dist/layers/TransformerBlock.js +1 -1
  21. package/dist/{log_sum_exp-BswFnwOb.js → log_sum_exp-CiEy1aUe.js} +7 -7
  22. package/dist/main.js +25 -19
  23. package/dist/{mat_mul-415y5Qn2.js → mat_mul-BEHRPMh0.js} +1 -1
  24. package/dist/{max-CP_9O2Yd.js → max-BUShNgfh.js} +1 -1
  25. package/dist/{moments-CjeIaVdp.js → moments-DYOHXoRV.js} +5 -5
  26. package/dist/{norm-CZM380I3.js → norm-DSva3hI3.js} +13 -13
  27. package/dist/{ones-Bf3YR48P.js → ones-D6kB8bdY.js} +2 -2
  28. package/dist/ops/appendCache.d.ts +1 -1
  29. package/dist/ops/appendCache.js +10 -4
  30. package/dist/ops/attentionMask.js +1 -1
  31. package/dist/ops/cpu/appendCache.d.ts +1 -2
  32. package/dist/ops/cpu/appendCache.js +15 -20
  33. package/dist/ops/cpu/attentionMask.js +10 -10
  34. package/dist/ops/cpu/fusedSoftmax.js +2 -2
  35. package/dist/ops/cpu/gatherSub.js +4 -4
  36. package/dist/ops/cpu/gelu.js +1 -1
  37. package/dist/ops/cpu/matMulGelu.d.ts +1 -0
  38. package/dist/ops/cpu/matMulGelu.js +40 -0
  39. package/dist/ops/cpu/mulDropout.js +1 -1
  40. package/dist/ops/cpu/qkv.js +3 -3
  41. package/dist/ops/cpu/rope.js +5 -5
  42. package/dist/ops/cpu/scatterSub.js +4 -4
  43. package/dist/ops/fusedSoftmax.js +1 -1
  44. package/dist/ops/gatherSub.js +1 -1
  45. package/dist/ops/gelu.js +2 -2
  46. package/dist/ops/grads/attentionMask.js +1 -1
  47. package/dist/ops/grads/fusedSoftmax.js +2 -2
  48. package/dist/ops/grads/gelu.js +24 -3
  49. package/dist/ops/grads/matMulGelu.d.ts +1 -0
  50. package/dist/ops/grads/matMulGelu.js +17 -0
  51. package/dist/ops/grads/qkv.js +1 -1
  52. package/dist/ops/grads/rope.js +1 -1
  53. package/dist/ops/matMulGelu.d.ts +3 -0
  54. package/dist/ops/matMulGelu.js +14 -0
  55. package/dist/ops/mulDrop.js +1 -1
  56. package/dist/ops/node/sparseCrossEntropy.js +1 -1
  57. package/dist/ops/qkv.js +1 -1
  58. package/dist/ops/scatterSub.js +1 -1
  59. package/dist/ops/webgl/appendCache.js +14 -13
  60. package/dist/ops/webgl/attentionMask.js +1 -1
  61. package/dist/ops/webgl/fusedSoftmax.js +689 -895
  62. package/dist/ops/webgl/gatherSub.js +1 -1
  63. package/dist/ops/webgl/gelu.js +2 -2
  64. package/dist/ops/webgl/matMulGelu.d.ts +20 -0
  65. package/dist/ops/webgl/matMulGelu.js +166 -0
  66. package/dist/ops/webgl/mulDropout.js +1 -1
  67. package/dist/ops/webgl/qkv.js +1 -1
  68. package/dist/ops/webgl/rope.js +1 -1
  69. package/dist/ops/webgl/scatterSub.js +1 -1
  70. package/dist/{range-9AzeApCc.js → range-C_vpUjBu.js} +1 -1
  71. package/dist/{reshape-Boe4DuIO.js → reshape-z51Eu-re.js} +1 -1
  72. package/dist/{sin-KmhiDuMa.js → sin-H567uayl.js} +1 -1
  73. package/dist/{slice_util-19zDNNSn.js → slice_util-BdhYwFY_.js} +2 -2
  74. package/dist/{softmax-Cujsg4ay.js → softmax-Dsxflvdl.js} +1 -1
  75. package/dist/{split-DbcNm1-i.js → split-B_k_jwud.js} +1 -1
  76. package/dist/{stack-D1YjmgKN.js → stack-CmqSdsfs.js} +1 -1
  77. package/dist/{sum-R28pucR5.js → sum-DdkDf2MG.js} +1 -1
  78. package/dist/{tensor-BVeHdl7V.js → tensor-BGYi41cj.js} +1 -1
  79. package/dist/{tensor2d-DqFGNs_K.js → tensor2d-DUr_htjt.js} +1 -1
  80. package/dist/{tfjs_backend-Cug-PH75.js → tfjs_backend-DuKis_xG.js} +46 -46
  81. package/dist/training/AdamExt.js +1 -1
  82. package/dist/training/DatasetBuilder.js +18 -18
  83. package/dist/training/FullTrainer.js +1 -1
  84. package/dist/training/Trainer.js +5 -5
  85. package/dist/training/sparseCrossEntropy.js +4 -4
  86. package/dist/utilities/dummy.js +2 -2
  87. package/dist/utilities/generate.js +3 -3
  88. package/dist/utilities/load.js +1 -1
  89. package/dist/utilities/profile.js +1 -1
  90. package/dist/utilities/weights.js +2 -2
  91. package/dist/{variable-LJT9Ld63.js → variable-BJTZ3jOy.js} +1 -1
  92. package/dist/{zeros-dnQxFgAD.js → zeros-8xl-W2DC.js} +1 -1
  93. package/package.json +1 -1
  94. package/dist/gelu-CnCt17Lk.js +0 -26
@@ -1,4 +1,4 @@
1
- import { r as l } from "../../index-C4JCoBvj.js";
1
+ import { r as l } from "../../index--6vO-cOz.js";
2
2
  class u {
3
3
  variableNames = ["labels", "logits", "values"];
4
4
  outputShape;
@@ -1,5 +1,5 @@
1
- import { r as a } from "../../index-C4JCoBvj.js";
2
- import { u as s, C as x } from "../../kernel_funcs_utils-CAd1h9X1.js";
1
+ import { r as a } from "../../index--6vO-cOz.js";
2
+ import { u as s, C as x } from "../../kernel_funcs_utils-C6YBCuOt.js";
3
3
  const t = 0.7978845608028654, r = 0.044715, c = x + `
4
4
  float x3 = x * x * x;
5
5
  float inner = x + ${r} * x3;
@@ -0,0 +1,20 @@
1
+ import { TensorInfo } from '@tensorflow/tfjs-core';
2
+ import { MathBackendWebGL } from '@tensorflow/tfjs-backend-webgl';
3
+ export declare const MATMUL_SHARED_DIM_THRESHOLD = 1000;
4
+ type BatchMatMulConfig = {
5
+ a: TensorInfo;
6
+ b: TensorInfo;
7
+ transposeA: boolean;
8
+ transposeB: boolean;
9
+ backend: MathBackendWebGL;
10
+ activationSnippet: string;
11
+ };
12
+ export declare function batchMatMulGeluImpl({ a, b, transposeA, transposeB, backend, activationSnippet, }: BatchMatMulConfig): TensorInfo;
13
+ export declare function batchMatMulKernel(args: {
14
+ inputs: {
15
+ x: TensorInfo;
16
+ kernel: TensorInfo;
17
+ };
18
+ backend: MathBackendWebGL;
19
+ }): TensorInfo;
20
+ export {};
@@ -0,0 +1,166 @@
1
+ import { r as G, t as P, e as R, b as I, n as k, O as L, j as F, Q as U } from "../../index--6vO-cOz.js";
2
+ import { r as g } from "../../Reshape-CiAY8ltP.js";
3
+ import { u as H } from "../../gpgpu_math-CUzjlO9A.js";
4
+ import { m as z } from "../../mat_mul-BEHRPMh0.js";
5
+ /**
6
+ * @license
7
+ * Copyright 2018 Google LLC. All Rights Reserved.
8
+ * Licensed under the Apache License, Version 2.0 (the "License");
9
+ * you may not use this file except in compliance with the License.
10
+ * You may obtain a copy of the License at
11
+ *
12
+ * http://www.apache.org/licenses/LICENSE-2.0
13
+ *
14
+ * Unless required by applicable law or agreed to in writing, software
15
+ * distributed under the License is distributed on an "AS IS" BASIS,
16
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
+ * See the License for the specific language governing permissions and
18
+ * limitations under the License.
19
+ * =============================================================================
20
+ */
21
+ class W {
22
+ constructor(e, s, a, n = !1, c = !1, o = !1, r = null, i = !1, u = !1) {
23
+ this.variableNames = ["matrixA", "matrixB"], this.packedInputs = !0, this.packedOutput = !0, this.outputShape = a, this.enableShapeUniforms = H(this.outputShape.length);
24
+ const p = n ? e[1] : e[2], l = Math.ceil(p / 2), b = n ? "i * 2, rc.y" : "rc.y, i * 2", M = c ? "rc.z, i * 2" : "i * 2, rc.z", h = n ? ["a.xxyy", "a.zzww"] : ["a.xxzz", "a.yyww"], d = c ? ["b.xzxz", "b.ywyw"] : ["b.xyxy", "b.zwzw"];
25
+ let m = "", v = "";
26
+ r && (i ? m = `vec4 activation(vec4 a) {
27
+ vec4 b = getPreluActivationWeightsAtOutCoords();
28
+ ${r}
29
+ }` : u ? m = `vec4 activation(vec4 a) {
30
+ vec4 b = getLeakyreluAlphaAtOutCoords();
31
+ ${r}
32
+ }` : m = `vec4 activation(vec4 x) {
33
+ ${r}
34
+ }`, v = "result = activation(result);");
35
+ const $ = o ? "result += getBiasAtOutCoords();" : "";
36
+ o && this.variableNames.push("bias"), i && this.variableNames.push("preluActivationWeights"), u && this.variableNames.push("leakyreluAlpha");
37
+ let f = "rc.x", x = "rc.x";
38
+ e[0] < s[0] ? f = `imod(rc.x, ${e[0]})` : s[0] < e[0] && (x = `imod(rc.x, ${s[0]})`), this.userCode = `
39
+ ${m}
40
+ // Don't use uniform for sharedDimensionPacked for performance.
41
+ const float sharedDimension = ${l}.0;
42
+
43
+ vec4 dot2x2ARowBCol(ivec3 rc) {
44
+ vec4 result = vec4(0);
45
+ int batchA = ${f};
46
+ int batchB = ${x};
47
+ for (int i = 0; i < ${l}; i++) {
48
+ vec4 a = getMatrixA(batchA, ${b});
49
+ vec4 b = getMatrixB(batchB, ${M});
50
+
51
+ // These swizzled products need to be separately added.
52
+ // See: https://github.com/tensorflow/tfjs/issues/1735
53
+ result += (${h[0]} * ${d[0]});
54
+ result += (${h[1]} * ${d[1]});
55
+ }
56
+ return result;
57
+ }
58
+
59
+ void main() {
60
+ ivec3 rc = getOutputCoords();
61
+ vec4 result = dot2x2ARowBCol(rc);
62
+
63
+ ${$}
64
+
65
+ ${v}
66
+
67
+ setOutput(result);
68
+ }
69
+ `;
70
+ }
71
+ }
72
+ const S = 0.7978845608028654, w = 0.044715, j = `
73
+ vec4 x3 = x * x * x;
74
+ vec4 inner = x + ${w} * x3;
75
+ inner = ${S} * inner;
76
+ inner = tanh(inner);
77
+ inner = 0.5 * (1.0 + inner);
78
+ vec4 result = x * inner;
79
+ return result;
80
+ `, q = `
81
+ vec4 x2 = x * x;
82
+ vec4 x3 = x2 * x;
83
+ vec4 u = ${S} * (x + ${w} * x3);
84
+ vec4 t = tanh(u);
85
+ vec4 sech2 = 1.0 - t * t;
86
+ vec4 du_dx = ${S} * (1.0 + 3.0 * ${w} * x2);
87
+ vec4 dgelu = 0.5 * (1.0 + t) + 0.5 * x * sech2 * du_dx;
88
+ return dgelu;
89
+ `, se = 1e3;
90
+ function B({
91
+ a: t,
92
+ b: e,
93
+ transposeA: s,
94
+ transposeB: a,
95
+ backend: n,
96
+ activationSnippet: c
97
+ }) {
98
+ const o = t.shape.length, r = e.shape.length, i = s ? t.shape[o - 2] : t.shape[o - 1], u = a ? e.shape[r - 1] : e.shape[r - 2], p = s ? t.shape[o - 1] : t.shape[o - 2], l = a ? e.shape[r - 2] : e.shape[r - 1], b = t.shape.slice(0, -2), M = e.shape.slice(0, -2), h = k(b), d = k(M), v = L(t.shape.slice(0, -2), e.shape.slice(0, -2)).concat([p, l]);
99
+ F(
100
+ i === u,
101
+ () => `Error in matMul: inner shapes (${i}) and (${u}) of Tensors with shapes ${t.shape} and ${e.shape} and transposeA=${s} and transposeB=${a} must match.`
102
+ );
103
+ const $ = s ? [h, i, p] : [h, p, i], f = a ? [d, l, u] : [d, u, l], x = g({ inputs: { x: t }, backend: n, attrs: { shape: $ } }), A = g({ inputs: { x: e }, backend: n, attrs: { shape: f } }), y = [x, A], C = Math.max(h, d), O = c, E = U(t.dtype, e.dtype), N = new W(
104
+ $,
105
+ f,
106
+ [C, p, l],
107
+ s,
108
+ a,
109
+ !1,
110
+ O,
111
+ !1,
112
+ !1
113
+ ), T = [x, A], D = n.runWebGLProgram(N, T, E), _ = g({ inputs: { x: D }, backend: n, attrs: { shape: v } });
114
+ y.push(D);
115
+ for (const K of y)
116
+ n.disposeIntermediateTensorInfo(K);
117
+ return _;
118
+ }
119
+ function Q(t) {
120
+ const { inputs: e, backend: s } = t, { x: a, kernel: n } = e;
121
+ if (a === void 0 || n === void 0)
122
+ throw new Error("BatchMatMul requires two input tensors.");
123
+ return B({
124
+ a,
125
+ b: n,
126
+ transposeA: !1,
127
+ transposeB: !1,
128
+ backend: s,
129
+ activationSnippet: j
130
+ });
131
+ }
132
+ const J = {
133
+ kernelName: "MatMulGelu",
134
+ backendName: "webgl",
135
+ kernelFunc: Q
136
+ };
137
+ G(J);
138
+ function V(t) {
139
+ const { dy: e, x: s, kernel: a } = t.inputs, n = t.backend;
140
+ return P(() => {
141
+ const c = R().makeTensorFromTensorInfo(
142
+ B({
143
+ a: s,
144
+ b: a,
145
+ transposeA: !1,
146
+ transposeB: !1,
147
+ backend: n,
148
+ activationSnippet: q
149
+ })
150
+ ), o = I(e, c);
151
+ c.dispose();
152
+ const r = z(o, a, !1, !0), i = z(s, o, !0, !1);
153
+ return [r, i];
154
+ });
155
+ }
156
+ const X = {
157
+ kernelName: "MatMulGeluGrad",
158
+ backendName: "webgl",
159
+ kernelFunc: V
160
+ };
161
+ G(X);
162
+ export {
163
+ se as MATMUL_SHARED_DIM_THRESHOLD,
164
+ B as batchMatMulGeluImpl,
165
+ Q as batchMatMulKernel
166
+ };
@@ -1,4 +1,4 @@
1
- import { r as m } from "../../index-C4JCoBvj.js";
1
+ import { r as m } from "../../index--6vO-cOz.js";
2
2
  class f {
3
3
  variableNames = ["a", "b"];
4
4
  outputShape;
@@ -1,4 +1,4 @@
1
- import { r as i } from "../../index-C4JCoBvj.js";
1
+ import { r as i } from "../../index--6vO-cOz.js";
2
2
  class l {
3
3
  variableNames = ["x", "kernel"];
4
4
  outputShape;
@@ -1,4 +1,4 @@
1
- import { r as u } from "../../index-C4JCoBvj.js";
1
+ import { r as u } from "../../index--6vO-cOz.js";
2
2
  class l {
3
3
  variableNames = ["x", "sin", "cos"];
4
4
  outputShape;
@@ -1,4 +1,4 @@
1
- import { r as i } from "../../index-C4JCoBvj.js";
1
+ import { r as i } from "../../index--6vO-cOz.js";
2
2
  class u {
3
3
  variableNames = ["labels", "softmaxProbs", "dy"];
4
4
  outputShape;
@@ -1,4 +1,4 @@
1
- import { E as e, R as f } from "./index-C4JCoBvj.js";
1
+ import { E as e, R as f } from "./index--6vO-cOz.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { o, h as t, E as a, q as p } from "./index-C4JCoBvj.js";
1
+ import { o, h as t, E as a, q as p } from "./index--6vO-cOz.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { o, h as t, E as c, Q as a, T as e } from "./index-C4JCoBvj.js";
1
+ import { o, h as t, E as c, V as a, W as e } from "./index--6vO-cOz.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,5 +1,5 @@
1
- import { o as u, h as p, k as g, w as m, E as w, a1 as x, j as i } from "./index-C4JCoBvj.js";
2
- import { r as y } from "./reshape-Boe4DuIO.js";
1
+ import { o as u, h as p, k as g, w as m, E as w, a4 as x, j as i } from "./index--6vO-cOz.js";
2
+ import { r as y } from "./reshape-z51Eu-re.js";
3
3
  /**
4
4
  * @license
5
5
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { o as r, h as f, E as e, S as i } from "./index-C4JCoBvj.js";
1
+ import { o as r, h as f, E as e, S as i } from "./index--6vO-cOz.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { o as p, h as i, E as a, u as c } from "./index-C4JCoBvj.js";
1
+ import { o as p, h as i, E as a, u as c } from "./index--6vO-cOz.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { o as e, i as c, j as n, E as i, P as k } from "./index-C4JCoBvj.js";
1
+ import { o as e, i as c, j as n, E as i, P as k } from "./index--6vO-cOz.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { o as e, h as u, x as c, E as l, y as m } from "./index-C4JCoBvj.js";
1
+ import { o as e, h as u, x as c, E as l, y as m } from "./index--6vO-cOz.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { J as t, K as a } from "./index-C4JCoBvj.js";
1
+ import { J as t, K as a } from "./index--6vO-cOz.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { I as t, J as s, K as a } from "./index-C4JCoBvj.js";
1
+ import { I as t, J as s, K as a } from "./index--6vO-cOz.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,18 +1,18 @@
1
- import { o as f, h as l, E as d, an as nn, ao as tn, j as p, ap as sn, D as Ve, aq as rn, al as P, ar as on, as as an, at as cn, au as un, a1 as ln, aa as pn, p as Y, av as fn, aw as hn, ax as dn, ay as mn, az as gn, aA as $n, aB as bn, aC as kn, x as C, aD as xn, aE as wn, aF as An, aG as yn, aH as _n, aI as In, aJ as En, aK as Sn, I as Tn, J as Nn, K as Dn, aL as Mn, t as K, aM as he, b as I, aN as On, a5 as U, n as Me, c as Oe, aO as Be, aP as Bn, aQ as Pn, aR as Kn, aS as Rn, aT as qn, aU as jn, f as Fn, aV as Gn, aW as Ln, ad as L, s as q, aX as Vn, w as de, a as Cn, ai as Pe, aY as vn, $ as zn } from "./index-C4JCoBvj.js";
2
- import { r as $ } from "./reshape-Boe4DuIO.js";
3
- import { s as Ce } from "./split-DbcNm1-i.js";
4
- import { s as G } from "./sum-R28pucR5.js";
5
- import { b as me } from "./slice_util-19zDNNSn.js";
6
- import { r as ue } from "./range-9AzeApCc.js";
7
- import { t as Wn } from "./tensor-BVeHdl7V.js";
8
- import { s as le } from "./stack-D1YjmgKN.js";
9
- import { c as Jn, z as Yn } from "./zeros-dnQxFgAD.js";
10
- import { n as ve } from "./norm-CZM380I3.js";
11
- import { c as V } from "./concat-CuRsVY-K.js";
12
- import { m as y } from "./mat_mul-415y5Qn2.js";
13
- import { t as ge } from "./tensor2d-DqFGNs_K.js";
14
- import { r as Un, d as Xn } from "./dropout-DfDdklfL.js";
15
- import { g as Zn } from "./gather-ZYRWhmXR.js";
1
+ import { o as f, h as l, E as d, ap as nn, aq as tn, j as p, ar as sn, D as Ve, as as rn, O as P, at as on, au as an, av as cn, aw as un, a4 as ln, ag as pn, p as Y, ax as fn, ay as hn, az as dn, aA as mn, aB as gn, aC as $n, aD as bn, aE as kn, x as C, aF as xn, aG as wn, aH as An, aI as yn, aJ as _n, aK as In, aL as En, aM as Sn, I as Tn, J as Nn, K as Dn, aN as Mn, t as K, aO as he, b as I, aP as On, a8 as U, n as Me, c as Oe, aQ as Be, aR as Bn, aS as Pn, aT as Kn, aU as Rn, aV as qn, aW as jn, f as Fn, aX as Gn, aY as Ln, ai as L, s as q, aZ as Vn, w as de, a as Cn, am as Pe, a_ as vn, a2 as zn } from "./index--6vO-cOz.js";
2
+ import { r as $ } from "./reshape-z51Eu-re.js";
3
+ import { s as Ce } from "./split-B_k_jwud.js";
4
+ import { s as G } from "./sum-DdkDf2MG.js";
5
+ import { b as me } from "./slice_util-BdhYwFY_.js";
6
+ import { r as ue } from "./range-C_vpUjBu.js";
7
+ import { t as Wn } from "./tensor-BGYi41cj.js";
8
+ import { s as le } from "./stack-CmqSdsfs.js";
9
+ import { c as Jn, z as Yn } from "./zeros-8xl-W2DC.js";
10
+ import { n as ve } from "./norm-DSva3hI3.js";
11
+ import { c as V } from "./concat-DvWM7HGZ.js";
12
+ import { m as y } from "./mat_mul-BEHRPMh0.js";
13
+ import { t as ge } from "./tensor2d-DUr_htjt.js";
14
+ import { r as Un, d as Zn } from "./dropout-DFEXTPV0.js";
15
+ import { g as Xn } from "./gather-C5D8PxwA.js";
16
16
  /**
17
17
  * @license
18
18
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -708,7 +708,7 @@ function Ut(e, n = 0) {
708
708
  const s = { x: l(e, "x", "step") }, r = { alpha: n };
709
709
  return d.runKernel(Sn, s, r);
710
710
  }
711
- const Xt = /* @__PURE__ */ f({ step_: Ut });
711
+ const Zt = /* @__PURE__ */ f({ step_: Ut });
712
712
  /**
713
713
  * @license
714
714
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -748,13 +748,13 @@ function B(e, n) {
748
748
  * limitations under the License.
749
749
  * =============================================================================
750
750
  */
751
- function Zt(e, n = 0) {
751
+ function Xt(e, n = 0) {
752
752
  const t = l(e, "x", "unstack", "string_or_numeric");
753
753
  p(n >= -t.shape.length && n < t.shape.length, () => `Axis = ${n} is not in [-${t.shape.length}, ${t.shape.length})`);
754
754
  const s = { value: t }, r = { axis: n };
755
755
  return d.runKernel(Mn, s, r);
756
756
  }
757
- const Ye = /* @__PURE__ */ f({ unstack_: Zt });
757
+ const Ye = /* @__PURE__ */ f({ unstack_: Xt });
758
758
  /**
759
759
  * @license
760
760
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -804,7 +804,7 @@ function Qt(e, n, t) {
804
804
  if (t == null || t === "linear")
805
805
  return e;
806
806
  if (t === "relu")
807
- return I(e, Xt(n));
807
+ return I(e, Zt(n));
808
808
  throw new Error(`Cannot compute gradient for fused activation ${t}.`);
809
809
  }
810
810
  function es(e, n) {
@@ -862,13 +862,13 @@ function ss({ a: e, b: n, transposeA: t = !1, transposeB: s = !1, bias: r, activ
862
862
  a != null && (te = l(a, "prelu weights", "fused matMul"));
863
863
  const v = (z, se) => {
864
864
  const [j, F, W, ce] = se, R = Qt($(z, W.shape), W, o);
865
- let Z, H;
866
- if (!t && !s ? (Z = y(R, F, !1, !0), H = y(j, R, !0, !1)) : !t && s ? (Z = y(R, F, !1, !1), H = y(R, j, !0, !1)) : t && !s ? (Z = y(F, R, !1, !0), H = y(j, R, !1, !1)) : (Z = y(F, R, !0, !0), H = y(R, j, !0, !0)), r != null) {
865
+ let X, H;
866
+ if (!t && !s ? (X = y(R, F, !1, !0), H = y(j, R, !0, !1)) : !t && s ? (X = y(R, F, !1, !1), H = y(R, j, !0, !1)) : t && !s ? (X = y(F, R, !1, !0), H = y(j, R, !1, !1)) : (X = y(F, R, !0, !0), H = y(R, j, !0, !0)), r != null) {
867
867
  const en = es(ce, R);
868
- return [Z, H, en];
868
+ return [X, H, en];
869
869
  } else
870
- return [Z, H];
871
- }, X = {
870
+ return [X, H];
871
+ }, Z = {
872
872
  a: M,
873
873
  b: O,
874
874
  bias: N,
@@ -877,13 +877,13 @@ function ss({ a: e, b: n, transposeA: t = !1, transposeB: s = !1, bias: r, activ
877
877
  return r == null ? Oe((se, j, F) => {
878
878
  const W = (
879
879
  // tslint:disable-next-line: no-unnecessary-type-assertion
880
- d.runKernel(Be, X, De)
880
+ d.runKernel(Be, Z, De)
881
881
  );
882
882
  return F([se, j, W]), { value: $(W, T), gradFunc: v };
883
883
  })(M, O) : Oe((se, j, F, W) => {
884
884
  const ce = (
885
885
  // tslint:disable-next-line: no-unnecessary-type-assertion
886
- d.runKernel(Be, X, De)
886
+ d.runKernel(Be, Z, De)
887
887
  );
888
888
  return W([se, j, ce, F]), { value: $(ce, T), gradFunc: v };
889
889
  })(M, O, N);
@@ -1575,15 +1575,15 @@ function Fe(e, n = !1) {
1575
1575
  if (c === 0)
1576
1576
  o = q(M, y(O, y(N, M)));
1577
1577
  else {
1578
- const X = q(M, y(O, y(N, M)));
1579
- o = V([_(o, [0, 0], [c, s]), X], 0);
1578
+ const Z = q(M, y(O, y(N, M)));
1579
+ o = V([_(o, [0, 0], [c, s]), Z], 0);
1580
1580
  }
1581
1581
  const te = xe(O), v = _(r, [0, c], [t, r.shape[1] - c]);
1582
1582
  if (c === 0)
1583
1583
  r = q(v, y(y(v, i), te));
1584
1584
  else {
1585
- const X = q(v, y(y(v, i), te));
1586
- r = V([_(r, [0, 0], [t, c]), X], 1);
1585
+ const Z = q(v, y(y(v, i), te));
1586
+ r = V([_(r, [0, 0], [t, c]), Z], 1);
1587
1587
  }
1588
1588
  return [i, o, r];
1589
1589
  }), Cn([m, h, b]);
@@ -1638,7 +1638,7 @@ const Ar = {
1638
1638
  * https://opensource.org/licenses/MIT.
1639
1639
  * =============================================================================
1640
1640
  */
1641
- const Xs = ["channelsFirst", "channelsLast"], Zs = ["nearest", "bilinear"], Hs = ["valid", "same", "causal"], Qs = ["max", "avg"], _r = ["sum", "mul", "concat", "ave"];
1641
+ const Zs = ["channelsFirst", "channelsLast"], Xs = ["nearest", "bilinear"], Hs = ["valid", "same", "causal"], Qs = ["max", "avg"], _r = ["sum", "mul", "concat", "ave"];
1642
1642
  /**
1643
1643
  * @license
1644
1644
  * Copyright 2018 Google LLC
@@ -1653,9 +1653,9 @@ class Ue extends Error {
1653
1653
  super(n), Object.setPrototypeOf(this, Ue.prototype);
1654
1654
  }
1655
1655
  }
1656
- class Xe extends Error {
1656
+ class Ze extends Error {
1657
1657
  constructor(n) {
1658
- super(n), Object.setPrototypeOf(this, Xe.prototype);
1658
+ super(n), Object.setPrototypeOf(this, Ze.prototype);
1659
1659
  }
1660
1660
  }
1661
1661
  class A extends Error {
@@ -1812,10 +1812,10 @@ function Rr(e, n, t = 0, s = 1 / 0) {
1812
1812
  return Ge(t >= 0), Ge(s >= t), Array.isArray(e) && e.length >= t && e.length <= s && e.every((r) => typeof r === n);
1813
1813
  }
1814
1814
  function nr(e, n) {
1815
- Array.isArray(e) ? (p(e.length > 0, () => `${n} is unexpectedly an empty array.`), e.forEach((t, s) => nr(t, `element ${s + 1} of ${n}`))) : p(Number.isInteger(e) && e > 0, () => `Expected ${n} to be a positive integer, but got ${Ze(e)}.`);
1815
+ Array.isArray(e) ? (p(e.length > 0, () => `${n} is unexpectedly an empty array.`), e.forEach((t, s) => nr(t, `element ${s + 1} of ${n}`))) : p(Number.isInteger(e) && e > 0, () => `Expected ${n} to be a positive integer, but got ${Xe(e)}.`);
1816
1816
  }
1817
- function Ze(e) {
1818
- return e === null ? "null" : Array.isArray(e) ? "[" + e.map((n) => Ze(n)).join(",") + "]" : typeof e == "string" ? `"${e}"` : `${e}`;
1817
+ function Xe(e) {
1818
+ return e === null ? "null" : Array.isArray(e) ? "[" + e.map((n) => Xe(n)).join(",") + "]" : typeof e == "string" ? `"${e}"` : `${e}`;
1819
1819
  }
1820
1820
  function qr(e, n, t) {
1821
1821
  let s = t != null ? t() : Pe(), r;
@@ -1838,10 +1838,10 @@ function jr(e) {
1838
1838
  */
1839
1839
  const Q = /* @__PURE__ */ new Map();
1840
1840
  function tr(e) {
1841
- fe(Xs, "DataFormat", e);
1841
+ fe(Zs, "DataFormat", e);
1842
1842
  }
1843
1843
  function Fr(e) {
1844
- fe(Zs, "InterpolationFormat", e);
1844
+ fe(Xs, "InterpolationFormat", e);
1845
1845
  }
1846
1846
  function Gr(e) {
1847
1847
  fe(Hs, "PaddingMode", e);
@@ -1954,14 +1954,14 @@ function Ae() {
1954
1954
  * https://opensource.org/licenses/MIT.
1955
1955
  * =============================================================================
1956
1956
  */
1957
- function Xr(e, n) {
1957
+ function Zr(e, n) {
1958
1958
  return C(e, n);
1959
1959
  }
1960
1960
  function or(e, n = -1) {
1961
1961
  const t = e.shape.slice();
1962
1962
  return n < 0 && (n = t.length + n + 1), t.splice(n, 0, 1), $(e, t);
1963
1963
  }
1964
- function Zr(e, n) {
1964
+ function Xr(e, n) {
1965
1965
  return K(() => {
1966
1966
  if (e.shape.length !== 2)
1967
1967
  throw new A(`repeat() expects a rank-2 tensor, but received a rank-${e.shape.length} tensor.`);
@@ -2131,7 +2131,7 @@ function ro(e, n, t, s) {
2131
2131
  }
2132
2132
  }
2133
2133
  function oo(e, n, t) {
2134
- return K(() => (Array.isArray(n) ? n = B(n, "int32") : n = C(n, "int32"), Zn(e, n, t)));
2134
+ return K(() => (Array.isArray(n) ? n = B(n, "int32") : n = C(n, "int32"), Xn(e, n, t)));
2135
2135
  }
2136
2136
  function ao(e) {
2137
2137
  return I(e, e);
@@ -2171,7 +2171,7 @@ function uo(e) {
2171
2171
  return K(() => L(e, U(zn(e), 1)));
2172
2172
  }
2173
2173
  function lo(e, n, t, s) {
2174
- return K(() => Xn(e, n, t, s));
2174
+ return K(() => Zn(e, n, t, s));
2175
2175
  }
2176
2176
  function po(e) {
2177
2177
  return K(() => {
@@ -2200,8 +2200,8 @@ export {
2200
2200
  ie as N,
2201
2201
  ao as O,
2202
2202
  Ge as P,
2203
- Xr as Q,
2204
- Xe as R,
2203
+ Zr as Q,
2204
+ Ze as R,
2205
2205
  Yt as S,
2206
2206
  Dr as T,
2207
2207
  Pr as U,
@@ -2240,7 +2240,7 @@ export {
2240
2240
  lo as ap,
2241
2241
  no as aq,
2242
2242
  Qr as ar,
2243
- Zr as as,
2243
+ Xr as as,
2244
2244
  Lr as at,
2245
2245
  _r as au,
2246
2246
  es as b,
@@ -2258,7 +2258,7 @@ export {
2258
2258
  Sr as n,
2259
2259
  Tr as o,
2260
2260
  Vr as p,
2261
- Xt as q,
2261
+ Zt as q,
2262
2262
  so as r,
2263
2263
  ts as s,
2264
2264
  Nr as t,
@@ -1,4 +1,4 @@
1
- import { A as r, b as c, f as h, s as g, e as o } from "../index-C4JCoBvj.js";
1
+ import { A as r, b as c, f as h, s as g, e as o } from "../index--6vO-cOz.js";
2
2
  class u extends r {
3
3
  constructor(t, e, s, a, i) {
4
4
  super(t, e, s, a), this.config = i, this.startLearningRate = t;
@@ -1,7 +1,7 @@
1
- import { ae as $, ac as m, af as M, a as R, ag as f, ah as v, ai as z, j as _, t as x } from "../index-C4JCoBvj.js";
1
+ import { aj as $, ah as d, L as M, a as R, ak as f, al as v, am as z, j as _, t as x } from "../index--6vO-cOz.js";
2
2
  import { s as E } from "../index-C4L8Cm77.js";
3
- import { s as P } from "../stack-D1YjmgKN.js";
4
- import { t as D } from "../tensor-BVeHdl7V.js";
3
+ import { s as P } from "../stack-CmqSdsfs.js";
4
+ import { t as D } from "../tensor-BGYi41cj.js";
5
5
  import "../index-Tf7vU29b.js";
6
6
  /**
7
7
  * @license
@@ -82,10 +82,10 @@ function p(s) {
82
82
  const { StringDecoder: e } = require("string_decoder");
83
83
  t = s instanceof e;
84
84
  }
85
- return s != null && !ArrayBuffer.isView(s) && (Array.isArray(s) || typeof s == "object" && !(s instanceof m) && !(s instanceof Promise) && !t);
85
+ return s != null && !ArrayBuffer.isView(s) && (Array.isArray(s) || typeof s == "object" && !(s instanceof d) && !(s instanceof Promise) && !t);
86
86
  }
87
87
  function H(s) {
88
- return s == null || q(s) || Array.isArray(s) || typeof s == "object" && s instanceof m || $(s);
88
+ return s == null || q(s) || Array.isArray(s) || typeof s == "object" && s instanceof d || $(s);
89
89
  }
90
90
  function q(s) {
91
91
  return s === null || typeof s != "object" && typeof s != "function";
@@ -111,7 +111,7 @@ function Q(s) {
111
111
  return L(s, G);
112
112
  }
113
113
  function G(s) {
114
- return s instanceof m ? { value: s.clone(), recurse: !1 } : p(s) ? { value: null, recurse: !0 } : { value: s, recurse: !1 };
114
+ return s instanceof d ? { value: s.clone(), recurse: !1 } : p(s) ? { value: null, recurse: !0 } : { value: s, recurse: !1 };
115
115
  }
116
116
  /**
117
117
  * @license
@@ -477,7 +477,7 @@ class i {
477
477
  * of the original element type.
478
478
  */
479
479
  rowMajorBatch(t, e = !0) {
480
- return new j(this, t, e);
480
+ return new K(this, t, e);
481
481
  }
482
482
  /**
483
483
  * Groups elements into batches, represented in column-major form.
@@ -535,7 +535,7 @@ class i {
535
535
  * unaltered.
536
536
  */
537
537
  take(t) {
538
- return t < 0 || t == null ? this : new K(this, t);
538
+ return t < 0 || t == null ? this : new j(this, t);
539
539
  }
540
540
  /**
541
541
  * Skips the first `count` items in this stream.
@@ -641,7 +641,7 @@ class X extends i {
641
641
  return this.upstream.next();
642
642
  }
643
643
  }
644
- class K extends i {
644
+ class j extends i {
645
645
  constructor(t, e) {
646
646
  super(), this.upstream = t, this.maxCount = e, this.count = 0;
647
647
  }
@@ -652,7 +652,7 @@ class K extends i {
652
652
  return this.count++ >= this.maxCount ? { value: null, done: !0 } : this.upstream.next();
653
653
  }
654
654
  }
655
- class j extends i {
655
+ class K extends i {
656
656
  constructor(t, e, r = !0) {
657
657
  super(), this.upstream = t, this.batchSize = e, this.enableSmallLastBatch = r, this.lastRead = Promise.resolve({ value: null, done: !1 });
658
658
  }
@@ -1219,7 +1219,7 @@ function at(s) {
1219
1219
  function it(s) {
1220
1220
  if (s.length === 0)
1221
1221
  throw new Error("Can't make a batch of zero elements.");
1222
- return s[0] instanceof m ? P(s) : D(s);
1222
+ return s[0] instanceof d ? P(s) : D(s);
1223
1223
  }
1224
1224
  /**
1225
1225
  * @license
@@ -1244,7 +1244,7 @@ function ut(s) {
1244
1244
  return k(() => t.next());
1245
1245
  });
1246
1246
  }
1247
- class dt {
1247
+ class mt {
1248
1248
  tokenizer;
1249
1249
  blockSize;
1250
1250
  constructor(t, e = 128) {
@@ -1257,20 +1257,20 @@ class dt {
1257
1257
  n === 1 ? void 0 : Math.floor(n * h.length)
1258
1258
  ), w = (function* () {
1259
1259
  for (; ; ) {
1260
- const u = Math.floor(Math.random() * (c.length - this.blockSize - 1)), d = c.slice(u, u + this.blockSize), B = c.slice(u + 1, u + this.blockSize + 1);
1261
- yield { xs: d, ys: B };
1260
+ const u = Math.floor(Math.random() * (c.length - this.blockSize - 1)), m = c.slice(u, u + this.blockSize), B = c.slice(u + 1, u + this.blockSize + 1);
1261
+ yield { xs: m, ys: B };
1262
1262
  }
1263
1263
  }).bind(this);
1264
1264
  return ut(w).batch(e).map((u) => {
1265
- const d = u;
1265
+ const m = u;
1266
1266
  return x(() => ({
1267
- xs: d.xs.cast("int32"),
1268
- ys: d.ys.cast("int32")
1267
+ xs: m.xs.cast("int32"),
1268
+ ys: m.ys.cast("int32")
1269
1269
  // this.tf.oneHot(batchData.ys.cast('int32'), this.tokenizer.vocabSize),
1270
1270
  }));
1271
1271
  }).prefetch(2);
1272
1272
  }
1273
1273
  }
1274
1274
  export {
1275
- dt as DatasetBuilder
1275
+ mt as DatasetBuilder
1276
1276
  };