@genai-fi/nanogpt 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +7 -7
- package/dist/NanoGPTModel.js +70 -121
- package/dist/RealDiv-BYViZwhN.js +540 -0
- package/dist/Reshape-t7Kcikjk.js +127 -0
- package/dist/TeachableLLM.d.ts +2 -0
- package/dist/TeachableLLM.js +34 -27
- package/dist/{TiedEmbedding-BhxWO8QR.js → TiedEmbedding-9WeDwvjO.js} +12 -13
- package/dist/{axis_util-D17qZRQm.js → axis_util-Bu4h7XWV.js} +14 -12
- package/dist/{broadcast_to-BMQLjvt_.js → broadcast_to-DARN-DBD.js} +2 -2
- package/dist/{concat-DhZfF1GY.js → concat-5aPGqw3Z.js} +3 -3
- package/dist/{dataset-oilnemHf.js → dataset-pgqp-YfL.js} +3 -3
- package/dist/{dropout-CrMQPCeG.js → dropout-Bciw46HT.js} +7 -7
- package/dist/{gather-DZCMHZuN.js → gather-DjyCjmOD.js} +1 -1
- package/dist/gpgpu_math-CNslybmD.js +3115 -0
- package/dist/{index-bMBtI-WR.js → index-BAzbokzv.js} +846 -649
- package/dist/{kernel_funcs_utils-CNmjLWnB.js → kernel_funcs_utils-CUxJCg0g.js} +232 -138
- package/dist/layers/BaseLayer.js +2 -2
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/MLP.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +13 -33
- package/dist/layers/TiedEmbedding.js +6 -7
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/loader/load.d.ts +13 -0
- package/dist/loader/load.js +27 -0
- package/dist/loader/loadHF.d.ts +7 -0
- package/dist/loader/loadHF.js +22 -0
- package/dist/{utilities/load.d.ts → loader/loadTransformers.d.ts} +11 -11
- package/dist/loader/loadTransformers.js +28 -0
- package/dist/loader/newZipLoad.d.ts +8 -0
- package/dist/loader/newZipLoad.js +21 -0
- package/dist/loader/oldZipLoad.d.ts +7 -0
- package/dist/loader/oldZipLoad.js +76 -0
- package/dist/{log_sum_exp-BHdkCb4s.js → log_sum_exp-YEo2h3gb.js} +14 -14
- package/dist/main.js +23 -20
- package/dist/{mat_mul-BsrLfy81.js → mat_mul-7121rsJk.js} +1 -1
- package/dist/{max-DechV4Bc.js → max-DtlIuVeW.js} +1 -1
- package/dist/mulmat_packed_gpu-D4nKF7Je.js +71 -0
- package/dist/{norm-B9hWHZH1.js → norm-CzltS9Fz.js} +16 -16
- package/dist/{ones-g0K8jVwm.js → ones-BBlSRqn1.js} +2 -2
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +6 -6
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +9 -9
- package/dist/ops/cpu/gelu.js +1 -1
- package/dist/ops/cpu/matMulGelu.js +1 -1
- package/dist/ops/cpu/matMulMul.js +1 -1
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +17 -48
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +1 -1
- package/dist/ops/grads/attentionMask.js +1 -1
- package/dist/ops/grads/fusedSoftmax.js +4 -4
- package/dist/ops/grads/gelu.js +1 -1
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/normRMS.js +1 -1
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/matMulGelu.js +1 -1
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/rope.js +8 -4
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +29 -560
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.js +3 -3
- package/dist/ops/webgl/matMulGelu.js +46 -113
- package/dist/ops/webgl/matMulMul.js +1 -1
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/{ops-Mv7Ta72x.js → ops-C0sQEcPw.js} +117 -109
- package/dist/{random_width-BBAWzDym.js → random_width-DWzaOgrn.js} +6925 -6291
- package/dist/{range-DMaG9A3G.js → range-DYsrnfiy.js} +1 -1
- package/dist/{gpgpu_math-Ctc31slO.js → reciprocal-CJQeasVa.js} +7 -5
- package/dist/register_all_kernels-BfFCQAqs.js +21397 -0
- package/dist/{reshape-T4yDEqoF.js → reshape-krWGKraP.js} +1 -1
- package/dist/scatter_nd_util-93ln7Hut.js +46 -0
- package/dist/selu_util-sntGesxr.js +740 -0
- package/dist/{shared-XNAoXhOa.js → shared-Ca6iDobD.js} +1462 -1089
- package/dist/{sin-EEhbrRO_.js → sin-D_h-qCSx.js} +1 -1
- package/dist/{softmax-B2_IKPDR.js → softmax-fsdtf6JC.js} +1 -1
- package/dist/{split-dcks18H1.js → split-eiktj-6L.js} +1 -1
- package/dist/{stack-lpJ5kYvE.js → stack-dfEEz2OY.js} +2 -2
- package/dist/{sum-CutF5lj2.js → sum-BE_Irnim.js} +1 -1
- package/dist/{tensor-C15NA2LA.js → tensor-Xyi595sG.js} +1 -1
- package/dist/{tensor2d-DZ_e5eKM.js → tensor2d-CPEkynbH.js} +1 -1
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +3 -3
- package/dist/training/sparseCrossEntropy.js +5 -5
- package/dist/utilities/dummy.d.ts +6 -0
- package/dist/utilities/dummy.js +31 -10
- package/dist/utilities/generate.js +3 -3
- package/dist/utilities/profile.d.ts +5 -0
- package/dist/utilities/profile.js +10 -7
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/save.js +1 -1
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-CdRKKp8x.js → variable-wSS22xj5.js} +1 -1
- package/dist/{zeros-CAbHfODe.js → zeros-YJDE7oRb.js} +4 -4
- package/package.json +2 -8
- package/dist/Reshape-CLOrdpve.js +0 -212
- package/dist/slice_util-Ddk0uxGJ.js +0 -49
- package/dist/tfjs_backend-BDb8r9qx.js +0 -1010
- package/dist/utilities/load.js +0 -99
|
@@ -1,129 +1,62 @@
|
|
|
1
|
-
import { r as
|
|
2
|
-
import { r as
|
|
3
|
-
import {
|
|
4
|
-
import { m as
|
|
5
|
-
|
|
6
|
-
* @license
|
|
7
|
-
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
8
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
-
* you may not use this file except in compliance with the License.
|
|
10
|
-
* You may obtain a copy of the License at
|
|
11
|
-
*
|
|
12
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
13
|
-
*
|
|
14
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
15
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
16
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
17
|
-
* See the License for the specific language governing permissions and
|
|
18
|
-
* limitations under the License.
|
|
19
|
-
* =============================================================================
|
|
20
|
-
*/
|
|
21
|
-
class W {
|
|
22
|
-
constructor(e, s, n, a = !1, c = !1, o = !1, r = null, u = !1, l = !1) {
|
|
23
|
-
this.variableNames = ["matrixA", "matrixB"], this.packedInputs = !0, this.packedOutput = !0, this.outputShape = n, this.enableShapeUniforms = H(this.outputShape.length);
|
|
24
|
-
const h = a ? e[1] : e[2], p = Math.ceil(h / 2), d = a ? "i * 2, rc.y" : "rc.y, i * 2", $ = c ? "rc.z, i * 2" : "i * 2, rc.z", x = a ? ["a.xxyy", "a.zzww"] : ["a.xxzz", "a.yyww"], m = c ? ["b.xzxz", "b.ywyw"] : ["b.xyxy", "b.zwzw"];
|
|
25
|
-
let i = "", b = "";
|
|
26
|
-
r && (u ? i = `vec4 activation(vec4 a) {
|
|
27
|
-
vec4 b = getPreluActivationWeightsAtOutCoords();
|
|
28
|
-
${r}
|
|
29
|
-
}` : l ? i = `vec4 activation(vec4 a) {
|
|
30
|
-
vec4 b = getLeakyreluAlphaAtOutCoords();
|
|
31
|
-
${r}
|
|
32
|
-
}` : i = `vec4 activation(vec4 x) {
|
|
33
|
-
${r}
|
|
34
|
-
}`, b = "result = activation(result);");
|
|
35
|
-
const M = o ? "result += getBiasAtOutCoords();" : "";
|
|
36
|
-
o && this.variableNames.push("bias"), u && this.variableNames.push("preluActivationWeights"), l && this.variableNames.push("leakyreluAlpha");
|
|
37
|
-
let f = "rc.x", v = "rc.x";
|
|
38
|
-
e[0] < s[0] ? f = `imod(rc.x, ${e[0]})` : s[0] < e[0] && (v = `imod(rc.x, ${s[0]})`), this.userCode = `
|
|
39
|
-
${i}
|
|
40
|
-
// Don't use uniform for sharedDimensionPacked for performance.
|
|
41
|
-
const float sharedDimension = ${p}.0;
|
|
42
|
-
|
|
43
|
-
vec4 dot2x2ARowBCol(ivec3 rc) {
|
|
44
|
-
vec4 result = vec4(0);
|
|
45
|
-
int batchA = ${f};
|
|
46
|
-
int batchB = ${v};
|
|
47
|
-
for (int i = 0; i < ${p}; i++) {
|
|
48
|
-
vec4 a = getMatrixA(batchA, ${d});
|
|
49
|
-
vec4 b = getMatrixB(batchB, ${$});
|
|
50
|
-
|
|
51
|
-
// These swizzled products need to be separately added.
|
|
52
|
-
// See: https://github.com/tensorflow/tfjs/issues/1735
|
|
53
|
-
result += (${x[0]} * ${m[0]});
|
|
54
|
-
result += (${x[1]} * ${m[1]});
|
|
55
|
-
}
|
|
56
|
-
return result;
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
void main() {
|
|
60
|
-
ivec3 rc = getOutputCoords();
|
|
61
|
-
vec4 result = dot2x2ARowBCol(rc);
|
|
62
|
-
|
|
63
|
-
${M}
|
|
64
|
-
|
|
65
|
-
${b}
|
|
66
|
-
|
|
67
|
-
setOutput(result);
|
|
68
|
-
}
|
|
69
|
-
`;
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
const g = 0.7978845608028654, w = 0.044715, q = `
|
|
1
|
+
import { r as _, t as R, e as C, g as A, h as N, i as H, u as O } from "../../index-BAzbokzv.js";
|
|
2
|
+
import { r as f } from "../../Reshape-t7Kcikjk.js";
|
|
3
|
+
import { M as U } from "../../mulmat_packed_gpu-D4nKF7Je.js";
|
|
4
|
+
import { m as E } from "../../mat_mul-7121rsJk.js";
|
|
5
|
+
const M = 0.7978845608028654, x = 0.044715, q = `
|
|
73
6
|
vec4 x3 = x * x * x;
|
|
74
|
-
vec4 inner = x + ${
|
|
75
|
-
inner = ${
|
|
7
|
+
vec4 inner = x + ${x} * x3;
|
|
8
|
+
inner = ${M} * inner;
|
|
76
9
|
inner = tanh(inner);
|
|
77
10
|
inner = 0.5 * (1.0 + inner);
|
|
78
11
|
vec4 result = x * inner;
|
|
79
12
|
return result;
|
|
80
|
-
`,
|
|
13
|
+
`, z = `
|
|
81
14
|
vec4 a2 = a * a;
|
|
82
15
|
vec4 a3 = a2 * a;
|
|
83
|
-
vec4 u = ${
|
|
16
|
+
vec4 u = ${M} * (a + ${x} * a3);
|
|
84
17
|
vec4 t = tanh(u);
|
|
85
18
|
vec4 sech2 = 1.0 - t * t;
|
|
86
|
-
vec4 du_dx = ${
|
|
19
|
+
vec4 du_dx = ${M} * (1.0 + 3.0 * ${x} * a2);
|
|
87
20
|
vec4 dgelu = 0.5 * (1.0 + t) + 0.5 * a * sech2 * du_dx;
|
|
88
21
|
return dgelu * b;
|
|
89
|
-
`,
|
|
90
|
-
function
|
|
91
|
-
a:
|
|
92
|
-
b:
|
|
22
|
+
`, te = 1e3;
|
|
23
|
+
function w({
|
|
24
|
+
a: e,
|
|
25
|
+
b: t,
|
|
93
26
|
transposeA: s,
|
|
94
27
|
transposeB: n,
|
|
95
28
|
backend: a,
|
|
96
29
|
activationSnippet: c,
|
|
97
30
|
multiplier: o
|
|
98
31
|
}) {
|
|
99
|
-
const r =
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
() => `Error in matMul: inner shapes (${
|
|
32
|
+
const r = e.shape.length, u = t.shape.length, i = s ? e.shape[r - 2] : e.shape[r - 1], p = n ? t.shape[u - 1] : t.shape[u - 2], h = s ? e.shape[r - 1] : e.shape[r - 2], l = n ? t.shape[u - 2] : t.shape[u - 1], K = e.shape.slice(0, -2), T = t.shape.slice(0, -2), d = A(K), m = A(T), b = N(e.shape.slice(0, -2), t.shape.slice(0, -2)).concat([h, l]);
|
|
33
|
+
H(
|
|
34
|
+
i === p,
|
|
35
|
+
() => `Error in matMul: inner shapes (${i}) and (${p}) of Tensors with shapes ${e.shape} and ${t.shape} and transposeA=${s} and transposeB=${n} must match.`
|
|
103
36
|
);
|
|
104
|
-
const
|
|
105
|
-
f,
|
|
37
|
+
const v = s ? [d, i, h] : [d, h, i], S = n ? [m, l, p] : [m, p, l], g = f({ inputs: { x: e }, backend: a, attrs: { shape: v } }), D = f({ inputs: { x: t }, backend: a, attrs: { shape: S } }), G = [g, D], y = Math.max(d, m), L = c, B = O(e.dtype, t.dtype), F = new U(
|
|
106
38
|
v,
|
|
107
|
-
|
|
39
|
+
S,
|
|
40
|
+
[y, h, l],
|
|
108
41
|
s,
|
|
109
42
|
n,
|
|
110
43
|
!1,
|
|
111
|
-
|
|
44
|
+
L,
|
|
112
45
|
!!o,
|
|
113
46
|
!1
|
|
114
|
-
), k = [
|
|
47
|
+
), k = [g, D];
|
|
115
48
|
o && k.push(o);
|
|
116
|
-
const
|
|
117
|
-
|
|
118
|
-
for (const P of
|
|
49
|
+
const $ = a.runWebGLProgram(F, k, B), I = f({ inputs: { x: $ }, backend: a, attrs: { shape: b } });
|
|
50
|
+
G.push($);
|
|
51
|
+
for (const P of G)
|
|
119
52
|
a.disposeIntermediateTensorInfo(P);
|
|
120
|
-
return
|
|
53
|
+
return I;
|
|
121
54
|
}
|
|
122
|
-
function
|
|
123
|
-
const { inputs:
|
|
55
|
+
function W(e) {
|
|
56
|
+
const { inputs: t, backend: s } = e, { x: n, kernel: a } = t;
|
|
124
57
|
if (n === void 0 || a === void 0)
|
|
125
58
|
throw new Error("BatchMatMul requires two input tensors.");
|
|
126
|
-
return
|
|
59
|
+
return w({
|
|
127
60
|
a: n,
|
|
128
61
|
b: a,
|
|
129
62
|
transposeA: !1,
|
|
@@ -132,37 +65,37 @@ function Q(t) {
|
|
|
132
65
|
activationSnippet: q
|
|
133
66
|
});
|
|
134
67
|
}
|
|
135
|
-
const
|
|
68
|
+
const j = {
|
|
136
69
|
kernelName: "MatMulGelu",
|
|
137
70
|
backendName: "webgl",
|
|
138
|
-
kernelFunc:
|
|
71
|
+
kernelFunc: W
|
|
139
72
|
};
|
|
140
|
-
|
|
141
|
-
function
|
|
142
|
-
const { dy:
|
|
73
|
+
_(j);
|
|
74
|
+
function J(e) {
|
|
75
|
+
const { dy: t, x: s, kernel: n } = e.inputs, a = e.backend;
|
|
143
76
|
return R(() => {
|
|
144
|
-
const c =
|
|
145
|
-
|
|
77
|
+
const c = C().makeTensorFromTensorInfo(
|
|
78
|
+
w({
|
|
146
79
|
a: s,
|
|
147
80
|
b: n,
|
|
148
81
|
transposeA: !1,
|
|
149
82
|
transposeB: !1,
|
|
150
83
|
backend: a,
|
|
151
|
-
activationSnippet:
|
|
152
|
-
multiplier:
|
|
84
|
+
activationSnippet: z,
|
|
85
|
+
multiplier: t
|
|
153
86
|
})
|
|
154
|
-
), o =
|
|
87
|
+
), o = E(c, n, !1, !0), r = E(s, c, !0, !1);
|
|
155
88
|
return [o, r];
|
|
156
89
|
});
|
|
157
90
|
}
|
|
158
|
-
const
|
|
91
|
+
const Q = {
|
|
159
92
|
kernelName: "MatMulGeluGrad",
|
|
160
93
|
backendName: "webgl",
|
|
161
|
-
kernelFunc:
|
|
94
|
+
kernelFunc: J
|
|
162
95
|
};
|
|
163
|
-
|
|
96
|
+
_(Q);
|
|
164
97
|
export {
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
98
|
+
te as MATMUL_SHARED_DIM_THRESHOLD,
|
|
99
|
+
w as batchMatMulGeluImpl,
|
|
100
|
+
W as batchMatMulKernel
|
|
168
101
|
};
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { r as p, e as G } from "../../index-
|
|
2
|
-
import { s as x } from "../../sum-
|
|
1
|
+
import { r as p, e as G } from "../../index-BAzbokzv.js";
|
|
2
|
+
import { s as x } from "../../sum-BE_Irnim.js";
|
|
3
3
|
class y {
|
|
4
4
|
variableNames = ["x", "meanSquare", "gamma"];
|
|
5
5
|
outputShape;
|
package/dist/ops/webgl/qkv.js
CHANGED
package/dist/ops/webgl/rope.js
CHANGED