@genai-fi/nanogpt 0.8.3 → 0.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/checks/gelu.js +5 -5
- package/dist/checks/index.d.ts +2 -0
- package/dist/checks/index.js +9 -7
- package/dist/checks/matMulGelu.d.ts +1 -0
- package/dist/checks/matMulGelu.js +32 -0
- package/dist/ops/webgl/gelu.js +8 -8
- package/dist/ops/webgl/matMulGelu.js +47 -37
- package/dist/ops/webgpu/gelu.js +20 -14
- package/package.json +1 -1
package/dist/checks/gelu.js
CHANGED
|
@@ -4,12 +4,12 @@ async function m(t) {
|
|
|
4
4
|
await e(t);
|
|
5
5
|
const r = s(
|
|
6
6
|
[
|
|
7
|
-
[0.1, 0.2, 0, 0],
|
|
8
|
-
[0.1, 0.2, 0, 0],
|
|
9
|
-
[0, 0, 0, 0],
|
|
10
|
-
[0, 0, 0, 0]
|
|
7
|
+
[0.1, 0.2, 0, 0, 1230, 1232331234, -12234234],
|
|
8
|
+
[0.1, 0.2, 0, 0, -1230, -1232331234, 12234234],
|
|
9
|
+
[0, 0, 0, 0, -1, 0, 0],
|
|
10
|
+
[0, 0, 0, 0, -0.1, 1e-3, 0]
|
|
11
11
|
],
|
|
12
|
-
[4, 4]
|
|
12
|
+
[4, 7]
|
|
13
13
|
);
|
|
14
14
|
return await o().runKernel("Gelu", { x: r }).array();
|
|
15
15
|
}
|
package/dist/checks/index.d.ts
CHANGED
|
@@ -5,6 +5,7 @@ import { execute as gelu } from './gelu';
|
|
|
5
5
|
import { execute as normRMSGrad } from './normRMSGrad';
|
|
6
6
|
import { execute as appendCache } from './appendCache';
|
|
7
7
|
import { execute as attentionMask } from './attentionMask';
|
|
8
|
+
import { execute as matMulGelu } from './matMulGelu';
|
|
8
9
|
import { default as runCheck } from './check';
|
|
9
10
|
import { createWeightStatistics, createTensorStatistics } from './weights';
|
|
10
11
|
declare const checks: {
|
|
@@ -15,6 +16,7 @@ declare const checks: {
|
|
|
15
16
|
normRMSGrad: typeof normRMSGrad;
|
|
16
17
|
appendCache: typeof appendCache;
|
|
17
18
|
attentionMask: typeof attentionMask;
|
|
19
|
+
matMulGelu: typeof matMulGelu;
|
|
18
20
|
runCheck: typeof runCheck;
|
|
19
21
|
createLayerWeightStatistics: typeof createWeightStatistics;
|
|
20
22
|
createWeightStatistics: typeof createTensorStatistics;
|
package/dist/checks/index.js
CHANGED
|
@@ -4,9 +4,10 @@ import { execute as r } from "./qkv.js";
|
|
|
4
4
|
import { execute as c } from "./gelu.js";
|
|
5
5
|
import { execute as o } from "./normRMSGrad.js";
|
|
6
6
|
import { execute as a } from "./appendCache.js";
|
|
7
|
-
import { execute as
|
|
8
|
-
import
|
|
9
|
-
import
|
|
7
|
+
import { execute as m } from "./attentionMask.js";
|
|
8
|
+
import { execute as i } from "./matMulGelu.js";
|
|
9
|
+
import s from "./check.js";
|
|
10
|
+
import { createTensorStatistics as u, createWeightStatistics as x } from "./weights.js";
|
|
10
11
|
const d = {
|
|
11
12
|
rope: e,
|
|
12
13
|
qkv: r,
|
|
@@ -14,10 +15,11 @@ const d = {
|
|
|
14
15
|
normRMS: t,
|
|
15
16
|
normRMSGrad: o,
|
|
16
17
|
appendCache: a,
|
|
17
|
-
attentionMask:
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
18
|
+
attentionMask: m,
|
|
19
|
+
matMulGelu: i,
|
|
20
|
+
runCheck: s,
|
|
21
|
+
createLayerWeightStatistics: x,
|
|
22
|
+
createWeightStatistics: u
|
|
21
23
|
};
|
|
22
24
|
export {
|
|
23
25
|
d as default
|
|
package/dist/checks/matMulGelu.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function execute(backend: string): Promise<number | number[] | number[][] | number[][][] | number[][][][] | number[][][][][] | number[][][][][][]>;
|
|
package/dist/checks/matMulGelu.js
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { s as n, e as s } from "../index-DdmHGZjq.js";
|
|
2
|
+
import "../random_width-DKGeiFuR.js";
|
|
3
|
+
import "../register_all_kernels-Do9VvZmo.js";
|
|
4
|
+
import "../index-Tf7vU29b.js";
|
|
5
|
+
import "../dataset-DPPl-iLT.js";
|
|
6
|
+
import { t as e } from "../tensor2d-CObBWBkW.js";
|
|
7
|
+
async function f(t) {
|
|
8
|
+
await n(t);
|
|
9
|
+
const r = e(
|
|
10
|
+
[
|
|
11
|
+
[0.1, 0.2, 9, 10, 11],
|
|
12
|
+
[0.3, 0.4, -9, -10, -11],
|
|
13
|
+
[0.3, 0.4, -9, -10, -11],
|
|
14
|
+
[0.3, 0.4, -9, -10, -11],
|
|
15
|
+
[0.3, 0.4, -9, -10, -11]
|
|
16
|
+
],
|
|
17
|
+
[5, 5]
|
|
18
|
+
), o = e(
|
|
19
|
+
[
|
|
20
|
+
[0.5, 0.6, 7e4, -8e3, 0],
|
|
21
|
+
[0.7, 0.8, -7e4, 8e4, 0],
|
|
22
|
+
[0.7, 0.8, -7e4, 8e4, 0],
|
|
23
|
+
[0.7, 0.8, -7e4, 8e4, 0],
|
|
24
|
+
[0.7, 0.8, -7e4, 8e4, 0]
|
|
25
|
+
],
|
|
26
|
+
[5, 5]
|
|
27
|
+
);
|
|
28
|
+
return await s().runKernel("MatMulGelu", { x: o, kernel: r }).array();
|
|
29
|
+
}
|
|
30
|
+
export {
|
|
31
|
+
f as execute
|
|
32
|
+
};
|
package/dist/ops/webgl/gelu.js
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
import { f as a } from "../../index-DdmHGZjq.js";
|
|
2
|
-
import { u as s, C as
|
|
3
|
-
const t = 0.7978845608028654, r = 0.044715, c =
|
|
2
|
+
import { u as s, C as i } from "../../kernel_funcs_utils-CDfFpUab.js";
|
|
3
|
+
const t = 0.7978845608028654, r = 0.044715, c = i + `
|
|
4
4
|
float x3 = x * x * x;
|
|
5
5
|
float inner = x + ${r} * x3;
|
|
6
6
|
inner = ${t} * inner;
|
|
7
|
-
inner = tanh(inner);
|
|
7
|
+
inner = abs(inner) > 15.0 ? sign(inner) : tanh(inner);
|
|
8
8
|
inner = 0.5 * (1.0 + inner);
|
|
9
|
-
|
|
10
|
-
return
|
|
11
|
-
`, d = s({ opSnippet: c }),
|
|
9
|
+
inner = x * inner;
|
|
10
|
+
return inner;
|
|
11
|
+
`, d = s({ opSnippet: c }), x = {
|
|
12
12
|
kernelName: "Gelu",
|
|
13
13
|
backendName: "webgl",
|
|
14
14
|
kernelFunc: d
|
|
15
15
|
};
|
|
16
|
-
a(
|
|
16
|
+
a(x);
|
|
17
17
|
class f {
|
|
18
18
|
// Inputs: dy, x
|
|
19
19
|
variableNames = ["dy", "x"];
|
|
@@ -27,7 +27,7 @@ class f {
|
|
|
27
27
|
float x2 = x * x;
|
|
28
28
|
float x3 = x2 * x;
|
|
29
29
|
float u = ${t} * (x + ${r} * x3);
|
|
30
|
-
float t
|
|
30
|
+
float t = abs(u) > 15.0 ? sign(u) : tanh(u);
|
|
31
31
|
float sech2 = 1.0 - t * t;
|
|
32
32
|
float du_dx = ${t} * (1.0 + 3.0 * ${r} * x2);
|
|
33
33
|
float dgelu = 0.5 * (1.0 + t) + 0.5 * x * sech2 * du_dx;
|
|
package/dist/ops/webgl/matMulGelu.js
CHANGED
|
@@ -1,63 +1,73 @@
|
|
|
1
|
-
import { f as
|
|
1
|
+
import { f as E, t as R, e as C, j as $, l as N, n as H, u as O } from "../../index-DdmHGZjq.js";
|
|
2
2
|
import { r as f } from "../../Reshape-Bh_jzKzV.js";
|
|
3
3
|
import { M as U } from "../../mulmat_packed_gpu-q_Gmwyld.js";
|
|
4
|
-
import { m as
|
|
5
|
-
const M = 0.7978845608028654,
|
|
4
|
+
import { m as A } from "../../mat_mul-Dpy2mMRu.js";
|
|
5
|
+
const M = 0.7978845608028654, g = 0.044715, j = `
|
|
6
6
|
vec4 x3 = x * x * x;
|
|
7
|
-
vec4 inner = x + ${
|
|
7
|
+
vec4 inner = x + ${g} * x3;
|
|
8
8
|
inner = ${M} * inner;
|
|
9
|
-
inner =
|
|
9
|
+
inner = vec4(
|
|
10
|
+
abs(inner[0]) > 15.0 ? sign(inner[0]) : tanh(inner[0]),
|
|
11
|
+
abs(inner[1]) > 15.0 ? sign(inner[1]) : tanh(inner[1]),
|
|
12
|
+
abs(inner[2]) > 15.0 ? sign(inner[2]) : tanh(inner[2]),
|
|
13
|
+
abs(inner[3]) > 15.0 ? sign(inner[3]) : tanh(inner[3])
|
|
14
|
+
);
|
|
10
15
|
inner = 0.5 * (1.0 + inner);
|
|
11
16
|
vec4 result = x * inner;
|
|
12
17
|
return result;
|
|
13
18
|
`, q = `
|
|
14
19
|
vec4 a2 = a * a;
|
|
15
20
|
vec4 a3 = a2 * a;
|
|
16
|
-
vec4 u = ${M} * (a + ${
|
|
17
|
-
vec4 t
|
|
21
|
+
vec4 u = ${M} * (a + ${g} * a3);
|
|
22
|
+
vec4 t = vec4(
|
|
23
|
+
abs(u[0]) > 15.0 ? sign(u[0]) : tanh(u[0]),
|
|
24
|
+
abs(u[1]) > 15.0 ? sign(u[1]) : tanh(u[1]),
|
|
25
|
+
abs(u[2]) > 15.0 ? sign(u[2]) : tanh(u[2]),
|
|
26
|
+
abs(u[3]) > 15.0 ? sign(u[3]) : tanh(u[3])
|
|
27
|
+
);
|
|
18
28
|
vec4 sech2 = 1.0 - t * t;
|
|
19
|
-
vec4 du_dx = ${M} * (1.0 + 3.0 * ${
|
|
29
|
+
vec4 du_dx = ${M} * (1.0 + 3.0 * ${g} * a2);
|
|
20
30
|
vec4 dgelu = 0.5 * (1.0 + t) + 0.5 * a * sech2 * du_dx;
|
|
21
31
|
return dgelu * b;
|
|
22
|
-
`,
|
|
23
|
-
function
|
|
32
|
+
`, ne = 1e3;
|
|
33
|
+
function _({
|
|
24
34
|
a: e,
|
|
25
|
-
b:
|
|
35
|
+
b: n,
|
|
26
36
|
transposeA: s,
|
|
27
|
-
transposeB:
|
|
37
|
+
transposeB: t,
|
|
28
38
|
backend: a,
|
|
29
39
|
activationSnippet: c,
|
|
30
40
|
multiplier: o
|
|
31
41
|
}) {
|
|
32
|
-
const r = e.shape.length,
|
|
42
|
+
const r = e.shape.length, i = n.shape.length, u = s ? e.shape[r - 2] : e.shape[r - 1], h = t ? n.shape[i - 1] : n.shape[i - 2], p = s ? e.shape[r - 1] : e.shape[r - 2], l = t ? n.shape[i - 2] : n.shape[i - 1], w = e.shape.slice(0, -2), K = n.shape.slice(0, -2), d = $(w), m = $(K), T = N(e.shape.slice(0, -2), n.shape.slice(0, -2)).concat([p, l]);
|
|
33
43
|
H(
|
|
34
|
-
|
|
35
|
-
() => `Error in matMul: inner shapes (${
|
|
44
|
+
u === h,
|
|
45
|
+
() => `Error in matMul: inner shapes (${u}) and (${h}) of Tensors with shapes ${e.shape} and ${n.shape} and transposeA=${s} and transposeB=${t} must match.`
|
|
36
46
|
);
|
|
37
|
-
const v = s ? [d,
|
|
47
|
+
const v = s ? [d, u, p] : [d, p, u], x = t ? [m, l, h] : [m, h, l], S = f({ inputs: { x: e }, backend: a, attrs: { shape: v } }), b = f({ inputs: { x: n }, backend: a, attrs: { shape: x } }), D = [S, b], y = Math.max(d, m), L = c, B = O(e.dtype, n.dtype), F = new U(
|
|
38
48
|
v,
|
|
39
|
-
|
|
40
|
-
[y,
|
|
49
|
+
x,
|
|
50
|
+
[y, p, l],
|
|
41
51
|
s,
|
|
42
|
-
|
|
52
|
+
t,
|
|
43
53
|
!1,
|
|
44
54
|
L,
|
|
45
55
|
!!o,
|
|
46
56
|
!1
|
|
47
|
-
),
|
|
48
|
-
o &&
|
|
49
|
-
const
|
|
50
|
-
|
|
51
|
-
for (const P of
|
|
57
|
+
), G = [S, b];
|
|
58
|
+
o && G.push(o);
|
|
59
|
+
const k = a.runWebGLProgram(F, G, B), I = f({ inputs: { x: k }, backend: a, attrs: { shape: T } });
|
|
60
|
+
D.push(k);
|
|
61
|
+
for (const P of D)
|
|
52
62
|
a.disposeIntermediateTensorInfo(P);
|
|
53
63
|
return I;
|
|
54
64
|
}
|
|
55
65
|
function z(e) {
|
|
56
|
-
const { inputs:
|
|
57
|
-
if (
|
|
66
|
+
const { inputs: n, backend: s } = e, { x: t, kernel: a } = n;
|
|
67
|
+
if (t === void 0 || a === void 0)
|
|
58
68
|
throw new Error("BatchMatMul requires two input tensors.");
|
|
59
|
-
return
|
|
60
|
-
a:
|
|
69
|
+
return _({
|
|
70
|
+
a: t,
|
|
61
71
|
b: a,
|
|
62
72
|
transposeA: !1,
|
|
63
73
|
transposeB: !1,
|
|
@@ -70,21 +80,21 @@ const W = {
|
|
|
70
80
|
backendName: "webgl",
|
|
71
81
|
kernelFunc: z
|
|
72
82
|
};
|
|
73
|
-
|
|
83
|
+
E(W);
|
|
74
84
|
function J(e) {
|
|
75
|
-
const { dy:
|
|
85
|
+
const { dy: n, x: s, kernel: t } = e.inputs, a = e.backend;
|
|
76
86
|
return R(() => {
|
|
77
87
|
const c = C().makeTensorFromTensorInfo(
|
|
78
|
-
|
|
88
|
+
_({
|
|
79
89
|
a: s,
|
|
80
|
-
b:
|
|
90
|
+
b: t,
|
|
81
91
|
transposeA: !1,
|
|
82
92
|
transposeB: !1,
|
|
83
93
|
backend: a,
|
|
84
94
|
activationSnippet: q,
|
|
85
|
-
multiplier:
|
|
95
|
+
multiplier: n
|
|
86
96
|
})
|
|
87
|
-
), o =
|
|
97
|
+
), o = A(c, t, !1, !0), r = A(s, c, !0, !1);
|
|
88
98
|
return [o, r];
|
|
89
99
|
});
|
|
90
100
|
}
|
|
@@ -93,9 +103,9 @@ const Q = {
|
|
|
93
103
|
backendName: "webgl",
|
|
94
104
|
kernelFunc: J
|
|
95
105
|
};
|
|
96
|
-
|
|
106
|
+
E(Q);
|
|
97
107
|
export {
|
|
98
|
-
|
|
99
|
-
|
|
108
|
+
ne as MATMUL_SHARED_DIM_THRESHOLD,
|
|
109
|
+
_ as batchMatMulGeluImpl,
|
|
100
110
|
z as batchMatMulKernel
|
|
101
111
|
};
|
package/dist/ops/webgpu/gelu.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { f as i } from "../../index-DdmHGZjq.js";
|
|
2
2
|
import { g as o } from "../../webgpu_program-Dhk9R5aG.js";
|
|
3
3
|
import { f as s, c as p } from "../../webgpu_util-BqGnZg8t.js";
|
|
4
|
-
const
|
|
5
|
-
class
|
|
4
|
+
const a = 0.7978845608028654, u = 0.044715;
|
|
5
|
+
class h {
|
|
6
6
|
outputShape;
|
|
7
7
|
shaderKey;
|
|
8
8
|
dispatchLayout;
|
|
@@ -15,11 +15,14 @@ class c {
|
|
|
15
15
|
}
|
|
16
16
|
getUserCode() {
|
|
17
17
|
return `
|
|
18
|
+
fn polyTanh(x: f32) -> f32 {
|
|
19
|
+
return select(tanh(x), sign(x), abs(x) > 15.0);
|
|
20
|
+
}
|
|
18
21
|
fn unaryOperation(x : f32) -> f32 {
|
|
19
22
|
let x3 = x * x * x;
|
|
20
|
-
var inner = fma(${
|
|
21
|
-
inner = ${
|
|
22
|
-
inner =
|
|
23
|
+
var inner = fma(${u}, x3, x);
|
|
24
|
+
inner = ${a} * inner;
|
|
25
|
+
inner = polyTanh(inner);
|
|
23
26
|
inner = 0.5 * (1.0 + inner);
|
|
24
27
|
return x * inner;
|
|
25
28
|
}
|
|
@@ -32,14 +35,14 @@ class c {
|
|
|
32
35
|
`;
|
|
33
36
|
}
|
|
34
37
|
}
|
|
35
|
-
function
|
|
36
|
-
const { x: e } = t.inputs, n = t.backend, r = new
|
|
38
|
+
function c(t) {
|
|
39
|
+
const { x: e } = t.inputs, n = t.backend, r = new h(e.shape);
|
|
37
40
|
return n.runWebGPUProgram(r, [e], "float32");
|
|
38
41
|
}
|
|
39
42
|
const l = {
|
|
40
43
|
kernelName: "Gelu",
|
|
41
44
|
backendName: "webgpu",
|
|
42
|
-
kernelFunc:
|
|
45
|
+
kernelFunc: c
|
|
43
46
|
};
|
|
44
47
|
i(l);
|
|
45
48
|
class x {
|
|
@@ -56,15 +59,18 @@ class x {
|
|
|
56
59
|
}
|
|
57
60
|
getUserCode() {
|
|
58
61
|
return `
|
|
62
|
+
fn polyTanh(x: f32) -> f32 {
|
|
63
|
+
return select(tanh(x), sign(x), abs(x) > 15.0);
|
|
64
|
+
}
|
|
59
65
|
${o("index")} {
|
|
60
66
|
if (index < uniforms.size) {
|
|
61
67
|
let X = getXByOutputIndex(index);
|
|
62
68
|
let x2 = X * X;
|
|
63
69
|
let x3 = x2 * X;
|
|
64
|
-
let u = ${
|
|
65
|
-
let t =
|
|
70
|
+
let u = ${a} * (X + ${u} * x3);
|
|
71
|
+
let t = polyTanh(u);
|
|
66
72
|
let sech2 = 1.0 - t * t;
|
|
67
|
-
let du_dx = ${
|
|
73
|
+
let du_dx = ${a} * (1.0 + 3.0 * ${u} * x2);
|
|
68
74
|
let dgelu = 0.5 * (1.0 + t) + 0.5 * X * sech2 * du_dx;
|
|
69
75
|
let DY = getDyByOutputIndex(index);
|
|
70
76
|
setOutputAtIndex(index, DY * dgelu);
|
|
@@ -76,12 +82,12 @@ function g(t) {
|
|
|
76
82
|
const { dy: e, x: n } = t.inputs, r = t.backend, d = new x(n.shape);
|
|
77
83
|
return r.runWebGPUProgram(d, [e, n], "float32");
|
|
78
84
|
}
|
|
79
|
-
const
|
|
85
|
+
const f = {
|
|
80
86
|
kernelName: "GeluGrad",
|
|
81
87
|
backendName: "webgpu",
|
|
82
88
|
kernelFunc: g
|
|
83
89
|
};
|
|
84
|
-
i(
|
|
90
|
+
i(f);
|
|
85
91
|
export {
|
|
86
|
-
|
|
92
|
+
h as GeluProgram
|
|
87
93
|
};
|