@genai-fi/nanogpt 0.8.4 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,17 @@
1
1
  import { f as a } from "../../index-DdmHGZjq.js";
2
- import { u as s, C as c } from "../../kernel_funcs_utils-CDfFpUab.js";
3
- const t = 0.7978845608028654, r = 0.044715, d = c + `
2
+ import { u as s, C as i } from "../../kernel_funcs_utils-CDfFpUab.js";
3
+ const t = 0.7978845608028654, r = 0.044715, c = i + `
4
4
  float x3 = x * x * x;
5
5
  float inner = x + ${r} * x3;
6
6
  inner = ${t} * inner;
7
- inner = tanh(inner);
7
+ inner = abs(inner) > 15.0 ? sign(inner) : tanh(inner);
8
8
  inner = 0.5 * (1.0 + inner);
9
9
  inner = x * inner;
10
10
  return inner;
11
- `, i = s({ opSnippet: d }), x = {
11
+ `, d = s({ opSnippet: c }), x = {
12
12
  kernelName: "Gelu",
13
13
  backendName: "webgl",
14
- kernelFunc: i
14
+ kernelFunc: d
15
15
  };
16
16
  a(x);
17
17
  class f {
@@ -27,7 +27,7 @@ class f {
27
27
  float x2 = x * x;
28
28
  float x3 = x2 * x;
29
29
  float u = ${t} * (x + ${r} * x3);
30
- float t = tanh(u);
30
+ float t = abs(u) > 15.0 ? sign(u) : tanh(u);
31
31
  float sech2 = 1.0 - t * t;
32
32
  float du_dx = ${t} * (1.0 + 3.0 * ${r} * x2);
33
33
  float dgelu = 0.5 * (1.0 + t) + 0.5 * x * sech2 * du_dx;
@@ -46,5 +46,5 @@ const p = {
46
46
  };
47
47
  a(p);
48
48
  export {
49
- i as gelu
49
+ d as gelu
50
50
  };
@@ -1,63 +1,73 @@
1
- import { f as _, t as R, e as C, j as A, l as N, n as H, u as O } from "../../index-DdmHGZjq.js";
1
+ import { f as E, t as R, e as C, j as $, l as N, n as H, u as O } from "../../index-DdmHGZjq.js";
2
2
  import { r as f } from "../../Reshape-Bh_jzKzV.js";
3
3
  import { M as U } from "../../mulmat_packed_gpu-q_Gmwyld.js";
4
- import { m as E } from "../../mat_mul-Dpy2mMRu.js";
5
- const M = 0.7978845608028654, x = 0.044715, j = `
4
+ import { m as A } from "../../mat_mul-Dpy2mMRu.js";
5
+ const M = 0.7978845608028654, g = 0.044715, j = `
6
6
  vec4 x3 = x * x * x;
7
- vec4 inner = x + ${x} * x3;
7
+ vec4 inner = x + ${g} * x3;
8
8
  inner = ${M} * inner;
9
- inner = tanh(inner);
9
+ inner = vec4(
10
+ abs(inner[0]) > 15.0 ? sign(inner[0]) : tanh(inner[0]),
11
+ abs(inner[1]) > 15.0 ? sign(inner[1]) : tanh(inner[1]),
12
+ abs(inner[2]) > 15.0 ? sign(inner[2]) : tanh(inner[2]),
13
+ abs(inner[3]) > 15.0 ? sign(inner[3]) : tanh(inner[3])
14
+ );
10
15
  inner = 0.5 * (1.0 + inner);
11
16
  vec4 result = x * inner;
12
17
  return result;
13
18
  `, q = `
14
19
  vec4 a2 = a * a;
15
20
  vec4 a3 = a2 * a;
16
- vec4 u = ${M} * (a + ${x} * a3);
17
- vec4 t = tanh(u);
21
+ vec4 u = ${M} * (a + ${g} * a3);
22
+ vec4 t = vec4(
23
+ abs(u[0]) > 15.0 ? sign(u[0]) : tanh(u[0]),
24
+ abs(u[1]) > 15.0 ? sign(u[1]) : tanh(u[1]),
25
+ abs(u[2]) > 15.0 ? sign(u[2]) : tanh(u[2]),
26
+ abs(u[3]) > 15.0 ? sign(u[3]) : tanh(u[3])
27
+ );
18
28
  vec4 sech2 = 1.0 - t * t;
19
- vec4 du_dx = ${M} * (1.0 + 3.0 * ${x} * a2);
29
+ vec4 du_dx = ${M} * (1.0 + 3.0 * ${g} * a2);
20
30
  vec4 dgelu = 0.5 * (1.0 + t) + 0.5 * a * sech2 * du_dx;
21
31
  return dgelu * b;
22
- `, te = 1e3;
23
- function w({
32
+ `, ne = 1e3;
33
+ function _({
24
34
  a: e,
25
- b: t,
35
+ b: n,
26
36
  transposeA: s,
27
- transposeB: n,
37
+ transposeB: t,
28
38
  backend: a,
29
39
  activationSnippet: c,
30
40
  multiplier: o
31
41
  }) {
32
- const r = e.shape.length, u = t.shape.length, i = s ? e.shape[r - 2] : e.shape[r - 1], p = n ? t.shape[u - 1] : t.shape[u - 2], h = s ? e.shape[r - 1] : e.shape[r - 2], l = n ? t.shape[u - 2] : t.shape[u - 1], K = e.shape.slice(0, -2), T = t.shape.slice(0, -2), d = A(K), m = A(T), b = N(e.shape.slice(0, -2), t.shape.slice(0, -2)).concat([h, l]);
42
+ const r = e.shape.length, i = n.shape.length, u = s ? e.shape[r - 2] : e.shape[r - 1], h = t ? n.shape[i - 1] : n.shape[i - 2], p = s ? e.shape[r - 1] : e.shape[r - 2], l = t ? n.shape[i - 2] : n.shape[i - 1], w = e.shape.slice(0, -2), K = n.shape.slice(0, -2), d = $(w), m = $(K), T = N(e.shape.slice(0, -2), n.shape.slice(0, -2)).concat([p, l]);
33
43
  H(
34
- i === p,
35
- () => `Error in matMul: inner shapes (${i}) and (${p}) of Tensors with shapes ${e.shape} and ${t.shape} and transposeA=${s} and transposeB=${n} must match.`
44
+ u === h,
45
+ () => `Error in matMul: inner shapes (${u}) and (${h}) of Tensors with shapes ${e.shape} and ${n.shape} and transposeA=${s} and transposeB=${t} must match.`
36
46
  );
37
- const v = s ? [d, i, h] : [d, h, i], S = n ? [m, l, p] : [m, p, l], D = f({ inputs: { x: e }, backend: a, attrs: { shape: v } }), G = f({ inputs: { x: t }, backend: a, attrs: { shape: S } }), g = [D, G], y = Math.max(d, m), L = c, B = O(e.dtype, t.dtype), F = new U(
47
+ const v = s ? [d, u, p] : [d, p, u], x = t ? [m, l, h] : [m, h, l], S = f({ inputs: { x: e }, backend: a, attrs: { shape: v } }), b = f({ inputs: { x: n }, backend: a, attrs: { shape: x } }), D = [S, b], y = Math.max(d, m), L = c, B = O(e.dtype, n.dtype), F = new U(
38
48
  v,
39
- S,
40
- [y, h, l],
49
+ x,
50
+ [y, p, l],
41
51
  s,
42
- n,
52
+ t,
43
53
  !1,
44
54
  L,
45
55
  !!o,
46
56
  !1
47
- ), k = [D, G];
48
- o && k.push(o);
49
- const $ = a.runWebGLProgram(F, k, B), I = f({ inputs: { x: $ }, backend: a, attrs: { shape: b } });
50
- g.push($);
51
- for (const P of g)
57
+ ), G = [S, b];
58
+ o && G.push(o);
59
+ const k = a.runWebGLProgram(F, G, B), I = f({ inputs: { x: k }, backend: a, attrs: { shape: T } });
60
+ D.push(k);
61
+ for (const P of D)
52
62
  a.disposeIntermediateTensorInfo(P);
53
63
  return I;
54
64
  }
55
65
  function z(e) {
56
- const { inputs: t, backend: s } = e, { x: n, kernel: a } = t;
57
- if (n === void 0 || a === void 0)
66
+ const { inputs: n, backend: s } = e, { x: t, kernel: a } = n;
67
+ if (t === void 0 || a === void 0)
58
68
  throw new Error("BatchMatMul requires two input tensors.");
59
- return w({
60
- a: n,
69
+ return _({
70
+ a: t,
61
71
  b: a,
62
72
  transposeA: !1,
63
73
  transposeB: !1,
@@ -70,21 +80,21 @@ const W = {
70
80
  backendName: "webgl",
71
81
  kernelFunc: z
72
82
  };
73
- _(W);
83
+ E(W);
74
84
  function J(e) {
75
- const { dy: t, x: s, kernel: n } = e.inputs, a = e.backend;
85
+ const { dy: n, x: s, kernel: t } = e.inputs, a = e.backend;
76
86
  return R(() => {
77
87
  const c = C().makeTensorFromTensorInfo(
78
- w({
88
+ _({
79
89
  a: s,
80
- b: n,
90
+ b: t,
81
91
  transposeA: !1,
82
92
  transposeB: !1,
83
93
  backend: a,
84
94
  activationSnippet: q,
85
- multiplier: t
95
+ multiplier: n
86
96
  })
87
- ), o = E(c, n, !1, !0), r = E(s, c, !0, !1);
97
+ ), o = A(c, t, !1, !0), r = A(s, c, !0, !1);
88
98
  return [o, r];
89
99
  });
90
100
  }
@@ -93,9 +103,9 @@ const Q = {
93
103
  backendName: "webgl",
94
104
  kernelFunc: J
95
105
  };
96
- _(Q);
106
+ E(Q);
97
107
  export {
98
- te as MATMUL_SHARED_DIM_THRESHOLD,
99
- w as batchMatMulGeluImpl,
108
+ ne as MATMUL_SHARED_DIM_THRESHOLD,
109
+ _ as batchMatMulGeluImpl,
100
110
  z as batchMatMulKernel
101
111
  };
@@ -1,8 +1,8 @@
1
1
  import { f as i } from "../../index-DdmHGZjq.js";
2
2
  import { g as o } from "../../webgpu_program-Dhk9R5aG.js";
3
3
  import { f as s, c as p } from "../../webgpu_util-BqGnZg8t.js";
4
- const u = 0.7978845608028654, a = 0.044715;
5
- class c {
4
+ const a = 0.7978845608028654, u = 0.044715;
5
+ class h {
6
6
  outputShape;
7
7
  shaderKey;
8
8
  dispatchLayout;
@@ -15,11 +15,14 @@ class c {
15
15
  }
16
16
  getUserCode() {
17
17
  return `
18
+ fn polyTanh(x: f32) -> f32 {
19
+ return select(tanh(x), sign(x), abs(x) > 15.0);
20
+ }
18
21
  fn unaryOperation(x : f32) -> f32 {
19
22
  let x3 = x * x * x;
20
- var inner = fma(${a}, x3, x);
21
- inner = ${u} * inner;
22
- inner = tanh(inner);
23
+ var inner = fma(${u}, x3, x);
24
+ inner = ${a} * inner;
25
+ inner = polyTanh(inner);
23
26
  inner = 0.5 * (1.0 + inner);
24
27
  return x * inner;
25
28
  }
@@ -32,14 +35,14 @@ class c {
32
35
  `;
33
36
  }
34
37
  }
35
- function h(t) {
36
- const { x: e } = t.inputs, n = t.backend, r = new c(e.shape);
38
+ function c(t) {
39
+ const { x: e } = t.inputs, n = t.backend, r = new h(e.shape);
37
40
  return n.runWebGPUProgram(r, [e], "float32");
38
41
  }
39
42
  const l = {
40
43
  kernelName: "Gelu",
41
44
  backendName: "webgpu",
42
- kernelFunc: h
45
+ kernelFunc: c
43
46
  };
44
47
  i(l);
45
48
  class x {
@@ -56,15 +59,18 @@ class x {
56
59
  }
57
60
  getUserCode() {
58
61
  return `
62
+ fn polyTanh(x: f32) -> f32 {
63
+ return select(tanh(x), sign(x), abs(x) > 15.0);
64
+ }
59
65
  ${o("index")} {
60
66
  if (index < uniforms.size) {
61
67
  let X = getXByOutputIndex(index);
62
68
  let x2 = X * X;
63
69
  let x3 = x2 * X;
64
- let u = ${u} * (X + ${a} * x3);
65
- let t = tanh(u);
70
+ let u = ${a} * (X + ${u} * x3);
71
+ let t = polyTanh(u);
66
72
  let sech2 = 1.0 - t * t;
67
- let du_dx = ${u} * (1.0 + 3.0 * ${a} * x2);
73
+ let du_dx = ${a} * (1.0 + 3.0 * ${u} * x2);
68
74
  let dgelu = 0.5 * (1.0 + t) + 0.5 * X * sech2 * du_dx;
69
75
  let DY = getDyByOutputIndex(index);
70
76
  setOutputAtIndex(index, DY * dgelu);
@@ -76,12 +82,12 @@ function g(t) {
76
82
  const { dy: e, x: n } = t.inputs, r = t.backend, d = new x(n.shape);
77
83
  return r.runWebGPUProgram(d, [e, n], "float32");
78
84
  }
79
- const m = {
85
+ const f = {
80
86
  kernelName: "GeluGrad",
81
87
  backendName: "webgpu",
82
88
  kernelFunc: g
83
89
  };
84
- i(m);
90
+ i(f);
85
91
  export {
86
- c as GeluProgram
92
+ h as GeluProgram
87
93
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@genai-fi/nanogpt",
3
- "version": "0.8.4",
3
+ "version": "0.8.5",
4
4
  "type": "module",
5
5
  "main": "dist/main.js",
6
6
  "types": "dist/main.d.ts",