npm - @genai-fi/nanogpt - Versions diffs - 0.8.3 → 0.8.5 - Mend

@genai-fi/nanogpt 0.8.3 → 0.8.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/checks/gelu.js +5 -5
package/dist/checks/index.d.ts +2 -0
package/dist/checks/index.js +9 -7
package/dist/checks/matMulGelu.d.ts +1 -0
package/dist/checks/matMulGelu.js +32 -0
package/dist/ops/webgl/gelu.js +8 -8
package/dist/ops/webgl/matMulGelu.js +47 -37
package/dist/ops/webgpu/gelu.js +20 -14
package/package.json +1 -1

package/dist/checks/gelu.js CHANGED Viewed

@@ -4,12 +4,12 @@ async function m(t) {
   await e(t);
   const r = s(
     [
-      [0.1, 0.2, 0, 0],
-      [0.1, 0.2, 0, 0],
-      [0, 0, 0, 0],
-      [0, 0, 0, 0]
+      [0.1, 0.2, 0, 0, 1230, 1232331234, -12234234],
+      [0.1, 0.2, 0, 0, -1230, -1232331234, 12234234],
+      [0, 0, 0, 0, -1, 0, 0],
+      [0, 0, 0, 0, -0.1, 1e-3, 0]
     ],
-    [4, 4]
+    [4, 7]
   );
   return await o().runKernel("Gelu", { x: r }).array();
 }

package/dist/checks/index.d.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import { execute as gelu } from './gelu';
 import { execute as normRMSGrad } from './normRMSGrad';
 import { execute as appendCache } from './appendCache';
 import { execute as attentionMask } from './attentionMask';
+import { execute as matMulGelu } from './matMulGelu';
 import { default as runCheck } from './check';
 import { createWeightStatistics, createTensorStatistics } from './weights';
 declare const checks: {
@@ -15,6 +16,7 @@ declare const checks: {
     normRMSGrad: typeof normRMSGrad;
     appendCache: typeof appendCache;
     attentionMask: typeof attentionMask;
+    matMulGelu: typeof matMulGelu;
     runCheck: typeof runCheck;
     createLayerWeightStatistics: typeof createWeightStatistics;
     createWeightStatistics: typeof createTensorStatistics;

package/dist/checks/index.js CHANGED Viewed

@@ -4,9 +4,10 @@ import { execute as r } from "./qkv.js";
 import { execute as c } from "./gelu.js";
 import { execute as o } from "./normRMSGrad.js";
 import { execute as a } from "./appendCache.js";
-import { execute as i } from "./attentionMask.js";
-import m from "./check.js";
-import { createTensorStatistics as s, createWeightStatistics as u } from "./weights.js";
+import { execute as m } from "./attentionMask.js";
+import { execute as i } from "./matMulGelu.js";
+import s from "./check.js";
+import { createTensorStatistics as u, createWeightStatistics as x } from "./weights.js";
 const d = {
   rope: e,
   qkv: r,
@@ -14,10 +15,11 @@ const d = {
   normRMS: t,
   normRMSGrad: o,
   appendCache: a,
-  attentionMask: i,
-  runCheck: m,
-  createLayerWeightStatistics: u,
-  createWeightStatistics: s
+  attentionMask: m,
+  matMulGelu: i,
+  runCheck: s,
+  createLayerWeightStatistics: x,
+  createWeightStatistics: u
 };
 export {
   d as default

package/dist/checks/matMulGelu.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export declare function execute(backend: string): Promise<number | number[] | number[][] | number[][][] | number[][][][] | number[][][][][] | number[][][][][][]>;

package/dist/checks/matMulGelu.js ADDED Viewed

@@ -0,0 +1,32 @@
+import { s as n, e as s } from "../index-DdmHGZjq.js";
+import "../random_width-DKGeiFuR.js";
+import "../register_all_kernels-Do9VvZmo.js";
+import "../index-Tf7vU29b.js";
+import "../dataset-DPPl-iLT.js";
+import { t as e } from "../tensor2d-CObBWBkW.js";
+async function f(t) {
+  await n(t);
+  const r = e(
+    [
+      [0.1, 0.2, 9, 10, 11],
+      [0.3, 0.4, -9, -10, -11],
+      [0.3, 0.4, -9, -10, -11],
+      [0.3, 0.4, -9, -10, -11],
+      [0.3, 0.4, -9, -10, -11]
+    ],
+    [5, 5]
+  ), o = e(
+    [
+      [0.5, 0.6, 7e4, -8e3, 0],
+      [0.7, 0.8, -7e4, 8e4, 0],
+      [0.7, 0.8, -7e4, 8e4, 0],
+      [0.7, 0.8, -7e4, 8e4, 0],
+      [0.7, 0.8, -7e4, 8e4, 0]
+    ],
+    [5, 5]
+  );
+  return await s().runKernel("MatMulGelu", { x: o, kernel: r }).array();
+}
+export {
+  f as execute
+};

package/dist/ops/webgl/gelu.js CHANGED Viewed

@@ -1,19 +1,19 @@
 import { f as a } from "../../index-DdmHGZjq.js";
-import { u as s, C as x } from "../../kernel_funcs_utils-CDfFpUab.js";
-const t = 0.7978845608028654, r = 0.044715, c = x + `
+import { u as s, C as i } from "../../kernel_funcs_utils-CDfFpUab.js";
+const t = 0.7978845608028654, r = 0.044715, c = i + `
     float x3 = x * x * x;
     float inner = x + ${r} * x3;
     inner = ${t} * inner;
-    inner = tanh(inner);
+    inner = abs(inner) > 15.0 ? sign(inner) : tanh(inner);
     inner = 0.5 * (1.0 + inner);
-    x = x * inner;
-    return x;
-`, d = s({ opSnippet: c }), i = {
+    inner = x * inner;
+    return inner;
+`, d = s({ opSnippet: c }), x = {
   kernelName: "Gelu",
   backendName: "webgl",
   kernelFunc: d
 };
-a(i);
+a(x);
 class f {
   // Inputs: dy, x
   variableNames = ["dy", "x"];
@@ -27,7 +27,7 @@ class f {
                 float x2 = x * x;
                 float x3 = x2 * x;
                 float u  = ${t} * (x + ${r} * x3);
-                float t  = tanh(u);
+                float t = abs(u) > 15.0 ? sign(u) : tanh(u);
                 float sech2 = 1.0 - t * t;
                 float du_dx = ${t} * (1.0 + 3.0 * ${r} * x2);
                 float dgelu = 0.5 * (1.0 + t) + 0.5 * x * sech2 * du_dx;

package/dist/ops/webgl/matMulGelu.js CHANGED Viewed

@@ -1,63 +1,73 @@
-import { f as _, t as R, e as C, j as A, l as N, n as H, u as O } from "../../index-DdmHGZjq.js";
+import { f as E, t as R, e as C, j as $, l as N, n as H, u as O } from "../../index-DdmHGZjq.js";
 import { r as f } from "../../Reshape-Bh_jzKzV.js";
 import { M as U } from "../../mulmat_packed_gpu-q_Gmwyld.js";
-import { m as E } from "../../mat_mul-Dpy2mMRu.js";
-const M = 0.7978845608028654, x = 0.044715, j = `
+import { m as A } from "../../mat_mul-Dpy2mMRu.js";
+const M = 0.7978845608028654, g = 0.044715, j = `
     vec4 x3 = x * x * x;
-    vec4 inner = x + ${x} * x3;
+    vec4 inner = x + ${g} * x3;
     inner = ${M} * inner;
-    inner = tanh(inner);
+    inner = vec4(
+        abs(inner[0]) > 15.0 ? sign(inner[0]) : tanh(inner[0]),
+        abs(inner[1]) > 15.0 ? sign(inner[1]) : tanh(inner[1]),
+        abs(inner[2]) > 15.0 ? sign(inner[2]) : tanh(inner[2]),
+        abs(inner[3]) > 15.0 ? sign(inner[3]) : tanh(inner[3])
+    );
     inner = 0.5 * (1.0 + inner);
     vec4 result = x * inner;
     return result;
 `, q = `
     vec4 a2 = a * a;
     vec4 a3 = a2 * a;
-    vec4 u  = ${M} * (a + ${x} * a3);
-    vec4 t  = tanh(u);
+    vec4 u  = ${M} * (a + ${g} * a3);
+    vec4 t = vec4(
+        abs(u[0]) > 15.0 ? sign(u[0]) : tanh(u[0]),
+        abs(u[1]) > 15.0 ? sign(u[1]) : tanh(u[1]),
+        abs(u[2]) > 15.0 ? sign(u[2]) : tanh(u[2]),
+        abs(u[3]) > 15.0 ? sign(u[3]) : tanh(u[3])
+    );
     vec4 sech2 = 1.0 - t * t;
-    vec4 du_dx = ${M} * (1.0 + 3.0 * ${x} * a2);
+    vec4 du_dx = ${M} * (1.0 + 3.0 * ${g} * a2);
     vec4 dgelu = 0.5 * (1.0 + t) + 0.5 * a * sech2 * du_dx;
     return dgelu * b;
-`, te = 1e3;
-function w({
+`, ne = 1e3;
+function _({
   a: e,
-  b: t,
+  b: n,
   transposeA: s,
-  transposeB: n,
+  transposeB: t,
   backend: a,
   activationSnippet: c,
   multiplier: o
 }) {
-  const r = e.shape.length, u = t.shape.length, i = s ? e.shape[r - 2] : e.shape[r - 1], p = n ? t.shape[u - 1] : t.shape[u - 2], h = s ? e.shape[r - 1] : e.shape[r - 2], l = n ? t.shape[u - 2] : t.shape[u - 1], K = e.shape.slice(0, -2), T = t.shape.slice(0, -2), d = A(K), m = A(T), b = N(e.shape.slice(0, -2), t.shape.slice(0, -2)).concat([h, l]);
+  const r = e.shape.length, i = n.shape.length, u = s ? e.shape[r - 2] : e.shape[r - 1], h = t ? n.shape[i - 1] : n.shape[i - 2], p = s ? e.shape[r - 1] : e.shape[r - 2], l = t ? n.shape[i - 2] : n.shape[i - 1], w = e.shape.slice(0, -2), K = n.shape.slice(0, -2), d = $(w), m = $(K), T = N(e.shape.slice(0, -2), n.shape.slice(0, -2)).concat([p, l]);
   H(
-    i === p,
-    () => `Error in matMul: inner shapes (${i}) and (${p}) of Tensors with shapes ${e.shape} and ${t.shape} and transposeA=${s} and transposeB=${n} must match.`
+    u === h,
+    () => `Error in matMul: inner shapes (${u}) and (${h}) of Tensors with shapes ${e.shape} and ${n.shape} and transposeA=${s} and transposeB=${t} must match.`
   );
-  const v = s ? [d, i, h] : [d, h, i], S = n ? [m, l, p] : [m, p, l], D = f({ inputs: { x: e }, backend: a, attrs: { shape: v } }), G = f({ inputs: { x: t }, backend: a, attrs: { shape: S } }), g = [D, G], y = Math.max(d, m), L = c, B = O(e.dtype, t.dtype), F = new U(
+  const v = s ? [d, u, p] : [d, p, u], x = t ? [m, l, h] : [m, h, l], S = f({ inputs: { x: e }, backend: a, attrs: { shape: v } }), b = f({ inputs: { x: n }, backend: a, attrs: { shape: x } }), D = [S, b], y = Math.max(d, m), L = c, B = O(e.dtype, n.dtype), F = new U(
     v,
-    S,
-    [y, h, l],
+    x,
+    [y, p, l],
     s,
-    n,
+    t,
     !1,
     L,
     !!o,
     !1
-  ), k = [D, G];
-  o && k.push(o);
-  const $ = a.runWebGLProgram(F, k, B), I = f({ inputs: { x: $ }, backend: a, attrs: { shape: b } });
-  g.push($);
-  for (const P of g)
+  ), G = [S, b];
+  o && G.push(o);
+  const k = a.runWebGLProgram(F, G, B), I = f({ inputs: { x: k }, backend: a, attrs: { shape: T } });
+  D.push(k);
+  for (const P of D)
     a.disposeIntermediateTensorInfo(P);
   return I;
 }
 function z(e) {
-  const { inputs: t, backend: s } = e, { x: n, kernel: a } = t;
-  if (n === void 0 || a === void 0)
+  const { inputs: n, backend: s } = e, { x: t, kernel: a } = n;
+  if (t === void 0 || a === void 0)
     throw new Error("BatchMatMul requires two input tensors.");
-  return w({
-    a: n,
+  return _({
+    a: t,
     b: a,
     transposeA: !1,
     transposeB: !1,
@@ -70,21 +80,21 @@ const W = {
   backendName: "webgl",
   kernelFunc: z
 };
-_(W);
+E(W);
 function J(e) {
-  const { dy: t, x: s, kernel: n } = e.inputs, a = e.backend;
+  const { dy: n, x: s, kernel: t } = e.inputs, a = e.backend;
   return R(() => {
     const c = C().makeTensorFromTensorInfo(
-      w({
+      _({
         a: s,
-        b: n,
+        b: t,
         transposeA: !1,
         transposeB: !1,
         backend: a,
         activationSnippet: q,
-        multiplier: t
+        multiplier: n
       })
-    ), o = E(c, n, !1, !0), r = E(s, c, !0, !1);
+    ), o = A(c, t, !1, !0), r = A(s, c, !0, !1);
     return [o, r];
   });
 }
@@ -93,9 +103,9 @@ const Q = {
   backendName: "webgl",
   kernelFunc: J
 };
-_(Q);
+E(Q);
 export {
-  te as MATMUL_SHARED_DIM_THRESHOLD,
-  w as batchMatMulGeluImpl,
+  ne as MATMUL_SHARED_DIM_THRESHOLD,
+  _ as batchMatMulGeluImpl,
   z as batchMatMulKernel
 };

package/dist/ops/webgpu/gelu.js CHANGED Viewed

@@ -1,8 +1,8 @@
 import { f as i } from "../../index-DdmHGZjq.js";
 import { g as o } from "../../webgpu_program-Dhk9R5aG.js";
 import { f as s, c as p } from "../../webgpu_util-BqGnZg8t.js";
-const u = 0.7978845608028654, a = 0.044715;
-class c {
+const a = 0.7978845608028654, u = 0.044715;
+class h {
   outputShape;
   shaderKey;
   dispatchLayout;
@@ -15,11 +15,14 @@ class c {
   }
   getUserCode() {
     return `
+      fn polyTanh(x: f32) -> f32 {
+         return select(tanh(x), sign(x), abs(x) > 15.0);
+      }
       fn unaryOperation(x : f32) -> f32 {
         let x3 = x * x * x;
-        var inner = fma(${a}, x3, x);
-        inner = ${u} * inner;
-        inner = tanh(inner);
+        var inner = fma(${u}, x3, x);
+        inner = ${a} * inner;
+        inner = polyTanh(inner);
         inner = 0.5 * (1.0 + inner);
         return x * inner;
       }
@@ -32,14 +35,14 @@ class c {
       `;
   }
 }
-function h(t) {
-  const { x: e } = t.inputs, n = t.backend, r = new c(e.shape);
+function c(t) {
+  const { x: e } = t.inputs, n = t.backend, r = new h(e.shape);
   return n.runWebGPUProgram(r, [e], "float32");
 }
 const l = {
   kernelName: "Gelu",
   backendName: "webgpu",
-  kernelFunc: h
+  kernelFunc: c
 };
 i(l);
 class x {
@@ -56,15 +59,18 @@ class x {
   }
   getUserCode() {
     return `
+            fn polyTanh(x: f32) -> f32 {
+                return select(tanh(x), sign(x), abs(x) > 15.0);
+            }
             ${o("index")} {
                 if (index < uniforms.size) {
                     let X  = getXByOutputIndex(index);
                     let x2 = X * X;
                     let x3 = x2 * X;
-                    let u  = ${u} * (X + ${a} * x3);
-                    let t  = tanh(u);
+                    let u  = ${a} * (X + ${u} * x3);
+                    let t  = polyTanh(u);
                     let sech2 = 1.0 - t * t;
-                    let du_dx = ${u} * (1.0 + 3.0 * ${a} * x2);
+                    let du_dx = ${a} * (1.0 + 3.0 * ${u} * x2);
                     let dgelu = 0.5 * (1.0 + t) + 0.5 * X * sech2 * du_dx;
                     let DY = getDyByOutputIndex(index);
                     setOutputAtIndex(index, DY * dgelu);
@@ -76,12 +82,12 @@ function g(t) {
   const { dy: e, x: n } = t.inputs, r = t.backend, d = new x(n.shape);
   return r.runWebGPUProgram(d, [e, n], "float32");
 }
-const m = {
+const f = {
   kernelName: "GeluGrad",
   backendName: "webgpu",
   kernelFunc: g
 };
-i(m);
+i(f);
 export {
-  c as GeluProgram
+  h as GeluProgram
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@genai-fi/nanogpt",
-    "version": "0.8.3",
+    "version": "0.8.5",
     "type": "module",
     "main": "dist/main.js",
     "types": "dist/main.d.ts",