@genai-fi/nanogpt 0.2.7 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
1
+ import { engine as l } from "@tensorflow/tfjs";
2
+ import { r as u, b as k, s as d } from "../index-Dsg28SG6.js";
3
+ import { m as p } from "../mat_mul-BAYDrXvE.js";
4
+ class f {
5
+ variableNames = ["q", "k", "mask"];
6
+ outputShape;
7
+ userCode;
8
+ // enableShapeUniforms = true;
9
+ customUniforms = [{ name: "divisor", type: "float" }];
10
+ constructor(s, n, e, a) {
11
+ this.outputShape = [s, n, e, e], this.userCode = `
12
+ void main() {
13
+ ivec4 coords = getOutputCoords(); // [batch, nh, t1, t2]
14
+ int b = coords.x;
15
+ int h = coords.y;
16
+ int t1 = coords.z;
17
+ int t2 = coords.w;
18
+
19
+ float sum = 0.0;
20
+ for (int i = 0; i < ${a}; ++i) {
21
+ float qv = getQ(b, h, t1, i);
22
+ float kv = getK(b, h, t2, i); // k is transposed on last two dims
23
+ sum += qv * kv;
24
+ }
25
+
26
+ // Scale by divisor
27
+ float scaled = sum * divisor;
28
+
29
+ // Add mask
30
+ float maskVal = getMask(t1, t2); // mask is [T,T]
31
+
32
+ setOutput(scaled + maskVal);
33
+ }
34
+ `;
35
+ }
36
+ }
37
+ function h(t) {
38
+ const { q: s, k: n, mask: e } = t.inputs, { divisor: a } = t.attrs, o = t.backend, r = s.shape[0], i = s.shape[2], c = s.shape[1], m = new f(r, c, i, s.shape[3]);
39
+ return o.runWebGLProgram(m, [s, n, e], "float32", [[a]]);
40
+ }
41
+ const v = {
42
+ kernelName: "AttentionMask",
43
+ backendName: "webgl",
44
+ kernelFunc: h
45
+ };
46
+ u(v);
47
+ function b(t) {
48
+ const { q: s, k: n, mask: e } = t.inputs, { divisor: a } = t.attrs, o = s.shape[2], i = p(s, n, !1, !0).mul(d(a)), c = e.slice([0, 0], [o, o]).expandDims(0).expandDims(0);
49
+ return i.add(c);
50
+ }
51
+ const M = {
52
+ kernelName: "AttentionMask",
53
+ backendName: "cpu",
54
+ kernelFunc: b
55
+ };
56
+ u(M);
57
+ function w(t, s, n, e) {
58
+ return l().runKernel("AttentionMask", { q: t, k: s, mask: n }, { divisor: e });
59
+ }
60
+ const g = {
61
+ kernelName: "AttentionMask",
62
+ inputsToSave: ["q", "k"],
63
+ outputsToSave: [],
64
+ gradFunc: (t, s, n) => {
65
+ if (Array.isArray(t))
66
+ throw new Error("Expected dy to be a single Tensor");
67
+ const [e, a] = s, { divisor: o } = n;
68
+ return {
69
+ q: () => t.matMul(a).mul(o),
70
+ k: () => e.transpose([0, 1, 3, 2]).matMul(t).mul(o).transpose([0, 1, 3, 2]),
71
+ mask: () => t,
72
+ divisor: () => {
73
+ const r = e.matMul(a, !1, !0);
74
+ return t.mul(r).sum();
75
+ }
76
+ };
77
+ }
78
+ };
79
+ k(g);
80
+ export {
81
+ w as attentionMask
82
+ };
@@ -1,6 +1,6 @@
1
1
  import { engine as l } from "@tensorflow/tfjs";
2
- import { o as g, c as i, E as b, G as d, r as c, a as h } from "../index-D1SlunD-.js";
3
- import { r as p, s as f } from "../stack-DB2YLlAs.js";
2
+ import { o as g, c as i, E as b, G as d, r as c, a as h } from "../index-Dsg28SG6.js";
3
+ import { r as p, s as f } from "../stack-1o648CP_.js";
4
4
  /**
5
5
  * @license
6
6
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { r as o } from "../../index-D1SlunD-.js";
1
+ import { r as o } from "../../index-Dsg28SG6.js";
2
2
  function r(e) {
3
3
  const { logits: t, labels: n } = e.inputs;
4
4
  return e.backend.executeMultipleOutputs("SparseSoftmaxCrossEntropyWithLogits", [], [t, n], 2);
@@ -1,7 +1,7 @@
1
1
  import { engine as $ } from "@tensorflow/tfjs";
2
- import { i as u, j as S, k as h, E as f, l as E, o as N, c as l, n as y, r as p, a as D, m as x } from "../index-D1SlunD-.js";
3
- import { c as m } from "../complex-D6Bq1XDf.js";
4
- import { r as v, s as T } from "../stack-DB2YLlAs.js";
2
+ import { k as u, l as S, n as p, E as f, p as E, o as N, c as l, q as y, r as h, a as D, m as x } from "../index-Dsg28SG6.js";
3
+ import { c as m } from "../complex-Cd8sqiBC.js";
4
+ import { r as v, s as T } from "../stack-1o648CP_.js";
5
5
  /**
6
6
  * @license
7
7
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -23,7 +23,7 @@ function i(e, t = "float32") {
23
23
  const a = i(e, "float32"), o = i(e, "float32");
24
24
  return m(a, o);
25
25
  }
26
- const r = S(h(e), t);
26
+ const r = S(p(e), t);
27
27
  return f.makeTensor(r, e, t);
28
28
  }
29
29
  /**
@@ -47,7 +47,7 @@ function d(e, t = "float32") {
47
47
  const a = d(e, "float32"), o = i(e, "float32");
48
48
  return m(a, o);
49
49
  }
50
- const r = E(h(e), t);
50
+ const r = E(p(e), t);
51
51
  return f.makeTensor(r, e, t);
52
52
  }
53
53
  function C(e, t, r) {
@@ -131,7 +131,7 @@ const K = {
131
131
  backendName: "webgl",
132
132
  kernelFunc: P
133
133
  };
134
- p(K);
134
+ h(K);
135
135
  function A(e) {
136
136
  const { logits: t, labels: r, dy: a } = e.inputs, o = r.shape[0], s = t.shape[1], n = v(0, o, 1, "int32"), c = T([n, r], 1), b = d([o]), g = I(c, b, [o, s]), k = D(t, g), w = a.reshape([o, 1]);
137
137
  return x(k, w);
@@ -141,10 +141,10 @@ const F = {
141
141
  backendName: "cpu",
142
142
  kernelFunc: A
143
143
  };
144
- p(F);
145
- function M(e, t, r) {
144
+ h(F);
145
+ function R(e, t, r) {
146
146
  return $().runKernel("EfficientScatterSub", { logits: e, labels: t, dy: r }, {});
147
147
  }
148
148
  export {
149
- M as scatterSub
149
+ R as scatterSub
150
150
  };
@@ -1,4 +1,4 @@
1
- import { E as e, R as c, o as f, d as u, f as a, P as i } from "./index-D1SlunD-.js";
1
+ import { E as e, R as c, o as f, g as u, h as a, P as i } from "./index-Dsg28SG6.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -15,7 +15,7 @@ import { E as e, R as c, o as f, d as u, f as a, P as i } from "./index-D1SlunD-
15
15
  * limitations under the License.
16
16
  * =============================================================================
17
17
  */
18
- function g(n, s, t = 1, r = "float32") {
18
+ function h(n, s, t = 1, r = "float32") {
19
19
  if (t === 0)
20
20
  throw new Error("Cannot have a step of zero");
21
21
  const o = { start: n, stop: s, step: t, dtype: r };
@@ -43,8 +43,8 @@ function k(n, s = 0) {
43
43
  const r = t, o = { axis: s };
44
44
  return e.runKernel(i, r, o);
45
45
  }
46
- const h = /* @__PURE__ */ f({ stack_: k });
46
+ const l = /* @__PURE__ */ f({ stack_: k });
47
47
  export {
48
- g as r,
49
- h as s
48
+ h as r,
49
+ l as s
50
50
  };
@@ -1,4 +1,4 @@
1
- import { o, c as a, E as u, g as p, h as i, S as x } from "./index-D1SlunD-.js";
1
+ import { o, c as a, E as u, i, j as p, S as x } from "./index-Dsg28SG6.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -17,7 +17,7 @@ import { o, c as a, E as u, g as p, h as i, S as x } from "./index-D1SlunD-.js";
17
17
  */
18
18
  function l(n, t) {
19
19
  const s = { x: a(n, "x", "reshape", "string_or_numeric") }, r = { shape: t };
20
- return u.runKernel(p, s, r);
20
+ return u.runKernel(i, s, r);
21
21
  }
22
22
  const h = /* @__PURE__ */ o({ reshape_: l });
23
23
  /**
@@ -38,7 +38,7 @@ const h = /* @__PURE__ */ o({ reshape_: l });
38
38
  */
39
39
  function m(n, t = null, e = !1) {
40
40
  let s = a(n, "x", "sum");
41
- s.dtype === "bool" && (s = i(s, "int32"));
41
+ s.dtype === "bool" && (s = p(s, "int32"));
42
42
  const r = { x: s }, c = { axis: t, keepDims: e };
43
43
  return u.runKernel(x, r, c);
44
44
  }
@@ -1,4 +1,4 @@
1
- import { A as r, m as c, s as h, a as g, e as o } from "../index-D1SlunD-.js";
1
+ import { A as r, m as c, s as h, a as g, e as o } from "../index-Dsg28SG6.js";
2
2
  class u extends r {
3
3
  constructor(t, e, s, a, i) {
4
4
  super(t, e, s, a), this.config = i, this.startLearningRate = t;
@@ -1,7 +1,7 @@
1
1
  import { gatherSub as w } from "../ops/gatherSub.js";
2
2
  import { scatterSub as K } from "../ops/scatterSub.js";
3
- import { o as l, c as d, E as f, M as _, p as z, L as I, q as N, a as E, t as M, u as T, e as m, v as g, w as $, z as S } from "../index-D1SlunD-.js";
4
- import { s as F, r as b } from "../sum-02UQ5Eaq.js";
3
+ import { o as l, c as d, E as f, M as _, t as z, L as I, u as N, a as E, v as M, w as T, e as m, x as g, y as $, z as S } from "../index-Dsg28SG6.js";
4
+ import { s as F, r as b } from "../sum-NWazHI7f.js";
5
5
  /**
6
6
  * @license
7
7
  * Copyright 2017 Google LLC. All Rights Reserved.
@@ -25,7 +25,7 @@ function P(n, s, t) {
25
25
  t.indexOf(o) === -1 ? e.push(n[r++]) : e.push(s[c++]);
26
26
  return e;
27
27
  }
28
- function q(n, s) {
28
+ function A(n, s) {
29
29
  const t = s.map((a) => 1);
30
30
  return P(n, t, s);
31
31
  }
@@ -45,11 +45,11 @@ function q(n, s) {
45
45
  * limitations under the License.
46
46
  * =============================================================================
47
47
  */
48
- function A(n, s = null, t = !1) {
48
+ function D(n, s = null, t = !1) {
49
49
  const e = { x: d(n, "x", "max") }, r = { reductionIndices: s, keepDims: t };
50
50
  return f.runKernel(_, e, r);
51
51
  }
52
- const L = /* @__PURE__ */ l({ max_: A });
52
+ const L = /* @__PURE__ */ l({ max_: D });
53
53
  /**
54
54
  * @license
55
55
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -66,11 +66,11 @@ const L = /* @__PURE__ */ l({ max_: A });
66
66
  * limitations under the License.
67
67
  * =============================================================================
68
68
  */
69
- function D(n) {
69
+ function O(n) {
70
70
  const t = { x: d(n, "x", "exp") };
71
71
  return f.runKernel(z, t);
72
72
  }
73
- const O = /* @__PURE__ */ l({ exp_: D });
73
+ const W = /* @__PURE__ */ l({ exp_: O });
74
74
  /**
75
75
  * @license
76
76
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -87,11 +87,11 @@ const O = /* @__PURE__ */ l({ exp_: D });
87
87
  * limitations under the License.
88
88
  * =============================================================================
89
89
  */
90
- function W(n) {
90
+ function j(n) {
91
91
  const t = { x: d(n, "x", "log", "float32") };
92
92
  return f.runKernel(I, t);
93
93
  }
94
- const j = /* @__PURE__ */ l({ log_: W });
94
+ const q = /* @__PURE__ */ l({ log_: j });
95
95
  /**
96
96
  * @license
97
97
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -114,9 +114,9 @@ function B(n, s = null, t = !1) {
114
114
  e,
115
115
  !0
116
116
  /* keepDims */
117
- ), c = E(a, r), o = O(c), p = F(o, e), u = j(p), i = M(b(r, u.shape), u);
117
+ ), c = E(a, r), o = W(c), p = F(o, e), u = q(p), i = M(b(r, u.shape), u);
118
118
  if (t) {
119
- const h = q(i.shape, e);
119
+ const h = A(i.shape, e);
120
120
  return b(i, h);
121
121
  }
122
122
  return i;
@@ -165,7 +165,7 @@ function ss() {
165
165
  (s, t, a) => {
166
166
  const e = s.shape[s.shape.length - 1], c = s.shape.slice(0, -1).reduce((h, x) => h * x, 1), o = s.reshape([c, e]), p = t.reshape([c]).cast("int32"), u = R(o, p);
167
167
  return a([o, p]), o.dispose(), p.dispose(), { value: u, gradFunc: (h, x) => $(() => {
168
- const k = x[0], y = x[1], C = Q(k), G = K(C, y, h), v = S(t);
168
+ const y = x[0], k = x[1], C = Q(y), G = K(C, k, h), v = S(t);
169
169
  return [G.reshape(s.shape), v];
170
170
  }) };
171
171
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@genai-fi/nanogpt",
3
- "version": "0.2.7",
3
+ "version": "0.2.9",
4
4
  "type": "module",
5
5
  "main": "dist/main.js",
6
6
  "types": "dist/main.d.ts",