@genai-fi/nanogpt 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
- import { o as h, c as i, E as o, y as V, D as X, I as Y, F as Z, N as ee, H as te, J as se, K as ne, O as re, Q as ue, g as L, x as ae, T as A, m as ie, U as oe, u as le, b as q, l as C, V as P, w as U, _ as H } from "../index-DQfEAU9u.js";
2
- import { s as ce, r as f } from "../sum-B-O33dgG.js";
3
- import { m } from "../mat_mul-CuHB58-H.js";
4
- import { c as pe } from "../complex-CeoYJn2o.js";
1
+ import { o as h, c as i, E as o, D as V, F as X, I as Y, H as Z, N as ee, J as te, K as se, O as ne, Q as re, T as ue, h as L, y as ae, U as A, m as ie, V as oe, v as le, d as q, n as C, W as P, x as U, _ as H } from "../index-Dsg28SG6.js";
2
+ import { s as ce, r as f } from "../sum-NWazHI7f.js";
3
+ import { m } from "../mat_mul-BAYDrXvE.js";
4
+ import { c as pe } from "../complex-Cd8sqiBC.js";
5
5
  /**
6
6
  * @license
7
7
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -169,7 +169,7 @@ function Me(t) {
169
169
  const s = { x: i(t, "x", "relu") };
170
170
  return o.runKernel(ne, s);
171
171
  }
172
- const we = /* @__PURE__ */ h({ relu_: Me });
172
+ const We = /* @__PURE__ */ h({ relu_: Me });
173
173
  /**
174
174
  * @license
175
175
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -186,11 +186,11 @@ const we = /* @__PURE__ */ h({ relu_: Me });
186
186
  * limitations under the License.
187
187
  * =============================================================================
188
188
  */
189
- function We(t) {
189
+ function we(t) {
190
190
  const s = { x: i(t, "x", "relu6") };
191
191
  return o.runKernel(re, s);
192
192
  }
193
- const ze = /* @__PURE__ */ h({ relu6_: We });
193
+ const ze = /* @__PURE__ */ h({ relu6_: we });
194
194
  /**
195
195
  * @license
196
196
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -273,7 +273,7 @@ function Te(t, e, s, n) {
273
273
  if (e === "linear")
274
274
  return t;
275
275
  if (e === "relu")
276
- return we(t);
276
+ return We(t);
277
277
  if (e === "elu")
278
278
  return me(t);
279
279
  if (e === "relu6")
@@ -310,14 +310,14 @@ function Ne({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activ
310
310
  }
311
311
  let u = i(t, "a", "fused matMul"), a = i(e, "b", "fused matMul");
312
312
  [u, a] = q(u, a);
313
- const D = s ? u.shape[u.rank - 2] : u.shape[u.rank - 1], b = n ? a.shape[a.rank - 1] : a.shape[a.rank - 2], w = s ? u.shape[u.rank - 1] : u.shape[u.rank - 2], W = n ? a.shape[a.rank - 2] : a.shape[a.rank - 1], T = u.shape.slice(0, -2), y = a.shape.slice(0, -2), B = C(T), N = C(y);
313
+ const D = s ? u.shape[u.rank - 2] : u.shape[u.rank - 1], b = n ? a.shape[a.rank - 1] : a.shape[a.rank - 2], W = s ? u.shape[u.rank - 1] : u.shape[u.rank - 2], w = n ? a.shape[a.rank - 2] : a.shape[a.rank - 1], T = u.shape.slice(0, -2), y = a.shape.slice(0, -2), B = C(T), N = C(y);
314
314
  L(D === b, () => `Error in fused matMul: inner shapes (${D}) and (${b}) of Tensors with shapes ${u.shape} and ${a.shape} and transposeA=${s} and transposeB=${n} must match.`);
315
- const O = P(u.shape.slice(0, -2), a.shape.slice(0, -2)).concat([w, W]), F = s ? f(u, [B, D, w]) : f(u, [B, w, D]), R = n ? f(a, [N, W, b]) : f(a, [N, b, W]);
315
+ const O = P(u.shape.slice(0, -2), a.shape.slice(0, -2)).concat([W, w]), F = s ? f(u, [B, D, W]) : f(u, [B, W, D]), R = n ? f(a, [N, w, b]) : f(a, [N, b, w]);
316
316
  let S;
317
317
  r != null && (S = i(r, "bias", "fused matMul"), [S] = q(S, u), P(O, S.shape));
318
- let G;
319
- l != null && (G = i(l, "prelu weights", "fused matMul"));
320
- const I = (x, M) => {
318
+ let v;
319
+ l != null && (v = i(l, "prelu weights", "fused matMul"));
320
+ const G = (x, M) => {
321
321
  const [g, $, k, z] = M, d = Ae(f(x, k.shape), k, c);
322
322
  let K, _;
323
323
  if (!s && !n ? (K = m(d, $, !1, !0), _ = m(g, d, !0, !1)) : !s && n ? (K = m(d, $, !1, !1), _ = m(d, g, !0, !1)) : s && !n ? (K = m($, d, !1, !0), _ = m(g, d, !1, !1)) : (K = m($, d, !0, !0), _ = m(d, g, !0, !0)), r != null) {
@@ -325,24 +325,24 @@ function Ne({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activ
325
325
  return [K, _, Q];
326
326
  } else
327
327
  return [K, _];
328
- }, v = {
328
+ }, I = {
329
329
  a: F,
330
330
  b: R,
331
331
  bias: S,
332
- preluActivationWeights: G
332
+ preluActivationWeights: v
333
333
  }, j = { transposeA: s, transposeB: n, activation: c, leakyreluAlpha: p };
334
334
  return r == null ? U((M, g, $) => {
335
335
  const k = (
336
336
  // tslint:disable-next-line: no-unnecessary-type-assertion
337
- o.runKernel(H, v, j)
337
+ o.runKernel(H, I, j)
338
338
  );
339
- return $([M, g, k]), { value: f(k, O), gradFunc: I };
339
+ return $([M, g, k]), { value: f(k, O), gradFunc: G };
340
340
  })(F, R) : U((M, g, $, k) => {
341
341
  const z = (
342
342
  // tslint:disable-next-line: no-unnecessary-type-assertion
343
- o.runKernel(H, v, j)
343
+ o.runKernel(H, I, j)
344
344
  );
345
- return k([M, g, z, $]), { value: f(z, O), gradFunc: I };
345
+ return k([M, g, z, $]), { value: f(z, O), gradFunc: G };
346
346
  })(F, R, S);
347
347
  }
348
348
  const J = /* @__PURE__ */ h({ fusedMatMul_: Ne });
@@ -369,7 +369,7 @@ class E extends Error {
369
369
  * https://opensource.org/licenses/MIT.
370
370
  * =============================================================================
371
371
  */
372
- function Ge(t, e, s, n) {
372
+ function ve(t, e, s, n) {
373
373
  if (t.rank < 2 || e.rank < 2)
374
374
  throw new E(`dot requires both inputs to be rank >= 2 but got x shape = ${t.shape} and y shape = ${e.shape}`);
375
375
  if (e.rank >= 3) {
@@ -425,7 +425,7 @@ class Pe {
425
425
  return this.tf.gather(this.tiedWeights, e, 0);
426
426
  }
427
427
  project(e) {
428
- return Ge(e, this.tiedWeights.transpose());
428
+ return ve(e, this.tiedWeights.transpose());
429
429
  }
430
430
  getWeights() {
431
431
  return [this.tiedWeights];
@@ -1,4 +1,4 @@
1
- import { o as c, c as s, b as m, E as M, B as p } from "./index-DQfEAU9u.js";
1
+ import { o as c, c as s, d as m, E as M, B as p } from "./index-Dsg28SG6.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -15,13 +15,13 @@ import { o as c, c as s, b as m, E as M, B as p } from "./index-DQfEAU9u.js";
15
15
  * limitations under the License.
16
16
  * =============================================================================
17
17
  */
18
- function b(e, o, n = !1, l = !1) {
18
+ function f(e, o, n = !1, l = !1) {
19
19
  let a = s(e, "a", "matMul"), t = s(o, "b", "matMul");
20
20
  [a, t] = m(a, t);
21
21
  const r = { a, b: t }, u = { transposeA: n, transposeB: l };
22
22
  return M.runKernel(p, r, u);
23
23
  }
24
- const i = /* @__PURE__ */ c({ matMul_: b });
24
+ const i = /* @__PURE__ */ c({ matMul_: f });
25
25
  export {
26
26
  i as m
27
27
  };
@@ -1,14 +1,14 @@
1
- import { engine as d } from "@tensorflow/tfjs";
2
- import { r as k, s as u } from "../index-DQfEAU9u.js";
3
- import { m as l } from "../mat_mul-CuHB58-H.js";
4
- class p {
1
+ import { engine as l } from "@tensorflow/tfjs";
2
+ import { r as u, b as k, s as d } from "../index-Dsg28SG6.js";
3
+ import { m as p } from "../mat_mul-BAYDrXvE.js";
4
+ class f {
5
5
  variableNames = ["q", "k", "mask"];
6
6
  outputShape;
7
7
  userCode;
8
8
  // enableShapeUniforms = true;
9
9
  customUniforms = [{ name: "divisor", type: "float" }];
10
- constructor(t, e, n, a) {
11
- this.outputShape = [t, e, n, n], this.userCode = `
10
+ constructor(s, n, e, a) {
11
+ this.outputShape = [s, n, e, e], this.userCode = `
12
12
  void main() {
13
13
  ivec4 coords = getOutputCoords(); // [batch, nh, t1, t2]
14
14
  int b = coords.x;
@@ -34,29 +34,49 @@ class p {
34
34
  `;
35
35
  }
36
36
  }
37
- function f(s) {
38
- const { q: t, k: e, mask: n } = s.inputs, { divisor: a } = s.attrs, o = s.backend, c = t.shape[0], i = t.shape[2], r = t.shape[1], m = new p(c, r, i, t.shape[3]);
39
- return o.runWebGLProgram(m, [t, e, n], "float32", [[a]]);
37
+ function h(t) {
38
+ const { q: s, k: n, mask: e } = t.inputs, { divisor: a } = t.attrs, o = t.backend, r = s.shape[0], i = s.shape[2], c = s.shape[1], m = new f(r, c, i, s.shape[3]);
39
+ return o.runWebGLProgram(m, [s, n, e], "float32", [[a]]);
40
40
  }
41
- const h = {
41
+ const v = {
42
42
  kernelName: "AttentionMask",
43
43
  backendName: "webgl",
44
- kernelFunc: f
44
+ kernelFunc: h
45
45
  };
46
- k(h);
47
- function b(s) {
48
- const { q: t, k: e, mask: n } = s.inputs, { divisor: a } = s.attrs, o = t.shape[2], i = l(t, e, !1, !0).mul(u(a)), r = n.slice([0, 0], [o, o]).expandDims(0).expandDims(0);
49
- return i.add(r);
46
+ u(v);
47
+ function b(t) {
48
+ const { q: s, k: n, mask: e } = t.inputs, { divisor: a } = t.attrs, o = s.shape[2], i = p(s, n, !1, !0).mul(d(a)), c = e.slice([0, 0], [o, o]).expandDims(0).expandDims(0);
49
+ return i.add(c);
50
50
  }
51
- const v = {
51
+ const M = {
52
52
  kernelName: "AttentionMask",
53
53
  backendName: "cpu",
54
54
  kernelFunc: b
55
55
  };
56
- k(v);
57
- function C(s, t, e, n) {
58
- return d().runKernel("AttentionMask", { q: s, k: t, mask: e }, { divisor: n });
56
+ u(M);
57
+ function w(t, s, n, e) {
58
+ return l().runKernel("AttentionMask", { q: t, k: s, mask: n }, { divisor: e });
59
59
  }
60
+ const g = {
61
+ kernelName: "AttentionMask",
62
+ inputsToSave: ["q", "k"],
63
+ outputsToSave: [],
64
+ gradFunc: (t, s, n) => {
65
+ if (Array.isArray(t))
66
+ throw new Error("Expected dy to be a single Tensor");
67
+ const [e, a] = s, { divisor: o } = n;
68
+ return {
69
+ q: () => t.matMul(a).mul(o),
70
+ k: () => e.transpose([0, 1, 3, 2]).matMul(t).mul(o).transpose([0, 1, 3, 2]),
71
+ mask: () => t,
72
+ divisor: () => {
73
+ const r = e.matMul(a, !1, !0);
74
+ return t.mul(r).sum();
75
+ }
76
+ };
77
+ }
78
+ };
79
+ k(g);
60
80
  export {
61
- C as attentionMask
81
+ w as attentionMask
62
82
  };
@@ -1,6 +1,6 @@
1
1
  import { engine as l } from "@tensorflow/tfjs";
2
- import { o as g, c as i, E as b, G as d, r as c, a as h } from "../index-DQfEAU9u.js";
3
- import { r as p, s as f } from "../stack-C9cTkqpq.js";
2
+ import { o as g, c as i, E as b, G as d, r as c, a as h } from "../index-Dsg28SG6.js";
3
+ import { r as p, s as f } from "../stack-1o648CP_.js";
4
4
  /**
5
5
  * @license
6
6
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { r as o } from "../../index-DQfEAU9u.js";
1
+ import { r as o } from "../../index-Dsg28SG6.js";
2
2
  function r(e) {
3
3
  const { logits: t, labels: n } = e.inputs;
4
4
  return e.backend.executeMultipleOutputs("SparseSoftmaxCrossEntropyWithLogits", [], [t, n], 2);
@@ -1,7 +1,7 @@
1
1
  import { engine as $ } from "@tensorflow/tfjs";
2
- import { j as u, k as S, l as p, E as f, n as E, o as N, c as l, p as y, r as h, a as D, m as x } from "../index-DQfEAU9u.js";
3
- import { c as m } from "../complex-CeoYJn2o.js";
4
- import { r as v, s as T } from "../stack-C9cTkqpq.js";
2
+ import { k as u, l as S, n as p, E as f, p as E, o as N, c as l, q as y, r as h, a as D, m as x } from "../index-Dsg28SG6.js";
3
+ import { c as m } from "../complex-Cd8sqiBC.js";
4
+ import { r as v, s as T } from "../stack-1o648CP_.js";
5
5
  /**
6
6
  * @license
7
7
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -142,9 +142,9 @@ const F = {
142
142
  kernelFunc: A
143
143
  };
144
144
  h(F);
145
- function M(e, t, r) {
145
+ function R(e, t, r) {
146
146
  return $().runKernel("EfficientScatterSub", { logits: e, labels: t, dy: r }, {});
147
147
  }
148
148
  export {
149
- M as scatterSub
149
+ R as scatterSub
150
150
  };
@@ -1,4 +1,4 @@
1
- import { E as e, R as c, o as f, f as u, g as a, P as i } from "./index-DQfEAU9u.js";
1
+ import { E as e, R as c, o as f, g as u, h as a, P as i } from "./index-Dsg28SG6.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -15,7 +15,7 @@ import { E as e, R as c, o as f, f as u, g as a, P as i } from "./index-DQfEAU9u
15
15
  * limitations under the License.
16
16
  * =============================================================================
17
17
  */
18
- function l(n, s, t = 1, r = "float32") {
18
+ function h(n, s, t = 1, r = "float32") {
19
19
  if (t === 0)
20
20
  throw new Error("Cannot have a step of zero");
21
21
  const o = { start: n, stop: s, step: t, dtype: r };
@@ -43,8 +43,8 @@ function k(n, s = 0) {
43
43
  const r = t, o = { axis: s };
44
44
  return e.runKernel(i, r, o);
45
45
  }
46
- const h = /* @__PURE__ */ f({ stack_: k });
46
+ const l = /* @__PURE__ */ f({ stack_: k });
47
47
  export {
48
- l as r,
49
- h as s
48
+ h as r,
49
+ l as s
50
50
  };
@@ -1,4 +1,4 @@
1
- import { o, c as a, E as u, h as i, i as p, S as x } from "./index-DQfEAU9u.js";
1
+ import { o, c as a, E as u, i, j as p, S as x } from "./index-Dsg28SG6.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { A as r, m as c, s as h, a as g, e as o } from "../index-DQfEAU9u.js";
1
+ import { A as r, m as c, s as h, a as g, e as o } from "../index-Dsg28SG6.js";
2
2
  class u extends r {
3
3
  constructor(t, e, s, a, i) {
4
4
  super(t, e, s, a), this.config = i, this.startLearningRate = t;
@@ -1,7 +1,7 @@
1
1
  import { gatherSub as w } from "../ops/gatherSub.js";
2
2
  import { scatterSub as K } from "../ops/scatterSub.js";
3
- import { o as l, c as d, E as f, M as _, q as z, L as I, t as N, a as E, u as M, v as T, e as m, w as g, x as $, z as S } from "../index-DQfEAU9u.js";
4
- import { s as F, r as b } from "../sum-B-O33dgG.js";
3
+ import { o as l, c as d, E as f, M as _, t as z, L as I, u as N, a as E, v as M, w as T, e as m, x as g, y as $, z as S } from "../index-Dsg28SG6.js";
4
+ import { s as F, r as b } from "../sum-NWazHI7f.js";
5
5
  /**
6
6
  * @license
7
7
  * Copyright 2017 Google LLC. All Rights Reserved.
@@ -25,7 +25,7 @@ function P(n, s, t) {
25
25
  t.indexOf(o) === -1 ? e.push(n[r++]) : e.push(s[c++]);
26
26
  return e;
27
27
  }
28
- function q(n, s) {
28
+ function A(n, s) {
29
29
  const t = s.map((a) => 1);
30
30
  return P(n, t, s);
31
31
  }
@@ -45,11 +45,11 @@ function q(n, s) {
45
45
  * limitations under the License.
46
46
  * =============================================================================
47
47
  */
48
- function A(n, s = null, t = !1) {
48
+ function D(n, s = null, t = !1) {
49
49
  const e = { x: d(n, "x", "max") }, r = { reductionIndices: s, keepDims: t };
50
50
  return f.runKernel(_, e, r);
51
51
  }
52
- const L = /* @__PURE__ */ l({ max_: A });
52
+ const L = /* @__PURE__ */ l({ max_: D });
53
53
  /**
54
54
  * @license
55
55
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -66,11 +66,11 @@ const L = /* @__PURE__ */ l({ max_: A });
66
66
  * limitations under the License.
67
67
  * =============================================================================
68
68
  */
69
- function D(n) {
69
+ function O(n) {
70
70
  const t = { x: d(n, "x", "exp") };
71
71
  return f.runKernel(z, t);
72
72
  }
73
- const O = /* @__PURE__ */ l({ exp_: D });
73
+ const W = /* @__PURE__ */ l({ exp_: O });
74
74
  /**
75
75
  * @license
76
76
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -87,11 +87,11 @@ const O = /* @__PURE__ */ l({ exp_: D });
87
87
  * limitations under the License.
88
88
  * =============================================================================
89
89
  */
90
- function W(n) {
90
+ function j(n) {
91
91
  const t = { x: d(n, "x", "log", "float32") };
92
92
  return f.runKernel(I, t);
93
93
  }
94
- const j = /* @__PURE__ */ l({ log_: W });
94
+ const q = /* @__PURE__ */ l({ log_: j });
95
95
  /**
96
96
  * @license
97
97
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -114,9 +114,9 @@ function B(n, s = null, t = !1) {
114
114
  e,
115
115
  !0
116
116
  /* keepDims */
117
- ), c = E(a, r), o = O(c), p = F(o, e), u = j(p), i = M(b(r, u.shape), u);
117
+ ), c = E(a, r), o = W(c), p = F(o, e), u = q(p), i = M(b(r, u.shape), u);
118
118
  if (t) {
119
- const h = q(i.shape, e);
119
+ const h = A(i.shape, e);
120
120
  return b(i, h);
121
121
  }
122
122
  return i;
@@ -165,7 +165,7 @@ function ss() {
165
165
  (s, t, a) => {
166
166
  const e = s.shape[s.shape.length - 1], c = s.shape.slice(0, -1).reduce((h, x) => h * x, 1), o = s.reshape([c, e]), p = t.reshape([c]).cast("int32"), u = R(o, p);
167
167
  return a([o, p]), o.dispose(), p.dispose(), { value: u, gradFunc: (h, x) => $(() => {
168
- const k = x[0], y = x[1], C = Q(k), G = K(C, y, h), v = S(t);
168
+ const y = x[0], k = x[1], C = Q(y), G = K(C, k, h), v = S(t);
169
169
  return [G.reshape(s.shape), v];
170
170
  }) };
171
171
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@genai-fi/nanogpt",
3
- "version": "0.2.8",
3
+ "version": "0.2.9",
4
4
  "type": "module",
5
5
  "main": "dist/main.js",
6
6
  "types": "dist/main.d.ts",