@genai-fi/nanogpt 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{complex-CeoYJn2o.js → complex-Cd8sqiBC.js} +1 -1
- package/dist/{index-DQfEAU9u.js → index-Dsg28SG6.js} +304 -299
- package/dist/layers/TiedEmbedding.js +21 -21
- package/dist/{mat_mul-CuHB58-H.js → mat_mul-BAYDrXvE.js} +3 -3
- package/dist/ops/attentionMask.js +40 -20
- package/dist/ops/gatherSub.js +2 -2
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/scatterSub.js +5 -5
- package/dist/{stack-C9cTkqpq.js → stack-1o648CP_.js} +5 -5
- package/dist/{sum-B-O33dgG.js → sum-NWazHI7f.js} +1 -1
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/sparseCrossEntropy.js +12 -12
- package/package.json +1 -1
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { o as h, c as i, E as o,
|
|
2
|
-
import { s as ce, r as f } from "../sum-
|
|
3
|
-
import { m } from "../mat_mul-
|
|
4
|
-
import { c as pe } from "../complex-
|
|
1
|
+
import { o as h, c as i, E as o, D as V, F as X, I as Y, H as Z, N as ee, J as te, K as se, O as ne, Q as re, T as ue, h as L, y as ae, U as A, m as ie, V as oe, v as le, d as q, n as C, W as P, x as U, _ as H } from "../index-Dsg28SG6.js";
|
|
2
|
+
import { s as ce, r as f } from "../sum-NWazHI7f.js";
|
|
3
|
+
import { m } from "../mat_mul-BAYDrXvE.js";
|
|
4
|
+
import { c as pe } from "../complex-Cd8sqiBC.js";
|
|
5
5
|
/**
|
|
6
6
|
* @license
|
|
7
7
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -169,7 +169,7 @@ function Me(t) {
|
|
|
169
169
|
const s = { x: i(t, "x", "relu") };
|
|
170
170
|
return o.runKernel(ne, s);
|
|
171
171
|
}
|
|
172
|
-
const
|
|
172
|
+
const We = /* @__PURE__ */ h({ relu_: Me });
|
|
173
173
|
/**
|
|
174
174
|
* @license
|
|
175
175
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -186,11 +186,11 @@ const we = /* @__PURE__ */ h({ relu_: Me });
|
|
|
186
186
|
* limitations under the License.
|
|
187
187
|
* =============================================================================
|
|
188
188
|
*/
|
|
189
|
-
function
|
|
189
|
+
function we(t) {
|
|
190
190
|
const s = { x: i(t, "x", "relu6") };
|
|
191
191
|
return o.runKernel(re, s);
|
|
192
192
|
}
|
|
193
|
-
const ze = /* @__PURE__ */ h({ relu6_:
|
|
193
|
+
const ze = /* @__PURE__ */ h({ relu6_: we });
|
|
194
194
|
/**
|
|
195
195
|
* @license
|
|
196
196
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -273,7 +273,7 @@ function Te(t, e, s, n) {
|
|
|
273
273
|
if (e === "linear")
|
|
274
274
|
return t;
|
|
275
275
|
if (e === "relu")
|
|
276
|
-
return
|
|
276
|
+
return We(t);
|
|
277
277
|
if (e === "elu")
|
|
278
278
|
return me(t);
|
|
279
279
|
if (e === "relu6")
|
|
@@ -310,14 +310,14 @@ function Ne({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activ
|
|
|
310
310
|
}
|
|
311
311
|
let u = i(t, "a", "fused matMul"), a = i(e, "b", "fused matMul");
|
|
312
312
|
[u, a] = q(u, a);
|
|
313
|
-
const D = s ? u.shape[u.rank - 2] : u.shape[u.rank - 1], b = n ? a.shape[a.rank - 1] : a.shape[a.rank - 2],
|
|
313
|
+
const D = s ? u.shape[u.rank - 2] : u.shape[u.rank - 1], b = n ? a.shape[a.rank - 1] : a.shape[a.rank - 2], W = s ? u.shape[u.rank - 1] : u.shape[u.rank - 2], w = n ? a.shape[a.rank - 2] : a.shape[a.rank - 1], T = u.shape.slice(0, -2), y = a.shape.slice(0, -2), B = C(T), N = C(y);
|
|
314
314
|
L(D === b, () => `Error in fused matMul: inner shapes (${D}) and (${b}) of Tensors with shapes ${u.shape} and ${a.shape} and transposeA=${s} and transposeB=${n} must match.`);
|
|
315
|
-
const O = P(u.shape.slice(0, -2), a.shape.slice(0, -2)).concat([
|
|
315
|
+
const O = P(u.shape.slice(0, -2), a.shape.slice(0, -2)).concat([W, w]), F = s ? f(u, [B, D, W]) : f(u, [B, W, D]), R = n ? f(a, [N, w, b]) : f(a, [N, b, w]);
|
|
316
316
|
let S;
|
|
317
317
|
r != null && (S = i(r, "bias", "fused matMul"), [S] = q(S, u), P(O, S.shape));
|
|
318
|
-
let
|
|
319
|
-
l != null && (
|
|
320
|
-
const
|
|
318
|
+
let v;
|
|
319
|
+
l != null && (v = i(l, "prelu weights", "fused matMul"));
|
|
320
|
+
const G = (x, M) => {
|
|
321
321
|
const [g, $, k, z] = M, d = Ae(f(x, k.shape), k, c);
|
|
322
322
|
let K, _;
|
|
323
323
|
if (!s && !n ? (K = m(d, $, !1, !0), _ = m(g, d, !0, !1)) : !s && n ? (K = m(d, $, !1, !1), _ = m(d, g, !0, !1)) : s && !n ? (K = m($, d, !1, !0), _ = m(g, d, !1, !1)) : (K = m($, d, !0, !0), _ = m(d, g, !0, !0)), r != null) {
|
|
@@ -325,24 +325,24 @@ function Ne({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activ
|
|
|
325
325
|
return [K, _, Q];
|
|
326
326
|
} else
|
|
327
327
|
return [K, _];
|
|
328
|
-
},
|
|
328
|
+
}, I = {
|
|
329
329
|
a: F,
|
|
330
330
|
b: R,
|
|
331
331
|
bias: S,
|
|
332
|
-
preluActivationWeights:
|
|
332
|
+
preluActivationWeights: v
|
|
333
333
|
}, j = { transposeA: s, transposeB: n, activation: c, leakyreluAlpha: p };
|
|
334
334
|
return r == null ? U((M, g, $) => {
|
|
335
335
|
const k = (
|
|
336
336
|
// tslint:disable-next-line: no-unnecessary-type-assertion
|
|
337
|
-
o.runKernel(H,
|
|
337
|
+
o.runKernel(H, I, j)
|
|
338
338
|
);
|
|
339
|
-
return $([M, g, k]), { value: f(k, O), gradFunc:
|
|
339
|
+
return $([M, g, k]), { value: f(k, O), gradFunc: G };
|
|
340
340
|
})(F, R) : U((M, g, $, k) => {
|
|
341
341
|
const z = (
|
|
342
342
|
// tslint:disable-next-line: no-unnecessary-type-assertion
|
|
343
|
-
o.runKernel(H,
|
|
343
|
+
o.runKernel(H, I, j)
|
|
344
344
|
);
|
|
345
|
-
return k([M, g, z, $]), { value: f(z, O), gradFunc:
|
|
345
|
+
return k([M, g, z, $]), { value: f(z, O), gradFunc: G };
|
|
346
346
|
})(F, R, S);
|
|
347
347
|
}
|
|
348
348
|
const J = /* @__PURE__ */ h({ fusedMatMul_: Ne });
|
|
@@ -369,7 +369,7 @@ class E extends Error {
|
|
|
369
369
|
* https://opensource.org/licenses/MIT.
|
|
370
370
|
* =============================================================================
|
|
371
371
|
*/
|
|
372
|
-
function
|
|
372
|
+
function ve(t, e, s, n) {
|
|
373
373
|
if (t.rank < 2 || e.rank < 2)
|
|
374
374
|
throw new E(`dot requires both inputs to be rank >= 2 but got x shape = ${t.shape} and y shape = ${e.shape}`);
|
|
375
375
|
if (e.rank >= 3) {
|
|
@@ -425,7 +425,7 @@ class Pe {
|
|
|
425
425
|
return this.tf.gather(this.tiedWeights, e, 0);
|
|
426
426
|
}
|
|
427
427
|
project(e) {
|
|
428
|
-
return
|
|
428
|
+
return ve(e, this.tiedWeights.transpose());
|
|
429
429
|
}
|
|
430
430
|
getWeights() {
|
|
431
431
|
return [this.tiedWeights];
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { o as c, c as s,
|
|
1
|
+
import { o as c, c as s, d as m, E as M, B as p } from "./index-Dsg28SG6.js";
|
|
2
2
|
/**
|
|
3
3
|
* @license
|
|
4
4
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -15,13 +15,13 @@ import { o as c, c as s, b as m, E as M, B as p } from "./index-DQfEAU9u.js";
|
|
|
15
15
|
* limitations under the License.
|
|
16
16
|
* =============================================================================
|
|
17
17
|
*/
|
|
18
|
-
function
|
|
18
|
+
function f(e, o, n = !1, l = !1) {
|
|
19
19
|
let a = s(e, "a", "matMul"), t = s(o, "b", "matMul");
|
|
20
20
|
[a, t] = m(a, t);
|
|
21
21
|
const r = { a, b: t }, u = { transposeA: n, transposeB: l };
|
|
22
22
|
return M.runKernel(p, r, u);
|
|
23
23
|
}
|
|
24
|
-
const i = /* @__PURE__ */ c({ matMul_:
|
|
24
|
+
const i = /* @__PURE__ */ c({ matMul_: f });
|
|
25
25
|
export {
|
|
26
26
|
i as m
|
|
27
27
|
};
|
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
import { engine as
|
|
2
|
-
import { r as k, s as
|
|
3
|
-
import { m as
|
|
4
|
-
class
|
|
1
|
+
import { engine as l } from "@tensorflow/tfjs";
|
|
2
|
+
import { r as u, b as k, s as d } from "../index-Dsg28SG6.js";
|
|
3
|
+
import { m as p } from "../mat_mul-BAYDrXvE.js";
|
|
4
|
+
class f {
|
|
5
5
|
variableNames = ["q", "k", "mask"];
|
|
6
6
|
outputShape;
|
|
7
7
|
userCode;
|
|
8
8
|
// enableShapeUniforms = true;
|
|
9
9
|
customUniforms = [{ name: "divisor", type: "float" }];
|
|
10
|
-
constructor(
|
|
11
|
-
this.outputShape = [
|
|
10
|
+
constructor(s, n, e, a) {
|
|
11
|
+
this.outputShape = [s, n, e, e], this.userCode = `
|
|
12
12
|
void main() {
|
|
13
13
|
ivec4 coords = getOutputCoords(); // [batch, nh, t1, t2]
|
|
14
14
|
int b = coords.x;
|
|
@@ -34,29 +34,49 @@ class p {
|
|
|
34
34
|
`;
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
|
-
function
|
|
38
|
-
const { q:
|
|
39
|
-
return o.runWebGLProgram(m, [
|
|
37
|
+
function h(t) {
|
|
38
|
+
const { q: s, k: n, mask: e } = t.inputs, { divisor: a } = t.attrs, o = t.backend, r = s.shape[0], i = s.shape[2], c = s.shape[1], m = new f(r, c, i, s.shape[3]);
|
|
39
|
+
return o.runWebGLProgram(m, [s, n, e], "float32", [[a]]);
|
|
40
40
|
}
|
|
41
|
-
const
|
|
41
|
+
const v = {
|
|
42
42
|
kernelName: "AttentionMask",
|
|
43
43
|
backendName: "webgl",
|
|
44
|
-
kernelFunc:
|
|
44
|
+
kernelFunc: h
|
|
45
45
|
};
|
|
46
|
-
|
|
47
|
-
function b(
|
|
48
|
-
const { q:
|
|
49
|
-
return i.add(
|
|
46
|
+
u(v);
|
|
47
|
+
function b(t) {
|
|
48
|
+
const { q: s, k: n, mask: e } = t.inputs, { divisor: a } = t.attrs, o = s.shape[2], i = p(s, n, !1, !0).mul(d(a)), c = e.slice([0, 0], [o, o]).expandDims(0).expandDims(0);
|
|
49
|
+
return i.add(c);
|
|
50
50
|
}
|
|
51
|
-
const
|
|
51
|
+
const M = {
|
|
52
52
|
kernelName: "AttentionMask",
|
|
53
53
|
backendName: "cpu",
|
|
54
54
|
kernelFunc: b
|
|
55
55
|
};
|
|
56
|
-
|
|
57
|
-
function
|
|
58
|
-
return
|
|
56
|
+
u(M);
|
|
57
|
+
function w(t, s, n, e) {
|
|
58
|
+
return l().runKernel("AttentionMask", { q: t, k: s, mask: n }, { divisor: e });
|
|
59
59
|
}
|
|
60
|
+
const g = {
|
|
61
|
+
kernelName: "AttentionMask",
|
|
62
|
+
inputsToSave: ["q", "k"],
|
|
63
|
+
outputsToSave: [],
|
|
64
|
+
gradFunc: (t, s, n) => {
|
|
65
|
+
if (Array.isArray(t))
|
|
66
|
+
throw new Error("Expected dy to be a single Tensor");
|
|
67
|
+
const [e, a] = s, { divisor: o } = n;
|
|
68
|
+
return {
|
|
69
|
+
q: () => t.matMul(a).mul(o),
|
|
70
|
+
k: () => e.transpose([0, 1, 3, 2]).matMul(t).mul(o).transpose([0, 1, 3, 2]),
|
|
71
|
+
mask: () => t,
|
|
72
|
+
divisor: () => {
|
|
73
|
+
const r = e.matMul(a, !1, !0);
|
|
74
|
+
return t.mul(r).sum();
|
|
75
|
+
}
|
|
76
|
+
};
|
|
77
|
+
}
|
|
78
|
+
};
|
|
79
|
+
k(g);
|
|
60
80
|
export {
|
|
61
|
-
|
|
81
|
+
w as attentionMask
|
|
62
82
|
};
|
package/dist/ops/gatherSub.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { engine as l } from "@tensorflow/tfjs";
|
|
2
|
-
import { o as g, c as i, E as b, G as d, r as c, a as h } from "../index-
|
|
3
|
-
import { r as p, s as f } from "../stack-
|
|
2
|
+
import { o as g, c as i, E as b, G as d, r as c, a as h } from "../index-Dsg28SG6.js";
|
|
3
|
+
import { r as p, s as f } from "../stack-1o648CP_.js";
|
|
4
4
|
/**
|
|
5
5
|
* @license
|
|
6
6
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
package/dist/ops/scatterSub.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { engine as $ } from "@tensorflow/tfjs";
|
|
2
|
-
import {
|
|
3
|
-
import { c as m } from "../complex-
|
|
4
|
-
import { r as v, s as T } from "../stack-
|
|
2
|
+
import { k as u, l as S, n as p, E as f, p as E, o as N, c as l, q as y, r as h, a as D, m as x } from "../index-Dsg28SG6.js";
|
|
3
|
+
import { c as m } from "../complex-Cd8sqiBC.js";
|
|
4
|
+
import { r as v, s as T } from "../stack-1o648CP_.js";
|
|
5
5
|
/**
|
|
6
6
|
* @license
|
|
7
7
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -142,9 +142,9 @@ const F = {
|
|
|
142
142
|
kernelFunc: A
|
|
143
143
|
};
|
|
144
144
|
h(F);
|
|
145
|
-
function
|
|
145
|
+
function R(e, t, r) {
|
|
146
146
|
return $().runKernel("EfficientScatterSub", { logits: e, labels: t, dy: r }, {});
|
|
147
147
|
}
|
|
148
148
|
export {
|
|
149
|
-
|
|
149
|
+
R as scatterSub
|
|
150
150
|
};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { E as e, R as c, o as f,
|
|
1
|
+
import { E as e, R as c, o as f, g as u, h as a, P as i } from "./index-Dsg28SG6.js";
|
|
2
2
|
/**
|
|
3
3
|
* @license
|
|
4
4
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -15,7 +15,7 @@ import { E as e, R as c, o as f, f as u, g as a, P as i } from "./index-DQfEAU9u
|
|
|
15
15
|
* limitations under the License.
|
|
16
16
|
* =============================================================================
|
|
17
17
|
*/
|
|
18
|
-
function
|
|
18
|
+
function h(n, s, t = 1, r = "float32") {
|
|
19
19
|
if (t === 0)
|
|
20
20
|
throw new Error("Cannot have a step of zero");
|
|
21
21
|
const o = { start: n, stop: s, step: t, dtype: r };
|
|
@@ -43,8 +43,8 @@ function k(n, s = 0) {
|
|
|
43
43
|
const r = t, o = { axis: s };
|
|
44
44
|
return e.runKernel(i, r, o);
|
|
45
45
|
}
|
|
46
|
-
const
|
|
46
|
+
const l = /* @__PURE__ */ f({ stack_: k });
|
|
47
47
|
export {
|
|
48
|
-
|
|
49
|
-
|
|
48
|
+
h as r,
|
|
49
|
+
l as s
|
|
50
50
|
};
|
package/dist/training/AdamExt.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as r, m as c, s as h, a as g, e as o } from "../index-
|
|
1
|
+
import { A as r, m as c, s as h, a as g, e as o } from "../index-Dsg28SG6.js";
|
|
2
2
|
class u extends r {
|
|
3
3
|
constructor(t, e, s, a, i) {
|
|
4
4
|
super(t, e, s, a), this.config = i, this.startLearningRate = t;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { gatherSub as w } from "../ops/gatherSub.js";
|
|
2
2
|
import { scatterSub as K } from "../ops/scatterSub.js";
|
|
3
|
-
import { o as l, c as d, E as f, M as _,
|
|
4
|
-
import { s as F, r as b } from "../sum-
|
|
3
|
+
import { o as l, c as d, E as f, M as _, t as z, L as I, u as N, a as E, v as M, w as T, e as m, x as g, y as $, z as S } from "../index-Dsg28SG6.js";
|
|
4
|
+
import { s as F, r as b } from "../sum-NWazHI7f.js";
|
|
5
5
|
/**
|
|
6
6
|
* @license
|
|
7
7
|
* Copyright 2017 Google LLC. All Rights Reserved.
|
|
@@ -25,7 +25,7 @@ function P(n, s, t) {
|
|
|
25
25
|
t.indexOf(o) === -1 ? e.push(n[r++]) : e.push(s[c++]);
|
|
26
26
|
return e;
|
|
27
27
|
}
|
|
28
|
-
function
|
|
28
|
+
function A(n, s) {
|
|
29
29
|
const t = s.map((a) => 1);
|
|
30
30
|
return P(n, t, s);
|
|
31
31
|
}
|
|
@@ -45,11 +45,11 @@ function q(n, s) {
|
|
|
45
45
|
* limitations under the License.
|
|
46
46
|
* =============================================================================
|
|
47
47
|
*/
|
|
48
|
-
function
|
|
48
|
+
function D(n, s = null, t = !1) {
|
|
49
49
|
const e = { x: d(n, "x", "max") }, r = { reductionIndices: s, keepDims: t };
|
|
50
50
|
return f.runKernel(_, e, r);
|
|
51
51
|
}
|
|
52
|
-
const L = /* @__PURE__ */ l({ max_:
|
|
52
|
+
const L = /* @__PURE__ */ l({ max_: D });
|
|
53
53
|
/**
|
|
54
54
|
* @license
|
|
55
55
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -66,11 +66,11 @@ const L = /* @__PURE__ */ l({ max_: A });
|
|
|
66
66
|
* limitations under the License.
|
|
67
67
|
* =============================================================================
|
|
68
68
|
*/
|
|
69
|
-
function
|
|
69
|
+
function O(n) {
|
|
70
70
|
const t = { x: d(n, "x", "exp") };
|
|
71
71
|
return f.runKernel(z, t);
|
|
72
72
|
}
|
|
73
|
-
const
|
|
73
|
+
const W = /* @__PURE__ */ l({ exp_: O });
|
|
74
74
|
/**
|
|
75
75
|
* @license
|
|
76
76
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -87,11 +87,11 @@ const O = /* @__PURE__ */ l({ exp_: D });
|
|
|
87
87
|
* limitations under the License.
|
|
88
88
|
* =============================================================================
|
|
89
89
|
*/
|
|
90
|
-
function
|
|
90
|
+
function j(n) {
|
|
91
91
|
const t = { x: d(n, "x", "log", "float32") };
|
|
92
92
|
return f.runKernel(I, t);
|
|
93
93
|
}
|
|
94
|
-
const
|
|
94
|
+
const q = /* @__PURE__ */ l({ log_: j });
|
|
95
95
|
/**
|
|
96
96
|
* @license
|
|
97
97
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -114,9 +114,9 @@ function B(n, s = null, t = !1) {
|
|
|
114
114
|
e,
|
|
115
115
|
!0
|
|
116
116
|
/* keepDims */
|
|
117
|
-
), c = E(a, r), o =
|
|
117
|
+
), c = E(a, r), o = W(c), p = F(o, e), u = q(p), i = M(b(r, u.shape), u);
|
|
118
118
|
if (t) {
|
|
119
|
-
const h =
|
|
119
|
+
const h = A(i.shape, e);
|
|
120
120
|
return b(i, h);
|
|
121
121
|
}
|
|
122
122
|
return i;
|
|
@@ -165,7 +165,7 @@ function ss() {
|
|
|
165
165
|
(s, t, a) => {
|
|
166
166
|
const e = s.shape[s.shape.length - 1], c = s.shape.slice(0, -1).reduce((h, x) => h * x, 1), o = s.reshape([c, e]), p = t.reshape([c]).cast("int32"), u = R(o, p);
|
|
167
167
|
return a([o, p]), o.dispose(), p.dispose(), { value: u, gradFunc: (h, x) => $(() => {
|
|
168
|
-
const
|
|
168
|
+
const y = x[0], k = x[1], C = Q(y), G = K(C, k, h), v = S(t);
|
|
169
169
|
return [G.reshape(s.shape), v];
|
|
170
170
|
}) };
|
|
171
171
|
}
|