@genai-fi/nanogpt 0.5.6 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +8 -7
- package/dist/NanoGPTModel.js +8 -8
- package/dist/{Reshape-Biok_3X1.js → Reshape-CLOrdpve.js} +2 -2
- package/dist/TeachableLLM.js +16 -15
- package/dist/{TiedEmbedding-8S8xn8e6.js → TiedEmbedding-BhxWO8QR.js} +5 -5
- package/dist/{axis_util-BczFISHz.js → axis_util-D17qZRQm.js} +1 -1
- package/dist/{broadcast_to-B7NGsBSh.js → broadcast_to-BMQLjvt_.js} +2 -2
- package/dist/{concat-DdKPyAtw.js → concat-DhZfF1GY.js} +1 -1
- package/dist/{dataset-iqT4Otvb.js → dataset-oilnemHf.js} +3 -3
- package/dist/{dropout-B09InSJS.js → dropout-CrMQPCeG.js} +1 -1
- package/dist/{gather-D6MsdXqc.js → gather-DZCMHZuN.js} +1 -1
- package/dist/{gpgpu_math-BFbOyvk4.js → gpgpu_math-Ctc31slO.js} +1 -1
- package/dist/{index-Du-bmOP8.js → index-bMBtI-WR.js} +50 -50
- package/dist/{kernel_funcs_utils-DShm7-0k.js → kernel_funcs_utils-CNmjLWnB.js} +26 -24
- package/dist/layers/BaseLayer.js +2 -2
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/MLP.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +3 -3
- package/dist/layers/TiedEmbedding.js +6 -6
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/{log_sum_exp-CxfBtUaG.js → log_sum_exp-BHdkCb4s.js} +5 -5
- package/dist/main.js +20 -19
- package/dist/{mat_mul-CbiqIe2d.js → mat_mul-BsrLfy81.js} +1 -1
- package/dist/{max-0Xnlpv8k.js → max-DechV4Bc.js} +1 -1
- package/dist/{norm-01kY9I2B.js → norm-B9hWHZH1.js} +5 -5
- package/dist/{ones-CrutWGas.js → ones-g0K8jVwm.js} +2 -2
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +5 -5
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +3 -3
- package/dist/ops/cpu/gelu.js +1 -1
- package/dist/ops/cpu/matMulGelu.js +1 -1
- package/dist/ops/cpu/matMulMul.js +1 -1
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +4 -4
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +1 -1
- package/dist/ops/grads/attentionMask.js +15 -11
- package/dist/ops/grads/fusedSoftmax.js +12 -10
- package/dist/ops/grads/gelu.js +1 -1
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/normRMS.js +1 -1
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/log.d.ts +0 -0
- package/dist/ops/log.js +1 -0
- package/dist/ops/matMulGelu.js +1 -1
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +205 -3022
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.d.ts +17 -0
- package/dist/ops/webgl/log.js +39 -0
- package/dist/ops/webgl/matMulGelu.js +4 -4
- package/dist/ops/webgl/matMulMul.js +1 -1
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/{ops-CJNniCAV.js → ops-Mv7Ta72x.js} +13 -13
- package/dist/{random_width-C-v-35bY.js → random_width-BBAWzDym.js} +23 -23
- package/dist/{range-Bvs1hidm.js → range-DMaG9A3G.js} +1 -1
- package/dist/{reshape-BH7eBpwq.js → reshape-T4yDEqoF.js} +1 -1
- package/dist/shared-XNAoXhOa.js +2826 -0
- package/dist/{sin-CPAZXNjH.js → sin-EEhbrRO_.js} +1 -1
- package/dist/{slice_util-DskXqRZa.js → slice_util-Ddk0uxGJ.js} +1 -1
- package/dist/{softmax-DhWoBa7r.js → softmax-B2_IKPDR.js} +1 -1
- package/dist/{split-BCUhuU7B.js → split-dcks18H1.js} +1 -1
- package/dist/{stack-BV1v7l3S.js → stack-lpJ5kYvE.js} +1 -1
- package/dist/{sum-Cvq06317.js → sum-CutF5lj2.js} +1 -1
- package/dist/{tensor-DgTOPY6h.js → tensor-C15NA2LA.js} +1 -1
- package/dist/{tensor2d-CRWjDyUe.js → tensor2d-DZ_e5eKM.js} +1 -1
- package/dist/{tfjs_backend-D9Ytje0G.js → tfjs_backend-BDb8r9qx.js} +28 -28
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +3 -3
- package/dist/training/sparseCrossEntropy.js +4 -4
- package/dist/utilities/dummy.js +2 -2
- package/dist/utilities/generate.js +3 -3
- package/dist/utilities/load.d.ts +25 -0
- package/dist/utilities/load.js +89 -37
- package/dist/utilities/profile.js +4 -4
- package/dist/utilities/safetensors.d.ts +3 -0
- package/dist/utilities/safetensors.js +83 -0
- package/dist/utilities/save.js +47 -29
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-DZ3fF0R2.js → variable-CdRKKp8x.js} +1 -1
- package/dist/{zeros-BaHhQTWf.js → zeros-CAbHfODe.js} +1 -1
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import { default as
|
|
2
|
-
import { default as
|
|
3
|
-
import { default as
|
|
4
|
-
import { default as
|
|
5
|
-
import { default as
|
|
6
|
-
import { default as
|
|
7
|
-
import { estimateMemoryUsage as
|
|
8
|
-
import "./index-
|
|
1
|
+
import { default as F } from "./NanoGPTModel.js";
|
|
2
|
+
import { default as N } from "./TeachableLLM.js";
|
|
3
|
+
import { default as j } from "./tokeniser/CharTokeniser.js";
|
|
4
|
+
import { default as z } from "./tokeniser/bpe.js";
|
|
5
|
+
import { default as H } from "./utilities/waitForModel.js";
|
|
6
|
+
import { default as J } from "./data/textLoader.js";
|
|
7
|
+
import { estimateMemoryUsage as O, estimateParameterCount as Q, estimateResources as S, estimateTrainingMemoryUsage as V, validateConfig as W } from "./utilities/parameters.js";
|
|
8
|
+
import "./index-bMBtI-WR.js";
|
|
9
9
|
import "./ops/cpu/scatterSub.js";
|
|
10
10
|
import "./ops/webgl/scatterSub.js";
|
|
11
11
|
import "./ops/cpu/gatherSub.js";
|
|
@@ -34,16 +34,17 @@ import "./ops/grads/gelu.js";
|
|
|
34
34
|
import "./ops/cpu/normRMS.js";
|
|
35
35
|
import "./ops/webgl/normRMS.js";
|
|
36
36
|
import "./ops/grads/normRMS.js";
|
|
37
|
+
import "./ops/webgl/log.js";
|
|
37
38
|
export {
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
39
|
+
z as BPETokeniser,
|
|
40
|
+
j as CharTokeniser,
|
|
41
|
+
F as NanoGPT,
|
|
42
|
+
N as TeachableLLM,
|
|
43
|
+
O as estimateMemoryUsage,
|
|
44
|
+
Q as estimateParameterCount,
|
|
45
|
+
S as estimateResources,
|
|
46
|
+
V as estimateTrainingMemoryUsage,
|
|
47
|
+
J as loadTextData,
|
|
48
|
+
W as validateConfig,
|
|
49
|
+
H as waitForModel
|
|
49
50
|
};
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { o as l, j as c, E as y, a0 as E, a1 as w, a2 as o, a3 as u, W as v, f as I, a4 as A } from "./index-
|
|
2
|
-
import { e as $ } from "./axis_util-
|
|
3
|
-
import { m as f } from "./max-
|
|
4
|
-
import { r as h } from "./reshape-
|
|
5
|
-
import { s as t } from "./sum-
|
|
1
|
+
import { o as l, j as c, E as y, a0 as E, a1 as w, a2 as o, a3 as u, W as v, f as I, a4 as A } from "./index-bMBtI-WR.js";
|
|
2
|
+
import { e as $ } from "./axis_util-D17qZRQm.js";
|
|
3
|
+
import { m as f } from "./max-DechV4Bc.js";
|
|
4
|
+
import { r as h } from "./reshape-T4yDEqoF.js";
|
|
5
|
+
import { s as t } from "./sum-CutF5lj2.js";
|
|
6
6
|
/**
|
|
7
7
|
* @license
|
|
8
8
|
* Copyright 2020 Google Inc. All Rights Reserved.
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n, p as t, q as m, E as i } from "./index-
|
|
2
|
-
import { z as c, c as f } from "./zeros-
|
|
1
|
+
import { n, p as t, q as m, E as i } from "./index-bMBtI-WR.js";
|
|
2
|
+
import { z as c, c as f } from "./zeros-CAbHfODe.js";
|
|
3
3
|
/**
|
|
4
4
|
* @license
|
|
5
5
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
package/dist/ops/appendCache.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { e as a } from "../index-
|
|
1
|
+
import { e as a } from "../index-bMBtI-WR.js";
|
|
2
2
|
import "./cpu/appendCache.js";
|
|
3
3
|
import "./webgl/appendCache.js";
|
|
4
|
-
import { c as s } from "../concat-
|
|
5
|
-
import { z as c } from "../zeros-
|
|
4
|
+
import { c as s } from "../concat-DhZfF1GY.js";
|
|
5
|
+
import { z as c } from "../zeros-CAbHfODe.js";
|
|
6
6
|
function i(r, p, n, o) {
|
|
7
7
|
if (!o) {
|
|
8
8
|
const e = r.shape[2];
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { r as d } from "../../index-
|
|
2
|
-
import { c as h } from "../../concat-
|
|
1
|
+
import { r as d } from "../../index-bMBtI-WR.js";
|
|
2
|
+
import { c as h } from "../../concat-DhZfF1GY.js";
|
|
3
3
|
function u(p) {
|
|
4
4
|
const { cache: n, item: s } = p.inputs, { maxSize: r, pastLen: c } = p.attrs, t = n.shape[0], o = n.shape[1], a = n.shape[3], e = s.shape[2];
|
|
5
5
|
if (c + e <= r) {
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { r as a, g as p, f as u } from "../../index-
|
|
2
|
-
import { l as N, w as b } from "../../ops-
|
|
3
|
-
import { o as g } from "../../ones-
|
|
4
|
-
import { z as A } from "../../zeros-
|
|
5
|
-
import { m as I } from "../../mat_mul-
|
|
1
|
+
import { r as a, g as p, f as u } from "../../index-bMBtI-WR.js";
|
|
2
|
+
import { l as N, w as b } from "../../ops-Mv7Ta72x.js";
|
|
3
|
+
import { o as g } from "../../ones-g0K8jVwm.js";
|
|
4
|
+
import { z as A } from "../../zeros-CAbHfODe.js";
|
|
5
|
+
import { m as I } from "../../mat_mul-BsrLfy81.js";
|
|
6
6
|
function o(n) {
|
|
7
7
|
const { q: s, k: e } = n.inputs, { divisor: r } = n.attrs, c = s.shape[2], t = e.shape[2], m = N.bandPart(g([t, t]), -1, 0).cast("bool"), l = A([t, t]), i = p([t, t], Number.NEGATIVE_INFINITY), f = b(m, l, i), k = I(s, e, !1, !0).mul(u(r)), d = f.slice([0, 0], [c, t]).expandDims(0).expandDims(0);
|
|
8
8
|
return k.add(d);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { r as n } from "../../index-
|
|
2
|
-
import { s as f } from "../../softmax-
|
|
1
|
+
import { r as n } from "../../index-bMBtI-WR.js";
|
|
2
|
+
import { s as f } from "../../softmax-B2_IKPDR.js";
|
|
3
3
|
function r(t) {
|
|
4
4
|
const { inputs: s, attrs: i } = t, { logits: o } = s, { dim: a, dropoutRate: e } = i;
|
|
5
5
|
if (!o)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { o as u, j as c, E as g, O as h, r as m, s as p } from "../../index-
|
|
2
|
-
import { r as l } from "../../range-
|
|
3
|
-
import { s as N } from "../../stack-
|
|
1
|
+
import { o as u, j as c, E as g, O as h, r as m, s as p } from "../../index-bMBtI-WR.js";
|
|
2
|
+
import { r as l } from "../../range-DMaG9A3G.js";
|
|
3
|
+
import { s as N } from "../../stack-lpJ5kYvE.js";
|
|
4
4
|
/**
|
|
5
5
|
* @license
|
|
6
6
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
package/dist/ops/cpu/gelu.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { r as n, t as M } from "../../index-
|
|
1
|
+
import { r as n, t as M } from "../../index-bMBtI-WR.js";
|
|
2
2
|
function e(t) {
|
|
3
3
|
const { inputs: r, attrs: o } = t, { transposeA: s, transposeB: l } = o, { x: c, kernel: u, y: a } = r, m = c, i = u, k = a;
|
|
4
4
|
return M(() => m.matMul(i, s, l).mul(k));
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { r as e, b as u } from "../../index-
|
|
1
|
+
import { r as e, b as u } from "../../index-bMBtI-WR.js";
|
|
2
2
|
function n(o) {
|
|
3
3
|
const { inputs: r } = o, { a: l, b: t } = r;
|
|
4
4
|
return console.warn("Using fallback mulDrop implementation without dropout."), u(l, t);
|
package/dist/ops/cpu/normRMS.js
CHANGED
package/dist/ops/cpu/qkv.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { r as q } from "../../index-
|
|
2
|
-
import { r as o } from "../../reshape-
|
|
3
|
-
import { s as x } from "../../split-
|
|
1
|
+
import { r as q } from "../../index-bMBtI-WR.js";
|
|
2
|
+
import { r as o } from "../../reshape-T4yDEqoF.js";
|
|
3
|
+
import { s as x } from "../../split-dcks18H1.js";
|
|
4
4
|
function v(p) {
|
|
5
5
|
const { x: c, kernel: K } = p.inputs, { heads: n } = p.attrs, [s, e, t] = c.shape, a = o(c, [s * e, t]), i = a.dot(K);
|
|
6
6
|
a.dispose();
|
package/dist/ops/cpu/rope.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { r as S } from "../../index-
|
|
2
|
-
import { r as F } from "../../range-
|
|
3
|
-
import { g as I } from "../../gather-
|
|
4
|
-
import { s as E } from "../../stack-
|
|
5
|
-
import { c as T } from "../../concat-
|
|
1
|
+
import { r as S } from "../../index-bMBtI-WR.js";
|
|
2
|
+
import { r as F } from "../../range-DMaG9A3G.js";
|
|
3
|
+
import { g as I } from "../../gather-DZCMHZuN.js";
|
|
4
|
+
import { s as E } from "../../stack-lpJ5kYvE.js";
|
|
5
|
+
import { c as T } from "../../concat-DhZfF1GY.js";
|
|
6
6
|
function U(t, c, p, o, r) {
|
|
7
7
|
const n = o.shape[3], s = p;
|
|
8
8
|
if (s > n) return o;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { o as l, n as g, j as h, E as k, a5 as w, r as $, s as d, b as m } from "../../index-
|
|
2
|
-
import { r as b } from "../../range-
|
|
3
|
-
import { s as E } from "../../stack-
|
|
4
|
-
import { o as D } from "../../ones-
|
|
1
|
+
import { o as l, n as g, j as h, E as k, a5 as w, r as $, s as d, b as m } from "../../index-bMBtI-WR.js";
|
|
2
|
+
import { r as b } from "../../range-DMaG9A3G.js";
|
|
3
|
+
import { s as E } from "../../stack-lpJ5kYvE.js";
|
|
4
|
+
import { o as D } from "../../ones-g0K8jVwm.js";
|
|
5
5
|
function N(a, r, t) {
|
|
6
6
|
const s = r.rank > 1 ? r.shape[r.rank - 1] : 1, e = r.rank > 1 ? r.rank - 1 : 1, o = `Must have updates.shape = indices.shape[:batchDim] + shape[sliceDim:], got updates.shape: ${t.shape}, indices.shape: ${r.shape}, shape: ${a}, sliceDim: ${s}, and batchDim: ${e}.`;
|
|
7
7
|
if (t.rank < e)
|
package/dist/ops/fusedSoftmax.js
CHANGED
package/dist/ops/gatherSub.js
CHANGED
package/dist/ops/gelu.js
CHANGED
|
@@ -1,25 +1,29 @@
|
|
|
1
|
-
import { h as
|
|
2
|
-
import { matMulMul as
|
|
3
|
-
const
|
|
1
|
+
import { h as m, f as i } from "../../index-bMBtI-WR.js";
|
|
2
|
+
import { matMulMul as u } from "../matMulMul.js";
|
|
3
|
+
const p = {
|
|
4
4
|
kernelName: "AttentionMask",
|
|
5
5
|
inputsToSave: ["q", "k"],
|
|
6
6
|
outputsToSave: [],
|
|
7
|
-
gradFunc: (t,
|
|
7
|
+
gradFunc: (t, c, l) => {
|
|
8
8
|
if (Array.isArray(t))
|
|
9
9
|
throw new Error("Expected dy to be a single Tensor");
|
|
10
|
-
const [e,
|
|
10
|
+
const [e, n] = c, { divisor: a } = l;
|
|
11
11
|
return {
|
|
12
|
-
q: () =>
|
|
12
|
+
q: () => u(t, n, i(a)),
|
|
13
13
|
k: () => {
|
|
14
|
-
const s = e.transpose([0, 1, 3, 2]), r =
|
|
15
|
-
|
|
14
|
+
const s = e.transpose([0, 1, 3, 2]), r = u(s, t, i(a));
|
|
15
|
+
s.dispose();
|
|
16
|
+
const o = r.transpose([0, 1, 3, 2]);
|
|
17
|
+
return r.dispose(), o;
|
|
16
18
|
},
|
|
17
19
|
mask: () => t,
|
|
18
20
|
divisor: () => {
|
|
19
|
-
const s = e.matMul(
|
|
20
|
-
|
|
21
|
+
const s = e.matMul(n, !1, !0), r = t.mul(s);
|
|
22
|
+
s.dispose();
|
|
23
|
+
const o = r.sum();
|
|
24
|
+
return r.dispose(), o;
|
|
21
25
|
}
|
|
22
26
|
};
|
|
23
27
|
}
|
|
24
28
|
};
|
|
25
|
-
|
|
29
|
+
m(p);
|
|
@@ -1,20 +1,22 @@
|
|
|
1
|
-
import { h as
|
|
2
|
-
import { mulDrop as
|
|
3
|
-
import { s as
|
|
4
|
-
const
|
|
1
|
+
import { h as f, b as i, s as l } from "../../index-bMBtI-WR.js";
|
|
2
|
+
import { mulDrop as g } from "../mulDrop.js";
|
|
3
|
+
import { s as T } from "../../sum-CutF5lj2.js";
|
|
4
|
+
const Y = {
|
|
5
5
|
kernelName: "FusedSoftmax",
|
|
6
6
|
outputsToSave: [!0],
|
|
7
|
-
gradFunc: (o,
|
|
8
|
-
const [s] =
|
|
7
|
+
gradFunc: (o, n, a) => {
|
|
8
|
+
const [s] = n, { dim: p, dropoutRate: t, seed: e } = a, c = !0, r = t && e ? g(o, s, t, e) : i(o, s);
|
|
9
9
|
return {
|
|
10
10
|
logits: () => {
|
|
11
|
-
const m =
|
|
12
|
-
|
|
11
|
+
const m = T(r, [p], c), u = i(m, s);
|
|
12
|
+
m.dispose();
|
|
13
|
+
const d = l(r, u);
|
|
14
|
+
return u.dispose(), d;
|
|
13
15
|
}
|
|
14
16
|
};
|
|
15
17
|
}
|
|
16
18
|
};
|
|
17
|
-
|
|
19
|
+
f(Y);
|
|
18
20
|
export {
|
|
19
|
-
|
|
21
|
+
Y as softmaxGradConfig
|
|
20
22
|
};
|
package/dist/ops/grads/gelu.js
CHANGED
package/dist/ops/grads/qkv.js
CHANGED
package/dist/ops/grads/rope.js
CHANGED
|
File without changes
|
package/dist/ops/log.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import "./webgl/log.js";
|
package/dist/ops/matMulGelu.js
CHANGED
package/dist/ops/matMulMul.js
CHANGED
package/dist/ops/mulDrop.js
CHANGED
package/dist/ops/normRMS.js
CHANGED
package/dist/ops/qkv.js
CHANGED
package/dist/ops/scatterSub.js
CHANGED