@genai-fi/nanogpt 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +3 -3
- package/dist/NanoGPTModel.js +83 -70
- package/dist/TeachableLLM.js +1 -1
- package/dist/{random_width-CMHmdbSu.js → TiedEmbedding-CnJ1bx4q.js} +760 -719
- package/dist/{axis_util-DeydwOoC.js → axis_util-BgTGy5w8.js} +1 -1
- package/dist/{concat-DS_qH7MI.js → concat-CuRsVY-K.js} +1 -1
- package/dist/dropout-DfDdklfL.js +193 -0
- package/dist/{gather-BUmJIS8n.js → gather-ZYRWhmXR.js} +1 -1
- package/dist/gelu-CnCt17Lk.js +26 -0
- package/dist/{index-XjBAhiFO.js → index-C4JCoBvj.js} +61 -61
- package/dist/kernel_funcs_utils-CAd1h9X1.js +388 -0
- package/dist/layers/CausalSelfAttention.js +71 -70
- package/dist/layers/MLP.d.ts +3 -1
- package/dist/layers/MLP.js +93 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +3 -3
- package/dist/layers/TiedEmbedding.js +6 -46
- package/dist/layers/TransformerBlock.js +2 -2
- package/dist/{log_sum_exp-DJPkVZZn.js → log_sum_exp-BswFnwOb.js} +5 -5
- package/dist/main.js +1 -1
- package/dist/{mat_mul-CKwFEV1Q.js → mat_mul-415y5Qn2.js} +1 -1
- package/dist/{max-DJvEiCAJ.js → max-CP_9O2Yd.js} +1 -1
- package/dist/{moments-CrWRPcR3.js → moments-CjeIaVdp.js} +3 -3
- package/dist/{norm-BzY929B_.js → norm-CZM380I3.js} +5 -5
- package/dist/{ones-BO01zpJG.js → ones-Bf3YR48P.js} +2 -2
- package/dist/ops/appendCache.js +1 -1
- package/dist/ops/attentionMask.d.ts +1 -1
- package/dist/ops/attentionMask.js +4 -4
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +13 -9
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +3 -3
- package/dist/ops/cpu/gelu.d.ts +1 -0
- package/dist/ops/cpu/gelu.js +40 -0
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +4 -4
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.d.ts +3 -0
- package/dist/ops/gelu.js +8 -0
- package/dist/ops/grads/attentionMask.js +1 -1
- package/dist/ops/grads/fusedSoftmax.js +2 -2
- package/dist/ops/grads/gelu.d.ts +2 -0
- package/dist/ops/grads/gelu.js +5 -0
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +19 -18
- package/dist/ops/webgl/fusedSoftmax.js +483 -782
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.d.ts +2 -0
- package/dist/ops/webgl/gelu.js +50 -0
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/{range-DQMNzBWs.js → range-9AzeApCc.js} +1 -1
- package/dist/{reshape-DFzh97Sc.js → reshape-Boe4DuIO.js} +1 -1
- package/dist/{sin-BYM-U4Ut.js → sin-KmhiDuMa.js} +1 -1
- package/dist/{slice_util-CnVNPQI-.js → slice_util-19zDNNSn.js} +2 -2
- package/dist/{softmax-4DOn6cPq.js → softmax-Cujsg4ay.js} +1 -1
- package/dist/{split-CkbeVdF8.js → split-DbcNm1-i.js} +1 -1
- package/dist/{stack-DaIMO5iX.js → stack-D1YjmgKN.js} +1 -1
- package/dist/{sum-C6u3xMi3.js → sum-R28pucR5.js} +1 -1
- package/dist/{tensor-Cu1fU7H7.js → tensor-BVeHdl7V.js} +1 -1
- package/dist/{tensor2d-D0CKdG6B.js → tensor2d-DqFGNs_K.js} +1 -1
- package/dist/{tfjs_backend-Bzl2SrRo.js → tfjs_backend-Cug-PH75.js} +826 -1015
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +3 -3
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +5 -5
- package/dist/training/sparseCrossEntropy.js +4 -4
- package/dist/utilities/dummy.js +2 -2
- package/dist/utilities/generate.js +3 -3
- package/dist/utilities/load.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-BS4AKqNU.js → variable-LJT9Ld63.js} +1 -1
- package/dist/{zeros-CmJFiC84.js → zeros-dnQxFgAD.js} +1 -1
- package/package.json +1 -1
- package/dist/MLP-KHhikThU.js +0 -83
package/dist/training/AdamExt.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as r, b as c, f as h, s as g, e as o } from "../index-
|
|
1
|
+
import { A as r, b as c, f as h, s as g, e as o } from "../index-C4JCoBvj.js";
|
|
2
2
|
class u extends r {
|
|
3
3
|
constructor(t, e, s, a, i) {
|
|
4
4
|
super(t, e, s, a), this.config = i, this.startLearningRate = t;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { ae as $, ac as m, af as M, a as R, ag as f, ah as v, ai as z, j as _, t as x } from "../index-C4JCoBvj.js";
|
|
2
2
|
import { s as E } from "../index-C4L8Cm77.js";
|
|
3
|
-
import { s as P } from "../stack-
|
|
4
|
-
import { t as D } from "../tensor-
|
|
3
|
+
import { s as P } from "../stack-D1YjmgKN.js";
|
|
4
|
+
import { t as D } from "../tensor-BVeHdl7V.js";
|
|
5
5
|
import "../index-Tf7vU29b.js";
|
|
6
6
|
/**
|
|
7
7
|
* @license
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { generateText as v } from "../utilities/generate.js";
|
|
2
2
|
import L from "./Trainer.js";
|
|
3
3
|
import x from "./Evaluator.js";
|
|
4
|
-
import { a as h } from "../index-
|
|
4
|
+
import { a as h } from "../index-C4JCoBvj.js";
|
|
5
5
|
const D = {
|
|
6
6
|
desiredLoss: 0.01,
|
|
7
7
|
logInterval: 1,
|
package/dist/training/Trainer.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { DatasetBuilder as d } from "./DatasetBuilder.js";
|
|
2
2
|
import h from "./AdamExt.js";
|
|
3
|
-
import { t as g, v as u, a as o } from "../index-
|
|
4
|
-
import { m as y, n as f } from "../norm-
|
|
5
|
-
import { m as S, a as z } from "../moments-
|
|
6
|
-
import { m as b } from "../max-
|
|
7
|
-
import { z as n } from "../zeros-
|
|
3
|
+
import { t as g, v as u, a as o } from "../index-C4JCoBvj.js";
|
|
4
|
+
import { m as y, n as f } from "../norm-CZM380I3.js";
|
|
5
|
+
import { m as S, a as z } from "../moments-CjeIaVdp.js";
|
|
6
|
+
import { m as b } from "../max-CP_9O2Yd.js";
|
|
7
|
+
import { z as n } from "../zeros-dnQxFgAD.js";
|
|
8
8
|
class G {
|
|
9
9
|
constructor(t, s, e = 1e-3) {
|
|
10
10
|
this.tokenizer = s, this.model = t, this.learningRate = e, this.resetOptimizer(), this.datasetBuilder = new d(s, t.config.gpt.blockSize);
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { gatherSub as L } from "../ops/gatherSub.js";
|
|
2
2
|
import { scatterSub as y } from "../ops/scatterSub.js";
|
|
3
|
-
import { e as u, c as i, z as S, t as f, s as G } from "../index-
|
|
4
|
-
import { s as v } from "../softmax-
|
|
5
|
-
import { m as z } from "../max-
|
|
6
|
-
import { l as k } from "../log_sum_exp-
|
|
3
|
+
import { e as u, c as i, z as S, t as f, s as G } from "../index-C4JCoBvj.js";
|
|
4
|
+
import { s as v } from "../softmax-Cujsg4ay.js";
|
|
5
|
+
import { m as z } from "../max-CP_9O2Yd.js";
|
|
6
|
+
import { l as k } from "../log_sum_exp-BswFnwOb.js";
|
|
7
7
|
function F(a, s) {
|
|
8
8
|
return f(() => {
|
|
9
9
|
const e = a.shape[a.shape.length - 1], o = a.shape.slice(0, -1).reduce((d, c) => d * c, 1), p = a.shape.length > 2 ? a.reshape([o, e]) : a, n = s.shape.length > 1 ? s.reshape([o]).cast("int32") : s.cast("int32"), t = z(p, -1, !0), r = G(p, t), h = k(r, -1);
|
package/dist/utilities/dummy.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import "../index-
|
|
2
|
-
import { z as n } from "../zeros-
|
|
1
|
+
import "../index-C4JCoBvj.js";
|
|
2
|
+
import { z as n } from "../zeros-dnQxFgAD.js";
|
|
3
3
|
async function a(s) {
|
|
4
4
|
const o = n([1, s.config.gpt.blockSize], "int32"), { logits: t, loss: i } = s.forward(o, void 0, !1);
|
|
5
5
|
await t.data(), t.dispose(), i && i.dispose(), o.dispose();
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { t as y } from "../index-
|
|
2
|
-
import { t as x } from "../tensor2d-
|
|
3
|
-
import { c as f } from "../concat-
|
|
1
|
+
import { t as y } from "../index-C4JCoBvj.js";
|
|
2
|
+
import { t as x } from "../tensor2d-DqFGNs_K.js";
|
|
3
|
+
import { c as f } from "../concat-CuRsVY-K.js";
|
|
4
4
|
async function A(o, r, a, c, T) {
|
|
5
5
|
if (c <= 0)
|
|
6
6
|
throw new Error("Length must be a positive integer");
|
package/dist/utilities/load.js
CHANGED
|
@@ -3,7 +3,7 @@ import { importWeights as b } from "./weights.js";
|
|
|
3
3
|
import u from "../tokeniser/CharTokeniser.js";
|
|
4
4
|
import F from "../NanoGPTModel.js";
|
|
5
5
|
import { dummyPassAsync as j } from "./dummy.js";
|
|
6
|
-
import { d as T } from "../index-
|
|
6
|
+
import { d as T } from "../index-C4JCoBvj.js";
|
|
7
7
|
import E from "../tokeniser/bpe.js";
|
|
8
8
|
async function A(t) {
|
|
9
9
|
const o = await fetch(t);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import "../index-
|
|
2
|
-
import { t as p } from "../tensor-
|
|
1
|
+
import "../index-C4JCoBvj.js";
|
|
2
|
+
import { t as p } from "../tensor-BVeHdl7V.js";
|
|
3
3
|
function h(n) {
|
|
4
4
|
const e = n.reduce((s, o) => s + o.length, 0), a = new Float32Array(e);
|
|
5
5
|
let t = 0;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { o as m, h as r, U as l, E as c, V as i, k as p, W as u, n as f } from "./index-
|
|
1
|
+
import { o as m, h as r, U as l, E as c, V as i, k as p, W as u, n as f } from "./index-C4JCoBvj.js";
|
|
2
2
|
/**
|
|
3
3
|
* @license
|
|
4
4
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
package/package.json
CHANGED
package/dist/MLP-KHhikThU.js
DELETED
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
import { t as d } from "./index-XjBAhiFO.js";
|
|
2
|
-
import c from "./layers/BaseLayer.js";
|
|
3
|
-
import { E as p, D as l, a as h, r as i } from "./random_width-CMHmdbSu.js";
|
|
4
|
-
/**
|
|
5
|
-
* @license
|
|
6
|
-
* Copyright 2018 Google LLC
|
|
7
|
-
*
|
|
8
|
-
* Use of this source code is governed by an MIT-style
|
|
9
|
-
* license that can be found in the LICENSE file or at
|
|
10
|
-
* https://opensource.org/licenses/MIT.
|
|
11
|
-
* =============================================================================
|
|
12
|
-
*/
|
|
13
|
-
function r(s) {
|
|
14
|
-
return new h(s);
|
|
15
|
-
}
|
|
16
|
-
function u(s) {
|
|
17
|
-
return new l(s);
|
|
18
|
-
}
|
|
19
|
-
function g(s) {
|
|
20
|
-
return new p(s);
|
|
21
|
-
}
|
|
22
|
-
class P extends c {
|
|
23
|
-
cFc;
|
|
24
|
-
cProj;
|
|
25
|
-
dropout;
|
|
26
|
-
index;
|
|
27
|
-
_trainable = !0;
|
|
28
|
-
constructor(t, e) {
|
|
29
|
-
super(e), this.index = t, this.cFc = r({
|
|
30
|
-
units: e.gpt.mlpFactor * e.gpt.nEmbed,
|
|
31
|
-
activation: "gelu",
|
|
32
|
-
useBias: e.gpt.biasInLinear,
|
|
33
|
-
kernelInitializer: i({
|
|
34
|
-
mean: 0,
|
|
35
|
-
stddev: 0.02
|
|
36
|
-
}),
|
|
37
|
-
biasInitializer: "zeros",
|
|
38
|
-
name: `block_${t}_mlp_cFc`
|
|
39
|
-
}), this.cProj = r({
|
|
40
|
-
units: e.gpt.nEmbed,
|
|
41
|
-
useBias: e.gpt.biasInLinear,
|
|
42
|
-
kernelInitializer: i({
|
|
43
|
-
mean: 0,
|
|
44
|
-
stddev: 0.02 / Math.sqrt(2 * e.gpt.nLayer)
|
|
45
|
-
}),
|
|
46
|
-
biasInitializer: "zeros",
|
|
47
|
-
name: `block_${t}_mlp_cProj`
|
|
48
|
-
}), this.dropout = u({ rate: e.gpt.dropout });
|
|
49
|
-
}
|
|
50
|
-
get variables() {
|
|
51
|
-
return [
|
|
52
|
-
...this.cFc.trainableWeights.map((t) => t.read()),
|
|
53
|
-
...this.cProj.trainableWeights.map((t) => t.read())
|
|
54
|
-
];
|
|
55
|
-
}
|
|
56
|
-
get trainable() {
|
|
57
|
-
return this._trainable;
|
|
58
|
-
}
|
|
59
|
-
set trainable(t) {
|
|
60
|
-
this._trainable = t, this.cFc.trainable = t, this.cProj.trainable = t;
|
|
61
|
-
}
|
|
62
|
-
saveWeights(t) {
|
|
63
|
-
t.set(`block_${this.index}_mlpHidden`, this.cFc.getWeights()), t.set(`block_${this.index}_mlpOut`, this.cProj.getWeights());
|
|
64
|
-
}
|
|
65
|
-
loadWeights(t) {
|
|
66
|
-
this.cFc.setWeights(t.get(`block_${this.index}_mlpHidden`) || []), this.cProj.setWeights(t.get(`block_${this.index}_mlpOut`) || []);
|
|
67
|
-
}
|
|
68
|
-
call(t, e = !1) {
|
|
69
|
-
return d(() => {
|
|
70
|
-
this.startMemory();
|
|
71
|
-
const a = this.cFc.apply(t), n = this.cProj.apply(a), o = this.dropout.apply(n, { training: e });
|
|
72
|
-
return this.endMemory("MLP"), o;
|
|
73
|
-
});
|
|
74
|
-
}
|
|
75
|
-
dispose() {
|
|
76
|
-
this.cFc.dispose(), this.cProj.dispose(), this.dropout.dispose();
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
|
-
export {
|
|
80
|
-
P as M,
|
|
81
|
-
u as d,
|
|
82
|
-
g as e
|
|
83
|
-
};
|