@genai-fi/nanogpt 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +3 -3
- package/dist/NanoGPTModel.js +83 -70
- package/dist/TeachableLLM.js +1 -1
- package/dist/{random_width-CMHmdbSu.js → TiedEmbedding-CnJ1bx4q.js} +760 -719
- package/dist/{axis_util-DeydwOoC.js → axis_util-BgTGy5w8.js} +1 -1
- package/dist/{concat-DS_qH7MI.js → concat-CuRsVY-K.js} +1 -1
- package/dist/dropout-DfDdklfL.js +193 -0
- package/dist/{gather-BUmJIS8n.js → gather-ZYRWhmXR.js} +1 -1
- package/dist/gelu-CnCt17Lk.js +26 -0
- package/dist/{index-XjBAhiFO.js → index-C4JCoBvj.js} +61 -61
- package/dist/kernel_funcs_utils-CAd1h9X1.js +388 -0
- package/dist/layers/CausalSelfAttention.js +71 -70
- package/dist/layers/MLP.d.ts +3 -1
- package/dist/layers/MLP.js +93 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +3 -3
- package/dist/layers/TiedEmbedding.js +6 -46
- package/dist/layers/TransformerBlock.js +2 -2
- package/dist/{log_sum_exp-DJPkVZZn.js → log_sum_exp-BswFnwOb.js} +5 -5
- package/dist/main.js +1 -1
- package/dist/{mat_mul-CKwFEV1Q.js → mat_mul-415y5Qn2.js} +1 -1
- package/dist/{max-DJvEiCAJ.js → max-CP_9O2Yd.js} +1 -1
- package/dist/{moments-CrWRPcR3.js → moments-CjeIaVdp.js} +3 -3
- package/dist/{norm-BzY929B_.js → norm-CZM380I3.js} +5 -5
- package/dist/{ones-BO01zpJG.js → ones-Bf3YR48P.js} +2 -2
- package/dist/ops/appendCache.js +1 -1
- package/dist/ops/attentionMask.d.ts +1 -1
- package/dist/ops/attentionMask.js +4 -4
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +13 -9
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +3 -3
- package/dist/ops/cpu/gelu.d.ts +1 -0
- package/dist/ops/cpu/gelu.js +40 -0
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +4 -4
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.d.ts +3 -0
- package/dist/ops/gelu.js +8 -0
- package/dist/ops/grads/attentionMask.js +1 -1
- package/dist/ops/grads/fusedSoftmax.js +2 -2
- package/dist/ops/grads/gelu.d.ts +2 -0
- package/dist/ops/grads/gelu.js +5 -0
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +19 -18
- package/dist/ops/webgl/fusedSoftmax.js +483 -782
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.d.ts +2 -0
- package/dist/ops/webgl/gelu.js +50 -0
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/{range-DQMNzBWs.js → range-9AzeApCc.js} +1 -1
- package/dist/{reshape-DFzh97Sc.js → reshape-Boe4DuIO.js} +1 -1
- package/dist/{sin-BYM-U4Ut.js → sin-KmhiDuMa.js} +1 -1
- package/dist/{slice_util-CnVNPQI-.js → slice_util-19zDNNSn.js} +2 -2
- package/dist/{softmax-4DOn6cPq.js → softmax-Cujsg4ay.js} +1 -1
- package/dist/{split-CkbeVdF8.js → split-DbcNm1-i.js} +1 -1
- package/dist/{stack-DaIMO5iX.js → stack-D1YjmgKN.js} +1 -1
- package/dist/{sum-C6u3xMi3.js → sum-R28pucR5.js} +1 -1
- package/dist/{tensor-Cu1fU7H7.js → tensor-BVeHdl7V.js} +1 -1
- package/dist/{tensor2d-D0CKdG6B.js → tensor2d-DqFGNs_K.js} +1 -1
- package/dist/{tfjs_backend-Bzl2SrRo.js → tfjs_backend-Cug-PH75.js} +826 -1015
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +3 -3
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +5 -5
- package/dist/training/sparseCrossEntropy.js +4 -4
- package/dist/utilities/dummy.js +2 -2
- package/dist/utilities/generate.js +3 -3
- package/dist/utilities/load.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-BS4AKqNU.js → variable-LJT9Ld63.js} +1 -1
- package/dist/{zeros-CmJFiC84.js → zeros-dnQxFgAD.js} +1 -1
- package/package.json +1 -1
- package/dist/MLP-KHhikThU.js +0 -83
package/dist/layers/MLP.js
CHANGED
|
@@ -1,7 +1,95 @@
|
|
|
1
|
-
import "../index-
|
|
2
|
-
import "./BaseLayer.js";
|
|
3
|
-
import "../
|
|
4
|
-
import {
|
|
1
|
+
import { t as _, c as M, e as d, H as v } from "../index-C4JCoBvj.js";
|
|
2
|
+
import x from "./BaseLayer.js";
|
|
3
|
+
import { g as L } from "../gelu-CnCt17Lk.js";
|
|
4
|
+
import { v as n } from "../variable-LJT9Ld63.js";
|
|
5
|
+
import { r as p, d as u } from "../dropout-DfDdklfL.js";
|
|
6
|
+
import { r as l } from "../reshape-Boe4DuIO.js";
|
|
7
|
+
import { m as f } from "../mat_mul-415y5Qn2.js";
|
|
8
|
+
class V extends x {
|
|
9
|
+
cFc = null;
|
|
10
|
+
cProj = null;
|
|
11
|
+
index;
|
|
12
|
+
_trainable = !0;
|
|
13
|
+
hiddenUnits;
|
|
14
|
+
constructor(t, s) {
|
|
15
|
+
super(s), this.index = t, this.hiddenUnits = s.gpt.mlpFactor * s.gpt.nEmbed;
|
|
16
|
+
}
|
|
17
|
+
build() {
|
|
18
|
+
this.cFc === null && (this.cFc = n(
|
|
19
|
+
p([this.config.gpt.nEmbed, this.hiddenUnits], 0, 0.02),
|
|
20
|
+
!0
|
|
21
|
+
//`block_${this.index}_attn_cAttn_kernel`
|
|
22
|
+
)), this.cProj === null && (this.cProj = n(
|
|
23
|
+
p(
|
|
24
|
+
[this.hiddenUnits, this.config.gpt.nEmbed],
|
|
25
|
+
0,
|
|
26
|
+
0.02 / Math.sqrt(2 * this.config.gpt.nLayer)
|
|
27
|
+
),
|
|
28
|
+
!0
|
|
29
|
+
//`block_${this.index}_attn_cProj_kernel`
|
|
30
|
+
));
|
|
31
|
+
}
|
|
32
|
+
get variables() {
|
|
33
|
+
return [this.cFc, this.cProj];
|
|
34
|
+
}
|
|
35
|
+
get trainable() {
|
|
36
|
+
return this._trainable;
|
|
37
|
+
}
|
|
38
|
+
set trainable(t) {
|
|
39
|
+
this._trainable = t, this.cFc && (this.cFc.trainable = t), this.cProj && (this.cProj.trainable = t);
|
|
40
|
+
}
|
|
41
|
+
saveWeights(t) {
|
|
42
|
+
t.set(`block_${this.index}_mlpHidden`, this.cFc ? [this.cFc.clone()] : []), t.set(`block_${this.index}_mlpOut`, this.cProj ? [this.cProj.clone()] : []);
|
|
43
|
+
}
|
|
44
|
+
loadWeights(t) {
|
|
45
|
+
const s = t.get(`block_${this.index}_mlpOut`)?.[0], i = t.get(`block_${this.index}_mlpHidden`)?.[0];
|
|
46
|
+
if (!s || !i)
|
|
47
|
+
throw new Error(`Weights for block ${this.index} not found`);
|
|
48
|
+
this.cFc ? this.cFc.assign(i) : this.cFc = n(i, !0), this.cProj ? this.cProj.assign(s) : this.cProj = n(s, !0);
|
|
49
|
+
}
|
|
50
|
+
forward(t) {
|
|
51
|
+
return _(() => {
|
|
52
|
+
this.startMemory();
|
|
53
|
+
const [s, i, o] = t.shape, r = l(t, [s * i, o]), e = f(r, this.cFc), c = L(e);
|
|
54
|
+
e.dispose();
|
|
55
|
+
const a = f(c, this.cProj);
|
|
56
|
+
c.dispose();
|
|
57
|
+
const h = l(a, [s, i, o]);
|
|
58
|
+
return this.endMemory("MLP"), h;
|
|
59
|
+
});
|
|
60
|
+
}
|
|
61
|
+
call(t, s = !1) {
|
|
62
|
+
if (this.build(), s && this.config.layerConfig.checkpointMLP) {
|
|
63
|
+
const o = M(
|
|
64
|
+
// @ts-expect-error Invalid params
|
|
65
|
+
(r, e, c, a) => {
|
|
66
|
+
const h = this.forward(r);
|
|
67
|
+
return a([r]), { value: h, gradFunc: (g, m) => {
|
|
68
|
+
const [b] = m, P = d().state.activeTape;
|
|
69
|
+
d().state.activeTape = [];
|
|
70
|
+
const j = v((F, w, T) => this.forward(F))([b, e, c], g);
|
|
71
|
+
return d().state.activeTape = P, j;
|
|
72
|
+
} };
|
|
73
|
+
}
|
|
74
|
+
)(t, this.cFc, this.cProj);
|
|
75
|
+
if (this.config.gpt.dropout > 0) {
|
|
76
|
+
const r = u(o, this.config.gpt.dropout);
|
|
77
|
+
return o.dispose(), r;
|
|
78
|
+
}
|
|
79
|
+
return o;
|
|
80
|
+
} else {
|
|
81
|
+
const i = this.forward(t);
|
|
82
|
+
if (s && this.config.gpt.dropout > 0) {
|
|
83
|
+
const o = u(i, this.config.gpt.dropout);
|
|
84
|
+
return i.dispose(), o;
|
|
85
|
+
}
|
|
86
|
+
return i;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
dispose() {
|
|
90
|
+
this.cFc?.dispose(), this.cProj?.dispose();
|
|
91
|
+
}
|
|
92
|
+
}
|
|
5
93
|
export {
|
|
6
|
-
|
|
94
|
+
V as default
|
|
7
95
|
};
|
package/dist/layers/RMSNorm.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { t as r } from "../index-
|
|
1
|
+
import { t as r } from "../index-C4JCoBvj.js";
|
|
2
2
|
import m from "./BaseLayer.js";
|
|
3
|
-
import { v as i } from "../variable-
|
|
4
|
-
import { o } from "../ones-
|
|
3
|
+
import { v as i } from "../variable-LJT9Ld63.js";
|
|
4
|
+
import { o } from "../ones-Bf3YR48P.js";
|
|
5
5
|
class d extends m {
|
|
6
6
|
gamma;
|
|
7
7
|
epsilon;
|
package/dist/layers/RoPECache.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { o as h, h as c, E as f, N as l, f as n, O as m, t as u, F as p } from "../index-
|
|
2
|
-
import { c as d, s as C } from "../sin-
|
|
3
|
-
import { r as a } from "../range-
|
|
1
|
+
import { o as h, h as c, E as f, N as l, f as n, O as m, t as u, F as p } from "../index-C4JCoBvj.js";
|
|
2
|
+
import { c as d, s as C } from "../sin-KmhiDuMa.js";
|
|
3
|
+
import { r as a } from "../range-9AzeApCc.js";
|
|
4
4
|
/**
|
|
5
5
|
* @license
|
|
6
6
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -1,48 +1,8 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import "../index-
|
|
3
|
-
import
|
|
4
|
-
import
|
|
5
|
-
import
|
|
6
|
-
class n {
|
|
7
|
-
vocabSize;
|
|
8
|
-
embedDim;
|
|
9
|
-
tiedWeights;
|
|
10
|
-
initializer;
|
|
11
|
-
constructor(i, e) {
|
|
12
|
-
this.vocabSize = i.vocabSize, this.embedDim = i.embedDim, this.initializer = t({
|
|
13
|
-
mean: 0,
|
|
14
|
-
stddev: 0.02
|
|
15
|
-
}), this.tiedWeights = r(
|
|
16
|
-
this.initializer.apply([this.vocabSize, this.embedDim]),
|
|
17
|
-
!0,
|
|
18
|
-
e || "tied_embedding"
|
|
19
|
-
);
|
|
20
|
-
}
|
|
21
|
-
get variables() {
|
|
22
|
-
return [this.tiedWeights];
|
|
23
|
-
}
|
|
24
|
-
embed(i) {
|
|
25
|
-
return d(this.tiedWeights, i, 0);
|
|
26
|
-
}
|
|
27
|
-
project(i) {
|
|
28
|
-
return s(i, this.tiedWeights.transpose());
|
|
29
|
-
}
|
|
30
|
-
getWeights() {
|
|
31
|
-
return [this.tiedWeights];
|
|
32
|
-
}
|
|
33
|
-
setWeights(i) {
|
|
34
|
-
this.tiedWeights.assign(i[0]);
|
|
35
|
-
}
|
|
36
|
-
getConfig() {
|
|
37
|
-
return {
|
|
38
|
-
vocabSize: this.vocabSize,
|
|
39
|
-
embedDim: this.embedDim
|
|
40
|
-
};
|
|
41
|
-
}
|
|
42
|
-
dispose() {
|
|
43
|
-
this.tiedWeights.dispose();
|
|
44
|
-
}
|
|
45
|
-
}
|
|
1
|
+
import { T as a } from "../TiedEmbedding-CnJ1bx4q.js";
|
|
2
|
+
import "../index-C4JCoBvj.js";
|
|
3
|
+
import "../tfjs_backend-Cug-PH75.js";
|
|
4
|
+
import "../variable-LJT9Ld63.js";
|
|
5
|
+
import "../gather-ZYRWhmXR.js";
|
|
46
6
|
export {
|
|
47
|
-
|
|
7
|
+
a as default
|
|
48
8
|
};
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import h from "./CausalSelfAttention.js";
|
|
2
|
-
import
|
|
2
|
+
import o from "./MLP.js";
|
|
3
3
|
import a from "./RMSNorm.js";
|
|
4
4
|
import p from "./BaseLayer.js";
|
|
5
|
-
import { t as d } from "../index-
|
|
5
|
+
import { t as d } from "../index-C4JCoBvj.js";
|
|
6
6
|
class W extends p {
|
|
7
7
|
ln1;
|
|
8
8
|
attn;
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { o as r, h as p, E as u, a3 as h, a4 as E, Y as S, s as $, a5 as d } from "./index-
|
|
2
|
-
import { e as K } from "./axis_util-
|
|
3
|
-
import { m as T } from "./max-
|
|
4
|
-
import { r as m } from "./reshape-
|
|
5
|
-
import { s as _ } from "./sum-
|
|
1
|
+
import { o as r, h as p, E as u, a3 as h, a4 as E, Y as S, s as $, a5 as d } from "./index-C4JCoBvj.js";
|
|
2
|
+
import { e as K } from "./axis_util-BgTGy5w8.js";
|
|
3
|
+
import { m as T } from "./max-CP_9O2Yd.js";
|
|
4
|
+
import { r as m } from "./reshape-Boe4DuIO.js";
|
|
5
|
+
import { s as _ } from "./sum-R28pucR5.js";
|
|
6
6
|
/**
|
|
7
7
|
* @license
|
|
8
8
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
package/dist/main.js
CHANGED
|
@@ -5,7 +5,7 @@ import { default as w } from "./tokeniser/bpe.js";
|
|
|
5
5
|
import { default as D } from "./utilities/waitForModel.js";
|
|
6
6
|
import { default as F } from "./data/textLoader.js";
|
|
7
7
|
import { estimateMemoryUsage as N, estimateParameterCount as R, estimateResources as j, estimateTrainingMemoryUsage as q, validateConfig as z } from "./utilities/parameters.js";
|
|
8
|
-
import "./index-
|
|
8
|
+
import "./index-C4JCoBvj.js";
|
|
9
9
|
import "./ops/cpu/scatterSub.js";
|
|
10
10
|
import "./ops/webgl/scatterSub.js";
|
|
11
11
|
import "./ops/cpu/gatherSub.js";
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { o as m, h as c, E as f, X as i, Y as l, Z as h, s as x, x as d } from "./index-
|
|
2
|
-
import { e as v } from "./axis_util-
|
|
3
|
-
import { r as E } from "./reshape-
|
|
1
|
+
import { o as m, h as c, E as f, X as i, Y as l, Z as h, s as x, x as d } from "./index-C4JCoBvj.js";
|
|
2
|
+
import { e as v } from "./axis_util-BgTGy5w8.js";
|
|
3
|
+
import { r as E } from "./reshape-Boe4DuIO.js";
|
|
4
4
|
/**
|
|
5
5
|
* @license
|
|
6
6
|
* Copyright 2020 Google Inc. All Rights Reserved.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { o as l, h as c, E as y, _ as E, Y as w, $ as o, a0 as u, O as v, f as I, Z as $ } from "./index-
|
|
2
|
-
import { e as A } from "./axis_util-
|
|
3
|
-
import { m as f } from "./max-
|
|
4
|
-
import { r as h } from "./reshape-
|
|
5
|
-
import { s as t } from "./sum-
|
|
1
|
+
import { o as l, h as c, E as y, _ as E, Y as w, $ as o, a0 as u, O as v, f as I, Z as $ } from "./index-C4JCoBvj.js";
|
|
2
|
+
import { e as A } from "./axis_util-BgTGy5w8.js";
|
|
3
|
+
import { m as f } from "./max-CP_9O2Yd.js";
|
|
4
|
+
import { r as h } from "./reshape-Boe4DuIO.js";
|
|
5
|
+
import { s as t } from "./sum-R28pucR5.js";
|
|
6
6
|
/**
|
|
7
7
|
* @license
|
|
8
8
|
* Copyright 2020 Google Inc. All Rights Reserved.
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { k as n, l as t, n as m, E as i } from "./index-
|
|
2
|
-
import { z as l, c } from "./zeros-
|
|
1
|
+
import { k as n, l as t, n as m, E as i } from "./index-C4JCoBvj.js";
|
|
2
|
+
import { z as l, c } from "./zeros-dnQxFgAD.js";
|
|
3
3
|
/**
|
|
4
4
|
* @license
|
|
5
5
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
package/dist/ops/appendCache.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
import { Tensor } from '@tensorflow/tfjs-core';
|
|
2
|
-
export declare function attentionMask(q: Tensor, k: Tensor,
|
|
2
|
+
export declare function attentionMask(q: Tensor, k: Tensor, divisor: number, mask?: Tensor, pastLen?: number): Tensor;
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import { e as
|
|
1
|
+
import { e as i } from "../index-C4JCoBvj.js";
|
|
2
2
|
import "./cpu/attentionMask.js";
|
|
3
3
|
import "./webgl/attentionMask.js";
|
|
4
4
|
import "./grads/attentionMask.js";
|
|
5
|
-
function
|
|
6
|
-
return
|
|
5
|
+
function f(t, n, e, r, o) {
|
|
6
|
+
return r ? i().runKernel("AttentionMask", { q: t, k: n, mask: r }, { divisor: e, pastLen: o || 0 }) : i().runKernel("AttentionMask", { q: t, k: n }, { divisor: e, pastLen: o || 0 });
|
|
7
7
|
}
|
|
8
8
|
export {
|
|
9
|
-
|
|
9
|
+
f as attentionMask
|
|
10
10
|
};
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { r as a, e as m } from "../../index-
|
|
2
|
-
import { c as d } from "../../concat-
|
|
1
|
+
import { r as a, e as m } from "../../index-C4JCoBvj.js";
|
|
2
|
+
import { c as d } from "../../concat-CuRsVY-K.js";
|
|
3
3
|
function r(n) {
|
|
4
4
|
const { cache: c, item: t } = n.inputs, { maxSize: o } = n.attrs, e = d([c, t], 2), s = e.shape[2];
|
|
5
5
|
if (s > o) {
|
|
@@ -1,18 +1,22 @@
|
|
|
1
|
-
import { r as
|
|
2
|
-
import { m as k } from "../../mat_mul-
|
|
3
|
-
function
|
|
4
|
-
const { q: e, k:
|
|
5
|
-
|
|
1
|
+
import { r as o, f as l } from "../../index-C4JCoBvj.js";
|
|
2
|
+
import { m as k } from "../../mat_mul-415y5Qn2.js";
|
|
3
|
+
function r(t) {
|
|
4
|
+
const { q: e, k: c, mask: n } = t.inputs, { divisor: m } = t.attrs, s = e.shape[2], a = k(e, c, !1, !0).mul(l(m));
|
|
5
|
+
if (n) {
|
|
6
|
+
const i = n.slice([0, 0], [s, s]).expandDims(0).expandDims(0);
|
|
7
|
+
return a.add(i);
|
|
8
|
+
}
|
|
9
|
+
return a;
|
|
6
10
|
}
|
|
7
11
|
const d = {
|
|
8
12
|
kernelName: "AttentionMask",
|
|
9
13
|
backendName: "cpu",
|
|
10
|
-
kernelFunc:
|
|
14
|
+
kernelFunc: r
|
|
11
15
|
};
|
|
12
|
-
|
|
16
|
+
o(d);
|
|
13
17
|
const u = {
|
|
14
18
|
kernelName: "AttentionMask",
|
|
15
19
|
backendName: "tensorflow",
|
|
16
|
-
kernelFunc:
|
|
20
|
+
kernelFunc: r
|
|
17
21
|
};
|
|
18
|
-
|
|
22
|
+
o(u);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { r as n } from "../../index-
|
|
2
|
-
import { s as f } from "../../softmax-
|
|
1
|
+
import { r as n } from "../../index-C4JCoBvj.js";
|
|
2
|
+
import { s as f } from "../../softmax-Cujsg4ay.js";
|
|
3
3
|
function r(t) {
|
|
4
4
|
const { inputs: s, attrs: i } = t, { logits: o } = s, { dim: a, dropoutRate: e } = i;
|
|
5
5
|
if (!o)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { o as u, h as c, E as g, L as h, r as m, s as p } from "../../index-
|
|
2
|
-
import { r as l } from "../../range-
|
|
3
|
-
import { s as N } from "../../stack-
|
|
1
|
+
import { o as u, h as c, E as g, L as h, r as m, s as p } from "../../index-C4JCoBvj.js";
|
|
2
|
+
import { r as l } from "../../range-9AzeApCc.js";
|
|
3
|
+
import { s as N } from "../../stack-D1YjmgKN.js";
|
|
4
4
|
/**
|
|
5
5
|
* @license
|
|
6
6
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { r as t, t as d } from "../../index-C4JCoBvj.js";
|
|
2
|
+
const o = 0.7978845608028654, c = 0.044715;
|
|
3
|
+
function m(u) {
|
|
4
|
+
const { inputs: l } = u, { x: e } = l, n = e;
|
|
5
|
+
return d(() => {
|
|
6
|
+
const r = n.mul(n).mul(n), s = n.add(r.mul(c)).mul(o).tanh().add(1).mul(0.5);
|
|
7
|
+
return n.mul(s);
|
|
8
|
+
});
|
|
9
|
+
}
|
|
10
|
+
const K = {
|
|
11
|
+
kernelName: "Gelu",
|
|
12
|
+
backendName: "cpu",
|
|
13
|
+
kernelFunc: m
|
|
14
|
+
};
|
|
15
|
+
t(K);
|
|
16
|
+
const p = {
|
|
17
|
+
kernelName: "Gelu",
|
|
18
|
+
backendName: "tensorflow",
|
|
19
|
+
kernelFunc: m
|
|
20
|
+
};
|
|
21
|
+
t(p);
|
|
22
|
+
function i(u) {
|
|
23
|
+
const { dy: l, x: e } = u.inputs;
|
|
24
|
+
return d(() => {
|
|
25
|
+
const n = e.square(), r = n.mul(e), a = e.add(r.mul(c)).mul(o).tanh(), f = a.square().neg().add(1), k = n.mul(3 * c).add(1), g = a.add(1).mul(0.5), G = e.mul(f).mul(o).mul(k).mul(0.5), N = g.add(G);
|
|
26
|
+
return l.mul(N);
|
|
27
|
+
});
|
|
28
|
+
}
|
|
29
|
+
const x = {
|
|
30
|
+
kernelName: "GeluGrad",
|
|
31
|
+
backendName: "cpu",
|
|
32
|
+
kernelFunc: i
|
|
33
|
+
};
|
|
34
|
+
t(x);
|
|
35
|
+
const h = {
|
|
36
|
+
kernelName: "GeluGrad",
|
|
37
|
+
backendName: "tensorflow",
|
|
38
|
+
kernelFunc: i
|
|
39
|
+
};
|
|
40
|
+
t(h);
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { r as e, b as u } from "../../index-
|
|
1
|
+
import { r as e, b as u } from "../../index-C4JCoBvj.js";
|
|
2
2
|
function n(o) {
|
|
3
3
|
const { inputs: r } = o, { a: l, b: t } = r;
|
|
4
4
|
return console.warn("Using fallback mulDrop implementation without dropout."), u(l, t);
|
package/dist/ops/cpu/qkv.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { r as q } from "../../index-
|
|
2
|
-
import { r as o } from "../../reshape-
|
|
3
|
-
import { s as x } from "../../split-
|
|
1
|
+
import { r as q } from "../../index-C4JCoBvj.js";
|
|
2
|
+
import { r as o } from "../../reshape-Boe4DuIO.js";
|
|
3
|
+
import { s as x } from "../../split-DbcNm1-i.js";
|
|
4
4
|
function v(p) {
|
|
5
5
|
const { x: c, kernel: K } = p.inputs, { heads: n } = p.attrs, [s, e, t] = c.shape, a = o(c, [s * e, t]), i = a.dot(K);
|
|
6
6
|
a.dispose();
|
package/dist/ops/cpu/rope.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { r as S } from "../../index-
|
|
2
|
-
import { r as F } from "../../range-
|
|
3
|
-
import { g as I } from "../../gather-
|
|
4
|
-
import { s as E } from "../../stack-
|
|
5
|
-
import { c as T } from "../../concat-
|
|
1
|
+
import { r as S } from "../../index-C4JCoBvj.js";
|
|
2
|
+
import { r as F } from "../../range-9AzeApCc.js";
|
|
3
|
+
import { g as I } from "../../gather-ZYRWhmXR.js";
|
|
4
|
+
import { s as E } from "../../stack-D1YjmgKN.js";
|
|
5
|
+
import { c as T } from "../../concat-CuRsVY-K.js";
|
|
6
6
|
function U(t, c, p, o, r) {
|
|
7
7
|
const n = o.shape[3], s = p;
|
|
8
8
|
if (s > n) return o;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { o as l, k, h, E as g, a2 as w, r as $, s as d, b as m } from "../../index-
|
|
2
|
-
import { r as b } from "../../range-
|
|
3
|
-
import { s as E } from "../../stack-
|
|
4
|
-
import { o as D } from "../../ones-
|
|
1
|
+
import { o as l, k, h, E as g, a2 as w, r as $, s as d, b as m } from "../../index-C4JCoBvj.js";
|
|
2
|
+
import { r as b } from "../../range-9AzeApCc.js";
|
|
3
|
+
import { s as E } from "../../stack-D1YjmgKN.js";
|
|
4
|
+
import { o as D } from "../../ones-Bf3YR48P.js";
|
|
5
5
|
function N(n, r, t) {
|
|
6
6
|
const s = r.rank > 1 ? r.shape[r.rank - 1] : 1, e = r.rank > 1 ? r.rank - 1 : 1, o = `Must have updates.shape = indices.shape[:batchDim] + shape[sliceDim:], got updates.shape: ${t.shape}, indices.shape: ${r.shape}, shape: ${n}, sliceDim: ${s}, and batchDim: ${e}.`;
|
|
7
7
|
if (t.rank < e)
|
package/dist/ops/fusedSoftmax.js
CHANGED
package/dist/ops/gatherSub.js
CHANGED
package/dist/ops/gelu.js
ADDED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { g as p, b as m, s as d } from "../../index-
|
|
1
|
+
import { g as p, b as m, s as d } from "../../index-C4JCoBvj.js";
|
|
2
2
|
import { mulDrop as c } from "../mulDrop.js";
|
|
3
|
-
import { s as f } from "../../sum-
|
|
3
|
+
import { s as f } from "../../sum-R28pucR5.js";
|
|
4
4
|
const g = {
|
|
5
5
|
kernelName: "FusedSoftmax",
|
|
6
6
|
outputsToSave: [!0],
|
package/dist/ops/grads/qkv.js
CHANGED
package/dist/ops/grads/rope.js
CHANGED
package/dist/ops/mulDrop.js
CHANGED
package/dist/ops/qkv.js
CHANGED
package/dist/ops/scatterSub.js
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
|
-
import { r as
|
|
2
|
-
class
|
|
3
|
-
variableNames = ["q", "k"
|
|
1
|
+
import { r as h } from "../../index-C4JCoBvj.js";
|
|
2
|
+
class l {
|
|
3
|
+
variableNames = ["q", "k"];
|
|
4
4
|
outputShape;
|
|
5
5
|
userCode;
|
|
6
|
-
customUniforms = [
|
|
7
|
-
|
|
8
|
-
|
|
6
|
+
customUniforms = [
|
|
7
|
+
{ name: "divisor", type: "float" },
|
|
8
|
+
{ name: "pastLen", type: "int" }
|
|
9
|
+
];
|
|
10
|
+
constructor(t, s, e, n, a) {
|
|
11
|
+
this.outputShape = [t, s, e, n], this.userCode = `
|
|
9
12
|
void main() {
|
|
10
13
|
ivec4 coords = getOutputCoords(); // [batch, nh, t1, t2]
|
|
11
14
|
int b = coords.x;
|
|
@@ -14,30 +17,28 @@ class k {
|
|
|
14
17
|
int t2 = coords.w;
|
|
15
18
|
|
|
16
19
|
float sum = 0.0;
|
|
17
|
-
for (int i = 0; i < ${
|
|
20
|
+
for (int i = 0; i < ${a}; ++i) {
|
|
18
21
|
float qv = getQ(b, h, t1, i);
|
|
19
|
-
float kv = getK(b, h, t2, i);
|
|
22
|
+
float kv = getK(b, h, t2, i);
|
|
20
23
|
sum += qv * kv;
|
|
21
24
|
}
|
|
22
25
|
|
|
23
26
|
// Scale by divisor
|
|
24
27
|
float scaled = sum * divisor;
|
|
25
28
|
|
|
26
|
-
//
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
setOutput(scaled + maskVal);
|
|
29
|
+
// Mask out future positions
|
|
30
|
+
setOutput((t2 > t1 + pastLen) ? -1.0/0.0 : scaled);
|
|
30
31
|
}
|
|
31
32
|
`;
|
|
32
33
|
}
|
|
33
34
|
}
|
|
34
|
-
function
|
|
35
|
-
const { q: t, k:
|
|
36
|
-
return
|
|
35
|
+
function m(o) {
|
|
36
|
+
const { q: t, k: s } = o.inputs, { divisor: e, pastLen: n } = o.attrs, a = o.backend, i = t.shape[0], r = t.shape[2], c = s.shape[2], u = t.shape[1], p = t.shape[3], d = new l(i, u, r, c, p);
|
|
37
|
+
return a.runWebGLProgram(d, [t, s], "float32", [[e], [n]]);
|
|
37
38
|
}
|
|
38
|
-
const
|
|
39
|
+
const k = {
|
|
39
40
|
kernelName: "AttentionMask",
|
|
40
41
|
backendName: "webgl",
|
|
41
|
-
kernelFunc:
|
|
42
|
+
kernelFunc: m
|
|
42
43
|
};
|
|
43
|
-
|
|
44
|
+
h(k);
|