@genai-fi/nanogpt 0.2.11 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +30 -25
- package/dist/NanoGPTModel.d.ts +13 -14
- package/dist/NanoGPTModel.js +167 -85
- package/dist/TeachableLLM.d.ts +3 -5
- package/dist/TeachableLLM.js +47 -35
- package/dist/Trainer.js +8 -8
- package/dist/concat-BIZS_td9.js +33 -0
- package/dist/data/parquet.js +1 -1
- package/dist/exports_layers-7idKoYqh.js +25 -0
- package/dist/{sum-D7fu15XL.js → gather-BPGW8RsB.js} +6 -8
- package/dist/index-C4L8Cm77.js +349 -0
- package/dist/{index-YPKosni4.js → index-pWA4_lUh.js} +1020 -782
- package/dist/layers/CausalSelfAttention.d.ts +11 -11
- package/dist/layers/CausalSelfAttention.js +71 -63
- package/dist/layers/MLP.d.ts +6 -7
- package/dist/layers/MLP.js +18 -16
- package/dist/layers/RMSNorm.d.ts +6 -7
- package/dist/layers/RMSNorm.js +15 -13
- package/dist/layers/RoPECache.d.ts +4 -6
- package/dist/layers/RoPECache.js +36 -23
- package/dist/layers/TiedEmbedding.d.ts +7 -8
- package/dist/layers/TiedEmbedding.js +16 -418
- package/dist/layers/TransformerBlock.d.ts +8 -9
- package/dist/layers/TransformerBlock.js +12 -12
- package/dist/main.d.ts +1 -0
- package/dist/main.js +35 -21
- package/dist/{mat_mul-Bu7bhLms.js → mat_mul-D7_a4KJn.js} +5 -5
- package/dist/moments-DfcpfwKi.js +132 -0
- package/dist/ones-Cog-G2ag.js +29 -0
- package/dist/ops/appendCache.d.ts +2 -0
- package/dist/ops/appendCache.js +9 -0
- package/dist/ops/attentionMask.d.ts +1 -1
- package/dist/ops/attentionMask.js +7 -85
- package/dist/ops/cpu/appendCache.d.ts +2 -0
- package/dist/ops/cpu/appendCache.js +28 -0
- package/dist/ops/cpu/attentionMask.js +18 -0
- package/dist/ops/cpu/gatherSub.d.ts +1 -0
- package/dist/ops/cpu/gatherSub.js +34 -0
- package/dist/ops/cpu/qkv.d.ts +5 -0
- package/dist/ops/cpu/qkv.js +38 -0
- package/dist/ops/cpu/rope.d.ts +6 -0
- package/dist/ops/cpu/rope.js +38 -0
- package/dist/ops/cpu/scatterSub.d.ts +1 -0
- package/dist/ops/cpu/scatterSub.js +70 -0
- package/dist/ops/gatherSub.d.ts +1 -1
- package/dist/ops/gatherSub.js +6 -63
- package/dist/ops/grads/attentionMask.d.ts +1 -0
- package/dist/ops/grads/attentionMask.js +21 -0
- package/dist/ops/grads/qkv.d.ts +1 -0
- package/dist/ops/grads/qkv.js +20 -0
- package/dist/ops/grads/rope.d.ts +1 -0
- package/dist/ops/grads/rope.js +14 -0
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/qkv.d.ts +1 -6
- package/dist/ops/qkv.js +7 -124
- package/dist/ops/rope.d.ts +0 -5
- package/dist/ops/rope.js +7 -150
- package/dist/ops/scatterSub.d.ts +1 -1
- package/dist/ops/scatterSub.js +6 -147
- package/dist/ops/webgl/appendCache.d.ts +1 -0
- package/dist/ops/webgl/appendCache.js +43 -0
- package/dist/ops/webgl/attentionMask.d.ts +1 -0
- package/dist/ops/webgl/attentionMask.js +43 -0
- package/dist/ops/webgl/gatherSub.d.ts +1 -0
- package/dist/ops/webgl/gatherSub.js +27 -0
- package/dist/ops/webgl/qkv.d.ts +1 -0
- package/dist/ops/webgl/qkv.js +46 -0
- package/dist/ops/webgl/rope.d.ts +1 -0
- package/dist/ops/webgl/rope.js +56 -0
- package/dist/ops/webgl/scatterSub.d.ts +1 -0
- package/dist/ops/webgl/scatterSub.js +27 -0
- package/dist/{parquet-BRl5lE_I.js → parquet-C0Tlmv9c.js} +3045 -3048
- package/dist/random_width-PbCt7RXv.js +15489 -0
- package/dist/range-CcDl05lo.js +26 -0
- package/dist/{reshape-DmnmKT6r.js → reshape-C8CR_Bad.js} +3 -3
- package/dist/sin-BJIrfnj7.js +47 -0
- package/dist/softmax-Be_lsqUc.js +105 -0
- package/dist/{complex-CJ-qCcLB.js → split-DZbvruEP.js} +6 -8
- package/dist/stack-BMm-efee.js +27 -0
- package/dist/sum-C7Mgy9Bw.js +104 -0
- package/dist/tensor-DJVbYhh1.js +24 -0
- package/dist/tensor2d-ZuQSh2D-.js +30 -0
- package/dist/tokeniser/bpe.d.ts +17 -6
- package/dist/tokeniser/bpe.js +88 -60
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.d.ts +6 -6
- package/dist/training/DatasetBuilder.js +1262 -17
- package/dist/training/Evaluator.d.ts +3 -2
- package/dist/training/FullTrainer.d.ts +9 -8
- package/dist/training/FullTrainer.js +26 -25
- package/dist/training/LayerTrainer.d.ts +9 -8
- package/dist/training/LayerTrainer.js +34 -33
- package/dist/training/Trainer.d.ts +22 -21
- package/dist/training/Trainer.js +21 -18
- package/dist/training/sparseCrossEntropy.js +22 -166
- package/dist/utilities/dummy.js +10 -8
- package/dist/utilities/generate.js +14 -11
- package/dist/utilities/load.d.ts +1 -2
- package/dist/utilities/load.js +37 -35
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/save.js +14 -9
- package/dist/utilities/tokenParse.d.ts +1 -1
- package/dist/utilities/tokenParse.js +7 -61
- package/dist/utilities/weights.d.ts +3 -3
- package/dist/utilities/weights.js +21 -19
- package/dist/variable-Dl_ub3pk.js +23 -0
- package/dist/{stack-BtKpB0Ry.js → zeros-CCy9C3uU.js} +18 -16
- package/package.json +2 -1
- package/dist/assets/worker-BYeSPNkq.js +0 -1
- package/dist/tokeniser/NodeTokeniser.d.ts +0 -20
- package/dist/tokeniser/NodeTokeniser.js +0 -46
- package/dist/tokeniser/WebTokeniser.d.ts +0 -18
- package/dist/tokeniser/WebTokeniser.js +0 -96
- package/dist/tokeniser/worker.js +0 -53
- /package/dist/{tokeniser/worker.d.ts → ops/cpu/attentionMask.d.ts} +0 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { r as u } from "../../index-pWA4_lUh.js";
|
|
2
|
+
/**
 * Shader program description for the "Rope" kernel (rotary position
 * embedding) on the WebGL backend.
 *
 * The output has the same 4-D shape as `x`, laid out as [b, h, t, d].
 * Adjacent (even, odd) elements of the last axis are treated as a pair and
 * rotated with the sin/cos values looked up at (t + pastLen, d / 2, 0):
 *   even' = even * cos - odd * sin
 *   odd'  = even * sin + odd * cos
 *
 * The getX / getSin / getCos / getOutputCoords / setOutput helpers used in the
 * GLSL are not defined here; presumably the WebGL backend generates them from
 * `variableNames` — TODO confirm against the backend.
 */
class l {
  // Shader-side names for the three inputs.
  variableNames = ["x", "sin", "cos"];
  outputShape;
  userCode;
  // Shape uniforms are not enabled; the original only carried
  // `enableShapeUniforms = true` as a commented-out line.
  // `pastLen` is the position offset added to `t` for the sin/cos lookup.
  customUniforms = [{ name: "pastLen", type: "int" }];

  /**
   * @param {number} batch   Size of output axis 0 (b).
   * @param {number} heads   Size of output axis 1 (h).
   * @param {number} seqLen  Size of output axis 2 (t).
   * @param {number} headDim Size of output axis 3 (d); also baked into the
   *   shader as `rotaryDim`.
   */
  constructor(batch, heads, seqLen, headDim) {
    this.outputShape = [batch, heads, seqLen, headDim];
    // rotaryDim equals the size of the last output axis, so `d < rotaryDim`
    // holds for every output coordinate and the pass-through branch is
    // effectively unused here.
    // NOTE(review): with an odd headDim the last even index would read
    // getX(..., d + 1), one past the last axis — presumably callers always
    // pass an even size; confirm.
    // The GLSL text below is kept line-for-line as it appears in the source.
    this.userCode = `
void main() {
ivec4 coords = getOutputCoords(); // [b, h, t, d]
int b = coords.x;
int h = coords.y;
int t = coords.z;
int d = coords.w;

int rotaryDim = ${headDim};

float outVal = 0.0;

if (d < rotaryDim) {
int pairIdx = d / 2;
float cos = getCos(t + pastLen, pairIdx, 0);
float sin = getSin(t + pastLen, pairIdx, 0);

if (d % 2 == 0) {
// even index
float even = getX(b, h, t, d);
float odd = getX(b, h, t, d + 1);
outVal = even * cos - odd * sin;
} else {
// odd index
float even = getX(b, h, t, d - 1);
float odd = getX(b, h, t, d);
outVal = even * sin + odd * cos;
}
} else {
// pass through for non-rotary dims
outVal = getX(b, h, t, d);
}

setOutput(outVal);
}
`;
  }
}
|
|
47
|
+
/**
 * Kernel function for "Rope" on the WebGL backend.
 *
 * Builds a shader program sized to the four axes of `x` and runs it with
 * `x`, `sin` and `cos` as inputs and `pastLen` as the single custom uniform
 * value.
 *
 * @param {object} e Kernel invocation record with `inputs` ({ x, sin, cos }),
 *   `attrs` ({ pastLen }) and `backend` (must expose `runWebGLProgram`).
 * @returns {*} Whatever `backend.runWebGLProgram` returns for a "float32" output.
 */
function h(e) {
  const { inputs, attrs, backend } = e;
  const { x, sin, cos } = inputs;
  const { pastLen } = attrs;

  // The program's output shape mirrors the first four axes of x.
  const [batch, heads, seqLen, headDim] = x.shape;
  const program = new l(batch, heads, seqLen, headDim);

  // Input order must match the program's variableNames: x, sin, cos.
  return backend.runWebGLProgram(program, [x, sin, cos], "float32", [[pastLen]]);
}
|
|
51
|
+
// Kernel descriptor tying the "Rope" kernel name to its WebGL implementation.
const f = {
  kernelName: "Rope",
  backendName: "webgl",
  kernelFunc: h,
};

// Runs once at module load. `u` is the `r` export of the bundled index
// module — presumably the kernel-registration function; confirm there.
u(f);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { r as i } from "../../index-pWA4_lUh.js";
|
|
2
|
+
/**
 * Shader program description for the "EfficientScatterSub" kernel on the
 * WebGL backend.
 *
 * For each output coordinate (row, col) it emits
 *   (col == label[row] ? prob - 1.0 : prob) * dy[row]
 * where `prob` is the `softmaxProbs` value at the same coordinate.
 *
 * The getLabels / getSoftmaxProbsAtOutCoords / getDy / getOutputCoords /
 * setOutput helpers are not defined here; presumably the WebGL backend
 * generates them from `variableNames` — TODO confirm.
 */
class u {
  // Shader-side names for the three inputs.
  variableNames = ["labels", "softmaxProbs", "dy"];
  outputShape;
  userCode;

  /**
   * @param {number} rows Size of output axis 0.
   * @param {number} cols Size of output axis 1.
   */
  constructor(rows, cols) {
    this.outputShape = [rows, cols];
    // The GLSL text below is kept line-for-line as it appears in the source.
    this.userCode = `
void main() {
ivec2 coords = getOutputCoords();
int index = int(getLabels(coords.x));
float prob = getSoftmaxProbsAtOutCoords();
float dy = getDy(coords.x);
setOutput((index == coords.y ? prob - 1.0 : prob) * dy);
}
`;
  }
}
|
|
18
|
+
/**
 * Kernel function for "EfficientScatterSub" on the WebGL backend.
 *
 * Builds a program whose output is [labels.shape[0], logits.shape[1]] and
 * runs it with the inputs in the order labels, logits, dy.
 *
 * NOTE(review): the `logits` input is bound to the shader variable named
 * `softmaxProbs`, so callers presumably pass softmax probabilities under the
 * `logits` key — confirm against the op that invokes this kernel.
 *
 * @param {object} o Kernel invocation record with `inputs`
 *   ({ logits, labels, dy }) and `backend` (must expose `runWebGLProgram`).
 * @returns {*} Whatever `backend.runWebGLProgram` returns for a "float32" output.
 */
function d(o) {
  const { inputs, backend } = o;
  const { logits, labels, dy } = inputs;

  const rows = labels.shape[0];
  const cols = logits.shape[1];
  const program = new u(rows, cols);

  // Input order must match the program's variableNames: labels, softmaxProbs, dy.
  return backend.runWebGLProgram(program, [labels, logits, dy], "float32");
}
|
|
22
|
+
// Kernel descriptor tying the "EfficientScatterSub" kernel name to its WebGL
// implementation.
const b = {
  kernelName: "EfficientScatterSub",
  backendName: "webgl",
  kernelFunc: d,
};

// Runs once at module load. `i` is the `r` export of the bundled index
// module — presumably the kernel-registration function; confirm there.
i(b);
|