@genai-fi/nanogpt 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +11 -11
- package/dist/NanoGPTModel.d.ts +2 -2
- package/dist/NanoGPTModel.js +104 -136
- package/dist/{RealDiv-BYViZwhN.js → RealDiv-C4hOvYOZ.js} +26 -25
- package/dist/{Reshape-t7Kcikjk.js → Reshape-BLijOA8h.js} +5 -5
- package/dist/TeachableLLM.d.ts +3 -0
- package/dist/TeachableLLM.js +50 -47
- package/dist/{TiedEmbedding-9WeDwvjO.js → TiedEmbedding-BLltddza.js} +4 -4
- package/dist/{axis_util-Bu4h7XWV.js → axis_util-DaAl5MER.js} +3 -3
- package/dist/backend.d.ts +1 -0
- package/dist/backend.js +7 -0
- package/dist/backend_util-DWiwsi2N.js +749 -0
- package/dist/{broadcast_to-DARN-DBD.js → broadcast_to-C4v-j9yA.js} +2 -2
- package/dist/{concat-5aPGqw3Z.js → concat-CsHeR4zV.js} +8 -8
- package/dist/{dataset-pgqp-YfL.js → dataset-JDyjG3QR.js} +3 -3
- package/dist/{dropout-Bciw46HT.js → dropout-hpDwECTe.js} +7 -7
- package/dist/{gather-DjyCjmOD.js → gather-D0_gPiBz.js} +4 -4
- package/dist/gelu-uyHP1x1f.js +26 -0
- package/dist/gpgpu_math-DJm3ZTAf.js +2371 -0
- package/dist/index-BPPzKVdR.js +12099 -0
- package/dist/{index-BAzbokzv.js → index-C0dhsYom.js} +405 -389
- package/dist/{kernel_funcs_utils-CUxJCg0g.js → kernel_funcs_utils-CwRTFqrc.js} +31 -30
- package/dist/layers/BaseLayer.js +2 -2
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/MLP.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +4 -4
- package/dist/layers/TiedEmbedding.js +5 -5
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/loader/loadTransformers.js +1 -1
- package/dist/loader/oldZipLoad.js +5 -5
- package/dist/{log_sum_exp-YEo2h3gb.js → log_sum_exp-D086OgZJ.js} +15 -15
- package/dist/main.d.ts +2 -0
- package/dist/main.js +9 -5
- package/dist/{mat_mul-7121rsJk.js → mat_mul-1nwdPkQ_.js} +4 -4
- package/dist/{max-DtlIuVeW.js → max-BQc2Aj-I.js} +4 -4
- package/dist/{mulmat_packed_gpu-D4nKF7Je.js → mulmat_packed_gpu-Gzf3I9UV.js} +1 -1
- package/dist/non_max_suppression_impl-CsEgBuMA.js +134 -0
- package/dist/{ones-BBlSRqn1.js → ones-D63HpSF_.js} +2 -2
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/appendCache.js +8 -8
- package/dist/ops/cpu/attentionMask.js +9 -9
- package/dist/ops/cpu/fusedSoftmax.js +17 -11
- package/dist/ops/cpu/gatherSub.js +7 -7
- package/dist/ops/cpu/gelu.js +13 -13
- package/dist/ops/cpu/matMulGelu.js +36 -24
- package/dist/ops/cpu/matMulMul.js +14 -8
- package/dist/ops/cpu/mulDropout.js +9 -3
- package/dist/ops/cpu/normRMS.js +5 -5
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +11 -11
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/grads/attentionMask.js +1 -1
- package/dist/ops/grads/fusedSoftmax.js +2 -2
- package/dist/ops/grads/gelu.js +3 -24
- package/dist/ops/grads/matMulGelu.js +5 -5
- package/dist/ops/grads/normRMS.js +6 -6
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +3 -3
- package/dist/ops/matMulGelu.js +1 -1
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/rope.js +4 -4
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +4 -4
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.js +5 -5
- package/dist/ops/webgl/matMulGelu.js +17 -17
- package/dist/ops/webgl/matMulMul.js +1 -1
- package/dist/ops/webgl/mulDropout.js +4 -4
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/ops/webgpu/appendCache.js +56 -0
- package/dist/ops/webgpu/attentionMask.d.ts +1 -0
- package/dist/ops/webgpu/attentionMask.js +64 -0
- package/dist/ops/webgpu/gatherSub.d.ts +1 -0
- package/dist/ops/webgpu/gatherSub.js +37 -0
- package/dist/ops/webgpu/gelu.d.ts +14 -0
- package/dist/ops/webgpu/gelu.js +86 -0
- package/dist/ops/webgpu/index.d.ts +0 -0
- package/dist/ops/webgpu/index.js +8 -0
- package/dist/ops/webgpu/normRMS.d.ts +1 -0
- package/dist/ops/webgpu/normRMS.js +115 -0
- package/dist/ops/webgpu/qkv.d.ts +1 -0
- package/dist/ops/webgpu/qkv.js +56 -0
- package/dist/ops/webgpu/rope.d.ts +1 -0
- package/dist/ops/webgpu/rope.js +68 -0
- package/dist/ops/webgpu/scatterSub.d.ts +1 -0
- package/dist/ops/webgpu/scatterSub.js +37 -0
- package/dist/{ops-C0sQEcPw.js → ops-CIQLNshk.js} +452 -503
- package/dist/{random_width-DWzaOgrn.js → random_width-DkYP8W8N.js} +143 -144
- package/dist/{range-DYsrnfiy.js → range-CYzpQY53.js} +1 -1
- package/dist/{reciprocal-CJQeasVa.js → reciprocal-_A9yv27J.js} +1 -1
- package/dist/{register_all_kernels-BfFCQAqs.js → register_all_kernels-guvSxp7M.js} +202 -200
- package/dist/{reshape-krWGKraP.js → reshape-BMUzc1UY.js} +3 -3
- package/dist/{scatter_nd_util-93ln7Hut.js → scatter_nd_util-IRBqKz_b.js} +3 -3
- package/dist/{selu_util-sntGesxr.js → selu_util-Dt_iuXaq.js} +6 -6
- package/dist/shared-BNa2q6jD.js +69 -0
- package/dist/{shared-Ca6iDobD.js → shared-CDu9S76h.js} +541 -606
- package/dist/{sin-D_h-qCSx.js → sin-Cocju-BY.js} +6 -6
- package/dist/{softmax-fsdtf6JC.js → softmax-GPNK3o-U.js} +3 -3
- package/dist/{split-eiktj-6L.js → split-CHzJjxDv.js} +4 -4
- package/dist/{stack-dfEEz2OY.js → stack-Dpgg_1W1.js} +2 -2
- package/dist/{sum-BE_Irnim.js → sum-B8wEpKsg.js} +5 -5
- package/dist/{tensor-Xyi595sG.js → tensor-RvZVNmg0.js} +1 -1
- package/dist/{tensor2d-CPEkynbH.js → tensor2d-B_kyod7_.js} +1 -1
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/Evaluator.js +1 -1
- package/dist/training/FullTrainer.js +20 -20
- package/dist/training/Trainer.d.ts +5 -6
- package/dist/training/Trainer.js +59 -60
- package/dist/training/sparseCrossEntropy.js +19 -26
- package/dist/utilities/dummy.js +19 -19
- package/dist/utilities/generate.js +15 -16
- package/dist/utilities/multinomialCPU.d.ts +2 -0
- package/dist/utilities/multinomialCPU.js +13 -0
- package/dist/utilities/performance.d.ts +2 -0
- package/dist/utilities/performance.js +16 -0
- package/dist/utilities/profile.d.ts +1 -0
- package/dist/utilities/profile.js +9 -6
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-wSS22xj5.js → variable-DXEUOwew.js} +1 -1
- package/dist/webgpu_util-g13LvDIv.js +625 -0
- package/dist/{zeros-YJDE7oRb.js → zeros-DCPCdFGq.js} +8 -8
- package/package.json +2 -1
- package/dist/gpgpu_math-CNslybmD.js +0 -3115
- package/dist/norm-CzltS9Fz.js +0 -86
- package/dist/ops/node/sparseCrossEntropy.js +0 -11
- /package/dist/ops/{node/sparseCrossEntropy.d.ts → webgpu/appendCache.d.ts} +0 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { f as h, c as l, g as m } from "../../webgpu_util-g13LvDIv.js";
|
|
2
|
+
import { f as g } from "../../index-C0dhsYom.js";
|
|
3
|
+
/**
 * WebGPU program description for the "Rope" kernel.
 *
 * For every output element at coordinates [b, h, t, d] the generated shader
 * reads `cos`/`sin` at row `t + uniforms.pastLen`, column `d / 2`, and combines
 * the element with its pair partner (d + 1 for even d, d - 1 for odd d):
 *   even d: x[d] * cos - x[d + 1] * sin
 *   odd d:  x[d - 1] * sin + x[d] * cos
 *
 * The helpers `h`, `l` and `m` are imported from the webgpu_util chunk; they
 * are used here to build the dispatch layout, the dispatch size and the shader
 * entry-point header respectively.
 */
class f {
  variableNames = ["x", "sin", "cos"];
  outputShape;
  shaderKey = "Rope";
  dispatchLayout;
  dispatch;
  workgroupSize = [64, 1, 1];
  size = true;
  uniforms = "pastLen: i32";

  /**
   * @param {number} t Size of output dimension 0.
   * @param {number} o Size of output dimension 1.
   * @param {number} s Size of output dimension 2.
   * @param {number} n Size of output dimension 3 (emitted into the shader as `rotaryDim`).
   */
  constructor(t, o, s, n) {
    this.outputShape = [t, o, s, n];
    // The last dimension is baked into the WGSL source as a literal (see
    // getUserCode), so it has to be part of the shader key as well. A backend
    // that caches compiled pipelines by shaderKey plus tensor ranks (as the
    // TensorFlow.js WebGPU backend does) would otherwise reuse a shader that
    // was compiled for a different last-dimension size.
    this.shaderKey = `Rope_${n}`;
    this.dispatchLayout = h(this.outputShape);
    this.dispatch = l(this.dispatchLayout, this.outputShape, this.workgroupSize);
  }

  /**
   * Builds the WGSL body for this program.
   * @returns {string} Shader source with `rotaryDim` fixed to `outputShape[3]`.
   */
  getUserCode() {
    const rotaryDim = this.outputShape[3];
    return `
      ${m("index")} {
        if (index < uniforms.size) {
          let coords = getCoordsFromIndex(index); // [b, h, t, d]
          let b = coords[0];
          let h = coords[1];
          let t = coords[2];
          let d = coords[3];

          let rotaryDim = ${rotaryDim};

          var outVal = 0.0;

          if (d < rotaryDim) {
            let pairIdx = d / 2;
            let cos = getCos(t + uniforms.pastLen, pairIdx, 0);
            let sin = getSin(t + uniforms.pastLen, pairIdx, 0);

            let ownX = getX(b, h, t, d) * cos;

            if (d % 2 == 0) {
              // even index
              let even = ownX;
              let odd = getX(b, h, t, d + 1);
              outVal = even - odd * sin;
            } else {
              // odd index
              let even = getX(b, h, t, d - 1);
              let odd = ownX;
              outVal = even * sin + odd;
            }
          } else {
            // pass through for non-rotary dims
            outVal = getX(b, h, t, d);
          }

          setOutputAtIndex(index, outVal);
        }
      }
    `;
  }
}
|
|
59
|
+
/**
 * Kernel function for the "Rope" op on the webgpu backend.
 *
 * Builds a Rope program sized to the four dimensions of `x` and runs it with
 * `pastLen` supplied as an int32 uniform.
 *
 * @param {object} e Kernel invocation with `inputs` ({ x, sin, cos }),
 *   `attrs` ({ pastLen }) and `backend`.
 * @returns {*} Whatever `backend.runWebGPUProgram` returns for the float32 output.
 */
function b(e) {
  const { x: input, sin: sinTable, cos: cosTable } = e.inputs;
  const { pastLen } = e.attrs;
  const backend = e.backend;

  const program = new f(input.shape[0], input.shape[1], input.shape[2], input.shape[3]);
  const uniformData = [{ type: "int32", data: [pastLen] }];

  return backend.runWebGPUProgram(program, [input, sinTable, cosTable], "float32", uniformData);
}
|
|
63
|
+
// Kernel descriptor tying the "Rope" kernel name to the webgpu implementation `b`.
const x = {
  kernelName: "Rope",
  backendName: "webgpu",
  kernelFunc: b,
};

// Hand the descriptor to `g` (imported from the index chunk) at module load;
// presumably this is the kernel-registration entry point — confirm against that chunk.
g(x);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { f as c, c as u, g as d } from "../../webgpu_util-g13LvDIv.js";
|
|
2
|
+
import { f as p } from "../../index-C0dhsYom.js";
|
|
3
|
+
/**
 * WebGPU program description for the "ScatterSub" shader.
 *
 * For every output element at coordinates [batch, depth] the generated shader
 * writes `(prob - 1.0) * dy` when the label for that row equals the column
 * index, and `prob * dy` otherwise.
 *
 * The helpers `c`, `u` and `d` are imported from the webgpu_util chunk; they
 * are used here to build the dispatch layout, the dispatch size and the shader
 * entry-point header respectively.
 */
class h {
  variableNames = ["labels", "softmaxProbs", "dy"];
  outputShape;
  shaderKey = "ScatterSub";
  dispatchLayout;
  dispatch;
  workgroupSize = [64, 1, 1];
  size = true;

  /**
   * @param {number} rows Size of output dimension 0.
   * @param {number} cols Size of output dimension 1.
   */
  constructor(rows, cols) {
    this.outputShape = [rows, cols];
    this.dispatchLayout = c(this.outputShape);
    this.dispatch = u(this.dispatchLayout, this.outputShape, this.workgroupSize);
  }

  /**
   * Builds the WGSL body for this program.
   * @returns {string} Shader source.
   */
  getUserCode() {
    const entryPoint = d("index");
    return `
      ${entryPoint} {
        if (index < uniforms.size) {
          let coords = getCoordsFromIndex(index); // [batch, depth]
          let idx = i32(getLabels(coords[0]));
          let prob = getSoftmaxProbsByOutputIndex(index);
          let dy = getDy(coords[0]);
          setOutputAtIndex(index, select(prob, prob - 1.0, idx == coords[1]) * dy);
        }
      }
    `;
  }
}
|
|
28
|
+
/**
 * Kernel function for the "EfficientScatterSub" op on the webgpu backend.
 *
 * Sizes the ScatterSub program from `labels.shape[0]` and `logits.shape[1]`,
 * then runs it with the inputs ordered as [labels, logits, dy].
 *
 * @param {object} o Kernel invocation with `inputs` ({ logits, labels, dy })
 *   and `backend`.
 * @returns {*} Whatever `backend.runWebGPUProgram` returns for the float32 output.
 */
function b(o) {
  const { logits, labels, dy } = o.inputs;
  const backend = o.backend;

  const rows = labels.shape[0];
  const cols = logits.shape[1];
  const program = new h(rows, cols);

  return backend.runWebGPUProgram(program, [labels, logits, dy], "float32");
}
|
|
32
|
+
// Kernel descriptor tying the "EfficientScatterSub" kernel name to the webgpu
// implementation `b`.
const l = {
  kernelName: "EfficientScatterSub",
  backendName: "webgpu",
  kernelFunc: b,
};

// Hand the descriptor to `p` (imported from the index chunk) at module load;
// presumably this is the kernel-registration entry point — confirm against that chunk.
p(l);
|