@genai-fi/nanogpt 0.5.6 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +10 -9
- package/dist/NanoGPTModel.js +70 -121
- package/dist/RealDiv-7xu-pkZN.js +540 -0
- package/dist/Reshape-BYC1oUku.js +127 -0
- package/dist/TeachableLLM.d.ts +2 -0
- package/dist/TeachableLLM.js +42 -34
- package/dist/{TiedEmbedding-8S8xn8e6.js → TiedEmbedding-C1HBot-5.js} +12 -13
- package/dist/{axis_util-BczFISHz.js → axis_util-CCNL7jea.js} +14 -12
- package/dist/{broadcast_to-B7NGsBSh.js → broadcast_to-CddAF879.js} +2 -2
- package/dist/{concat-DdKPyAtw.js → concat-XOK9ANZu.js} +7 -7
- package/dist/{dataset-iqT4Otvb.js → dataset-BFFipD1c.js} +5 -5
- package/dist/{dropout-B09InSJS.js → dropout-xlKRoJyU.js} +9 -9
- package/dist/{gather-D6MsdXqc.js → gather-DKtUaTtA.js} +1 -1
- package/dist/gpgpu_math-B_ycgZ4W.js +3115 -0
- package/dist/{index-Du-bmOP8.js → index-CamYe_M8.js} +844 -647
- package/dist/{kernel_funcs_utils-DShm7-0k.js → kernel_funcs_utils-D5MS0JFg.js} +232 -136
- package/dist/layers/BaseLayer.js +2 -2
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/MLP.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +13 -33
- package/dist/layers/TiedEmbedding.js +6 -7
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/{log_sum_exp-CxfBtUaG.js → log_sum_exp-CV_5-TTu.js} +15 -15
- package/dist/main.js +24 -20
- package/dist/{mat_mul-CbiqIe2d.js → mat_mul-CAbRFWUj.js} +4 -4
- package/dist/{max-0Xnlpv8k.js → max-JBBv7aUf.js} +3 -3
- package/dist/mulmat_packed_gpu-DW4doKL_.js +71 -0
- package/dist/{norm-01kY9I2B.js → norm-B9dQTFYn.js} +12 -12
- package/dist/{ones-CrutWGas.js → ones-CMHNqMr6.js} +2 -2
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +5 -5
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +5 -5
- package/dist/ops/cpu/gelu.js +1 -1
- package/dist/ops/cpu/matMulGelu.js +1 -1
- package/dist/ops/cpu/matMulMul.js +1 -1
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +18 -49
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +1 -1
- package/dist/ops/grads/attentionMask.js +15 -11
- package/dist/ops/grads/fusedSoftmax.js +12 -10
- package/dist/ops/grads/gelu.js +1 -1
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/normRMS.js +1 -1
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/log.d.ts +0 -0
- package/dist/ops/log.js +1 -0
- package/dist/ops/matMulGelu.js +1 -1
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/rope.js +8 -4
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +31 -3379
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/{gpgpu_math-BFbOyvk4.js → ops/webgl/log.d.ts} +2 -8
- package/dist/ops/webgl/log.js +39 -0
- package/dist/ops/webgl/matMulGelu.js +48 -115
- package/dist/ops/webgl/matMulMul.js +1 -1
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/{ops-CJNniCAV.js → ops-DqtYemmV.js} +143 -135
- package/dist/{random_width-C-v-35bY.js → random_width-CLMQG5Jn.js} +6925 -6291
- package/dist/{range-Bvs1hidm.js → range-DqYjKnuG.js} +1 -1
- package/dist/reciprocal-z49filta.js +25 -0
- package/dist/register_all_kernels-COt6wLD0.js +21397 -0
- package/dist/{reshape-BH7eBpwq.js → reshape-C45vIIRU.js} +1 -1
- package/dist/scatter_nd_util-qgtnviTE.js +46 -0
- package/dist/selu_util-4QV_GXTB.js +740 -0
- package/dist/shared-ByfrGA97.js +3199 -0
- package/dist/{sin-CPAZXNjH.js → sin-9JBrfVaB.js} +1 -1
- package/dist/{softmax-DhWoBa7r.js → softmax-DvMvui-_.js} +1 -1
- package/dist/{split-BCUhuU7B.js → split-DxrHrPFK.js} +4 -4
- package/dist/{stack-BV1v7l3S.js → stack-DgaoDmnF.js} +1 -1
- package/dist/{sum-Cvq06317.js → sum-BpcpxNEh.js} +3 -3
- package/dist/{tensor-DgTOPY6h.js → tensor-CDz5x1mP.js} +1 -1
- package/dist/{tensor2d-CRWjDyUe.js → tensor2d-jO8JY5Jd.js} +1 -1
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +3 -3
- package/dist/training/sparseCrossEntropy.js +4 -4
- package/dist/utilities/dummy.d.ts +6 -0
- package/dist/utilities/dummy.js +31 -10
- package/dist/utilities/generate.js +3 -3
- package/dist/utilities/load.d.ts +25 -0
- package/dist/utilities/load.js +89 -37
- package/dist/utilities/profile.d.ts +5 -0
- package/dist/utilities/profile.js +12 -9
- package/dist/utilities/safetensors.d.ts +3 -0
- package/dist/utilities/safetensors.js +83 -0
- package/dist/utilities/save.js +47 -29
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-DZ3fF0R2.js → variable-CLVXjN7F.js} +1 -1
- package/dist/{zeros-BaHhQTWf.js → zeros-DUkkVccu.js} +8 -8
- package/package.json +3 -9
- package/dist/Reshape-Biok_3X1.js +0 -212
- package/dist/slice_util-DskXqRZa.js +0 -49
- package/dist/tfjs_backend-D9Ytje0G.js +0 -1010
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { i as p, W as w } from "./index-CamYe_M8.js";
|
|
2
|
+
/**
 * Validates that `updates` has a shape compatible with scattering into an
 * output of shape `shape` at the positions described by `indices`.
 *
 * The required relation is
 * `updates.shape = indices.shape[:batchDim] + shape[sliceDim:]`, where
 * `sliceDim` is the size of the last axis of `indices` and `batchDim` is
 * `indices.rank - 1` (both fall back to 1 when `indices.rank <= 1`).
 *
 * Only `.rank` and `.shape` are read from `indices` and `updates`; they are
 * presumably tensors or tensor-info objects (confirm against callers).
 *
 * @param {number[]} shape - Shape of the scatter output.
 * @param {{rank: number, shape: number[]}} indices - Scatter indices.
 * @param {{rank: number, shape: number[]}} updates - Values to scatter.
 * @returns {void}
 * @throws {Error} If the rank or any dimension of `updates` is inconsistent
 *   with `indices` and `shape`.
 */
function k(shape, indices, updates) {
  const sliceDim = indices.rank > 1 ? indices.shape[indices.rank - 1] : 1;
  const batchDim = indices.rank > 1 ? indices.rank - 1 : 1;
  const shapeError = `Must have updates.shape = indices.shape[:batchDim] + shape[sliceDim:], got updates.shape: ${updates.shape}, indices.shape: ${indices.shape}, shape: ${shape}, sliceDim: ${sliceDim}, and batchDim: ${batchDim}.`;

  if (updates.rank < batchDim) {
    throw new Error(`${shapeError} update.rank < ${batchDim}. `);
  }
  if (shape.length < sliceDim + (updates.rank - batchDim)) {
    throw new Error(`${shapeError} Output shape length < ${sliceDim + (updates.rank - batchDim)}`);
  }
  if (updates.rank !== batchDim + shape.length - sliceDim) {
    throw new Error(`${shapeError} update.rank != ${batchDim + shape.length - sliceDim}`);
  }

  // Leading (batch) dimensions of `updates` must mirror those of `indices`.
  for (let d = 0; d < batchDim; ++d) {
    if (updates.shape[d] !== indices.shape[d]) {
      throw new Error(`${shapeError} updates.shape[${d}] (${updates.shape[d]}) != indices.shape[${d}] (${indices.shape[d]}).`);
    }
  }

  // Trailing dimensions of `updates` must match `shape[sliceDim:]`.
  for (let d = 0; d < updates.rank - batchDim; ++d) {
    const updateAxis = d + batchDim;
    const shapeAxis = d + sliceDim;
    if (updates.shape[updateAxis] !== shape[shapeAxis]) {
      // Report the axis of `shape` that was actually compared (offset by
      // sliceDim, not batchDim) so the message matches the failing check.
      throw new Error(`${shapeError} updates.shape[${updateAxis}] (${updates.shape[updateAxis]}) != shape[${shapeAxis}] (${shape[shapeAxis]})`);
    }
  }
}
|
|
17
|
+
/**
 * Validates the arguments of a scatterND-style operation before it runs.
 *
 * Checks, in order: `indices.rank >= 1`, `updates.rank >= 1`,
 * `indices.dtype === "int32"`, and `shape.length >= 1`; then delegates the
 * detailed shape-compatibility check to `k(shape, indices, updates)`.
 *
 * A former `shape.length === 0` branch was removed: it sat directly after the
 * `shape.length < 1` guard and could therefore never execute.
 *
 * @param {{rank: number}} updates - Values to scatter.
 * @param {{rank: number, dtype: string}} indices - Scatter indices.
 * @param {number[]} shape - Shape of the scatter output.
 * @returns {void}
 * @throws {Error} If any of the checks above fails, or if `k` rejects the
 *   shapes.
 */
function $(updates, indices, shape) {
  if (indices.rank < 1) {
    throw new Error(`tf.scatterND() expects the indices to be rank 1 or higher, but the rank was ${indices.rank}.`);
  }
  if (updates.rank < 1) {
    throw new Error(`tf.scatterND() expects the updates to be rank 1 or higher, but the rank was ${updates.rank}.`);
  }
  if (indices.dtype !== "int32") {
    throw new Error(`The dtype of 'indices' should be int32, but got dtype: ${indices.dtype}`);
  }
  if (shape.length < 1) {
    throw new Error(`Output rank must be greater or equal to 1, but got shape: ${shape}`);
  }
  k(shape, indices, updates);
}
|
|
34
|
+
/**
 * Computes the bookkeeping values used to perform a scatter into an output
 * of shape `r`, based on the shape of the indices `t`.
 *
 * @param {*} o - Not read by this function.
 * @param {{shape: number[]}} t - Indices; only `t.shape` is read.
 * @param {number[]} r - Shape of the scatter output.
 * @returns {{sliceRank: number, numUpdates: number, sliceSize: number,
 *   strides: number[], outputSize: number}}
 *   - `sliceRank`: last dimension of `t.shape`, or 1 when `t` has rank <= 1.
 *   - `numUpdates`: `p(t.shape)` divided by `sliceRank` (clamped to >= 1).
 *   - `sliceSize`: product of `r[sliceRank:]`.
 *   - `strides`: `w(r.slice(0, sliceRank))` followed by a trailing 1.
 *   - `outputSize`: `p(r)`.
 *   (`p` and `w` are imported helpers; presumably size-from-shape and
 *   compute-strides respectively — confirm against the imported module.)
 */
function m(o, t, r) {
  const indicesShape = t.shape;
  const indicesRank = indicesShape.length;

  let sliceRank = 1;
  if (indicesRank > 1) {
    sliceRank = indicesShape[indicesRank - 1];
  }

  // Multiply together every output dimension from `sliceRank` onwards.
  const totalDims = r.length;
  let sliceSize = 1;
  for (let axis = sliceRank; axis < totalDims; ++axis) {
    sliceSize *= r[axis];
  }

  // Guard the division below against a slice rank below 1.
  let divisor = sliceRank;
  if (sliceRank < 1) {
    divisor = 1;
  }

  const numUpdates = p(indicesShape) / divisor;
  const leadingStrides = w(r.slice(0, sliceRank));
  const strides = [...leadingStrides, 1];
  const outputSize = p(r);

  return { sliceRank, numUpdates, sliceSize, strides, outputSize };
}
|
|
42
|
+
export {
|
|
43
|
+
k as a,
|
|
44
|
+
m as c,
|
|
45
|
+
$ as v
|
|
46
|
+
};
|