@genai-fi/nanogpt 0.3.2 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +22 -22
- package/dist/MLP-KHhikThU.js +83 -0
- package/dist/NanoGPTModel.d.ts +2 -3
- package/dist/NanoGPTModel.js +79 -79
- package/dist/TeachableLLM.js +16 -13
- package/dist/axis_util-DeydwOoC.js +69 -0
- package/dist/{concat-BIZS_td9.js → concat-DS_qH7MI.js} +5 -5
- package/dist/config.js +7 -8
- package/dist/{gather-BPGW8RsB.js → gather-BUmJIS8n.js} +1 -1
- package/dist/{index-pWA4_lUh.js → index-XjBAhiFO.js} +1272 -1174
- package/dist/layers/BaseLayer.d.ts +14 -2
- package/dist/layers/BaseLayer.js +9 -9
- package/dist/layers/CausalSelfAttention.d.ts +4 -8
- package/dist/layers/CausalSelfAttention.js +106 -80
- package/dist/layers/MLP.d.ts +2 -3
- package/dist/layers/MLP.js +5 -62
- package/dist/layers/RMSNorm.d.ts +2 -2
- package/dist/layers/RMSNorm.js +11 -11
- package/dist/layers/RoPECache.js +3 -3
- package/dist/layers/TiedEmbedding.js +7 -6
- package/dist/layers/TransformerBlock.d.ts +2 -6
- package/dist/layers/TransformerBlock.js +9 -12
- package/dist/{sum-C7Mgy9Bw.js → log_sum_exp-DJPkVZZn.js} +32 -54
- package/dist/main.js +22 -19
- package/dist/{mat_mul-D7_a4KJn.js → mat_mul-CKwFEV1Q.js} +1 -1
- package/dist/max-DJvEiCAJ.js +25 -0
- package/dist/moments-CrWRPcR3.js +53 -0
- package/dist/norm-BzY929B_.js +86 -0
- package/dist/{ones-Cog-G2ag.js → ones-BO01zpJG.js} +2 -2
- package/dist/ops/appendCache.js +1 -1
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +2 -2
- package/dist/ops/cpu/fusedSoftmax.d.ts +9 -0
- package/dist/ops/cpu/fusedSoftmax.js +23 -0
- package/dist/ops/cpu/gatherSub.js +3 -3
- package/dist/ops/cpu/mulDropout.d.ts +1 -0
- package/dist/ops/cpu/mulDropout.js +17 -0
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +27 -27
- package/dist/ops/fusedSoftmax.d.ts +2 -0
- package/dist/ops/fusedSoftmax.js +10 -0
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/grads/attentionMask.js +1 -1
- package/dist/ops/grads/fusedSoftmax.d.ts +2 -0
- package/dist/ops/grads/fusedSoftmax.js +17 -0
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/mulDrop.d.ts +2 -0
- package/dist/ops/mulDrop.js +9 -0
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.d.ts +11 -0
- package/dist/ops/webgl/fusedSoftmax.js +3930 -0
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/mulDropout.d.ts +1 -0
- package/dist/ops/webgl/mulDropout.js +41 -0
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/{random_width-oeUIlUZj.js → random_width-CMHmdbSu.js} +4212 -6630
- package/dist/{range-CcDl05lo.js → range-DQMNzBWs.js} +1 -1
- package/dist/{reshape-C8CR_Bad.js → reshape-DFzh97Sc.js} +1 -1
- package/dist/{sin-BJIrfnj7.js → sin-BYM-U4Ut.js} +1 -1
- package/dist/slice_util-CnVNPQI-.js +90 -0
- package/dist/softmax-4DOn6cPq.js +28 -0
- package/dist/{split-DZbvruEP.js → split-CkbeVdF8.js} +3 -3
- package/dist/{stack-BMm-efee.js → stack-DaIMO5iX.js} +1 -1
- package/dist/sum-C6u3xMi3.js +27 -0
- package/dist/{tensor-DJVbYhh1.js → tensor-Cu1fU7H7.js} +1 -1
- package/dist/{tensor2d-ZuQSh2D-.js → tensor2d-D0CKdG6B.js} +1 -1
- package/dist/tfjs_backend-Bzl2SrRo.js +2460 -0
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +3 -3
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +13 -12
- package/dist/training/sparseCrossEntropy.js +12 -11
- package/dist/utilities/dummy.js +8 -8
- package/dist/utilities/generate.js +11 -11
- package/dist/utilities/load.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-Dl_ub3pk.js → variable-BS4AKqNU.js} +1 -1
- package/dist/{zeros-CCy9C3uU.js → zeros-CmJFiC84.js} +1 -1
- package/package.json +1 -1
- package/dist/exports_layers-tbTBcwMM.js +0 -25
- package/dist/layers/LayerNorm.d.ts +0 -13
- package/dist/layers/LayerNorm.js +0 -33
- package/dist/moments-DfcpfwKi.js +0 -132
- package/dist/softmax-Be_lsqUc.js +0 -105
- package/dist/training/LayerTrainer.d.ts +0 -29
- package/dist/training/LayerTrainer.js +0 -95
- package/dist/training/lwSchedule.d.ts +0 -7
- package/dist/training/lwSchedule.js +0 -162
package/dist/config.js
CHANGED
|
@@ -1,20 +1,19 @@
|
|
|
1
1
|
const e = {
|
|
2
|
-
vocabSize:
|
|
3
|
-
|
|
4
|
-
blockSize: 1024,
|
|
2
|
+
vocabSize: 2e3,
|
|
3
|
+
blockSize: 128,
|
|
5
4
|
// Maximum sequence length
|
|
6
|
-
nLayer:
|
|
5
|
+
nLayer: 6,
|
|
7
6
|
// Number of transformer layers
|
|
8
|
-
nHead:
|
|
7
|
+
nHead: 4,
|
|
9
8
|
// Number of attention heads
|
|
10
|
-
nEmbed:
|
|
9
|
+
nEmbed: 256,
|
|
11
10
|
// Embedding dimension
|
|
12
|
-
dropout: 0,
|
|
11
|
+
dropout: 0.1,
|
|
13
12
|
// Dropout probability
|
|
14
13
|
biasInLinear: !1,
|
|
15
14
|
biasInLayerNorm: !1,
|
|
16
15
|
mlpFactor: 4,
|
|
17
|
-
useRope: !
|
|
16
|
+
useRope: !0
|
|
18
17
|
// Use Rotary Position Embeddings
|
|
19
18
|
};
|
|
20
19
|
export {
|