@genai-fi/nanogpt 0.3.2 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98):
  1. package/dist/Generator.js +22 -22
  2. package/dist/MLP-KHhikThU.js +83 -0
  3. package/dist/NanoGPTModel.d.ts +2 -3
  4. package/dist/NanoGPTModel.js +79 -79
  5. package/dist/TeachableLLM.js +16 -13
  6. package/dist/axis_util-DeydwOoC.js +69 -0
  7. package/dist/{concat-BIZS_td9.js → concat-DS_qH7MI.js} +5 -5
  8. package/dist/config.js +7 -8
  9. package/dist/{gather-BPGW8RsB.js → gather-BUmJIS8n.js} +1 -1
  10. package/dist/{index-pWA4_lUh.js → index-XjBAhiFO.js} +1272 -1174
  11. package/dist/layers/BaseLayer.d.ts +14 -2
  12. package/dist/layers/BaseLayer.js +9 -9
  13. package/dist/layers/CausalSelfAttention.d.ts +4 -8
  14. package/dist/layers/CausalSelfAttention.js +106 -80
  15. package/dist/layers/MLP.d.ts +2 -3
  16. package/dist/layers/MLP.js +5 -62
  17. package/dist/layers/RMSNorm.d.ts +2 -2
  18. package/dist/layers/RMSNorm.js +11 -11
  19. package/dist/layers/RoPECache.js +3 -3
  20. package/dist/layers/TiedEmbedding.js +7 -6
  21. package/dist/layers/TransformerBlock.d.ts +2 -6
  22. package/dist/layers/TransformerBlock.js +9 -12
  23. package/dist/{sum-C7Mgy9Bw.js → log_sum_exp-DJPkVZZn.js} +32 -54
  24. package/dist/main.js +22 -19
  25. package/dist/{mat_mul-D7_a4KJn.js → mat_mul-CKwFEV1Q.js} +1 -1
  26. package/dist/max-DJvEiCAJ.js +25 -0
  27. package/dist/moments-CrWRPcR3.js +53 -0
  28. package/dist/norm-BzY929B_.js +86 -0
  29. package/dist/{ones-Cog-G2ag.js → ones-BO01zpJG.js} +2 -2
  30. package/dist/ops/appendCache.js +1 -1
  31. package/dist/ops/attentionMask.js +1 -1
  32. package/dist/ops/cpu/appendCache.js +2 -2
  33. package/dist/ops/cpu/attentionMask.js +2 -2
  34. package/dist/ops/cpu/fusedSoftmax.d.ts +9 -0
  35. package/dist/ops/cpu/fusedSoftmax.js +23 -0
  36. package/dist/ops/cpu/gatherSub.js +3 -3
  37. package/dist/ops/cpu/mulDropout.d.ts +1 -0
  38. package/dist/ops/cpu/mulDropout.js +17 -0
  39. package/dist/ops/cpu/qkv.js +3 -3
  40. package/dist/ops/cpu/rope.js +5 -5
  41. package/dist/ops/cpu/scatterSub.js +27 -27
  42. package/dist/ops/fusedSoftmax.d.ts +2 -0
  43. package/dist/ops/fusedSoftmax.js +10 -0
  44. package/dist/ops/gatherSub.js +1 -1
  45. package/dist/ops/grads/attentionMask.js +1 -1
  46. package/dist/ops/grads/fusedSoftmax.d.ts +2 -0
  47. package/dist/ops/grads/fusedSoftmax.js +17 -0
  48. package/dist/ops/grads/qkv.js +1 -1
  49. package/dist/ops/grads/rope.js +1 -1
  50. package/dist/ops/mulDrop.d.ts +2 -0
  51. package/dist/ops/mulDrop.js +9 -0
  52. package/dist/ops/node/sparseCrossEntropy.js +1 -1
  53. package/dist/ops/qkv.js +1 -1
  54. package/dist/ops/scatterSub.js +1 -1
  55. package/dist/ops/webgl/appendCache.js +1 -1
  56. package/dist/ops/webgl/attentionMask.js +1 -1
  57. package/dist/ops/webgl/fusedSoftmax.d.ts +11 -0
  58. package/dist/ops/webgl/fusedSoftmax.js +3930 -0
  59. package/dist/ops/webgl/gatherSub.js +1 -1
  60. package/dist/ops/webgl/mulDropout.d.ts +1 -0
  61. package/dist/ops/webgl/mulDropout.js +41 -0
  62. package/dist/ops/webgl/qkv.js +1 -1
  63. package/dist/ops/webgl/rope.js +1 -1
  64. package/dist/ops/webgl/scatterSub.js +1 -1
  65. package/dist/{random_width-oeUIlUZj.js → random_width-CMHmdbSu.js} +4212 -6630
  66. package/dist/{range-CcDl05lo.js → range-DQMNzBWs.js} +1 -1
  67. package/dist/{reshape-C8CR_Bad.js → reshape-DFzh97Sc.js} +1 -1
  68. package/dist/{sin-BJIrfnj7.js → sin-BYM-U4Ut.js} +1 -1
  69. package/dist/slice_util-CnVNPQI-.js +90 -0
  70. package/dist/softmax-4DOn6cPq.js +28 -0
  71. package/dist/{split-DZbvruEP.js → split-CkbeVdF8.js} +3 -3
  72. package/dist/{stack-BMm-efee.js → stack-DaIMO5iX.js} +1 -1
  73. package/dist/sum-C6u3xMi3.js +27 -0
  74. package/dist/{tensor-DJVbYhh1.js → tensor-Cu1fU7H7.js} +1 -1
  75. package/dist/{tensor2d-ZuQSh2D-.js → tensor2d-D0CKdG6B.js} +1 -1
  76. package/dist/tfjs_backend-Bzl2SrRo.js +2460 -0
  77. package/dist/training/AdamExt.js +1 -1
  78. package/dist/training/DatasetBuilder.js +3 -3
  79. package/dist/training/FullTrainer.js +1 -1
  80. package/dist/training/Trainer.js +13 -12
  81. package/dist/training/sparseCrossEntropy.js +12 -11
  82. package/dist/utilities/dummy.js +8 -8
  83. package/dist/utilities/generate.js +11 -11
  84. package/dist/utilities/load.js +1 -1
  85. package/dist/utilities/profile.js +1 -1
  86. package/dist/utilities/weights.js +2 -2
  87. package/dist/{variable-Dl_ub3pk.js → variable-BS4AKqNU.js} +1 -1
  88. package/dist/{zeros-CCy9C3uU.js → zeros-CmJFiC84.js} +1 -1
  89. package/package.json +1 -1
  90. package/dist/exports_layers-tbTBcwMM.js +0 -25
  91. package/dist/layers/LayerNorm.d.ts +0 -13
  92. package/dist/layers/LayerNorm.js +0 -33
  93. package/dist/moments-DfcpfwKi.js +0 -132
  94. package/dist/softmax-Be_lsqUc.js +0 -105
  95. package/dist/training/LayerTrainer.d.ts +0 -29
  96. package/dist/training/LayerTrainer.js +0 -95
  97. package/dist/training/lwSchedule.d.ts +0 -7
  98. package/dist/training/lwSchedule.js +0 -162
@@ -1,4 +1,4 @@
1
- import { r as l } from "../../index-pWA4_lUh.js";
1
+ import { r as l } from "../../index-XjBAhiFO.js";
2
2
  class u {
3
3
  variableNames = ["labels", "logits", "values"];
4
4
  outputShape;
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,41 @@
1
+ import { r as m } from "../../index-XjBAhiFO.js";
/**
 * WebGL program for the "MulDropout" kernel: multiplies inputs `a` and `b`
 * element-wise and applies inverted dropout to the product in one pass.
 *
 * The output shape is `[r, t, o, o]`. The shader reads two float uniforms:
 *   - `dropoutRate`: probability of zeroing an element.
 *   - `seed`: offset added to the per-element index before hashing.
 *
 * Each output element derives its pseudo-random number from its flattened
 * row-major index. The strides are computed from the actual output shape so
 * that every coordinate maps to a distinct index; fixed strides (such as
 * 4096/256/16) alias coordinates once a dimension exceeds the stride, which
 * makes unrelated elements share the same dropout decision.
 */
class f {
  variableNames = ["a", "b"];
  outputShape;
  userCode;
  customUniforms = [
    { name: "dropoutRate", type: "float" },
    { name: "seed", type: "float" },
  ];

  /**
   * @param {number} r - Size of output dimension 0.
   * @param {number} t - Size of output dimension 1.
   * @param {number} o - Size of output dimensions 2 and 3 (the output is square
   *   in its last two dimensions).
   */
  constructor(r, t, o) {
    this.outputShape = [r, t, o, o];

    // Row-major strides of the [r, t, o, o] output.
    const strideZ = o;
    const strideY = o * o;
    const strideX = t * o * o;

    this.userCode = `
      float random(ivec4 coords) {
        float x = float(coords.x * ${strideX} + coords.y * ${strideY} + coords.z * ${strideZ} + coords.w);
        return fract(sin(seed + x) * 43758.5453123);
      }
      void main() {
        ivec4 coords = getOutputCoords();
        float a = getA(coords.x, coords.y, coords.z, coords.w);
        float b = getB(coords.x, coords.y, coords.z, coords.w);

        float keepProb = 1.0 - dropoutRate;
        float rand = random(coords);
        float mask = step(rand, keepProb);
        // keepProb == 0.0 (dropoutRate == 1.0) would evaluate 0.0 / 0.0 = NaN;
        // emit 0.0 instead, i.e. everything is dropped.
        setOutput(keepProb > 0.0 ? a * b * mask / keepProb : 0.0);
      }
    `;
  }
}
/**
 * Kernel function for "MulDropout" on the WebGL backend.
 *
 * Builds an `f` program sized from the first three dimensions of input `a`
 * and runs it over `a` and `b`, producing a float32 result.
 *
 * @param {object} e - Kernel invocation arguments.
 * @param {{a: object, b: object}} e.inputs - The two input tensors.
 * @param {{dropoutRate?: number, seed?: number}} e.attrs - Dropout rate
 *   (defaults to 0) and seed (defaults to a random value in [0, 1e4)).
 * @param {object} e.backend - Backend exposing `runWebGLProgram`.
 * @returns {*} Whatever `backend.runWebGLProgram` returns.
 */
function b(e) {
  const { inputs, attrs } = e;
  const { a: tensorA, b: tensorB } = inputs;
  const { dropoutRate, seed } = attrs;
  const backend = e.backend;

  // NOTE(review): assumes `a` is rank 4 with equal last two dimensions,
  // since the program's output shape is [dim0, dim1, dim2, dim2] — confirm.
  const dim0 = tensorA.shape[0];
  const dim2 = tensorA.shape[2];
  const dim1 = tensorA.shape[1];
  const program = new f(dim0, dim1, dim2);

  // Custom uniform values, in the order the program declares them.
  const uniforms = [
    [dropoutRate ?? 0],
    [seed ?? Math.random() * 1e4],
  ];
  return backend.runWebGLProgram(program, [tensorA, tensorB], "float32", uniforms);
}
// Kernel configuration binding the "MulDropout" kernel name to the WebGL
// implementation `b`.
const i = {
  kernelName: "MulDropout",
  backendName: "webgl",
  kernelFunc: b,
};

// Hand the configuration to `m`, imported above as `r` from the shared index
// bundle (presumably the kernel-registration function — confirm).
m(i);
@@ -1,4 +1,4 @@
1
- import { r as i } from "../../index-pWA4_lUh.js";
1
+ import { r as i } from "../../index-XjBAhiFO.js";
2
2
  class l {
3
3
  variableNames = ["x", "kernel"];
4
4
  outputShape;
@@ -1,4 +1,4 @@
1
- import { r as u } from "../../index-pWA4_lUh.js";
1
+ import { r as u } from "../../index-XjBAhiFO.js";
2
2
  class l {
3
3
  variableNames = ["x", "sin", "cos"];
4
4
  outputShape;
@@ -1,4 +1,4 @@
1
- import { r as i } from "../../index-pWA4_lUh.js";
1
+ import { r as i } from "../../index-XjBAhiFO.js";
2
2
  class u {
3
3
  variableNames = ["labels", "softmaxProbs", "dy"];
4
4
  outputShape;