@genai-fi/nanogpt 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/dist/Generator.js +3 -3
  2. package/dist/NanoGPTModel.js +7 -7
  3. package/dist/Reshape-CiAY8ltP.js +212 -0
  4. package/dist/TeachableLLM.js +7 -1
  5. package/dist/{TiedEmbedding-CnJ1bx4q.js → TiedEmbedding-DznFwzcB.js} +244 -244
  6. package/dist/{axis_util-BgTGy5w8.js → axis_util-QP0LdI1v.js} +1 -1
  7. package/dist/{concat-CuRsVY-K.js → concat-DvWM7HGZ.js} +1 -1
  8. package/dist/data/parquet.js +9 -6
  9. package/dist/data/textLoader.js +6 -5
  10. package/dist/{dropout-DfDdklfL.js → dropout-DFEXTPV0.js} +4 -4
  11. package/dist/{gather-ZYRWhmXR.js → gather-C5D8PxwA.js} +1 -1
  12. package/dist/gpgpu_math-CUzjlO9A.js +23 -0
  13. package/dist/{index-C4JCoBvj.js → index--6vO-cOz.js} +87 -87
  14. package/dist/{kernel_funcs_utils-CAd1h9X1.js → kernel_funcs_utils-C6YBCuOt.js} +72 -91
  15. package/dist/layers/CausalSelfAttention.js +8 -8
  16. package/dist/layers/MLP.js +31 -33
  17. package/dist/layers/RMSNorm.js +3 -3
  18. package/dist/layers/RoPECache.js +3 -3
  19. package/dist/layers/TiedEmbedding.js +5 -5
  20. package/dist/layers/TransformerBlock.js +1 -1
  21. package/dist/{log_sum_exp-BswFnwOb.js → log_sum_exp-CiEy1aUe.js} +7 -7
  22. package/dist/main.js +25 -19
  23. package/dist/{mat_mul-415y5Qn2.js → mat_mul-BEHRPMh0.js} +1 -1
  24. package/dist/{max-CP_9O2Yd.js → max-BUShNgfh.js} +1 -1
  25. package/dist/{moments-CjeIaVdp.js → moments-DYOHXoRV.js} +5 -5
  26. package/dist/{norm-CZM380I3.js → norm-DSva3hI3.js} +13 -13
  27. package/dist/{ones-Bf3YR48P.js → ones-D6kB8bdY.js} +2 -2
  28. package/dist/ops/appendCache.js +3 -3
  29. package/dist/ops/attentionMask.js +1 -1
  30. package/dist/ops/cpu/appendCache.js +2 -2
  31. package/dist/ops/cpu/attentionMask.js +2 -2
  32. package/dist/ops/cpu/fusedSoftmax.js +2 -2
  33. package/dist/ops/cpu/gatherSub.js +4 -4
  34. package/dist/ops/cpu/gelu.js +1 -1
  35. package/dist/ops/cpu/matMulGelu.d.ts +1 -0
  36. package/dist/ops/cpu/matMulGelu.js +40 -0
  37. package/dist/ops/cpu/mulDropout.js +1 -1
  38. package/dist/ops/cpu/qkv.js +3 -3
  39. package/dist/ops/cpu/rope.js +5 -5
  40. package/dist/ops/cpu/scatterSub.js +4 -4
  41. package/dist/ops/fusedSoftmax.js +1 -1
  42. package/dist/ops/gatherSub.js +1 -1
  43. package/dist/ops/gelu.js +2 -2
  44. package/dist/ops/grads/attentionMask.js +1 -1
  45. package/dist/ops/grads/fusedSoftmax.js +2 -2
  46. package/dist/ops/grads/gelu.js +24 -3
  47. package/dist/ops/grads/matMulGelu.d.ts +1 -0
  48. package/dist/ops/grads/matMulGelu.js +17 -0
  49. package/dist/ops/grads/qkv.js +1 -1
  50. package/dist/ops/grads/rope.js +1 -1
  51. package/dist/ops/matMulGelu.d.ts +3 -0
  52. package/dist/ops/matMulGelu.js +14 -0
  53. package/dist/ops/mulDrop.js +1 -1
  54. package/dist/ops/node/sparseCrossEntropy.js +1 -1
  55. package/dist/ops/qkv.js +1 -1
  56. package/dist/ops/scatterSub.js +1 -1
  57. package/dist/ops/webgl/appendCache.js +1 -1
  58. package/dist/ops/webgl/attentionMask.js +1 -1
  59. package/dist/ops/webgl/fusedSoftmax.js +689 -895
  60. package/dist/ops/webgl/gatherSub.js +1 -1
  61. package/dist/ops/webgl/gelu.js +2 -2
  62. package/dist/ops/webgl/matMulGelu.d.ts +20 -0
  63. package/dist/ops/webgl/matMulGelu.js +166 -0
  64. package/dist/ops/webgl/mulDropout.js +1 -1
  65. package/dist/ops/webgl/qkv.js +1 -1
  66. package/dist/ops/webgl/rope.js +1 -1
  67. package/dist/ops/webgl/scatterSub.js +1 -1
  68. package/dist/{range-9AzeApCc.js → range-C_vpUjBu.js} +1 -1
  69. package/dist/{reshape-Boe4DuIO.js → reshape-z51Eu-re.js} +1 -1
  70. package/dist/{sin-KmhiDuMa.js → sin-H567uayl.js} +1 -1
  71. package/dist/{slice_util-19zDNNSn.js → slice_util-BdhYwFY_.js} +2 -2
  72. package/dist/{softmax-Cujsg4ay.js → softmax-Dsxflvdl.js} +1 -1
  73. package/dist/{split-DbcNm1-i.js → split-B_k_jwud.js} +1 -1
  74. package/dist/{stack-D1YjmgKN.js → stack-CmqSdsfs.js} +1 -1
  75. package/dist/{sum-R28pucR5.js → sum-DdkDf2MG.js} +1 -1
  76. package/dist/{tensor-BVeHdl7V.js → tensor-BGYi41cj.js} +1 -1
  77. package/dist/{tensor2d-DqFGNs_K.js → tensor2d-DUr_htjt.js} +1 -1
  78. package/dist/{tfjs_backend-Cug-PH75.js → tfjs_backend-DuKis_xG.js} +46 -46
  79. package/dist/training/AdamExt.js +1 -1
  80. package/dist/training/DatasetBuilder.js +18 -18
  81. package/dist/training/FullTrainer.js +1 -1
  82. package/dist/training/Trainer.js +5 -5
  83. package/dist/training/sparseCrossEntropy.js +4 -4
  84. package/dist/utilities/dummy.js +2 -2
  85. package/dist/utilities/generate.js +3 -3
  86. package/dist/utilities/load.js +1 -1
  87. package/dist/utilities/profile.js +1 -1
  88. package/dist/utilities/weights.js +2 -2
  89. package/dist/{variable-LJT9Ld63.js → variable-BJTZ3jOy.js} +1 -1
  90. package/dist/{zeros-dnQxFgAD.js → zeros-8xl-W2DC.js} +1 -1
  91. package/package.json +1 -1
  92. package/dist/gelu-CnCt17Lk.js +0 -26
@@ -1,7 +1,7 @@
1
1
  import { generateText as v } from "../utilities/generate.js";
2
2
  import L from "./Trainer.js";
3
3
  import x from "./Evaluator.js";
4
- import { a as h } from "../index-C4JCoBvj.js";
4
+ import { a as h } from "../index--6vO-cOz.js";
5
5
  const D = {
6
6
  desiredLoss: 0.01,
7
7
  logInterval: 1,
@@ -1,10 +1,10 @@
1
1
  import { DatasetBuilder as d } from "./DatasetBuilder.js";
2
2
  import h from "./AdamExt.js";
3
- import { t as g, v as u, a as o } from "../index-C4JCoBvj.js";
4
- import { m as y, n as f } from "../norm-CZM380I3.js";
5
- import { m as S, a as z } from "../moments-CjeIaVdp.js";
6
- import { m as b } from "../max-CP_9O2Yd.js";
7
- import { z as n } from "../zeros-dnQxFgAD.js";
3
+ import { t as g, v as u, a as o } from "../index--6vO-cOz.js";
4
+ import { m as y, n as f } from "../norm-DSva3hI3.js";
5
+ import { m as S, a as z } from "../moments-DYOHXoRV.js";
6
+ import { m as b } from "../max-BUShNgfh.js";
7
+ import { z as n } from "../zeros-8xl-W2DC.js";
8
8
  class G {
9
9
  constructor(t, s, e = 1e-3) {
10
10
  this.tokenizer = s, this.model = t, this.learningRate = e, this.resetOptimizer(), this.datasetBuilder = new d(s, t.config.gpt.blockSize);
@@ -1,9 +1,9 @@
1
1
  import { gatherSub as L } from "../ops/gatherSub.js";
2
2
  import { scatterSub as y } from "../ops/scatterSub.js";
3
- import { e as u, c as i, z as S, t as f, s as G } from "../index-C4JCoBvj.js";
4
- import { s as v } from "../softmax-Cujsg4ay.js";
5
- import { m as z } from "../max-CP_9O2Yd.js";
6
- import { l as k } from "../log_sum_exp-BswFnwOb.js";
3
+ import { e as u, c as i, z as S, t as f, s as G } from "../index--6vO-cOz.js";
4
+ import { s as v } from "../softmax-Dsxflvdl.js";
5
+ import { m as z } from "../max-BUShNgfh.js";
6
+ import { l as k } from "../log_sum_exp-CiEy1aUe.js";
7
7
  function F(a, s) {
8
8
  return f(() => {
9
9
  const e = a.shape[a.shape.length - 1], o = a.shape.slice(0, -1).reduce((d, c) => d * c, 1), p = a.shape.length > 2 ? a.reshape([o, e]) : a, n = s.shape.length > 1 ? s.reshape([o]).cast("int32") : s.cast("int32"), t = z(p, -1, !0), r = G(p, t), h = k(r, -1);
@@ -1,5 +1,5 @@
1
- import "../index-C4JCoBvj.js";
2
- import { z as n } from "../zeros-dnQxFgAD.js";
1
+ import "../index--6vO-cOz.js";
2
+ import { z as n } from "../zeros-8xl-W2DC.js";
3
3
  async function a(s) {
4
4
  const o = n([1, s.config.gpt.blockSize], "int32"), { logits: t, loss: i } = s.forward(o, void 0, !1);
5
5
  await t.data(), t.dispose(), i && i.dispose(), o.dispose();
@@ -1,6 +1,6 @@
1
- import { t as y } from "../index-C4JCoBvj.js";
2
- import { t as x } from "../tensor2d-DqFGNs_K.js";
3
- import { c as f } from "../concat-CuRsVY-K.js";
1
+ import { t as y } from "../index--6vO-cOz.js";
2
+ import { t as x } from "../tensor2d-DUr_htjt.js";
3
+ import { c as f } from "../concat-DvWM7HGZ.js";
4
4
  async function A(o, r, a, c, T) {
5
5
  if (c <= 0)
6
6
  throw new Error("Length must be a positive integer");
@@ -3,7 +3,7 @@ import { importWeights as b } from "./weights.js";
3
3
  import u from "../tokeniser/CharTokeniser.js";
4
4
  import F from "../NanoGPTModel.js";
5
5
  import { dummyPassAsync as j } from "./dummy.js";
6
- import { d as T } from "../index-C4JCoBvj.js";
6
+ import { d as T } from "../index--6vO-cOz.js";
7
7
  import E from "../tokeniser/bpe.js";
8
8
  async function A(t) {
9
9
  const o = await fetch(t);
@@ -1,4 +1,4 @@
1
- import { m as s } from "../index-C4JCoBvj.js";
1
+ import { m as s } from "../index--6vO-cOz.js";
2
2
  const m = 1024 * 1024;
3
3
  class i {
4
4
  log = /* @__PURE__ */ new Map();
@@ -1,5 +1,5 @@
1
- import "../index-C4JCoBvj.js";
2
- import { t as p } from "../tensor-BVeHdl7V.js";
1
+ import "../index--6vO-cOz.js";
2
+ import { t as p } from "../tensor-BGYi41cj.js";
3
3
  function h(n) {
4
4
  const e = n.reduce((s, o) => s + o.length, 0), a = new Float32Array(e);
5
5
  let t = 0;
@@ -1,4 +1,4 @@
1
- import { E as i } from "./index-C4JCoBvj.js";
1
+ import { E as i } from "./index--6vO-cOz.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { o as m, h as r, U as l, E as c, V as i, k as p, W as u, n as f } from "./index-C4JCoBvj.js";
1
+ import { o as m, h as r, X as l, E as c, Y as i, k as p, Z as u, n as f } from "./index--6vO-cOz.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@genai-fi/nanogpt",
3
- "version": "0.4.3",
3
+ "version": "0.4.4",
4
4
  "type": "module",
5
5
  "main": "dist/main.js",
6
6
  "types": "dist/main.d.ts",
@@ -1,26 +0,0 @@
1
- import { g as t, e as n } from "./index-C4JCoBvj.js";
2
- import "./ops/cpu/gelu.js";
3
- import "./ops/webgl/gelu.js";
4
- const a = {
5
- kernelName: "Gelu",
6
- inputsToSave: ["x"],
7
- outputsToSave: [],
8
- gradFunc: (e, r) => {
9
- const [u] = r;
10
- return {
11
- x: () => o(e, u)
12
- };
13
- }
14
- };
15
- t(a);
16
- function g(e) {
17
- return n().runKernel("Gelu", { x: e });
18
- }
19
- function o(e, r) {
20
- return n().runKernel("GeluGrad", { dy: e, x: r });
21
- }
22
- export {
23
- a,
24
- o as d,
25
- g
26
- };