@genai-fi/nanogpt 0.4.3 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/dist/Generator.js +3 -3
  2. package/dist/NanoGPTModel.js +8 -8
  3. package/dist/Reshape-CiAY8ltP.js +212 -0
  4. package/dist/TeachableLLM.js +14 -5
  5. package/dist/{TiedEmbedding-CnJ1bx4q.js → TiedEmbedding-DznFwzcB.js} +244 -244
  6. package/dist/{axis_util-BgTGy5w8.js → axis_util-QP0LdI1v.js} +1 -1
  7. package/dist/{concat-CuRsVY-K.js → concat-DvWM7HGZ.js} +1 -1
  8. package/dist/data/parquet.js +9 -6
  9. package/dist/data/textLoader.js +6 -5
  10. package/dist/{dropout-DfDdklfL.js → dropout-DFEXTPV0.js} +4 -4
  11. package/dist/{gather-ZYRWhmXR.js → gather-C5D8PxwA.js} +1 -1
  12. package/dist/gpgpu_math-CUzjlO9A.js +23 -0
  13. package/dist/{index-C4JCoBvj.js → index--6vO-cOz.js} +87 -87
  14. package/dist/{kernel_funcs_utils-CAd1h9X1.js → kernel_funcs_utils-C6YBCuOt.js} +72 -91
  15. package/dist/layers/CausalSelfAttention.js +47 -46
  16. package/dist/layers/MLP.js +31 -33
  17. package/dist/layers/RMSNorm.d.ts +1 -2
  18. package/dist/layers/RMSNorm.js +10 -10
  19. package/dist/layers/RoPECache.js +3 -3
  20. package/dist/layers/TiedEmbedding.js +5 -5
  21. package/dist/layers/TransformerBlock.js +2 -2
  22. package/dist/{log_sum_exp-BswFnwOb.js → log_sum_exp-CiEy1aUe.js} +7 -7
  23. package/dist/main.js +28 -19
  24. package/dist/{mat_mul-415y5Qn2.js → mat_mul-BEHRPMh0.js} +1 -1
  25. package/dist/{max-CP_9O2Yd.js → max-BUShNgfh.js} +1 -1
  26. package/dist/{moments-CjeIaVdp.js → moments-DYOHXoRV.js} +5 -5
  27. package/dist/{norm-CZM380I3.js → norm-DSva3hI3.js} +13 -13
  28. package/dist/{ones-Bf3YR48P.js → ones-D6kB8bdY.js} +2 -2
  29. package/dist/ops/appendCache.js +3 -3
  30. package/dist/ops/attentionMask.js +1 -1
  31. package/dist/ops/cpu/appendCache.js +2 -2
  32. package/dist/ops/cpu/attentionMask.js +2 -2
  33. package/dist/ops/cpu/fusedSoftmax.js +2 -2
  34. package/dist/ops/cpu/gatherSub.js +4 -4
  35. package/dist/ops/cpu/gelu.js +1 -1
  36. package/dist/ops/cpu/matMulGelu.d.ts +1 -0
  37. package/dist/ops/cpu/matMulGelu.js +40 -0
  38. package/dist/ops/cpu/mulDropout.js +1 -1
  39. package/dist/ops/cpu/normRMS.d.ts +1 -0
  40. package/dist/ops/cpu/normRMS.js +39 -0
  41. package/dist/ops/cpu/qkv.js +3 -3
  42. package/dist/ops/cpu/rope.js +5 -5
  43. package/dist/ops/cpu/scatterSub.js +4 -4
  44. package/dist/ops/fusedSoftmax.js +1 -1
  45. package/dist/ops/gatherSub.js +1 -1
  46. package/dist/ops/gelu.js +2 -2
  47. package/dist/ops/grads/attentionMask.js +1 -1
  48. package/dist/ops/grads/fusedSoftmax.js +2 -2
  49. package/dist/ops/grads/gelu.js +24 -3
  50. package/dist/ops/grads/matMulGelu.d.ts +1 -0
  51. package/dist/ops/grads/matMulGelu.js +17 -0
  52. package/dist/ops/grads/normRMS.d.ts +2 -0
  53. package/dist/ops/grads/normRMS.js +20 -0
  54. package/dist/ops/grads/qkv.js +1 -1
  55. package/dist/ops/grads/rope.js +1 -1
  56. package/dist/ops/matMulGelu.d.ts +3 -0
  57. package/dist/ops/matMulGelu.js +14 -0
  58. package/dist/ops/mulDrop.js +1 -1
  59. package/dist/ops/node/sparseCrossEntropy.js +1 -1
  60. package/dist/ops/normRMS.d.ts +2 -0
  61. package/dist/ops/normRMS.js +10 -0
  62. package/dist/ops/qkv.js +1 -1
  63. package/dist/ops/scatterSub.js +1 -1
  64. package/dist/ops/webgl/appendCache.js +1 -1
  65. package/dist/ops/webgl/attentionMask.js +1 -1
  66. package/dist/ops/webgl/fusedSoftmax.js +689 -895
  67. package/dist/ops/webgl/gatherSub.js +1 -1
  68. package/dist/ops/webgl/gelu.js +2 -2
  69. package/dist/ops/webgl/matMulGelu.d.ts +21 -0
  70. package/dist/ops/webgl/matMulGelu.js +168 -0
  71. package/dist/ops/webgl/mulDropout.js +1 -1
  72. package/dist/ops/webgl/normRMS.d.ts +1 -0
  73. package/dist/ops/webgl/normRMS.js +78 -0
  74. package/dist/ops/webgl/qkv.js +1 -1
  75. package/dist/ops/webgl/rope.js +1 -1
  76. package/dist/ops/webgl/scatterSub.js +1 -1
  77. package/dist/{range-9AzeApCc.js → range-C_vpUjBu.js} +1 -1
  78. package/dist/{reshape-Boe4DuIO.js → reshape-z51Eu-re.js} +1 -1
  79. package/dist/{sin-KmhiDuMa.js → sin-H567uayl.js} +1 -1
  80. package/dist/{slice_util-19zDNNSn.js → slice_util-BdhYwFY_.js} +2 -2
  81. package/dist/{softmax-Cujsg4ay.js → softmax-Dsxflvdl.js} +1 -1
  82. package/dist/{split-DbcNm1-i.js → split-B_k_jwud.js} +1 -1
  83. package/dist/{stack-D1YjmgKN.js → stack-CmqSdsfs.js} +1 -1
  84. package/dist/{sum-R28pucR5.js → sum-DdkDf2MG.js} +1 -1
  85. package/dist/{tensor-BVeHdl7V.js → tensor-BGYi41cj.js} +1 -1
  86. package/dist/{tensor2d-DqFGNs_K.js → tensor2d-DUr_htjt.js} +1 -1
  87. package/dist/{tfjs_backend-Cug-PH75.js → tfjs_backend-DuKis_xG.js} +46 -46
  88. package/dist/training/AdamExt.js +1 -1
  89. package/dist/training/DatasetBuilder.js +18 -18
  90. package/dist/training/FullTrainer.js +1 -1
  91. package/dist/training/Trainer.js +5 -5
  92. package/dist/training/sparseCrossEntropy.js +4 -4
  93. package/dist/utilities/dummy.js +2 -2
  94. package/dist/utilities/generate.js +3 -3
  95. package/dist/utilities/load.js +1 -1
  96. package/dist/utilities/profile.js +1 -1
  97. package/dist/utilities/weights.js +2 -2
  98. package/dist/{variable-LJT9Ld63.js → variable-BJTZ3jOy.js} +1 -1
  99. package/dist/{zeros-dnQxFgAD.js → zeros-8xl-W2DC.js} +1 -1
  100. package/package.json +1 -1
  101. package/dist/gelu-CnCt17Lk.js +0 -26
@@ -1,7 +1,7 @@
1
- import { ae as $, ac as m, af as M, a as R, ag as f, ah as v, ai as z, j as _, t as x } from "../index-C4JCoBvj.js";
1
+ import { aj as $, ah as d, L as M, a as R, ak as f, al as v, am as z, j as _, t as x } from "../index--6vO-cOz.js";
2
2
  import { s as E } from "../index-C4L8Cm77.js";
3
- import { s as P } from "../stack-D1YjmgKN.js";
4
- import { t as D } from "../tensor-BVeHdl7V.js";
3
+ import { s as P } from "../stack-CmqSdsfs.js";
4
+ import { t as D } from "../tensor-BGYi41cj.js";
5
5
  import "../index-Tf7vU29b.js";
6
6
  /**
7
7
  * @license
@@ -82,10 +82,10 @@ function p(s) {
82
82
  const { StringDecoder: e } = require("string_decoder");
83
83
  t = s instanceof e;
84
84
  }
85
- return s != null && !ArrayBuffer.isView(s) && (Array.isArray(s) || typeof s == "object" && !(s instanceof m) && !(s instanceof Promise) && !t);
85
+ return s != null && !ArrayBuffer.isView(s) && (Array.isArray(s) || typeof s == "object" && !(s instanceof d) && !(s instanceof Promise) && !t);
86
86
  }
87
87
  function H(s) {
88
- return s == null || q(s) || Array.isArray(s) || typeof s == "object" && s instanceof m || $(s);
88
+ return s == null || q(s) || Array.isArray(s) || typeof s == "object" && s instanceof d || $(s);
89
89
  }
90
90
  function q(s) {
91
91
  return s === null || typeof s != "object" && typeof s != "function";
@@ -111,7 +111,7 @@ function Q(s) {
111
111
  return L(s, G);
112
112
  }
113
113
  function G(s) {
114
- return s instanceof m ? { value: s.clone(), recurse: !1 } : p(s) ? { value: null, recurse: !0 } : { value: s, recurse: !1 };
114
+ return s instanceof d ? { value: s.clone(), recurse: !1 } : p(s) ? { value: null, recurse: !0 } : { value: s, recurse: !1 };
115
115
  }
116
116
  /**
117
117
  * @license
@@ -477,7 +477,7 @@ class i {
477
477
  * of the original element type.
478
478
  */
479
479
  rowMajorBatch(t, e = !0) {
480
- return new j(this, t, e);
480
+ return new K(this, t, e);
481
481
  }
482
482
  /**
483
483
  * Groups elements into batches, represented in column-major form.
@@ -535,7 +535,7 @@ class i {
535
535
  * unaltered.
536
536
  */
537
537
  take(t) {
538
- return t < 0 || t == null ? this : new K(this, t);
538
+ return t < 0 || t == null ? this : new j(this, t);
539
539
  }
540
540
  /**
541
541
  * Skips the first `count` items in this stream.
@@ -641,7 +641,7 @@ class X extends i {
641
641
  return this.upstream.next();
642
642
  }
643
643
  }
644
- class K extends i {
644
+ class j extends i {
645
645
  constructor(t, e) {
646
646
  super(), this.upstream = t, this.maxCount = e, this.count = 0;
647
647
  }
@@ -652,7 +652,7 @@ class K extends i {
652
652
  return this.count++ >= this.maxCount ? { value: null, done: !0 } : this.upstream.next();
653
653
  }
654
654
  }
655
- class j extends i {
655
+ class K extends i {
656
656
  constructor(t, e, r = !0) {
657
657
  super(), this.upstream = t, this.batchSize = e, this.enableSmallLastBatch = r, this.lastRead = Promise.resolve({ value: null, done: !1 });
658
658
  }
@@ -1219,7 +1219,7 @@ function at(s) {
1219
1219
  function it(s) {
1220
1220
  if (s.length === 0)
1221
1221
  throw new Error("Can't make a batch of zero elements.");
1222
- return s[0] instanceof m ? P(s) : D(s);
1222
+ return s[0] instanceof d ? P(s) : D(s);
1223
1223
  }
1224
1224
  /**
1225
1225
  * @license
@@ -1244,7 +1244,7 @@ function ut(s) {
1244
1244
  return k(() => t.next());
1245
1245
  });
1246
1246
  }
1247
- class dt {
1247
+ class mt {
1248
1248
  tokenizer;
1249
1249
  blockSize;
1250
1250
  constructor(t, e = 128) {
@@ -1257,20 +1257,20 @@ class dt {
1257
1257
  n === 1 ? void 0 : Math.floor(n * h.length)
1258
1258
  ), w = (function* () {
1259
1259
  for (; ; ) {
1260
- const u = Math.floor(Math.random() * (c.length - this.blockSize - 1)), d = c.slice(u, u + this.blockSize), B = c.slice(u + 1, u + this.blockSize + 1);
1261
- yield { xs: d, ys: B };
1260
+ const u = Math.floor(Math.random() * (c.length - this.blockSize - 1)), m = c.slice(u, u + this.blockSize), B = c.slice(u + 1, u + this.blockSize + 1);
1261
+ yield { xs: m, ys: B };
1262
1262
  }
1263
1263
  }).bind(this);
1264
1264
  return ut(w).batch(e).map((u) => {
1265
- const d = u;
1265
+ const m = u;
1266
1266
  return x(() => ({
1267
- xs: d.xs.cast("int32"),
1268
- ys: d.ys.cast("int32")
1267
+ xs: m.xs.cast("int32"),
1268
+ ys: m.ys.cast("int32")
1269
1269
  // this.tf.oneHot(batchData.ys.cast('int32'), this.tokenizer.vocabSize),
1270
1270
  }));
1271
1271
  }).prefetch(2);
1272
1272
  }
1273
1273
  }
1274
1274
  export {
1275
- dt as DatasetBuilder
1275
+ mt as DatasetBuilder
1276
1276
  };
@@ -1,7 +1,7 @@
1
1
  import { generateText as v } from "../utilities/generate.js";
2
2
  import L from "./Trainer.js";
3
3
  import x from "./Evaluator.js";
4
- import { a as h } from "../index-C4JCoBvj.js";
4
+ import { a as h } from "../index--6vO-cOz.js";
5
5
  const D = {
6
6
  desiredLoss: 0.01,
7
7
  logInterval: 1,
@@ -1,10 +1,10 @@
1
1
  import { DatasetBuilder as d } from "./DatasetBuilder.js";
2
2
  import h from "./AdamExt.js";
3
- import { t as g, v as u, a as o } from "../index-C4JCoBvj.js";
4
- import { m as y, n as f } from "../norm-CZM380I3.js";
5
- import { m as S, a as z } from "../moments-CjeIaVdp.js";
6
- import { m as b } from "../max-CP_9O2Yd.js";
7
- import { z as n } from "../zeros-dnQxFgAD.js";
3
+ import { t as g, v as u, a as o } from "../index--6vO-cOz.js";
4
+ import { m as y, n as f } from "../norm-DSva3hI3.js";
5
+ import { m as S, a as z } from "../moments-DYOHXoRV.js";
6
+ import { m as b } from "../max-BUShNgfh.js";
7
+ import { z as n } from "../zeros-8xl-W2DC.js";
8
8
  class G {
9
9
  constructor(t, s, e = 1e-3) {
10
10
  this.tokenizer = s, this.model = t, this.learningRate = e, this.resetOptimizer(), this.datasetBuilder = new d(s, t.config.gpt.blockSize);
@@ -1,9 +1,9 @@
1
1
  import { gatherSub as L } from "../ops/gatherSub.js";
2
2
  import { scatterSub as y } from "../ops/scatterSub.js";
3
- import { e as u, c as i, z as S, t as f, s as G } from "../index-C4JCoBvj.js";
4
- import { s as v } from "../softmax-Cujsg4ay.js";
5
- import { m as z } from "../max-CP_9O2Yd.js";
6
- import { l as k } from "../log_sum_exp-BswFnwOb.js";
3
+ import { e as u, c as i, z as S, t as f, s as G } from "../index--6vO-cOz.js";
4
+ import { s as v } from "../softmax-Dsxflvdl.js";
5
+ import { m as z } from "../max-BUShNgfh.js";
6
+ import { l as k } from "../log_sum_exp-CiEy1aUe.js";
7
7
  function F(a, s) {
8
8
  return f(() => {
9
9
  const e = a.shape[a.shape.length - 1], o = a.shape.slice(0, -1).reduce((d, c) => d * c, 1), p = a.shape.length > 2 ? a.reshape([o, e]) : a, n = s.shape.length > 1 ? s.reshape([o]).cast("int32") : s.cast("int32"), t = z(p, -1, !0), r = G(p, t), h = k(r, -1);
@@ -1,5 +1,5 @@
1
- import "../index-C4JCoBvj.js";
2
- import { z as n } from "../zeros-dnQxFgAD.js";
1
+ import "../index--6vO-cOz.js";
2
+ import { z as n } from "../zeros-8xl-W2DC.js";
3
3
  async function a(s) {
4
4
  const o = n([1, s.config.gpt.blockSize], "int32"), { logits: t, loss: i } = s.forward(o, void 0, !1);
5
5
  await t.data(), t.dispose(), i && i.dispose(), o.dispose();
@@ -1,6 +1,6 @@
1
- import { t as y } from "../index-C4JCoBvj.js";
2
- import { t as x } from "../tensor2d-DqFGNs_K.js";
3
- import { c as f } from "../concat-CuRsVY-K.js";
1
+ import { t as y } from "../index--6vO-cOz.js";
2
+ import { t as x } from "../tensor2d-DUr_htjt.js";
3
+ import { c as f } from "../concat-DvWM7HGZ.js";
4
4
  async function A(o, r, a, c, T) {
5
5
  if (c <= 0)
6
6
  throw new Error("Length must be a positive integer");
@@ -3,7 +3,7 @@ import { importWeights as b } from "./weights.js";
3
3
  import u from "../tokeniser/CharTokeniser.js";
4
4
  import F from "../NanoGPTModel.js";
5
5
  import { dummyPassAsync as j } from "./dummy.js";
6
- import { d as T } from "../index-C4JCoBvj.js";
6
+ import { d as T } from "../index--6vO-cOz.js";
7
7
  import E from "../tokeniser/bpe.js";
8
8
  async function A(t) {
9
9
  const o = await fetch(t);
@@ -1,4 +1,4 @@
1
- import { m as s } from "../index-C4JCoBvj.js";
1
+ import { m as s } from "../index--6vO-cOz.js";
2
2
  const m = 1024 * 1024;
3
3
  class i {
4
4
  log = /* @__PURE__ */ new Map();
@@ -1,5 +1,5 @@
1
- import "../index-C4JCoBvj.js";
2
- import { t as p } from "../tensor-BVeHdl7V.js";
1
+ import "../index--6vO-cOz.js";
2
+ import { t as p } from "../tensor-BGYi41cj.js";
3
3
  function h(n) {
4
4
  const e = n.reduce((s, o) => s + o.length, 0), a = new Float32Array(e);
5
5
  let t = 0;
@@ -1,4 +1,4 @@
1
- import { E as i } from "./index-C4JCoBvj.js";
1
+ import { E as i } from "./index--6vO-cOz.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { o as m, h as r, U as l, E as c, V as i, k as p, W as u, n as f } from "./index-C4JCoBvj.js";
1
+ import { o as m, h as r, X as l, E as c, Y as i, k as p, Z as u, n as f } from "./index--6vO-cOz.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@genai-fi/nanogpt",
3
- "version": "0.4.3",
3
+ "version": "0.4.5",
4
4
  "type": "module",
5
5
  "main": "dist/main.js",
6
6
  "types": "dist/main.d.ts",
@@ -1,26 +0,0 @@
1
- import { g as t, e as n } from "./index-C4JCoBvj.js";
2
- import "./ops/cpu/gelu.js";
3
- import "./ops/webgl/gelu.js";
4
- const a = {
5
- kernelName: "Gelu",
6
- inputsToSave: ["x"],
7
- outputsToSave: [],
8
- gradFunc: (e, r) => {
9
- const [u] = r;
10
- return {
11
- x: () => o(e, u)
12
- };
13
- }
14
- };
15
- t(a);
16
- function g(e) {
17
- return n().runKernel("Gelu", { x: e });
18
- }
19
- function o(e, r) {
20
- return n().runKernel("GeluGrad", { dy: e, x: r });
21
- }
22
- export {
23
- a,
24
- o as d,
25
- g
26
- };