@genai-fi/nanogpt 0.8.5 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/dist/Generator.d.ts +4 -1
  2. package/dist/Generator.js +144 -124
  3. package/dist/{RealDiv-D_q39E3A.js → RealDiv-D4EzDsC0.js} +7 -7
  4. package/dist/{Reshape-Bh_jzKzV.js → Reshape-Bowtk9BP.js} +2 -2
  5. package/dist/{Reshape-41YpQqEo.js → Reshape-DUqYftGC.js} +1 -1
  6. package/dist/TeachableLLM.js +5 -5
  7. package/dist/Trainer.d.ts +1 -0
  8. package/dist/Trainer.js +3 -0
  9. package/dist/{axis_util-Did9235A.js → axis_util-TbGYJ208.js} +1 -1
  10. package/dist/backend.js +2 -2
  11. package/dist/{backend_util-yC3YH1jo.js → backend_util-CJIiDoV1.js} +4 -4
  12. package/dist/{broadcast_to-CUvOdOT5.js → broadcast_to-DzlNweb8.js} +2 -2
  13. package/dist/checks/appendCache.js +2 -2
  14. package/dist/checks/attentionMask.js +3 -3
  15. package/dist/checks/gelu.js +2 -2
  16. package/dist/checks/matMulGelu.js +5 -5
  17. package/dist/checks/normRMS.js +4 -4
  18. package/dist/checks/normRMSGrad.js +3 -3
  19. package/dist/checks/qkv.js +2 -2
  20. package/dist/checks/rope.js +2 -2
  21. package/dist/{concat-pHiVqR3L.js → concat-B912vBbo.js} +1 -1
  22. package/dist/{dataset-DPPl-iLT.js → dataset-DlZtKmBq.js} +3 -3
  23. package/dist/{dropout-CcKSfOYE.js → dropout-C-csYCLj.js} +6 -6
  24. package/dist/{exports_initializers-DKk7-bsx.js → exports_initializers-B8iZMgQ0.js} +1 -1
  25. package/dist/{gather-CPg6ZlQA.js → gather-Dnpgw-YQ.js} +1 -1
  26. package/dist/{gelu-BkcmEEyD.js → gelu-Bp_-935b.js} +1 -1
  27. package/dist/{gpgpu_math-D_ODOLix.js → gpgpu_math-CDaYiyE_.js} +2 -2
  28. package/dist/{index-DdmHGZjq.js → index-BzFyqcy-.js} +13 -13
  29. package/dist/{index-evZ57wr4.js → index-C1rx_Ajs.js} +10 -10
  30. package/dist/{kernel_funcs_utils-CDfFpUab.js → kernel_funcs_utils-DKLK0Mg3.js} +3 -3
  31. package/dist/layers/BaseLayer.js +2 -2
  32. package/dist/layers/CausalSelfAttention.js +6 -6
  33. package/dist/layers/MLP.js +5 -5
  34. package/dist/layers/PositionEmbedding.js +5 -5
  35. package/dist/layers/RMSNorm.js +3 -3
  36. package/dist/layers/RoPECache.js +4 -4
  37. package/dist/layers/TiedEmbedding.js +5 -5
  38. package/dist/layers/TransformerBlock.js +1 -1
  39. package/dist/loader/loadTransformers.js +1 -1
  40. package/dist/loader/oldZipLoad.js +5 -5
  41. package/dist/{log_sum_exp-C8yFJfZz.js → log_sum_exp-DO6z8tSE.js} +9 -9
  42. package/dist/main.d.ts +1 -0
  43. package/dist/main.js +18 -16
  44. package/dist/{mat_mul-Dpy2mMRu.js → mat_mul-DzjTFx-u.js} +1 -1
  45. package/dist/{mod-CbibJi3D.js → mod-Dobti4j4.js} +1 -1
  46. package/dist/models/NanoGPTV1.d.ts +1 -0
  47. package/dist/models/NanoGPTV1.js +12 -9
  48. package/dist/models/model.d.ts +1 -0
  49. package/dist/models/model.js +5 -5
  50. package/dist/{mulmat_packed_gpu-q_Gmwyld.js → mulmat_packed_gpu-BT60jmzP.js} +1 -1
  51. package/dist/{ones-BAqVh-eA.js → ones-tIJeHlq-.js} +2 -2
  52. package/dist/ops/adamAdjust.js +1 -1
  53. package/dist/ops/adamMoments.js +1 -1
  54. package/dist/ops/appendCache.js +3 -3
  55. package/dist/ops/attentionMask.js +1 -1
  56. package/dist/ops/cpu/adamAdjust.js +1 -1
  57. package/dist/ops/cpu/adamMoments.js +2 -2
  58. package/dist/ops/cpu/appendCache.js +2 -2
  59. package/dist/ops/cpu/attentionMask.js +5 -5
  60. package/dist/ops/cpu/fusedSoftmax.js +2 -2
  61. package/dist/ops/cpu/gatherSub.js +5 -5
  62. package/dist/ops/cpu/gelu.js +1 -1
  63. package/dist/ops/cpu/matMulGelu.js +2 -2
  64. package/dist/ops/cpu/matMulMul.js +1 -1
  65. package/dist/ops/cpu/mulDropout.js +1 -1
  66. package/dist/ops/cpu/normRMS.js +1 -1
  67. package/dist/ops/cpu/qkv.js +3 -3
  68. package/dist/ops/cpu/rope.js +5 -5
  69. package/dist/ops/cpu/scatterSub.js +13 -13
  70. package/dist/ops/fusedSoftmax.js +1 -1
  71. package/dist/ops/gatherSub.js +1 -1
  72. package/dist/ops/gelu.js +2 -2
  73. package/dist/ops/grads/attentionMask.js +1 -1
  74. package/dist/ops/grads/fusedSoftmax.js +2 -2
  75. package/dist/ops/grads/gelu.js +2 -2
  76. package/dist/ops/grads/matMulGelu.js +1 -1
  77. package/dist/ops/grads/normRMS.js +1 -1
  78. package/dist/ops/grads/qkv.js +1 -1
  79. package/dist/ops/grads/rope.js +1 -1
  80. package/dist/ops/matMulGelu.js +1 -1
  81. package/dist/ops/matMulMul.js +1 -1
  82. package/dist/ops/mulDrop.js +1 -1
  83. package/dist/ops/normRMS.js +1 -1
  84. package/dist/ops/qkv.js +1 -1
  85. package/dist/ops/rope.js +4 -4
  86. package/dist/ops/scatterSub.js +1 -1
  87. package/dist/ops/webgl/adamAdjust.js +2 -2
  88. package/dist/ops/webgl/adamMoments.js +1 -1
  89. package/dist/ops/webgl/appendCache.js +1 -1
  90. package/dist/ops/webgl/attentionMask.js +1 -1
  91. package/dist/ops/webgl/fusedSoftmax.js +4 -4
  92. package/dist/ops/webgl/gatherSub.js +1 -1
  93. package/dist/ops/webgl/gelu.js +2 -2
  94. package/dist/ops/webgl/log.js +3 -3
  95. package/dist/ops/webgl/matMulGelu.js +4 -4
  96. package/dist/ops/webgl/matMulMul.js +1 -1
  97. package/dist/ops/webgl/mulDropout.js +1 -1
  98. package/dist/ops/webgl/normRMS.js +2 -2
  99. package/dist/ops/webgl/qkv.js +1 -1
  100. package/dist/ops/webgl/rope.js +1 -1
  101. package/dist/ops/webgl/scatterSub.js +1 -1
  102. package/dist/ops/webgpu/adamAdjust.js +3 -3
  103. package/dist/ops/webgpu/adamMoments.js +3 -3
  104. package/dist/ops/webgpu/appendCache.js +3 -3
  105. package/dist/ops/webgpu/attentionMask.js +3 -3
  106. package/dist/ops/webgpu/gatherSub.js +3 -3
  107. package/dist/ops/webgpu/gelu.js +37 -35
  108. package/dist/ops/webgpu/normRMS.js +2 -2
  109. package/dist/ops/webgpu/normRMSGrad.js +5 -5
  110. package/dist/ops/webgpu/qkv.js +3 -3
  111. package/dist/ops/webgpu/rope.js +3 -3
  112. package/dist/ops/webgpu/scatterSub.js +3 -3
  113. package/dist/ops/webgpu/utils/reductions.js +4 -4
  114. package/dist/{ops-542ai2vG.js → ops-LuCMAnmM.js} +65 -65
  115. package/dist/{random_width-DKGeiFuR.js → random_width-CXVRloNK.js} +23 -23
  116. package/dist/{range-BcUvLuf5.js → range-CWcz7xFA.js} +3 -3
  117. package/dist/{reciprocal-DhDWSKiD.js → reciprocal-C4rNcM-S.js} +1 -1
  118. package/dist/{register_all_kernels-Do9VvZmo.js → register_all_kernels-DIGpEwcf.js} +31 -31
  119. package/dist/{relu-B1AXs7p5.js → relu-BjCh_SYb.js} +1 -1
  120. package/dist/{reshape-WeJkT3ja.js → reshape-CnIwVG1c.js} +1 -1
  121. package/dist/{scatter_nd_util-B7yDhiQr.js → scatter_nd_util-BQdz--Gn.js} +1 -1
  122. package/dist/{selu_util-BgUO9gHY.js → selu_util-OtRzVwW5.js} +23 -23
  123. package/dist/{shared-V6D_md-c.js → shared-DmRsFyaJ.js} +6 -6
  124. package/dist/{shared-CZiWmQCI.js → shared-DuP7ue-R.js} +1 -1
  125. package/dist/{sin-CPxad7Am.js → sin-gpDNRxE0.js} +1 -1
  126. package/dist/{slice-B7jXtPnp.js → slice-d0Vo9XTN.js} +1 -1
  127. package/dist/{softmax-BfsyI4As.js → softmax-D7Jj3p_P.js} +1 -1
  128. package/dist/{split-BPxr8_8m.js → split-DK2k5eHf.js} +1 -1
  129. package/dist/{stack-BNwLzE43.js → stack-DFatutCx.js} +1 -1
  130. package/dist/{sum-ByFINZgi.js → sum-CJ0ULhmt.js} +1 -1
  131. package/dist/{tensor-DbqgIV9B.js → tensor-CZr4dh61.js} +1 -1
  132. package/dist/{tensor1d-CtJq5BOv.js → tensor1d-vML0r3q6.js} +1 -1
  133. package/dist/{tensor2d-CObBWBkW.js → tensor2d-D76QGjF3.js} +1 -1
  134. package/dist/{tensor4d-DLtk7Nxh.js → tensor4d-Df1WlVDY.js} +1 -1
  135. package/dist/training/Adam.js +2 -2
  136. package/dist/training/AdamExt.js +1 -1
  137. package/dist/training/DatasetBuilder.js +2 -2
  138. package/dist/training/FullTrainer.js +1 -1
  139. package/dist/training/Trainer.js +2 -2
  140. package/dist/training/sparseCrossEntropy.js +3 -3
  141. package/dist/utilities/dummy.js +2 -2
  142. package/dist/utilities/multinomialCPU.js +2 -2
  143. package/dist/utilities/performance.js +1 -1
  144. package/dist/utilities/profile.js +1 -1
  145. package/dist/utilities/safetensors.js +2 -2
  146. package/dist/utilities/topP.d.ts +1 -0
  147. package/dist/utilities/topP.js +13 -0
  148. package/dist/utilities/weights.js +2 -2
  149. package/dist/{variable-DPFOJyRG.js → variable-Bm2OFwGI.js} +1 -1
  150. package/dist/{webgpu_program-Dhk9R5aG.js → webgpu_program-DkQJOJSd.js} +1 -1
  151. package/dist/{webgpu_util-BqGnZg8t.js → webgpu_util-pLEV9tks.js} +1 -1
  152. package/dist/{zeros-Dnwix0p4.js → zeros-Bj5rMYA7.js} +1 -1
  153. package/package.json +1 -1
@@ -8,7 +8,8 @@ export interface GenerateOptions {
8
8
  usePadding?: boolean;
9
9
  attentionScores?: boolean;
10
10
  includeProbabilities?: boolean;
11
- embeddings?: boolean;
11
+ embeddings?: 'embedding' | 'logits' | 'softmax' | 'all';
12
+ targets?: number[];
12
13
  }
13
14
  export interface IGenerateOptions extends GenerateOptions {
14
15
  maxLength?: number;
@@ -31,6 +32,7 @@ export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
31
32
  private probabilitiesData;
32
33
  private embeddingsData;
33
34
  private tokens;
35
+ private lastLoss;
34
36
  constructor(model: Model<ModelForwardAttributes>, tokeniser: ITokeniser);
35
37
  private tokenisePrompt;
36
38
  private processResponse;
@@ -52,4 +54,5 @@ export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
52
54
  tensor: number[][];
53
55
  }[][];
54
56
  getTokens(): number[];
57
+ getLastLoss(): number | null;
55
58
  }
package/dist/Generator.js CHANGED
@@ -1,15 +1,15 @@
1
- import { E as z } from "./index-Dwqa6Zy2.js";
2
- import { C as A, D as L, E as C, a6 as I, t as O, k as R } from "./index-DdmHGZjq.js";
1
+ import { E as C } from "./index-Dwqa6Zy2.js";
2
+ import { E as _, F as I, G as O, a6 as R, t as q, k as K } from "./index-BzFyqcy-.js";
3
3
  import "./ops/cpu/attentionMask.js";
4
4
  import "./ops/webgl/attentionMask.js";
5
5
  import "./ops/grads/attentionMask.js";
6
6
  import "./ops/cpu/qkv.js";
7
7
  import "./ops/webgl/qkv.js";
8
8
  import "./ops/grads/qkv.js";
9
- import { p as _ } from "./random_width-DKGeiFuR.js";
10
- import { t as K } from "./register_all_kernels-Do9VvZmo.js";
9
+ import { p as j } from "./random_width-CXVRloNK.js";
10
+ import { t as G } from "./register_all_kernels-DIGpEwcf.js";
11
11
  import "./index-Tf7vU29b.js";
12
- import "./dataset-DPPl-iLT.js";
12
+ import "./dataset-DlZtKmBq.js";
13
13
  import "./ops/cpu/rope.js";
14
14
  import "./ops/webgl/rope.js";
15
15
  import "./ops/grads/rope.js";
@@ -24,29 +24,31 @@ import "./ops/grads/matMulGelu.js";
24
24
  import "./ops/cpu/normRMS.js";
25
25
  import "./ops/webgl/normRMS.js";
26
26
  import "./ops/grads/normRMS.js";
27
- import "./ops/cpu/gatherSub.js";
28
- import "./ops/webgl/gatherSub.js";
29
- import "./ops/cpu/scatterSub.js";
30
- import "./ops/webgl/scatterSub.js";
27
+ import { sparseSoftmaxCrossEntropy as V } from "./training/sparseCrossEntropy.js";
31
28
  import "./jszip.min-CjP2V1VV.js";
32
- import M from "./tokeniser/CharTokeniser.js";
29
+ import $ from "./tokeniser/CharTokeniser.js";
33
30
  import "./ops/cpu/adamAdjust.js";
34
31
  import "./ops/webgl/adamAdjust.js";
35
32
  import "./ops/cpu/adamMoments.js";
36
33
  import "./ops/webgl/adamMoments.js";
37
34
  import "./papaparse.min-C8l2Kvo1.js";
35
+ import M from "./utilities/topP.js";
36
+ import "./ops/cpu/scatterSub.js";
37
+ import "./ops/webgl/scatterSub.js";
38
+ import "./ops/cpu/gatherSub.js";
39
+ import "./ops/webgl/gatherSub.js";
38
40
  import "./ops/cpu/gelu.js";
39
41
  import "./ops/webgl/gelu.js";
40
- import "./gelu-BkcmEEyD.js";
42
+ import "./gelu-Bp_-935b.js";
41
43
  import "./ops/webgl/log.js";
42
44
  import "./checks/normRMS.js";
43
45
  import "./checks/normRMSGrad.js";
44
- import $ from "./utilities/multinomialCPU.js";
45
- import { r as x } from "./reshape-WeJkT3ja.js";
46
- import { t as P } from "./tensor2d-CObBWBkW.js";
47
- import { s as v } from "./softmax-BfsyI4As.js";
48
- import { g as q } from "./gather-CPg6ZlQA.js";
49
- import { c as G } from "./concat-pHiVqR3L.js";
46
+ import N from "./utilities/multinomialCPU.js";
47
+ import { r as E } from "./reshape-CnIwVG1c.js";
48
+ import { t as P } from "./tensor2d-D76QGjF3.js";
49
+ import { s as S } from "./softmax-D7Jj3p_P.js";
50
+ import { g as F } from "./gather-Dnpgw-YQ.js";
51
+ import { c as H } from "./concat-B912vBbo.js";
50
52
  /**
51
53
  * @license
52
54
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -63,18 +65,18 @@ import { c as G } from "./concat-pHiVqR3L.js";
63
65
  * limitations under the License.
64
66
  * =============================================================================
65
67
  */
66
- function N(h, t, e, i = !1) {
67
- const o = L(h, "logits", "multinomial"), s = o.size, n = o.rank;
68
- if (s < 2)
69
- throw new Error(`Error in multinomial: you need at least 2 outcomes, but got ${s}.`);
70
- if (n > 2)
71
- throw new Error(`Rank of probabilities must be 1 or 2, but is ${n}`);
72
- e = e || Math.random();
73
- const a = { logits: n === 1 ? x(o, [1, -1]) : o }, l = { numSamples: t, seed: e, normalized: i }, m = C.runKernel(I, a, l);
74
- return n === 1 ? x(m, [m.size]) : m;
68
+ function U(p, t, s, e = !1) {
69
+ const o = I(p, "logits", "multinomial"), i = o.size, c = o.rank;
70
+ if (i < 2)
71
+ throw new Error(`Error in multinomial: you need at least 2 outcomes, but got ${i}.`);
72
+ if (c > 2)
73
+ throw new Error(`Rank of probabilities must be 1 or 2, but is ${c}`);
74
+ s = s || Math.random();
75
+ const n = { logits: c === 1 ? E(o, [1, -1]) : o }, l = { numSamples: t, seed: s, normalized: e }, d = O.runKernel(R, n, l);
76
+ return c === 1 ? E(d, [d.size]) : d;
75
77
  }
76
- const D = /* @__PURE__ */ A({ multinomial_: N }), H = [
77
- ...Array.from({ length: 95 }, (h, t) => String.fromCharCode(t + 32)),
78
+ const z = /* @__PURE__ */ _({ multinomial_: U }), W = [
79
+ ...Array.from({ length: 95 }, (p, t) => String.fromCharCode(t + 32)),
78
80
  // ASCII
79
81
  // Spanish accented letters and punctuation
80
82
  ..."áéíóúüñ¿¡",
@@ -85,12 +87,12 @@ const D = /* @__PURE__ */ A({ multinomial_: N }), H = [
85
87
  // Cyrillic letters
86
88
  ..."абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ"
87
89
  ];
88
- function U(h, t) {
89
- return h.length === t ? h : h.length > t ? h.slice(0, t) : h.concat(Array(t - h.length).fill(""));
90
+ function B(p, t) {
91
+ return p.length === t ? p : p.length > t ? p.slice(0, t) : p.concat(Array(t - p.length).fill(""));
90
92
  }
91
- class qt extends z {
92
- constructor(t, e) {
93
- super(), this.model = t, this.tokeniser = e, this.actualTokeniser = e;
93
+ class Wt extends C {
94
+ constructor(t, s) {
95
+ super(), this.model = t, this.tokeniser = s, this.actualTokeniser = s;
94
96
  }
95
97
  active = !1;
96
98
  cache = null;
@@ -102,130 +104,145 @@ class qt extends z {
102
104
  probabilitiesData = [];
103
105
  embeddingsData = [];
104
106
  tokens = [];
105
- async tokenisePrompt(t, e) {
106
- const i = e ? await t.tokenise([e], !0) : [[t.eosToken]];
107
- return P(i, [1, i[0].length], "int32");
107
+ lastLoss = null;
108
+ async tokenisePrompt(t, s) {
109
+ const e = s ? await t.tokenise([s], !0) : [[t.eosToken]];
110
+ return e[0].length > this.model.config.blockSize && (e[0] = e[0].slice(-this.model.config.blockSize)), P(e, [1, e[0].length], "int32");
108
111
  }
109
- async processResponse(t, e, i, o) {
110
- const s = (await e.array())[0][0];
111
- if (this.lastToken = s, s === this.tokeniser.eosToken)
112
+ async processResponse(t, s, e, o) {
113
+ const i = (await s.array())[0][0];
114
+ if (this.lastToken = i, i === this.tokeniser.eosToken)
112
115
  return null;
113
- const n = await t.decode([s]);
114
- if (i) {
115
- const d = await Promise.all(
116
- i.map((a) => a.array().then((l) => l))
116
+ const c = await t.decode([i]);
117
+ if (e) {
118
+ const T = await Promise.all(
119
+ e.map((n) => n.array().then((l) => l))
117
120
  );
118
- i.forEach((a) => a.dispose()), this.attentionData.push(d);
119
- }
120
- if (o) {
121
- const d = await o.array();
122
- o.dispose(), this.probabilitiesData.push(d);
121
+ e.forEach((n) => n.dispose()), this.attentionData.push(T);
123
122
  }
124
- return this.tokens.push(s), this.emit("tokens", [s], n), n;
123
+ return o && this.probabilitiesData.push(o), this.tokens.push(i), this.emit("tokens", [i], c), c;
125
124
  }
126
125
  /** Generate logits and select a token. */
127
- async _generateToken(t, e, i) {
128
- const o = i?.temperature ?? 1, s = i?.topK, n = i?.topP, d = i?.usePadding ?? !1, a = {
126
+ async _generateToken(t, s, e) {
127
+ const o = e?.temperature ?? 1, i = e?.topK, c = e?.topP, T = e?.usePadding ?? !1, n = {
129
128
  training: !1,
130
- attentionScores: i?.attentionScores ? {
129
+ attentionScores: e?.attentionScores ? {
131
130
  attentionOut: []
132
131
  } : void 0,
133
- cache: e,
134
- outputEmbeddings: i?.embeddings ?? !1
135
- }, l = O(() => {
136
- const p = t, u = p.shape[1], r = u <= this.model.config.blockSize ? p : p.slice(
137
- [0, u - this.model.config.blockSize],
138
- [p.shape[0], this.model.config.blockSize]
139
- ), f = d ? this.model.config.blockSize - r.shape[1] : 0, b = f > 0 ? _(r, [
132
+ cache: s,
133
+ outputEmbeddings: !!e?.embeddings
134
+ }, [l, d] = q(() => {
135
+ const a = t, m = a.shape[1], h = m <= this.model.config.blockSize ? a : a.slice(
136
+ [0, m - this.model.config.blockSize],
137
+ [a.shape[0], this.model.config.blockSize]
138
+ ), r = T ? this.model.config.blockSize - h.shape[1] : 0, v = r > 0 ? j(h, [
140
139
  [0, 0],
141
- [0, f]
142
- ]) : r, [g] = this.model.forward(a, b), y = g.shape[1] - 1 - f, c = g.slice([0, y, 0], [g.shape[0], 1, g.shape[2]]);
143
- return a.attentionScores?.attentionOut && a.attentionScores.attentionOut.forEach((T, E) => {
144
- T.shape[1] !== 1 && (a.attentionScores.attentionOut[E] = R(
145
- T.slice([0, y, 0], [T.shape[0], 1, T.shape[2]])
146
- ), T.dispose());
147
- }), g.dispose(), c.div(o).squeeze([1]);
140
+ [0, r]
141
+ ]) : h, [g] = this.model.forward(n, v), u = g.shape[1] - 1 - r, f = g.slice([0, u, 0], [g.shape[0], 1, g.shape[2]]);
142
+ let y;
143
+ if (e?.targets) {
144
+ const k = e.targets.shift();
145
+ if (k !== void 0) {
146
+ const w = P([[k]], [1, 1], "int32"), D = V(f, w);
147
+ y = D.mean(), w.dispose(), D.dispose();
148
+ }
149
+ }
150
+ return n.attentionScores?.attentionOut && n.attentionScores.attentionOut.forEach((k, w) => {
151
+ k.shape[1] !== 1 && (n.attentionScores.attentionOut[w] = K(
152
+ k.slice([0, u, 0], [k.shape[0], 1, k.shape[2]])
153
+ ), k.dispose());
154
+ }), g.dispose(), [f.div(o).squeeze([1]), y];
148
155
  });
149
- let m;
150
- if (n) {
151
- const p = v(l), u = await p.array();
152
- p.dispose();
153
- const r = u[0].map((c, k) => ({ prob: c, index: k })).sort((c, k) => k.prob - c.prob);
154
- let f = 0;
155
- const b = new Array(r.length).fill(0);
156
- for (const c of r)
157
- if (f += c.prob, b[c.index] = c.prob, f >= n)
158
- break;
159
- const g = b.reduce((c, k) => c + k, 0), y = b.map((c) => c / g);
160
- m = $(y);
161
- } else if (s) {
162
- const { values: p, indices: u } = K(l, s), r = D(p, 1);
163
- m = q(u, r, 1), p.dispose(), u.dispose(), r.dispose();
164
- } else
165
- m = D(l, 1);
166
- let w;
167
- if (i?.includeProbabilities && (w = v(l)), a.embeddings) {
168
- const p = a.embeddings.map(async (r) => {
169
- const f = await r.tensor.array();
170
- return r.tensor.dispose(), { name: r.name, tensor: f };
171
- }), u = await Promise.all(p);
172
- this.embeddingsData.push(u);
156
+ let b, x;
157
+ if (c) {
158
+ const a = S(l), m = await a.array();
159
+ a.dispose();
160
+ const h = M(m, c);
161
+ e?.includeProbabilities && (x = m), b = N(h);
162
+ } else if (i) {
163
+ const { values: a, indices: m } = G(l, i), h = z(a, 1);
164
+ b = F(m, h, 1), a.dispose(), m.dispose(), h.dispose();
165
+ } else if (b = z(l, 1), e?.includeProbabilities) {
166
+ const a = S(l);
167
+ x = await a.array(), a.dispose();
168
+ }
169
+ if (n.embeddings) {
170
+ const m = (e?.embeddings === "all" ? n.embeddings : n.embeddings.filter((r) => r.name.startsWith("block_output_"))).map(async (r) => {
171
+ const v = r.tensor.shape[1], g = r.tensor.slice([0, v - 1, 0], [r.tensor.shape[0], 1, r.tensor.shape[2]]);
172
+ r.tensor.dispose();
173
+ const u = g.squeeze([1]);
174
+ if (g.dispose(), e?.embeddings === "softmax") {
175
+ const f = this.model.project(u);
176
+ u.dispose();
177
+ const y = S(f, -1);
178
+ return f.dispose(), { name: r.name, tensor: await y.array() };
179
+ } else if (e?.embeddings === "logits") {
180
+ const f = this.model.project(u);
181
+ return u.dispose(), { name: r.name, tensor: await f.array() };
182
+ } else {
183
+ const f = await u.array();
184
+ return u.dispose(), { name: r.name, tensor: f };
185
+ }
186
+ }), h = await Promise.all(m);
187
+ this.embeddingsData.push(h);
173
188
  }
174
- const S = m.reshape([1, 1]);
175
- return m.dispose(), m = S, l.dispose(), { output: m, probabilities: w, attention: a.attentionScores?.attentionOut };
189
+ const A = b.reshape([1, 1]);
190
+ b.dispose(), b = A, l.dispose();
191
+ let L;
192
+ return d && (L = await d.array(), d.dispose()), { output: b, probabilities: x, attention: n.attentionScores?.attentionOut, loss: L };
176
193
  }
177
194
  /** Generate multiple tokens in a loop and produce text */
178
195
  async _generate(t) {
179
- let e = this.lastToken >= 0 && this.cache ? P([this.lastToken], [1, 1], "int32") : await this.tokenisePrompt(this.actualTokeniser, this.outputText);
180
- const i = t?.maxLength ?? 1e3;
181
- for (let o = 0; o < i && this.active; o++) {
196
+ let s = this.lastToken >= 0 && this.cache ? P([this.lastToken], [1, 1], "int32") : await this.tokenisePrompt(this.actualTokeniser, this.outputText);
197
+ const e = t?.maxLength ?? 1e3;
198
+ for (let o = 0; o < e && this.active; o++) {
182
199
  const {
183
- output: s,
184
- probabilities: n,
185
- attention: d
186
- } = await this._generateToken(e, this.cache ? this.cache : void 0, {
200
+ output: i,
201
+ probabilities: c,
202
+ attention: T,
203
+ loss: n
204
+ } = await this._generateToken(s, this.cache ? this.cache : void 0, {
187
205
  ...t,
188
206
  usePadding: !this.cache
189
207
  });
190
- if (this.cache)
191
- e.dispose(), e = s;
208
+ if (n !== void 0 && (this.lastLoss = n), this.cache)
209
+ s.dispose(), s = i;
192
210
  else {
193
- const l = e;
194
- e = G([e, s], 1), l.dispose();
211
+ const d = s;
212
+ s = H([s, i], 1), d.dispose();
195
213
  }
196
- const a = await this.processResponse(this.actualTokeniser, s, d, n);
197
- if (this.cache || s.dispose(), a === null)
214
+ const l = await this.processResponse(this.actualTokeniser, i, T, c);
215
+ if (this.cache || i.dispose(), l === null)
198
216
  break;
199
- this.outputText += a;
217
+ this.outputText += l;
200
218
  }
201
- return e.dispose(), this.outputText;
219
+ return s.dispose(), this.outputText;
202
220
  }
203
221
  reset() {
204
222
  this.cache && (this.cache.forEach((t) => {
205
223
  t && (t.k && t.k.dispose(), t.v && t.v.dispose());
206
- }), this.cache = null), this.outputText = "", this.initialPrompt = null, this.lastToken = -1, this.attentionData = [], this.probabilitiesData = [], this.tokens = [];
224
+ }), this.cache = null), this.outputText = "", this.initialPrompt = null, this.lastToken = -1, this.attentionData = [], this.probabilitiesData = [], this.tokens = [], this.lastLoss = null;
207
225
  }
208
226
  dispose() {
209
227
  this.reset();
210
228
  }
211
- initialise(t, e) {
212
- const i = t && t.length > this.model.config.blockSize ? t.slice(-this.model.config.blockSize) : t ?? null;
213
- if (this.cache && e?.noCache && this.reset(), this.initialPrompt = i || null, this.lastToken === -1 && (this.outputText = this.initialPrompt || ""), !this.cache && !e?.noCache && this.model.config.useRope) {
214
- const s = new Array(this.model.config.nLayer);
215
- for (let n = 0; n < this.model.config.nLayer; n++)
216
- s[n] = { k: void 0, v: void 0, length: 0, cumulativeLength: 0 };
217
- this.cache = s, this.lastToken = -1;
229
+ initialise(t, s) {
230
+ if (this.cache && s?.noCache && this.reset(), this.initialPrompt = t || null, this.lastToken === -1 && (this.outputText = this.initialPrompt || ""), !this.cache && !s?.noCache && this.model.config.useRope) {
231
+ const o = new Array(this.model.config.nLayer);
232
+ for (let i = 0; i < this.model.config.nLayer; i++)
233
+ o[i] = { k: void 0, v: void 0, length: 0, cumulativeLength: 0 };
234
+ this.cache = o, this.lastToken = -1;
218
235
  }
219
- const o = this.tokeniser.trained ? this.tokeniser : new M(U(H, this.tokeniser.vocabSize));
220
- this.actualTokeniser = o;
236
+ const e = this.tokeniser.trained ? this.tokeniser : new $(B(W, this.tokeniser.vocabSize));
237
+ this.actualTokeniser = e;
221
238
  }
222
- async step(t, e) {
223
- const i = { ...e, maxLength: 1 };
224
- return this.generate(t, i);
239
+ async step(t, s) {
240
+ const e = { ...s, maxLength: 1 };
241
+ return this.generate(t, e);
225
242
  }
226
- async generate(t, e) {
227
- this.initialise(t, e), this.active = !0, this.emit("start");
228
- const o = await this._generate(e);
243
+ async generate(t, s) {
244
+ this.initialise(t, s), this.active = !0, s?.maxLength !== 1 && this.emit("start");
245
+ const o = await this._generate(s);
229
246
  return this.active = !1, this.emit("stop"), o;
230
247
  }
231
248
  stop() {
@@ -246,7 +263,10 @@ class qt extends z {
246
263
  getTokens() {
247
264
  return this.tokens;
248
265
  }
266
+ getLastLoss() {
267
+ return this.lastLoss;
268
+ }
249
269
  }
250
270
  export {
251
- qt as default
271
+ Wt as default
252
272
  };
@@ -1,10 +1,10 @@
1
- import { aq as T, ag as E, p as O, j as V, aB as B, a1 as F, ah as j, aC as K } from "./index-DdmHGZjq.js";
2
- import { r as $ } from "./Reshape-Bh_jzKzV.js";
3
- import { g as A, a as C, b as k, c as N, e as R } from "./axis_util-Did9235A.js";
4
- import { t as U, m as W } from "./shared-CZiWmQCI.js";
5
- import { c as _ } from "./backend_util-yC3YH1jo.js";
6
- import { f as y } from "./gpgpu_math-D_ODOLix.js";
7
- import { g as G, b as L } from "./kernel_funcs_utils-CDfFpUab.js";
1
+ import { aq as T, ag as E, p as O, j as V, aB as B, a1 as F, ah as j, aC as K } from "./index-BzFyqcy-.js";
2
+ import { r as $ } from "./Reshape-Bowtk9BP.js";
3
+ import { g as A, a as C, b as k, c as N, e as R } from "./axis_util-TbGYJ208.js";
4
+ import { t as U, m as W } from "./shared-DuP7ue-R.js";
5
+ import { c as _ } from "./backend_util-CJIiDoV1.js";
6
+ import { f as y } from "./gpgpu_math-CDaYiyE_.js";
7
+ import { g as G, b as L } from "./kernel_funcs_utils-DKLK0Mg3.js";
8
8
  /**
9
9
  * @license
10
10
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -1,5 +1,5 @@
1
- import { j as c, a5 as C, n as f, V as R } from "./index-DdmHGZjq.js";
2
- import { u as g, g as I, a as x, b as F, c as $, d as u, e as m, i as l } from "./gpgpu_math-D_ODOLix.js";
1
+ import { j as c, a5 as C, n as f, V as R } from "./index-BzFyqcy-.js";
2
+ import { u as g, g as I, a as x, b as F, c as $, d as u, e as m, i as l } from "./gpgpu_math-CDaYiyE_.js";
3
3
  /**
4
4
  * @license
5
5
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,4 +1,4 @@
1
- import { j as h, a5 as d, n as c, V as m } from "./index-DdmHGZjq.js";
1
+ import { j as h, a5 as d, n as c, V as m } from "./index-BzFyqcy-.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2021 Google LLC. All Rights Reserved.
@@ -5,17 +5,17 @@ import u from "./Generator.js";
5
5
  import f from "./Trainer.js";
6
6
  import { E as p } from "./index-Dwqa6Zy2.js";
7
7
  import { dummyPassTrainAsync as m } from "./utilities/dummy.js";
8
- import "./index-DdmHGZjq.js";
8
+ import "./index-BzFyqcy-.js";
9
9
  import "./ops/cpu/attentionMask.js";
10
10
  import "./ops/webgl/attentionMask.js";
11
11
  import "./ops/grads/attentionMask.js";
12
12
  import "./ops/cpu/qkv.js";
13
13
  import "./ops/webgl/qkv.js";
14
14
  import "./ops/grads/qkv.js";
15
- import "./random_width-DKGeiFuR.js";
16
- import "./register_all_kernels-Do9VvZmo.js";
15
+ import "./random_width-CXVRloNK.js";
16
+ import "./register_all_kernels-DIGpEwcf.js";
17
17
  import "./index-Tf7vU29b.js";
18
- import "./dataset-DPPl-iLT.js";
18
+ import "./dataset-DlZtKmBq.js";
19
19
  import "./ops/cpu/rope.js";
20
20
  import "./ops/webgl/rope.js";
21
21
  import "./ops/grads/rope.js";
@@ -40,7 +40,7 @@ import "./papaparse.min-C8l2Kvo1.js";
40
40
  import "./jszip.min-CjP2V1VV.js";
41
41
  import "./ops/cpu/gelu.js";
42
42
  import "./ops/webgl/gelu.js";
43
- import "./gelu-BkcmEEyD.js";
43
+ import "./gelu-Bp_-935b.js";
44
44
  import "./ops/webgl/log.js";
45
45
  import "./ops/cpu/adamMoments.js";
46
46
  import "./ops/webgl/adamMoments.js";
package/dist/Trainer.d.ts CHANGED
@@ -33,5 +33,6 @@ export default class Trainer extends EE<'start' | 'stop' | 'log'> {
33
33
  step(options?: ITrainerOptions): Promise<void>;
34
34
  getLog(): TrainingLogEntry[];
35
35
  getProgress(): ExtendedTrainingProgress | null;
36
+ isPrepared(): boolean;
36
37
  }
37
38
  export {};
package/dist/Trainer.js CHANGED
@@ -85,6 +85,9 @@ class m extends l {
85
85
  getProgress() {
86
86
  return this.progress;
87
87
  }
88
+ isPrepared() {
89
+ return this.trainDataset !== void 0 && this.validationDataset !== void 0;
90
+ }
88
91
  }
89
92
  export {
90
93
  m as default
@@ -1,4 +1,4 @@
1
- import { n as c } from "./index-DdmHGZjq.js";
1
+ import { n as c } from "./index-BzFyqcy-.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2017 Google LLC. All Rights Reserved.
package/dist/backend.js CHANGED
@@ -1,6 +1,6 @@
1
- import { g as a, s as i, r as o } from "./index-DdmHGZjq.js";
1
+ import { g as a, s as i, r as o } from "./index-BzFyqcy-.js";
2
2
  async function e(t) {
3
- a() !== t && (t === "webgpu" && (await import("./index-evZ57wr4.js"), await import("./ops/webgpu/index.js")), await i(t), await o(), console.log(`Backend set to ${t}`));
3
+ a() !== t && (t === "webgpu" && (await import("./index-C1rx_Ajs.js"), await import("./ops/webgpu/index.js")), await i(t), await o(), console.log(`Backend set to ${t}`));
4
4
  }
5
5
  export {
6
6
  e as selectBackend
@@ -1,7 +1,7 @@
1
- import { j as m, a3 as R, n as g, aN as $, aO as O, aP as _, l as M, ae as y, ax as D, aQ as T, u as b, aR as F } from "./index-DdmHGZjq.js";
2
- import { b as L, d as W, f as v, c as N, e as x, g as P, a as C, h as z } from "./axis_util-Did9235A.js";
3
- import { S as U, a as B, b as V, c as j, d as G, e as H, f as k, g as q, h as Z, i as X, j as J, k as K, l as Q, m as Y, s as ee, n as te, o as ne, t as se } from "./selu_util-BgUO9gHY.js";
4
- import { c as re, v as oe, a as ae } from "./scatter_nd_util-B7yDhiQr.js";
1
+ import { j as m, a3 as R, n as g, aN as $, aO as O, aP as _, l as M, ae as y, ax as D, aQ as T, u as b, aR as F } from "./index-BzFyqcy-.js";
2
+ import { b as L, d as W, f as v, c as N, e as x, g as P, a as C, h as z } from "./axis_util-TbGYJ208.js";
3
+ import { S as U, a as B, b as V, c as j, d as G, e as H, f as k, g as q, h as Z, i as X, j as J, k as K, l as Q, m as Y, s as ee, n as te, o as ne, t as se } from "./selu_util-OtRzVwW5.js";
4
+ import { c as re, v as oe, a as ae } from "./scatter_nd_util-BQdz--Gn.js";
5
5
  function ie(e, n) {
6
6
  const r = e.shape.length, t = n.shape.length;
7
7
  if (r < 1)
@@ -1,5 +1,5 @@
1
- import { C as h, D as f, M as p, H as g, E as u, X as b } from "./index-DdmHGZjq.js";
2
- import { r as T } from "./reshape-WeJkT3ja.js";
1
+ import { E as h, F as f, M as p, J as g, G as u, X as b } from "./index-BzFyqcy-.js";
2
+ import { r as T } from "./reshape-CnIwVG1c.js";
3
3
  /**
4
4
  * @license
5
5
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -1,5 +1,5 @@
1
- import { s, e as a } from "../index-DdmHGZjq.js";
2
- import { t } from "../tensor4d-DLtk7Nxh.js";
1
+ import { s, e as a } from "../index-BzFyqcy-.js";
2
+ import { t } from "../tensor4d-Df1WlVDY.js";
3
3
  async function u(e) {
4
4
  await s(e);
5
5
  const n = t(
@@ -1,6 +1,6 @@
1
- import { s as i, e } from "../index-DdmHGZjq.js";
2
- import { t } from "../tensor4d-DLtk7Nxh.js";
3
- import { t as a } from "../tensor2d-CObBWBkW.js";
1
+ import { s as i, e } from "../index-BzFyqcy-.js";
2
+ import { t } from "../tensor4d-Df1WlVDY.js";
3
+ import { t as a } from "../tensor2d-D76QGjF3.js";
4
4
  async function k(n) {
5
5
  await i(n);
6
6
  const s = t(
@@ -1,5 +1,5 @@
1
- import { s as e, e as o } from "../index-DdmHGZjq.js";
2
- import { t as s } from "../tensor2d-CObBWBkW.js";
1
+ import { s as e, e as o } from "../index-BzFyqcy-.js";
2
+ import { t as s } from "../tensor2d-D76QGjF3.js";
3
3
  async function m(t) {
4
4
  await e(t);
5
5
  const r = s(
@@ -1,9 +1,9 @@
1
- import { s as n, e as s } from "../index-DdmHGZjq.js";
2
- import "../random_width-DKGeiFuR.js";
3
- import "../register_all_kernels-Do9VvZmo.js";
1
+ import { s as n, e as s } from "../index-BzFyqcy-.js";
2
+ import "../random_width-CXVRloNK.js";
3
+ import "../register_all_kernels-DIGpEwcf.js";
4
4
  import "../index-Tf7vU29b.js";
5
- import "../dataset-DPPl-iLT.js";
6
- import { t as e } from "../tensor2d-CObBWBkW.js";
5
+ import "../dataset-DlZtKmBq.js";
6
+ import { t as e } from "../tensor2d-D76QGjF3.js";
7
7
  async function f(t) {
8
8
  await n(t);
9
9
  const r = e(
@@ -1,7 +1,7 @@
1
- import { s as u, y as A, e as y } from "../index-DdmHGZjq.js";
2
- import { a as h } from "../ops-542ai2vG.js";
3
- import { t as p } from "../tensor1d-CtJq5BOv.js";
4
- import { t as a } from "../tensor-DbqgIV9B.js";
1
+ import { s as u, y as A, e as y } from "../index-BzFyqcy-.js";
2
+ import { a as h } from "../ops-LuCMAnmM.js";
3
+ import { t as p } from "../tensor1d-vML0r3q6.js";
4
+ import { t as a } from "../tensor-CZr4dh61.js";
5
5
  const w = Array.from({ length: 2048 * 192 }, () => Math.random()), x = Array.from({ length: 192 }, () => Math.random()), M = Array.from({ length: 2048 * 192 }, () => Math.random());
6
6
  async function k(t) {
7
7
  await u(t);
@@ -1,6 +1,6 @@
1
- import { s as c, e as d } from "../index-DdmHGZjq.js";
2
- import { t as f } from "../tensor1d-CtJq5BOv.js";
3
- import { t as r } from "../tensor-DbqgIV9B.js";
1
+ import { s as c, e as d } from "../index-BzFyqcy-.js";
2
+ import { t as f } from "../tensor1d-vML0r3q6.js";
3
+ import { t as r } from "../tensor-CZr4dh61.js";
4
4
  const y = Array.from({ length: 2048 * 192 }, () => Math.random()), i = Array.from({ length: 192 }, () => Math.random()), l = Array.from({ length: 2048 * 192 }, () => Math.random());
5
5
  async function x(t) {
6
6
  await c(t);
@@ -1,5 +1,5 @@
1
- import { z as i, A as u, B as c, s as l, e as h } from "../index-DdmHGZjq.js";
2
- import { t as f } from "../tensor2d-CObBWBkW.js";
1
+ import { B as i, C as u, D as c, s as l, e as h } from "../index-BzFyqcy-.js";
2
+ import { t as f } from "../tensor2d-D76QGjF3.js";
3
3
  /**
4
4
  * @license
5
5
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -1,6 +1,6 @@
1
1
  import t from "../layers/RoPECache.js";
2
- import { s as c, e as i } from "../index-DdmHGZjq.js";
3
- import { t as p } from "../tensor4d-DLtk7Nxh.js";
2
+ import { s as c, e as i } from "../index-BzFyqcy-.js";
3
+ import { t as p } from "../tensor4d-Df1WlVDY.js";
4
4
  async function y(a) {
5
5
  await c(a);
6
6
  const o = p(
@@ -1,4 +1,4 @@
1
- import { C as s, n as a, F as p, H as i, E as l, I as f } from "./index-DdmHGZjq.js";
1
+ import { E as s, n as a, I as p, J as i, G as l, K as f } from "./index-BzFyqcy-.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -1,7 +1,7 @@
1
- import { ak as S, T as h, ag as k, d as v, al as o, am as p, an as g, n as N, t as y } from "./index-DdmHGZjq.js";
1
+ import { ak as S, T as h, ag as k, d as v, al as o, am as p, an as g, n as N, t as y } from "./index-BzFyqcy-.js";
2
2
  import { s as R } from "./index-C4L8Cm77.js";
3
- import { s as $ } from "./stack-BNwLzE43.js";
4
- import { t as B } from "./tensor-DbqgIV9B.js";
3
+ import { s as $ } from "./stack-DFatutCx.js";
4
+ import { t as B } from "./tensor-CZr4dh61.js";
5
5
  /**
6
6
  * @license
7
7
  * Copyright 2018 Google LLC. All Rights Reserved.