@genai-fi/nanogpt 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,4 +46,5 @@ export default class NanoGPT {
46
46
  probabilities?: TF.Tensor;
47
47
  };
48
48
  getNumParams(): number;
49
+ dispose(): void;
49
50
  }
@@ -54,7 +54,7 @@ class $ {
54
54
  }
55
55
  inputPhase(t, e = !1) {
56
56
  return this.tf.tidy(() => {
57
- const [, s] = t.shape, i = this.wte.embed(t), n = this.tf.range(0, s, 1, "int32"), a = this.wpe.apply(n), o = i.add(a);
57
+ const [, s] = t.shape, i = this.wte.embed(t), n = this.tf.range(0, s, 1, "int32"), h = this.wpe.apply(n), o = i.add(h);
58
58
  return this.drop.apply(o, { training: e });
59
59
  });
60
60
  }
@@ -98,8 +98,8 @@ class $ {
98
98
  throw new Error("No attentions for rollout");
99
99
  const e = t[0].shape[0], s = t[0].shape[1], i = this.tf.eye(s, s).expandDims(0);
100
100
  let n = i.tile([e, 1, 1]);
101
- for (const a of t) {
102
- let o = a.add(i);
101
+ for (const h of t) {
102
+ let o = h.add(i);
103
103
  o = o.div(o.sum(-1, !0)), n = o.matMul(n);
104
104
  }
105
105
  return n;
@@ -108,36 +108,36 @@ class $ {
108
108
  forward(t, e, s = !1, i = !1) {
109
109
  return this.validateInput(t), this.tf.tidy(() => {
110
110
  let n = this.inputPhase(t, s);
111
- const a = [];
111
+ const h = [];
112
112
  for (const c of this.blocks) {
113
- const { output: p, attention: l } = c.call(n, s, i);
114
- n = p, i && l && a.push(l);
113
+ const { output: d, attention: l } = c.call(n, s, i);
114
+ n = d, i && l && h.push(l);
115
115
  }
116
116
  let o;
117
- i && a.length > 0 && (o = this.computeAttentionRollout(a)), n = this.lnF.apply(n);
118
- const h = this.wte.project(n);
117
+ i && h.length > 0 && (o = this.computeAttentionRollout(h)), n = this.lnF.apply(n);
118
+ const a = this.wte.project(n);
119
119
  let r;
120
- return e && (r = this.calculateLoss(h, e)), { logits: h, loss: r, attention: i ? o : void 0 };
120
+ return e && (r = this.calculateLoss(a, e)), { logits: a, loss: r, attention: i ? o : void 0 };
121
121
  });
122
122
  }
123
123
  generate(t, e) {
124
- const s = e?.temperature ?? 1, i = e?.topK, n = e?.usePadding ?? !1, a = e?.includeAttention ?? !1;
124
+ const s = e?.temperature ?? 1, i = e?.topK, n = e?.usePadding ?? !1, h = e?.includeAttention ?? !1;
125
125
  return this.tf.tidy(() => {
126
- const o = t, h = o.shape[1], r = h <= this.config.blockSize ? o : o.slice(
127
- [0, h - this.config.blockSize],
126
+ const o = t, a = o.shape[1], r = a <= this.config.blockSize ? o : o.slice(
127
+ [0, a - this.config.blockSize],
128
128
  [o.shape[0], this.config.blockSize]
129
- ), c = n ? this.config.blockSize - r.shape[1] : 0, p = c > 0 ? this.tf.pad(r, [
129
+ ), c = n ? this.config.blockSize - r.shape[1] : 0, d = c > 0 ? this.tf.pad(r, [
130
130
  [0, 0],
131
131
  [0, c]
132
- ]) : r, { logits: l, attention: g } = this.forward(p, void 0, !1, a), b = l.shape[1] - 1 - c, u = l.slice([0, b, 0], [l.shape[0], 1, l.shape[2]]), k = g ? g.slice([0, b, 0], [g.shape[0], 1, g.shape[2]]) : void 0, d = u.div(s);
132
+ ]) : r, { logits: l, attention: p } = this.forward(d, void 0, !1, h), b = l.shape[1] - 1 - c, u = l.slice([0, b, 0], [l.shape[0], 1, l.shape[2]]), k = p ? p.slice([0, b, 0], [p.shape[0], 1, p.shape[2]]) : void 0, g = u.div(s);
133
133
  let f;
134
134
  if (i) {
135
- const { values: w, indices: E } = this.tf.topk(d, i), y = this.tf.multinomial(w.squeeze([1]), 1);
135
+ const { values: w, indices: E } = this.tf.topk(g, i), y = this.tf.multinomial(w.squeeze([1]), 1);
136
136
  f = this.tf.gather(E.squeeze([1]), y, 1);
137
137
  } else
138
- f = this.tf.multinomial(d.squeeze([1]), 1);
138
+ f = this.tf.multinomial(g.squeeze([1]), 1);
139
139
  let m;
140
- return e?.includeProbabilities && (m = this.tf.softmax(d.squeeze([1]))), f = f.reshape([1, 1]), { output: f, attention: k?.squeeze([1]), probabilities: m };
140
+ return e?.includeProbabilities && (m = this.tf.softmax(g.squeeze([1]))), f = f.reshape([1, 1]), { output: f, attention: k?.squeeze([1]), probabilities: m };
141
141
  });
142
142
  }
143
143
  getNumParams() {
@@ -146,6 +146,9 @@ class $ {
146
146
  this.config.nEmbed * 4 * this.config.nEmbed), i = this.config.nEmbed + this.config.vocabSize * this.config.nEmbed;
147
147
  return t + e + s + i;
148
148
  }
149
+ dispose() {
150
+ this.wte.dispose(), this.wpe.dispose(), this.drop.dispose(), this.blocks.forEach((t) => t.dispose()), this.lnF.dispose();
151
+ }
149
152
  }
150
153
  export {
151
154
  $ as default
@@ -2,10 +2,11 @@ import { default as TF } from '@tensorflow/tfjs';
2
2
  import { GPTConfig } from './config';
3
3
  import { ITokeniser } from './tokeniser/type';
4
4
  import { default as NanoGPT } from './NanoGPTModel';
5
+ import { SaveOptions } from './utilities/save';
5
6
  import { default as Generator, IGenerateOptions } from './Generator';
6
7
  import { default as Trainer, ITrainerOptions } from './Trainer';
7
8
  import { default as EE } from 'eventemitter3';
8
- type TeachableLLMStatus = 'warmup' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
9
+ type TeachableLLMStatus = 'warmup' | 'awaitingTokens' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
9
10
  export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
10
11
  private _config?;
11
12
  private _model?;
@@ -19,7 +20,7 @@ export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
19
20
  get status(): TeachableLLMStatus;
20
21
  get ready(): boolean;
21
22
  private setStatus;
22
- saveModel(): Promise<Blob>;
23
+ saveModel(options?: SaveOptions): Promise<Blob>;
23
24
  static loadModel(tf: typeof TF, data: Blob | Buffer | string): TeachableLLM;
24
25
  static create(tf: typeof TF, config?: Partial<GPTConfig>): TeachableLLM;
25
26
  getNumParams(): number;
@@ -27,5 +28,6 @@ export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
27
28
  train(text: string[], options?: ITrainerOptions): Promise<void>;
28
29
  generator(): Generator;
29
30
  generateText(prompt?: string, options?: IGenerateOptions): Promise<string>;
31
+ dispose(): void;
30
32
  }
31
33
  export {};
@@ -1,13 +1,13 @@
1
1
  import d from "./NanoGPTModel.js";
2
- import { defaultConfig as m } from "./config.js";
3
- import { saveModel as u } from "./utilities/save.js";
2
+ import { defaultConfig as u } from "./config.js";
3
+ import { saveModel as m } from "./utilities/save.js";
4
4
  import { loadModel as l } from "./utilities/load.js";
5
5
  import f from "./Generator.js";
6
6
  import _ from "./Trainer.js";
7
7
  import { E as c } from "./index-SOhdqzHq.js";
8
- import { dummyPassAsync as a } from "./utilities/dummy.js";
8
+ import { dummyPassAsync as h } from "./utilities/dummy.js";
9
9
  import g from "./tokeniser/CharTokeniser.js";
10
- class n extends c {
10
+ class a extends c {
11
11
  _config;
12
12
  _model;
13
13
  tf;
@@ -35,20 +35,20 @@ class n extends c {
35
35
  return this._status;
36
36
  }
37
37
  get ready() {
38
- return this._status === "ready" && !!this._model && !!this._tokeniser;
38
+ return this._status === "ready" && !!this._model && !!this._tokeniser && this.tokeniser.trained;
39
39
  }
40
40
  setStatus(t) {
41
41
  this._status !== t && (this._status = t, this.emit("status", t));
42
42
  }
43
- saveModel() {
43
+ saveModel(t) {
44
44
  if (!this._model || !this._tokeniser)
45
45
  throw new Error("Model or tokeniser is not initialized.");
46
- return u(this._model, this._tokeniser);
46
+ return m(this._model, this._tokeniser, t);
47
47
  }
48
48
  static loadModel(t, r) {
49
- const e = new n(t);
49
+ const e = new a(t);
50
50
  return l(t, r).then(({ model: i, tokeniser: o }) => {
51
- e._model = i, e._tokeniser = o, e._config = i.config, e.setStatus("warmup"), a(i).then(() => {
51
+ e._model = i, e._tokeniser = o, e._config = i.config, e.setStatus("warmup"), h(i).then(() => {
52
52
  e.setStatus("ready");
53
53
  }).catch((s) => {
54
54
  e.setStatus("error"), e.emit("error", s);
@@ -58,11 +58,13 @@ class n extends c {
58
58
  }), e;
59
59
  }
60
60
  static create(t, r = {}) {
61
- const e = { ...m, ...r }, i = new g(e.vocabSize), o = new d(t, e), s = new n(t, i, o);
62
- return s.setStatus("warmup"), a(o).then(() => {
63
- s.setStatus("ready");
64
- }).catch((h) => {
65
- s.setStatus("error"), s.emit("error", h);
61
+ const e = { ...u, ...r }, i = new g(e.vocabSize), o = new d(t, e), s = new a(t, i, o);
62
+ return s.setStatus("warmup"), h(o).then(() => {
63
+ s.setStatus("awaitingTokens"), s.tokeniser.once("trainStatus", (n) => {
64
+ n === "trained" && s.setStatus("ready");
65
+ });
66
+ }).catch((n) => {
67
+ s.setStatus("error"), s.emit("error", n);
66
68
  }), s;
67
69
  }
68
70
  getNumParams() {
@@ -96,7 +98,10 @@ class n extends c {
96
98
  generateText(t, r) {
97
99
  return this.generator().generate(t, r);
98
100
  }
101
+ dispose() {
102
+ this._model?.dispose();
103
+ }
99
104
  }
100
105
  export {
101
- n as default
106
+ a as default
102
107
  };
@@ -25,4 +25,5 @@ export default class CausalSelfAttention {
25
25
  output: TF.Tensor;
26
26
  attention?: TF.Tensor;
27
27
  };
28
+ dispose(): void;
28
29
  }
@@ -50,35 +50,38 @@ class m {
50
50
  this.cAttn.setWeights(t.get(`block_${this.index}_cAttn`) || []), this.cProj.setWeights(t.get(`block_${this.index}_cProj`) || []);
51
51
  }
52
52
  getAttentionScores(t, e, s) {
53
- const a = t.shape[2], n = this.tf.matMul(t, e, !1, !0).mul(this.divisor), i = this.maskInf.slice([0, 0], [a, a]), o = n.add(i), h = this.tf.softmax(o, -1);
53
+ const a = t.shape[2], o = this.tf.matMul(t, e, !1, !0).mul(this.divisor), i = this.maskInf.slice([0, 0], [a, a]), n = o.add(i), h = this.tf.softmax(n, -1);
54
54
  return this.attnDropout.apply(h, { training: s });
55
55
  }
56
56
  getQKV(t) {
57
- const [e, s, a] = t.shape, r = this.cAttn.apply(t), [n, i, o] = this.tf.split(r, 3, -1);
57
+ const [e, s, a] = t.shape, r = this.cAttn.apply(t), [o, i, n] = this.tf.split(r, 3, -1);
58
58
  r.dispose();
59
- const h = a / this.config.nHead, c = this.tf.reshape(n, [e, s, this.config.nHead, h]);
60
- n.dispose();
61
- const p = c.transpose([0, 2, 1, 3]);
59
+ const h = a / this.config.nHead, c = this.tf.reshape(o, [e, s, this.config.nHead, h]);
60
+ o.dispose();
61
+ const l = c.transpose([0, 2, 1, 3]);
62
62
  c.dispose();
63
- const l = this.tf.reshape(i, [e, s, this.config.nHead, h]);
63
+ const d = this.tf.reshape(i, [e, s, this.config.nHead, h]);
64
64
  i.dispose();
65
- const u = l.transpose([0, 2, 1, 3]);
66
- l.dispose();
67
- const d = this.tf.reshape(o, [e, s, this.config.nHead, h]);
68
- o.dispose();
69
- const b = d.transpose([0, 2, 1, 3]);
70
- return d.dispose(), [p, u, b];
65
+ const u = d.transpose([0, 2, 1, 3]);
66
+ d.dispose();
67
+ const p = this.tf.reshape(n, [e, s, this.config.nHead, h]);
68
+ n.dispose();
69
+ const b = p.transpose([0, 2, 1, 3]);
70
+ return p.dispose(), [l, u, b];
71
71
  }
72
72
  getOutputProjection(t, e) {
73
- const s = t.shape[0], a = t.shape[2], r = this.config.nEmbed, n = t.transpose([0, 2, 1, 3]), i = this.tf.reshape(n, [s, a, r]), o = this.cProj.apply(i);
74
- return this.residDropout.apply(o, { training: e });
73
+ const s = t.shape[0], a = t.shape[2], r = this.config.nEmbed, o = t.transpose([0, 2, 1, 3]), i = this.tf.reshape(o, [s, a, r]), n = this.cProj.apply(i);
74
+ return this.residDropout.apply(n, { training: e });
75
75
  }
76
76
  call(t, e = !1, s = !1) {
77
77
  return this.tf.tidy(() => {
78
- const [a, r, n] = this.getQKV(t), i = this.getAttentionScores(a, r, e), o = this.tf.matMul(i, n);
79
- return { output: this.getOutputProjection(o, e), attention: s ? i.mean(1) : void 0 };
78
+ const [a, r, o] = this.getQKV(t), i = this.getAttentionScores(a, r, e), n = this.tf.matMul(i, o);
79
+ return { output: this.getOutputProjection(n, e), attention: s ? i.mean(1) : void 0 };
80
80
  });
81
81
  }
82
+ dispose() {
83
+ this.cAttn.dispose(), this.cProj.dispose(), this.attnDropout.dispose(), this.residDropout.dispose(), this.bias.dispose(), this.maskInf.dispose(), this.divisor.dispose();
84
+ }
82
85
  }
83
86
  export {
84
87
  m as default
@@ -9,4 +9,5 @@ export default class LayerNorm {
9
9
  getWeights(): TF.Tensor[];
10
10
  setWeights(weights: TF.Tensor[]): void;
11
11
  apply(x: TF.Tensor): TF.Tensor;
12
+ dispose(): void;
12
13
  }
@@ -1,4 +1,4 @@
1
- class u {
1
+ class h {
2
2
  gamma;
3
3
  //private beta: TF.Variable;
4
4
  epsilon;
@@ -20,11 +20,14 @@ class u {
20
20
  }
21
21
  apply(a) {
22
22
  return this.tf.tidy(() => {
23
- const s = a.mean(-1, !0), t = a.sub(s), n = t.square().mean(-1, !0).add(this.epsilon).rsqrt();
24
- return t.mul(n).mul(this.gamma);
23
+ const s = a.mean(-1, !0), t = a.sub(s), i = t.square().mean(-1, !0).add(this.epsilon).rsqrt();
24
+ return t.mul(i).mul(this.gamma);
25
25
  });
26
26
  }
27
+ dispose() {
28
+ this.gamma.dispose();
29
+ }
27
30
  }
28
31
  export {
29
- u as default
32
+ h as default
30
33
  };
@@ -14,4 +14,5 @@ export default class MLP {
14
14
  saveWeights(map: Map<string, TF.Tensor[]>): void;
15
15
  loadWeights(weights: Map<string, TF.Tensor[]>): void;
16
16
  call(x: TF.Tensor, training?: boolean): TF.Tensor;
17
+ dispose(): void;
17
18
  }
@@ -5,27 +5,27 @@ class l {
5
5
  tf;
6
6
  index;
7
7
  _trainable = !0;
8
- constructor(t, i, e) {
9
- this.tf = t, this.index = i, this.cFc = this.tf.layers.dense({
10
- units: e.mlpFactor * e.nEmbed,
8
+ constructor(t, e, i) {
9
+ this.tf = t, this.index = e, this.cFc = this.tf.layers.dense({
10
+ units: i.mlpFactor * i.nEmbed,
11
11
  activation: "gelu",
12
- useBias: e.biasInLinear,
12
+ useBias: i.biasInLinear,
13
13
  kernelInitializer: this.tf.initializers.randomNormal({
14
14
  mean: 0,
15
15
  stddev: 0.02
16
16
  }),
17
17
  biasInitializer: "zeros",
18
- name: `block_${i}_mlp_cFc`
18
+ name: `block_${e}_mlp_cFc`
19
19
  }), this.cProj = this.tf.layers.dense({
20
- units: e.nEmbed,
21
- useBias: e.biasInLinear,
20
+ units: i.nEmbed,
21
+ useBias: i.biasInLinear,
22
22
  kernelInitializer: this.tf.initializers.randomNormal({
23
23
  mean: 0,
24
- stddev: 0.02 / Math.sqrt(2 * e.nLayer)
24
+ stddev: 0.02 / Math.sqrt(2 * i.nLayer)
25
25
  }),
26
26
  biasInitializer: "zeros",
27
- name: `block_${i}_mlp_cProj`
28
- }), this.dropout = this.tf.layers.dropout({ rate: e.dropout });
27
+ name: `block_${e}_mlp_cProj`
28
+ }), this.dropout = this.tf.layers.dropout({ rate: i.dropout });
29
29
  }
30
30
  get variables() {
31
31
  return [
@@ -45,12 +45,15 @@ class l {
45
45
  loadWeights(t) {
46
46
  this.cFc.setWeights(t.get(`block_${this.index}_mlpHidden`) || []), this.cProj.setWeights(t.get(`block_${this.index}_mlpOut`) || []);
47
47
  }
48
- call(t, i = !1) {
48
+ call(t, e = !1) {
49
49
  return this.tf.tidy(() => {
50
- const e = this.cFc.apply(t), s = this.cProj.apply(e);
51
- return this.dropout.apply(s, { training: i });
50
+ const i = this.cFc.apply(t), s = this.cProj.apply(i);
51
+ return this.dropout.apply(s, { training: e });
52
52
  });
53
53
  }
54
+ dispose() {
55
+ this.cFc.dispose(), this.cProj.dispose(), this.dropout.dispose();
56
+ }
54
57
  }
55
58
  export {
56
59
  l as default
@@ -19,4 +19,5 @@ export default class TiedEmbeddingOutputLayer {
19
19
  vocabSize: number;
20
20
  embedDim: number;
21
21
  };
22
+ dispose(): void;
22
23
  }
@@ -168,11 +168,11 @@ const we = /* @__PURE__ */ p({ imag_: Ke });
168
168
  * limitations under the License.
169
169
  * =============================================================================
170
170
  */
171
- function ze(t, e = 0.2) {
171
+ function We(t, e = 0.2) {
172
172
  const n = { x: a(t, "x", "leakyRelu") }, r = { alpha: e };
173
173
  return u.runKernel(ae, n, r);
174
174
  }
175
- const Ee = /* @__PURE__ */ p({ leakyRelu_: ze });
175
+ const ze = /* @__PURE__ */ p({ leakyRelu_: We });
176
176
  /**
177
177
  * @license
178
178
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -189,11 +189,11 @@ const Ee = /* @__PURE__ */ p({ leakyRelu_: ze });
189
189
  * limitations under the License.
190
190
  * =============================================================================
191
191
  */
192
- function We(t) {
192
+ function Ee(t) {
193
193
  const s = { x: a(t, "x", "neg") };
194
194
  return u.runKernel(ue, s);
195
195
  }
196
- const Oe = /* @__PURE__ */ p({ neg_: We });
196
+ const Oe = /* @__PURE__ */ p({ neg_: Ee });
197
197
  /**
198
198
  * @license
199
199
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -368,7 +368,7 @@ function Ue(t, e, s, n) {
368
368
  if (e === "prelu")
369
369
  return Fe(t, s);
370
370
  if (e === "leakyrelu")
371
- return Ee(t, n);
371
+ return ze(t, n);
372
372
  if (e === "sigmoid")
373
373
  return De(t);
374
374
  throw new Error(`Unknown fused activation ${e}.`);
@@ -397,18 +397,18 @@ function Je({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activ
397
397
  }
398
398
  let o = a(t, "a", "fused matMul"), c = a(e, "b", "fused matMul");
399
399
  [o, c] = A(o, c);
400
- const b = s ? o.shape[o.rank - 2] : o.shape[o.rank - 1], D = n ? c.shape[c.rank - 1] : c.shape[c.rank - 2], w = s ? o.shape[o.rank - 1] : o.shape[o.rank - 2], z = n ? c.shape[c.rank - 2] : c.shape[c.rank - 1], T = o.shape.slice(0, -2), S = c.shape.slice(0, -2), N = q(T), v = q(S);
400
+ const b = s ? o.shape[o.rank - 2] : o.shape[o.rank - 1], D = n ? c.shape[c.rank - 1] : c.shape[c.rank - 2], w = s ? o.shape[o.rank - 1] : o.shape[o.rank - 2], W = n ? c.shape[c.rank - 2] : c.shape[c.rank - 1], T = o.shape.slice(0, -2), S = c.shape.slice(0, -2), N = q(T), v = q(S);
401
401
  B(b === D, () => `Error in fused matMul: inner shapes (${b}) and (${D}) of Tensors with shapes ${o.shape} and ${c.shape} and transposeA=${s} and transposeB=${n} must match.`);
402
- const O = P(o.shape.slice(0, -2), c.shape.slice(0, -2)).concat([w, z]), R = s ? f(o, [N, b, w]) : f(o, [N, w, b]), F = n ? f(c, [v, z, D]) : f(c, [v, D, z]);
402
+ const O = P(o.shape.slice(0, -2), c.shape.slice(0, -2)).concat([w, W]), R = s ? f(o, [N, b, w]) : f(o, [N, w, b]), F = n ? f(c, [v, W, D]) : f(c, [v, D, W]);
403
403
  let y;
404
404
  r != null && (y = a(r, "bias", "fused matMul"), [y] = A(y, o), P(O, y.shape));
405
405
  let C;
406
406
  l != null && (C = a(l, "prelu weights", "fused matMul"));
407
407
  const G = (x, K) => {
408
- const [g, $, k, E] = K, m = qe(f(x, k.shape), k, i);
408
+ const [g, $, k, z] = K, m = qe(f(x, k.shape), k, i);
409
409
  let _, M;
410
410
  if (!s && !n ? (_ = d(m, $, !1, !0), M = d(g, m, !0, !1)) : !s && n ? (_ = d(m, $, !1, !1), M = d(m, g, !0, !1)) : s && !n ? (_ = d($, m, !1, !0), M = d(g, m, !1, !1)) : (_ = d($, m, !0, !0), M = d(m, g, !0, !0)), r != null) {
411
- const Q = Pe(E, m);
411
+ const Q = Pe(z, m);
412
412
  return [_, M, Q];
413
413
  } else
414
414
  return [_, M];
@@ -425,11 +425,11 @@ function Je({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activ
425
425
  );
426
426
  return $([K, g, k]), { value: f(k, O), gradFunc: G };
427
427
  })(R, F) : U((K, g, $, k) => {
428
- const E = (
428
+ const z = (
429
429
  // tslint:disable-next-line: no-unnecessary-type-assertion
430
430
  u.runKernel(H, I, j)
431
431
  );
432
- return k([K, g, E, $]), { value: f(E, O), gradFunc: G };
432
+ return k([K, g, z, $]), { value: f(z, O), gradFunc: G };
433
433
  })(R, F, y);
434
434
  }
435
435
  const J = /* @__PURE__ */ p({ fusedMatMul_: Je });
@@ -442,9 +442,9 @@ const J = /* @__PURE__ */ p({ fusedMatMul_: Je });
442
442
  * https://opensource.org/licenses/MIT.
443
443
  * =============================================================================
444
444
  */
445
- class W extends Error {
445
+ class E extends Error {
446
446
  constructor(e) {
447
- super(e), Object.setPrototypeOf(this, W.prototype);
447
+ super(e), Object.setPrototypeOf(this, E.prototype);
448
448
  }
449
449
  }
450
450
  /**
@@ -458,11 +458,11 @@ class W extends Error {
458
458
  */
459
459
  function Qe(t, e, s, n) {
460
460
  if (t.rank < 2 || e.rank < 2)
461
- throw new W(`dot requires both inputs to be rank >= 2 but got x shape = ${t.shape} and y shape = ${e.shape}`);
461
+ throw new E(`dot requires both inputs to be rank >= 2 but got x shape = ${t.shape} and y shape = ${e.shape}`);
462
462
  if (e.rank >= 3) {
463
463
  const r = t.shape.slice(-1)[0], i = e.shape.slice(-2)[0];
464
464
  if (r !== i)
465
- throw new W(`If rank y >= 3, then the second last dim of y must equal the last dim of x but got x shape = ${t.shape} and y shape = ${e.shape}`);
465
+ throw new E(`If rank y >= 3, then the second last dim of y must equal the last dim of x but got x shape = ${t.shape} and y shape = ${e.shape}`);
466
466
  }
467
467
  if (t.rank === 2 && e.rank === 2)
468
468
  return J({
@@ -526,6 +526,9 @@ class Ye {
526
526
  embedDim: this.embedDim
527
527
  };
528
528
  }
529
+ dispose() {
530
+ this.tiedWeights.dispose();
531
+ }
529
532
  }
530
533
  export {
531
534
  Ye as default
@@ -20,4 +20,5 @@ export default class Block {
20
20
  output: TF.Tensor;
21
21
  attention?: TF.Tensor;
22
22
  };
23
+ dispose(): void;
23
24
  }
@@ -10,8 +10,8 @@ class u {
10
10
  index;
11
11
  _trainable = !0;
12
12
  skipped = !1;
13
- constructor(t, s, i) {
14
- this.tf = t, this.index = s, this.ln1 = new l(t, [i.nEmbed], 1e-5, `block_${this.index}_ln1`), this.attn = new h(this.tf, this.index, i), this.ln2 = new l(t, [i.nEmbed], 1e-5, `block_${this.index}_ln2`), this.mlp = new r(this.tf, this.index, i);
13
+ constructor(t, i, s) {
14
+ this.tf = t, this.index = i, this.ln1 = new l(t, [s.nEmbed], 1e-5, `block_${this.index}_ln1`), this.attn = new h(this.tf, this.index, s), this.ln2 = new l(t, [s.nEmbed], 1e-5, `block_${this.index}_ln2`), this.mlp = new r(this.tf, this.index, s);
15
15
  }
16
16
  get variables() {
17
17
  return [
@@ -33,18 +33,21 @@ class u {
33
33
  loadWeights(t) {
34
34
  this.attn.loadWeights(t), this.mlp.loadWeights(t), this.ln1.setWeights(t.get(`block_${this.index}_ln1`) || []), this.ln2.setWeights(t.get(`block_${this.index}_ln2`) || []);
35
35
  }
36
- getMLPOutput(t, s) {
37
- const i = this.ln2.apply(t), e = this.mlp.call(i, s);
36
+ getMLPOutput(t, i) {
37
+ const s = this.ln2.apply(t), e = this.mlp.call(s, i);
38
38
  return t.add(e);
39
39
  }
40
- call(t, s = !1, i = !1) {
40
+ call(t, i = !1, s = !1) {
41
41
  return this.tf.tidy(() => {
42
42
  if (this.skipped)
43
43
  return { output: t };
44
- const e = this.ln1.apply(t), n = this.attn.call(e, s, i), a = t.add(n.output);
45
- return { output: this.getMLPOutput(a, s), attention: n.attention };
44
+ const e = this.ln1.apply(t), n = this.attn.call(e, i, s), a = t.add(n.output);
45
+ return { output: this.getMLPOutput(a, i), attention: n.attention };
46
46
  });
47
47
  }
48
+ dispose() {
49
+ this.ln1.dispose(), this.attn.dispose(), this.ln2.dispose(), this.mlp.dispose();
50
+ }
48
51
  }
49
52
  export {
50
53
  u as default
@@ -1,6 +1,6 @@
1
- import { E as h } from "../index-SOhdqzHq.js";
2
- const c = ["<eos>", "<unk>"];
3
- class l extends h {
1
+ import { E as r } from "../index-SOhdqzHq.js";
2
+ const h = ["<eos>", "<unk>"];
3
+ class l extends r {
4
4
  vocabSize = 0;
5
5
  eosToken = 0;
6
6
  unkToken = 0;
@@ -9,7 +9,7 @@ class l extends h {
9
9
  constructor(s) {
10
10
  if (super(), Array.isArray(s))
11
11
  if (this.vocab = s, this.vocab.length > 0)
12
- this.vocabSize = this.vocab.length, this.eosToken = this.vocab.indexOf("<eos>"), this.unkToken = this.vocab.indexOf("<unk>"), this.unkToken === -1 && (this.unkToken = this.eosToken), this.vocab.forEach((i, o) => {
12
+ this.vocabSize = this.vocab.length, this.eosToken = this.vocab.indexOf("<eos>"), this.unkToken = this.vocab.indexOf("<unk>"), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf("<pad>")), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf("_")), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf(" ")), this.unkToken === -1 && (this.unkToken = this.eosToken), this.vocab.forEach((i, o) => {
13
13
  this.cache.set(i, o);
14
14
  });
15
15
  else
@@ -23,29 +23,29 @@ class l extends h {
23
23
  destroy() {
24
24
  }
25
25
  async train(s) {
26
- const i = s.map((e) => e.split("")).flat(), o = new Set(i), t = Array.from(o), n = this.vocabSize - c.length;
27
- if (t.length > n) {
28
- const e = /* @__PURE__ */ new Map();
26
+ const i = s.map((t) => t.split("")).flat(), o = new Set(i), e = Array.from(o), n = this.vocabSize - h.length;
27
+ if (e.length > n) {
28
+ const t = /* @__PURE__ */ new Map();
29
29
  i.forEach((a) => {
30
- e.set(a, (e.get(a) || 0) + 1);
31
- }), t.sort((a, r) => (e.get(a) || 0) - (e.get(r) || 0)), t.splice(0, t.length - n);
32
- } else if (t.length < n)
33
- for (; t.length < n; )
34
- t.push("<pad>");
35
- return t.sort((e, a) => e.charCodeAt(0) - a.charCodeAt(0)), this.vocab = [...t, ...c], this.eosToken = this.vocab.indexOf("<eos>"), this.unkToken = this.vocab.indexOf("<unk>"), this.vocabSize = this.vocab.length, this.cache.clear(), this.vocab.forEach((e, a) => {
36
- this.cache.set(e, a);
37
- }), this.vocabSize;
30
+ t.set(a, (t.get(a) || 0) + 1);
31
+ }), e.sort((a, c) => (t.get(a) || 0) - (t.get(c) || 0)), e.splice(0, e.length - n);
32
+ } else if (e.length < n)
33
+ for (; e.length < n; )
34
+ e.push("<pad>");
35
+ return e.sort((t, a) => t.charCodeAt(0) - a.charCodeAt(0)), this.vocab = [...e, ...h], this.eosToken = this.vocab.indexOf("<eos>"), this.unkToken = this.vocab.indexOf("<unk>"), this.vocabSize = this.vocab.length, this.cache.clear(), this.vocab.forEach((t, a) => {
36
+ this.cache.set(t, a);
37
+ }), this.emit("trainStatus", "trained"), this.vocabSize;
38
38
  }
39
39
  async tokenise(s, i) {
40
40
  if (!this.trained)
41
41
  throw new Error("Tokeniser not trained");
42
- return s.map((t) => i ? t.split("").map((n) => this.cache.get(n) ?? this.unkToken) : t.split("").map((n) => {
43
- const e = this.cache.get(n);
44
- return e !== void 0 ? this.vocab[e] : "<unk>";
42
+ return s.map((e) => i ? e.split("").map((n) => this.cache.get(n) ?? this.unkToken) : e.split("").map((n) => {
43
+ const t = this.cache.get(n);
44
+ return t !== void 0 ? this.vocab[t] : "<unk>";
45
45
  }));
46
46
  }
47
47
  async detokenise(s) {
48
- return s.map((o) => o.map((t) => this.vocab[t]).join(""));
48
+ return s.map((o) => o.map((e) => this.vocab[e]).join(""));
49
49
  }
50
50
  async encode(s) {
51
51
  return (await this.tokenise([s], !0))[0];
@@ -60,10 +60,10 @@ class l extends h {
60
60
  return [];
61
61
  }
62
62
  async createTrainingData(s, i = 5) {
63
- const o = await this.tokenise(s, !0), t = [], n = [];
64
- for (let e = 0; e < o.length - i; e++)
65
- t.push(...o[e].slice(0, i)), n.push(o[e + 1][0]);
66
- return [t, n];
63
+ const o = await this.tokenise(s, !0), e = [], n = [];
64
+ for (let t = 0; t < o.length - i; t++)
65
+ e.push(...o[t].slice(0, i)), n.push(o[t + 1][0]);
66
+ return [e, n];
67
67
  }
68
68
  }
69
69
  export {
@@ -1,3 +1,9 @@
1
1
  import { default as NanoGPT } from '../NanoGPTModel';
2
2
  import { ITokeniser } from '../tokeniser/type';
3
- export declare function saveModel(model: NanoGPT, tokeniser: ITokeniser): Promise<Blob>;
3
+ export interface SaveOptions {
4
+ includeLog?: boolean;
5
+ name?: string;
6
+ metadata?: Record<string, unknown>;
7
+ files?: Record<string, unknown>;
8
+ }
9
+ export declare function saveModel(model: NanoGPT, tokeniser: ITokeniser, options?: SaveOptions): Promise<Blob>;
@@ -1,21 +1,36 @@
1
- import { z as f } from "../jszip.min-BLbRbbKt.js";
2
- import { exportWeights as g } from "./weights.js";
3
- async function l(i, t) {
4
- const o = i.saveWeights(), e = new f(), s = {};
5
- for (const [n, r] of o) {
6
- const a = await g(r);
7
- s[n] = a.spec, e.file(`${n}.bin`, a.data.buffer, { binary: !0 });
1
+ import { z as g } from "../jszip.min-BLbRbbKt.js";
2
+ import { exportWeights as l } from "./weights.js";
3
+ const b = "1.0.0";
4
+ async function p(t, s, i) {
5
+ const o = i?.includeLog ?? !0, c = t.saveWeights(), e = new g(), f = {};
6
+ for (const [n, a] of c) {
7
+ const r = await l(a);
8
+ f[n] = r.spec, e.file(`${n}.bin`, r.data.buffer, { binary: !0 });
8
9
  }
9
- return e.file("manifest.json", JSON.stringify({ weightSpec: s, config: i.config }), {
10
- binary: !1
11
- }), e.file(
10
+ if (e.file(
11
+ "manifest.json",
12
+ JSON.stringify({
13
+ weightSpec: f,
14
+ config: t.config,
15
+ version: b,
16
+ application: "@genai-fi/nanogpt",
17
+ meta: i?.metadata,
18
+ name: i?.name
19
+ }),
20
+ {
21
+ binary: !1
22
+ }
23
+ ), e.file(
12
24
  "tokeniser.json",
13
- JSON.stringify({ vocab: t.getVocab(), merges: await t.getMerges() }),
25
+ JSON.stringify({ vocab: s.getVocab(), merges: await s.getMerges() }),
14
26
  {
15
27
  binary: !1
16
28
  }
17
- ), e.file("log.json", JSON.stringify(i.log), { binary: !1 }), e.generateAsync({ type: "blob" });
29
+ ), o && e.file("log.json", JSON.stringify(t.log), { binary: !1 }), i?.files)
30
+ for (const [n, a] of Object.entries(i.files))
31
+ e.file(n, JSON.stringify(a), { binary: !1 });
32
+ return e.generateAsync({ type: "blob" });
18
33
  }
19
34
  export {
20
- l as saveModel
35
+ p as saveModel
21
36
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@genai-fi/nanogpt",
3
- "version": "0.1.7",
3
+ "version": "0.1.8",
4
4
  "type": "module",
5
5
  "main": "dist/main.js",
6
6
  "types": "dist/main.d.ts",