@genai-fi/nanogpt 0.2.8 → 0.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,10 +8,12 @@ export interface IGenerateOptions extends GenerateOptions {
8
8
  export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
9
9
  private readonly model;
10
10
  private readonly tokeniser;
11
+ private active;
11
12
  constructor(model: NanoGPT, tokeniser: ITokeniser);
12
13
  private tokenisePrompt;
13
14
  private generateNoCache;
14
15
  private processResponse;
15
16
  private generateCache;
16
17
  generate(prompt?: string, options?: IGenerateOptions): Promise<string>;
18
+ stop(): void;
17
19
  }
package/dist/Generator.js CHANGED
@@ -1,65 +1,70 @@
1
1
  import { E as u } from "./index-Dwqa6Zy2.js";
2
- class p extends u {
2
+ class f extends u {
3
3
  constructor(s, e) {
4
4
  super(), this.model = s, this.tokeniser = e;
5
5
  }
6
+ active = !1;
6
7
  async tokenisePrompt(s) {
7
8
  const e = s ? await this.tokeniser.tokenise([s], !0) : [[this.tokeniser.eosToken]];
8
9
  return this.model.tf.tensor2d(e, [1, e[0].length], "int32");
9
10
  }
10
11
  async generateNoCache(s, e) {
11
- let t = await this.tokenisePrompt(s), n = s || "";
12
- const a = e?.maxLength ?? 1e3;
13
- for (let i = 0; i < a; i++) {
12
+ let t = await this.tokenisePrompt(s), i = s || "";
13
+ const o = e?.maxLength ?? 1e3;
14
+ for (let a = 0; a < o && this.active; a++) {
14
15
  const {
15
- output: o,
16
+ output: n,
16
17
  attention: c,
17
- probabilities: h
18
- } = this.model.generate(t, void 0, e), l = t;
19
- t = this.model.tf.concat([t, o], 1), l.dispose();
20
- const r = await this.processResponse(o, c, h);
21
- if (o.dispose(), r === null)
18
+ probabilities: l
19
+ } = this.model.generate(t, void 0, e), h = t;
20
+ t = this.model.tf.concat([t, n], 1), h.dispose();
21
+ const r = await this.processResponse(n, c, l);
22
+ if (n.dispose(), r === null)
22
23
  break;
23
- n += r;
24
+ i += r;
24
25
  }
25
- return t.dispose(), n;
26
+ return t.dispose(), i;
26
27
  }
27
28
  async processResponse(s, e, t) {
28
- const n = (await s.array())[0][0];
29
- if (n === this.tokeniser.eosToken)
29
+ const i = (await s.array())[0][0];
30
+ if (i === this.tokeniser.eosToken)
30
31
  return null;
31
- const a = await this.tokeniser.decode([n]);
32
- let i;
33
- e && (i = await e.array(), e.dispose());
34
- let o;
35
- return t && (o = await t.array(), t.dispose()), this.emit("tokens", [n], a, i, o), a;
32
+ const o = await this.tokeniser.decode([i]);
33
+ let a;
34
+ e && (a = await e.array(), e.dispose());
35
+ let n;
36
+ return t && (n = await t.array(), t.dispose()), this.emit("tokens", [i], o, a, n), o;
36
37
  }
37
38
  async generateCache(s, e) {
38
- let t = await this.tokenisePrompt(s), n = s || "";
39
- const a = new Array(this.model.config.nLayer).fill(void 0), i = e?.maxLength ?? 1e3;
40
- for (let o = 0; o < i; o++) {
39
+ let t = await this.tokenisePrompt(s), i = s || "";
40
+ const o = new Array(this.model.config.nLayer).fill(void 0), a = e?.maxLength ?? 1e3;
41
+ for (let n = 0; n < a && this.active; n++) {
41
42
  const {
42
43
  output: c,
43
- attention: h,
44
- probabilities: l
45
- } = this.model.generate(t, a, {
44
+ attention: l,
45
+ probabilities: h
46
+ } = this.model.generate(t, o, {
46
47
  ...e,
47
48
  usePadding: !1
48
49
  });
49
50
  t.dispose(), t = c;
50
- const r = await this.processResponse(c, h, l);
51
+ const r = await this.processResponse(c, l, h);
51
52
  if (r === null)
52
53
  break;
53
- n += r;
54
+ i += r;
54
55
  }
55
- return t.dispose(), n;
56
+ return t.dispose(), i;
56
57
  }
57
58
  async generate(s, e) {
58
- this.emit("start");
59
- const t = this.model.config.useRope && !e?.noCache ? this.generateCache(s, e) : this.generateNoCache(s, e);
60
- return this.emit("stop"), t;
59
+ const t = s && s.length > this.model.config.blockSize ? s.slice(-this.model.config.blockSize) : s;
60
+ this.active = !0, this.emit("start");
61
+ const o = await (this.model.config.useRope && !e?.noCache ? this.generateCache(t, e) : this.generateNoCache(t, e));
62
+ return this.active = !1, this.emit("stop"), o;
63
+ }
64
+ stop() {
65
+ this.active = !1;
61
66
  }
62
67
  }
63
68
  export {
64
- p as default
69
+ f as default
65
70
  };
@@ -1,6 +1,8 @@
1
1
  import { default as TF } from '@tensorflow/tfjs';
2
2
  import { GPTConfig } from './config';
3
3
  import { KVCache } from './layers/CausalSelfAttention';
4
+ import { default as MemoryProfiler } from './utilities/profile';
5
+ import { default as BaseLayer } from './layers/BaseLayer';
4
6
  export interface TrainingLogEntry {
5
7
  loss: number;
6
8
  valLoss?: number;
@@ -16,7 +18,7 @@ export interface GenerateOptions {
16
18
  includeAttention?: boolean;
17
19
  includeProbabilities?: boolean;
18
20
  }
19
- export default class NanoGPT {
21
+ export default class NanoGPT extends BaseLayer {
20
22
  readonly config: GPTConfig;
21
23
  private wte;
22
24
  private wpe?;
@@ -34,6 +36,7 @@ export default class NanoGPT {
34
36
  setSkipMask(mask: boolean[]): void;
35
37
  setTrainableMask(mask: boolean[]): void;
36
38
  set trainable(value: boolean);
39
+ setProfiler(value: MemoryProfiler | undefined): void;
37
40
  private validateInput;
38
41
  private calculateLoss;
39
42
  private computeAttentionRollout;
@@ -1,11 +1,12 @@
1
- import { defaultConfig as $ } from "./config.js";
1
+ import { defaultConfig as v } from "./config.js";
2
2
  import z from "./layers/TransformerBlock.js";
3
3
  import S from "./layers/TiedEmbedding.js";
4
- import I from "./layers/RoPECache.js";
5
- import _ from "./layers/RMSNorm.js";
6
- import { estimateParameterCount as W } from "./utilities/parameters.js";
7
- import { createSoftmaxCrossEntropyWithGrad as C } from "./training/sparseCrossEntropy.js";
8
- class K {
4
+ import _ from "./layers/RoPECache.js";
5
+ import I from "./layers/RMSNorm.js";
6
+ import { estimateParameterCount as F } from "./utilities/parameters.js";
7
+ import { createSoftmaxCrossEntropyWithGrad as L } from "./training/sparseCrossEntropy.js";
8
+ import P from "./layers/BaseLayer.js";
9
+ class A extends P {
9
10
  config;
10
11
  wte;
11
12
  // Token embeddings
@@ -21,7 +22,7 @@ class K {
21
22
  log = [];
22
23
  // Training log
23
24
  constructor(t, e = {}) {
24
- this.tf = t, this.config = { ...$, ...e }, this.wte = new S(t, {
25
+ super(), this.tf = t, this.config = { ...v, ...e }, this.wte = new S(t, {
25
26
  vocabSize: this.config.vocabSize,
26
27
  embedDim: this.config.nEmbed,
27
28
  name: "token_embedding"
@@ -30,10 +31,10 @@ class K {
30
31
  outputDim: this.config.nEmbed,
31
32
  name: "positional_embedding",
32
33
  embeddingsInitializer: this.tf.initializers.randomNormal({ mean: 0, stddev: 0.02 })
33
- }) : this.ropeCache = new I(t, this.config), this.drop = this.tf.layers.dropout({ rate: this.config.dropout }), this.blocks = [];
34
+ }) : this.ropeCache = new _(t, this.config), this.drop = this.tf.layers.dropout({ rate: this.config.dropout }), this.blocks = [];
34
35
  for (let o = 0; o < this.config.nLayer; o++)
35
36
  this.blocks.push(new z(this.tf, o, this.config, this.ropeCache));
36
- this.lnF = new _(t, [this.config.nEmbed], 1e-8, "final_rms_norm");
37
+ this.lnF = new I(t, [this.config.nEmbed], 1e-8, "final_rms_norm");
37
38
  }
38
39
  get variables() {
39
40
  return [
@@ -86,6 +87,12 @@ class K {
86
87
  e.trainable = t;
87
88
  this.lnF.trainable = t;
88
89
  }
90
+ setProfiler(t) {
91
+ this._profiler = t;
92
+ for (const e of this.blocks)
93
+ e.setProfiler(t);
94
+ this.lnF.setProfiler(t);
95
+ }
89
96
  validateInput(t) {
90
97
  if (t.shape.length !== 2)
91
98
  throw new Error(`Invalid input shape: expected [batch_size, sequence_length], got ${t.shape}`);
@@ -96,7 +103,7 @@ class K {
96
103
  }
97
104
  calculateLoss(t, e) {
98
105
  try {
99
- return C()(t, e).mean();
106
+ return L()(t, e).mean();
100
107
  } catch (o) {
101
108
  throw console.error("Error computing loss:", o), new Error(`Loss computation failed: ${o}`);
102
109
  }
@@ -139,24 +146,25 @@ class K {
139
146
  }
140
147
  forward(t, e, o = !1, i = !1, s) {
141
148
  return this.validateInput(t), this.tf.tidy(() => {
149
+ this.startMemory();
142
150
  const l = s?.[0]?.length ?? 0;
143
151
  let r = this.inputPhase(t, l, o);
144
152
  const n = [];
145
153
  if (s && s.length !== this.blocks.length)
146
154
  throw console.error("Cache", s), new Error(`Cache length ${s.length} does not match number of blocks ${this.blocks.length}`);
147
155
  for (let a = 0; a < this.blocks.length; a++) {
148
- const d = this.blocks[a], {
149
- output: g,
150
- attention: u,
156
+ const d = r, g = this.blocks[a], {
157
+ output: m,
158
+ attention: b,
151
159
  cache: f
152
- } = d.call(r, o, i, s ? s[a] : void 0);
153
- r = g, i && u && n.push(u), s && f ? (s[a]?.k.dispose(), s[a]?.v.dispose(), s[a] = f) : f && (f.k.dispose(), f.v.dispose());
160
+ } = g.call(r, o, i, s ? s[a] : void 0);
161
+ r = m, d.dispose(), i && b && n.push(b), s && f ? (s[a]?.k.dispose(), s[a]?.v.dispose(), s[a] = f) : f && (f.k.dispose(), f.v.dispose());
154
162
  }
155
163
  let h;
156
164
  i && n.length > 0 && (h = this.computeAttentionRollout(n)), r = this.lnF.apply(r);
157
165
  const c = this.wte.project(r);
158
166
  let p;
159
- return e && (p = this.calculateLoss(c, e)), { logits: c, loss: p, attention: i ? h : void 0 };
167
+ return e && (p = this.calculateLoss(c, e)), this.endMemory("Forward"), { logits: c, loss: p, attention: i ? h : void 0 };
160
168
  });
161
169
  }
162
170
  generate(t, e, o) {
@@ -168,24 +176,24 @@ class K {
168
176
  ), p = l ? this.config.blockSize - c.shape[1] : 0, a = p > 0 ? this.tf.pad(c, [
169
177
  [0, 0],
170
178
  [0, p]
171
- ]) : c, { logits: d, attention: g } = this.forward(a, void 0, !1, r, e), u = d.shape[1] - 1 - p, f = d.slice([0, u, 0], [d.shape[0], 1, d.shape[2]]), w = g ? g.slice([0, u, 0], [g.shape[0], 1, g.shape[2]]) : void 0, b = f.div(i);
172
- let m;
179
+ ]) : c, { logits: d, attention: g } = this.forward(a, void 0, !1, r, e), m = d.shape[1] - 1 - p, b = d.slice([0, m, 0], [d.shape[0], 1, d.shape[2]]), f = g ? g.slice([0, m, 0], [g.shape[0], 1, g.shape[2]]) : void 0, k = b.div(i);
180
+ let u;
173
181
  if (s) {
174
- const { values: v, indices: y } = this.tf.topk(b, s), E = this.tf.multinomial(v.squeeze([1]), 1);
175
- m = this.tf.gather(y.squeeze([1]), E, 1);
182
+ const { values: y, indices: E } = this.tf.topk(k, s), $ = this.tf.multinomial(y.squeeze([1]), 1);
183
+ u = this.tf.gather(E.squeeze([1]), $, 1);
176
184
  } else
177
- m = this.tf.multinomial(b.squeeze([1]), 1);
178
- let k;
179
- return o?.includeProbabilities && (k = this.tf.softmax(b.squeeze([1]))), m = m.reshape([1, 1]), { output: m, attention: w?.squeeze([1]), probabilities: k };
185
+ u = this.tf.multinomial(k.squeeze([1]), 1);
186
+ let w;
187
+ return o?.includeProbabilities && (w = this.tf.softmax(k.squeeze([1]))), u = u.reshape([1, 1]), { output: u, attention: f?.squeeze([1]), probabilities: w };
180
188
  });
181
189
  }
182
190
  getNumParams() {
183
- return W(this.config);
191
+ return F(this.config);
184
192
  }
185
193
  dispose() {
186
194
  this.wte.dispose(), this.wpe && this.wpe.dispose(), this.drop.dispose(), this.blocks.forEach((t) => t.dispose()), this.lnF.dispose();
187
195
  }
188
196
  }
189
197
  export {
190
- K as default
198
+ A as default
191
199
  };
@@ -6,6 +6,7 @@ import { SaveOptions } from './utilities/save';
6
6
  import { default as Generator, IGenerateOptions } from './Generator';
7
7
  import { default as Trainer, ITrainerOptions } from './Trainer';
8
8
  import { default as EE } from 'eventemitter3';
9
+ import { default as MemoryProfiler } from './utilities/profile';
9
10
  type TeachableLLMStatus = 'warmup' | 'awaitingTokens' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
10
11
  export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
11
12
  private _config?;
@@ -23,6 +24,9 @@ export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
23
24
  saveModel(options?: SaveOptions): Promise<Blob>;
24
25
  static loadModel(tf: typeof TF, data: Blob | Buffer | string): TeachableLLM;
25
26
  static create(tf: typeof TF, config?: Partial<GPTConfig>): TeachableLLM;
27
+ getProfiler(): MemoryProfiler | undefined;
28
+ get enableProfiler(): boolean;
29
+ set enableProfiler(value: boolean);
26
30
  getNumParams(): number;
27
31
  trainer(): Trainer;
28
32
  train(text: string[], options?: ITrainerOptions): Promise<void>;
@@ -1,11 +1,11 @@
1
- import { defaultConfig as d } from "./config.js";
2
- import m from "./NanoGPTModel.js";
3
- import { saveModel as u } from "./utilities/save.js";
4
- import { loadModel as l } from "./utilities/load.js";
5
- import f from "./Generator.js";
1
+ import { defaultConfig as h } from "./config.js";
2
+ import d from "./NanoGPTModel.js";
3
+ import { saveModel as m } from "./utilities/save.js";
4
+ import { loadModel as f } from "./utilities/load.js";
5
+ import u from "./Generator.js";
6
6
  import _ from "./Trainer.js";
7
7
  import { E as c } from "./index-Dwqa6Zy2.js";
8
- import { dummyPassAsync as h } from "./utilities/dummy.js";
8
+ import { dummyPassAsync as l } from "./utilities/dummy.js";
9
9
  import g from "./tokeniser/CharTokeniser.js";
10
10
  import "./papaparse.min-C8l2Kvo1.js";
11
11
  import "./index-Tf7vU29b.js";
@@ -13,6 +13,7 @@ import "./jszip.min-CjP2V1VV.js";
13
13
  import "./ops/scatterSub.js";
14
14
  import "./ops/gatherSub.js";
15
15
  import "./ops/attentionMask.js";
16
+ import w from "./utilities/profile.js";
16
17
  class a extends c {
17
18
  _config;
18
19
  _model;
@@ -49,23 +50,23 @@ class a extends c {
49
50
  saveModel(t) {
50
51
  if (!this._model || !this._tokeniser)
51
52
  throw new Error("Model or tokeniser is not initialized.");
52
- return u(this._model, this._tokeniser, t);
53
+ return m(this._model, this._tokeniser, t);
53
54
  }
54
55
  static loadModel(t, r) {
55
56
  const e = new a(t);
56
- return l(t, r).then(({ model: s, tokeniser: o }) => {
57
- e._model = s, e._tokeniser = o, e._config = s.config, e.setStatus("warmup"), h(s).then(() => {
57
+ return f(t, r).then(({ model: o, tokeniser: s }) => {
58
+ e._model = o, e._tokeniser = s, e._config = o.config, e.setStatus("warmup"), l(o).then(() => {
58
59
  e.setStatus("ready");
59
60
  }).catch((i) => {
60
61
  e.setStatus("error"), e.emit("error", i);
61
62
  });
62
- }).catch((s) => {
63
- e.setStatus("error"), e.emit("error", s);
63
+ }).catch((o) => {
64
+ e.setStatus("error"), e.emit("error", o);
64
65
  }), e;
65
66
  }
66
67
  static create(t, r = {}) {
67
- const e = { ...d, ...r }, s = new g(e.vocabSize), o = new m(t, e), i = new a(t, s, o);
68
- return i.setStatus("warmup"), h(o).then(() => {
68
+ const e = { ...h, ...r }, o = new g(e.vocabSize), s = new d(t, e), i = new a(t, o, s);
69
+ return i.setStatus("warmup"), l(s).then(() => {
69
70
  i.tokeniser.trained ? i.setStatus("ready") : (i.setStatus("awaitingTokens"), i.tokeniser.once("trainStatus", (n) => {
70
71
  n === "trained" && i.setStatus("ready");
71
72
  }));
@@ -73,6 +74,20 @@ class a extends c {
73
74
  i.setStatus("error"), i.emit("error", n);
74
75
  }), i;
75
76
  }
77
+ getProfiler() {
78
+ return this._model?.getProfiler();
79
+ }
80
+ get enableProfiler() {
81
+ return !!this._model?.getProfiler();
82
+ }
83
+ set enableProfiler(t) {
84
+ if (t) {
85
+ if (!this._model)
86
+ throw new Error("Model is not initialized.");
87
+ this._model.getProfiler() || this._model.setProfiler(new w());
88
+ } else
89
+ this._model && this._model.setProfiler(void 0);
90
+ }
76
91
  getNumParams() {
77
92
  if (!this._model)
78
93
  throw new Error("Model is not initialized.");
@@ -84,8 +99,8 @@ class a extends c {
84
99
  const t = new _(this._model, this._tokeniser);
85
100
  return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (r) => {
86
101
  const e = this.listeners("trainStep");
87
- for (const s of e)
88
- await s(r);
102
+ for (const o of e)
103
+ await o(r);
89
104
  }), t;
90
105
  }
91
106
  train(t, r) {
@@ -94,7 +109,7 @@ class a extends c {
94
109
  generator() {
95
110
  if (!this._model || !this._tokeniser)
96
111
  throw new Error("Model or tokeniser is not initialized.");
97
- const t = new f(this._model, this._tokeniser);
112
+ const t = new u(this._model, this._tokeniser);
98
113
  return t.on("start", () => {
99
114
  this.status === "ready" && this.setStatus("busy");
100
115
  }), t.on("stop", () => {
@@ -1,4 +1,4 @@
1
- import { o as t, c as s, d as n, E as m, C as r } from "./index-DQfEAU9u.js";
1
+ import { o as c, d as s, g as n, E as m, C as r } from "./index-CWQLouWz.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -15,13 +15,13 @@ import { o as t, c as s, d as n, E as m, C as r } from "./index-DQfEAU9u.js";
15
15
  * limitations under the License.
16
16
  * =============================================================================
17
17
  */
18
- function l(o, c) {
19
- const a = s(o, "real", "complex"), e = s(c, "imag", "complex");
18
+ function l(o, p) {
19
+ const a = s(o, "real", "complex"), e = s(p, "imag", "complex");
20
20
  n(a.shape, e.shape, `real and imag shapes, ${a.shape} and ${e.shape}, must match in call to tf.complex().`);
21
- const p = { real: a, imag: e };
22
- return m.runKernel(r, p);
21
+ const t = { real: a, imag: e };
22
+ return m.runKernel(r, t);
23
23
  }
24
- const i = /* @__PURE__ */ t({ complex_: l });
24
+ const i = /* @__PURE__ */ c({ complex_: l });
25
25
  export {
26
26
  i as c
27
27
  };