@genai-fi/nanogpt 0.2.9 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/dist/Generator.d.ts +2 -0
  2. package/dist/Generator.js +37 -32
  3. package/dist/NanoGPTModel.d.ts +4 -1
  4. package/dist/NanoGPTModel.js +33 -25
  5. package/dist/TeachableLLM.d.ts +4 -0
  6. package/dist/TeachableLLM.js +32 -15
  7. package/dist/{complex-Cd8sqiBC.js → complex-CJ-qCcLB.js} +6 -6
  8. package/dist/{index-Dsg28SG6.js → index-YPKosni4.js} +59 -51
  9. package/dist/layers/BaseLayer.d.ts +8 -0
  10. package/dist/layers/BaseLayer.js +18 -0
  11. package/dist/layers/CausalSelfAttention.d.ts +4 -1
  12. package/dist/layers/CausalSelfAttention.js +47 -55
  13. package/dist/layers/MLP.d.ts +2 -1
  14. package/dist/layers/MLP.js +16 -14
  15. package/dist/layers/RMSNorm.d.ts +2 -1
  16. package/dist/layers/RMSNorm.js +13 -11
  17. package/dist/layers/RoPECache.d.ts +4 -2
  18. package/dist/layers/RoPECache.js +13 -7
  19. package/dist/layers/TiedEmbedding.js +16 -15
  20. package/dist/layers/TransformerBlock.d.ts +4 -1
  21. package/dist/layers/TransformerBlock.js +9 -5
  22. package/dist/main.js +18 -16
  23. package/dist/{mat_mul-BAYDrXvE.js → mat_mul-Bu7bhLms.js} +5 -5
  24. package/dist/ops/attentionMask.js +31 -25
  25. package/dist/ops/gatherSub.js +2 -2
  26. package/dist/ops/node/sparseCrossEntropy.js +1 -1
  27. package/dist/ops/qkv.d.ts +7 -0
  28. package/dist/ops/qkv.js +127 -0
  29. package/dist/ops/rope.d.ts +8 -0
  30. package/dist/ops/rope.js +153 -0
  31. package/dist/ops/scatterSub.js +14 -14
  32. package/dist/reshape-DmnmKT6r.js +25 -0
  33. package/dist/{stack-1o648CP_.js → stack-BtKpB0Ry.js} +5 -5
  34. package/dist/sum-D7fu15XL.js +27 -0
  35. package/dist/training/AdamExt.js +1 -1
  36. package/dist/training/Trainer.js +30 -29
  37. package/dist/training/sparseCrossEntropy.js +34 -33
  38. package/dist/utilities/profile.d.ts +10 -0
  39. package/dist/utilities/profile.js +29 -0
  40. package/package.json +1 -1
  41. package/dist/sum-NWazHI7f.js +0 -49
@@ -8,10 +8,12 @@ export interface IGenerateOptions extends GenerateOptions {
8
8
  export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
9
9
  private readonly model;
10
10
  private readonly tokeniser;
11
+ private active;
11
12
  constructor(model: NanoGPT, tokeniser: ITokeniser);
12
13
  private tokenisePrompt;
13
14
  private generateNoCache;
14
15
  private processResponse;
15
16
  private generateCache;
16
17
  generate(prompt?: string, options?: IGenerateOptions): Promise<string>;
18
+ stop(): void;
17
19
  }
package/dist/Generator.js CHANGED
@@ -1,65 +1,70 @@
1
1
  import { E as u } from "./index-Dwqa6Zy2.js";
2
- class p extends u {
2
+ class f extends u {
3
3
  constructor(s, e) {
4
4
  super(), this.model = s, this.tokeniser = e;
5
5
  }
6
+ active = !1;
6
7
  async tokenisePrompt(s) {
7
8
  const e = s ? await this.tokeniser.tokenise([s], !0) : [[this.tokeniser.eosToken]];
8
9
  return this.model.tf.tensor2d(e, [1, e[0].length], "int32");
9
10
  }
10
11
  async generateNoCache(s, e) {
11
- let t = await this.tokenisePrompt(s), n = s || "";
12
- const a = e?.maxLength ?? 1e3;
13
- for (let i = 0; i < a; i++) {
12
+ let t = await this.tokenisePrompt(s), i = s || "";
13
+ const o = e?.maxLength ?? 1e3;
14
+ for (let a = 0; a < o && this.active; a++) {
14
15
  const {
15
- output: o,
16
+ output: n,
16
17
  attention: c,
17
- probabilities: h
18
- } = this.model.generate(t, void 0, e), l = t;
19
- t = this.model.tf.concat([t, o], 1), l.dispose();
20
- const r = await this.processResponse(o, c, h);
21
- if (o.dispose(), r === null)
18
+ probabilities: l
19
+ } = this.model.generate(t, void 0, e), h = t;
20
+ t = this.model.tf.concat([t, n], 1), h.dispose();
21
+ const r = await this.processResponse(n, c, l);
22
+ if (n.dispose(), r === null)
22
23
  break;
23
- n += r;
24
+ i += r;
24
25
  }
25
- return t.dispose(), n;
26
+ return t.dispose(), i;
26
27
  }
27
28
  async processResponse(s, e, t) {
28
- const n = (await s.array())[0][0];
29
- if (n === this.tokeniser.eosToken)
29
+ const i = (await s.array())[0][0];
30
+ if (i === this.tokeniser.eosToken)
30
31
  return null;
31
- const a = await this.tokeniser.decode([n]);
32
- let i;
33
- e && (i = await e.array(), e.dispose());
34
- let o;
35
- return t && (o = await t.array(), t.dispose()), this.emit("tokens", [n], a, i, o), a;
32
+ const o = await this.tokeniser.decode([i]);
33
+ let a;
34
+ e && (a = await e.array(), e.dispose());
35
+ let n;
36
+ return t && (n = await t.array(), t.dispose()), this.emit("tokens", [i], o, a, n), o;
36
37
  }
37
38
  async generateCache(s, e) {
38
- let t = await this.tokenisePrompt(s), n = s || "";
39
- const a = new Array(this.model.config.nLayer).fill(void 0), i = e?.maxLength ?? 1e3;
40
- for (let o = 0; o < i; o++) {
39
+ let t = await this.tokenisePrompt(s), i = s || "";
40
+ const o = new Array(this.model.config.nLayer).fill(void 0), a = e?.maxLength ?? 1e3;
41
+ for (let n = 0; n < a && this.active; n++) {
41
42
  const {
42
43
  output: c,
43
- attention: h,
44
- probabilities: l
45
- } = this.model.generate(t, a, {
44
+ attention: l,
45
+ probabilities: h
46
+ } = this.model.generate(t, o, {
46
47
  ...e,
47
48
  usePadding: !1
48
49
  });
49
50
  t.dispose(), t = c;
50
- const r = await this.processResponse(c, h, l);
51
+ const r = await this.processResponse(c, l, h);
51
52
  if (r === null)
52
53
  break;
53
- n += r;
54
+ i += r;
54
55
  }
55
- return t.dispose(), n;
56
+ return t.dispose(), i;
56
57
  }
57
58
  async generate(s, e) {
58
- this.emit("start");
59
- const t = this.model.config.useRope && !e?.noCache ? this.generateCache(s, e) : this.generateNoCache(s, e);
60
- return this.emit("stop"), t;
59
+ const t = s && s.length > this.model.config.blockSize ? s.slice(-this.model.config.blockSize) : s;
60
+ this.active = !0, this.emit("start");
61
+ const o = await (this.model.config.useRope && !e?.noCache ? this.generateCache(t, e) : this.generateNoCache(t, e));
62
+ return this.active = !1, this.emit("stop"), o;
63
+ }
64
+ stop() {
65
+ this.active = !1;
61
66
  }
62
67
  }
63
68
  export {
64
- p as default
69
+ f as default
65
70
  };
@@ -1,6 +1,8 @@
1
1
  import { default as TF } from '@tensorflow/tfjs';
2
2
  import { GPTConfig } from './config';
3
3
  import { KVCache } from './layers/CausalSelfAttention';
4
+ import { default as MemoryProfiler } from './utilities/profile';
5
+ import { default as BaseLayer } from './layers/BaseLayer';
4
6
  export interface TrainingLogEntry {
5
7
  loss: number;
6
8
  valLoss?: number;
@@ -16,7 +18,7 @@ export interface GenerateOptions {
16
18
  includeAttention?: boolean;
17
19
  includeProbabilities?: boolean;
18
20
  }
19
- export default class NanoGPT {
21
+ export default class NanoGPT extends BaseLayer {
20
22
  readonly config: GPTConfig;
21
23
  private wte;
22
24
  private wpe?;
@@ -34,6 +36,7 @@ export default class NanoGPT {
34
36
  setSkipMask(mask: boolean[]): void;
35
37
  setTrainableMask(mask: boolean[]): void;
36
38
  set trainable(value: boolean);
39
+ setProfiler(value: MemoryProfiler | undefined): void;
37
40
  private validateInput;
38
41
  private calculateLoss;
39
42
  private computeAttentionRollout;
@@ -1,11 +1,12 @@
1
- import { defaultConfig as $ } from "./config.js";
1
+ import { defaultConfig as v } from "./config.js";
2
2
  import z from "./layers/TransformerBlock.js";
3
3
  import S from "./layers/TiedEmbedding.js";
4
- import I from "./layers/RoPECache.js";
5
- import _ from "./layers/RMSNorm.js";
6
- import { estimateParameterCount as W } from "./utilities/parameters.js";
7
- import { createSoftmaxCrossEntropyWithGrad as C } from "./training/sparseCrossEntropy.js";
8
- class K {
4
+ import _ from "./layers/RoPECache.js";
5
+ import I from "./layers/RMSNorm.js";
6
+ import { estimateParameterCount as F } from "./utilities/parameters.js";
7
+ import { createSoftmaxCrossEntropyWithGrad as L } from "./training/sparseCrossEntropy.js";
8
+ import P from "./layers/BaseLayer.js";
9
+ class A extends P {
9
10
  config;
10
11
  wte;
11
12
  // Token embeddings
@@ -21,7 +22,7 @@ class K {
21
22
  log = [];
22
23
  // Training log
23
24
  constructor(t, e = {}) {
24
- this.tf = t, this.config = { ...$, ...e }, this.wte = new S(t, {
25
+ super(), this.tf = t, this.config = { ...v, ...e }, this.wte = new S(t, {
25
26
  vocabSize: this.config.vocabSize,
26
27
  embedDim: this.config.nEmbed,
27
28
  name: "token_embedding"
@@ -30,10 +31,10 @@ class K {
30
31
  outputDim: this.config.nEmbed,
31
32
  name: "positional_embedding",
32
33
  embeddingsInitializer: this.tf.initializers.randomNormal({ mean: 0, stddev: 0.02 })
33
- }) : this.ropeCache = new I(t, this.config), this.drop = this.tf.layers.dropout({ rate: this.config.dropout }), this.blocks = [];
34
+ }) : this.ropeCache = new _(t, this.config), this.drop = this.tf.layers.dropout({ rate: this.config.dropout }), this.blocks = [];
34
35
  for (let o = 0; o < this.config.nLayer; o++)
35
36
  this.blocks.push(new z(this.tf, o, this.config, this.ropeCache));
36
- this.lnF = new _(t, [this.config.nEmbed], 1e-8, "final_rms_norm");
37
+ this.lnF = new I(t, [this.config.nEmbed], 1e-8, "final_rms_norm");
37
38
  }
38
39
  get variables() {
39
40
  return [
@@ -86,6 +87,12 @@ class K {
86
87
  e.trainable = t;
87
88
  this.lnF.trainable = t;
88
89
  }
90
+ setProfiler(t) {
91
+ this._profiler = t;
92
+ for (const e of this.blocks)
93
+ e.setProfiler(t);
94
+ this.lnF.setProfiler(t);
95
+ }
89
96
  validateInput(t) {
90
97
  if (t.shape.length !== 2)
91
98
  throw new Error(`Invalid input shape: expected [batch_size, sequence_length], got ${t.shape}`);
@@ -96,7 +103,7 @@ class K {
96
103
  }
97
104
  calculateLoss(t, e) {
98
105
  try {
99
- return C()(t, e).mean();
106
+ return L()(t, e).mean();
100
107
  } catch (o) {
101
108
  throw console.error("Error computing loss:", o), new Error(`Loss computation failed: ${o}`);
102
109
  }
@@ -139,24 +146,25 @@ class K {
139
146
  }
140
147
  forward(t, e, o = !1, i = !1, s) {
141
148
  return this.validateInput(t), this.tf.tidy(() => {
149
+ this.startMemory();
142
150
  const l = s?.[0]?.length ?? 0;
143
151
  let r = this.inputPhase(t, l, o);
144
152
  const n = [];
145
153
  if (s && s.length !== this.blocks.length)
146
154
  throw console.error("Cache", s), new Error(`Cache length ${s.length} does not match number of blocks ${this.blocks.length}`);
147
155
  for (let a = 0; a < this.blocks.length; a++) {
148
- const d = this.blocks[a], {
149
- output: g,
150
- attention: u,
156
+ const d = r, g = this.blocks[a], {
157
+ output: m,
158
+ attention: b,
151
159
  cache: f
152
- } = d.call(r, o, i, s ? s[a] : void 0);
153
- r = g, i && u && n.push(u), s && f ? (s[a]?.k.dispose(), s[a]?.v.dispose(), s[a] = f) : f && (f.k.dispose(), f.v.dispose());
160
+ } = g.call(r, o, i, s ? s[a] : void 0);
161
+ r = m, d.dispose(), i && b && n.push(b), s && f ? (s[a]?.k.dispose(), s[a]?.v.dispose(), s[a] = f) : f && (f.k.dispose(), f.v.dispose());
154
162
  }
155
163
  let h;
156
164
  i && n.length > 0 && (h = this.computeAttentionRollout(n)), r = this.lnF.apply(r);
157
165
  const c = this.wte.project(r);
158
166
  let p;
159
- return e && (p = this.calculateLoss(c, e)), { logits: c, loss: p, attention: i ? h : void 0 };
167
+ return e && (p = this.calculateLoss(c, e)), this.endMemory("Forward"), { logits: c, loss: p, attention: i ? h : void 0 };
160
168
  });
161
169
  }
162
170
  generate(t, e, o) {
@@ -168,24 +176,24 @@ class K {
168
176
  ), p = l ? this.config.blockSize - c.shape[1] : 0, a = p > 0 ? this.tf.pad(c, [
169
177
  [0, 0],
170
178
  [0, p]
171
- ]) : c, { logits: d, attention: g } = this.forward(a, void 0, !1, r, e), u = d.shape[1] - 1 - p, f = d.slice([0, u, 0], [d.shape[0], 1, d.shape[2]]), w = g ? g.slice([0, u, 0], [g.shape[0], 1, g.shape[2]]) : void 0, b = f.div(i);
172
- let m;
179
+ ]) : c, { logits: d, attention: g } = this.forward(a, void 0, !1, r, e), m = d.shape[1] - 1 - p, b = d.slice([0, m, 0], [d.shape[0], 1, d.shape[2]]), f = g ? g.slice([0, m, 0], [g.shape[0], 1, g.shape[2]]) : void 0, k = b.div(i);
180
+ let u;
173
181
  if (s) {
174
- const { values: v, indices: y } = this.tf.topk(b, s), E = this.tf.multinomial(v.squeeze([1]), 1);
175
- m = this.tf.gather(y.squeeze([1]), E, 1);
182
+ const { values: y, indices: E } = this.tf.topk(k, s), $ = this.tf.multinomial(y.squeeze([1]), 1);
183
+ u = this.tf.gather(E.squeeze([1]), $, 1);
176
184
  } else
177
- m = this.tf.multinomial(b.squeeze([1]), 1);
178
- let k;
179
- return o?.includeProbabilities && (k = this.tf.softmax(b.squeeze([1]))), m = m.reshape([1, 1]), { output: m, attention: w?.squeeze([1]), probabilities: k };
185
+ u = this.tf.multinomial(k.squeeze([1]), 1);
186
+ let w;
187
+ return o?.includeProbabilities && (w = this.tf.softmax(k.squeeze([1]))), u = u.reshape([1, 1]), { output: u, attention: f?.squeeze([1]), probabilities: w };
180
188
  });
181
189
  }
182
190
  getNumParams() {
183
- return W(this.config);
191
+ return F(this.config);
184
192
  }
185
193
  dispose() {
186
194
  this.wte.dispose(), this.wpe && this.wpe.dispose(), this.drop.dispose(), this.blocks.forEach((t) => t.dispose()), this.lnF.dispose();
187
195
  }
188
196
  }
189
197
  export {
190
- K as default
198
+ A as default
191
199
  };
@@ -6,6 +6,7 @@ import { SaveOptions } from './utilities/save';
6
6
  import { default as Generator, IGenerateOptions } from './Generator';
7
7
  import { default as Trainer, ITrainerOptions } from './Trainer';
8
8
  import { default as EE } from 'eventemitter3';
9
+ import { default as MemoryProfiler } from './utilities/profile';
9
10
  type TeachableLLMStatus = 'warmup' | 'awaitingTokens' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
10
11
  export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
11
12
  private _config?;
@@ -23,6 +24,9 @@ export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
23
24
  saveModel(options?: SaveOptions): Promise<Blob>;
24
25
  static loadModel(tf: typeof TF, data: Blob | Buffer | string): TeachableLLM;
25
26
  static create(tf: typeof TF, config?: Partial<GPTConfig>): TeachableLLM;
27
+ getProfiler(): MemoryProfiler | undefined;
28
+ get enableProfiler(): boolean;
29
+ set enableProfiler(value: boolean);
26
30
  getNumParams(): number;
27
31
  trainer(): Trainer;
28
32
  train(text: string[], options?: ITrainerOptions): Promise<void>;
@@ -1,11 +1,11 @@
1
- import { defaultConfig as d } from "./config.js";
1
+ import { defaultConfig as h } from "./config.js";
2
2
  import m from "./NanoGPTModel.js";
3
- import { saveModel as u } from "./utilities/save.js";
4
- import { loadModel as l } from "./utilities/load.js";
5
- import f from "./Generator.js";
3
+ import { saveModel as d } from "./utilities/save.js";
4
+ import { loadModel as f } from "./utilities/load.js";
5
+ import u from "./Generator.js";
6
6
  import _ from "./Trainer.js";
7
7
  import { E as c } from "./index-Dwqa6Zy2.js";
8
- import { dummyPassAsync as h } from "./utilities/dummy.js";
8
+ import { dummyPassAsync as l } from "./utilities/dummy.js";
9
9
  import g from "./tokeniser/CharTokeniser.js";
10
10
  import "./papaparse.min-C8l2Kvo1.js";
11
11
  import "./index-Tf7vU29b.js";
@@ -13,6 +13,9 @@ import "./jszip.min-CjP2V1VV.js";
13
13
  import "./ops/scatterSub.js";
14
14
  import "./ops/gatherSub.js";
15
15
  import "./ops/attentionMask.js";
16
+ import "./ops/qkv.js";
17
+ import "./ops/rope.js";
18
+ import p from "./utilities/profile.js";
16
19
  class a extends c {
17
20
  _config;
18
21
  _model;
@@ -49,23 +52,23 @@ class a extends c {
49
52
  saveModel(t) {
50
53
  if (!this._model || !this._tokeniser)
51
54
  throw new Error("Model or tokeniser is not initialized.");
52
- return u(this._model, this._tokeniser, t);
55
+ return d(this._model, this._tokeniser, t);
53
56
  }
54
57
  static loadModel(t, r) {
55
58
  const e = new a(t);
56
- return l(t, r).then(({ model: s, tokeniser: o }) => {
57
- e._model = s, e._tokeniser = o, e._config = s.config, e.setStatus("warmup"), h(s).then(() => {
59
+ return f(t, r).then(({ model: o, tokeniser: s }) => {
60
+ e._model = o, e._tokeniser = s, e._config = o.config, e.setStatus("warmup"), l(o).then(() => {
58
61
  e.setStatus("ready");
59
62
  }).catch((i) => {
60
63
  e.setStatus("error"), e.emit("error", i);
61
64
  });
62
- }).catch((s) => {
63
- e.setStatus("error"), e.emit("error", s);
65
+ }).catch((o) => {
66
+ e.setStatus("error"), e.emit("error", o);
64
67
  }), e;
65
68
  }
66
69
  static create(t, r = {}) {
67
- const e = { ...d, ...r }, s = new g(e.vocabSize), o = new m(t, e), i = new a(t, s, o);
68
- return i.setStatus("warmup"), h(o).then(() => {
70
+ const e = { ...h, ...r }, o = new g(e.vocabSize), s = new m(t, e), i = new a(t, o, s);
71
+ return i.setStatus("warmup"), l(s).then(() => {
69
72
  i.tokeniser.trained ? i.setStatus("ready") : (i.setStatus("awaitingTokens"), i.tokeniser.once("trainStatus", (n) => {
70
73
  n === "trained" && i.setStatus("ready");
71
74
  }));
@@ -73,6 +76,20 @@ class a extends c {
73
76
  i.setStatus("error"), i.emit("error", n);
74
77
  }), i;
75
78
  }
79
+ getProfiler() {
80
+ return this._model?.getProfiler();
81
+ }
82
+ get enableProfiler() {
83
+ return !!this._model?.getProfiler();
84
+ }
85
+ set enableProfiler(t) {
86
+ if (t) {
87
+ if (!this._model)
88
+ throw new Error("Model is not initialized.");
89
+ this._model.getProfiler() || this._model.setProfiler(new p());
90
+ } else
91
+ this._model && this._model.setProfiler(void 0);
92
+ }
76
93
  getNumParams() {
77
94
  if (!this._model)
78
95
  throw new Error("Model is not initialized.");
@@ -84,8 +101,8 @@ class a extends c {
84
101
  const t = new _(this._model, this._tokeniser);
85
102
  return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (r) => {
86
103
  const e = this.listeners("trainStep");
87
- for (const s of e)
88
- await s(r);
104
+ for (const o of e)
105
+ await o(r);
89
106
  }), t;
90
107
  }
91
108
  train(t, r) {
@@ -94,7 +111,7 @@ class a extends c {
94
111
  generator() {
95
112
  if (!this._model || !this._tokeniser)
96
113
  throw new Error("Model or tokeniser is not initialized.");
97
- const t = new f(this._model, this._tokeniser);
114
+ const t = new u(this._model, this._tokeniser);
98
115
  return t.on("start", () => {
99
116
  this.status === "ready" && this.setStatus("busy");
100
117
  }), t.on("stop", () => {
@@ -1,4 +1,4 @@
1
- import { o as t, c as s, f as n, E as m, C as r } from "./index-Dsg28SG6.js";
1
+ import { o as c, d as s, g as n, E as m, C as r } from "./index-YPKosni4.js";
2
2
  /**
3
3
  * @license
4
4
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -15,13 +15,13 @@ import { o as t, c as s, f as n, E as m, C as r } from "./index-Dsg28SG6.js";
15
15
  * limitations under the License.
16
16
  * =============================================================================
17
17
  */
18
- function l(o, c) {
19
- const a = s(o, "real", "complex"), e = s(c, "imag", "complex");
18
+ function l(o, p) {
19
+ const a = s(o, "real", "complex"), e = s(p, "imag", "complex");
20
20
  n(a.shape, e.shape, `real and imag shapes, ${a.shape} and ${e.shape}, must match in call to tf.complex().`);
21
- const p = { real: a, imag: e };
22
- return m.runKernel(r, p);
21
+ const t = { real: a, imag: e };
22
+ return m.runKernel(r, t);
23
23
  }
24
- const i = /* @__PURE__ */ t({ complex_: l });
24
+ const i = /* @__PURE__ */ c({ complex_: l });
25
25
  export {
26
26
  i as c
27
27
  };
@@ -383,7 +383,7 @@ function _t(n, t) {
383
383
  return e.set(n, s), e.get(n);
384
384
  }
385
385
  }
386
- const Ge = "Abs", ne = "Add", Es = "BatchMatMul", se = "Cast", As = "Complex", ze = "ComplexAbs", We = "RealDiv", Bs = "Elu", vs = "Exp", je = "Fill", Ke = "FloorDiv", Ms = "GatherNd", re = "Identity", Fs = "Imag", $s = "LeakyRelu", Rs = "Log", xs = "Max", Ve = "Maximum", qe = "Multiply", Ns = "Neg", Ds = "Pack", He = "Pow", Cs = "Prelu", _s = "Range", Ps = "Real", Os = "Relu", Ls = "Reshape", Us = "Relu6", Gs = "ScatterNd", zs = "Sigmoid", Je = "Sqrt", Ws = "Sum", js = "Softmax", Xe = "Sub", Ks = "Transpose", Ye = "ZerosLike", Vs = "Step", qs = "_FusedMatMul";
386
+ const Ge = "Abs", ne = "Add", Es = "BatchMatMul", se = "Cast", As = "Complex", ze = "ComplexAbs", Bs = "Concat", We = "RealDiv", vs = "Elu", Ms = "Exp", je = "Fill", Ke = "FloorDiv", Fs = "GatherV2", $s = "GatherNd", re = "Identity", Rs = "Imag", xs = "LeakyRelu", Ns = "Log", Ds = "Max", Ve = "Maximum", qe = "Multiply", Cs = "Neg", _s = "Pack", He = "Pow", Ps = "Prelu", Os = "Range", Ls = "Real", Us = "Relu", Gs = "Reshape", zs = "Relu6", Ws = "ScatterNd", js = "Sigmoid", Je = "Sqrt", Ks = "Sum", Vs = "SplitV", qs = "Softmax", Xe = "Sub", Hs = "Transpose", Ye = "ZerosLike", Js = "Step", Xs = "_FusedMatMul";
387
387
  /**
388
388
  * @license
389
389
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -438,11 +438,11 @@ function Wt(n) {
438
438
  }
439
439
  return e;
440
440
  }
441
- function Hs(n) {
441
+ function Ys(n) {
442
442
  const { kernelName: t, backendName: e } = n, s = ie(t, e);
443
443
  ht.has(s) && O(`The kernel '${t}' for backend '${e}' is already registered`), ht.set(s, n);
444
444
  }
445
- function Js(n) {
445
+ function Qs(n) {
446
446
  const { kernelName: t } = n;
447
447
  It.has(t) && S().getBool("DEBUG") && O(`Overriding the gradient for '${t}'`), It.set(t, n);
448
448
  }
@@ -1902,7 +1902,7 @@ function I(n, t, e, s = "numeric") {
1902
1902
  const a = r !== "string" ? ae(n, r) : at(n, [], !0);
1903
1903
  return g.makeTensor(a, i, r);
1904
1904
  }
1905
- function Xs(n, t, e, s = "numeric") {
1905
+ function Zs(n, t, e, s = "numeric") {
1906
1906
  if (!Array.isArray(n))
1907
1907
  throw new Error(`Argument ${t} passed to ${e} must be a \`Tensor[]\` or \`TensorLike[]\``);
1908
1908
  return n.map((i, o) => I(i, `${t}[${o}]`, e, s));
@@ -2065,9 +2065,12 @@ function Sn(n, t) {
2065
2065
  * limitations under the License.
2066
2066
  * =============================================================================
2067
2067
  */
2068
- function Ys() {
2068
+ function tr() {
2069
2069
  return g;
2070
2070
  }
2071
+ function er() {
2072
+ return g.memory();
2073
+ }
2071
2074
  function E(n, t) {
2072
2075
  return g.tidy(n, t);
2073
2076
  }
@@ -2890,7 +2893,7 @@ function Yn(n, t, e) {
2890
2893
  * limitations under the License.
2891
2894
  * =============================================================================
2892
2895
  */
2893
- function Qs(n, t) {
2896
+ function nr(n, t) {
2894
2897
  const e = [];
2895
2898
  for (let s = 0; s < t.length; s++) {
2896
2899
  const r = n[n.length - s - 1], i = t.length - s - 1, o = t[i];
@@ -3058,7 +3061,7 @@ function ss(n, t) {
3058
3061
  a[u] != null && (c[l.name] = a[u]);
3059
3062
  }), s?.forEach((l) => c[l.name] = null), { value: o, grads: c };
3060
3063
  }
3061
- function Zs(n) {
3064
+ function sr(n) {
3062
3065
  return g.customGrad(n);
3063
3066
  }
3064
3067
  /**
@@ -3838,54 +3841,59 @@ function bs() {
3838
3841
  */
3839
3842
  bs();
3840
3843
  export {
3844
+ Qn as $,
3841
3845
  ds as A,
3842
3846
  Es as B,
3843
3847
  As as C,
3844
- zs as D,
3848
+ w as D,
3845
3849
  g as E,
3846
- Bs as F,
3847
- Ms as G,
3848
- $s as H,
3849
- Fs as I,
3850
- Cs as J,
3851
- Ps as K,
3852
- Rs as L,
3853
- xs as M,
3854
- Ns as N,
3855
- Os as O,
3856
- Ds as P,
3857
- Us as Q,
3858
- _s as R,
3859
- Ws as S,
3860
- Vs as T,
3861
- Ks as U,
3862
- Qs as V,
3863
- Qn as W,
3864
- qs as _,
3865
- Z as a,
3866
- Js as b,
3867
- I as c,
3868
- V as d,
3869
- Ys as e,
3870
- Is as f,
3871
- Xs as g,
3872
- y as h,
3873
- Ls as i,
3874
- $t as j,
3875
- Dt as k,
3876
- Zt as l,
3877
- p as m,
3878
- G as n,
3850
+ qs as F,
3851
+ $s as G,
3852
+ sr as H,
3853
+ E as I,
3854
+ C as J,
3855
+ js as K,
3856
+ Ns as L,
3857
+ Ds as M,
3858
+ vs as N,
3859
+ Rs as O,
3860
+ _s as P,
3861
+ xs as Q,
3862
+ Gs as R,
3863
+ Ks as S,
3864
+ Cs as T,
3865
+ Ps as U,
3866
+ Ls as V,
3867
+ Us as W,
3868
+ zs as X,
3869
+ Js as Y,
3870
+ Hs as Z,
3871
+ nr as _,
3872
+ p as a,
3873
+ Xs as a0,
3874
+ Z as b,
3875
+ Qs as c,
3876
+ I as d,
3877
+ tr as e,
3878
+ V as f,
3879
+ Is as g,
3880
+ $t as h,
3881
+ Vs as i,
3882
+ Os as j,
3883
+ Zs as k,
3884
+ y as l,
3885
+ er as m,
3886
+ Gn as n,
3879
3887
  F as o,
3880
- De as p,
3881
- Gs as q,
3882
- Hs as r,
3888
+ Bs as p,
3889
+ Fs as q,
3890
+ Ys as r,
3883
3891
  K as s,
3884
- vs as t,
3885
- Ts as u,
3886
- w as v,
3887
- js as w,
3888
- Zs as x,
3889
- E as y,
3890
- C as z
3892
+ Dt as t,
3893
+ Zt as u,
3894
+ G as v,
3895
+ De as w,
3896
+ Ws as x,
3897
+ Ms as y,
3898
+ Ts as z
3891
3899
  };
@@ -0,0 +1,8 @@
1
+ import { default as MemoryProfiler } from '../utilities/profile';
2
+ export default abstract class BaseLayer {
3
+ protected _profiler?: MemoryProfiler;
4
+ getProfiler(): MemoryProfiler | undefined;
5
+ setProfiler(value: MemoryProfiler | undefined): void;
6
+ startMemory(): void;
7
+ endMemory(label: string): void;
8
+ }
@@ -0,0 +1,18 @@
1
+ class t {
2
+ _profiler;
3
+ getProfiler() {
4
+ return this._profiler;
5
+ }
6
+ setProfiler(r) {
7
+ this._profiler = r;
8
+ }
9
+ startMemory() {
10
+ this._profiler?.startMemory();
11
+ }
12
+ endMemory(r) {
13
+ this._profiler?.endMemory(r);
14
+ }
15
+ }
16
+ export {
17
+ t as default
18
+ };
@@ -1,13 +1,14 @@
1
1
  import { default as TF } from '@tensorflow/tfjs';
2
2
  import { GPTConfig } from '../config';
3
3
  import { default as RoPECache } from './RoPECache';
4
+ import { default as BaseLayer } from './BaseLayer';
4
5
  export type KVCache = {
5
6
  k: TF.Tensor;
6
7
  v: TF.Tensor;
7
8
  length: number;
8
9
  cumulativeLength: number;
9
10
  };
10
- export default class CausalSelfAttention {
11
+ export default class CausalSelfAttention extends BaseLayer {
11
12
  private readonly ropeCache?;
12
13
  private config;
13
14
  private cAttn;
@@ -20,7 +21,9 @@ export default class CausalSelfAttention {
20
21
  private divisor;
21
22
  private index;
22
23
  private _trainable;
24
+ private units;
23
25
  constructor(tf: typeof TF, index: number, config: GPTConfig, ropeCache?: RoPECache | undefined);
26
+ private build;
24
27
  get variables(): TF.Variable[];
25
28
  get trainable(): boolean;
26
29
  set trainable(value: boolean);