@genai-fi/nanogpt 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,7 @@ export interface IGenerateOptions {
7
7
  topK?: number;
8
8
  usePadding?: boolean;
9
9
  includeAttention?: boolean;
10
+ includeProbabilities?: boolean;
10
11
  }
11
12
  export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
12
13
  private readonly model;
package/dist/Generator.js CHANGED
@@ -1,53 +1,62 @@
1
1
  import { E as m } from "./index-SOhdqzHq.js";
2
- const g = 4;
3
- class w extends m {
4
- constructor(o, t) {
5
- super(), this.model = o, this.tokeniser = t;
2
+ const b = 4;
3
+ class x extends m {
4
+ constructor(a, t) {
5
+ super(), this.model = a, this.tokeniser = t;
6
6
  }
7
- generateBlockOfTokens(o, t) {
8
- const c = t?.temperature ?? 1, a = t?.topK, r = t?.usePadding ?? t?.includeAttention ?? !1, d = t?.includeAttention ?? !1;
9
- let s = o, n;
10
- for (let l = 0; l < g; l++) {
11
- const { output: e, attention: i } = this.model.generate(s, {
12
- temperature: c,
13
- topK: a,
14
- usePadding: r,
15
- includeAttention: d
16
- }), h = s;
17
- if (s = this.model.tf.concat([s, e], 1), n && i) {
18
- const u = n;
19
- n = this.model.tf.concat([n, i], 0), u.dispose();
20
- } else i && (n = i);
21
- h.dispose(), e.dispose();
7
+ generateBlockOfTokens(a, t) {
8
+ const g = t?.temperature ?? 1, c = t?.topK, d = t?.usePadding ?? t?.includeAttention ?? !1, k = t?.includeAttention ?? !1, h = t?.includeProbabilities ?? !1;
9
+ let i = a, n, s;
10
+ for (let e = 0; e < b; e++) {
11
+ const {
12
+ output: u,
13
+ attention: l,
14
+ probabilities: r
15
+ } = this.model.generate(i, {
16
+ temperature: g,
17
+ topK: c,
18
+ usePadding: d,
19
+ includeAttention: k,
20
+ includeProbabilities: h
21
+ }), p = i;
22
+ if (i = this.model.tf.concat([i, u], 1), n && l) {
23
+ const o = n;
24
+ n = this.model.tf.concat([n, l], 0), o.dispose();
25
+ } else l && (n = l);
26
+ if (s && r) {
27
+ const o = s;
28
+ s = this.model.tf.concat([s, r], 0), o.dispose();
29
+ } else r && (s = r);
30
+ p.dispose(), u.dispose();
22
31
  }
23
- return { output: s, attention: n };
32
+ return { output: i, attention: n, probabilities: s };
24
33
  }
25
- async generate(o, t) {
26
- const c = o ? await this.tokeniser.tokenise([o], !0) : [[this.tokeniser.eosToken]];
27
- let a = this.model.tf.tensor2d(c, [1, c[0].length], "int32");
34
+ async generate(a, t) {
35
+ const g = a ? await this.tokeniser.tokenise([a], !0) : [[this.tokeniser.eosToken]];
36
+ let c = this.model.tf.tensor2d(g, [1, g[0].length], "int32");
28
37
  this.emit("start");
29
- let r = o || "";
38
+ let d = a || "";
30
39
  for (; ; ) {
31
- const { output: d, attention: s } = this.generateBlockOfTokens(a, t), n = a;
32
- a = d;
33
- const l = d.slice([0, n.shape[1]], [1, g]), e = (await l.array())[0];
34
- let i = !1, h = !1;
35
- const u = e.indexOf(this.tokeniser.eosToken);
36
- u !== -1 && (i = !0, e.splice(u)), e.length + r.length >= (t?.maxLength ?? 1e3) && (h = !0, e.splice(
37
- t?.maxLength ? t.maxLength - r.length : e.length
40
+ const { output: k, attention: h, probabilities: i } = this.generateBlockOfTokens(c, t), n = c;
41
+ c = k;
42
+ const s = k.slice([0, n.shape[1]], [1, b]), e = (await s.array())[0];
43
+ n.dispose(), s.dispose();
44
+ let u = !1, l = !1;
45
+ const r = e.indexOf(this.tokeniser.eosToken);
46
+ r !== -1 && (u = !0, e.splice(r)), e.length + d.length >= (t?.maxLength ?? 1e3) && (l = !0, e.splice(
47
+ t?.maxLength ? t.maxLength - d.length : e.length
38
48
  ));
39
- const k = await this.tokeniser.decode(e);
40
- if (r += k, s) {
41
- let f = await s.array();
42
- f.length > e.length && (f = f.slice(0, e.length)), this.emit("tokens", e, k, f);
43
- } else
44
- this.emit("tokens", e, k);
45
- if (n.dispose(), l.dispose(), i || h)
49
+ const p = await this.tokeniser.decode(e);
50
+ d += p;
51
+ let o;
52
+ h && (o = await h.array(), h.dispose(), o.length > e.length && (o = o.slice(0, e.length)));
53
+ let f;
54
+ if (i && (f = await i.array(), i.dispose(), f.length > e.length && (f = f.slice(0, e.length))), this.emit("tokens", e, p, o, f), u || l)
46
55
  break;
47
56
  }
48
- return a.dispose(), this.emit("stop"), r;
57
+ return c.dispose(), this.emit("stop"), d;
49
58
  }
50
59
  }
51
60
  export {
52
- w as default
61
+ x as default
53
62
  };
@@ -13,6 +13,7 @@ export interface GenerateOptions {
13
13
  topK?: number;
14
14
  usePadding?: boolean;
15
15
  includeAttention?: boolean;
16
+ includeProbabilities?: boolean;
16
17
  }
17
18
  export default class NanoGPT {
18
19
  readonly config: GPTConfig;
@@ -42,6 +43,7 @@ export default class NanoGPT {
42
43
  generate(idx: TF.Tensor, options?: GenerateOptions): {
43
44
  output: TF.Tensor;
44
45
  attention?: TF.Tensor;
46
+ probabilities?: TF.Tensor;
45
47
  };
46
48
  getNumParams(): number;
47
49
  }
@@ -1,7 +1,7 @@
1
- import { defaultConfig as y } from "./config.js";
2
- import z from "./layers/TransformerBlock.js";
3
- import v from "./layers/TiedEmbedding.js";
4
- import S from "./layers/LayerNorm.js";
1
+ import { defaultConfig as z } from "./config.js";
2
+ import v from "./layers/TransformerBlock.js";
3
+ import S from "./layers/TiedEmbedding.js";
4
+ import _ from "./layers/LayerNorm.js";
5
5
  class $ {
6
6
  config;
7
7
  wte;
@@ -17,7 +17,7 @@ class $ {
17
17
  log = [];
18
18
  // Training log
19
19
  constructor(t, e = {}) {
20
- this.tf = t, this.config = { ...y, ...e }, this.wte = new v(t, {
20
+ this.tf = t, this.config = { ...z, ...e }, this.wte = new S(t, {
21
21
  vocabSize: this.config.vocabSize,
22
22
  embedDim: this.config.nEmbed,
23
23
  name: "token_embedding"
@@ -28,8 +28,8 @@ class $ {
28
28
  embeddingsInitializer: this.tf.initializers.randomNormal({ mean: 0, stddev: 0.02 })
29
29
  }), this.drop = this.tf.layers.dropout({ rate: this.config.dropout }), this.blocks = [];
30
30
  for (let s = 0; s < this.config.nLayer; s++)
31
- this.blocks.push(new z(this.tf, s, this.config));
32
- this.lnF = new S(t, [this.config.nEmbed], 1e-5, "final_layer_norm");
31
+ this.blocks.push(new v(this.tf, s, this.config));
32
+ this.lnF = new _(t, [this.config.nEmbed], 1e-5, "final_layer_norm");
33
33
  }
34
34
  get variables() {
35
35
  return [
@@ -54,7 +54,7 @@ class $ {
54
54
  }
55
55
  inputPhase(t, e = !1) {
56
56
  return this.tf.tidy(() => {
57
- const [, s] = t.shape, i = this.wte.embed(t), n = this.tf.range(0, s, 1, "int32"), h = this.wpe.apply(n), o = i.add(h);
57
+ const [, s] = t.shape, i = this.wte.embed(t), n = this.tf.range(0, s, 1, "int32"), a = this.wpe.apply(n), o = i.add(a);
58
58
  return this.drop.apply(o, { training: e });
59
59
  });
60
60
  }
@@ -98,8 +98,8 @@ class $ {
98
98
  throw new Error("No attentions for rollout");
99
99
  const e = t[0].shape[0], s = t[0].shape[1], i = this.tf.eye(s, s).expandDims(0);
100
100
  let n = i.tile([e, 1, 1]);
101
- for (const h of t) {
102
- let o = h.add(i);
101
+ for (const a of t) {
102
+ let o = a.add(i);
103
103
  o = o.div(o.sum(-1, !0)), n = o.matMul(n);
104
104
  }
105
105
  return n;
@@ -108,35 +108,36 @@ class $ {
108
108
  forward(t, e, s = !1, i = !1) {
109
109
  return this.validateInput(t), this.tf.tidy(() => {
110
110
  let n = this.inputPhase(t, s);
111
- const h = [];
111
+ const a = [];
112
112
  for (const c of this.blocks) {
113
- const { output: p, attention: a } = c.call(n, s, i);
114
- n = p, i && a && h.push(a);
113
+ const { output: p, attention: l } = c.call(n, s, i);
114
+ n = p, i && l && a.push(l);
115
115
  }
116
116
  let o;
117
- i && h.length > 0 && (o = this.computeAttentionRollout(h)), n = this.lnF.apply(n);
118
- const l = this.wte.project(n);
117
+ i && a.length > 0 && (o = this.computeAttentionRollout(a)), n = this.lnF.apply(n);
118
+ const h = this.wte.project(n);
119
119
  let r;
120
- return e && (r = this.calculateLoss(l, e)), { logits: l, loss: r, attention: i ? o : void 0 };
120
+ return e && (r = this.calculateLoss(h, e)), { logits: h, loss: r, attention: i ? o : void 0 };
121
121
  });
122
122
  }
123
123
  generate(t, e) {
124
- const s = e?.temperature ?? 1, i = e?.topK, n = e?.usePadding ?? !1, h = e?.includeAttention ?? !1;
124
+ const s = e?.temperature ?? 1, i = e?.topK, n = e?.usePadding ?? !1, a = e?.includeAttention ?? !1;
125
125
  return this.tf.tidy(() => {
126
- const o = t, l = o.shape[1], r = l <= this.config.blockSize ? o : o.slice(
127
- [0, l - this.config.blockSize],
126
+ const o = t, h = o.shape[1], r = h <= this.config.blockSize ? o : o.slice(
127
+ [0, h - this.config.blockSize],
128
128
  [o.shape[0], this.config.blockSize]
129
129
  ), c = n ? this.config.blockSize - r.shape[1] : 0, p = c > 0 ? this.tf.pad(r, [
130
130
  [0, 0],
131
131
  [0, c]
132
- ]) : r, { logits: a, attention: g } = this.forward(p, void 0, !1, h), d = a.shape[1] - 1 - c, m = a.slice([0, d, 0], [a.shape[0], 1, a.shape[2]]), u = g ? g.slice([0, d, 0], [g.shape[0], 1, g.shape[2]]) : void 0, b = m.div(s);
132
+ ]) : r, { logits: l, attention: g } = this.forward(p, void 0, !1, a), b = l.shape[1] - 1 - c, u = l.slice([0, b, 0], [l.shape[0], 1, l.shape[2]]), k = g ? g.slice([0, b, 0], [g.shape[0], 1, g.shape[2]]) : void 0, d = u.div(s);
133
133
  let f;
134
134
  if (i) {
135
- const { values: k, indices: w } = this.tf.topk(b, i), E = this.tf.multinomial(k.squeeze([1]), 1);
136
- f = this.tf.gather(w.squeeze([1]), E, 1);
135
+ const { values: w, indices: E } = this.tf.topk(d, i), y = this.tf.multinomial(w.squeeze([1]), 1);
136
+ f = this.tf.gather(E.squeeze([1]), y, 1);
137
137
  } else
138
- f = this.tf.multinomial(b.squeeze([1]), 1);
139
- return f = f.reshape([1, 1]), { output: f, attention: u?.squeeze([1]) };
138
+ f = this.tf.multinomial(d.squeeze([1]), 1);
139
+ let m;
140
+ return e?.includeProbabilities && (m = this.tf.softmax(d.squeeze([1]))), f = f.reshape([1, 1]), { output: f, attention: k?.squeeze([1]), probabilities: m };
140
141
  });
141
142
  }
142
143
  getNumParams() {
package/dist/config.d.ts CHANGED
@@ -7,5 +7,6 @@ export interface GPTConfig {
7
7
  dropout: number;
8
8
  biasInLinear: boolean;
9
9
  biasInLayerNorm: boolean;
10
+ mlpFactor: number;
10
11
  }
11
12
  export declare const defaultConfig: GPTConfig;
package/dist/config.js CHANGED
@@ -1,4 +1,4 @@
1
- const e = {
1
+ const a = {
2
2
  vocabSize: 50304,
3
3
  // GPT-2 vocab size
4
4
  blockSize: 1024,
@@ -12,8 +12,9 @@ const e = {
12
12
  dropout: 0,
13
13
  // Dropout probability
14
14
  biasInLinear: !1,
15
- biasInLayerNorm: !1
15
+ biasInLayerNorm: !1,
16
+ mlpFactor: 4
16
17
  };
17
18
  export {
18
- e as defaultConfig
19
+ a as defaultConfig
19
20
  };
@@ -1,4 +1,4 @@
1
- class n {
1
+ class l {
2
2
  cFc;
3
3
  cProj;
4
4
  dropout;
@@ -7,7 +7,7 @@ class n {
7
7
  _trainable = !0;
8
8
  constructor(t, i, e) {
9
9
  this.tf = t, this.index = i, this.cFc = this.tf.layers.dense({
10
- units: 4 * e.nEmbed,
10
+ units: e.mlpFactor * e.nEmbed,
11
11
  activation: "gelu",
12
12
  useBias: e.biasInLinear,
13
13
  kernelInitializer: this.tf.initializers.randomNormal({
@@ -53,5 +53,5 @@ class n {
53
53
  }
54
54
  }
55
55
  export {
56
- n as default
56
+ l as default
57
57
  };
@@ -5,7 +5,7 @@ export declare class DatasetBuilder {
5
5
  blockSize: number;
6
6
  private tf;
7
7
  constructor(tf: typeof TF, tokenizer: ITokeniser, blockSize?: number);
8
- createTextDataset(textData: string[], batchSize?: number): Promise<TF.data.Dataset<{
8
+ createTextDataset(textData: string[], batchSize?: number, start?: number, end?: number): Promise<TF.data.Dataset<{
9
9
  xs: TF.Tensor;
10
10
  ys: TF.Tensor;
11
11
  }>>;
@@ -1,19 +1,22 @@
1
- class l {
1
+ class z {
2
2
  tokenizer;
3
3
  blockSize;
4
4
  tf;
5
- constructor(s, i, o = 128) {
6
- this.tokenizer = i, this.blockSize = o, this.tf = s;
5
+ constructor(s, o, i = 128) {
6
+ this.tokenizer = o, this.blockSize = i, this.tf = s;
7
7
  }
8
8
  // Create dataset from text files
9
- async createTextDataset(s, i = 32) {
10
- const o = await Promise.all(s.map((t) => this.tokenizer.encode(t))), a = this.tokenizer.eosToken >= 0, n = o.map((t) => a ? [...t, this.tokenizer.eosToken] : t).flat(), c = (function* () {
9
+ async createTextDataset(s, o = 32, i = 0, c = 1) {
10
+ const n = await Promise.all(s.map((t) => this.tokenizer.encode(t))), h = this.tokenizer.eosToken >= 0, a = n.map((t) => h ? [...t, this.tokenizer.eosToken] : t).flat().slice(
11
+ Math.floor(i * n.length),
12
+ c === 1 ? void 0 : Math.floor(c * n.length)
13
+ ), r = (function* () {
11
14
  for (; ; ) {
12
- const t = Math.floor(Math.random() * (n.length - this.blockSize - 1)), e = n.slice(t, t + this.blockSize), r = n.slice(t + 1, t + this.blockSize + 1);
13
- yield { xs: e, ys: r };
15
+ const t = Math.floor(Math.random() * (a.length - this.blockSize - 1)), e = a.slice(t, t + this.blockSize), l = a.slice(t + 1, t + this.blockSize + 1);
16
+ yield { xs: e, ys: l };
14
17
  }
15
18
  }).bind(this);
16
- return this.tf.data.generator(c).batch(i).map((t) => {
19
+ return this.tf.data.generator(r).batch(o).map((t) => {
17
20
  const e = t;
18
21
  return this.tf.tidy(() => ({
19
22
  xs: e.xs.cast("int32"),
@@ -23,5 +26,5 @@ class l {
23
26
  }
24
27
  }
25
28
  export {
26
- l as DatasetBuilder
29
+ z as DatasetBuilder
27
30
  };
@@ -0,0 +1,8 @@
1
+ import { default as NanoGPT } from '../NanoGPTModel';
2
+ import { default as TF } from '@tensorflow/tfjs';
3
+ export default class Evaluator {
4
+ private model;
5
+ private iterator;
6
+ constructor(model: NanoGPT, dataset: TF.data.Dataset<TF.TensorContainer>);
7
+ evaluate(maxBatches?: number): Promise<number>;
8
+ }
@@ -0,0 +1,22 @@
1
+ class p {
2
+ constructor(s, t) {
3
+ this.model = s, this.iterator = t.iterator();
4
+ }
5
+ iterator;
6
+ async evaluate(s = 100) {
7
+ let t = 0, o = 0;
8
+ const c = await this.iterator;
9
+ for (let a = 0; a < s; a++) {
10
+ const e = await c.next();
11
+ if (e.done) break;
12
+ const n = e.value, { xs: r, ys: l } = n, { loss: i, logits: u } = this.model.forward(r, l, !1, !1);
13
+ u.dispose(), r.dispose(), l.dispose();
14
+ const d = i.arraySync();
15
+ i.dispose(), t += d, o++;
16
+ }
17
+ return t / o;
18
+ }
19
+ }
20
+ export {
21
+ p as default
22
+ };
@@ -1,18 +1,19 @@
1
1
  import { generateText as L } from "../utilities/generate.js";
2
2
  import w from "./Trainer.js";
3
- const g = {
3
+ import g from "./Evaluator.js";
4
+ const x = {
4
5
  desiredLoss: 0.01,
5
6
  logInterval: 1,
6
7
  maxSteps: 1e3
7
8
  };
8
- class S extends w {
9
+ class D extends w {
9
10
  constructor(r, i, o, n = 3e-4) {
10
11
  super(r, i, o, n);
11
12
  }
12
13
  // Train for multiple epochs using Dataset API - FIXED memory leaks
13
14
  async trainOnDataset(r, i, o) {
14
- const { desiredLoss: n, logInterval: c, onStep: l, prompt: p, maxSteps: d } = {
15
- ...g,
15
+ const { desiredLoss: n, logInterval: d, onStep: l, prompt: p, maxSteps: m } = {
16
+ ...x,
16
17
  ...i
17
18
  }, s = {
18
19
  pass: 0,
@@ -25,23 +26,23 @@ class S extends w {
25
26
  validationLosses: []
26
27
  };
27
28
  this.dummyPass(), this.model.trainable = !0;
28
- const m = Date.now();
29
+ const u = Date.now();
29
30
  this.running = !0;
30
- const u = await r.iterator();
31
+ const c = o ? new g(this.model, o) : void 0, f = await r.iterator();
31
32
  try {
32
33
  for (; this.running && !(s.lastLoss < n); ) {
33
- const e = await u.next();
34
+ const e = await f.next();
34
35
  if (e.done) break;
35
- const h = e.value, f = this.trainBatch(s, h), a = {
36
+ const h = e.value, v = this.trainBatch(s, h), a = {
36
37
  loss: s.lastLoss,
37
38
  step: s.step,
38
- time: Date.now() - m,
39
+ time: Date.now() - u,
39
40
  batchSize: h.xs.shape[0]
40
41
  };
41
- if (this.model.log.push(a), s.step % c === 0) {
42
- if (await f, o)
42
+ if (this.model.log.push(a), s.step % d === 0) {
43
+ if (await v, c)
43
44
  try {
44
- const t = await this.evaluateOnDataset(o, 5);
45
+ const t = await c.evaluate(5);
45
46
  s.validationLosses.push(t), a.valLoss = t;
46
47
  } catch (t) {
47
48
  console.error("Validation error:", t);
@@ -56,7 +57,7 @@ class S extends w {
56
57
  await l(a);
57
58
  }
58
59
  }
59
- s.step >= d && this.stop();
60
+ s.step >= m && this.stop();
60
61
  }
61
62
  } catch (e) {
62
63
  throw console.error("Training error:", e), this.tf.dispose(), e;
@@ -65,5 +66,5 @@ class S extends w {
65
66
  }
66
67
  }
67
68
  export {
68
- S as default
69
+ D as default
69
70
  };
@@ -1,32 +1,33 @@
1
- import { generateText as d } from "../utilities/generate.js";
2
- import S from "./Trainer.js";
3
- import { schedule as u } from "./lwSchedule.js";
4
- const w = {
1
+ import { generateText as S } from "../utilities/generate.js";
2
+ import u from "./Trainer.js";
3
+ import { schedule as v } from "./lwSchedule.js";
4
+ import w from "./Evaluator.js";
5
+ const T = {
5
6
  desiredLoss: 0.01,
6
7
  logInterval: 1,
7
8
  stepsPerLayer: 400,
8
9
  maxPasses: 3,
9
10
  maxSteps: 1e3
10
11
  };
11
- class b extends S {
12
+ class z extends u {
12
13
  trainingPattern = [];
13
14
  startPass = 0;
14
15
  startLayer = 0;
15
- constructor(r, a, e, p = 3e-4) {
16
- if (super(r, a, e, p), this.trainingPattern = u[a.config.nLayer - 1] || [], a.log.length > 0) {
17
- const i = a.log[a.log.length - 1];
16
+ constructor(r, s, e, p = 3e-4) {
17
+ if (super(r, s, e, p), this.trainingPattern = v[s.config.nLayer - 1] || [], s.log.length > 0) {
18
+ const i = s.log[s.log.length - 1];
18
19
  i.pass !== void 0 && i.layer !== void 0 && (this.startPass = i.pass, this.startLayer = i.layer, console.log(`Resuming training from pass ${this.startPass}, layer ${this.startLayer}`));
19
20
  }
20
21
  }
21
22
  applyTrainingPattern(r) {
22
- const a = r < this.trainingPattern.length ? r : this.trainingPattern.length - 1, e = this.trainingPattern[a];
23
- this.model.setSkipMask(e.skip), this.model.setTrainableMask(e.trainable), this.resetOptimizer(e.adam), console.log("Applied training pattern:", a, e);
23
+ const s = r < this.trainingPattern.length ? r : this.trainingPattern.length - 1, e = this.trainingPattern[s];
24
+ this.model.setSkipMask(e.skip), this.model.setTrainableMask(e.trainable), this.resetOptimizer(e.adam), console.log("Applied training pattern:", s, e);
24
25
  }
25
26
  // Train for multiple epochs using Dataset API - FIXED memory leaks
26
- async trainOnDataset(r, a, e) {
27
- const { desiredLoss: p, logInterval: i, stepsPerLayer: L, onLayerChange: l, onPassComplete: h, onStep: c, prompt: g } = {
28
- ...w,
29
- ...a
27
+ async trainOnDataset(r, s, e) {
28
+ const { desiredLoss: p, logInterval: i, stepsPerLayer: L, onLayerChange: o, onPassComplete: h, onStep: c, prompt: g } = {
29
+ ...T,
30
+ ...s
30
31
  }, t = {
31
32
  pass: 0,
32
33
  layerStep: 0,
@@ -38,47 +39,44 @@ class b extends S {
38
39
  validationLosses: []
39
40
  };
40
41
  this.dummyPass();
41
- const m = Date.now();
42
+ const f = Date.now();
42
43
  this.startPass = 0, this.startLayer = 0;
43
- const f = await r.iterator();
44
+ const y = e ? new w(this.model, e) : void 0, d = await r.iterator();
44
45
  this.applyTrainingPattern(t.layerStep % this.trainingPattern.length);
45
46
  try {
46
47
  for (; !(t.lastLoss < p); ) {
47
- const n = await f.next();
48
+ const n = await d.next();
48
49
  if (n.done) break;
49
- const y = n.value, P = this.trainBatch(t, y);
50
+ const m = n.value, P = this.trainBatch(t, m);
50
51
  t.stepSinceLayerChange++;
51
- const o = {
52
+ const l = {
52
53
  loss: t.lastLoss,
53
54
  step: t.step,
54
- time: Date.now() - m,
55
- batchSize: y.xs.shape[0],
55
+ time: Date.now() - f,
56
+ batchSize: m.xs.shape[0],
56
57
  pass: t.pass,
57
58
  layer: t.layerStep % this.model.config.nLayer
58
59
  };
59
- if (this.model.log.push(o), t.step % i === 0) {
60
- if (await P, e)
60
+ if (this.model.log.push(l), t.step % i === 0) {
61
+ if (await P, y)
61
62
  try {
62
- const s = await this.evaluateOnDataset(e, 5);
63
- t.validationLosses.push(s), o.valLoss = s;
64
- } catch (s) {
65
- console.error("Validation error:", s);
63
+ const a = await y.evaluate(5);
64
+ t.validationLosses.push(a), l.valLoss = a;
65
+ } catch (a) {
66
+ console.error("Validation error:", a);
66
67
  }
67
68
  if (c) {
68
69
  if (g) {
69
- const s = await d(this.tokenizer, this.model, g, 100, {
70
+ const a = await S(this.tokenizer, this.model, g, 100, {
70
71
  temperature: 0.8,
71
72
  topK: 10
72
73
  });
73
- o.example = s;
74
+ l.example = a;
74
75
  }
75
- await c(o);
76
+ await c(l);
76
77
  }
77
78
  }
78
- if (t.stepSinceLayerChange >= L) {
79
- let s;
80
- e && (s = await this.evaluateOnDataset(e, 5), t.validationLosses.push(s), o.valLoss = s), t.layerStep++, t.layerStep % this.model.config.nLayer === 0 ? (l && await l(t.layerStep, t.pass, s), h && await h(t.pass), t.pass++) : l && await l(t.layerStep, t.pass, s), t.stepSinceLayerChange = 0, this.applyTrainingPattern(t.layerStep % this.trainingPattern.length);
81
- }
79
+ t.stepSinceLayerChange >= L && (t.layerStep++, t.layerStep % this.model.config.nLayer === 0 ? (o && await o(t.layerStep, t.pass), h && await h(t.pass), t.pass++) : o && await o(t.layerStep, t.pass), t.stepSinceLayerChange = 0, this.applyTrainingPattern(t.layerStep % this.trainingPattern.length));
82
80
  }
83
81
  } catch (n) {
84
82
  throw console.error("Training error:", n), this.tf.dispose(), n;
@@ -87,5 +85,5 @@ class b extends S {
87
85
  }
88
86
  }
89
87
  export {
90
- b as default
88
+ z as default
91
89
  };
@@ -56,7 +56,6 @@ export default abstract class GPTTrainer {
56
56
  losses: number[];
57
57
  validationLosses: number[];
58
58
  }>;
59
- evaluateOnDataset(dataset: TF.data.Dataset<TF.TensorContainer>, maxBatches?: number): Promise<number>;
60
59
  createTrainValidationSplit(textData: string[], batchSize?: number, validationSplit?: number): Promise<{
61
60
  trainDataset: TF.data.Dataset<{
62
61
  xs: TF.Tensor;
@@ -1,8 +1,8 @@
1
- import { DatasetBuilder as h } from "./DatasetBuilder.js";
1
+ import { DatasetBuilder as d } from "./DatasetBuilder.js";
2
2
  import p from "./AdamExt.js";
3
- class y {
4
- constructor(t, e, s, a = 1e-3) {
5
- this.tokenizer = s, this.tf = t, this.model = e, this.learningRate = a, this.resetOptimizer(), this.datasetBuilder = new h(this.tf, s, e.config.blockSize);
3
+ class u {
4
+ constructor(t, s, e, i = 1e-3) {
5
+ this.tokenizer = e, this.tf = t, this.model = s, this.learningRate = i, this.resetOptimizer(), this.datasetBuilder = new d(this.tf, e, s.config.blockSize);
6
6
  }
7
7
  model;
8
8
  optimizer;
@@ -21,7 +21,7 @@ class y {
21
21
  }
22
22
  resetOptimizer(t = { learningRateFactor: 1, beta1: 0.9, beta2: 0.99, epsilon: 1e-8 }) {
23
23
  this.optimizer && this.optimizer.dispose();
24
- const e = new p(
24
+ const s = new p(
25
25
  t.learningRateFactor * this.learningRate,
26
26
  t.beta1,
27
27
  t.beta2,
@@ -33,62 +33,58 @@ class y {
33
33
  weightDecay: 0
34
34
  }
35
35
  );
36
- this.optimizer = e;
36
+ this.optimizer = s;
37
37
  }
38
38
  printGradients(t) {
39
- Object.keys(t).forEach((e) => {
40
- const s = t[e];
41
- console.log(`${e}:`), console.log(` Shape: ${s.shape}`), console.log(` Mean: ${this.tf.mean(s).dataSync()[0]}`), console.log(` Std: ${this.tf.moments(s).variance.sqrt().dataSync()[0]}`), console.log(` Min: ${this.tf.min(s).dataSync()[0]}`), console.log(` Max: ${this.tf.max(s).dataSync()[0]}`), console.log(` Norm: ${this.tf.norm(s).dataSync()[0]}`);
39
+ Object.keys(t).forEach((s) => {
40
+ const e = t[s];
41
+ console.log(`${s}:`), console.log(` Shape: ${e.shape}`), console.log(` Mean: ${this.tf.mean(e).dataSync()[0]}`), console.log(` Std: ${this.tf.moments(e).variance.sqrt().dataSync()[0]}`), console.log(` Min: ${this.tf.min(e).dataSync()[0]}`), console.log(` Max: ${this.tf.max(e).dataSync()[0]}`), console.log(` Norm: ${this.tf.norm(e).dataSync()[0]}`);
42
42
  });
43
43
  }
44
- trainStep(t, e = !1, s = !1) {
44
+ trainStep(t, s = !1, e = !1) {
45
45
  return this.tf.tidy(() => {
46
- const { xs: a, ys: o } = t, r = () => {
47
- const { loss: l, logits: c } = this.model.forward(a, o, !0);
46
+ const { xs: i, ys: a } = t, o = () => {
47
+ const { loss: l, logits: c } = this.model.forward(i, a, !0);
48
48
  return c.dispose(), l;
49
- }, { value: n, grads: i } = this.tf.variableGrads(r);
50
- return e || (s && (console.log("-------"), this.printGradients(i), console.log("-------")), this.optimizer.applyGradients(i), this.tf.dispose(i)), n;
49
+ }, { value: n, grads: r } = this.tf.variableGrads(o);
50
+ return s || (e && (console.log("-------"), this.printGradients(r), console.log("-------")), this.optimizer.applyGradients(r), this.tf.dispose(r)), n;
51
51
  });
52
52
  }
53
53
  dummyPass() {
54
- const t = this.tf.zeros([1, this.model.config.blockSize], "int32"), e = this.tf.zeros([1, this.model.config.blockSize, this.model.config.vocabSize]);
54
+ const t = this.tf.zeros([1, this.model.config.blockSize], "int32"), s = this.tf.zeros([1, this.model.config.blockSize, this.model.config.vocabSize]);
55
55
  try {
56
- const s = this.trainStep({ xs: t, ys: e }, !0);
57
- s.dataSync(), s.dispose();
58
- } catch (s) {
59
- console.error("Error during dummy pass:", s);
56
+ const e = this.trainStep({ xs: t, ys: s }, !0);
57
+ e.dataSync(), e.dispose();
58
+ } catch (e) {
59
+ console.error("Error during dummy pass:", e);
60
60
  } finally {
61
- t.dispose(), e.dispose();
61
+ t.dispose(), s.dispose();
62
62
  }
63
63
  }
64
- async trainBatch(t, e) {
64
+ async trainBatch(t, s) {
65
65
  try {
66
- const s = this.trainStep(e, !1, !1);
67
- return e.xs.dispose(), e.ys.dispose(), t.step++, t.totalSteps++, s.array().then((a) => (t.lastLoss = a, t.losses.push(t.lastLoss), s.dispose(), t.lastLoss));
68
- } catch (s) {
69
- throw console.error(`Error processing batch at step ${t.step}:`, s), this.tf.dispose(), s;
66
+ const e = this.trainStep(s, !1, !1);
67
+ return s.xs.dispose(), s.ys.dispose(), t.step++, t.totalSteps++, e.array().then((i) => (t.lastLoss = i, t.losses.push(t.lastLoss), e.dispose(), t.lastLoss));
68
+ } catch (e) {
69
+ throw console.error(`Error processing batch at step ${t.step}:`, e), this.tf.dispose(), e;
70
70
  }
71
71
  }
72
- // Evaluate model on validation dataset - FIXED memory leaks
73
- async evaluateOnDataset(t, e = 100) {
74
- let s = 0, a = 0;
75
- return await t.take(e).forEachAsync(async (o) => {
76
- const { xs: r, ys: n } = o, { loss: i, logits: l } = this.model.forward(r, n, !1), d = i.arraySync();
77
- i.dispose(), l.dispose(), s += d, a++;
78
- }), s / a;
79
- }
80
- // Create training and validation datasets - FIXED memory leaks
81
- async createTrainValidationSplit(t, e = 32, s = 0.1) {
82
- const a = Math.floor(t.length * (1 - s)), o = t.slice(0, a), r = t.slice(a), n = await this.datasetBuilder.createTextDataset(o, e), i = await this.datasetBuilder.createTextDataset(r, e);
83
- return { trainDataset: n, validationDataset: i };
72
+ async createTrainValidationSplit(t, s = 32, e = 0.1) {
73
+ const i = await this.datasetBuilder.createTextDataset(t, s, 0, 1 - e), a = await this.datasetBuilder.createTextDataset(
74
+ t,
75
+ s,
76
+ 1 - e,
77
+ 1
78
+ );
79
+ return { trainDataset: i, validationDataset: a };
84
80
  }
85
- async createDataset(t, e = 32) {
86
- return await this.datasetBuilder.createTextDataset(t, e);
81
+ async createDataset(t, s = 32) {
82
+ return await this.datasetBuilder.createTextDataset(t, s);
87
83
  }
88
84
  dispose() {
89
- this.optimizer && this.optimizer.dispose(), this.tf.dispose();
85
+ this.optimizer && this.optimizer.dispose();
90
86
  }
91
87
  }
92
88
  export {
93
- y as default
89
+ u as default
94
90
  };
@@ -1,17 +1,19 @@
1
- async function w(n, e, o, s, p) {
1
+ async function w(n, t, r, s, g) {
2
2
  if (s <= 0)
3
3
  throw new Error("Length must be a positive integer");
4
- if (o.length === 0)
4
+ if (r.length === 0)
5
5
  throw new Error("Prompt cannot be an empty string");
6
- const a = await n.tokenise([o], !0), r = (await e.tf.tidy(() => {
7
- let t = e.tf.tensor2d(a, [1, a[0].length], "int32");
8
- for (let c = 0; c < s; c++) {
9
- const { output: d } = e.generate(t, p), u = t;
10
- t = e.tf.concat([t, d], 1), u.dispose(), d.dispose();
6
+ const i = await n.tokenise([r], !0), a = t.tf.tidy(() => {
7
+ let e = t.tf.tensor2d(i, [1, i[0].length], "int32");
8
+ for (let d = 0; d < s; d++) {
9
+ const { output: p } = t.generate(e, g), f = e;
10
+ e = t.tf.concat([e, p], 1), f.dispose(), p.dispose();
11
11
  }
12
- return t;
13
- }).array())[0], i = r.indexOf(n.eosToken);
14
- return i !== -1 && r.splice(i), await n.decode(r);
12
+ return e;
13
+ }), u = await a.array();
14
+ a.dispose();
15
+ const o = u[0], c = o.indexOf(n.eosToken);
16
+ return c !== -1 && o.splice(c), await n.decode(o);
15
17
  }
16
18
  export {
17
19
  w as generateText
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@genai-fi/nanogpt",
3
- "version": "0.1.4",
3
+ "version": "0.1.6",
4
4
  "type": "module",
5
5
  "main": "dist/main.js",
6
6
  "types": "dist/main.d.ts",