@genai-fi/nanogpt 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,6 @@
  import { default as TF } from '@tensorflow/tfjs';
  import { GPTConfig } from './config';
  export interface TrainingLogEntry {
- epoch: number;
  loss: number;
  valLoss?: number;
  step: number;
@@ -6,7 +6,7 @@ import { default as Generator, IGenerateOptions } from './Generator';
  import { default as Trainer, ITrainerOptions } from './Trainer';
  import { default as EE } from 'eventemitter3';
  type TeachableLLMStatus = 'warmup' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
- export default class TeachableLLM extends EE<'status' | 'error'> {
+ export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
  private _config?;
  private _model?;
  readonly tf: typeof TF;
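TeachableLLM now emits a third event, 'trainStep', fired once per logged training step. A minimal subscription sketch (mine, not from the package docs), assuming an existing instance and a payload matching the TrainingLogEntry above (which no longer carries `epoch`):

```ts
import TeachableLLM from '@genai-fi/nanogpt';

declare const llm: TeachableLLM; // obtained via create() or loadModel()

llm.on('trainStep', async (log) => {
  // The library awaits each 'trainStep' listener in sequence (see the
  // bundled main.js below), so async work here finishes before the next step.
  console.log(`step ${log.step}: loss=${log.loss}`, log.valLoss ?? '');
});
```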
@@ -1,13 +1,13 @@
- import a from "./NanoGPTModel.js";
- import { defaultConfig as h } from "./config.js";
- import { saveModel as d } from "./utilities/save.js";
- import { loadModel as m } from "./utilities/load.js";
- import l from "./Generator.js";
- import u from "./Trainer.js";
- import { E as _ } from "./index-SOhdqzHq.js";
- import { dummyPassAsync as f } from "./utilities/dummy.js";
- import c from "./tokeniser/CharTokeniser.js";
- class s extends _ {
+ import d from "./NanoGPTModel.js";
+ import { defaultConfig as m } from "./config.js";
+ import { saveModel as u } from "./utilities/save.js";
+ import { loadModel as l } from "./utilities/load.js";
+ import f from "./Generator.js";
+ import _ from "./Trainer.js";
+ import { E as c } from "./index-SOhdqzHq.js";
+ import { dummyPassAsync as a } from "./utilities/dummy.js";
+ import g from "./tokeniser/CharTokeniser.js";
+ class n extends c {
  _config;
  _model;
  tf;
@@ -43,23 +43,27 @@ class s extends _ {
  saveModel() {
  if (!this._model || !this._tokeniser)
  throw new Error("Model or tokeniser is not initialized.");
- return d(this._model, this._tokeniser);
+ return u(this._model, this._tokeniser);
  }
  static loadModel(t, r) {
- const e = new s(t);
- return m(t, r).then(({ model: i, tokeniser: o }) => {
- e._model = i, e._tokeniser = o, e._config = i.config, e.setStatus("warmup"), f(i).then(() => {
+ const e = new n(t);
+ return l(t, r).then(({ model: i, tokeniser: o }) => {
+ e._model = i, e._tokeniser = o, e._config = i.config, e.setStatus("warmup"), a(i).then(() => {
  e.setStatus("ready");
- }).catch((n) => {
- e.setStatus("error"), e.emit("error", n);
+ }).catch((s) => {
+ e.setStatus("error"), e.emit("error", s);
  });
  }).catch((i) => {
  e.setStatus("error"), e.emit("error", i);
  }), e;
  }
  static create(t, r = {}) {
- const e = { ...h, ...r }, i = new c(e.vocabSize), o = new a(t, e);
- return new s(t, i, o);
+ const e = { ...m, ...r }, i = new g(e.vocabSize), o = new d(t, e), s = new n(t, i, o);
+ return s.setStatus("warmup"), a(o).then(() => {
+ s.setStatus("ready");
+ }).catch((h) => {
+ s.setStatus("error"), s.emit("error", h);
+ }), s;
  }
  getNumParams() {
  if (!this._model)
@@ -69,8 +73,12 @@ class s extends _ {
  trainer() {
  if (!this._model || !this._tokeniser)
  throw new Error("Model or tokeniser is not initialized.");
- const t = new u(this._model, this._tokeniser);
- return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t;
+ const t = new _(this._model, this._tokeniser);
+ return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (r) => {
+ const e = this.listeners("trainStep");
+ for (const i of e)
+ await i(r);
+ }), t;
  }
  train(t, r) {
  return this.trainer().train(t, r);
@@ -78,13 +86,17 @@ class s extends _ {
  generator() {
  if (!this._model || !this._tokeniser)
  throw new Error("Model or tokeniser is not initialized.");
- const t = new l(this._model, this._tokeniser);
- return t.on("start", () => this.setStatus("busy")), t.on("stop", () => this.setStatus("ready")), t;
+ const t = new f(this._model, this._tokeniser);
+ return t.on("start", () => {
+ this.status === "ready" && this.setStatus("busy");
+ }), t.on("stop", () => {
+ this.status === "busy" && this.setStatus("ready");
+ }), t;
  }
  generateText(t, r) {
  return this.generator().generate(t, r);
  }
  }
  export {
- s as default
+ n as default
  };
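Behavioural change in `create()`: it now starts the async warmup pass itself, so a fresh instance begins in 'warmup' and later transitions to 'ready' or 'error', matching what `loadModel()` already did. A hedged readiness helper (mine, not the package's), assuming `create(tf, config)` and a public `status` field that mirrors `setStatus()`:

```ts
import { default as TF } from '@tensorflow/tfjs';
import TeachableLLM from '@genai-fi/nanogpt';

// Resolve once the model leaves 'warmup'; reject on the first 'error'.
function whenReady(llm: TeachableLLM): Promise<void> {
  return new Promise((resolve, reject) => {
    llm.on('status', () => {
      if (llm.status === 'ready') resolve();
    });
    llm.on('error', reject);
  });
}

const llm = TeachableLLM.create(TF, {});
await whenReady(llm); // in 0.1.1, create() emitted no status changes at all
```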
package/dist/Trainer.d.ts CHANGED
@@ -2,7 +2,6 @@ import { default as NanoGPT } from './NanoGPTModel';
  import { ITokeniser } from './tokeniser/type';
  import { default as EE } from 'eventemitter3';
  export interface ITrainerOptions {
- epochs?: number;
  batchSize?: number;
  learningRate?: number;
  maxSteps?: number;
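With `epochs` removed from ITrainerOptions, run length is now bounded by `maxSteps` (defaulting to 1000 in 0.1.3, per Trainer.js below) together with `desiredLoss`. A hedged migration sketch, assuming the corpus argument accepted by `train()`:

```ts
import TeachableLLM from '@genai-fi/nanogpt';

declare const llm: TeachableLLM;
declare const text: string; // training corpus, as accepted by train()

// 0.1.1: await llm.train(text, { epochs: 2, maxSteps: 100, batchSize: 32 });
// 0.1.3: one pass, stopped by maxSteps or desiredLoss, whichever comes first:
await llm.train(text, { maxSteps: 1000, desiredLoss: 0.05, batchSize: 32 });
```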
package/dist/Trainer.js CHANGED
@@ -1,9 +1,9 @@
- import { E as s } from "./index-SOhdqzHq.js";
- import n from "./training/FullTrainer.js";
- class o extends s {
+ import { E as l } from "./index-SOhdqzHq.js";
+ import o from "./training/FullTrainer.js";
+ class d extends l {
  trainer;
  constructor(a, t) {
- super(), this.trainer = new n(a.tf, a, t, 1e-3);
+ super(), this.trainer = new o(a.tf, a, t, 1e-3);
  }
  stop() {
  }
@@ -13,16 +13,17 @@ class o extends s {
  t?.batchSize || 32,
  t?.validationSplit || 0.1
  );
- this.emit("start"), await this.trainer.trainOnDataset(
+ this.trainer.setLearningRate(t?.learningRate || 1e-3), this.emit("start"), await this.trainer.trainOnDataset(
  e,
  {
- epochs: t?.epochs || 2,
  prompt: t?.prompt,
- stepsPerEpoch: t?.maxSteps || 100,
  logInterval: t?.logInterval || 10,
  desiredLoss: t?.desiredLoss || 0.01,
+ maxSteps: t?.maxSteps || 1e3,
  onStep: async (i) => {
- this.emit("log", i);
+ const s = this.listeners("log");
+ for (const n of s)
+ await n(i);
  }
  },
  r
@@ -30,5 +31,5 @@ class o extends s {
  }
  }
  export {
- o as default
+ d as default
  };
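Note the `onStep` change above: instead of a fire-and-forget `this.emit("log", i)`, the trainer now awaits every 'log' listener in turn. The pattern in isolation (eventemitter3's `listeners()` returns the raw handler array):

```ts
import EE from 'eventemitter3';

// Awaiting listeners sequentially lets slow async handlers (chart updates,
// IndexedDB writes) apply backpressure to the training loop.
async function emitAwaited(ee: EE<'log'>, entry: unknown): Promise<void> {
  for (const listener of ee.listeners('log')) {
    await listener(entry);
  }
}
```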
@@ -1,75 +1,67 @@
- import { generateText as g } from "../utilities/generate.js";
- import T from "./Trainer.js";
- const b = {
- epochs: 1,
- stepsPerEpoch: 1e6,
+ import { generateText as L } from "../utilities/generate.js";
+ import w from "./Trainer.js";
+ const g = {
  desiredLoss: 0.01,
- logInterval: 1
+ logInterval: 1,
+ maxSteps: 1e3
  };
- class S extends T {
- constructor(a, r, t, i = 3e-4) {
- super(a, r, t, i);
+ class S extends w {
+ constructor(r, i, o, n = 3e-4) {
+ super(r, i, o, n);
  }
  // Train for multiple epochs using Dataset API - FIXED memory leaks
- async trainOnDataset(a, r, t) {
- const { epochs: i, stepsPerEpoch: n, desiredLoss: c, logInterval: L, onStep: h, onEpoch: o, prompt: l } = {
- ...b,
- ...r
+ async trainOnDataset(r, i, o) {
+ const { desiredLoss: n, logInterval: c, onStep: l, prompt: p, maxSteps: d } = {
+ ...g,
+ ...i
  }, s = {
- epoch: 0,
  pass: 0,
  depth: 1,
  step: 0,
  stepSinceDepthChange: 0,
  lastLoss: 1e6,
- epochLoss: 0,
  totalSteps: 0,
  losses: [],
  validationLosses: []
  };
  this.dummyPass(), this.model.trainable = !0;
  const m = Date.now();
- for (s.epoch = 0; s.epoch < i; s.epoch++) {
- s.step = 0, s.epochLoss = 0, s.pass = 0, s.depth = 1, s.stepSinceDepthChange = 0;
- const u = await a.iterator();
- try {
- for (; !(n && s.step >= n || s.lastLoss < c); ) {
- const e = await u.next();
- if (e.done) break;
- const f = e.value, w = this.trainBatch(s, f), p = {
- epoch: s.epoch,
- loss: s.lastLoss,
- step: s.step,
- time: Date.now() - m,
- batchSize: f.xs.shape[0]
- };
- if (this.model.log.push(p), s.step % L === 0 && (await w, h)) {
- if (l) {
- const v = await g(this.tokenizer, this.model, l, 100, {
+ this.running = !0;
+ const u = await r.iterator();
+ try {
+ for (; this.running && !(s.lastLoss < n); ) {
+ const e = await u.next();
+ if (e.done) break;
+ const h = e.value, f = this.trainBatch(s, h), a = {
+ loss: s.lastLoss,
+ step: s.step,
+ time: Date.now() - m,
+ batchSize: h.xs.shape[0]
+ };
+ if (this.model.log.push(a), s.step % c === 0) {
+ if (await f, o)
+ try {
+ const t = await this.evaluateOnDataset(o, 5);
+ s.validationLosses.push(t), a.valLoss = t;
+ } catch (t) {
+ console.error("Validation error:", t);
+ }
+ if (l) {
+ if (p) {
+ const t = await L(this.tokenizer, this.model, p, 100, {
  temperature: 0.8
  });
- p.example = v;
+ a.example = t;
  }
- await h(p);
+ await l(a);
  }
  }
- } catch (e) {
- throw console.error("Training error:", e), this.tf.dispose(), e;
+ s.step >= d && this.stop();
  }
- const d = s.epochLoss / s.step;
- if (t)
- try {
- const e = await this.evaluateOnDataset(t, 5);
- s.validationLosses.push(e), o && await o(s.epoch, d, e);
- } catch (e) {
- console.error("Validation error:", e);
- }
- else
- o && o(s.epoch, d);
- if (this.tf.dispose(), s.lastLoss < c)
- break;
+ } catch (e) {
+ throw console.error("Training error:", e), this.tf.dispose(), e;
  }
- return { losses: s.losses, validationLosses: s.validationLosses };
+ return this.tf.dispose(), this.running = !1, { losses: s.losses, validationLosses: s.validationLosses };
  }
  }
  export {
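De-minified, the new FullTrainer control flow is a single pass over the dataset: it runs until `desiredLoss` is met, the iterator is exhausted, or `running` is cleared, with `maxSteps` triggering `stop()` internally. A self-contained reconstruction (names are mine; logic follows the diff above):

```ts
interface LoopState { step: number; lastLoss: number; running: boolean; }
interface LoopOptions { desiredLoss: number; maxSteps: number; logInterval: number; }

async function singlePassLoop(
  state: LoopState,
  nextBatch: () => Promise<{ done: boolean; loss: number }>,
  opts: LoopOptions,
  onStep?: (s: LoopState) => Promise<void>,
): Promise<void> {
  state.running = true;
  while (state.running && state.lastLoss >= opts.desiredLoss) {
    const batch = await nextBatch();
    if (batch.done) break;
    state.lastLoss = batch.loss;
    state.step++;
    // Validation and the onStep callback now run inside the logInterval branch.
    if (state.step % opts.logInterval === 0 && onStep) await onStep(state);
    // The per-epoch bound is replaced by a hard step cap that requests a stop.
    if (state.step >= opts.maxSteps) state.running = false;
  }
}
```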
@@ -1,111 +1,91 @@
- import { generateText as v } from "../utilities/generate.js";
- import T from "./Trainer.js";
- import { schedule as k } from "./lwSchedule.js";
- const x = {
- epochs: 1,
- stepsPerEpoch: 1e6,
+ import { generateText as d } from "../utilities/generate.js";
+ import S from "./Trainer.js";
+ import { schedule as u } from "./lwSchedule.js";
+ const w = {
  desiredLoss: 0.01,
  logInterval: 1,
  stepsPerLayer: 400,
- maxPasses: 3
+ maxPasses: 3,
+ maxSteps: 1e3
  };
- class D extends T {
+ class b extends S {
  trainingPattern = [];
  startPass = 0;
  startLayer = 0;
- constructor(o, e, t, h = 3e-4) {
- if (super(o, e, t, h), this.trainingPattern = k[e.config.nLayer - 1] || [], e.log.length > 0) {
- const r = e.log[e.log.length - 1];
- r.pass !== void 0 && r.layer !== void 0 && (this.startPass = r.pass, this.startLayer = r.layer, console.log(`Resuming training from pass ${this.startPass}, layer ${this.startLayer}`));
+ constructor(r, a, e, p = 3e-4) {
+ if (super(r, a, e, p), this.trainingPattern = u[a.config.nLayer - 1] || [], a.log.length > 0) {
+ const i = a.log[a.log.length - 1];
+ i.pass !== void 0 && i.layer !== void 0 && (this.startPass = i.pass, this.startLayer = i.layer, console.log(`Resuming training from pass ${this.startPass}, layer ${this.startLayer}`));
  }
  }
- applyTrainingPattern(o) {
- const e = o < this.trainingPattern.length ? o : this.trainingPattern.length - 1, t = this.trainingPattern[e];
- this.model.setSkipMask(t.skip), this.model.setTrainableMask(t.trainable), this.resetOptimizer(t.adam), console.log("Applied training pattern:", e, t);
+ applyTrainingPattern(r) {
+ const a = r < this.trainingPattern.length ? r : this.trainingPattern.length - 1, e = this.trainingPattern[a];
+ this.model.setSkipMask(e.skip), this.model.setTrainableMask(e.trainable), this.resetOptimizer(e.adam), console.log("Applied training pattern:", a, e);
  }
  // Train for multiple epochs using Dataset API - FIXED memory leaks
- async trainOnDataset(o, e, t) {
- const {
- epochs: h,
- stepsPerEpoch: r,
- desiredLoss: c,
- logInterval: P,
- stepsPerLayer: d,
- onLayerChange: n,
- onPassComplete: g,
- onStep: y,
- onEpoch: p,
- prompt: L
- } = {
- ...x,
- ...e
- }, s = {
- epoch: 0,
+ async trainOnDataset(r, a, e) {
+ const { desiredLoss: p, logInterval: i, stepsPerLayer: L, onLayerChange: l, onPassComplete: h, onStep: c, prompt: g } = {
+ ...w,
+ ...a
+ }, t = {
  pass: 0,
  layerStep: 0,
  step: 0,
  stepSinceLayerChange: 0,
  lastLoss: 1e6,
- epochLoss: 0,
  totalSteps: 0,
  losses: [],
  validationLosses: []
  };
  this.dummyPass();
- const S = Date.now();
- for (s.epoch = 0; s.epoch < h; s.epoch++) {
- s.step = 0, s.epochLoss = 0, s.pass = this.startPass, s.layerStep = this.startLayer + this.startPass * this.model.config.nLayer, s.stepSinceLayerChange = 0, this.startPass = 0, this.startLayer = 0;
- const u = await o.iterator();
- this.applyTrainingPattern(s.layerStep % this.trainingPattern.length);
- try {
- for (; !(r && s.step >= r || s.lastLoss < c); ) {
- const a = await u.next();
- if (a.done) break;
- const m = a.value, w = this.trainBatch(s, m);
- s.stepSinceLayerChange++;
- const l = {
- epoch: s.epoch,
- loss: s.lastLoss,
- step: s.step,
- time: Date.now() - S,
- batchSize: m.xs.shape[0],
- pass: s.pass,
- layer: s.layerStep % this.model.config.nLayer
- };
- if (this.model.log.push(l), s.step % P === 0 && (await w, y)) {
- if (L) {
- const i = await v(this.tokenizer, this.model, L, 100, {
+ const m = Date.now();
+ this.startPass = 0, this.startLayer = 0;
+ const f = await r.iterator();
+ this.applyTrainingPattern(t.layerStep % this.trainingPattern.length);
+ try {
+ for (; !(t.lastLoss < p); ) {
+ const n = await f.next();
+ if (n.done) break;
+ const y = n.value, P = this.trainBatch(t, y);
+ t.stepSinceLayerChange++;
+ const o = {
+ loss: t.lastLoss,
+ step: t.step,
+ time: Date.now() - m,
+ batchSize: y.xs.shape[0],
+ pass: t.pass,
+ layer: t.layerStep % this.model.config.nLayer
+ };
+ if (this.model.log.push(o), t.step % i === 0) {
+ if (await P, e)
+ try {
+ const s = await this.evaluateOnDataset(e, 5);
+ t.validationLosses.push(s), o.valLoss = s;
+ } catch (s) {
+ console.error("Validation error:", s);
+ }
+ if (c) {
+ if (g) {
+ const s = await d(this.tokenizer, this.model, g, 100, {
  temperature: 0.8,
  topK: 10
  });
- l.example = i;
+ o.example = s;
  }
- await y(l);
- }
- if (s.stepSinceLayerChange >= d) {
- let i;
- t && (i = await this.evaluateOnDataset(t, 5), s.validationLosses.push(i), l.valLoss = i), s.layerStep++, s.layerStep % this.model.config.nLayer === 0 ? (n && await n(s.layerStep, s.pass, i), g && await g(s.pass), s.pass++) : n && await n(s.layerStep, s.pass, i), s.stepSinceLayerChange = 0, this.applyTrainingPattern(s.layerStep % this.trainingPattern.length);
+ await c(o);
  }
  }
- } catch (a) {
- throw console.error("Training error:", a), this.tf.dispose(), a;
- }
- const f = s.epochLoss / s.step;
- if (t)
- try {
- const a = await this.evaluateOnDataset(t, 5);
- s.validationLosses.push(a), p && await p(s.epoch, f, a);
- } catch (a) {
- console.error("Validation error:", a);
+ if (t.stepSinceLayerChange >= L) {
+ let s;
+ e && (s = await this.evaluateOnDataset(e, 5), t.validationLosses.push(s), o.valLoss = s), t.layerStep++, t.layerStep % this.model.config.nLayer === 0 ? (l && await l(t.layerStep, t.pass, s), h && await h(t.pass), t.pass++) : l && await l(t.layerStep, t.pass, s), t.stepSinceLayerChange = 0, this.applyTrainingPattern(t.layerStep % this.trainingPattern.length);
  }
- else
- p && p(s.epoch, f);
- if (this.tf.dispose(), s.lastLoss < c)
- break;
+ }
+ } catch (n) {
+ throw console.error("Training error:", n), this.tf.dispose(), n;
  }
- return { losses: s.losses, validationLosses: s.validationLosses };
+ return this.tf.dispose(), { losses: t.losses, validationLosses: t.validationLosses };
  }
  }
  export {
- D as default
+ b as default
  };
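The layerwise trainer loses the same epoch bookkeeping while keeping its per-layer rotation: `onLayerChange` and `onPassComplete` still fire as `stepsPerLayer` is exhausted, and validation now also lands in the `logInterval` branch. A hedged sketch of the callback shapes, inferred from the call sites above (the layerwise options interface itself is not part of this diff):

```ts
// Hypothetical options object; signatures read off the de-minified call sites.
export const layerwiseOptions = {
  stepsPerLayer: 400,
  maxPasses: 3,
  maxSteps: 1000,
  onLayerChange: async (layerStep: number, pass: number, valLoss?: number) => {
    console.log(`layer step ${layerStep}, pass ${pass}, valLoss=${valLoss ?? 'n/a'}`);
  },
  onPassComplete: async (pass: number) => {
    console.log(`pass ${pass} complete`);
  },
};
```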
@@ -4,10 +4,8 @@ import { default as NanoGPT, TrainingLogEntry } from '../NanoGPTModel';
  import { default as TF } from '@tensorflow/tfjs';
  import { default as AdamExt } from './AdamExt';
  export interface TrainingState {
- epoch: number;
  step: number;
  lastLoss: number;
- epochLoss: number;
  totalSteps: number;
  losses: number[];
  validationLosses: number[];
@@ -19,12 +17,10 @@ export interface AdamConfig {
  epsilon: number;
  }
  export interface TrainingOptions {
- epochs: number;
- stepsPerEpoch: number;
  desiredLoss: number;
  logInterval: number;
  prompt?: string;
- onEpoch?: (e: number, loss: number, valLoss?: number) => Promise<void> | void;
+ maxSteps: number;
  onStep?: (log: TrainingLogEntry) => Promise<void> | void;
  }
  export default abstract class GPTTrainer {
@@ -34,7 +30,10 @@ export default abstract class GPTTrainer {
  protected datasetBuilder: DatasetBuilder;
  protected tf: typeof TF;
  protected learningRate: number;
+ protected running: boolean;
  constructor(tf: typeof TF, model: NanoGPT, tokenizer: ITokeniser, learningRate?: number);
+ setLearningRate(learningRate: number): void;
+ stop(): void;
  getOptimizer(): AdamExt;
  resetOptimizer(config?: AdamConfig): void;
  private printGradients;
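GPTTrainer now exposes cooperative cancellation (`running` plus `stop()`) and a mutable learning rate. A usage sketch against the declarations above (mine; the structural type stands in for any concrete trainer):

```ts
declare const trainer: {
  setLearningRate(lr: number): void;
  stop(): void;
  trainOnDataset(ds: unknown, opts: object, val?: unknown): Promise<unknown>;
};
declare const dataset: unknown;

trainer.setLearningRate(3e-4); // also rebuilds the Adam optimizer (see Trainer.js below)
const run = trainer.trainOnDataset(dataset, { desiredLoss: 0.01, logInterval: 10, maxSteps: 1000 });
// Cooperative stop: clears `running`; the loop exits at its next check.
trainer.stop();
await run;
```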
@@ -9,6 +9,13 @@ class y {
  datasetBuilder;
  tf;
  learningRate;
+ running = !1;
+ setLearningRate(t) {
+ this.learningRate = t, this.resetOptimizer({ learningRateFactor: 1, beta1: 0.9, beta2: 0.99, epsilon: 1e-8 });
+ }
+ stop() {
+ this.running = !1;
+ }
  getOptimizer() {
  return this.optimizer;
  }
@@ -57,7 +64,7 @@ class y {
  async trainBatch(t, e) {
  try {
  const s = this.trainStep(e, !1, !1);
- return e.xs.dispose(), e.ys.dispose(), t.step++, t.totalSteps++, s.array().then((a) => (t.lastLoss = a, t.losses.push(t.lastLoss), t.epochLoss += t.lastLoss, s.dispose(), t.lastLoss));
+ return e.xs.dispose(), e.ys.dispose(), t.step++, t.totalSteps++, s.array().then((a) => (t.lastLoss = a, t.losses.push(t.lastLoss), s.dispose(), t.lastLoss));
  } catch (s) {
  throw console.error(`Error processing batch at step ${t.step}:`, s), this.tf.dispose(), s;
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@genai-fi/nanogpt",
- "version": "0.1.1",
+ "version": "0.1.3",
  "type": "module",
  "main": "dist/main.js",
  "types": "dist/main.d.ts",
@@ -23,7 +23,7 @@
  "test": "vitest",
  "ci:test": "vitest --coverage --reporter=junit --outputFile=junit.xml",
  "coverage": "vitest run --coverage",
- "train": "tsx scripts/train.ts --epochs 2 --batch 64",
+ "train": "tsx scripts/train.ts --batch 64",
  "generate": "tsx scripts/generate.ts",
  "evaluate": "tsx scripts/evaluate.ts",
  "debug": "tsx scripts/debug.ts"