@genai-fi/nanogpt 0.3.1 → 0.3.2

This diff compares publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -1,10 +1,11 @@
  import { GPTConfig } from './config';
  import { ITokeniser } from './tokeniser/type';
- import { default as NanoGPT } from './NanoGPTModel';
+ import { default as NanoGPT, TrainingLogEntry } from './NanoGPTModel';
  import { SaveOptions } from './utilities/save';
  import { default as Generator, IGenerateOptions } from './Generator';
  import { default as Trainer, ITrainerOptions } from './Trainer';
  import { default as MemoryProfiler } from './utilities/profile';
+ import { TrainingProgress } from './training/Trainer';
  type TeachableLLMStatus = 'warmup' | 'awaitingTokens' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
  export default class TeachableLLM {
  private ee;
@@ -36,11 +37,11 @@ export default class TeachableLLM {
  dispose(): void;
  on(event: 'status', listener: (status: TeachableLLMStatus) => void): void;
  on(event: 'error', listener: (error: Error) => void): void;
- on(event: 'trainStep', listener: (step: number) => void): void;
+ on(event: 'trainStep', listener: (step: TrainingLogEntry, progress: TrainingProgress) => void): void;
  on(event: 'loaded', listener: () => void): void;
  off(event: 'status', listener: (status: TeachableLLMStatus) => void): void;
  off(event: 'error', listener: (error: Error) => void): void;
- off(event: 'trainStep', listener: (step: number) => void): void;
+ off(event: 'trainStep', listener: (step: TrainingLogEntry, progress: TrainingProgress) => void): void;
  off(event: 'loaded', listener: () => void): void;
  }
  export {};
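
The headline change in this release is the 'trainStep' event: listeners now receive the step's full TrainingLogEntry plus a cumulative TrainingProgress summary instead of a bare step number. A minimal sketch of the new listener shape, assuming an already-constructed TeachableLLM instance named `model` (the diff does not show how the two types are re-exported from the package root, so they are left implicit here):

```ts
// Sketch only: `model` is assumed to be a constructed TeachableLLM instance.
model.on('trainStep', (log, progress) => {
  // `log` is the per-step TrainingLogEntry (loss, step, batchSize, ...);
  // `progress` is the cumulative TrainingProgress declared later in this diff.
  console.log(
    `step ${log.step}: loss=${log.loss.toFixed(4)}, ` +
    `${progress.samplesPerSecond.toFixed(1)} samples/s ` +
    `(${progress.totalSamples} samples in ${progress.duration} ms)`
  );
});
```

Existing listeners that relied on the old `(step: number)` signature will need updating; `log.step` carries the same value.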
package/dist/Trainer.js CHANGED
@@ -1,10 +1,10 @@
- import { E as l } from "./index-Dwqa6Zy2.js";
- import h from "./training/FullTrainer.js";
- class c extends l {
+ import { E as m } from "./index-Dwqa6Zy2.js";
+ import d from "./training/FullTrainer.js";
+ class S extends m {
  trainer;
  hasTrained = !1;
  constructor(e, t) {
- super(), this.trainer = new h(e, t, 1e-3);
+ super(), this.trainer = new d(e, t, 1e-3);
  }
  stop() {
  this.trainer.stop();
@@ -13,28 +13,35 @@ class c extends l {
  this.hasTrained = !1, this.trainer.reset();
  }
  async train(e, t) {
- const { trainDataset: a, validationDataset: r } = await this.trainer.createTrainValidationSplit(
+ const { trainDataset: s, validationDataset: n } = await this.trainer.createTrainValidationSplit(
  e,
  t?.batchSize || 32,
  t?.validationSplit || 0.1
- );
+ ), r = e.reduce((i, a) => i + a.length, 0) * (1 - (t?.validationSplit || 0));
  this.hasTrained || this.trainer.setLearningRate(t?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start"), await this.trainer.trainOnDataset(
- a,
+ s,
  {
  prompt: t?.prompt,
  logInterval: t?.logInterval || 10,
  desiredLoss: t?.desiredLoss || 0.01,
  maxSteps: t?.maxSteps || 1e3,
- onStep: async (i) => {
- const s = this.listeners("log");
- for (const n of s)
- await n(i);
+ onStep: async (i, a) => {
+ const l = this.listeners("log");
+ for (const h of l)
+ await h(i, {
+ ...a,
+ progress: a.totalSamples / r,
+ remaining: Math.max(
+ 0,
+ (r - a.totalSamples) / a.totalSamples * a.duration
+ )
+ });
  }
  },
- r
+ n
  ), this.emit("stop");
  }
  }
  export {
- c as default
+ S as default
  };
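
In this wrapper (dist/Trainer.js), train() now pre-computes the total number of training samples — the summed length of the input texts scaled by 1 − validationSplit — and each onStep callback enriches the inner trainer's TrainingProgress with a completion fraction and a rough time-remaining estimate before forwarding it to 'log' listeners. A de-minified sketch of that arithmetic (names are mine; the shipped code is minified):

```ts
// De-minified sketch, not the literal shipped code. `totalTrainSamples` is
// computed once before training starts from the summed .length of the inputs:
//   texts.reduce((sum, t) => sum + t.length, 0) * (1 - (options?.validationSplit || 0))
interface InnerProgress { duration: number; totalSamples: number; samplesPerSecond: number; }

function withEta(p: InnerProgress, totalTrainSamples: number) {
  return {
    ...p,
    // fraction of the training set consumed so far
    progress: p.totalSamples / totalTrainSamples,
    // rough ETA: remaining samples scaled by the observed time per sample
    remaining: Math.max(
      0,
      ((totalTrainSamples - p.totalSamples) / p.totalSamples) * p.duration,
    ),
  };
}

// withEta({ duration: 10_000, totalSamples: 1_000, samplesPerSecond: 100 }, 4_000)
// → { ..., progress: 0.25, remaining: 30_000 /* ms */ }
```

Since the denominator comes from the summed .length of the training inputs while totalSamples counts batch items, the resulting fraction is an estimate rather than an exact batch count.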
@@ -1,69 +1,77 @@
- import { generateText as w } from "../utilities/generate.js";
- import x from "./Trainer.js";
- import g from "./Evaluator.js";
+ import { generateText as v } from "../utilities/generate.js";
+ import L from "./Trainer.js";
+ import x from "./Evaluator.js";
  import { a as h } from "../index-pWA4_lUh.js";
- const S = {
+ const D = {
  desiredLoss: 0.01,
  logInterval: 1,
  maxSteps: 1e3
  };
- class D extends x {
+ class E extends L {
  constructor(r, i, o = 3e-4) {
  super(r, i, o);
  }
  // Train for multiple epochs using Dataset API - FIXED memory leaks
  async trainOnDataset(r, i, o) {
- const { desiredLoss: m, logInterval: u, onStep: n, prompt: l, maxSteps: d } = {
- ...S,
+ const { desiredLoss: u, logInterval: d, onStep: l, prompt: c, maxSteps: g } = {
+ ...D,
  ...i
- }, t = {
+ }, n = Date.now(), t = {
  step: 0,
  lastLoss: 1e6,
  totalSteps: 0,
  losses: [],
  validationLosses: [],
+ logStartTime: n,
+ trainingDuration: 0,
  ...this.lastState || {}
  };
- this.lastState = t, this.dummyPass(), this.model.trainable = !0;
- const f = Date.now();
- this.running = !0;
- const c = o ? new g(this.model, o) : void 0, v = await r.iterator();
+ this.lastState = t, this.dummyPass(), this.model.trainable = !0, this.running = !0, t.logStartTime = n;
+ const m = o ? new x(this.model, o) : void 0, S = await r.iterator();
  try {
- for (; this.running && !(t.lastLoss < m); ) {
- const e = await v.next();
- if (e.done) break;
- const p = e.value, L = this.trainBatch(t, p), a = {
+ for (; this.running && !(t.lastLoss < u); ) {
+ const a = await S.next();
+ if (a.done) break;
+ const p = a.value, f = this.trainBatch(t, p), s = {
  loss: t.lastLoss,
  step: t.step,
- time: Date.now() - f,
+ time: Date.now() - n,
  batchSize: p.xs.shape[0]
  };
- if (this.model.log.push(a), t.step % u === 0) {
- if (await L, c)
+ if (this.model.log.push(s), t.step % d === 0) {
+ await f;
+ const w = Date.now();
+ if (t.trainingDuration += w - t.logStartTime, m)
  try {
- const s = await c.evaluate(5);
- t.validationLosses.push(s), a.valLoss = s;
- } catch (s) {
- console.error("Validation error:", s);
+ const e = await m.evaluate(5);
+ t.validationLosses.push(e), s.valLoss = e;
+ } catch (e) {
+ console.error("Validation error:", e);
  }
- if (n) {
- if (l) {
- const s = await w(this.tokenizer, this.model, l, 100, {
+ if (l) {
+ if (c) {
+ const T = await v(this.tokenizer, this.model, c, 100, {
  temperature: 0.8
  });
- a.example = s;
+ s.example = T;
  }
- await n(a);
+ const e = {
+ duration: t.trainingDuration,
+ totalSamples: t.totalSteps * s.batchSize,
+ samplesPerSecond: t.totalSteps * s.batchSize / (t.trainingDuration / 1e3)
+ };
+ await l(s, e);
  }
+ t.logStartTime = Date.now();
  }
- t.step >= d && this.stop();
+ t.step >= g && this.stop();
  }
- } catch (e) {
- throw console.error("Training error:", e), h(), e;
+ } catch (a) {
+ throw console.error("Training error:", a), h(), a;
  }
  return h(), this.running = !1, { losses: t.losses, validationLosses: t.validationLosses };
  }
  }
  export {
- D as default
+ E as default
  };
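
The substantive change in the hunk above (apparently the full trainer imported by dist/Trainer.js) is duration bookkeeping: logStartTime and trainingDuration accumulate only active training time. The running slice is closed when a log interval is reached, and the clock restarts only after validation, example generation, and onStep listeners have finished, so that overhead is excluded from duration and samplesPerSecond. Schematically (using the state field names, not the minified locals):

```ts
// Schematic of the interval accounting above, not the literal shipped code.
const state = { trainingDuration: 0, logStartTime: Date.now() };
// ...training steps run until a log interval is reached...
state.trainingDuration += Date.now() - state.logStartTime; // close the training slice
// ...evaluate validation loss, generate an example, await onStep listeners...
state.logStartTime = Date.now(); // restart the clock, excluding that overhead
```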
@@ -14,19 +14,19 @@ class E extends v {
  trainingPattern = [];
  startPass = 0;
  startLayer = 0;
- constructor(s, r, e = 3e-4) {
- if (super(s, r, e), this.trainingPattern = w[s.config.nLayer - 1] || [], s.log.length > 0) {
- const i = s.log[s.log.length - 1];
+ constructor(a, r, e = 3e-4) {
+ if (super(a, r, e), this.trainingPattern = w[a.config.nLayer - 1] || [], a.log.length > 0) {
+ const i = a.log[a.log.length - 1];
  i.pass !== void 0 && i.layer !== void 0 && (this.startPass = i.pass, this.startLayer = i.layer, console.log(`Resuming training from pass ${this.startPass}, layer ${this.startLayer}`));
  }
  }
- applyTrainingPattern(s) {
- const r = s < this.trainingPattern.length ? s : this.trainingPattern.length - 1, e = this.trainingPattern[r];
+ applyTrainingPattern(a) {
+ const r = a < this.trainingPattern.length ? a : this.trainingPattern.length - 1, e = this.trainingPattern[r];
  this.model.setSkipMask(e.skip), this.model.setTrainableMask(e.trainable), this.resetOptimizer(e.adam), console.log("Applied training pattern:", r, e);
  }
  // Train for multiple epochs using Dataset API - FIXED memory leaks
- async trainOnDataset(s, r, e) {
- const { desiredLoss: i, logInterval: L, stepsPerLayer: f, onLayerChange: o, onPassComplete: p, onStep: h, prompt: c } = {
+ async trainOnDataset(a, r, e) {
+ const { desiredLoss: i, logInterval: L, stepsPerLayer: d, onLayerChange: l, onPassComplete: p, onStep: h, prompt: c } = {
  ...x,
  ...r
  }, t = {
@@ -37,50 +37,55 @@ class E extends v {
  lastLoss: 1e6,
  totalSteps: 0,
  losses: [],
- validationLosses: []
+ validationLosses: [],
+ trainingDuration: 0
  };
  this.dummyPass();
- const d = Date.now();
+ const S = Date.now();
  this.startPass = 0, this.startLayer = 0;
- const g = e ? new T(this.model, e) : void 0, P = await s.iterator();
+ const g = e ? new T(this.model, e) : void 0, f = await a.iterator();
  this.applyTrainingPattern(t.layerStep % this.trainingPattern.length);
  try {
  for (; !(t.lastLoss < i); ) {
- const n = await P.next();
- if (n.done) break;
- const y = n.value, S = this.trainBatch(t, y);
+ const o = await f.next();
+ if (o.done) break;
+ const y = o.value, P = this.trainBatch(t, y);
  t.stepSinceLayerChange++;
- const l = {
+ const n = {
  loss: t.lastLoss,
  step: t.step,
- time: Date.now() - d,
+ time: Date.now() - S,
  batchSize: y.xs.shape[0],
  pass: t.pass,
  layer: t.layerStep % this.model.config.nLayer
  };
- if (this.model.log.push(l), t.step % L === 0) {
- if (await S, g)
+ if (this.model.log.push(n), t.step % L === 0) {
+ if (await P, g)
  try {
- const a = await g.evaluate(5);
- t.validationLosses.push(a), l.valLoss = a;
- } catch (a) {
- console.error("Validation error:", a);
+ const s = await g.evaluate(5);
+ t.validationLosses.push(s), n.valLoss = s;
+ } catch (s) {
+ console.error("Validation error:", s);
  }
  if (h) {
  if (c) {
- const a = await u(this.tokenizer, this.model, c, 100, {
+ const s = await u(this.tokenizer, this.model, c, 100, {
  temperature: 0.8,
  topK: 10
  });
- l.example = a;
+ n.example = s;
  }
- await h(l);
+ await h(n, {
+ duration: t.trainingDuration,
+ totalSamples: t.totalSteps * n.batchSize,
+ samplesPerSecond: t.totalSteps * n.batchSize / (t.trainingDuration / 1e3)
+ });
  }
  }
- t.stepSinceLayerChange >= f && (t.layerStep++, t.layerStep % this.model.config.nLayer === 0 ? (o && await o(t.layerStep, t.pass), p && await p(t.pass), t.pass++) : o && await o(t.layerStep, t.pass), t.stepSinceLayerChange = 0, this.applyTrainingPattern(t.layerStep % this.trainingPattern.length));
+ t.stepSinceLayerChange >= d && (t.layerStep++, t.layerStep % this.model.config.nLayer === 0 ? (l && await l(t.layerStep, t.pass), p && await p(t.pass), t.pass++) : l && await l(t.layerStep, t.pass), t.stepSinceLayerChange = 0, this.applyTrainingPattern(t.layerStep % this.trainingPattern.length));
  }
- } catch (n) {
- throw console.error("Training error:", n), m(), n;
+ } catch (o) {
+ throw console.error("Training error:", o), m(), o;
  }
  return m(), { losses: t.losses, validationLosses: t.validationLosses };
  }
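
For context, the file above is the layer-wise trainer: every stepsPerLayer steps it advances layerStep and applies the next training pattern, and once layerStep wraps around nLayer a pass completes. That rotation logic is unchanged in 0.3.2 (only locals were renamed); de-minified with guessed names it reads roughly as:

```ts
// Schematic de-minification of the rotation above; same behaviour, not the
// literal shipped source, and all names are guessed.
interface RotationState { stepSinceLayerChange: number; layerStep: number; pass: number; }

async function maybeRotate(
  state: RotationState,
  stepsPerLayer: number,
  nLayer: number,
  patternCount: number,
  applyPattern: (patternIndex: number) => void,
  onLayerChange?: (layerStep: number, pass: number) => Promise<void> | void,
  onPassComplete?: (pass: number) => Promise<void> | void,
): Promise<void> {
  if (state.stepSinceLayerChange < stepsPerLayer) return;
  state.layerStep++;
  await onLayerChange?.(state.layerStep, state.pass); // fires on every layer change
  if (state.layerStep % nLayer === 0) {
    await onPassComplete?.(state.pass); // all layers visited once: pass complete
    state.pass++;
  }
  state.stepSinceLayerChange = 0;
  applyPattern(state.layerStep % patternCount);
}
```

Note that, unlike the full trainer, within the hunks shown here trainingDuration is initialised to 0 but never incremented before being used in the progress payload.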
@@ -12,6 +12,11 @@ export interface TrainingState {
  losses: number[];
  validationLosses: number[];
  }
+ export interface TrainingProgress {
+ duration: number;
+ totalSamples: number;
+ samplesPerSecond: number;
+ }
  export interface AdamConfig {
  learningRateFactor: number;
  beta1: number;
@@ -23,7 +28,7 @@ export interface TrainingOptions {
  logInterval: number;
  prompt?: string;
  maxSteps: number;
- onStep?: (log: TrainingLogEntry) => Promise<void> | void;
+ onStep?: (log: TrainingLogEntry, progress: TrainingProgress) => Promise<void> | void;
  }
  export default abstract class GPTTrainer {
  protected tokenizer: ITokeniser;
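
TrainingProgress is the new second argument threaded through onStep and surfaced on the 'trainStep' event: duration is cumulative active training time in milliseconds, totalSamples is total steps × batch size, and samplesPerSecond is the ratio of the two. A quick worked example with made-up numbers:

```ts
// Hypothetical values, shown only to illustrate the units.
const progress = {
  duration: 20_000,                          // 20 s of accumulated training time
  totalSamples: 3_200,                       // e.g. 100 steps at batch size 32
  samplesPerSecond: 3_200 / (20_000 / 1000), // = 160 samples/s
};
```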
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@genai-fi/nanogpt",
- "version": "0.3.1",
+ "version": "0.3.2",
  "type": "module",
  "main": "dist/main.js",
  "types": "dist/main.d.ts",