@genai-fi/nanogpt 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/TeachableLLM.d.ts +4 -3
- package/dist/Trainer.js +20 -13
- package/dist/training/FullTrainer.js +40 -32
- package/dist/training/LayerTrainer.js +32 -27
- package/dist/training/Trainer.d.ts +6 -1
- package/package.json +1 -1
package/dist/TeachableLLM.d.ts
CHANGED

@@ -1,10 +1,11 @@
  import { GPTConfig } from './config';
  import { ITokeniser } from './tokeniser/type';
- import { default as NanoGPT } from './NanoGPTModel';
+ import { default as NanoGPT, TrainingLogEntry } from './NanoGPTModel';
  import { SaveOptions } from './utilities/save';
  import { default as Generator, IGenerateOptions } from './Generator';
  import { default as Trainer, ITrainerOptions } from './Trainer';
  import { default as MemoryProfiler } from './utilities/profile';
+ import { TrainingProgress } from './training/Trainer';
  type TeachableLLMStatus = 'warmup' | 'awaitingTokens' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
  export default class TeachableLLM {
      private ee;
@@ -36,11 +37,11 @@ export default class TeachableLLM {
      dispose(): void;
      on(event: 'status', listener: (status: TeachableLLMStatus) => void): void;
      on(event: 'error', listener: (error: Error) => void): void;
-     on(event: 'trainStep', listener: (step:
+     on(event: 'trainStep', listener: (step: TrainingLogEntry, progress: TrainingProgress) => void): void;
      on(event: 'loaded', listener: () => void): void;
      off(event: 'status', listener: (status: TeachableLLMStatus) => void): void;
      off(event: 'error', listener: (error: Error) => void): void;
-     off(event: 'trainStep', listener: (step:
+     off(event: 'trainStep', listener: (step: TrainingLogEntry, progress: TrainingProgress) => void): void;
      off(event: 'loaded', listener: () => void): void;
  }
  export {};
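The practical effect of this change is that `trainStep` listeners now receive the per-step log entry together with a `TrainingProgress` summary. A minimal sketch of the new listener shape, using structural stand-ins for the two types (field names are inferred from the trainer code and Trainer.d.ts later in this diff, not taken from NanoGPTModel directly):

    // Sketch only: structural stand-ins for TrainingLogEntry and TrainingProgress.
    interface StepEntry { step: number; loss: number; time: number; batchSize: number; }
    interface Progress { duration: number; totalSamples: number; samplesPerSecond: number; }

    function onTrainStep(step: StepEntry, progress: Progress): void {
      console.log(
        `step ${step.step}: loss=${step.loss.toFixed(3)}`,
        `(${progress.samplesPerSecond.toFixed(1)} samples/s, ${progress.totalSamples} samples seen)`
      );
    }

    // llm.on('trainStep', onTrainStep);  // where `llm` is a TeachableLLM instance created elsewhere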
package/dist/Trainer.js
CHANGED

@@ -1,10 +1,10 @@
- import { E as
- import
- class
+ import { E as m } from "./index-Dwqa6Zy2.js";
+ import d from "./training/FullTrainer.js";
+ class S extends m {
    trainer;
    hasTrained = !1;
    constructor(e, t) {
-     super(), this.trainer = new
+     super(), this.trainer = new d(e, t, 1e-3);
    }
    stop() {
      this.trainer.stop();
@@ -13,28 +13,35 @@ class c extends l {
      this.hasTrained = !1, this.trainer.reset();
    }
    async train(e, t) {
-     const { trainDataset:
+     const { trainDataset: s, validationDataset: n } = await this.trainer.createTrainValidationSplit(
        e,
        t?.batchSize || 32,
        t?.validationSplit || 0.1
-     );
+     ), r = e.reduce((i, a) => i + a.length, 0) * (1 - (t?.validationSplit || 0));
      this.hasTrained || this.trainer.setLearningRate(t?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start"), await this.trainer.trainOnDataset(
-
+       s,
        {
          prompt: t?.prompt,
          logInterval: t?.logInterval || 10,
          desiredLoss: t?.desiredLoss || 0.01,
          maxSteps: t?.maxSteps || 1e3,
-         onStep: async (i) => {
-           const
-           for (const
-             await
+         onStep: async (i, a) => {
+           const l = this.listeners("log");
+           for (const h of l)
+             await h(i, {
+               ...a,
+               progress: a.totalSamples / r,
+               remaining: Math.max(
+                 0,
+                 (r - a.totalSamples) / a.totalSamples * a.duration
+               )
+             });
          }
        },
-
+       n
      ), this.emit("stop");
    }
  }
  export {
-
+   S as default
  };
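Here `r` estimates the number of training samples (the summed length of the inputs scaled by the non-validation fraction), and each emitted step now carries a completion fraction plus a remaining-time estimate extrapolated from the rate observed so far. A readable restatement of that arithmetic, with illustrative names rather than the minified identifiers above:

    // Illustrative restatement of the progress/remaining calculation added above.
    function describeProgress(
      expectedTrainingSamples: number, // `r` above: total input length × (1 − validationSplit)
      samplesSeen: number,             // progress.totalSamples reported by the trainer
      elapsedMs: number                // progress.duration reported by the trainer
    ): { progress: number; remainingMs: number } {
      const progress = samplesSeen / expectedTrainingSamples;
      // The ETA assumes the throughput observed so far continues unchanged.
      const remainingMs = Math.max(0, ((expectedTrainingSamples - samplesSeen) / samplesSeen) * elapsedMs);
      return { progress, remainingMs };
    }

    // Example: 8000 of 10000 samples after 40000 ms → progress 0.8, remainingMs 10000.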
package/dist/training/FullTrainer.js
CHANGED

@@ -1,69 +1,77 @@
- import { generateText as
- import
- import
+ import { generateText as v } from "../utilities/generate.js";
+ import L from "./Trainer.js";
+ import x from "./Evaluator.js";
  import { a as h } from "../index-pWA4_lUh.js";
- const
+ const D = {
    desiredLoss: 0.01,
    logInterval: 1,
    maxSteps: 1e3
  };
- class
+ class E extends L {
    constructor(r, i, o = 3e-4) {
      super(r, i, o);
    }
    // Train for multiple epochs using Dataset API - FIXED memory leaks
    async trainOnDataset(r, i, o) {
-     const { desiredLoss:
-     ...
+     const { desiredLoss: u, logInterval: d, onStep: l, prompt: c, maxSteps: g } = {
+       ...D,
        ...i
-     }, t = {
+     }, n = Date.now(), t = {
        step: 0,
        lastLoss: 1e6,
        totalSteps: 0,
        losses: [],
        validationLosses: [],
+       logStartTime: n,
+       trainingDuration: 0,
        ...this.lastState || {}
      };
-     this.lastState = t, this.dummyPass(), this.model.trainable = !0;
-     const
-     this.running = !0;
-     const c = o ? new g(this.model, o) : void 0, v = await r.iterator();
+     this.lastState = t, this.dummyPass(), this.model.trainable = !0, this.running = !0, t.logStartTime = n;
+     const m = o ? new x(this.model, o) : void 0, S = await r.iterator();
      try {
-       for (; this.running && !(t.lastLoss <
-         const
-         if (
-         const p =
+       for (; this.running && !(t.lastLoss < u); ) {
+         const a = await S.next();
+         if (a.done) break;
+         const p = a.value, f = this.trainBatch(t, p), s = {
            loss: t.lastLoss,
            step: t.step,
-           time: Date.now() -
+           time: Date.now() - n,
            batchSize: p.xs.shape[0]
          };
-         if (this.model.log.push(
-
+         if (this.model.log.push(s), t.step % d === 0) {
+           await f;
+           const w = Date.now();
+           if (t.trainingDuration += w - t.logStartTime, m)
             try {
-              const
-              t.validationLosses.push(
-            } catch (
-              console.error("Validation error:",
+              const e = await m.evaluate(5);
+              t.validationLosses.push(e), s.valLoss = e;
+            } catch (e) {
+              console.error("Validation error:", e);
             }
-           if (
-             if (
-               const
+           if (l) {
+             if (c) {
+               const T = await v(this.tokenizer, this.model, c, 100, {
                  temperature: 0.8
                });
-
+               s.example = T;
             }
-
+             const e = {
+               duration: t.trainingDuration,
+               totalSamples: t.totalSteps * s.batchSize,
+               samplesPerSecond: t.totalSteps * s.batchSize / (t.trainingDuration / 1e3)
+             };
+             await l(s, e);
           }
+           t.logStartTime = Date.now();
         }
-        t.step >=
+        t.step >= g && this.stop();
       }
-     } catch (
-       throw console.error("Training error:",
+     } catch (a) {
+       throw console.error("Training error:", a), h(), a;
     }
     return h(), this.running = !1, { losses: t.losses, validationLosses: t.validationLosses };
   }
 }
 export {
-
+  E as default
 };
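The key addition here is the `logStartTime`/`trainingDuration` pair: the duration counter is advanced only up to the moment a log interval begins and is restarted after logging, so validation and sample generation are excluded from the throughput figure passed to `onStep`. A sketch of how that payload follows from the accumulated state (illustrative names; the shipped code does this inline with minified identifiers):

    // How the new TrainingProgress argument is derived from the training state.
    interface TrainingProgress { duration: number; totalSamples: number; samplesPerSecond: number; }

    function buildProgress(totalSteps: number, batchSize: number, trainingDurationMs: number): TrainingProgress {
      const totalSamples = totalSteps * batchSize;          // steps so far × samples per batch
      return {
        duration: trainingDurationMs,                       // training time only, logging pauses excluded
        totalSamples,
        samplesPerSecond: totalSamples / (trainingDurationMs / 1e3),
      };
    }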
package/dist/training/LayerTrainer.js
CHANGED

@@ -14,19 +14,19 @@ class E extends v {
    trainingPattern = [];
    startPass = 0;
    startLayer = 0;
-   constructor(
-     if (super(
-       const i =
+   constructor(a, r, e = 3e-4) {
+     if (super(a, r, e), this.trainingPattern = w[a.config.nLayer - 1] || [], a.log.length > 0) {
+       const i = a.log[a.log.length - 1];
        i.pass !== void 0 && i.layer !== void 0 && (this.startPass = i.pass, this.startLayer = i.layer, console.log(`Resuming training from pass ${this.startPass}, layer ${this.startLayer}`));
      }
    }
-   applyTrainingPattern(
-     const r =
+   applyTrainingPattern(a) {
+     const r = a < this.trainingPattern.length ? a : this.trainingPattern.length - 1, e = this.trainingPattern[r];
      this.model.setSkipMask(e.skip), this.model.setTrainableMask(e.trainable), this.resetOptimizer(e.adam), console.log("Applied training pattern:", r, e);
    }
    // Train for multiple epochs using Dataset API - FIXED memory leaks
-   async trainOnDataset(
-     const { desiredLoss: i, logInterval: L, stepsPerLayer:
+   async trainOnDataset(a, r, e) {
+     const { desiredLoss: i, logInterval: L, stepsPerLayer: d, onLayerChange: l, onPassComplete: p, onStep: h, prompt: c } = {
        ...x,
        ...r
      }, t = {
@@ -37,50 +37,55 @@ class E extends v {
        lastLoss: 1e6,
        totalSteps: 0,
        losses: [],
-       validationLosses: []
+       validationLosses: [],
+       trainingDuration: 0
      };
      this.dummyPass();
-     const
+     const S = Date.now();
      this.startPass = 0, this.startLayer = 0;
-     const g = e ? new T(this.model, e) : void 0,
+     const g = e ? new T(this.model, e) : void 0, f = await a.iterator();
      this.applyTrainingPattern(t.layerStep % this.trainingPattern.length);
      try {
        for (; !(t.lastLoss < i); ) {
-         const
-         if (
-         const y =
+         const o = await f.next();
+         if (o.done) break;
+         const y = o.value, P = this.trainBatch(t, y);
          t.stepSinceLayerChange++;
-         const
+         const n = {
            loss: t.lastLoss,
            step: t.step,
-           time: Date.now() -
+           time: Date.now() - S,
            batchSize: y.xs.shape[0],
            pass: t.pass,
            layer: t.layerStep % this.model.config.nLayer
          };
-         if (this.model.log.push(
-           if (await
+         if (this.model.log.push(n), t.step % L === 0) {
+           if (await P, g)
             try {
-              const
-              t.validationLosses.push(
-            } catch (
-              console.error("Validation error:",
+              const s = await g.evaluate(5);
+              t.validationLosses.push(s), n.valLoss = s;
+            } catch (s) {
+              console.error("Validation error:", s);
             }
            if (h) {
              if (c) {
-               const
+               const s = await u(this.tokenizer, this.model, c, 100, {
                  temperature: 0.8,
                  topK: 10
                });
-
+               n.example = s;
             }
-             await h(
+             await h(n, {
+               duration: t.trainingDuration,
+               totalSamples: t.totalSteps * n.batchSize,
+               samplesPerSecond: t.totalSteps * n.batchSize / (t.trainingDuration / 1e3)
+             });
           }
         }
-        t.stepSinceLayerChange >=
+        t.stepSinceLayerChange >= d && (t.layerStep++, t.layerStep % this.model.config.nLayer === 0 ? (l && await l(t.layerStep, t.pass), p && await p(t.pass), t.pass++) : l && await l(t.layerStep, t.pass), t.stepSinceLayerChange = 0, this.applyTrainingPattern(t.layerStep % this.trainingPattern.length));
       }
-     } catch (
-       throw console.error("Training error:",
+     } catch (o) {
+       throw console.error("Training error:", o), m(), o;
     }
     return m(), { losses: t.losses, validationLosses: t.validationLosses };
   }
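For reference, the destructured options above imply the following shape for a layer-trainer `trainOnDataset` call; the option names come from the diff, while the values and callback bodies are purely illustrative:

    // Sketch of the layer-trainer options implied by the destructuring above.
    const layerTrainingOptions = {
      desiredLoss: 0.01,
      logInterval: 10,
      stepsPerLayer: 200,                         // steps before rotating to the next layer pattern
      onLayerChange: async (layerStep: number, pass: number) => {
        console.log(`layer step ${layerStep}, pass ${pass}`);
      },
      onPassComplete: async (pass: number) => {
        console.log(`pass ${pass} complete`);
      },
      onStep: async (
        log: { step: number; loss: number; pass?: number; layer?: number },
        progress: { duration: number; totalSamples: number; samplesPerSecond: number }
      ) => {
        console.log(`step ${log.step}: loss ${log.loss.toFixed(3)} (${progress.samplesPerSecond.toFixed(1)} samples/s)`);
      },
      prompt: 'Once upon a time',                 // optional: generate a sample at each log interval
    };
    // await layerTrainer.trainOnDataset(dataset, layerTrainingOptions, validationData);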
package/dist/training/Trainer.d.ts
CHANGED

@@ -12,6 +12,11 @@ export interface TrainingState {
      losses: number[];
      validationLosses: number[];
  }
+ export interface TrainingProgress {
+     duration: number;
+     totalSamples: number;
+     samplesPerSecond: number;
+ }
  export interface AdamConfig {
      learningRateFactor: number;
      beta1: number;
@@ -23,7 +28,7 @@ export interface TrainingOptions {
      logInterval: number;
      prompt?: string;
      maxSteps: number;
-     onStep?: (log: TrainingLogEntry) => Promise<void> | void;
+     onStep?: (log: TrainingLogEntry, progress: TrainingProgress) => Promise<void> | void;
  }
  export default abstract class GPTTrainer {
      protected tokenizer: ITokeniser;