@genai-fi/nanogpt 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/NanoGPTModel.d.ts +0 -1
- package/dist/TeachableLLM.d.ts +10 -6
- package/dist/TeachableLLM.js +70 -32
- package/dist/Trainer.d.ts +0 -1
- package/dist/Trainer.js +11 -11
- package/dist/training/FullTrainer.js +38 -50
- package/dist/training/LayerTrainer.js +56 -77
- package/dist/training/Trainer.d.ts +0 -5
- package/dist/training/Trainer.js +1 -1
- package/dist/utilities/load.js +11 -10
- package/dist/utilities/waitForModel.d.ts +2 -0
- package/dist/utilities/waitForModel.js +12 -0
- package/package.json +2 -2
package/dist/NanoGPTModel.d.ts
CHANGED
package/dist/TeachableLLM.d.ts
CHANGED
@@ -6,17 +6,21 @@ import { default as Generator, IGenerateOptions } from './Generator';
 import { default as Trainer, ITrainerOptions } from './Trainer';
 import { default as EE } from 'eventemitter3';
 type TeachableLLMStatus = 'warmup' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
-export default class TeachableLLM extends EE<'status' | 'error'> {
-
-
+export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
+    private _config?;
+    private _model?;
     readonly tf: typeof TF;
-
+    private _tokeniser?;
     private _status;
-    constructor(tf: typeof TF, tokeniser
+    constructor(tf: typeof TF, tokeniser?: ITokeniser, model?: NanoGPT);
+    get config(): GPTConfig;
+    get model(): NanoGPT;
+    get tokeniser(): ITokeniser;
     get status(): TeachableLLMStatus;
+    get ready(): boolean;
     private setStatus;
     saveModel(): Promise<Blob>;
-    static loadModel(tf: typeof TF, data: Blob | Buffer | string):
+    static loadModel(tf: typeof TF, data: Blob | Buffer | string): TeachableLLM;
     static create(tf: typeof TF, config?: Partial<GPTConfig>): TeachableLLM;
     getNumParams(): number;
     trainer(): Trainer;
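
Summarising the declaration changes: `tokeniser` and `model` become optional constructor arguments, `config`/`model`/`tokeniser` gain accessors alongside a new `ready` flag, the event union gains `trainStep`, and `loadModel` is now declared to return `TeachableLLM` directly (its previous return type is truncated in this view). A minimal consumer sketch against this surface; the import path is an assumption, and only the members listed above are confirmed by the diff:

import * as tf from '@tensorflow/tfjs';
import TeachableLLM from '@genai-fi/nanogpt';

const llm = TeachableLLM.create(tf, { vocabSize: 256 });

// Status moves 'warmup' -> 'ready' (or 'error') once the warmup pass completes.
llm.on('status', (s) => console.log('status:', s));
llm.on('error', (err) => console.error('model error:', err));

// New in 0.1.2: a per-step training event.
llm.on('trainStep', (entry) => console.log('train step', entry));

// Accessors such as `config` and `model` throw until initialised
// (see TeachableLLM.js below), so guard with the new `ready` getter.
if (llm.ready) console.log('parameters:', llm.getNumParams());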
package/dist/TeachableLLM.js
CHANGED
@@ -1,58 +1,96 @@
-import
-import { defaultConfig as
-import { saveModel as
-import { loadModel as
-import
-import
-import { E as
-import { dummyPassAsync as
+import m from "./NanoGPTModel.js";
+import { defaultConfig as d } from "./config.js";
+import { saveModel as l } from "./utilities/save.js";
+import { loadModel as u } from "./utilities/load.js";
+import _ from "./Generator.js";
+import c from "./Trainer.js";
+import { E as f } from "./index-SOhdqzHq.js";
+import { dummyPassAsync as a } from "./utilities/dummy.js";
 import g from "./tokeniser/CharTokeniser.js";
-class n extends
-
-
+class n extends f {
+  _config;
+  _model;
   tf;
-
+  _tokeniser;
   _status = "loading";
-  constructor(t,
-    super(), this.tf = t, this.
+  constructor(t, r, e) {
+    super(), this.tf = t, this._config = e?.config, this._tokeniser = r, this._model = e;
+  }
+  get config() {
+    if (!this._config)
+      throw new Error("Model configuration is not initialized.");
+    return this._config;
+  }
+  get model() {
+    if (!this._model)
+      throw new Error("Model is not initialized.");
+    return this._model;
+  }
+  get tokeniser() {
+    if (!this._tokeniser)
+      throw new Error("Tokeniser is not initialized.");
+    return this._tokeniser;
   }
   get status() {
     return this._status;
   }
+  get ready() {
+    return this._status === "ready" && !!this._model && !!this._tokeniser;
+  }
   setStatus(t) {
     this._status !== t && (this._status = t, this.emit("status", t));
   }
   saveModel() {
-
+    if (!this._model || !this._tokeniser)
+      throw new Error("Model or tokeniser is not initialized.");
+    return l(this._model, this._tokeniser);
   }
-  static
-    const
-    return
-      s.setStatus("
+  static loadModel(t, r) {
+    const e = new n(t);
+    return u(t, r).then(({ model: i, tokeniser: s }) => {
+      e._model = i, e._tokeniser = s, e._config = i.config, e.setStatus("warmup"), a(i).then(() => {
+        e.setStatus("ready");
+      }).catch((o) => {
+        e.setStatus("error"), e.emit("error", o);
+      });
     }).catch((i) => {
-
-    }),
+      e.setStatus("error"), e.emit("error", i);
+    }), e;
   }
-  static create(t,
-    const
-    return
+  static create(t, r = {}) {
+    const e = { ...d, ...r }, i = new g(e.vocabSize), s = new m(t, e), o = new n(t, i, s);
+    return o.setStatus("warmup"), a(s).then(() => {
+      o.setStatus("ready");
+    }).catch((h) => {
+      o.setStatus("error"), o.emit("error", h);
+    }), o;
   }
   getNumParams() {
-
+    if (!this._model)
+      throw new Error("Model is not initialized.");
+    return this._model.getNumParams();
   }
   trainer() {
-
-
+    if (!this._model || !this._tokeniser)
+      throw new Error("Model or tokeniser is not initialized.");
+    const t = new c(this._model, this._tokeniser);
+    return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (r) => {
+      const e = this.listeners("trainStep");
+      for (const i of e)
+        await i(r);
+    }), t;
   }
-  train(t,
-    return this.trainer().train(t,
+  train(t, r) {
+    return this.trainer().train(t, r);
   }
   generator() {
-
+    if (!this._model || !this._tokeniser)
+      throw new Error("Model or tokeniser is not initialized.");
+    const t = new _(this._model, this._tokeniser);
     return t.on("start", () => this.setStatus("busy")), t.on("stop", () => this.setStatus("ready")), t;
   }
-  generateText(t,
-    return this.generator().generate(t,
+  generateText(t, r) {
+    return this.generator().generate(t, r);
   }
 }
 export {
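
`loadModel` and `create` now drive a status lifecycle: the instance is returned immediately, a warmup dummy pass runs in the background, and failures are reported through the `error` event rather than a rejected promise. A hand-rolled readiness helper as a sketch (0.1.2 also adds a `waitForModel` utility, listed in the file summary above, whose exact API this diff does not show):

function whenReady(llm: TeachableLLM): Promise<TeachableLLM> {
  return new Promise((resolve, reject) => {
    if (llm.ready) return resolve(llm);
    llm.on('status', (s) => { if (s === 'ready') resolve(llm); });
    llm.on('error', reject);
  });
}

// savedBlob: a Blob previously produced by saveModel().
// loadModel returns synchronously; readiness arrives via events.
const llm = TeachableLLM.loadModel(tf, savedBlob);
await whenReady(llm);
console.log(llm.getNumParams());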
package/dist/Trainer.d.ts
CHANGED
@@ -2,7 +2,6 @@ import { default as NanoGPT } from './NanoGPTModel';
 import { ITokeniser } from './tokeniser/type';
 import { default as EE } from 'eventemitter3';
 export interface ITrainerOptions {
-    epochs?: number;
     batchSize?: number;
     learningRate?: number;
     maxSteps?: number;
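
Dropping `epochs?` from `ITrainerOptions` matches the rewritten trainers: run length is now bounded by the dataset and a `desiredLoss` threshold rather than an epoch count. A sketch of 0.1.2-style options, with defaults read off `Trainer.js` below (the corpus argument's concrete type is not visible in this diff):

await llm.train(corpus, {
  batchSize: 32,        // default 32
  validationSplit: 0.1, // default 0.1
  logInterval: 10,      // default 10: log every 10th step
  desiredLoss: 0.01,    // default 0.01: stop once loss falls below this
  prompt: 'Once upon',  // optional: sample generation at each log step
});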
package/dist/Trainer.js
CHANGED
@@ -1,34 +1,34 @@
-import { E as
-import
-class
+import { E as l } from "./index-SOhdqzHq.js";
+import o from "./training/FullTrainer.js";
+class m extends l {
   trainer;
   constructor(a, t) {
-    super(), this.trainer = new
+    super(), this.trainer = new o(a.tf, a, t, 1e-3);
   }
   stop() {
   }
   async train(a, t) {
-    const { trainDataset:
+    const { trainDataset: r, validationDataset: e } = await this.trainer.createTrainValidationSplit(
       a,
       t?.batchSize || 32,
       t?.validationSplit || 0.1
     );
     this.emit("start"), await this.trainer.trainOnDataset(
-
+      r,
       {
-        epochs: t?.epochs || 2,
         prompt: t?.prompt,
-        stepsPerEpoch: t?.maxSteps || 100,
         logInterval: t?.logInterval || 10,
        desiredLoss: t?.desiredLoss || 0.01,
        onStep: async (i) => {
-          this.
+          const s = this.listeners("log");
+          for (const n of s)
+            await n(i);
        }
      },
-
+      e
    ), this.emit("stop");
  }
 }
 export {
-
+  m as default
 };
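
Note the new `onStep` body: rather than a fire-and-forget `emit('log', ...)`, it fetches the registered listeners and awaits each in turn, so slow asynchronous handlers now apply backpressure to the training loop. The pattern in isolation, as a sketch:

import EE from 'eventemitter3';

class LogEmitter extends EE<'log'> {
  async publish(entry: unknown): Promise<void> {
    // emit() calls listeners synchronously and discards returned promises;
    // awaiting each listener serialises async handlers instead.
    for (const listener of this.listeners('log')) {
      await listener(entry);
    }
  }
}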
package/dist/training/FullTrainer.js
CHANGED
@@ -1,77 +1,65 @@
-import { generateText as
-import
-const
-  epochs: 1,
-  stepsPerEpoch: 1e6,
+import { generateText as L } from "../utilities/generate.js";
+import f from "./Trainer.js";
+const w = {
   desiredLoss: 0.01,
   logInterval: 1
 };
-class
-  constructor(
-    super(
+class g extends f {
+  constructor(r, i, o, n = 3e-4) {
+    super(r, i, o, n);
   }
   // Train for multiple epochs using Dataset API - FIXED memory leaks
-  async trainOnDataset(
-    const {
-      ...
-      ...
+  async trainOnDataset(r, i, o) {
+    const { desiredLoss: n, logInterval: h, onStep: l, prompt: c } = {
+      ...w,
+      ...i
     }, s = {
-      epoch: 0,
       pass: 0,
       depth: 1,
       step: 0,
       stepSinceDepthChange: 0,
       lastLoss: 1e6,
-      epochLoss: 0,
       totalSteps: 0,
       losses: [],
       validationLosses: []
     };
     this.dummyPass(), this.model.trainable = !0;
-    const
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    const d = Date.now(), m = await r.iterator();
+    try {
+      for (; !(s.lastLoss < n); ) {
+        const e = await m.next();
+        if (e.done) break;
+        const p = e.value, u = this.trainBatch(s, p), a = {
+          loss: s.lastLoss,
+          step: s.step,
+          time: Date.now() - d,
+          batchSize: p.xs.shape[0]
+        };
+        if (this.model.log.push(a), s.step % h === 0) {
+          if (await u, o)
+            try {
+              const t = await this.evaluateOnDataset(o, 5);
+              s.validationLosses.push(t), a.valLoss = t;
+            } catch (t) {
+              console.error("Validation error:", t);
+            }
+          if (l) {
+            if (c) {
+              const t = await L(this.tokenizer, this.model, c, 100, {
                temperature: 0.8
              });
-
+              a.example = t;
            }
-            await
+            await l(a);
          }
        }
-    } catch (e) {
-      throw console.error("Training error:", e), this.tf.dispose(), e;
      }
-
-
-    try {
-      const e = await this.evaluateOnDataset(t, 5);
-      s.validationLosses.push(e), o && await o(s.epoch, d, e);
-    } catch (e) {
-      console.error("Validation error:", e);
-    }
-    else
-      o && o(s.epoch, d);
-    if (this.tf.dispose(), s.lastLoss < c)
-      break;
+    } catch (e) {
+      throw console.error("Training error:", e), this.tf.dispose(), e;
     }
-    return { losses: s.losses, validationLosses: s.validationLosses };
+    return this.tf.dispose(), { losses: s.losses, validationLosses: s.validationLosses };
   }
 }
 export {
-
+  g as default
 };
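
The 0.1.0 epoch machinery (`epochs`, `stepsPerEpoch`, `epochLoss`, per-epoch validation) is removed: 0.1.2 makes one pass over the dataset iterator, pushes each log entry onto `model.log`, and stops when the iterator is exhausted or `lastLoss` drops below `desiredLoss`. The control flow reduced to a sketch (the dataset follows the tfjs `tf.data.Dataset` iterator contract; the other names are illustrative):

async function trainUntil(
  dataset: { iterator(): Promise<{ next(): Promise<{ value: unknown; done?: boolean }> }> },
  desiredLoss: number,
  stepFn: (batch: unknown) => Promise<number>,
): Promise<number> {
  const it = await dataset.iterator();
  let lastLoss = Number.MAX_VALUE;
  while (!(lastLoss < desiredLoss)) {
    const next = await it.next();
    if (next.done) break;           // dataset exhausted: single pass complete
    lastLoss = await stepFn(next.value);
  }
  return lastLoss;
}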
package/dist/training/LayerTrainer.js
CHANGED
@@ -1,111 +1,90 @@
-import { generateText as
-import
-import { schedule as
-const
-  epochs: 1,
-  stepsPerEpoch: 1e6,
+import { generateText as d } from "../utilities/generate.js";
+import S from "./Trainer.js";
+import { schedule as u } from "./lwSchedule.js";
+const w = {
   desiredLoss: 0.01,
   logInterval: 1,
   stepsPerLayer: 400,
   maxPasses: 3
 };
-class
+class b extends S {
   trainingPattern = [];
   startPass = 0;
   startLayer = 0;
-  constructor(
-    if (super(
-      const
-
+  constructor(r, a, e, p = 3e-4) {
+    if (super(r, a, e, p), this.trainingPattern = u[a.config.nLayer - 1] || [], a.log.length > 0) {
+      const i = a.log[a.log.length - 1];
+      i.pass !== void 0 && i.layer !== void 0 && (this.startPass = i.pass, this.startLayer = i.layer, console.log(`Resuming training from pass ${this.startPass}, layer ${this.startLayer}`));
     }
   }
-  applyTrainingPattern(
-    const
-    this.model.setSkipMask(
+  applyTrainingPattern(r) {
+    const a = r < this.trainingPattern.length ? r : this.trainingPattern.length - 1, e = this.trainingPattern[a];
+    this.model.setSkipMask(e.skip), this.model.setTrainableMask(e.trainable), this.resetOptimizer(e.adam), console.log("Applied training pattern:", a, e);
  }
  // Train for multiple epochs using Dataset API - FIXED memory leaks
-  async trainOnDataset(
-    const {
-
-
-
-      logInterval: P,
-      stepsPerLayer: d,
-      onLayerChange: n,
-      onPassComplete: g,
-      onStep: y,
-      onEpoch: p,
-      prompt: L
-    } = {
-      ...x,
-      ...e
-    }, s = {
-      epoch: 0,
+  async trainOnDataset(r, a, e) {
+    const { desiredLoss: p, logInterval: i, stepsPerLayer: L, onLayerChange: l, onPassComplete: h, onStep: c, prompt: g } = {
+      ...w,
+      ...a
+    }, t = {
       pass: 0,
       layerStep: 0,
       step: 0,
       stepSinceLayerChange: 0,
       lastLoss: 1e6,
-      epochLoss: 0,
       totalSteps: 0,
       losses: [],
       validationLosses: []
     };
     this.dummyPass();
-    const
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    const f = Date.now();
+    this.startPass = 0, this.startLayer = 0;
+    const m = await r.iterator();
+    this.applyTrainingPattern(t.layerStep % this.trainingPattern.length);
+    try {
+      for (; !(t.lastLoss < p); ) {
+        const n = await m.next();
+        if (n.done) break;
+        const y = n.value, P = this.trainBatch(t, y);
+        t.stepSinceLayerChange++;
+        const o = {
+          loss: t.lastLoss,
+          step: t.step,
+          time: Date.now() - f,
+          batchSize: y.xs.shape[0],
+          pass: t.pass,
+          layer: t.layerStep % this.model.config.nLayer
+        };
+        if (this.model.log.push(o), t.step % i === 0) {
+          if (await P, e)
+            try {
+              const s = await this.evaluateOnDataset(e, 5);
+              t.validationLosses.push(s), o.valLoss = s;
+            } catch (s) {
+              console.error("Validation error:", s);
+            }
+          if (c) {
+            if (g) {
+              const s = await d(this.tokenizer, this.model, g, 100, {
                temperature: 0.8,
                topK: 10
              });
-
+              o.example = s;
            }
-            await
-          }
-          if (s.stepSinceLayerChange >= d) {
-            let i;
-            t && (i = await this.evaluateOnDataset(t, 5), s.validationLosses.push(i), l.valLoss = i), s.layerStep++, s.layerStep % this.model.config.nLayer === 0 ? (n && await n(s.layerStep, s.pass, i), g && await g(s.pass), s.pass++) : n && await n(s.layerStep, s.pass, i), s.stepSinceLayerChange = 0, this.applyTrainingPattern(s.layerStep % this.trainingPattern.length);
+            await c(o);
          }
        }
-
-
-
-        const f = s.epochLoss / s.step;
-        if (t)
-          try {
-            const a = await this.evaluateOnDataset(t, 5);
-            s.validationLosses.push(a), p && await p(s.epoch, f, a);
-          } catch (a) {
-            console.error("Validation error:", a);
+        if (t.stepSinceLayerChange >= L) {
+          let s;
+          e && (s = await this.evaluateOnDataset(e, 5), t.validationLosses.push(s), o.valLoss = s), t.layerStep++, t.layerStep % this.model.config.nLayer === 0 ? (l && await l(t.layerStep, t.pass, s), h && await h(t.pass), t.pass++) : l && await l(t.layerStep, t.pass, s), t.stepSinceLayerChange = 0, this.applyTrainingPattern(t.layerStep % this.trainingPattern.length);
          }
-
-
-
-          break;
+      }
+    } catch (n) {
+      throw console.error("Training error:", n), this.tf.dispose(), n;
     }
-    return { losses:
+    return this.tf.dispose(), { losses: t.losses, validationLosses: t.validationLosses };
   }
 }
 export {
-
+  b as default
 };
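
LayerTrainer keeps the same loss-driven loop but rotates a layer-wise schedule on top: every `stepsPerLayer` steps it advances `layerStep`, re-applies a skip/trainable mask pair from `lwSchedule`, resets the optimizer, and counts a full rotation over `nLayer` layers as one pass; it can also resume `startPass`/`startLayer` from the last `model.log` entry. The index selection in `applyTrainingPattern`, restated as a sketch (the entry shape is inferred from the calls above and is not a published type):

// Hypothetical shape, inferred from setSkipMask/setTrainableMask/resetOptimizer.
interface PatternEntry { skip: unknown; trainable: unknown; adam: unknown; }

function selectPattern(layerStep: number, pattern: PatternEntry[]): PatternEntry {
  // The call site wraps with `layerStep % pattern.length`; this clamp keeps
  // the last entry active if an out-of-range index ever arrives.
  const idx = layerStep < pattern.length ? layerStep : pattern.length - 1;
  return pattern[idx];
}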
package/dist/training/Trainer.d.ts
CHANGED
@@ -4,10 +4,8 @@ import { default as NanoGPT, TrainingLogEntry } from '../NanoGPTModel';
 import { default as TF } from '@tensorflow/tfjs';
 import { default as AdamExt } from './AdamExt';
 export interface TrainingState {
-    epoch: number;
     step: number;
     lastLoss: number;
-    epochLoss: number;
     totalSteps: number;
     losses: number[];
     validationLosses: number[];
@@ -19,12 +17,9 @@ export interface AdamConfig {
     epsilon: number;
 }
 export interface TrainingOptions {
-    epochs: number;
-    stepsPerEpoch: number;
     desiredLoss: number;
     logInterval: number;
     prompt?: string;
-    onEpoch?: (e: number, loss: number, valLoss?: number) => Promise<void> | void;
     onStep?: (log: TrainingLogEntry) => Promise<void> | void;
 }
 export default abstract class GPTTrainer {
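
With `onEpoch` gone, `onStep` is the single progress hook in `TrainingOptions`. Entries logged by the trainers above carry `loss`, `step`, `time`, and `batchSize`, plus `valLoss`/`example` at log steps and `pass`/`layer` from LayerTrainer; the full `TrainingLogEntry` type lives in `NanoGPTModel.d.ts` and is only partially visible here. A hedged handler sketch:

const options: Partial<TrainingOptions> = {
  desiredLoss: 0.01,
  logInterval: 10,
  onStep: async (log) => {
    // Awaited by the trainer, so async work here throttles training.
    const val = log.valLoss !== undefined ? ` val=${log.valLoss.toFixed(3)}` : '';
    console.log(`step ${log.step}: loss=${log.loss.toFixed(3)}${val}`);
  },
};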
package/dist/training/Trainer.js
CHANGED
@@ -57,7 +57,7 @@ class y {
   async trainBatch(t, e) {
     try {
       const s = this.trainStep(e, !1, !1);
-      return e.xs.dispose(), e.ys.dispose(), t.step++, t.totalSteps++, s.array().then((a) => (t.lastLoss = a, t.losses.push(t.lastLoss),
+      return e.xs.dispose(), e.ys.dispose(), t.step++, t.totalSteps++, s.array().then((a) => (t.lastLoss = a, t.losses.push(t.lastLoss), s.dispose(), t.lastLoss));
     } catch (s) {
       throw console.error(`Error processing batch at step ${t.step}:`, s), this.tf.dispose(), s;
     }
package/dist/utilities/load.js
CHANGED
@@ -1,5 +1,5 @@
-import { z as
-import { importWeights as
+import { z as u } from "../jszip.min-BLbRbbKt.js";
+import { importWeights as F } from "./weights.js";
 import k from "../tokeniser/CharTokeniser.js";
 import j from "../NanoGPTModel.js";
 import { dummyPassAsync as z } from "./dummy.js";
@@ -10,7 +10,7 @@ async function A(o) {
   return e.arrayBuffer();
 }
 async function T(o, e) {
-  const m = typeof e == "string" ? await A(e) : e, n = await
+  const m = typeof e == "string" ? await A(e) : e, n = await u.loadAsync(m), s = /* @__PURE__ */ new Map(), c = await n.file("manifest.json")?.async("string");
   if (!c)
     throw new Error("Manifest file not found in the zip archive");
   const f = JSON.parse(c);
@@ -19,20 +19,21 @@ async function T(o, e) {
   const l = await n.file("tokeniser.json")?.async("string");
   if (!l)
     throw new Error("Tokeniser file not found in the zip archive");
-  const g = JSON.parse(l), y = new k(g.vocab),
+  const g = JSON.parse(l), y = new k(g.vocab), p = /* @__PURE__ */ new Map();
   for (const t of Object.keys(n.files))
     if (t.endsWith(".bin")) {
       const r = t.replace(".bin", ""), h = await n.file(t).async("arraybuffer"), d = new Float32Array(h), i = s.get(r) || { spec: [], data: new Float32Array() };
       i.data = d, s.set(r, i);
-      const
-
+      const b = await F(i, o);
+      p.set(r, b);
     }
+  o.disposeVariables();
   const a = new j(o, f.config);
-  await z(a), a.loadWeights(
-  const
-  if (
+  await z(a), a.loadWeights(p);
+  const w = await n.file("log.json")?.async("string");
+  if (w)
     try {
-      const t = JSON.parse(
+      const t = JSON.parse(w);
       a.log = t;
     } catch (t) {
       throw console.error("Error parsing training log:", t), new Error(`Failed to parse training log: ${t}`);
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@genai-fi/nanogpt",
-  "version": "0.1.0",
+  "version": "0.1.2",
   "type": "module",
   "main": "dist/main.js",
   "types": "dist/main.d.ts",
@@ -23,7 +23,7 @@
   "test": "vitest",
   "ci:test": "vitest --coverage --reporter=junit --outputFile=junit.xml",
   "coverage": "vitest run --coverage",
-  "train": "tsx scripts/train.ts --
+  "train": "tsx scripts/train.ts --batch 64",
   "generate": "tsx scripts/generate.ts",
   "evaluate": "tsx scripts/evaluate.ts",
   "debug": "tsx scripts/debug.ts"