@genai-fi/nanogpt 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +7 -0
- package/README.md +20 -0
- package/dist/Generator.d.ts +14 -0
- package/dist/Generator.js +39 -0
- package/dist/NanoGPTModel.d.ts +35 -0
- package/dist/NanoGPTModel.js +129 -0
- package/dist/TeachableLLM.d.ts +21 -0
- package/dist/TeachableLLM.js +47 -0
- package/dist/Trainer.d.ts +19 -0
- package/dist/Trainer.js +34 -0
- package/dist/_commonjsHelpers-DaMA6jEr.js +8 -0
- package/dist/assets/worker-BYeSPNkq.js +1 -0
- package/dist/config.d.ts +11 -0
- package/dist/config.js +19 -0
- package/dist/index-B8nyc6IR.js +3899 -0
- package/dist/index-SOhdqzHq.js +113 -0
- package/dist/jszip.min-BLbRbbKt.js +2324 -0
- package/dist/layers/CausalSelfAttention.d.ts +22 -0
- package/dist/layers/CausalSelfAttention.js +75 -0
- package/dist/layers/LayerNorm.d.ts +12 -0
- package/dist/layers/LayerNorm.js +30 -0
- package/dist/layers/MLP.d.ts +17 -0
- package/dist/layers/MLP.js +57 -0
- package/dist/layers/TiedEmbedding.d.ts +22 -0
- package/dist/layers/TiedEmbedding.js +532 -0
- package/dist/layers/TransformerBlock.d.ts +19 -0
- package/dist/layers/TransformerBlock.js +47 -0
- package/dist/main.d.ts +6 -0
- package/dist/main.js +8 -0
- package/dist/tokeniser/CharTokeniser.d.ts +20 -0
- package/dist/tokeniser/CharTokeniser.js +52 -0
- package/dist/tokeniser/NodeTokeniser.d.ts +19 -0
- package/dist/tokeniser/NodeTokeniser.js +46 -0
- package/dist/tokeniser/WebTokeniser.d.ts +18 -0
- package/dist/tokeniser/WebTokeniser.js +96 -0
- package/dist/tokeniser/bpe.d.ts +14 -0
- package/dist/tokeniser/bpe.js +102 -0
- package/dist/tokeniser/messages.d.ts +61 -0
- package/dist/tokeniser/messages.js +1 -0
- package/dist/tokeniser/type.d.ts +14 -0
- package/dist/tokeniser/type.js +1 -0
- package/dist/tokeniser/worker.d.ts +1 -0
- package/dist/tokeniser/worker.js +53 -0
- package/dist/training/AdamExt.d.ts +23 -0
- package/dist/training/AdamExt.js +43 -0
- package/dist/training/DatasetBuilder.d.ts +12 -0
- package/dist/training/DatasetBuilder.js +27 -0
- package/dist/training/FullTrainer.d.ts +17 -0
- package/dist/training/FullTrainer.js +75 -0
- package/dist/training/LayerTrainer.d.ts +28 -0
- package/dist/training/LayerTrainer.js +108 -0
- package/dist/training/Trainer.d.ts +73 -0
- package/dist/training/Trainer.js +87 -0
- package/dist/training/lwSchedule.d.ts +7 -0
- package/dist/training/lwSchedule.js +162 -0
- package/dist/utilities/generate.d.ts +3 -0
- package/dist/utilities/generate.js +22 -0
- package/dist/utilities/load.d.ts +7 -0
- package/dist/utilities/load.js +47 -0
- package/dist/utilities/save.d.ts +3 -0
- package/dist/utilities/save.js +21 -0
- package/dist/utilities/textLoader.d.ts +1 -0
- package/dist/utilities/textLoader.js +438 -0
- package/dist/utilities/tokenParse.d.ts +1 -0
- package/dist/utilities/tokenParse.js +66 -0
- package/dist/utilities/weights.d.ts +12 -0
- package/dist/utilities/weights.js +43 -0
- package/package.json +59 -0
package/dist/training/LayerTrainer.js
@@ -0,0 +1,108 @@
+import { generateText as v } from "../utilities/generate.js";
+import T from "./Trainer.js";
+import { schedule as k } from "./lwSchedule.js";
+const x = {
+  epochs: 1,
+  stepsPerEpoch: 1e6,
+  desiredLoss: 0.01,
+  logInterval: 1,
+  stepsPerLayer: 400,
+  maxPasses: 3
+};
+class D extends T {
+  trainingPattern = [];
+  startPass = 0;
+  startLayer = 0;
+  constructor(o, e, t, h = 3e-4) {
+    if (super(o, e, t, h), this.trainingPattern = k[e.config.nLayer - 1] || [], e.log.length > 0) {
+      const r = e.log[e.log.length - 1];
+      r.pass !== void 0 && r.layer !== void 0 && (this.startPass = r.pass, this.startLayer = r.layer, console.log(`Resuming training from pass ${this.startPass}, layer ${this.startLayer}`));
+    }
+  }
+  applyTrainingPattern(o) {
+    const e = o < this.trainingPattern.length ? o : this.trainingPattern.length - 1, t = this.trainingPattern[e];
+    this.model.setSkipMask(t.skip), this.model.setTrainableMask(t.trainable), this.resetOptimizer(t.adam), console.log("Applied training pattern:", e, t);
+  }
+  // Train for multiple epochs using Dataset API - FIXED memory leaks
+  async trainOnDataset(o, e, t) {
+    const {
+      epochs: h,
+      stepsPerEpoch: r,
+      desiredLoss: c,
+      logInterval: m,
+      stepsPerLayer: d,
+      onLayerChange: n,
+      onPassComplete: g,
+      onStep: y,
+      onEpoch: p,
+      prompt: L
+    } = {
+      ...x,
+      ...e
+    }, s = {
+      epoch: 0,
+      pass: 0,
+      layerStep: 0,
+      step: 0,
+      stepSinceLayerChange: 0,
+      lastLoss: 1e6,
+      epochLoss: 0,
+      totalSteps: 0,
+      losses: [],
+      validationLosses: []
+    };
+    this.dummyPass();
+    const S = Date.now();
+    for (s.epoch = 0; s.epoch < h; s.epoch++) {
+      s.step = 0, s.epochLoss = 0, s.pass = this.startPass, s.layerStep = this.startLayer + this.startPass * this.model.config.nLayer, s.stepSinceLayerChange = 0, this.startPass = 0, this.startLayer = 0;
+      const u = await o.iterator();
+      this.applyTrainingPattern(s.layerStep % this.trainingPattern.length);
+      try {
+        for (; !(r && s.step >= r || s.lastLoss < c); ) {
+          const a = await u.next();
+          if (a.done) break;
+          const P = a.value, w = this.trainBatch(s, P);
+          s.stepSinceLayerChange++;
+          const l = {
+            epoch: s.epoch,
+            loss: s.lastLoss,
+            step: s.step,
+            time: Date.now() - S,
+            batchSize: P.xs.shape[0],
+            pass: s.pass,
+            layer: s.layerStep % this.model.config.nLayer
+          };
+          if (this.model.log.push(l), s.step % m === 0 && (await w, y)) {
+            if (L) {
+              const i = await v(this.tokenizer, this.model, L, 100, 0.8, 10);
+              l.example = i;
+            }
+            await y(l);
+          }
+          if (s.stepSinceLayerChange >= d) {
+            let i;
+            t && (i = await this.evaluateOnDataset(t, 5), s.validationLosses.push(i), l.valLoss = i), s.layerStep++, s.layerStep % this.model.config.nLayer === 0 ? (n && await n(s.layerStep, s.pass, i), g && await g(s.pass), s.pass++) : n && await n(s.layerStep, s.pass, i), s.stepSinceLayerChange = 0, this.applyTrainingPattern(s.layerStep % this.trainingPattern.length);
+          }
+        }
+      } catch (a) {
+        throw console.error("Training error:", a), this.tf.dispose(), a;
+      }
+      const f = s.epochLoss / s.step;
+      if (t)
+        try {
+          const a = await this.evaluateOnDataset(t, 5);
+          s.validationLosses.push(a), p && await p(s.epoch, f, a);
+        } catch (a) {
+          console.error("Validation error:", a);
+        }
+      else
+        p && p(s.epoch, f);
+      if (this.tf.dispose(), s.lastLoss < c)
+        break;
+    }
+    return { losses: s.losses, validationLosses: s.validationLosses };
+  }
+}
+export {
+  D as default
+};
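
The class above selects a per-layer training pattern from lwSchedule, freezes or skips blocks accordingly, and reports progress through optional callbacks. A minimal usage sketch follows, assuming the dist import path shown, a constructed model/tokeniser pair, and tf.data datasets of { xs, ys } batches; the option names match those destructured in trainOnDataset above, but the values are illustrative only.

// Sketch only: wiring LayerTrainer options and callbacks. Import path and the
// model/tokeniser/dataset values are assumptions, not taken from this diff.
import * as tf from "@tensorflow/tfjs";
import LayerTrainer from "@genai-fi/nanogpt/dist/training/LayerTrainer.js";

async function runLayerwise(model: any, tokeniser: any, dataset: any, valDataset: any) {
  const trainer = new LayerTrainer(tf, model, tokeniser, 3e-4);
  return trainer.trainOnDataset(dataset, {
    epochs: 2,
    stepsPerLayer: 400,          // steps before the next schedule entry is applied
    logInterval: 10,
    prompt: "Once upon a time",  // sampled at each log interval and attached to the log entry
    onStep: (log) => console.log(`step ${log.step} loss ${log.loss.toFixed(3)}`),
    onLayerChange: (layerStep, pass) => console.log(`layer step ${layerStep}, pass ${pass}`),
    onPassComplete: (pass) => console.log(`pass ${pass} complete`),
  }, valDataset);
}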

package/dist/training/Trainer.d.ts
@@ -0,0 +1,73 @@
+import { ITokeniser } from '../tokeniser/type';
+import { DatasetBuilder } from './DatasetBuilder';
+import { default as NanoGPT, TrainingLogEntry } from '../NanoGPTModel';
+import { default as TF } from '@tensorflow/tfjs';
+import { default as AdamExt } from './AdamExt';
+export interface TrainingState {
+    epoch: number;
+    step: number;
+    lastLoss: number;
+    epochLoss: number;
+    totalSteps: number;
+    losses: number[];
+    validationLosses: number[];
+}
+export interface AdamConfig {
+    learningRateFactor: number;
+    beta1: number;
+    beta2: number;
+    epsilon: number;
+}
+export interface TrainingOptions {
+    epochs: number;
+    stepsPerEpoch: number;
+    desiredLoss: number;
+    logInterval: number;
+    prompt?: string;
+    onEpoch?: (e: number, loss: number, valLoss?: number) => Promise<void> | void;
+    onStep?: (log: TrainingLogEntry) => Promise<void> | void;
+}
+export default abstract class GPTTrainer {
+    protected tokenizer: ITokeniser;
+    protected model: NanoGPT;
+    protected optimizer: AdamExt;
+    protected datasetBuilder: DatasetBuilder;
+    protected tf: typeof TF;
+    protected learningRate: number;
+    constructor(tf: typeof TF, model: NanoGPT, tokenizer: ITokeniser, learningRate?: number);
+    getOptimizer(): AdamExt;
+    resetOptimizer(config?: AdamConfig): void;
+    private printGradients;
+    protected trainStep(batch: {
+        xs: TF.Tensor;
+        ys: TF.Tensor;
+    }, dummy?: boolean, print?: boolean): TF.Scalar;
+    protected dummyPass(): void;
+    protected trainBatch(state: TrainingState, batch: {
+        xs: TF.Tensor;
+        ys: TF.Tensor;
+    }): Promise<number>;
+    abstract trainOnDataset(dataset: TF.data.Dataset<{
+        xs: TF.Tensor;
+        ys: TF.Tensor;
+    }>, options: Partial<TrainingOptions>, validationDataset?: TF.data.Dataset<{
+        xs: TF.Tensor;
+        ys: TF.Tensor;
+    }>): Promise<{
+        losses: number[];
+        validationLosses: number[];
+    }>;
+    evaluateOnDataset(dataset: TF.data.Dataset<TF.TensorContainer>, maxBatches?: number): Promise<number>;
+    createTrainValidationSplit(textData: string[], batchSize?: number, validationSplit?: number): Promise<{
+        trainDataset: TF.data.Dataset<{
+            xs: TF.Tensor;
+            ys: TF.Tensor;
+        }>;
+        validationDataset: TF.data.Dataset<{
+            xs: TF.Tensor;
+            ys: TF.Tensor;
+        }>;
+    }>;
+    createDataset(textData: string[], batchSize?: number): Promise<TF.data.Dataset<TF.TensorContainer>>;
+    dispose(): void;
+}
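
The declaration above shows that the trainer also owns dataset construction via createDataset and createTrainValidationSplit. A hedged sketch of the split-then-train flow, assuming FullTrainer (listed in the file index) is a concrete subclass; its exact options are not shown in this diff.

// Sketch only: the flow implied by the GPTTrainer declaration above.
import * as tf from "@tensorflow/tfjs";
import FullTrainer from "@genai-fi/nanogpt/dist/training/FullTrainer.js";

async function train(model: any, tokeniser: any, textData: string[]) {
  const trainer = new FullTrainer(tf, model, tokeniser);
  const { trainDataset, validationDataset } =
    await trainer.createTrainValidationSplit(textData, 32, 0.1);
  const { losses, validationLosses } = await trainer.trainOnDataset(
    trainDataset,
    { epochs: 1, logInterval: 10 },
    validationDataset
  );
  console.log("final loss", losses[losses.length - 1], "val", validationLosses);
  trainer.dispose();
}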

package/dist/training/Trainer.js
@@ -0,0 +1,87 @@
+import { DatasetBuilder as h } from "./DatasetBuilder.js";
+import p from "./AdamExt.js";
+class y {
+  constructor(t, e, s, a = 1e-3) {
+    this.tokenizer = s, this.tf = t, this.model = e, this.learningRate = a, this.resetOptimizer(), this.datasetBuilder = new h(this.tf, s, e.config.blockSize);
+  }
+  model;
+  optimizer;
+  datasetBuilder;
+  tf;
+  learningRate;
+  getOptimizer() {
+    return this.optimizer;
+  }
+  resetOptimizer(t = { learningRateFactor: 1, beta1: 0.9, beta2: 0.99, epsilon: 1e-8 }) {
+    this.optimizer && this.optimizer.dispose();
+    const e = new p(
+      t.learningRateFactor * this.learningRate,
+      t.beta1,
+      t.beta2,
+      t.epsilon,
+      {
+        warmupSteps: 100,
+        decaySteps: 2e4,
+        minLearningRate: 1e-4,
+        weightDecay: 0
+      }
+    );
+    this.optimizer = e;
+  }
+  printGradients(t) {
+    Object.keys(t).forEach((e) => {
+      const s = t[e];
+      console.log(`${e}:`), console.log(` Shape: ${s.shape}`), console.log(` Mean: ${this.tf.mean(s).dataSync()[0]}`), console.log(` Std: ${this.tf.moments(s).variance.sqrt().dataSync()[0]}`), console.log(` Min: ${this.tf.min(s).dataSync()[0]}`), console.log(` Max: ${this.tf.max(s).dataSync()[0]}`), console.log(` Norm: ${this.tf.norm(s).dataSync()[0]}`);
+    });
+  }
+  trainStep(t, e = !1, s = !1) {
+    return this.tf.tidy(() => {
+      const { xs: a, ys: o } = t, r = () => {
+        const { loss: l, logits: c } = this.model.forward(a, o, !0);
+        return c.dispose(), l;
+      }, { value: n, grads: i } = this.tf.variableGrads(r);
+      return e || (s && (console.log("-------"), this.printGradients(i), console.log("-------")), this.optimizer.applyGradients(i), this.tf.dispose(i)), n;
+    });
+  }
+  dummyPass() {
+    const t = this.tf.zeros([1, this.model.config.blockSize], "int32"), e = this.tf.zeros([1, this.model.config.blockSize, this.model.config.vocabSize]);
+    try {
+      const s = this.trainStep({ xs: t, ys: e }, !0);
+      s.dataSync(), s.dispose();
+    } catch (s) {
+      console.error("Error during dummy pass:", s);
+    } finally {
+      t.dispose(), e.dispose();
+    }
+  }
+  async trainBatch(t, e) {
+    try {
+      const s = this.trainStep(e, !1, !1);
+      return e.xs.dispose(), e.ys.dispose(), t.step++, t.totalSteps++, s.array().then((a) => (t.lastLoss = a, t.losses.push(t.lastLoss), t.epochLoss += t.lastLoss, s.dispose(), t.lastLoss));
+    } catch (s) {
+      throw console.error(`Error processing batch at step ${t.step}:`, s), this.tf.dispose(), s;
+    }
+  }
+  // Evaluate model on validation dataset - FIXED memory leaks
+  async evaluateOnDataset(t, e = 100) {
+    let s = 0, a = 0;
+    return await t.take(e).forEachAsync(async (o) => {
+      const { xs: r, ys: n } = o, { loss: i, logits: l } = this.model.forward(r, n, !1), d = i.arraySync();
+      i.dispose(), l.dispose(), s += d, a++;
+    }), s / a;
+  }
+  // Create training and validation datasets - FIXED memory leaks
+  async createTrainValidationSplit(t, e = 32, s = 0.1) {
+    const a = Math.floor(t.length * (1 - s)), o = t.slice(0, a), r = t.slice(a), n = await this.datasetBuilder.createTextDataset(o, e), i = await this.datasetBuilder.createTextDataset(r, e);
+    return { trainDataset: n, validationDataset: i };
+  }
+  async createDataset(t, e = 32) {
+    return await this.datasetBuilder.createTextDataset(t, e);
+  }
+  dispose() {
+    this.optimizer && this.optimizer.dispose(), this.tf.dispose();
+  }
+}
+export {
+  y as default
+};
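
The base class above leaves trainOnDataset abstract and exposes dummyPass/trainBatch as the building blocks. A minimal, hedged sketch of a concrete subclass is shown below to illustrate how those helpers are intended to be driven; it is not part of the package, and the import path is a guess based on the dist layout.

// Sketch only: a minimal concrete trainer built on the protected helpers above.
import GPTTrainer, { TrainingOptions, TrainingState } from "@genai-fi/nanogpt/dist/training/Trainer";
import { default as TF } from "@tensorflow/tfjs";

class MinimalTrainer extends GPTTrainer {
  async trainOnDataset(
    dataset: TF.data.Dataset<{ xs: TF.Tensor; ys: TF.Tensor }>,
    options: Partial<TrainingOptions>
  ) {
    const { stepsPerEpoch = 1000, desiredLoss = 0.01 } = options;
    const state: TrainingState = {
      epoch: 0, step: 0, lastLoss: 1e6, epochLoss: 0,
      totalSteps: 0, losses: [], validationLosses: [],
    };
    this.dummyPass(); // build variables before the first real gradient step
    const it = await dataset.iterator();
    while (state.step < stepsPerEpoch && state.lastLoss > desiredLoss) {
      const batch = await it.next();
      if (batch.done) break;
      await this.trainBatch(state, batch.value); // updates state.lastLoss and state.losses
    }
    return { losses: state.losses, validationLosses: state.validationLosses };
  }
}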

package/dist/training/lwSchedule.js
@@ -0,0 +1,162 @@
+const e = [
+  [
+    {
+      adam: {
+        learningRateFactor: 1,
+        beta1: 0.9,
+        beta2: 0.999,
+        epsilon: 1e-8
+      },
+      skip: [!1],
+      trainable: [!0]
+    }
+  ],
+  [
+    {
+      adam: {
+        learningRateFactor: 1,
+        beta1: 0.9,
+        beta2: 0.999,
+        epsilon: 1e-8
+      },
+      skip: [!0, !1],
+      trainable: [!1, !0]
+    },
+    {
+      adam: {
+        learningRateFactor: 1,
+        beta1: 0.9,
+        beta2: 0.999,
+        epsilon: 1e-8
+      },
+      skip: [!1, !1],
+      trainable: [!0, !1]
+    },
+    {
+      adam: {
+        learningRateFactor: 0.3333333333333333,
+        beta1: 0.95,
+        beta2: 0.999,
+        epsilon: 1e-8
+      },
+      skip: [!1, !1],
+      trainable: [!0, !0]
+    }
+  ],
+  [],
+  [
+    {
+      adam: {
+        learningRateFactor: 1,
+        beta1: 0.9,
+        beta2: 0.999,
+        epsilon: 1e-8
+      },
+      skip: [!0, !0, !0, !1],
+      trainable: [!1, !1, !1, !0]
+    },
+    {
+      adam: {
+        learningRateFactor: 1,
+        beta1: 0.9,
+        beta2: 0.999,
+        epsilon: 1e-8
+      },
+      skip: [!0, !0, !1, !1],
+      trainable: [!1, !1, !0, !1]
+    },
+    {
+      adam: {
+        learningRateFactor: 0.3333333333333333,
+        beta1: 0.95,
+        beta2: 0.999,
+        epsilon: 1e-8
+      },
+      skip: [!0, !0, !1, !1],
+      trainable: [!1, !1, !1, !0]
+    },
+    {
+      adam: {
+        learningRateFactor: 1,
+        beta1: 0.9,
+        beta2: 0.999,
+        epsilon: 1e-8
+      },
+      skip: [!0, !1, !1, !1],
+      trainable: [!1, !0, !1, !1]
+    },
+    {
+      adam: {
+        learningRateFactor: 0.3333333333333333,
+        beta1: 0.95,
+        beta2: 0.999,
+        epsilon: 1e-8
+      },
+      skip: [!0, !1, !1, !1],
+      trainable: [!1, !1, !0, !1]
+    },
+    {
+      adam: {
+        learningRateFactor: 0.16666666666666666,
+        beta1: 0.98,
+        beta2: 0.9999,
+        epsilon: 1e-8
+      },
+      skip: [!0, !1, !1, !1],
+      trainable: [!1, !1, !1, !0]
+    },
+    {
+      adam: {
+        learningRateFactor: 1,
+        beta1: 0.9,
+        beta2: 0.999,
+        epsilon: 1e-8
+      },
+      skip: [!1, !1, !1, !1],
+      trainable: [!0, !1, !1, !1]
+    },
+    {
+      adam: {
+        learningRateFactor: 0.3333333333333333,
+        beta1: 0.95,
+        beta2: 0.999,
+        epsilon: 1e-8
+      },
+      skip: [!1, !1, !1, !1],
+      trainable: [!1, !0, !1, !1]
+    },
+    {
+      adam: {
+        learningRateFactor: 0.16666666666666666,
+        beta1: 0.98,
+        beta2: 0.9999,
+        epsilon: 1e-8
+      },
+      skip: [!1, !1, !1, !1],
+      trainable: [!1, !1, !0, !1]
+    },
+    {
+      adam: {
+        learningRateFactor: 0.16666666666666666,
+        beta1: 0.98,
+        beta2: 0.9999,
+        epsilon: 1e-8
+      },
+      skip: [!1, !1, !1, !1],
+      trainable: [!1, !1, !1, !0]
+    },
+    {
+      adam: {
+        learningRateFactor: 0.16666666666666666,
+        beta1: 0.98,
+        beta2: 0.9999,
+        epsilon: 1e-8
+      },
+      skip: [!1, !1, !1, !1],
+      trainable: [!0, !0, !0, !0]
+    }
+  ]
+];
+export {
+  e as schedule
+};
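
The schedule is indexed by layer count (LayerTrainer reads schedule[config.nLayer - 1]; the 3-layer slot is empty in this release), and each entry is one training phase with per-block skip/trainable masks plus the optimizer settings to apply when that phase starts. A hedged type sketch of the entry shape, inferred from the data above and from applyTrainingPattern; the type names and import path are illustrative only.

// Sketch only: the shape implied by the schedule entries and by
// LayerTrainer.applyTrainingPattern. AdamConfig matches training/Trainer.d.ts.
import type { AdamConfig } from "@genai-fi/nanogpt/dist/training/Trainer";

interface LayerwisePattern {
  adam: AdamConfig;      // learningRateFactor, beta1, beta2, epsilon for resetOptimizer
  skip: boolean[];       // per transformer block: passed to model.setSkipMask
  trainable: boolean[];  // per transformer block: passed to model.setTrainableMask
}

// schedule[nLayer - 1] is the list of phases cycled through for an nLayer-block model.
type LayerwiseSchedule = LayerwisePattern[][];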

package/dist/utilities/generate.js
@@ -0,0 +1,22 @@
+async function h(n, t, i, s, a = 1, r) {
+  if (s <= 0)
+    throw new Error("Length must be a positive integer");
+  if (a <= 0)
+    throw new Error("Temperature must be a positive number");
+  if (r !== void 0 && r <= 0)
+    throw new Error("topK must be a positive integer or undefined");
+  if (i.length === 0)
+    throw new Error("Prompt cannot be an empty string");
+  const c = await n.tokenise([i], !0), o = (await t.tf.tidy(() => {
+    let e = t.tf.tensor2d(c, [1, c[0].length], "int32");
+    for (let u = 0; u < s; u++) {
+      const f = t.generate(e, a, r), g = e;
+      e = t.tf.concat([e, f], 1), g.dispose(), f.dispose();
+    }
+    return e;
+  }).array())[0], d = o.indexOf(n.eosToken);
+  return d !== -1 && o.splice(d), await n.decode(o);
+}
+export {
+  h as generateText
+};
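
The minified parameters map to (tokeniser, model, prompt, length, temperature = 1, topK), which matches the validation messages above and the call site in LayerTrainer.js. A hedged usage sketch, with the dist import path and argument values as assumptions:

// Sketch only: sampling 100 new tokens at temperature 0.8 from the top 10 logits.
import { generateText } from "@genai-fi/nanogpt/dist/utilities/generate.js";

async function sample(tokeniser: any, model: any) {
  const text = await generateText(tokeniser, model, "Once upon a time", 100, 0.8, 10);
  console.log(text);
}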

package/dist/utilities/load.d.ts
@@ -0,0 +1,7 @@
+import { default as TF } from '@tensorflow/tfjs';
+import { default as NanoGPT } from '../NanoGPTModel';
+import { ITokeniser } from '../tokeniser/type';
+export declare function loadModel(tf: typeof TF, data: Blob | Buffer | string): Promise<{
+    model: NanoGPT;
+    tokeniser: ITokeniser;
+}>;
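
Per the declaration above, loadModel accepts the saved archive as a Blob, a Buffer, or a string (strings are fetched as URLs in load.js below). A hedged sketch; the URL is a placeholder.

// Sketch only: loading a saved model archive by URL.
import * as tf from "@tensorflow/tfjs";
import { loadModel } from "@genai-fi/nanogpt/dist/utilities/load.js";

async function restore() {
  const { model, tokeniser } = await loadModel(tf, "https://example.com/model.zip");
  return { model, tokeniser };
}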

package/dist/utilities/load.js
@@ -0,0 +1,47 @@
+import { z as k } from "../jszip.min-BLbRbbKt.js";
+import { importWeights as F } from "./weights.js";
+import z from "../tokeniser/CharTokeniser.js";
+import j from "../NanoGPTModel.js";
+function m(o) {
+  const a = o.tf.zeros([1, o.config.blockSize], "int32"), { logits: n, loss: s } = o.forward(a, void 0, !1);
+  n.dispose(), s && s.dispose(), a.dispose();
+}
+async function E(o) {
+  const e = await fetch(o);
+  if (!e.ok)
+    throw new Error(`Failed to fetch ${o}: ${e.statusText}`);
+  return e.arrayBuffer();
+}
+async function A(o, e) {
+  const a = typeof e == "string" ? await E(e) : e, n = await k.loadAsync(a), s = /* @__PURE__ */ new Map(), f = await n.file("manifest.json")?.async("string");
+  if (!f)
+    throw new Error("Manifest file not found in the zip archive");
+  const l = JSON.parse(f);
+  for (const [t, r] of Object.entries(l.weightSpec))
+    s.set(t, { spec: r, data: new Float32Array() });
+  const p = await n.file("tokeniser.json")?.async("string");
+  if (!p)
+    throw new Error("Tokeniser file not found in the zip archive");
+  const d = JSON.parse(p), y = new z(d.vocab), w = /* @__PURE__ */ new Map();
+  for (const t of Object.keys(n.files))
+    if (t.endsWith(".bin")) {
+      const r = t.replace(".bin", ""), h = await n.file(t).async("arraybuffer"), u = new Float32Array(h), c = s.get(r) || { spec: [], data: new Float32Array() };
+      c.data = u, s.set(r, c);
+      const b = await F(c, o);
+      w.set(r, b);
+    }
+  const i = new j(o, l.config);
+  m(i), i.loadWeights(w), m(i);
+  const g = await n.file("log.json")?.async("string");
+  if (g)
+    try {
+      const t = JSON.parse(g);
+      i.log = t;
+    } catch (t) {
+      throw console.error("Error parsing training log:", t), new Error(`Failed to parse training log: ${t}`);
+    }
+  return { model: i, tokeniser: y };
+}
+export {
+  A as loadModel
+};
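
A sketch of the zip layout that loadModel reads above (and that saveModel below produces). The JSON file names come from the code; the tensor file name is illustrative only.

model.zip
  manifest.json    // { weightSpec: { <tensorName>: spec, ... }, config: { blockSize, nLayer, vocabSize, ... } }
  tokeniser.json   // { vocab: [...], merges: [...] }
  log.json         // optional training log (array of log entries)
  <tensorName>.bin // raw Float32Array data, one file per named weight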

package/dist/utilities/save.js
@@ -0,0 +1,21 @@
+import { z as f } from "../jszip.min-BLbRbbKt.js";
+import { exportWeights as g } from "./weights.js";
+async function l(i, t) {
+  const o = i.saveWeights(), e = new f(), s = {};
+  for (const [n, r] of o) {
+    const a = await g(r);
+    s[n] = a.spec, e.file(`${n}.bin`, a.data.buffer, { binary: !0 });
+  }
+  return e.file("manifest.json", JSON.stringify({ weightSpec: s, config: i.config }), {
+    binary: !1
+  }), e.file(
+    "tokeniser.json",
+    JSON.stringify({ vocab: t.getVocab(), merges: await t.getMerges() }),
+    {
+      binary: !1
+    }
+  ), e.file("log.json", JSON.stringify(i.log), { binary: !1 }), e.generateAsync({ type: "blob" });
+}
+export {
+  l as saveModel
+};
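
saveModel takes (model, tokeniser) and resolves to the zip archive as a Blob, which loadModel accepts directly. A hedged round-trip sketch; import paths are assumptions, and writing the Blob to disk (in Node) or triggering a download (in the browser) is left out.

// Sketch only: save/load round trip using the two utilities above.
import * as tf from "@tensorflow/tfjs";
import { saveModel } from "@genai-fi/nanogpt/dist/utilities/save.js";
import { loadModel } from "@genai-fi/nanogpt/dist/utilities/load.js";

async function roundTrip(model: any, tokeniser: any) {
  const blob = await saveModel(model, tokeniser);  // zip archive as a Blob
  const restored = await loadModel(tf, blob);      // accepts Blob | Buffer | string
  return restored.model;
}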

package/dist/utilities/textLoader.d.ts
@@ -0,0 +1 @@
+export default function loadTextData(file: File | string): Promise<string[]>;
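
loadTextData resolves to a string[], which is the input type of the trainer's createDataset and createTrainValidationSplit shown earlier. A hedged sketch; whether the string argument is a path or URL is not specified in this diff, and the file name here is a placeholder.

// Sketch only: feeding loadTextData output into the trainer's dataset helper.
import loadTextData from "@genai-fi/nanogpt/dist/utilities/textLoader.js";

async function buildDataset(trainer: { createDataset(texts: string[], batchSize?: number): Promise<unknown> }) {
  const texts = await loadTextData("corpus.txt"); // File object or string per the declaration
  return trainer.createDataset(texts, 32);
}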