@genai-fi/nanogpt 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/LICENSE +7 -0
  2. package/README.md +20 -0
  3. package/dist/Generator.d.ts +14 -0
  4. package/dist/Generator.js +39 -0
  5. package/dist/NanoGPTModel.d.ts +35 -0
  6. package/dist/NanoGPTModel.js +129 -0
  7. package/dist/TeachableLLM.d.ts +21 -0
  8. package/dist/TeachableLLM.js +47 -0
  9. package/dist/Trainer.d.ts +19 -0
  10. package/dist/Trainer.js +34 -0
  11. package/dist/_commonjsHelpers-DaMA6jEr.js +8 -0
  12. package/dist/assets/worker-BYeSPNkq.js +1 -0
  13. package/dist/config.d.ts +11 -0
  14. package/dist/config.js +19 -0
  15. package/dist/index-B8nyc6IR.js +3899 -0
  16. package/dist/index-SOhdqzHq.js +113 -0
  17. package/dist/jszip.min-BLbRbbKt.js +2324 -0
  18. package/dist/layers/CausalSelfAttention.d.ts +22 -0
  19. package/dist/layers/CausalSelfAttention.js +75 -0
  20. package/dist/layers/LayerNorm.d.ts +12 -0
  21. package/dist/layers/LayerNorm.js +30 -0
  22. package/dist/layers/MLP.d.ts +17 -0
  23. package/dist/layers/MLP.js +57 -0
  24. package/dist/layers/TiedEmbedding.d.ts +22 -0
  25. package/dist/layers/TiedEmbedding.js +532 -0
  26. package/dist/layers/TransformerBlock.d.ts +19 -0
  27. package/dist/layers/TransformerBlock.js +47 -0
  28. package/dist/main.d.ts +6 -0
  29. package/dist/main.js +8 -0
  30. package/dist/tokeniser/CharTokeniser.d.ts +20 -0
  31. package/dist/tokeniser/CharTokeniser.js +52 -0
  32. package/dist/tokeniser/NodeTokeniser.d.ts +19 -0
  33. package/dist/tokeniser/NodeTokeniser.js +46 -0
  34. package/dist/tokeniser/WebTokeniser.d.ts +18 -0
  35. package/dist/tokeniser/WebTokeniser.js +96 -0
  36. package/dist/tokeniser/bpe.d.ts +14 -0
  37. package/dist/tokeniser/bpe.js +102 -0
  38. package/dist/tokeniser/messages.d.ts +61 -0
  39. package/dist/tokeniser/messages.js +1 -0
  40. package/dist/tokeniser/type.d.ts +14 -0
  41. package/dist/tokeniser/type.js +1 -0
  42. package/dist/tokeniser/worker.d.ts +1 -0
  43. package/dist/tokeniser/worker.js +53 -0
  44. package/dist/training/AdamExt.d.ts +23 -0
  45. package/dist/training/AdamExt.js +43 -0
  46. package/dist/training/DatasetBuilder.d.ts +12 -0
  47. package/dist/training/DatasetBuilder.js +27 -0
  48. package/dist/training/FullTrainer.d.ts +17 -0
  49. package/dist/training/FullTrainer.js +75 -0
  50. package/dist/training/LayerTrainer.d.ts +28 -0
  51. package/dist/training/LayerTrainer.js +108 -0
  52. package/dist/training/Trainer.d.ts +73 -0
  53. package/dist/training/Trainer.js +87 -0
  54. package/dist/training/lwSchedule.d.ts +7 -0
  55. package/dist/training/lwSchedule.js +162 -0
  56. package/dist/utilities/generate.d.ts +3 -0
  57. package/dist/utilities/generate.js +22 -0
  58. package/dist/utilities/load.d.ts +7 -0
  59. package/dist/utilities/load.js +47 -0
  60. package/dist/utilities/save.d.ts +3 -0
  61. package/dist/utilities/save.js +21 -0
  62. package/dist/utilities/textLoader.d.ts +1 -0
  63. package/dist/utilities/textLoader.js +438 -0
  64. package/dist/utilities/tokenParse.d.ts +1 -0
  65. package/dist/utilities/tokenParse.js +66 -0
  66. package/dist/utilities/weights.d.ts +12 -0
  67. package/dist/utilities/weights.js +43 -0
  68. package/package.json +59 -0
package/dist/training/LayerTrainer.js
@@ -0,0 +1,108 @@
+ import { generateText as v } from "../utilities/generate.js";
+ import T from "./Trainer.js";
+ import { schedule as k } from "./lwSchedule.js";
+ const x = {
+ epochs: 1,
+ stepsPerEpoch: 1e6,
+ desiredLoss: 0.01,
+ logInterval: 1,
+ stepsPerLayer: 400,
+ maxPasses: 3
+ };
+ class D extends T {
+ trainingPattern = [];
+ startPass = 0;
+ startLayer = 0;
+ constructor(o, e, t, h = 3e-4) {
+ if (super(o, e, t, h), this.trainingPattern = k[e.config.nLayer - 1] || [], e.log.length > 0) {
+ const r = e.log[e.log.length - 1];
+ r.pass !== void 0 && r.layer !== void 0 && (this.startPass = r.pass, this.startLayer = r.layer, console.log(`Resuming training from pass ${this.startPass}, layer ${this.startLayer}`));
+ }
+ }
+ applyTrainingPattern(o) {
+ const e = o < this.trainingPattern.length ? o : this.trainingPattern.length - 1, t = this.trainingPattern[e];
+ this.model.setSkipMask(t.skip), this.model.setTrainableMask(t.trainable), this.resetOptimizer(t.adam), console.log("Applied training pattern:", e, t);
+ }
+ // Train for multiple epochs using Dataset API - FIXED memory leaks
+ async trainOnDataset(o, e, t) {
+ const {
+ epochs: h,
+ stepsPerEpoch: r,
+ desiredLoss: c,
+ logInterval: m,
+ stepsPerLayer: d,
+ onLayerChange: n,
+ onPassComplete: g,
+ onStep: y,
+ onEpoch: p,
+ prompt: L
+ } = {
+ ...x,
+ ...e
+ }, s = {
+ epoch: 0,
+ pass: 0,
+ layerStep: 0,
+ step: 0,
+ stepSinceLayerChange: 0,
+ lastLoss: 1e6,
+ epochLoss: 0,
+ totalSteps: 0,
+ losses: [],
+ validationLosses: []
+ };
+ this.dummyPass();
+ const S = Date.now();
+ for (s.epoch = 0; s.epoch < h; s.epoch++) {
+ s.step = 0, s.epochLoss = 0, s.pass = this.startPass, s.layerStep = this.startLayer + this.startPass * this.model.config.nLayer, s.stepSinceLayerChange = 0, this.startPass = 0, this.startLayer = 0;
+ const u = await o.iterator();
+ this.applyTrainingPattern(s.layerStep % this.trainingPattern.length);
+ try {
+ for (; !(r && s.step >= r || s.lastLoss < c); ) {
+ const a = await u.next();
+ if (a.done) break;
+ const P = a.value, w = this.trainBatch(s, P);
+ s.stepSinceLayerChange++;
+ const l = {
+ epoch: s.epoch,
+ loss: s.lastLoss,
+ step: s.step,
+ time: Date.now() - S,
+ batchSize: P.xs.shape[0],
+ pass: s.pass,
+ layer: s.layerStep % this.model.config.nLayer
+ };
+ if (this.model.log.push(l), s.step % m === 0 && (await w, y)) {
+ if (L) {
+ const i = await v(this.tokenizer, this.model, L, 100, 0.8, 10);
+ l.example = i;
+ }
+ await y(l);
+ }
+ if (s.stepSinceLayerChange >= d) {
+ let i;
+ t && (i = await this.evaluateOnDataset(t, 5), s.validationLosses.push(i), l.valLoss = i), s.layerStep++, s.layerStep % this.model.config.nLayer === 0 ? (n && await n(s.layerStep, s.pass, i), g && await g(s.pass), s.pass++) : n && await n(s.layerStep, s.pass, i), s.stepSinceLayerChange = 0, this.applyTrainingPattern(s.layerStep % this.trainingPattern.length);
+ }
+ }
+ } catch (a) {
+ throw console.error("Training error:", a), this.tf.dispose(), a;
+ }
+ const f = s.epochLoss / s.step;
+ if (t)
+ try {
+ const a = await this.evaluateOnDataset(t, 5);
+ s.validationLosses.push(a), p && await p(s.epoch, f, a);
+ } catch (a) {
+ console.error("Validation error:", a);
+ }
+ else
+ p && p(s.epoch, f);
+ if (this.tf.dispose(), s.lastLoss < c)
+ break;
+ }
+ return { losses: s.losses, validationLosses: s.validationLosses };
+ }
+ }
+ export {
+ D as default
+ };
package/dist/training/Trainer.d.ts
@@ -0,0 +1,73 @@
+ import { ITokeniser } from '../tokeniser/type';
+ import { DatasetBuilder } from './DatasetBuilder';
+ import { default as NanoGPT, TrainingLogEntry } from '../NanoGPTModel';
+ import { default as TF } from '@tensorflow/tfjs';
+ import { default as AdamExt } from './AdamExt';
+ export interface TrainingState {
+ epoch: number;
+ step: number;
+ lastLoss: number;
+ epochLoss: number;
+ totalSteps: number;
+ losses: number[];
+ validationLosses: number[];
+ }
+ export interface AdamConfig {
+ learningRateFactor: number;
+ beta1: number;
+ beta2: number;
+ epsilon: number;
+ }
+ export interface TrainingOptions {
+ epochs: number;
+ stepsPerEpoch: number;
+ desiredLoss: number;
+ logInterval: number;
+ prompt?: string;
+ onEpoch?: (e: number, loss: number, valLoss?: number) => Promise<void> | void;
+ onStep?: (log: TrainingLogEntry) => Promise<void> | void;
+ }
+ export default abstract class GPTTrainer {
+ protected tokenizer: ITokeniser;
+ protected model: NanoGPT;
+ protected optimizer: AdamExt;
+ protected datasetBuilder: DatasetBuilder;
+ protected tf: typeof TF;
+ protected learningRate: number;
+ constructor(tf: typeof TF, model: NanoGPT, tokenizer: ITokeniser, learningRate?: number);
+ getOptimizer(): AdamExt;
+ resetOptimizer(config?: AdamConfig): void;
+ private printGradients;
+ protected trainStep(batch: {
+ xs: TF.Tensor;
+ ys: TF.Tensor;
+ }, dummy?: boolean, print?: boolean): TF.Scalar;
+ protected dummyPass(): void;
+ protected trainBatch(state: TrainingState, batch: {
+ xs: TF.Tensor;
+ ys: TF.Tensor;
+ }): Promise<number>;
+ abstract trainOnDataset(dataset: TF.data.Dataset<{
+ xs: TF.Tensor;
+ ys: TF.Tensor;
+ }>, options: Partial<TrainingOptions>, validationDataset?: TF.data.Dataset<{
+ xs: TF.Tensor;
+ ys: TF.Tensor;
+ }>): Promise<{
+ losses: number[];
+ validationLosses: number[];
+ }>;
+ evaluateOnDataset(dataset: TF.data.Dataset<TF.TensorContainer>, maxBatches?: number): Promise<number>;
+ createTrainValidationSplit(textData: string[], batchSize?: number, validationSplit?: number): Promise<{
+ trainDataset: TF.data.Dataset<{
+ xs: TF.Tensor;
+ ys: TF.Tensor;
+ }>;
+ validationDataset: TF.data.Dataset<{
+ xs: TF.Tensor;
+ ys: TF.Tensor;
+ }>;
+ }>;
+ createDataset(textData: string[], batchSize?: number): Promise<TF.data.Dataset<TF.TensorContainer>>;
+ dispose(): void;
+ }
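
Note: the abstract GPTTrainer API above (createTrainValidationSplit, trainOnDataset, dispose) is what the layer-wise trainer in the previous hunk implements. The following is a minimal usage sketch, not code from the package: it assumes the LayerTrainer default export is imported from the package's dist/training path, that a NanoGPT `model`, an ITokeniser `tokeniser`, and a `textData: string[]` corpus already exist, and the option values shown are illustrative.

import * as tf from "@tensorflow/tfjs";
import LayerTrainer from "@genai-fi/nanogpt/dist/training/LayerTrainer.js"; // assumed specifier

// Build a trainer with the default 3e-4 base learning rate.
const trainer = new LayerTrainer(tf, model, tokeniser, 3e-4);

// 90/10 train/validation split built from raw text, batch size 32.
const { trainDataset, validationDataset } =
  await trainer.createTrainValidationSplit(textData, 32, 0.1);

const { losses, validationLosses } = await trainer.trainOnDataset(
  trainDataset,
  {
    epochs: 1,
    logInterval: 1,
    prompt: "Once upon a time", // when set, a sample is generated on each logged step
    onStep: (log) => console.log(log.step, log.loss, log.example),
    onEpoch: (epoch, loss, valLoss) => console.log({ epoch, loss, valLoss }),
  },
  validationDataset
);

trainer.dispose(); // releases the optimizer state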
package/dist/training/Trainer.js
@@ -0,0 +1,87 @@
+ import { DatasetBuilder as h } from "./DatasetBuilder.js";
+ import p from "./AdamExt.js";
+ class y {
+ constructor(t, e, s, a = 1e-3) {
+ this.tokenizer = s, this.tf = t, this.model = e, this.learningRate = a, this.resetOptimizer(), this.datasetBuilder = new h(this.tf, s, e.config.blockSize);
+ }
+ model;
+ optimizer;
+ datasetBuilder;
+ tf;
+ learningRate;
+ getOptimizer() {
+ return this.optimizer;
+ }
+ resetOptimizer(t = { learningRateFactor: 1, beta1: 0.9, beta2: 0.99, epsilon: 1e-8 }) {
+ this.optimizer && this.optimizer.dispose();
+ const e = new p(
+ t.learningRateFactor * this.learningRate,
+ t.beta1,
+ t.beta2,
+ t.epsilon,
+ {
+ warmupSteps: 100,
+ decaySteps: 2e4,
+ minLearningRate: 1e-4,
+ weightDecay: 0
+ }
+ );
+ this.optimizer = e;
+ }
+ printGradients(t) {
+ Object.keys(t).forEach((e) => {
+ const s = t[e];
+ console.log(`${e}:`), console.log(` Shape: ${s.shape}`), console.log(` Mean: ${this.tf.mean(s).dataSync()[0]}`), console.log(` Std: ${this.tf.moments(s).variance.sqrt().dataSync()[0]}`), console.log(` Min: ${this.tf.min(s).dataSync()[0]}`), console.log(` Max: ${this.tf.max(s).dataSync()[0]}`), console.log(` Norm: ${this.tf.norm(s).dataSync()[0]}`);
+ });
+ }
+ trainStep(t, e = !1, s = !1) {
+ return this.tf.tidy(() => {
+ const { xs: a, ys: o } = t, r = () => {
+ const { loss: l, logits: c } = this.model.forward(a, o, !0);
+ return c.dispose(), l;
+ }, { value: n, grads: i } = this.tf.variableGrads(r);
+ return e || (s && (console.log("-------"), this.printGradients(i), console.log("-------")), this.optimizer.applyGradients(i), this.tf.dispose(i)), n;
+ });
+ }
+ dummyPass() {
+ const t = this.tf.zeros([1, this.model.config.blockSize], "int32"), e = this.tf.zeros([1, this.model.config.blockSize, this.model.config.vocabSize]);
+ try {
+ const s = this.trainStep({ xs: t, ys: e }, !0);
+ s.dataSync(), s.dispose();
+ } catch (s) {
+ console.error("Error during dummy pass:", s);
+ } finally {
+ t.dispose(), e.dispose();
+ }
+ }
+ async trainBatch(t, e) {
+ try {
+ const s = this.trainStep(e, !1, !1);
+ return e.xs.dispose(), e.ys.dispose(), t.step++, t.totalSteps++, s.array().then((a) => (t.lastLoss = a, t.losses.push(t.lastLoss), t.epochLoss += t.lastLoss, s.dispose(), t.lastLoss));
+ } catch (s) {
+ throw console.error(`Error processing batch at step ${t.step}:`, s), this.tf.dispose(), s;
+ }
+ }
+ // Evaluate model on validation dataset - FIXED memory leaks
+ async evaluateOnDataset(t, e = 100) {
+ let s = 0, a = 0;
+ return await t.take(e).forEachAsync(async (o) => {
+ const { xs: r, ys: n } = o, { loss: i, logits: l } = this.model.forward(r, n, !1), d = i.arraySync();
+ i.dispose(), l.dispose(), s += d, a++;
+ }), s / a;
+ }
+ // Create training and validation datasets - FIXED memory leaks
+ async createTrainValidationSplit(t, e = 32, s = 0.1) {
+ const a = Math.floor(t.length * (1 - s)), o = t.slice(0, a), r = t.slice(a), n = await this.datasetBuilder.createTextDataset(o, e), i = await this.datasetBuilder.createTextDataset(r, e);
+ return { trainDataset: n, validationDataset: i };
+ }
+ async createDataset(t, e = 32) {
+ return await this.datasetBuilder.createTextDataset(t, e);
+ }
+ dispose() {
+ this.optimizer && this.optimizer.dispose(), this.tf.dispose();
+ }
+ }
+ export {
+ y as default
+ };
package/dist/training/lwSchedule.d.ts
@@ -0,0 +1,7 @@
+ import { AdamConfig } from './Trainer';
+ export interface LWSchedule {
+ adam: AdamConfig;
+ skip: boolean[];
+ trainable: boolean[];
+ }
+ export declare const schedule: LWSchedule[][];
package/dist/training/lwSchedule.js
@@ -0,0 +1,162 @@
+ const e = [
+ [
+ {
+ adam: {
+ learningRateFactor: 1,
+ beta1: 0.9,
+ beta2: 0.999,
+ epsilon: 1e-8
+ },
+ skip: [!1],
+ trainable: [!0]
+ }
+ ],
+ [
+ {
+ adam: {
+ learningRateFactor: 1,
+ beta1: 0.9,
+ beta2: 0.999,
+ epsilon: 1e-8
+ },
+ skip: [!0, !1],
+ trainable: [!1, !0]
+ },
+ {
+ adam: {
+ learningRateFactor: 1,
+ beta1: 0.9,
+ beta2: 0.999,
+ epsilon: 1e-8
+ },
+ skip: [!1, !1],
+ trainable: [!0, !1]
+ },
+ {
+ adam: {
+ learningRateFactor: 0.3333333333333333,
+ beta1: 0.95,
+ beta2: 0.999,
+ epsilon: 1e-8
+ },
+ skip: [!1, !1],
+ trainable: [!0, !0]
+ }
+ ],
+ [],
+ [
+ {
+ adam: {
+ learningRateFactor: 1,
+ beta1: 0.9,
+ beta2: 0.999,
+ epsilon: 1e-8
+ },
+ skip: [!0, !0, !0, !1],
+ trainable: [!1, !1, !1, !0]
+ },
+ {
+ adam: {
+ learningRateFactor: 1,
+ beta1: 0.9,
+ beta2: 0.999,
+ epsilon: 1e-8
+ },
+ skip: [!0, !0, !1, !1],
+ trainable: [!1, !1, !0, !1]
+ },
+ {
+ adam: {
+ learningRateFactor: 0.3333333333333333,
+ beta1: 0.95,
+ beta2: 0.999,
+ epsilon: 1e-8
+ },
+ skip: [!0, !0, !1, !1],
+ trainable: [!1, !1, !1, !0]
+ },
+ {
+ adam: {
+ learningRateFactor: 1,
+ beta1: 0.9,
+ beta2: 0.999,
+ epsilon: 1e-8
+ },
+ skip: [!0, !1, !1, !1],
+ trainable: [!1, !0, !1, !1]
+ },
+ {
+ adam: {
+ learningRateFactor: 0.3333333333333333,
+ beta1: 0.95,
+ beta2: 0.999,
+ epsilon: 1e-8
+ },
+ skip: [!0, !1, !1, !1],
+ trainable: [!1, !1, !0, !1]
+ },
+ {
+ adam: {
+ learningRateFactor: 0.16666666666666666,
+ beta1: 0.98,
+ beta2: 0.9999,
+ epsilon: 1e-8
+ },
+ skip: [!0, !1, !1, !1],
+ trainable: [!1, !1, !1, !0]
+ },
+ {
+ adam: {
+ learningRateFactor: 1,
+ beta1: 0.9,
+ beta2: 0.999,
+ epsilon: 1e-8
+ },
+ skip: [!1, !1, !1, !1],
+ trainable: [!0, !1, !1, !1]
+ },
+ {
+ adam: {
+ learningRateFactor: 0.3333333333333333,
+ beta1: 0.95,
+ beta2: 0.999,
+ epsilon: 1e-8
+ },
+ skip: [!1, !1, !1, !1],
+ trainable: [!1, !0, !1, !1]
+ },
+ {
+ adam: {
+ learningRateFactor: 0.16666666666666666,
+ beta1: 0.98,
+ beta2: 0.9999,
+ epsilon: 1e-8
+ },
+ skip: [!1, !1, !1, !1],
+ trainable: [!1, !1, !0, !1]
+ },
+ {
+ adam: {
+ learningRateFactor: 0.16666666666666666,
+ beta1: 0.98,
+ beta2: 0.9999,
+ epsilon: 1e-8
+ },
+ skip: [!1, !1, !1, !1],
+ trainable: [!1, !1, !1, !0]
+ },
+ {
+ adam: {
+ learningRateFactor: 0.16666666666666666,
+ beta1: 0.98,
+ beta2: 0.9999,
+ epsilon: 1e-8
+ },
+ skip: [!1, !1, !1, !1],
+ trainable: [!0, !0, !0, !0]
+ }
+ ]
+ ];
+ export {
+ e as schedule
+ };
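
Note: as read by the LayerTrainer constructor earlier in this diff, this table is indexed by layer count: `schedule[config.nLayer - 1]` gives the list of LWSchedule phases cycled through during layer-wise training, each pairing Adam settings with per-block `skip`/`trainable` masks (the empty third entry means no schedule ships for 3-layer models). The sketch below mirrors applyTrainingPattern as an illustration only; the type-only import specifiers and the meaning of a `true` entry in each mask are assumptions, not confirmed by the package.

import { schedule } from "@genai-fi/nanogpt/dist/training/lwSchedule.js";   // assumed specifier
import type NanoGPT from "@genai-fi/nanogpt/dist/NanoGPTModel";             // assumed specifier
import type GPTTrainer from "@genai-fi/nanogpt/dist/training/Trainer";      // assumed specifier

// Pick a phase, set the per-block masks, and rebuild the optimizer with that phase's Adam settings.
function applyPhase(model: NanoGPT, trainer: GPTTrainer, index: number): void {
  const phases = schedule[model.config.nLayer - 1] ?? [];
  if (phases.length === 0) return;                          // e.g. the 3-layer slot is empty
  const phase = phases[Math.min(index, phases.length - 1)]; // clamp to the last phase, as applyTrainingPattern does
  model.setSkipMask(phase.skip);                            // presumably true = skip that transformer block
  model.setTrainableMask(phase.trainable);                  // presumably true = block receives gradient updates
  trainer.resetOptimizer(phase.adam);                       // fresh Adam state for the new phase
}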
package/dist/utilities/generate.d.ts
@@ -0,0 +1,3 @@
+ import { ITokeniser } from '../tokeniser/type';
+ import { default as NanoGPT } from '../NanoGPTModel';
+ export declare function generateText(tokeniser: ITokeniser, model: NanoGPT, prompt: string, length: number, temperature?: number, topK?: number): Promise<string>;
package/dist/utilities/generate.js
@@ -0,0 +1,22 @@
+ async function h(n, t, i, s, a = 1, r) {
+ if (s <= 0)
+ throw new Error("Length must be a positive integer");
+ if (a <= 0)
+ throw new Error("Temperature must be a positive number");
+ if (r !== void 0 && r <= 0)
+ throw new Error("topK must be a positive integer or undefined");
+ if (i.length === 0)
+ throw new Error("Prompt cannot be an empty string");
+ const c = await n.tokenise([i], !0), o = (await t.tf.tidy(() => {
+ let e = t.tf.tensor2d(c, [1, c[0].length], "int32");
+ for (let u = 0; u < s; u++) {
+ const f = t.generate(e, a, r), g = e;
+ e = t.tf.concat([e, f], 1), g.dispose(), f.dispose();
+ }
+ return e;
+ }).array())[0], d = o.indexOf(n.eosToken);
+ return d !== -1 && o.splice(d), await n.decode(o);
+ }
+ export {
+ h as generateText
+ };
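
Note: generateText, as declared in generate.d.ts above, tokenises the prompt, repeatedly calls model.generate to append one token at a time, truncates at the tokeniser's EOS token, and decodes the result. A short usage sketch follows; the import specifier is an assumption, and `tokeniser` and `model` are assumed to already exist.

import { generateText } from "@genai-fi/nanogpt/dist/utilities/generate.js"; // assumed specifier

const text = await generateText(
  tokeniser,
  model,
  "Once upon a time", // prompt (must be non-empty)
  100,                // number of tokens to generate (must be > 0)
  0.8,                // sampling temperature (must be > 0)
  10                  // optional top-k cutoff
);
console.log(text);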
package/dist/utilities/load.d.ts
@@ -0,0 +1,7 @@
+ import { default as TF } from '@tensorflow/tfjs';
+ import { default as NanoGPT } from '../NanoGPTModel';
+ import { ITokeniser } from '../tokeniser/type';
+ export declare function loadModel(tf: typeof TF, data: Blob | Buffer | string): Promise<{
+ model: NanoGPT;
+ tokeniser: ITokeniser;
+ }>;
package/dist/utilities/load.js
@@ -0,0 +1,47 @@
+ import { z as k } from "../jszip.min-BLbRbbKt.js";
+ import { importWeights as F } from "./weights.js";
+ import z from "../tokeniser/CharTokeniser.js";
+ import j from "../NanoGPTModel.js";
+ function m(o) {
+ const a = o.tf.zeros([1, o.config.blockSize], "int32"), { logits: n, loss: s } = o.forward(a, void 0, !1);
+ n.dispose(), s && s.dispose(), a.dispose();
+ }
+ async function E(o) {
+ const e = await fetch(o);
+ if (!e.ok)
+ throw new Error(`Failed to fetch ${o}: ${e.statusText}`);
+ return e.arrayBuffer();
+ }
+ async function A(o, e) {
+ const a = typeof e == "string" ? await E(e) : e, n = await k.loadAsync(a), s = /* @__PURE__ */ new Map(), f = await n.file("manifest.json")?.async("string");
+ if (!f)
+ throw new Error("Manifest file not found in the zip archive");
+ const l = JSON.parse(f);
+ for (const [t, r] of Object.entries(l.weightSpec))
+ s.set(t, { spec: r, data: new Float32Array() });
+ const p = await n.file("tokeniser.json")?.async("string");
+ if (!p)
+ throw new Error("Tokeniser file not found in the zip archive");
+ const d = JSON.parse(p), y = new z(d.vocab), w = /* @__PURE__ */ new Map();
+ for (const t of Object.keys(n.files))
+ if (t.endsWith(".bin")) {
+ const r = t.replace(".bin", ""), h = await n.file(t).async("arraybuffer"), u = new Float32Array(h), c = s.get(r) || { spec: [], data: new Float32Array() };
+ c.data = u, s.set(r, c);
+ const b = await F(c, o);
+ w.set(r, b);
+ }
+ const i = new j(o, l.config);
+ m(i), i.loadWeights(w), m(i);
+ const g = await n.file("log.json")?.async("string");
+ if (g)
+ try {
+ const t = JSON.parse(g);
+ i.log = t;
+ } catch (t) {
+ throw console.error("Error parsing training log:", t), new Error(`Failed to parse training log: ${t}`);
+ }
+ return { model: i, tokeniser: y };
+ }
+ export {
+ A as loadModel
+ };
package/dist/utilities/save.d.ts
@@ -0,0 +1,3 @@
+ import { default as NanoGPT } from '../NanoGPTModel';
+ import { ITokeniser } from '../tokeniser/type';
+ export declare function saveModel(model: NanoGPT, tokeniser: ITokeniser): Promise<Blob>;
package/dist/utilities/save.js
@@ -0,0 +1,21 @@
+ import { z as f } from "../jszip.min-BLbRbbKt.js";
+ import { exportWeights as g } from "./weights.js";
+ async function l(i, t) {
+ const o = i.saveWeights(), e = new f(), s = {};
+ for (const [n, r] of o) {
+ const a = await g(r);
+ s[n] = a.spec, e.file(`${n}.bin`, a.data.buffer, { binary: !0 });
+ }
+ return e.file("manifest.json", JSON.stringify({ weightSpec: s, config: i.config }), {
+ binary: !1
+ }), e.file(
+ "tokeniser.json",
+ JSON.stringify({ vocab: t.getVocab(), merges: await t.getMerges() }),
+ {
+ binary: !1
+ }
+ ), e.file("log.json", JSON.stringify(i.log), { binary: !1 }), e.generateAsync({ type: "blob" });
+ }
+ export {
+ l as saveModel
+ };
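
Note: saveModel and loadModel above are roughly symmetric. saveModel packs per-variable .bin weight files, a manifest (weight specs plus model config), the tokeniser vocabulary and merges, and the training log into a zip delivered as a Blob; loadModel rebuilds a NanoGPT model and a tokeniser from that archive, accepting a Blob, a Buffer, or a URL string. A hedged roundtrip sketch, with assumed import specifiers and a pre-existing `model` and `tokeniser`:

import * as tf from "@tensorflow/tfjs";
import { saveModel } from "@genai-fi/nanogpt/dist/utilities/save.js"; // assumed specifier
import { loadModel } from "@genai-fi/nanogpt/dist/utilities/load.js"; // assumed specifier

// Serialise the trained model and tokeniser into a single zip Blob.
const blob: Blob = await saveModel(model, tokeniser);

// ...later, restore from the same archive (or pass a URL string to fetch it).
const restored = await loadModel(tf, blob);
restored.model;     // NanoGPT with weights, config, and training log restored
restored.tokeniser; // tokeniser rebuilt from the saved vocabulary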
package/dist/utilities/textLoader.d.ts
@@ -0,0 +1 @@
+ export default function loadTextData(file: File | string): Promise<string[]>;