@genai-fi/nanogpt 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -16
- package/dist/Generator.d.ts +3 -0
- package/dist/Generator.js +41 -27
- package/dist/NanoGPTModel.d.ts +14 -2
- package/dist/NanoGPTModel.js +49 -43
- package/dist/TeachableLLM.d.ts +8 -2
- package/dist/TeachableLLM.js +39 -26
- package/dist/layers/CausalSelfAttention.d.ts +7 -1
- package/dist/layers/CausalSelfAttention.js +45 -35
- package/dist/layers/TransformerBlock.d.ts +5 -1
- package/dist/layers/TransformerBlock.js +14 -10
- package/dist/tokeniser/CharTokeniser.d.ts +3 -1
- package/dist/tokeniser/CharTokeniser.js +44 -25
- package/dist/tokeniser/NodeTokeniser.d.ts +3 -2
- package/dist/tokeniser/NodeTokeniser.js +5 -5
- package/dist/tokeniser/type.d.ts +1 -1
- package/dist/training/FullTrainer.js +3 -1
- package/dist/training/LayerTrainer.js +8 -5
- package/dist/utilities/dummy.d.ts +3 -0
- package/dist/utilities/dummy.js +12 -0
- package/dist/utilities/generate.d.ts +2 -2
- package/dist/utilities/generate.js +11 -15
- package/dist/utilities/load.js +25 -28
- package/package.json +1 -1
package/README.md
CHANGED
@@ -1,20 +1,28 @@
-#
-TODO: Give a short introduction of your project. Let this section explain the objectives or the motivation behind this project.
+# GenAI NanoGPT
 
-
-TODO: Guide users through getting your code up and running on their own system. In this section you can talk about:
-1. Installation process
-2. Software dependencies
-3. Latest releases
-4. API references
+Developed as a part of the Finnish Generation AI research project. This is an implementation of [NanoGPT](https://github.com/karpathy/nanoGPT) for Tensorflow.js. It allows GPT models to be trained and loaded within a web browser and exposes some XAI functionality.
 
-
-TODO: Describe and show how to build your code and run the tests.
+Work in progress...
 
-#
-TODO: Explain how other users and developers can contribute to make your code better.
+# Install
 
-
-
-
-
+```
+npm install @genai-fi/nanogpt
+```
+
+# Usage
+
+```
+import { TeachableLLM, CharTokeniser } from '@genai-fi/nanogpt';
+import * as tf from '@tensorflow/tfjs';
+
+const tokeniser = new CharTokeniser();
+const model = TeachableLLM.create(tf, tokeniser, {
+    vocabSize: 200,
+    blockSize: 128,
+    nLayer: 4,
+    nHead: 3,
+    nEmbed: 192,
+    dropout: 0.0,
+});
+```

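Read together with the new generation options introduced below, the README's example extends to a minimal end-to-end sketch (the corpus and option values here are illustrative, not from the package docs):

```
// Continuing the README example above: train the model, then sample from it.
const corpus = ['some training text', 'more training text'];
await model.train(corpus);

// topK, usePadding and includeAttention are new IGenerateOptions in 0.1.0.
const text = await model.generateText('Once', {
    maxLength: 200,
    temperature: 0.8,
    topK: 10,
});
console.log(text);
```
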
package/dist/Generator.d.ts
CHANGED
@@ -4,6 +4,9 @@ import { default as EE } from 'eventemitter3';
 export interface IGenerateOptions {
     maxLength?: number;
     temperature?: number;
+    topK?: number;
+    usePadding?: boolean;
+    includeAttention?: boolean;
 }
 export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
     private readonly model;

package/dist/Generator.js
CHANGED
@@ -1,39 +1,53 @@
-import { E as
-const
-class
-constructor(
-super(), this.model =
+import { E as m } from "./index-SOhdqzHq.js";
+const g = 4;
+class w extends m {
+  constructor(o, t) {
+    super(), this.model = o, this.tokeniser = t;
   }
-generateBlockOfTokens(
-const
-let
-for (let
-const i = this.model.generate(
-
+  generateBlockOfTokens(o, t) {
+    const c = t?.temperature ?? 1, a = t?.topK, r = t?.usePadding ?? t?.includeAttention ?? !1, d = t?.includeAttention ?? !1;
+    let s = o, n;
+    for (let l = 0; l < g; l++) {
+      const { output: e, attention: i } = this.model.generate(s, {
+        temperature: c,
+        topK: a,
+        usePadding: r,
+        includeAttention: d
+      }), h = s;
+      if (s = this.model.tf.concat([s, e], 1), n && i) {
+        const u = n;
+        n = this.model.tf.concat([n, i], 0), u.dispose();
+      } else i && (n = i);
+      h.dispose(), e.dispose();
     }
-return
+    return { output: s, attention: n };
   }
-async generate(
-const
-let
+  async generate(o, t) {
+    const c = o ? await this.tokeniser.tokenise([o], !0) : [[this.tokeniser.eosToken]];
+    let a = this.model.tf.tensor2d(c, [1, c[0].length], "int32");
     this.emit("start");
-let
+    let r = o || "";
     for (; ; ) {
-const
-
-const l =
-let
-const
-
-t?.maxLength ? t.maxLength -
+      const { output: d, attention: s } = this.generateBlockOfTokens(a, t), n = a;
+      a = d;
+      const l = d.slice([0, n.shape[1]], [1, g]), e = (await l.array())[0];
+      let i = !1, h = !1;
+      const u = e.indexOf(this.tokeniser.eosToken);
+      u !== -1 && (i = !0, e.splice(u)), e.length + r.length >= (t?.maxLength ?? 1e3) && (h = !0, e.splice(
+        t?.maxLength ? t.maxLength - r.length : e.length
       ));
-const k = await this.tokeniser.decode(
-if (
+      const k = await this.tokeniser.decode(e);
+      if (r += k, s) {
+        let f = await s.array();
+        f.length > e.length && (f = f.slice(0, e.length)), this.emit("tokens", e, k, f);
+      } else
+        this.emit("tokens", e, k);
+      if (n.dispose(), l.dispose(), i || h)
        break;
    }
-return
+    return a.dispose(), this.emit("stop"), r;
  }
 }
 export {
-
+  w as default
 };

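As the rewritten Generator.js shows, generation now proceeds in blocks of four tokens (const g = 4), and each 'tokens' emission carries the new token ids, their decoded text and, when includeAttention is set, an attention array trimmed to the emitted tokens. A hedged sketch of consuming the stream; the callback parameter types are inferred from this diff, not from documentation:

```
const gen = model.generator();
gen.on('tokens', (ids: number[], text: string, attention?: number[][]) => {
    // Fires once per generated block; `attention` is only supplied when
    // generate() was called with includeAttention: true.
    process.stdout.write(text);
});
const full = await gen.generate('Hello', { maxLength: 100, includeAttention: true });
```
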
package/dist/NanoGPTModel.d.ts
CHANGED
@@ -9,6 +9,12 @@ export interface TrainingLogEntry {
     example?: string;
     batchSize: number;
 }
+export interface GenerateOptions {
+    temperature?: number;
+    topK?: number;
+    usePadding?: boolean;
+    includeAttention?: boolean;
+}
 export default class NanoGPT {
     readonly config: GPTConfig;
     private wte;
@@ -26,10 +32,16 @@ export default class NanoGPT {
     setSkipMask(mask: boolean[]): void;
     setTrainableMask(mask: boolean[]): void;
     set trainable(value: boolean);
-
+    private validateInput;
+    private calculateLoss;
+    forward(idx: TF.Tensor, targets?: TF.Tensor, training?: boolean, includeAttention?: boolean): {
         logits: TF.Tensor;
         loss?: TF.Tensor;
+        attention?: TF.Tensor;
+    };
+    generate(idx: TF.Tensor, options?: GenerateOptions): {
+        output: TF.Tensor;
+        attention?: TF.Tensor;
     };
-    generate(idx: TF.Tensor, temperature?: number, topK?: number): TF.Tensor;
     getNumParams(): number;
 }

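A hedged sketch of the new generate() contract: it samples a single token per call and can return the head-averaged attention row for the sampled position. The tf handle on the model is used here the same way Generator.js uses it; tensor disposal mirrors the conventions in this diff:

```
// Input must be int32 of shape [1, seqLen], as validateInput enforces.
const idx = model.tf.tensor2d([[1, 5, 9]], [1, 3], 'int32');
const { output, attention } = model.generate(idx, {
    temperature: 0.9,
    topK: 5,
    includeAttention: true,
});
// output is a [1, 1] tensor holding the sampled token id.
output.dispose();
attention?.dispose();
idx.dispose();
```
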
package/dist/NanoGPTModel.js
CHANGED
@@ -1,8 +1,8 @@
-import { defaultConfig as
-import
-import
-import
-class
+import { defaultConfig as y } from "./config.js";
+import z from "./layers/TransformerBlock.js";
+import v from "./layers/TiedEmbedding.js";
+import S from "./layers/LayerNorm.js";
+class $ {
   config;
   wte;
   // Token embeddings
@@ -17,7 +17,7 @@ class S {
   log = [];
   // Training log
   constructor(t, e = {}) {
-    this.tf = t, this.config = { ...
+    this.tf = t, this.config = { ...y, ...e }, this.wte = new v(t, {
       vocabSize: this.config.vocabSize,
       embedDim: this.config.nEmbed,
       name: "token_embedding"
@@ -28,8 +28,8 @@ class S {
       embeddingsInitializer: this.tf.initializers.randomNormal({ mean: 0, stddev: 0.02 })
     }), this.drop = this.tf.layers.dropout({ rate: this.config.dropout }), this.blocks = [];
     for (let s = 0; s < this.config.nLayer; s++)
-      this.blocks.push(new
-    this.lnF = new
+      this.blocks.push(new z(this.tf, s, this.config));
+    this.lnF = new S(t, [this.config.nEmbed], 1e-5, "final_layer_norm");
   }
   get variables() {
     return [
@@ -54,8 +54,8 @@ class S {
   }
   inputPhase(t, e = !1) {
     return this.tf.tidy(() => {
-      const [, s] = t.shape,
-      return this.drop.apply(
+      const [, s] = t.shape, n = this.wte.embed(t), i = this.tf.range(0, s, 1, "int32"), o = this.wpe.apply(i), h = n.add(o);
+      return this.drop.apply(h, { training: e });
     });
   }
   setSkipMask(t) {
@@ -75,55 +75,61 @@ class S {
       e.trainable = t;
     this.wpe.trainable = t, this.lnF.trainable = t;
   }
-
+  validateInput(t) {
     if (t.shape.length !== 2)
       throw new Error(`Invalid input shape: expected [batch_size, sequence_length], got ${t.shape}`);
     if (t.shape[1] > this.config.blockSize)
       throw new Error(`Input sequence length ${t.shape[1]} isn't block size ${this.config.blockSize}`);
     if (t.dtype !== "int32")
       throw new Error(`Input tensor must be of type int32, got ${t.dtype}`);
-
-
-
-
-
-
-
-
-
-
-
-
-
-  }
-
-
-
+  }
+  calculateLoss(t, e) {
+    try {
+      return this.tf.losses.softmaxCrossEntropy(e, t, this.tf.Reduction.MEAN);
+    } catch (s) {
+      throw console.error("Error computing loss:", s), new Error(`Loss computation failed: ${s}`);
+    }
+  }
+  forward(t, e, s = !1, n = !1) {
+    return this.validateInput(t), this.tf.tidy(() => {
+      let i = this.inputPhase(t, s), o;
+      n && (o = this.tf.zeros([i.shape[0], i.shape[1], i.shape[1]]));
+      for (const l of this.blocks) {
+        const { output: r, attention: f } = l.call(i, s, n);
+        i = r, f && o && (o = o.add(f));
+      }
+      o && (o = o.div(this.blocks.length)), i = this.lnF.apply(i);
+      const h = this.wte.project(i);
+      let a;
+      return e && (a = this.calculateLoss(h, e)), { logits: h, loss: a, attention: n ? o : void 0 };
     });
   }
-  generate(t, e
+  generate(t, e) {
+    const s = e?.temperature ?? 1, n = e?.topK, i = e?.usePadding ?? !1, o = e?.includeAttention ?? !1;
     return this.tf.tidy(() => {
-      const
-      [0,
-      [
-      ),
-
-
-
-
+      const h = t, a = h.shape[1], l = a <= this.config.blockSize ? h : h.slice(
+        [0, a - this.config.blockSize],
+        [h.shape[0], this.config.blockSize]
+      ), r = i ? this.config.blockSize - l.shape[1] : 0, f = r > 0 ? this.tf.pad(l, [
+        [0, 0],
+        [0, r]
+      ]) : l, { logits: g, attention: p } = this.forward(f, void 0, !1, o), d = g.shape[1] - 1 - r, m = g.slice([0, d, 0], [g.shape[0], 1, g.shape[2]]), u = p ? p.slice([0, d, 0], [p.shape[0], 1, p.shape[2]]) : void 0, b = m.div(s);
+      let c;
+      if (n) {
+        const { values: k, indices: w } = this.tf.topk(b, n), E = this.tf.multinomial(k.squeeze([1]), 1);
+        c = this.tf.gather(w.squeeze([1]), E, 1);
       } else
-
-      return
+        c = this.tf.multinomial(b.squeeze([1]), 1);
+      return c = c.reshape([1, 1]), { output: c, attention: u?.squeeze([1]) };
     });
   }
-  // Get number of parameters
   getNumParams() {
     const t = this.config.vocabSize * this.config.nEmbed + this.config.blockSize * this.config.nEmbed, e = this.config.nLayer * (4 * this.config.nEmbed * this.config.nEmbed + // qkv + proj
     2 * this.config.nEmbed), s = this.config.nLayer * (4 * this.config.nEmbed * this.config.nEmbed + // fc
-    this.config.nEmbed * 4 * this.config.nEmbed),
-    return t + e + s +
+    this.config.nEmbed * 4 * this.config.nEmbed), n = this.config.nEmbed + this.config.vocabSize * this.config.nEmbed;
+    return t + e + s + n;
   }
 }
 export {
-
+  $ as default
 };

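As a sanity check on getNumParams(), plugging the README's example configuration into the formula above gives roughly 1.9M parameters:

```
const cfg = { vocabSize: 200, blockSize: 128, nLayer: 4, nEmbed: 192 };
const emb  = cfg.vocabSize * cfg.nEmbed + cfg.blockSize * cfg.nEmbed;                   // 62,976
const attn = cfg.nLayer * (4 * cfg.nEmbed * cfg.nEmbed + 2 * cfg.nEmbed);               // 591,360
const mlp  = cfg.nLayer * (4 * cfg.nEmbed * cfg.nEmbed + cfg.nEmbed * 4 * cfg.nEmbed);  // 1,179,648
const head = cfg.nEmbed + cfg.vocabSize * cfg.nEmbed;                                   // 38,592
console.log(emb + attn + mlp + head);                                                   // 1,872,576
```
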
package/dist/TeachableLLM.d.ts
CHANGED
@@ -4,18 +4,24 @@ import { ITokeniser } from './tokeniser/type';
 import { default as NanoGPT } from './NanoGPTModel';
 import { default as Generator, IGenerateOptions } from './Generator';
 import { default as Trainer, ITrainerOptions } from './Trainer';
-
+import { default as EE } from 'eventemitter3';
+type TeachableLLMStatus = 'warmup' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
+export default class TeachableLLM extends EE<'status' | 'error'> {
     readonly config: GPTConfig;
     readonly model: NanoGPT;
     readonly tf: typeof TF;
     readonly tokeniser: ITokeniser;
+    private _status;
     constructor(tf: typeof TF, tokeniser: ITokeniser, model: NanoGPT);
+    get status(): TeachableLLMStatus;
+    private setStatus;
     saveModel(): Promise<Blob>;
     static loadModel(tf: typeof TF, data: Blob | Buffer | string): Promise<TeachableLLM>;
-    static create(tf: typeof TF,
+    static create(tf: typeof TF, config?: Partial<GPTConfig>): TeachableLLM;
     getNumParams(): number;
     trainer(): Trainer;
     train(text: string[], options?: ITrainerOptions): Promise<void>;
     generator(): Generator;
     generateText(prompt?: string, options?: IGenerateOptions): Promise<string>;
 }
+export {};

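The status machine added to the type is driven by the implementation that follows: loadModel() resolves while the instance is still in 'warmup' and only flips to 'ready' after an asynchronous dummy forward pass, while trainer() and generator() toggle 'training' and 'busy'. A hedged sketch (zipBlob stands in for a saved model archive):

```
const model = await TeachableLLM.loadModel(tf, zipBlob);
console.log(model.status); // 'warmup' until the dummy pass completes
model.on('status', (s) => console.log('status ->', s));
model.on('error', (err) => console.error('model failed to warm up:', err));
```
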
package/dist/TeachableLLM.js
CHANGED
@@ -1,47 +1,60 @@
-import
-import { defaultConfig as
-import { saveModel as
-import { loadModel as
-import
-import
-import "./
-
+import a from "./NanoGPTModel.js";
+import { defaultConfig as m } from "./config.js";
+import { saveModel as u } from "./utilities/save.js";
+import { loadModel as h } from "./utilities/load.js";
+import c from "./Generator.js";
+import d from "./Trainer.js";
+import { E as l } from "./index-SOhdqzHq.js";
+import { dummyPassAsync as f } from "./utilities/dummy.js";
+import g from "./tokeniser/CharTokeniser.js";
+class n extends l {
   config;
   model;
   tf;
   tokeniser;
-
-
+  _status = "loading";
+  constructor(t, e, r) {
+    super(), this.tf = t, this.config = r.config, this.tokeniser = e, this.model = r;
+  }
+  get status() {
+    return this._status;
+  }
+  setStatus(t) {
+    this._status !== t && (this._status = t, this.emit("status", t));
  }
  saveModel() {
-    return
+    return u(this.model, this.tokeniser);
  }
-  static async loadModel(
-    const { model: r, tokeniser: o } = await
-    return
+  static async loadModel(t, e) {
+    const { model: r, tokeniser: o } = await h(t, e), s = new n(t, o, r);
+    return s.setStatus("warmup"), f(r).then(() => {
+      s.setStatus("ready");
+    }).catch((i) => {
+      s.setStatus("error"), s.emit("error", i);
+    }), s;
  }
-  static create(
-    const
-
-    const n = new s(e, o);
-    return new i(e, t, n);
+  static create(t, e = {}) {
+    const r = { ...m, ...e }, o = new g(r.vocabSize), s = new a(t, r);
+    return new n(t, o, s);
  }
  getNumParams() {
    return this.model.getNumParams();
  }
  trainer() {
-
+    const t = new d(this.model, this.tokeniser);
+    return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t;
  }
-  train(
-    return this.trainer().train(
+  train(t, e) {
+    return this.trainer().train(t, e);
  }
  generator() {
-
+    const t = new c(this.model, this.tokeniser);
+    return t.on("start", () => this.setStatus("busy")), t.on("stop", () => this.setStatus("ready")), t;
  }
-  generateText(
-    return this.generator().generate(
+  generateText(t, e) {
+    return this.generator().generate(t, e);
  }
 }
 export {
-
+  n as default
 };

package/dist/layers/CausalSelfAttention.d.ts
CHANGED
@@ -18,5 +18,11 @@ export default class CausalSelfAttention {
     set trainable(value: boolean);
     saveWeights(map: Map<string, TF.Tensor[]>): void;
     loadWeights(weights: Map<string, TF.Tensor[]>): void;
-
+    private getAttentionScores;
+    private getQKV;
+    private getOutputProjection;
+    call(x: TF.Tensor, training?: boolean, includeAttention?: boolean): {
+        output: TF.Tensor;
+        attention?: TF.Tensor;
+    };
 }

package/dist/layers/CausalSelfAttention.js
CHANGED
@@ -1,4 +1,4 @@
-class
+class m {
   config;
   cAttn;
   cProj;
@@ -10,10 +10,10 @@ class g {
   divisor;
   index;
   _trainable = !0;
-  constructor(
-    this.config =
-      units: 3 *
-      useBias:
+  constructor(t, e, s) {
+    this.config = s, this.tf = t, this.index = e, this.cAttn = this.tf.layers.dense({
+      units: 3 * s.nEmbed,
+      useBias: s.biasInLinear,
       name: `block_${e}_attn_cAttn`,
       kernelInitializer: this.tf.initializers.randomNormal({
         mean: 0,
@@ -21,55 +21,65 @@ class g {
       }),
       biasInitializer: "zeros"
     }), this.cProj = this.tf.layers.dense({
-      units:
-      useBias:
+      units: s.nEmbed,
+      useBias: s.biasInLinear,
       name: `block_${e}_attn_cProj`,
       kernelInitializer: this.tf.initializers.randomNormal({
         mean: 0,
-        stddev: 0.02 / Math.sqrt(2 *
+        stddev: 0.02 / Math.sqrt(2 * s.nLayer)
       }),
       biasInitializer: "zeros"
-    }), this.attnDropout = this.tf.layers.dropout({ rate:
+    }), this.attnDropout = this.tf.layers.dropout({ rate: s.dropout }), this.residDropout = this.tf.layers.dropout({ rate: s.dropout }), this.bias = this.tf.linalg.bandPart(this.tf.ones([s.blockSize, s.blockSize]), -1, 0).cast("bool"), this.divisor = this.tf.scalar(1 / Math.sqrt(s.nEmbed / s.nHead)), this.maskInf = this.tf.zeros([s.blockSize, s.blockSize]).where(this.bias, -1 / 0);
   }
   get variables() {
     return [
-      ...this.cAttn.trainableWeights.map((
-      ...this.cProj.trainableWeights.map((
+      ...this.cAttn.trainableWeights.map((t) => t.read()),
+      ...this.cProj.trainableWeights.map((t) => t.read())
     ];
   }
   get trainable() {
     return this._trainable;
   }
-  set trainable(
-    this._trainable =
+  set trainable(t) {
+    this._trainable = t, this.cAttn.trainable = t, this.cProj.trainable = t;
  }
-  saveWeights(
-
+  saveWeights(t) {
+    t.set(`block_${this.index}_cAttn`, this.cAttn.getWeights()), t.set(`block_${this.index}_cProj`, this.cProj.getWeights());
  }
-  loadWeights(
-    this.cAttn.setWeights(
+  loadWeights(t) {
+    this.cAttn.setWeights(t.get(`block_${this.index}_cAttn`) || []), this.cProj.setWeights(t.get(`block_${this.index}_cProj`) || []);
  }
-
+  getAttentionScores(t, e, s) {
+    const a = t.shape[2], n = this.tf.matMul(t, e, !1, !0).mul(this.divisor), i = this.maskInf.slice([0, 0], [a, a]), o = n.add(i), h = this.tf.softmax(o, -1);
+    return this.attnDropout.apply(h, { training: s });
+  }
+  getQKV(t) {
+    const [e, s, a] = t.shape, r = this.cAttn.apply(t), [n, i, o] = this.tf.split(r, 3, -1);
+    r.dispose();
+    const h = a / this.config.nHead, c = this.tf.reshape(n, [e, s, this.config.nHead, h]);
+    n.dispose();
+    const p = c.transpose([0, 2, 1, 3]);
+    c.dispose();
+    const l = this.tf.reshape(i, [e, s, this.config.nHead, h]);
+    i.dispose();
+    const u = l.transpose([0, 2, 1, 3]);
+    l.dispose();
+    const d = this.tf.reshape(o, [e, s, this.config.nHead, h]);
+    o.dispose();
+    const b = d.transpose([0, 2, 1, 3]);
+    return d.dispose(), [p, u, b];
+  }
+  getOutputProjection(t, e) {
+    const s = t.shape[0], a = t.shape[2], r = this.config.nEmbed, n = t.transpose([0, 2, 1, 3]), i = this.tf.reshape(n, [s, a, r]), o = this.cProj.apply(i);
+    return this.residDropout.apply(o, { training: e });
+  }
+  call(t, e = !1, s = !1) {
    return this.tf.tidy(() => {
-      const [
-
-      const a = n / this.config.nHead, d = this.tf.reshape(o, [t, i, this.config.nHead, a]);
-      o.dispose();
-      const b = d.transpose([0, 2, 1, 3]);
-      d.dispose();
-      const c = this.tf.reshape(h, [t, i, this.config.nHead, a]);
-      h.dispose();
-      const u = c.transpose([0, 2, 1, 3]);
-      c.dispose();
-      const p = this.tf.reshape(l, [t, i, this.config.nHead, a]);
-      l.dispose();
-      const f = p.transpose([0, 2, 1, 3]);
-      p.dispose();
-      const m = this.tf.matMul(b, u, !1, !0).mul(this.divisor), k = this.maskInf.slice([0, 0], [i, i]), _ = m.add(k), y = this.tf.softmax(_, -1), z = this.attnDropout.apply(y, { training: e }), A = this.tf.matMul(z, f).transpose([0, 2, 1, 3]), P = this.tf.reshape(A, [t, i, n]), j = this.cProj.apply(P);
-      return this.residDropout.apply(j, { training: e });
+      const [a, r, n] = this.getQKV(t), i = this.getAttentionScores(a, r, e), o = this.tf.matMul(i, n);
+      return { output: this.getOutputProjection(o, e), attention: s ? i.mean(1) : void 0 };
    });
  }
 }
 export {
-
+  m as default
 };

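The refactor splits the old monolithic call() into getQKV, getAttentionScores and getOutputProjection, and surfaces a head-averaged attention map (i.mean(1)) when includeAttention is requested. For orientation, a self-contained TF.js sketch of the same masked-softmax attention, using the masking idiom from the constructor above with illustrative sizes:

```
import * as tf from '@tensorflow/tfjs';

// q, k, v: [batch, heads, seq, headDim]; sizes are illustrative only.
const [B, H, T, D] = [1, 3, 4, 8];
const q = tf.randomNormal([B, H, T, D]);
const k = tf.randomNormal([B, H, T, D]);
const v = tf.randomNormal([B, H, T, D]);
const scores = tf.matMul(q, k, false, true).mul(1 / Math.sqrt(D));  // [B, H, T, T]
const causal = tf.linalg.bandPart(tf.ones([T, T]), -1, 0).cast('bool');
const masked = scores.add(tf.zeros([T, T]).where(causal, -1 / 0));  // -Inf above the diagonal
const attn = tf.softmax(masked, -1);
const out = tf.matMul(attn, v);  // [B, H, T, D], then merged and projected by cProj
const averaged = attn.mean(1);   // [B, T, T]: what call() returns as `attention`
```
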
package/dist/layers/TransformerBlock.d.ts
CHANGED
@@ -15,5 +15,9 @@ export default class Block {
     set trainable(value: boolean);
     saveWeights(map: Map<string, TF.Tensor[]>): void;
     loadWeights(weights: Map<string, TF.Tensor[]>): void;
-
+    private getMLPOutput;
+    call(x: TF.Tensor, training?: boolean, includeAttention?: boolean): {
+        output: TF.Tensor;
+        attention?: TF.Tensor;
+    };
 }

package/dist/layers/TransformerBlock.js
CHANGED
@@ -1,7 +1,7 @@
-import
-import
-import
-class
+import h from "./CausalSelfAttention.js";
+import r from "./MLP.js";
+import l from "./LayerNorm.js";
+class u {
   ln1;
   attn;
   ln2;
@@ -11,7 +11,7 @@ class _ {
   _trainable = !0;
   skipped = !1;
   constructor(t, s, i) {
-    this.tf = t, this.index = s, this.ln1 = new
+    this.tf = t, this.index = s, this.ln1 = new l(t, [i.nEmbed], 1e-5, `block_${this.index}_ln1`), this.attn = new h(this.tf, this.index, i), this.ln2 = new l(t, [i.nEmbed], 1e-5, `block_${this.index}_ln2`), this.mlp = new r(this.tf, this.index, i);
   }
   get variables() {
     return [
@@ -33,15 +33,19 @@ class _ {
   loadWeights(t) {
     this.attn.loadWeights(t), this.mlp.loadWeights(t), this.ln1.setWeights(t.get(`block_${this.index}_ln1`) || []), this.ln2.setWeights(t.get(`block_${this.index}_ln2`) || []);
   }
-
+  getMLPOutput(t, s) {
+    const i = this.ln2.apply(t), e = this.mlp.call(i, s);
+    return t.add(e);
+  }
+  call(t, s = !1, i = !1) {
    return this.tf.tidy(() => {
      if (this.skipped)
-        return t;
-      const
-      return
+        return { output: t };
+      const e = this.ln1.apply(t), n = this.attn.call(e, s, i), a = t.add(n.output);
+      return { output: this.getMLPOutput(a, s), attention: n.attention };
    });
  }
 }
 export {
-
+  u as default
 };

package/dist/tokeniser/CharTokeniser.d.ts
CHANGED
@@ -3,9 +3,11 @@ import { ITokeniser } from './type';
 export default class CharTokeniser extends EE<'trainStatus'> implements ITokeniser {
     vocabSize: number;
     eosToken: number;
+    unkToken: number;
     vocab: string[];
     private cache;
-    constructor(
+    constructor(vocabSize: number);
+    constructor(vocab: string[]);
     get trained(): boolean;
     destroy(): void;
     train(text: string[]): Promise<number>;

package/dist/tokeniser/CharTokeniser.js
CHANGED
@@ -1,38 +1,57 @@
-import { E as
-
+import { E as h } from "../index-SOhdqzHq.js";
+const c = ["<eos>", "<unk>"];
+class l extends h {
   vocabSize = 0;
   eosToken = 0;
+  unkToken = 0;
   vocab = [];
   cache = /* @__PURE__ */ new Map();
-  constructor(
-
-    this.
-
+  constructor(s) {
+    if (super(), Array.isArray(s))
+      if (this.vocab = s, this.vocab.length > 0)
+        this.vocabSize = this.vocab.length, this.eosToken = this.vocab.indexOf("<eos>"), this.unkToken = this.vocab.indexOf("<unk>"), this.unkToken === -1 && (this.unkToken = this.eosToken), this.vocab.forEach((i, o) => {
+          this.cache.set(i, o);
+        });
+      else
+        throw new Error("Vocab cannot be empty");
+    else
+      this.vocabSize = s;
  }
  get trained() {
-    return this.
+    return this.vocab.length === this.vocabSize;
  }
  destroy() {
  }
-  async train(
-    const
-
-
+  async train(s) {
+    const i = s.map((e) => e.split("")).flat(), o = new Set(i), t = Array.from(o), n = this.vocabSize - c.length;
+    if (t.length > n) {
+      const e = /* @__PURE__ */ new Map();
+      i.forEach((a) => {
+        e.set(a, (e.get(a) || 0) + 1);
+      }), t.sort((a, r) => (e.get(a) || 0) - (e.get(r) || 0)), t.splice(0, t.length - n);
+    } else if (t.length < n)
+      for (; t.length < n; )
+        t.push("<pad>");
+    return t.sort((e, a) => e.charCodeAt(0) - a.charCodeAt(0)), this.vocab = [...t, ...c], this.eosToken = this.vocab.indexOf("<eos>"), this.unkToken = this.vocab.indexOf("<unk>"), this.vocabSize = this.vocab.length, this.cache.clear(), this.vocab.forEach((e, a) => {
+      this.cache.set(e, a);
    }), this.vocabSize;
  }
-  async tokenise(
+  async tokenise(s, i) {
    if (!this.trained)
      throw new Error("Tokeniser not trained");
-    return
+    return s.map((t) => i ? t.split("").map((n) => this.cache.get(n) ?? this.unkToken) : t.split("").map((n) => {
+      const e = this.cache.get(n);
+      return e !== void 0 ? this.vocab[e] : "<unk>";
+    }));
  }
-  async detokenise(
-    return
+  async detokenise(s) {
+    return s.map((o) => o.map((t) => this.vocab[t]).join(""));
  }
-  async encode(
-    return (await this.tokenise([
+  async encode(s) {
+    return (await this.tokenise([s], !0))[0];
  }
-  async decode(
-    return (await this.detokenise([
+  async decode(s) {
+    return (await this.detokenise([s]))[0];
  }
  getVocab() {
    return this.vocab;
@@ -40,13 +59,13 @@ class h extends r {
  async getMerges() {
    return [];
  }
-  async createTrainingData(
-    const
-    for (let
-
-    return [
+  async createTrainingData(s, i = 5) {
+    const o = await this.tokenise(s, !0), t = [], n = [];
+    for (let e = 0; e < o.length - i; e++)
+      t.push(...o[e].slice(0, i)), n.push(o[e + 1][0]);
+    return [t, n];
  }
 }
 export {
-
+  l as default
 };

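CharTokeniser now has two constructor forms, a target vocabulary size to be filled in by train() or a prebuilt vocab array (the form load.js uses below when restoring a saved model), plus an explicit <unk> fallback for unseen characters. A hedged usage sketch:

```
import { CharTokeniser } from '@genai-fi/nanogpt';

const tok = new CharTokeniser(64);     // untrained until train() builds the vocab
await tok.train(['abba', 'cabbage']);
const ids = await tok.encode('bad!');  // unseen characters map to unkToken
console.log(await tok.decode(ids));    // unknown positions decode as '<unk>'

// Restoring from a saved vocabulary uses the array overload instead:
const restored = new CharTokeniser(tok.getVocab());
```
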
package/dist/tokeniser/NodeTokeniser.d.ts
CHANGED
@@ -4,10 +4,11 @@ export default class NodeTokeniser extends EE<'trainStatus'> implements ITokenis
     vocabSize: number;
     eosToken: number;
     private bpe;
-    constructor(
+    constructor(vocabSize: number);
+    constructor(vocab: string[], merges: [string, string][]);
     get trained(): boolean;
     destroy(): void;
-    train(text: string[]
+    train(text: string[]): Promise<number>;
     tokenise(text: string[], numeric: true): Promise<number[][]>;
     tokenise(text: string[]): Promise<string[][]>;
     detokenise(tokens: number[][]): Promise<string[]>;

package/dist/tokeniser/NodeTokeniser.js
CHANGED
@@ -1,19 +1,19 @@
 import { E as a } from "../index-SOhdqzHq.js";
 import o from "./bpe.js";
-class
+class p extends a {
   vocabSize = 0;
   eosToken = 0;
   bpe = new o();
   constructor(e, t) {
-    super(), e
+    super(), Array.isArray(e) ? (this.bpe = new o(e, t), this.vocabSize = e.length) : this.vocabSize = e;
   }
   get trained() {
     return this.vocabSize > 0;
   }
   destroy() {
   }
-  async train(e
-    return this.bpe.train(e,
+  async train(e) {
+    return this.bpe.train(e, this.vocabSize), this.vocabSize = this.bpe.getVocab().length, this.vocabSize;
   }
   async tokenise(e, t) {
     return t ? this.bpe.tokenise(e, !0) : this.bpe.tokenise(e);
@@ -42,5 +42,5 @@ class b extends a {
   }
 }
 export {
-
+  p as default
 };

package/dist/tokeniser/type.d.ts
CHANGED
@@ -1,6 +1,6 @@
 import { default as EE } from 'eventemitter3';
 export interface ITokeniser extends EE<'trainStatus'> {
-    train(text: string[]
+    train(text: string[]): Promise<number>;
     tokenise(text: string[], numeric?: boolean): Promise<string[][] | number[][]>;
     detokenise(tokens: string[][] | number[][]): Promise<string[]>;
     getVocab(): string[];

package/dist/training/FullTrainer.js
CHANGED
@@ -45,7 +45,9 @@ class S extends T {
     };
     if (this.model.log.push(p), s.step % L === 0 && (await w, h)) {
       if (l) {
-        const v = await g(this.tokenizer, this.model, l, 100,
+        const v = await g(this.tokenizer, this.model, l, 100, {
+          temperature: 0.8
+        });
         p.example = v;
       }
       await h(p);

package/dist/training/LayerTrainer.js
CHANGED
@@ -29,7 +29,7 @@ class D extends T {
       epochs: h,
       stepsPerEpoch: r,
       desiredLoss: c,
-      logInterval:
+      logInterval: P,
       stepsPerLayer: d,
       onLayerChange: n,
       onPassComplete: g,
@@ -61,20 +61,23 @@ class D extends T {
     for (; !(r && s.step >= r || s.lastLoss < c); ) {
       const a = await u.next();
       if (a.done) break;
-      const
+      const m = a.value, w = this.trainBatch(s, m);
       s.stepSinceLayerChange++;
       const l = {
         epoch: s.epoch,
         loss: s.lastLoss,
         step: s.step,
         time: Date.now() - S,
-        batchSize:
+        batchSize: m.xs.shape[0],
         pass: s.pass,
         layer: s.layerStep % this.model.config.nLayer
       };
-      if (this.model.log.push(l), s.step %
+      if (this.model.log.push(l), s.step % P === 0 && (await w, y)) {
         if (L) {
-          const i = await v(this.tokenizer, this.model, L, 100,
+          const i = await v(this.tokenizer, this.model, L, 100, {
+            temperature: 0.8,
+            topK: 10
+          });
           l.example = i;
         }
         await y(l);

package/dist/utilities/dummy.js
ADDED
@@ -0,0 +1,12 @@
+async function f(s) {
+  const o = s.tf.zeros([1, s.config.blockSize], "int32"), { logits: i, loss: t } = s.forward(o, void 0, !1);
+  await i.data(), i.dispose(), t && t.dispose(), o.dispose();
+}
+function c(s) {
+  const o = s.tf.zeros([1, s.config.blockSize], "int32"), { logits: i, loss: t } = s.forward(o, void 0, !1);
+  i.dispose(), t && t.dispose(), o.dispose();
+}
+export {
+  c as dummyPass,
+  f as dummyPassAsync
+};

package/dist/utilities/generate.d.ts
CHANGED
@@ -1,3 +1,3 @@
 import { ITokeniser } from '../tokeniser/type';
-import { default as NanoGPT } from '../NanoGPTModel';
-export declare function generateText(tokeniser: ITokeniser, model: NanoGPT, prompt: string, length: number,
+import { default as NanoGPT, GenerateOptions } from '../NanoGPTModel';
+export declare function generateText(tokeniser: ITokeniser, model: NanoGPT, prompt: string, length: number, options: GenerateOptions): Promise<string>;

package/dist/utilities/generate.js
CHANGED
@@ -1,22 +1,18 @@
-async function
+async function w(n, e, o, s, p) {
   if (s <= 0)
     throw new Error("Length must be a positive integer");
-  if (
-    throw new Error("Temperature must be a positive number");
-  if (r !== void 0 && r <= 0)
-    throw new Error("topK must be a positive integer or undefined");
-  if (i.length === 0)
+  if (o.length === 0)
     throw new Error("Prompt cannot be an empty string");
-  const
-  let
-  for (let
-    const
-
+  const a = await n.tokenise([o], !0), r = (await e.tf.tidy(() => {
+    let t = e.tf.tensor2d(a, [1, a[0].length], "int32");
+    for (let c = 0; c < s; c++) {
+      const { output: d } = e.generate(t, p), u = t;
+      t = e.tf.concat([t, d], 1), u.dispose(), d.dispose();
    }
-    return
-  }).array())[0],
-  return
+    return t;
+  }).array())[0], i = r.indexOf(n.eosToken);
+  return i !== -1 && r.splice(i), await n.decode(r);
 }
 export {
-
+  w as generateText
 };

package/dist/utilities/load.js
CHANGED
@@ -1,47 +1,44 @@
-import { z as
-import { importWeights as
-import
+import { z as F } from "../jszip.min-BLbRbbKt.js";
+import { importWeights as b } from "./weights.js";
+import k from "../tokeniser/CharTokeniser.js";
 import j from "../NanoGPTModel.js";
-
-
-  n.dispose(), s && s.dispose(), a.dispose();
-}
-async function E(o) {
+import { dummyPassAsync as z } from "./dummy.js";
+async function A(o) {
   const e = await fetch(o);
   if (!e.ok)
     throw new Error(`Failed to fetch ${o}: ${e.statusText}`);
   return e.arrayBuffer();
 }
-async function
-  const
-  if (!
+async function T(o, e) {
+  const m = typeof e == "string" ? await A(e) : e, n = await F.loadAsync(m), s = /* @__PURE__ */ new Map(), c = await n.file("manifest.json")?.async("string");
+  if (!c)
    throw new Error("Manifest file not found in the zip archive");
-  const
-  for (const [t, r] of Object.entries(
+  const f = JSON.parse(c);
+  for (const [t, r] of Object.entries(f.weightSpec))
    s.set(t, { spec: r, data: new Float32Array() });
-  const
-  if (!
+  const l = await n.file("tokeniser.json")?.async("string");
+  if (!l)
    throw new Error("Tokeniser file not found in the zip archive");
-  const
+  const g = JSON.parse(l), y = new k(g.vocab), w = /* @__PURE__ */ new Map();
  for (const t of Object.keys(n.files))
    if (t.endsWith(".bin")) {
-      const r = t.replace(".bin", ""), h = await n.file(t).async("arraybuffer"),
-
-      const
-      w.set(r,
+      const r = t.replace(".bin", ""), h = await n.file(t).async("arraybuffer"), d = new Float32Array(h), i = s.get(r) || { spec: [], data: new Float32Array() };
+      i.data = d, s.set(r, i);
+      const u = await b(i, o);
+      w.set(r, u);
    }
-  const
-
-  const
-  if (
+  const a = new j(o, f.config);
+  await z(a), a.loadWeights(w);
+  const p = await n.file("log.json")?.async("string");
+  if (p)
    try {
-      const t = JSON.parse(
-
+      const t = JSON.parse(p);
+      a.log = t;
    } catch (t) {
      throw console.error("Error parsing training log:", t), new Error(`Failed to parse training log: ${t}`);
    }
-  return { model:
+  return { model: a, tokeniser: y };
 }
 export {
-
+  T as loadModel
 };

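Paired with saveModel() on TeachableLLM, the loader above completes a round trip: the zip archive carries manifest.json (config plus weight specs), tokeniser.json, one .bin per weight group and an optional log.json, and a dummy forward pass builds the layers before the saved weights are applied. A hedged sketch:

```
const blob = await model.saveModel();  // zip archive as a Blob
const restored = await TeachableLLM.loadModel(tf, blob);
restored.on('status', (s) => {
    if (s === 'ready') console.log('restored parameters:', restored.getNumParams());
});
```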