@genai-fi/nanogpt 0.2.9 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.d.ts +2 -0
- package/dist/Generator.js +37 -32
- package/dist/NanoGPTModel.d.ts +4 -1
- package/dist/NanoGPTModel.js +33 -25
- package/dist/TeachableLLM.d.ts +4 -0
- package/dist/TeachableLLM.js +32 -15
- package/dist/{complex-Cd8sqiBC.js → complex-CJ-qCcLB.js} +6 -6
- package/dist/{index-Dsg28SG6.js → index-YPKosni4.js} +59 -51
- package/dist/layers/BaseLayer.d.ts +8 -0
- package/dist/layers/BaseLayer.js +18 -0
- package/dist/layers/CausalSelfAttention.d.ts +4 -1
- package/dist/layers/CausalSelfAttention.js +47 -55
- package/dist/layers/MLP.d.ts +2 -1
- package/dist/layers/MLP.js +16 -14
- package/dist/layers/RMSNorm.d.ts +2 -1
- package/dist/layers/RMSNorm.js +13 -11
- package/dist/layers/RoPECache.d.ts +4 -2
- package/dist/layers/RoPECache.js +13 -7
- package/dist/layers/TiedEmbedding.js +16 -15
- package/dist/layers/TransformerBlock.d.ts +4 -1
- package/dist/layers/TransformerBlock.js +9 -5
- package/dist/main.js +18 -16
- package/dist/{mat_mul-BAYDrXvE.js → mat_mul-Bu7bhLms.js} +5 -5
- package/dist/ops/attentionMask.js +31 -25
- package/dist/ops/gatherSub.js +2 -2
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/qkv.d.ts +7 -0
- package/dist/ops/qkv.js +127 -0
- package/dist/ops/rope.d.ts +8 -0
- package/dist/ops/rope.js +153 -0
- package/dist/ops/scatterSub.js +14 -14
- package/dist/reshape-DmnmKT6r.js +25 -0
- package/dist/{stack-1o648CP_.js → stack-BtKpB0Ry.js} +5 -5
- package/dist/sum-D7fu15XL.js +27 -0
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/Trainer.js +30 -29
- package/dist/training/sparseCrossEntropy.js +34 -33
- package/dist/utilities/profile.d.ts +10 -0
- package/dist/utilities/profile.js +29 -0
- package/package.json +1 -1
- package/dist/sum-NWazHI7f.js +0 -49
package/dist/Generator.d.ts
CHANGED
|
@@ -8,10 +8,12 @@ export interface IGenerateOptions extends GenerateOptions {
|
|
|
8
8
|
export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
|
|
9
9
|
private readonly model;
|
|
10
10
|
private readonly tokeniser;
|
|
11
|
+
private active;
|
|
11
12
|
constructor(model: NanoGPT, tokeniser: ITokeniser);
|
|
12
13
|
private tokenisePrompt;
|
|
13
14
|
private generateNoCache;
|
|
14
15
|
private processResponse;
|
|
15
16
|
private generateCache;
|
|
16
17
|
generate(prompt?: string, options?: IGenerateOptions): Promise<string>;
|
|
18
|
+
stop(): void;
|
|
17
19
|
}
|
package/dist/Generator.js
CHANGED
|
@@ -1,65 +1,70 @@
|
|
|
1
1
|
import { E as u } from "./index-Dwqa6Zy2.js";
|
|
2
|
-
class
|
|
2
|
+
class f extends u {
|
|
3
3
|
constructor(s, e) {
|
|
4
4
|
super(), this.model = s, this.tokeniser = e;
|
|
5
5
|
}
|
|
6
|
+
active = !1;
|
|
6
7
|
async tokenisePrompt(s) {
|
|
7
8
|
const e = s ? await this.tokeniser.tokenise([s], !0) : [[this.tokeniser.eosToken]];
|
|
8
9
|
return this.model.tf.tensor2d(e, [1, e[0].length], "int32");
|
|
9
10
|
}
|
|
10
11
|
async generateNoCache(s, e) {
|
|
11
|
-
let t = await this.tokenisePrompt(s),
|
|
12
|
-
const
|
|
13
|
-
for (let
|
|
12
|
+
let t = await this.tokenisePrompt(s), i = s || "";
|
|
13
|
+
const o = e?.maxLength ?? 1e3;
|
|
14
|
+
for (let a = 0; a < o && this.active; a++) {
|
|
14
15
|
const {
|
|
15
|
-
output:
|
|
16
|
+
output: n,
|
|
16
17
|
attention: c,
|
|
17
|
-
probabilities:
|
|
18
|
-
} = this.model.generate(t, void 0, e),
|
|
19
|
-
t = this.model.tf.concat([t,
|
|
20
|
-
const r = await this.processResponse(
|
|
21
|
-
if (
|
|
18
|
+
probabilities: l
|
|
19
|
+
} = this.model.generate(t, void 0, e), h = t;
|
|
20
|
+
t = this.model.tf.concat([t, n], 1), h.dispose();
|
|
21
|
+
const r = await this.processResponse(n, c, l);
|
|
22
|
+
if (n.dispose(), r === null)
|
|
22
23
|
break;
|
|
23
|
-
|
|
24
|
+
i += r;
|
|
24
25
|
}
|
|
25
|
-
return t.dispose(),
|
|
26
|
+
return t.dispose(), i;
|
|
26
27
|
}
|
|
27
28
|
async processResponse(s, e, t) {
|
|
28
|
-
const
|
|
29
|
-
if (
|
|
29
|
+
const i = (await s.array())[0][0];
|
|
30
|
+
if (i === this.tokeniser.eosToken)
|
|
30
31
|
return null;
|
|
31
|
-
const
|
|
32
|
-
let
|
|
33
|
-
e && (
|
|
34
|
-
let
|
|
35
|
-
return t && (
|
|
32
|
+
const o = await this.tokeniser.decode([i]);
|
|
33
|
+
let a;
|
|
34
|
+
e && (a = await e.array(), e.dispose());
|
|
35
|
+
let n;
|
|
36
|
+
return t && (n = await t.array(), t.dispose()), this.emit("tokens", [i], o, a, n), o;
|
|
36
37
|
}
|
|
37
38
|
async generateCache(s, e) {
|
|
38
|
-
let t = await this.tokenisePrompt(s),
|
|
39
|
-
const
|
|
40
|
-
for (let
|
|
39
|
+
let t = await this.tokenisePrompt(s), i = s || "";
|
|
40
|
+
const o = new Array(this.model.config.nLayer).fill(void 0), a = e?.maxLength ?? 1e3;
|
|
41
|
+
for (let n = 0; n < a && this.active; n++) {
|
|
41
42
|
const {
|
|
42
43
|
output: c,
|
|
43
|
-
attention:
|
|
44
|
-
probabilities:
|
|
45
|
-
} = this.model.generate(t,
|
|
44
|
+
attention: l,
|
|
45
|
+
probabilities: h
|
|
46
|
+
} = this.model.generate(t, o, {
|
|
46
47
|
...e,
|
|
47
48
|
usePadding: !1
|
|
48
49
|
});
|
|
49
50
|
t.dispose(), t = c;
|
|
50
|
-
const r = await this.processResponse(c,
|
|
51
|
+
const r = await this.processResponse(c, l, h);
|
|
51
52
|
if (r === null)
|
|
52
53
|
break;
|
|
53
|
-
|
|
54
|
+
i += r;
|
|
54
55
|
}
|
|
55
|
-
return t.dispose(),
|
|
56
|
+
return t.dispose(), i;
|
|
56
57
|
}
|
|
57
58
|
async generate(s, e) {
|
|
58
|
-
this.
|
|
59
|
-
|
|
60
|
-
|
|
59
|
+
const t = s && s.length > this.model.config.blockSize ? s.slice(-this.model.config.blockSize) : s;
|
|
60
|
+
this.active = !0, this.emit("start");
|
|
61
|
+
const o = await (this.model.config.useRope && !e?.noCache ? this.generateCache(t, e) : this.generateNoCache(t, e));
|
|
62
|
+
return this.active = !1, this.emit("stop"), o;
|
|
63
|
+
}
|
|
64
|
+
stop() {
|
|
65
|
+
this.active = !1;
|
|
61
66
|
}
|
|
62
67
|
}
|
|
63
68
|
export {
|
|
64
|
-
|
|
69
|
+
f as default
|
|
65
70
|
};
|
package/dist/NanoGPTModel.d.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { default as TF } from '@tensorflow/tfjs';
|
|
2
2
|
import { GPTConfig } from './config';
|
|
3
3
|
import { KVCache } from './layers/CausalSelfAttention';
|
|
4
|
+
import { default as MemoryProfiler } from './utilities/profile';
|
|
5
|
+
import { default as BaseLayer } from './layers/BaseLayer';
|
|
4
6
|
export interface TrainingLogEntry {
|
|
5
7
|
loss: number;
|
|
6
8
|
valLoss?: number;
|
|
@@ -16,7 +18,7 @@ export interface GenerateOptions {
|
|
|
16
18
|
includeAttention?: boolean;
|
|
17
19
|
includeProbabilities?: boolean;
|
|
18
20
|
}
|
|
19
|
-
export default class NanoGPT {
|
|
21
|
+
export default class NanoGPT extends BaseLayer {
|
|
20
22
|
readonly config: GPTConfig;
|
|
21
23
|
private wte;
|
|
22
24
|
private wpe?;
|
|
@@ -34,6 +36,7 @@ export default class NanoGPT {
|
|
|
34
36
|
setSkipMask(mask: boolean[]): void;
|
|
35
37
|
setTrainableMask(mask: boolean[]): void;
|
|
36
38
|
set trainable(value: boolean);
|
|
39
|
+
setProfiler(value: MemoryProfiler | undefined): void;
|
|
37
40
|
private validateInput;
|
|
38
41
|
private calculateLoss;
|
|
39
42
|
private computeAttentionRollout;
|
package/dist/NanoGPTModel.js
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
import { defaultConfig as
|
|
1
|
+
import { defaultConfig as v } from "./config.js";
|
|
2
2
|
import z from "./layers/TransformerBlock.js";
|
|
3
3
|
import S from "./layers/TiedEmbedding.js";
|
|
4
|
-
import
|
|
5
|
-
import
|
|
6
|
-
import { estimateParameterCount as
|
|
7
|
-
import { createSoftmaxCrossEntropyWithGrad as
|
|
8
|
-
|
|
4
|
+
import _ from "./layers/RoPECache.js";
|
|
5
|
+
import I from "./layers/RMSNorm.js";
|
|
6
|
+
import { estimateParameterCount as F } from "./utilities/parameters.js";
|
|
7
|
+
import { createSoftmaxCrossEntropyWithGrad as L } from "./training/sparseCrossEntropy.js";
|
|
8
|
+
import P from "./layers/BaseLayer.js";
|
|
9
|
+
class A extends P {
|
|
9
10
|
config;
|
|
10
11
|
wte;
|
|
11
12
|
// Token embeddings
|
|
@@ -21,7 +22,7 @@ class K {
|
|
|
21
22
|
log = [];
|
|
22
23
|
// Training log
|
|
23
24
|
constructor(t, e = {}) {
|
|
24
|
-
this.tf = t, this.config = {
|
|
25
|
+
super(), this.tf = t, this.config = { ...v, ...e }, this.wte = new S(t, {
|
|
25
26
|
vocabSize: this.config.vocabSize,
|
|
26
27
|
embedDim: this.config.nEmbed,
|
|
27
28
|
name: "token_embedding"
|
|
@@ -30,10 +31,10 @@ class K {
|
|
|
30
31
|
outputDim: this.config.nEmbed,
|
|
31
32
|
name: "positional_embedding",
|
|
32
33
|
embeddingsInitializer: this.tf.initializers.randomNormal({ mean: 0, stddev: 0.02 })
|
|
33
|
-
}) : this.ropeCache = new
|
|
34
|
+
}) : this.ropeCache = new _(t, this.config), this.drop = this.tf.layers.dropout({ rate: this.config.dropout }), this.blocks = [];
|
|
34
35
|
for (let o = 0; o < this.config.nLayer; o++)
|
|
35
36
|
this.blocks.push(new z(this.tf, o, this.config, this.ropeCache));
|
|
36
|
-
this.lnF = new
|
|
37
|
+
this.lnF = new I(t, [this.config.nEmbed], 1e-8, "final_rms_norm");
|
|
37
38
|
}
|
|
38
39
|
get variables() {
|
|
39
40
|
return [
|
|
@@ -86,6 +87,12 @@ class K {
|
|
|
86
87
|
e.trainable = t;
|
|
87
88
|
this.lnF.trainable = t;
|
|
88
89
|
}
|
|
90
|
+
setProfiler(t) {
|
|
91
|
+
this._profiler = t;
|
|
92
|
+
for (const e of this.blocks)
|
|
93
|
+
e.setProfiler(t);
|
|
94
|
+
this.lnF.setProfiler(t);
|
|
95
|
+
}
|
|
89
96
|
validateInput(t) {
|
|
90
97
|
if (t.shape.length !== 2)
|
|
91
98
|
throw new Error(`Invalid input shape: expected [batch_size, sequence_length], got ${t.shape}`);
|
|
@@ -96,7 +103,7 @@ class K {
|
|
|
96
103
|
}
|
|
97
104
|
calculateLoss(t, e) {
|
|
98
105
|
try {
|
|
99
|
-
return
|
|
106
|
+
return L()(t, e).mean();
|
|
100
107
|
} catch (o) {
|
|
101
108
|
throw console.error("Error computing loss:", o), new Error(`Loss computation failed: ${o}`);
|
|
102
109
|
}
|
|
@@ -139,24 +146,25 @@ class K {
|
|
|
139
146
|
}
|
|
140
147
|
forward(t, e, o = !1, i = !1, s) {
|
|
141
148
|
return this.validateInput(t), this.tf.tidy(() => {
|
|
149
|
+
this.startMemory();
|
|
142
150
|
const l = s?.[0]?.length ?? 0;
|
|
143
151
|
let r = this.inputPhase(t, l, o);
|
|
144
152
|
const n = [];
|
|
145
153
|
if (s && s.length !== this.blocks.length)
|
|
146
154
|
throw console.error("Cache", s), new Error(`Cache length ${s.length} does not match number of blocks ${this.blocks.length}`);
|
|
147
155
|
for (let a = 0; a < this.blocks.length; a++) {
|
|
148
|
-
const d = this.blocks[a], {
|
|
149
|
-
output:
|
|
150
|
-
attention:
|
|
156
|
+
const d = r, g = this.blocks[a], {
|
|
157
|
+
output: m,
|
|
158
|
+
attention: b,
|
|
151
159
|
cache: f
|
|
152
|
-
} =
|
|
153
|
-
r =
|
|
160
|
+
} = g.call(r, o, i, s ? s[a] : void 0);
|
|
161
|
+
r = m, d.dispose(), i && b && n.push(b), s && f ? (s[a]?.k.dispose(), s[a]?.v.dispose(), s[a] = f) : f && (f.k.dispose(), f.v.dispose());
|
|
154
162
|
}
|
|
155
163
|
let h;
|
|
156
164
|
i && n.length > 0 && (h = this.computeAttentionRollout(n)), r = this.lnF.apply(r);
|
|
157
165
|
const c = this.wte.project(r);
|
|
158
166
|
let p;
|
|
159
|
-
return e && (p = this.calculateLoss(c, e)), { logits: c, loss: p, attention: i ? h : void 0 };
|
|
167
|
+
return e && (p = this.calculateLoss(c, e)), this.endMemory("Forward"), { logits: c, loss: p, attention: i ? h : void 0 };
|
|
160
168
|
});
|
|
161
169
|
}
|
|
162
170
|
generate(t, e, o) {
|
|
@@ -168,24 +176,24 @@ class K {
|
|
|
168
176
|
), p = l ? this.config.blockSize - c.shape[1] : 0, a = p > 0 ? this.tf.pad(c, [
|
|
169
177
|
[0, 0],
|
|
170
178
|
[0, p]
|
|
171
|
-
]) : c, { logits: d, attention: g } = this.forward(a, void 0, !1, r, e),
|
|
172
|
-
let
|
|
179
|
+
]) : c, { logits: d, attention: g } = this.forward(a, void 0, !1, r, e), m = d.shape[1] - 1 - p, b = d.slice([0, m, 0], [d.shape[0], 1, d.shape[2]]), f = g ? g.slice([0, m, 0], [g.shape[0], 1, g.shape[2]]) : void 0, k = b.div(i);
|
|
180
|
+
let u;
|
|
173
181
|
if (s) {
|
|
174
|
-
const { values:
|
|
175
|
-
|
|
182
|
+
const { values: y, indices: E } = this.tf.topk(k, s), $ = this.tf.multinomial(y.squeeze([1]), 1);
|
|
183
|
+
u = this.tf.gather(E.squeeze([1]), $, 1);
|
|
176
184
|
} else
|
|
177
|
-
|
|
178
|
-
let
|
|
179
|
-
return o?.includeProbabilities && (
|
|
185
|
+
u = this.tf.multinomial(k.squeeze([1]), 1);
|
|
186
|
+
let w;
|
|
187
|
+
return o?.includeProbabilities && (w = this.tf.softmax(k.squeeze([1]))), u = u.reshape([1, 1]), { output: u, attention: f?.squeeze([1]), probabilities: w };
|
|
180
188
|
});
|
|
181
189
|
}
|
|
182
190
|
getNumParams() {
|
|
183
|
-
return
|
|
191
|
+
return F(this.config);
|
|
184
192
|
}
|
|
185
193
|
dispose() {
|
|
186
194
|
this.wte.dispose(), this.wpe && this.wpe.dispose(), this.drop.dispose(), this.blocks.forEach((t) => t.dispose()), this.lnF.dispose();
|
|
187
195
|
}
|
|
188
196
|
}
|
|
189
197
|
export {
|
|
190
|
-
|
|
198
|
+
A as default
|
|
191
199
|
};
|
package/dist/TeachableLLM.d.ts
CHANGED
|
@@ -6,6 +6,7 @@ import { SaveOptions } from './utilities/save';
|
|
|
6
6
|
import { default as Generator, IGenerateOptions } from './Generator';
|
|
7
7
|
import { default as Trainer, ITrainerOptions } from './Trainer';
|
|
8
8
|
import { default as EE } from 'eventemitter3';
|
|
9
|
+
import { default as MemoryProfiler } from './utilities/profile';
|
|
9
10
|
type TeachableLLMStatus = 'warmup' | 'awaitingTokens' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
|
|
10
11
|
export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
|
|
11
12
|
private _config?;
|
|
@@ -23,6 +24,9 @@ export default class TeachableLLM extends EE<'status' | 'error' | 'trainStep'> {
|
|
|
23
24
|
saveModel(options?: SaveOptions): Promise<Blob>;
|
|
24
25
|
static loadModel(tf: typeof TF, data: Blob | Buffer | string): TeachableLLM;
|
|
25
26
|
static create(tf: typeof TF, config?: Partial<GPTConfig>): TeachableLLM;
|
|
27
|
+
getProfiler(): MemoryProfiler | undefined;
|
|
28
|
+
get enableProfiler(): boolean;
|
|
29
|
+
set enableProfiler(value: boolean);
|
|
26
30
|
getNumParams(): number;
|
|
27
31
|
trainer(): Trainer;
|
|
28
32
|
train(text: string[], options?: ITrainerOptions): Promise<void>;
|
package/dist/TeachableLLM.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import { defaultConfig as
|
|
1
|
+
import { defaultConfig as h } from "./config.js";
|
|
2
2
|
import m from "./NanoGPTModel.js";
|
|
3
|
-
import { saveModel as
|
|
4
|
-
import { loadModel as
|
|
5
|
-
import
|
|
3
|
+
import { saveModel as d } from "./utilities/save.js";
|
|
4
|
+
import { loadModel as f } from "./utilities/load.js";
|
|
5
|
+
import u from "./Generator.js";
|
|
6
6
|
import _ from "./Trainer.js";
|
|
7
7
|
import { E as c } from "./index-Dwqa6Zy2.js";
|
|
8
|
-
import { dummyPassAsync as
|
|
8
|
+
import { dummyPassAsync as l } from "./utilities/dummy.js";
|
|
9
9
|
import g from "./tokeniser/CharTokeniser.js";
|
|
10
10
|
import "./papaparse.min-C8l2Kvo1.js";
|
|
11
11
|
import "./index-Tf7vU29b.js";
|
|
@@ -13,6 +13,9 @@ import "./jszip.min-CjP2V1VV.js";
|
|
|
13
13
|
import "./ops/scatterSub.js";
|
|
14
14
|
import "./ops/gatherSub.js";
|
|
15
15
|
import "./ops/attentionMask.js";
|
|
16
|
+
import "./ops/qkv.js";
|
|
17
|
+
import "./ops/rope.js";
|
|
18
|
+
import p from "./utilities/profile.js";
|
|
16
19
|
class a extends c {
|
|
17
20
|
_config;
|
|
18
21
|
_model;
|
|
@@ -49,23 +52,23 @@ class a extends c {
|
|
|
49
52
|
saveModel(t) {
|
|
50
53
|
if (!this._model || !this._tokeniser)
|
|
51
54
|
throw new Error("Model or tokeniser is not initialized.");
|
|
52
|
-
return
|
|
55
|
+
return d(this._model, this._tokeniser, t);
|
|
53
56
|
}
|
|
54
57
|
static loadModel(t, r) {
|
|
55
58
|
const e = new a(t);
|
|
56
|
-
return
|
|
57
|
-
e._model =
|
|
59
|
+
return f(t, r).then(({ model: o, tokeniser: s }) => {
|
|
60
|
+
e._model = o, e._tokeniser = s, e._config = o.config, e.setStatus("warmup"), l(o).then(() => {
|
|
58
61
|
e.setStatus("ready");
|
|
59
62
|
}).catch((i) => {
|
|
60
63
|
e.setStatus("error"), e.emit("error", i);
|
|
61
64
|
});
|
|
62
|
-
}).catch((
|
|
63
|
-
e.setStatus("error"), e.emit("error",
|
|
65
|
+
}).catch((o) => {
|
|
66
|
+
e.setStatus("error"), e.emit("error", o);
|
|
64
67
|
}), e;
|
|
65
68
|
}
|
|
66
69
|
static create(t, r = {}) {
|
|
67
|
-
const e = { ...
|
|
68
|
-
return i.setStatus("warmup"),
|
|
70
|
+
const e = { ...h, ...r }, o = new g(e.vocabSize), s = new m(t, e), i = new a(t, o, s);
|
|
71
|
+
return i.setStatus("warmup"), l(s).then(() => {
|
|
69
72
|
i.tokeniser.trained ? i.setStatus("ready") : (i.setStatus("awaitingTokens"), i.tokeniser.once("trainStatus", (n) => {
|
|
70
73
|
n === "trained" && i.setStatus("ready");
|
|
71
74
|
}));
|
|
@@ -73,6 +76,20 @@ class a extends c {
|
|
|
73
76
|
i.setStatus("error"), i.emit("error", n);
|
|
74
77
|
}), i;
|
|
75
78
|
}
|
|
79
|
+
getProfiler() {
|
|
80
|
+
return this._model?.getProfiler();
|
|
81
|
+
}
|
|
82
|
+
get enableProfiler() {
|
|
83
|
+
return !!this._model?.getProfiler();
|
|
84
|
+
}
|
|
85
|
+
set enableProfiler(t) {
|
|
86
|
+
if (t) {
|
|
87
|
+
if (!this._model)
|
|
88
|
+
throw new Error("Model is not initialized.");
|
|
89
|
+
this._model.getProfiler() || this._model.setProfiler(new p());
|
|
90
|
+
} else
|
|
91
|
+
this._model && this._model.setProfiler(void 0);
|
|
92
|
+
}
|
|
76
93
|
getNumParams() {
|
|
77
94
|
if (!this._model)
|
|
78
95
|
throw new Error("Model is not initialized.");
|
|
@@ -84,8 +101,8 @@ class a extends c {
|
|
|
84
101
|
const t = new _(this._model, this._tokeniser);
|
|
85
102
|
return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (r) => {
|
|
86
103
|
const e = this.listeners("trainStep");
|
|
87
|
-
for (const
|
|
88
|
-
await
|
|
104
|
+
for (const o of e)
|
|
105
|
+
await o(r);
|
|
89
106
|
}), t;
|
|
90
107
|
}
|
|
91
108
|
train(t, r) {
|
|
@@ -94,7 +111,7 @@ class a extends c {
|
|
|
94
111
|
generator() {
|
|
95
112
|
if (!this._model || !this._tokeniser)
|
|
96
113
|
throw new Error("Model or tokeniser is not initialized.");
|
|
97
|
-
const t = new
|
|
114
|
+
const t = new u(this._model, this._tokeniser);
|
|
98
115
|
return t.on("start", () => {
|
|
99
116
|
this.status === "ready" && this.setStatus("busy");
|
|
100
117
|
}), t.on("stop", () => {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { o as
|
|
1
|
+
import { o as c, d as s, g as n, E as m, C as r } from "./index-YPKosni4.js";
|
|
2
2
|
/**
|
|
3
3
|
* @license
|
|
4
4
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -15,13 +15,13 @@ import { o as t, c as s, f as n, E as m, C as r } from "./index-Dsg28SG6.js";
|
|
|
15
15
|
* limitations under the License.
|
|
16
16
|
* =============================================================================
|
|
17
17
|
*/
|
|
18
|
-
function l(o,
|
|
19
|
-
const a = s(o, "real", "complex"), e = s(
|
|
18
|
+
function l(o, p) {
|
|
19
|
+
const a = s(o, "real", "complex"), e = s(p, "imag", "complex");
|
|
20
20
|
n(a.shape, e.shape, `real and imag shapes, ${a.shape} and ${e.shape}, must match in call to tf.complex().`);
|
|
21
|
-
const
|
|
22
|
-
return m.runKernel(r,
|
|
21
|
+
const t = { real: a, imag: e };
|
|
22
|
+
return m.runKernel(r, t);
|
|
23
23
|
}
|
|
24
|
-
const i = /* @__PURE__ */
|
|
24
|
+
const i = /* @__PURE__ */ c({ complex_: l });
|
|
25
25
|
export {
|
|
26
26
|
i as c
|
|
27
27
|
};
|
|
@@ -383,7 +383,7 @@ function _t(n, t) {
|
|
|
383
383
|
return e.set(n, s), e.get(n);
|
|
384
384
|
}
|
|
385
385
|
}
|
|
386
|
-
const Ge = "Abs", ne = "Add", Es = "BatchMatMul", se = "Cast", As = "Complex", ze = "ComplexAbs", We = "RealDiv",
|
|
386
|
+
const Ge = "Abs", ne = "Add", Es = "BatchMatMul", se = "Cast", As = "Complex", ze = "ComplexAbs", Bs = "Concat", We = "RealDiv", vs = "Elu", Ms = "Exp", je = "Fill", Ke = "FloorDiv", Fs = "GatherV2", $s = "GatherNd", re = "Identity", Rs = "Imag", xs = "LeakyRelu", Ns = "Log", Ds = "Max", Ve = "Maximum", qe = "Multiply", Cs = "Neg", _s = "Pack", He = "Pow", Ps = "Prelu", Os = "Range", Ls = "Real", Us = "Relu", Gs = "Reshape", zs = "Relu6", Ws = "ScatterNd", js = "Sigmoid", Je = "Sqrt", Ks = "Sum", Vs = "SplitV", qs = "Softmax", Xe = "Sub", Hs = "Transpose", Ye = "ZerosLike", Js = "Step", Xs = "_FusedMatMul";
|
|
387
387
|
/**
|
|
388
388
|
* @license
|
|
389
389
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -438,11 +438,11 @@ function Wt(n) {
|
|
|
438
438
|
}
|
|
439
439
|
return e;
|
|
440
440
|
}
|
|
441
|
-
function
|
|
441
|
+
function Ys(n) {
|
|
442
442
|
const { kernelName: t, backendName: e } = n, s = ie(t, e);
|
|
443
443
|
ht.has(s) && O(`The kernel '${t}' for backend '${e}' is already registered`), ht.set(s, n);
|
|
444
444
|
}
|
|
445
|
-
function
|
|
445
|
+
function Qs(n) {
|
|
446
446
|
const { kernelName: t } = n;
|
|
447
447
|
It.has(t) && S().getBool("DEBUG") && O(`Overriding the gradient for '${t}'`), It.set(t, n);
|
|
448
448
|
}
|
|
@@ -1902,7 +1902,7 @@ function I(n, t, e, s = "numeric") {
|
|
|
1902
1902
|
const a = r !== "string" ? ae(n, r) : at(n, [], !0);
|
|
1903
1903
|
return g.makeTensor(a, i, r);
|
|
1904
1904
|
}
|
|
1905
|
-
function
|
|
1905
|
+
function Zs(n, t, e, s = "numeric") {
|
|
1906
1906
|
if (!Array.isArray(n))
|
|
1907
1907
|
throw new Error(`Argument ${t} passed to ${e} must be a \`Tensor[]\` or \`TensorLike[]\``);
|
|
1908
1908
|
return n.map((i, o) => I(i, `${t}[${o}]`, e, s));
|
|
@@ -2065,9 +2065,12 @@ function Sn(n, t) {
|
|
|
2065
2065
|
* limitations under the License.
|
|
2066
2066
|
* =============================================================================
|
|
2067
2067
|
*/
|
|
2068
|
-
function
|
|
2068
|
+
function tr() {
|
|
2069
2069
|
return g;
|
|
2070
2070
|
}
|
|
2071
|
+
function er() {
|
|
2072
|
+
return g.memory();
|
|
2073
|
+
}
|
|
2071
2074
|
function E(n, t) {
|
|
2072
2075
|
return g.tidy(n, t);
|
|
2073
2076
|
}
|
|
@@ -2890,7 +2893,7 @@ function Yn(n, t, e) {
|
|
|
2890
2893
|
* limitations under the License.
|
|
2891
2894
|
* =============================================================================
|
|
2892
2895
|
*/
|
|
2893
|
-
function
|
|
2896
|
+
function nr(n, t) {
|
|
2894
2897
|
const e = [];
|
|
2895
2898
|
for (let s = 0; s < t.length; s++) {
|
|
2896
2899
|
const r = n[n.length - s - 1], i = t.length - s - 1, o = t[i];
|
|
@@ -3058,7 +3061,7 @@ function ss(n, t) {
|
|
|
3058
3061
|
a[u] != null && (c[l.name] = a[u]);
|
|
3059
3062
|
}), s?.forEach((l) => c[l.name] = null), { value: o, grads: c };
|
|
3060
3063
|
}
|
|
3061
|
-
function
|
|
3064
|
+
function sr(n) {
|
|
3062
3065
|
return g.customGrad(n);
|
|
3063
3066
|
}
|
|
3064
3067
|
/**
|
|
@@ -3838,54 +3841,59 @@ function bs() {
|
|
|
3838
3841
|
*/
|
|
3839
3842
|
bs();
|
|
3840
3843
|
export {
|
|
3844
|
+
Qn as $,
|
|
3841
3845
|
ds as A,
|
|
3842
3846
|
Es as B,
|
|
3843
3847
|
As as C,
|
|
3844
|
-
|
|
3848
|
+
w as D,
|
|
3845
3849
|
g as E,
|
|
3846
|
-
|
|
3847
|
-
|
|
3848
|
-
|
|
3849
|
-
|
|
3850
|
-
|
|
3851
|
-
|
|
3852
|
-
|
|
3853
|
-
|
|
3854
|
-
|
|
3855
|
-
|
|
3856
|
-
|
|
3857
|
-
|
|
3858
|
-
|
|
3859
|
-
|
|
3860
|
-
|
|
3861
|
-
|
|
3862
|
-
|
|
3863
|
-
|
|
3864
|
-
|
|
3865
|
-
|
|
3866
|
-
|
|
3867
|
-
|
|
3868
|
-
|
|
3869
|
-
|
|
3870
|
-
|
|
3871
|
-
|
|
3872
|
-
|
|
3873
|
-
|
|
3874
|
-
|
|
3875
|
-
|
|
3876
|
-
|
|
3877
|
-
|
|
3878
|
-
|
|
3850
|
+
qs as F,
|
|
3851
|
+
$s as G,
|
|
3852
|
+
sr as H,
|
|
3853
|
+
E as I,
|
|
3854
|
+
C as J,
|
|
3855
|
+
js as K,
|
|
3856
|
+
Ns as L,
|
|
3857
|
+
Ds as M,
|
|
3858
|
+
vs as N,
|
|
3859
|
+
Rs as O,
|
|
3860
|
+
_s as P,
|
|
3861
|
+
xs as Q,
|
|
3862
|
+
Gs as R,
|
|
3863
|
+
Ks as S,
|
|
3864
|
+
Cs as T,
|
|
3865
|
+
Ps as U,
|
|
3866
|
+
Ls as V,
|
|
3867
|
+
Us as W,
|
|
3868
|
+
zs as X,
|
|
3869
|
+
Js as Y,
|
|
3870
|
+
Hs as Z,
|
|
3871
|
+
nr as _,
|
|
3872
|
+
p as a,
|
|
3873
|
+
Xs as a0,
|
|
3874
|
+
Z as b,
|
|
3875
|
+
Qs as c,
|
|
3876
|
+
I as d,
|
|
3877
|
+
tr as e,
|
|
3878
|
+
V as f,
|
|
3879
|
+
Is as g,
|
|
3880
|
+
$t as h,
|
|
3881
|
+
Vs as i,
|
|
3882
|
+
Os as j,
|
|
3883
|
+
Zs as k,
|
|
3884
|
+
y as l,
|
|
3885
|
+
er as m,
|
|
3886
|
+
Gn as n,
|
|
3879
3887
|
F as o,
|
|
3880
|
-
|
|
3881
|
-
|
|
3882
|
-
|
|
3888
|
+
Bs as p,
|
|
3889
|
+
Fs as q,
|
|
3890
|
+
Ys as r,
|
|
3883
3891
|
K as s,
|
|
3884
|
-
|
|
3885
|
-
|
|
3886
|
-
|
|
3887
|
-
|
|
3888
|
-
|
|
3889
|
-
|
|
3890
|
-
|
|
3892
|
+
Dt as t,
|
|
3893
|
+
Zt as u,
|
|
3894
|
+
G as v,
|
|
3895
|
+
De as w,
|
|
3896
|
+
Ws as x,
|
|
3897
|
+
Ms as y,
|
|
3898
|
+
Ts as z
|
|
3891
3899
|
};
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { default as MemoryProfiler } from '../utilities/profile';
|
|
2
|
+
export default abstract class BaseLayer {
|
|
3
|
+
protected _profiler?: MemoryProfiler;
|
|
4
|
+
getProfiler(): MemoryProfiler | undefined;
|
|
5
|
+
setProfiler(value: MemoryProfiler | undefined): void;
|
|
6
|
+
startMemory(): void;
|
|
7
|
+
endMemory(label: string): void;
|
|
8
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
class t {
|
|
2
|
+
_profiler;
|
|
3
|
+
getProfiler() {
|
|
4
|
+
return this._profiler;
|
|
5
|
+
}
|
|
6
|
+
setProfiler(r) {
|
|
7
|
+
this._profiler = r;
|
|
8
|
+
}
|
|
9
|
+
startMemory() {
|
|
10
|
+
this._profiler?.startMemory();
|
|
11
|
+
}
|
|
12
|
+
endMemory(r) {
|
|
13
|
+
this._profiler?.endMemory(r);
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
export {
|
|
17
|
+
t as default
|
|
18
|
+
};
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
import { default as TF } from '@tensorflow/tfjs';
|
|
2
2
|
import { GPTConfig } from '../config';
|
|
3
3
|
import { default as RoPECache } from './RoPECache';
|
|
4
|
+
import { default as BaseLayer } from './BaseLayer';
|
|
4
5
|
export type KVCache = {
|
|
5
6
|
k: TF.Tensor;
|
|
6
7
|
v: TF.Tensor;
|
|
7
8
|
length: number;
|
|
8
9
|
cumulativeLength: number;
|
|
9
10
|
};
|
|
10
|
-
export default class CausalSelfAttention {
|
|
11
|
+
export default class CausalSelfAttention extends BaseLayer {
|
|
11
12
|
private readonly ropeCache?;
|
|
12
13
|
private config;
|
|
13
14
|
private cAttn;
|
|
@@ -20,7 +21,9 @@ export default class CausalSelfAttention {
|
|
|
20
21
|
private divisor;
|
|
21
22
|
private index;
|
|
22
23
|
private _trainable;
|
|
24
|
+
private units;
|
|
23
25
|
constructor(tf: typeof TF, index: number, config: GPTConfig, ropeCache?: RoPECache | undefined);
|
|
26
|
+
private build;
|
|
24
27
|
get variables(): TF.Variable[];
|
|
25
28
|
get trainable(): boolean;
|
|
26
29
|
set trainable(value: boolean);
|