@genai-fi/nanogpt 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.d.ts +1 -0
- package/dist/Generator.js +11 -11
- package/dist/NanoGPTModel.js +81 -60
- package/dist/TeachableLLM.js +5 -4
- package/dist/Trainer.js +1 -1
- package/dist/_commonjsHelpers-ByX85dGu.js +33 -0
- package/dist/data/parquet.d.ts +1 -0
- package/dist/data/parquet.js +12 -0
- package/dist/data/textLoader.d.ts +6 -0
- package/dist/data/textLoader.js +39 -0
- package/dist/{index-SOhdqzHq.js → index-Dwqa6Zy2.js} +1 -1
- package/dist/{jszip.min-BLbRbbKt.js → jszip.min-pMIn3RZH.js} +1 -1
- package/dist/layers/RoPECache.js +24 -19
- package/dist/main.d.ts +2 -0
- package/dist/main.js +6 -2
- package/dist/{utilities/textLoader.js → papaparse.min-C8l2Kvo1.js} +127 -141
- package/dist/parquet-DpcqBLb0.js +39727 -0
- package/dist/tokeniser/CharTokeniser.js +1 -1
- package/dist/tokeniser/NodeTokeniser.js +1 -1
- package/dist/tokeniser/WebTokeniser.js +1 -1
- package/dist/utilities/load.js +1 -1
- package/dist/utilities/save.js +1 -1
- package/package.json +3 -2
- package/dist/_commonjsHelpers-DaMA6jEr.js +0 -8
- package/dist/utilities/textLoader.d.ts +0 -1
package/dist/Generator.d.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { ITokeniser } from './tokeniser/type';
|
|
|
3
3
|
import { default as EE } from 'eventemitter3';
|
|
4
4
|
export interface IGenerateOptions extends GenerateOptions {
|
|
5
5
|
maxLength?: number;
|
|
6
|
+
noCache?: boolean;
|
|
6
7
|
}
|
|
7
8
|
export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
|
|
8
9
|
private readonly model;
|
package/dist/Generator.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { E as u } from "./index-
|
|
2
|
-
class
|
|
1
|
+
import { E as u } from "./index-Dwqa6Zy2.js";
|
|
2
|
+
class p extends u {
|
|
3
3
|
constructor(s, e) {
|
|
4
4
|
super(), this.model = s, this.tokeniser = e;
|
|
5
5
|
}
|
|
@@ -14,10 +14,10 @@ class k extends u {
|
|
|
14
14
|
const {
|
|
15
15
|
output: o,
|
|
16
16
|
attention: c,
|
|
17
|
-
probabilities:
|
|
18
|
-
} = this.model.generate(t, void 0, e),
|
|
19
|
-
t = this.model.tf.concat([t, o], 1),
|
|
20
|
-
const r = await this.processResponse(o, c,
|
|
17
|
+
probabilities: h
|
|
18
|
+
} = this.model.generate(t, void 0, e), l = t;
|
|
19
|
+
t = this.model.tf.concat([t, o], 1), l.dispose();
|
|
20
|
+
const r = await this.processResponse(o, c, h);
|
|
21
21
|
if (o.dispose(), r === null)
|
|
22
22
|
break;
|
|
23
23
|
n += r;
|
|
@@ -40,14 +40,14 @@ class k extends u {
|
|
|
40
40
|
for (let o = 0; o < i; o++) {
|
|
41
41
|
const {
|
|
42
42
|
output: c,
|
|
43
|
-
attention:
|
|
44
|
-
probabilities:
|
|
43
|
+
attention: h,
|
|
44
|
+
probabilities: l
|
|
45
45
|
} = this.model.generate(t, a, {
|
|
46
46
|
...e,
|
|
47
47
|
usePadding: !1
|
|
48
48
|
});
|
|
49
49
|
t.dispose(), t = c;
|
|
50
|
-
const r = await this.processResponse(c,
|
|
50
|
+
const r = await this.processResponse(c, h, l);
|
|
51
51
|
if (r === null)
|
|
52
52
|
break;
|
|
53
53
|
n += r;
|
|
@@ -56,10 +56,10 @@ class k extends u {
|
|
|
56
56
|
}
|
|
57
57
|
async generate(s, e) {
|
|
58
58
|
this.emit("start");
|
|
59
|
-
const t = this.model.config.useRope ? this.generateCache(s, e) : this.generateNoCache(s, e);
|
|
59
|
+
const t = this.model.config.useRope && !e?.noCache ? this.generateCache(s, e) : this.generateNoCache(s, e);
|
|
60
60
|
return this.emit("stop"), t;
|
|
61
61
|
}
|
|
62
62
|
}
|
|
63
63
|
export {
|
|
64
|
-
|
|
64
|
+
p as default
|
|
65
65
|
};
|
package/dist/NanoGPTModel.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { defaultConfig as
|
|
2
|
-
import
|
|
3
|
-
import
|
|
4
|
-
import
|
|
5
|
-
import
|
|
6
|
-
class
|
|
1
|
+
import { defaultConfig as z } from "./config.js";
|
|
2
|
+
import $ from "./layers/TransformerBlock.js";
|
|
3
|
+
import S from "./layers/TiedEmbedding.js";
|
|
4
|
+
import I from "./layers/RoPECache.js";
|
|
5
|
+
import _ from "./layers/RMSNorm.js";
|
|
6
|
+
class x {
|
|
7
7
|
config;
|
|
8
8
|
wte;
|
|
9
9
|
// Token embeddings
|
|
@@ -19,7 +19,7 @@ class F {
|
|
|
19
19
|
log = [];
|
|
20
20
|
// Training log
|
|
21
21
|
constructor(t, e = {}) {
|
|
22
|
-
this.tf = t, this.config = { ...
|
|
22
|
+
this.tf = t, this.config = { ...z, ...e }, this.wte = new S(t, {
|
|
23
23
|
vocabSize: this.config.vocabSize,
|
|
24
24
|
embedDim: this.config.nEmbed,
|
|
25
25
|
name: "token_embedding"
|
|
@@ -28,10 +28,10 @@ class F {
|
|
|
28
28
|
outputDim: this.config.nEmbed,
|
|
29
29
|
name: "positional_embedding",
|
|
30
30
|
embeddingsInitializer: this.tf.initializers.randomNormal({ mean: 0, stddev: 0.02 })
|
|
31
|
-
}) : this.ropeCache = new
|
|
32
|
-
for (let
|
|
33
|
-
this.blocks.push(new
|
|
34
|
-
this.lnF = new
|
|
31
|
+
}) : this.ropeCache = new I(t, this.config), this.drop = this.tf.layers.dropout({ rate: this.config.dropout }), this.blocks = [];
|
|
32
|
+
for (let o = 0; o < this.config.nLayer; o++)
|
|
33
|
+
this.blocks.push(new $(this.tf, o, this.config, this.ropeCache));
|
|
34
|
+
this.lnF = new _(t, [this.config.nEmbed], 1e-8, "final_rms_norm");
|
|
35
35
|
}
|
|
36
36
|
get variables() {
|
|
37
37
|
return [
|
|
@@ -54,17 +54,17 @@ class F {
|
|
|
54
54
|
this.blocks[e].loadWeights(t);
|
|
55
55
|
this.lnF.setWeights(t.get("final_rms_norm") || []);
|
|
56
56
|
}
|
|
57
|
-
inputPhase(t, e,
|
|
57
|
+
inputPhase(t, e, o = !1) {
|
|
58
58
|
return this.tf.tidy(() => {
|
|
59
|
-
const
|
|
59
|
+
const i = this.wte.embed(t);
|
|
60
60
|
if (this.config.useRope === !1) {
|
|
61
|
-
const [,
|
|
62
|
-
this.tf.add(
|
|
63
|
-
this.tf.scalar(
|
|
64
|
-
),
|
|
65
|
-
return this.drop.apply(
|
|
61
|
+
const [, s] = t.shape, r = this.config.blockSize, l = this.tf.range(0, s, 1, "int32"), n = this.tf.mod(
|
|
62
|
+
this.tf.add(l, this.tf.scalar(e, "int32")),
|
|
63
|
+
this.tf.scalar(r, "int32")
|
|
64
|
+
), h = this.wpe.apply(n), c = i.add(h);
|
|
65
|
+
return this.drop.apply(c, { training: o });
|
|
66
66
|
} else
|
|
67
|
-
return this.drop.apply(
|
|
67
|
+
return this.drop.apply(i, { training: o });
|
|
68
68
|
});
|
|
69
69
|
}
|
|
70
70
|
setSkipMask(t) {
|
|
@@ -95,8 +95,8 @@ class F {
|
|
|
95
95
|
calculateLoss(t, e) {
|
|
96
96
|
try {
|
|
97
97
|
return this.tf.losses.softmaxCrossEntropy(e, t, this.tf.Reduction.MEAN);
|
|
98
|
-
} catch (
|
|
99
|
-
throw console.error("Error computing loss:",
|
|
98
|
+
} catch (o) {
|
|
99
|
+
throw console.error("Error computing loss:", o), new Error(`Loss computation failed: ${o}`);
|
|
100
100
|
}
|
|
101
101
|
}
|
|
102
102
|
// Attention rollout per Abnar & Zuidema (2020)
|
|
@@ -105,67 +105,88 @@ class F {
|
|
|
105
105
|
return this.tf.tidy(() => {
|
|
106
106
|
if (t.length === 0)
|
|
107
107
|
throw new Error("No attentions for rollout");
|
|
108
|
-
const e
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
108
|
+
const [e, o, i] = t[0].shape;
|
|
109
|
+
for (const s of t) {
|
|
110
|
+
const [r, l, n] = s.shape;
|
|
111
|
+
if (r !== e || l !== o || n !== i)
|
|
112
|
+
throw new Error(
|
|
113
|
+
`Inconsistent attention shapes in rollout: expected [${e},${o},${i}] got [${r},${l},${n}]`
|
|
114
|
+
);
|
|
113
115
|
}
|
|
114
|
-
|
|
116
|
+
if (o === i) {
|
|
117
|
+
const s = this.tf.eye(i, i).expandDims(0);
|
|
118
|
+
let r = s.tile([e, 1, 1]);
|
|
119
|
+
for (const l of t) {
|
|
120
|
+
const n = l.add(s);
|
|
121
|
+
r = n.div(n.sum(-1, !0)).matMul(r);
|
|
122
|
+
}
|
|
123
|
+
return r;
|
|
124
|
+
}
|
|
125
|
+
if (o === 1) {
|
|
126
|
+
let s = null;
|
|
127
|
+
const r = this.tf.tensor1d([i - 1], "int32"), l = this.tf.oneHot(r, i).reshape([1, 1, i]).tile([e, 1, 1]);
|
|
128
|
+
r.dispose();
|
|
129
|
+
for (const n of t) {
|
|
130
|
+
let h = n.add(l);
|
|
131
|
+
h = h.div(h.sum(-1, !0)), s == null ? s = h : (s = s.mul(h), s = s.div(s.sum(-1, !0)));
|
|
132
|
+
}
|
|
133
|
+
return s;
|
|
134
|
+
}
|
|
135
|
+
throw new Error(`Unsupported attention shapes for rollout: [B=${e}, Q=${o}, K=${i}]`);
|
|
115
136
|
});
|
|
116
137
|
}
|
|
117
|
-
forward(t, e,
|
|
138
|
+
forward(t, e, o = !1, i = !1, s) {
|
|
118
139
|
return this.validateInput(t), this.tf.tidy(() => {
|
|
119
|
-
const
|
|
120
|
-
let
|
|
121
|
-
const
|
|
122
|
-
if (
|
|
123
|
-
throw console.error("Cache",
|
|
124
|
-
for (let
|
|
125
|
-
const d = this.blocks[
|
|
140
|
+
const r = s?.[0]?.length ?? 0;
|
|
141
|
+
let l = this.inputPhase(t, r, o);
|
|
142
|
+
const n = [];
|
|
143
|
+
if (s && s.length !== this.blocks.length)
|
|
144
|
+
throw console.error("Cache", s), new Error(`Cache length ${s.length} does not match number of blocks ${this.blocks.length}`);
|
|
145
|
+
for (let a = 0; a < this.blocks.length; a++) {
|
|
146
|
+
const d = this.blocks[a], {
|
|
126
147
|
output: g,
|
|
127
|
-
attention:
|
|
148
|
+
attention: m,
|
|
128
149
|
cache: p
|
|
129
|
-
} = d.call(
|
|
130
|
-
|
|
150
|
+
} = d.call(l, o, i, s ? s[a] : void 0);
|
|
151
|
+
l = g, i && m && n.push(m), s && p ? (s[a]?.k.dispose(), s[a]?.v.dispose(), s[a] = p) : p && (p.k.dispose(), p.v.dispose());
|
|
131
152
|
}
|
|
132
|
-
let
|
|
133
|
-
|
|
134
|
-
const
|
|
153
|
+
let h;
|
|
154
|
+
i && n.length > 0 && (h = this.computeAttentionRollout(n)), l = this.lnF.apply(l);
|
|
155
|
+
const c = this.wte.project(l);
|
|
135
156
|
let f;
|
|
136
|
-
return e && (f = this.calculateLoss(
|
|
157
|
+
return e && (f = this.calculateLoss(c, e)), { logits: c, loss: f, attention: i ? h : void 0 };
|
|
137
158
|
});
|
|
138
159
|
}
|
|
139
|
-
generate(t, e,
|
|
140
|
-
const
|
|
160
|
+
generate(t, e, o) {
|
|
161
|
+
const i = o?.temperature ?? 1, s = o?.topK, r = o?.usePadding ?? !1, l = o?.includeAttention ?? !1;
|
|
141
162
|
return this.tf.tidy(() => {
|
|
142
|
-
const
|
|
143
|
-
[0,
|
|
144
|
-
[
|
|
145
|
-
), f =
|
|
163
|
+
const n = t, h = n.shape[1], c = h <= this.config.blockSize ? n : n.slice(
|
|
164
|
+
[0, h - this.config.blockSize],
|
|
165
|
+
[n.shape[0], this.config.blockSize]
|
|
166
|
+
), f = r ? this.config.blockSize - c.shape[1] : 0, a = f > 0 ? this.tf.pad(c, [
|
|
146
167
|
[0, 0],
|
|
147
168
|
[0, f]
|
|
148
|
-
]) :
|
|
149
|
-
let
|
|
150
|
-
if (
|
|
151
|
-
const { values: E, indices:
|
|
152
|
-
|
|
169
|
+
]) : c, { logits: d, attention: g } = this.forward(a, void 0, !1, l, e), m = d.shape[1] - 1 - f, p = d.slice([0, m, 0], [d.shape[0], 1, d.shape[2]]), w = g ? g.slice([0, m, 0], [g.shape[0], 1, g.shape[2]]) : void 0, b = p.div(i);
|
|
170
|
+
let u;
|
|
171
|
+
if (s) {
|
|
172
|
+
const { values: E, indices: v } = this.tf.topk(b, s), y = this.tf.multinomial(E.squeeze([1]), 1);
|
|
173
|
+
u = this.tf.gather(v.squeeze([1]), y, 1);
|
|
153
174
|
} else
|
|
154
|
-
|
|
175
|
+
u = this.tf.multinomial(b.squeeze([1]), 1);
|
|
155
176
|
let k;
|
|
156
|
-
return
|
|
177
|
+
return o?.includeProbabilities && (k = this.tf.softmax(b.squeeze([1]))), u = u.reshape([1, 1]), { output: u, attention: w?.squeeze([1]), probabilities: k };
|
|
157
178
|
});
|
|
158
179
|
}
|
|
159
180
|
getNumParams() {
|
|
160
|
-
const t = this.config.vocabSize * this.config.nEmbed
|
|
161
|
-
2 * this.config.nEmbed),
|
|
162
|
-
this.config.nEmbed *
|
|
163
|
-
return t + e +
|
|
181
|
+
const t = this.config.vocabSize * this.config.nEmbed, e = this.config.nLayer * (4 * this.config.nEmbed * this.config.nEmbed + // qkv + proj
|
|
182
|
+
2 * this.config.nEmbed), o = this.config.nLayer * (this.config.mlpFactor * this.config.nEmbed * this.config.nEmbed + // fc
|
|
183
|
+
this.config.nEmbed * this.config.mlpFactor * this.config.nEmbed), i = this.config.nEmbed;
|
|
184
|
+
return t + e + o + i;
|
|
164
185
|
}
|
|
165
186
|
dispose() {
|
|
166
187
|
this.wte.dispose(), this.wpe && this.wpe.dispose(), this.drop.dispose(), this.blocks.forEach((t) => t.dispose()), this.lnF.dispose();
|
|
167
188
|
}
|
|
168
189
|
}
|
|
169
190
|
export {
|
|
170
|
-
|
|
191
|
+
x as default
|
|
171
192
|
};
|
package/dist/TeachableLLM.js
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
|
-
import d from "./
|
|
2
|
-
import
|
|
1
|
+
import { defaultConfig as d } from "./config.js";
|
|
2
|
+
import u from "./NanoGPTModel.js";
|
|
3
3
|
import { saveModel as m } from "./utilities/save.js";
|
|
4
4
|
import { loadModel as l } from "./utilities/load.js";
|
|
5
5
|
import f from "./Generator.js";
|
|
6
6
|
import _ from "./Trainer.js";
|
|
7
|
-
import { E as c } from "./index-
|
|
7
|
+
import { E as c } from "./index-Dwqa6Zy2.js";
|
|
8
8
|
import { dummyPassAsync as h } from "./utilities/dummy.js";
|
|
9
9
|
import g from "./tokeniser/CharTokeniser.js";
|
|
10
|
+
import "./papaparse.min-C8l2Kvo1.js";
|
|
10
11
|
class a extends c {
|
|
11
12
|
_config;
|
|
12
13
|
_model;
|
|
@@ -58,7 +59,7 @@ class a extends c {
|
|
|
58
59
|
}), e;
|
|
59
60
|
}
|
|
60
61
|
static create(t, r = {}) {
|
|
61
|
-
const e = { ...
|
|
62
|
+
const e = { ...d, ...r }, s = new g(e.vocabSize), o = new u(t, e), i = new a(t, s, o);
|
|
62
63
|
return i.setStatus("warmup"), h(o).then(() => {
|
|
63
64
|
i.tokeniser.trained ? i.setStatus("ready") : (i.setStatus("awaitingTokens"), i.tokeniser.once("trainStatus", (n) => {
|
|
64
65
|
n === "trained" && i.setStatus("ready");
|
package/dist/Trainer.js
CHANGED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
var u = typeof globalThis < "u" ? globalThis : typeof window < "u" ? window : typeof global < "u" ? global : typeof self < "u" ? self : {};
|
|
2
|
+
function a(e) {
|
|
3
|
+
return e && e.__esModule && Object.prototype.hasOwnProperty.call(e, "default") ? e.default : e;
|
|
4
|
+
}
|
|
5
|
+
function f(e) {
|
|
6
|
+
if (Object.prototype.hasOwnProperty.call(e, "__esModule")) return e;
|
|
7
|
+
var n = e.default;
|
|
8
|
+
if (typeof n == "function") {
|
|
9
|
+
var t = function r() {
|
|
10
|
+
var o = !1;
|
|
11
|
+
try {
|
|
12
|
+
o = this instanceof r;
|
|
13
|
+
} catch {
|
|
14
|
+
}
|
|
15
|
+
return o ? Reflect.construct(n, arguments, this.constructor) : n.apply(this, arguments);
|
|
16
|
+
};
|
|
17
|
+
t.prototype = n.prototype;
|
|
18
|
+
} else t = {};
|
|
19
|
+
return Object.defineProperty(t, "__esModule", { value: !0 }), Object.keys(e).forEach(function(r) {
|
|
20
|
+
var o = Object.getOwnPropertyDescriptor(e, r);
|
|
21
|
+
Object.defineProperty(t, r, o.get ? o : {
|
|
22
|
+
enumerable: !0,
|
|
23
|
+
get: function() {
|
|
24
|
+
return e[r];
|
|
25
|
+
}
|
|
26
|
+
});
|
|
27
|
+
}), t;
|
|
28
|
+
}
|
|
29
|
+
export {
|
|
30
|
+
f as a,
|
|
31
|
+
u as c,
|
|
32
|
+
a as g
|
|
33
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function loadParquet(file: File, maxSize?: number, column?: string): Promise<string[]>;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
async function p(s, f = 104857600, e = "text") {
|
|
2
|
+
const r = await (await import("../parquet-DpcqBLb0.js").then((t) => t.p)).ParquetReader.openBuffer(Buffer.from(await s.arrayBuffer())), a = [], i = r.getCursor([[e]]);
|
|
3
|
+
let o = 0;
|
|
4
|
+
for (; ; ) {
|
|
5
|
+
const t = await i.next();
|
|
6
|
+
if (!t || !t[e] || typeof t[e] != "string" || (a.push(t[e]), o += t[e].length, o > f)) break;
|
|
7
|
+
}
|
|
8
|
+
return r.close(), a;
|
|
9
|
+
}
|
|
10
|
+
export {
|
|
11
|
+
p as loadParquet
|
|
12
|
+
};
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { p as s } from "../papaparse.min-C8l2Kvo1.js";
|
|
2
|
+
import { loadParquet as l } from "./parquet.js";
|
|
3
|
+
function m(e, t) {
|
|
4
|
+
const a = e.findIndex((n) => n.toLowerCase() === t.toLowerCase());
|
|
5
|
+
return a === -1 ? 0 : a;
|
|
6
|
+
}
|
|
7
|
+
function u(e) {
|
|
8
|
+
return e.every((t) => t.length < 64);
|
|
9
|
+
}
|
|
10
|
+
async function w(e, t) {
|
|
11
|
+
const a = e.type;
|
|
12
|
+
if (a === "application/parquet")
|
|
13
|
+
return l(e, t?.maxSize, t?.column);
|
|
14
|
+
if (a === "text/csv") {
|
|
15
|
+
const n = "FileReaderSync" in global ? e : await e.text();
|
|
16
|
+
return new Promise((c, o) => {
|
|
17
|
+
s.parse(n, {
|
|
18
|
+
header: !1,
|
|
19
|
+
skipEmptyLines: !0,
|
|
20
|
+
complete: (r) => {
|
|
21
|
+
if (r.errors.length > 0)
|
|
22
|
+
o(new Error("Error parsing file"));
|
|
23
|
+
else {
|
|
24
|
+
const i = m(r.data[0], t?.column || "text"), d = t?.hasHeader ?? u(r.data[0]) ? r.data.slice(1) : r.data;
|
|
25
|
+
c(d.map((p) => p[i]));
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
error: (r) => {
|
|
29
|
+
o(r);
|
|
30
|
+
}
|
|
31
|
+
});
|
|
32
|
+
});
|
|
33
|
+
} else if (a === "text/plain")
|
|
34
|
+
return [await e.text()];
|
|
35
|
+
throw new Error(`Unsupported file type: ${a}`);
|
|
36
|
+
}
|
|
37
|
+
export {
|
|
38
|
+
w as default
|
|
39
|
+
};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { c as bt, g as It } from "./_commonjsHelpers-
|
|
1
|
+
import { c as bt, g as It } from "./_commonjsHelpers-ByX85dGu.js";
|
|
2
2
|
function vt(yt) {
|
|
3
3
|
throw new Error('Could not dynamically require "' + yt + '". Please configure the dynamicRequireTargets or/and ignoreDynamicRequires option of @rollup/plugin-commonjs appropriately for this require call to work.');
|
|
4
4
|
}
|
package/dist/layers/RoPECache.js
CHANGED
|
@@ -1,12 +1,14 @@
|
|
|
1
|
-
class
|
|
2
|
-
constructor(
|
|
3
|
-
this.tf =
|
|
4
|
-
const
|
|
5
|
-
if (this.rotaryDim =
|
|
1
|
+
class b {
|
|
2
|
+
constructor(s, r) {
|
|
3
|
+
this.tf = s, this.config = r;
|
|
4
|
+
const o = this.config.nEmbed / this.config.nHead;
|
|
5
|
+
if (this.rotaryDim = o, this.rotaryDim % 2 !== 0)
|
|
6
6
|
throw new Error("rotaryDim must be even");
|
|
7
7
|
this.ropeBase = 1e4;
|
|
8
|
-
const
|
|
9
|
-
this.ropeInvFreq = this.tf.reciprocal(
|
|
8
|
+
const i = this.tf.range(0, this.rotaryDim, 2, "float32"), t = i.div(this.tf.scalar(this.rotaryDim, "float32")), e = this.tf.pow(this.tf.scalar(this.ropeBase, "float32"), t);
|
|
9
|
+
this.ropeInvFreq = this.tf.reciprocal(e), t.dispose(), e.dispose(), i.dispose(), this.config.useRope === !1 ? (this.ropeCos = null, this.ropeSin = null, this.ropeCacheLen = 0) : this.tf.tidy(() => {
|
|
10
|
+
this.ensureRopeCache(this.config.blockSize * 4);
|
|
11
|
+
});
|
|
10
12
|
}
|
|
11
13
|
rotaryDim;
|
|
12
14
|
ropeBase;
|
|
@@ -16,24 +18,27 @@ class E {
|
|
|
16
18
|
ropeSin = null;
|
|
17
19
|
// [cacheLen, rotaryDim/2]
|
|
18
20
|
ropeCacheLen = 0;
|
|
19
|
-
ensureRopeCache(
|
|
20
|
-
if (
|
|
21
|
+
ensureRopeCache(s) {
|
|
22
|
+
if (s <= this.ropeCacheLen) return;
|
|
21
23
|
this.ropeCos && this.ropeCos.dispose(), this.ropeSin && this.ropeSin.dispose();
|
|
22
|
-
const
|
|
23
|
-
this.ropeCos = this.tf.keep(this.tf.cos(
|
|
24
|
+
const o = this.tf.range(0, s, 1, "float32").expandDims(1).mul(this.ropeInvFreq.expandDims(0));
|
|
25
|
+
this.ropeCos = this.tf.keep(this.tf.cos(o).expandDims(-1)), this.ropeSin = this.tf.keep(this.tf.sin(o).expandDims(-1)), this.ropeCacheLen = s;
|
|
24
26
|
}
|
|
25
|
-
applyRoPE(
|
|
26
|
-
const
|
|
27
|
-
if (
|
|
28
|
-
const
|
|
29
|
-
this.ensureRopeCache(
|
|
30
|
-
const n =
|
|
31
|
-
|
|
27
|
+
applyRoPE(s, r, o) {
|
|
28
|
+
const i = s.shape[3], t = this.rotaryDim;
|
|
29
|
+
if (t > i) return [s, r];
|
|
30
|
+
const e = s.shape[2], v = o + e;
|
|
31
|
+
this.ensureRopeCache(v);
|
|
32
|
+
const n = t / 2, p = this.ropeCos.slice([o, 0, 0], [e, n, 1]).reshape([1, 1, e, n]), a = this.ropeSin.slice([o, 0, 0], [e, n, 1]).reshape([1, 1, e, n]), h = s.shape[0], c = s.shape[1], f = this.tf.range(0, t, 2, "int32"), l = this.tf.range(1, t, 2, "int32"), d = (u) => {
|
|
33
|
+
const m = u.slice([0, 0, 0, 0], [h, c, e, t]), C = t < i ? u.slice([0, 0, 0, t], [h, c, e, i - t]) : null, D = this.tf.gather(m, f, 3), g = this.tf.gather(m, l, 3), x = D.mul(p).sub(g.mul(a)), k = g.mul(p).add(D.mul(a)), R = this.tf.stack([x, k], -1).reshape([h, c, e, t]);
|
|
34
|
+
return C ? this.tf.concat([R, C], 3) : R;
|
|
35
|
+
}, y = d(s), S = d(r);
|
|
36
|
+
return f.dispose(), l.dispose(), [y, S];
|
|
32
37
|
}
|
|
33
38
|
dispose() {
|
|
34
39
|
this.ropeCos && this.ropeCos.dispose(), this.ropeSin && this.ropeSin.dispose(), this.ropeInvFreq.dispose();
|
|
35
40
|
}
|
|
36
41
|
}
|
|
37
42
|
export {
|
|
38
|
-
|
|
43
|
+
b as default
|
|
39
44
|
};
|
package/dist/main.d.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
export { default as NanoGPT } from './NanoGPTModel';
|
|
2
2
|
export { default as TeachableLLM } from './TeachableLLM';
|
|
3
3
|
export { default as CharTokeniser } from './tokeniser/CharTokeniser';
|
|
4
|
+
export { default as waitForModel } from './utilities/waitForModel';
|
|
5
|
+
export { default as loadTextData } from './data/textLoader';
|
|
4
6
|
export type { ITrainerOptions } from './Trainer';
|
|
5
7
|
export type { IGenerateOptions } from './Generator';
|
|
6
8
|
export type { TrainingLogEntry } from './NanoGPTModel';
|
package/dist/main.js
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
import { default as o } from "./NanoGPTModel.js";
|
|
2
|
-
import { default as
|
|
2
|
+
import { default as t } from "./TeachableLLM.js";
|
|
3
3
|
import { default as l } from "./tokeniser/CharTokeniser.js";
|
|
4
|
+
import { default as s } from "./utilities/waitForModel.js";
|
|
5
|
+
import { default as m } from "./data/textLoader.js";
|
|
4
6
|
export {
|
|
5
7
|
l as CharTokeniser,
|
|
6
8
|
o as NanoGPT,
|
|
7
|
-
|
|
9
|
+
t as TeachableLLM,
|
|
10
|
+
m as loadTextData,
|
|
11
|
+
s as waitForModel
|
|
8
12
|
};
|