@genai-fi/nanogpt 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +95 -46
- package/dist/NanoGPTModel.d.ts +3 -2
- package/dist/NanoGPTModel.js +91 -76
- package/dist/{Reshape-BE5rA4rT.js → Reshape-Bt_t7RNz.js} +4 -4
- package/dist/TeachableLLM.js +1 -1
- package/dist/TiedEmbedding-DORsPlNL.js +44 -0
- package/dist/{axis_util-97KkkyRQ.js → axis_util-CVbf1vmL.js} +3 -3
- package/dist/{broadcast_to-CMlkG8NS.js → broadcast_to-BBoMQXbL.js} +4 -4
- package/dist/{concat-Cxbo2sOz.js → concat-BRRtq4S2.js} +1 -1
- package/dist/dataset-ZHEPJmED.js +1226 -0
- package/dist/{dropout-kbDY39Ci.js → dropout-lQm_YyX3.js} +1 -1
- package/dist/{gather-Bxe1Qip8.js → gather-BWyutxwi.js} +3 -3
- package/dist/{gpgpu_math-C0zyxKFi.js → gpgpu_math-Df7gzJWH.js} +1 -1
- package/dist/{index-iNhkcAEQ.js → index-CnHyhpKc.js} +32 -32
- package/dist/{kernel_funcs_utils-C4eIk4fE.js → kernel_funcs_utils-Dqo82NH4.js} +25 -25
- package/dist/layers/BaseLayer.js +114 -3
- package/dist/layers/CausalSelfAttention.d.ts +2 -3
- package/dist/layers/CausalSelfAttention.js +31 -30
- package/dist/layers/MLP.js +10 -9
- package/dist/layers/RMSNorm.js +12 -11
- package/dist/layers/RoPECache.js +3 -3
- package/dist/layers/TiedEmbedding.js +8 -6
- package/dist/layers/TransformerBlock.js +2 -2
- package/dist/{log_sum_exp-CkumwesB.js → log_sum_exp-CRH7Np9v.js} +12 -12
- package/dist/main.js +1 -1
- package/dist/{mat_mul-D0SifYfJ.js → mat_mul-DeGU1U_C.js} +3 -3
- package/dist/{max-CYaAjEEp.js → max-CcnEArWK.js} +3 -3
- package/dist/{moments-B06NlR_V.js → moments-DLTE6-1p.js} +4 -4
- package/dist/{norm-D3676xIo.js → norm-BpWsOapl.js} +5 -5
- package/dist/{ones-BIeFnPHR.js → ones-CDWGzVnm.js} +6 -6
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +5 -5
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +5 -5
- package/dist/ops/cpu/gelu.js +1 -1
- package/dist/ops/cpu/matMulGelu.js +1 -1
- package/dist/ops/cpu/matMulMul.js +1 -1
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +27 -27
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +1 -1
- package/dist/ops/grads/attentionMask.js +1 -1
- package/dist/ops/grads/fusedSoftmax.js +2 -2
- package/dist/ops/grads/gelu.js +1 -1
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/normRMS.js +1 -1
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/matMulGelu.js +1 -1
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +36 -36
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/matMulGelu.js +22 -22
- package/dist/ops/webgl/matMulMul.js +1 -1
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/{ops-ObfXLHYQ.js → ops-DzQTmLIl.js} +60 -60
- package/dist/{TiedEmbedding-DsDRvLB0.js → random_width-DI2h9CMs.js} +1215 -1250
- package/dist/{range-BsFU-SNG.js → range-CkOJ7090.js} +1 -1
- package/dist/{reshape-DxTPgnwL.js → reshape-CTIbqjwm.js} +1 -1
- package/dist/{sin-BOX-JVAj.js → sin-HzioENy_.js} +5 -5
- package/dist/{slice_util-D-kaD4ZV.js → slice_util-n4wHKmex.js} +1 -1
- package/dist/{softmax-BjsptB07.js → softmax-DX6qXAbm.js} +2 -2
- package/dist/{split-BCbrzthj.js → split-CVwhL8Oe.js} +3 -3
- package/dist/{stack--cqr9Dgc.js → stack-S2-D2JAQ.js} +1 -1
- package/dist/{sum-B_92TaHD.js → sum-UdfvaNhB.js} +4 -4
- package/dist/{tensor-CfiPXsW4.js → tensor-IZex6Bwp.js} +1 -1
- package/dist/{tensor2d-tSxWdFMH.js → tensor2d-CqtBzOKq.js} +1 -1
- package/dist/{tfjs_backend-NucKez4s.js → tfjs_backend-DX9yVvwk.js} +41 -41
- package/dist/tokeniser/CharTokeniser.js +27 -27
- package/dist/tokeniser/bpe.d.ts +1 -0
- package/dist/tokeniser/bpe.js +38 -35
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +22 -1242
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +5 -5
- package/dist/training/sparseCrossEntropy.js +4 -4
- package/dist/utilities/dummy.js +2 -2
- package/dist/utilities/generate.js +3 -3
- package/dist/utilities/load.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/save.js +5 -5
- package/dist/utilities/weights.js +2 -2
- package/dist/variable-BGvK-VN3.js +23 -0
- package/dist/{zeros-NMYTayy7.js → zeros-CYMicyqz.js} +3 -3
- package/package.json +1 -1
- package/dist/BaseLayer-BhrMN8JO.js +0 -135
@@ -1,4 +1,4 @@
-import { o,
+import { o, j as t, E as c, _ as a, $ as e } from "./index-CnHyhpKc.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -15,11 +15,11 @@ import { o, i as t, E as c, _ as i, $ as a } from "./index-iNhkcAEQ.js";
  * limitations under the License.
  * =============================================================================
  */
-function
+function i(n) {
 const s = { x: t(n, "x", "cos", "float32") };
-return c.runKernel(
+return c.runKernel(a, s);
 }
-const f = /* @__PURE__ */ o({ cos_:
+const f = /* @__PURE__ */ o({ cos_: i });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -38,7 +38,7 @@ const f = /* @__PURE__ */ o({ cos_: e });
  */
 function x(n) {
 const s = { x: t(n, "x", "sin", "float32") };
-return c.runKernel(
+return c.runKernel(e, s);
 }
 const p = /* @__PURE__ */ o({ sin_: x });
 export {
@@ -1,4 +1,4 @@
-import { o as r,
+import { o as r, j as f, E as e, S as i } from "./index-CnHyhpKc.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -20,7 +20,7 @@ function l(s, o = -1) {
 if (o === -1 && (o = t.rank - 1), o !== t.rank - 1)
 throw Error(`Softmax along a non-last dimension is not yet supported. Logits was rank ${t.rank} and dim was ${o}`);
 const n = { logits: t }, a = { dim: o };
-return
+return e.runKernel(i, n, a);
 }
 const p = /* @__PURE__ */ r({ softmax_: l });
 export {
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { o as
|
|
1
|
+
import { o as p, j as i, E as a, x as c } from "./index-CnHyhpKc.js";
|
|
2
2
|
/**
|
|
3
3
|
* @license
|
|
4
4
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -16,10 +16,10 @@ import { o as i, i as p, E as a, w as c } from "./index-iNhkcAEQ.js";
|
|
|
16
16
|
* =============================================================================
|
|
17
17
|
*/
|
|
18
18
|
function e(t, s, o = 0) {
|
|
19
|
-
const n = { x:
|
|
19
|
+
const n = { x: i(t, "x", "split") }, r = { numOrSizeSplits: s, axis: o };
|
|
20
20
|
return a.runKernel(c, n, r);
|
|
21
21
|
}
|
|
22
|
-
const u = /* @__PURE__ */
|
|
22
|
+
const u = /* @__PURE__ */ p({ split_: e });
|
|
23
23
|
export {
|
|
24
24
|
u as s
|
|
25
25
|
};
|
|
@@ -1,4 +1,4 @@
-import { o as e,
+import { o as e, j as u, D as c, E as l, F as m } from "./index-CnHyhpKc.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -15,13 +15,13 @@ import { o as e, i as u, y as c, E as i, D as l } from "./index-iNhkcAEQ.js";
  * limitations under the License.
  * =============================================================================
  */
-function
+function i(t, o = null, n = !1) {
 let s = u(t, "x", "sum");
 s.dtype === "bool" && (s = c(s, "int32"));
 const r = { x: s }, a = { axis: o, keepDims: n };
-return
+return l.runKernel(m, r, a);
 }
-const f = /* @__PURE__ */ e({ sum_:
+const f = /* @__PURE__ */ e({ sum_: i });
 export {
 f as s
 };
@@ -1,11 +1,11 @@
-import { o as h,
-import { s as C, t as Ke, a as Ue, b as ve } from "./ops-
-import { r as Re, d as Ve } from "./dropout-
-import { r as u } from "./reshape-
-import { g as qe } from "./gather-
-import { s as Ge } from "./sum-
-import { m as A } from "./mat_mul-
-import { c as M } from "./concat-
+import { o as h, j as f, E as $, ap as Te, l as _, g as Ee, aq as xe, ar as Ie, as as Le, at as be, au as Ne, av as Ce, aw as Pe, b as H, ax as Fe, a9 as U, u as ae, q as ie, Q as le, c as fe, ay as he, aj as pe, az as je, t as S, D as $e, am as Me, a4 as Be } from "./index-CnHyhpKc.js";
+import { s as C, t as Ke, a as Ue, b as ve } from "./ops-DzQTmLIl.js";
+import { r as Re, d as Ve } from "./dropout-lQm_YyX3.js";
+import { r as u } from "./reshape-CTIbqjwm.js";
+import { g as qe } from "./gather-BWyutxwi.js";
+import { s as Ge } from "./sum-UdfvaNhB.js";
+import { m as A } from "./mat_mul-DeGU1U_C.js";
+import { c as M } from "./concat-BRRtq4S2.js";
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -51,14 +51,14 @@ function We(e, n, t) {
 return $.runKernel(xe, s, o);
 }
 const Ye = /* @__PURE__ */ h({ clipByValue_: We });
-function
+function Qe(e) {
 return M(
 e,
 0
 /* axis */
 );
 }
-const
+const He = /* @__PURE__ */ h({ concat1d_: Qe });
 function Xe(e, n) {
 return M(e, n);
 }
@@ -131,7 +131,7 @@ const un = /* @__PURE__ */ h({ leakyRelu_: on });
  */
 function cn(e, n) {
 const t = f(e, "x", "prelu"), r = f(n, "alpha", "prelu"), s = { x: t, alpha: r };
-return $.runKernel(
+return $.runKernel(be, s);
 }
 const an = /* @__PURE__ */ h({ prelu_: cn });
 /**
@@ -152,7 +152,7 @@ const an = /* @__PURE__ */ h({ prelu_: cn });
  */
 function ln(e) {
 const t = { x: f(e, "x", "relu") };
-return $.runKernel(
+return $.runKernel(Ne, t);
 }
 const fn = /* @__PURE__ */ h({ relu_: ln });
 /**
@@ -301,7 +301,7 @@ function An(e, n, t) {
 if (t == null || t === "linear")
 return e;
 if (t === "relu")
-return
+return H(e, wn(n));
 throw new Error(`Cannot compute gradient for fused activation ${t}.`);
 }
 function Sn(e, n) {
@@ -359,12 +359,12 @@ function _n({ a: e, b: n, transposeA: t = !1, transposeB: r = !1, bias: s, activ
 i != null && (se = f(i, "prelu weights", "fused matMul"));
 const oe = (D, P) => {
 const [y, O, T, B] = P, w = An(u(D, T.shape), T, o);
-let L,
-if (!t && !r ? (L = A(w, O, !1, !0),
+let L, b;
+if (!t && !r ? (L = A(w, O, !1, !0), b = A(y, w, !0, !1)) : !t && r ? (L = A(w, O, !1, !1), b = A(w, y, !0, !1)) : t && !r ? (L = A(O, w, !1, !0), b = A(y, w, !1, !1)) : (L = A(O, w, !0, !0), b = A(w, y, !0, !0)), s != null) {
 const De = Sn(B, w);
-return [L,
+return [L, b, De];
 } else
-return [L,
+return [L, b];
 }, ue = {
 a: V,
 b: q,
@@ -466,11 +466,11 @@ function Wn(e) {
 function Yn(e) {
 return Array.isArray(e) ? e : [e];
 }
-function
+function Qn(e) {
 const t = e.replace(/(.)([A-Z][a-z0-9]+)/g, "$1_$2").replace(/([a-z])([A-Z])/g, "$1_$2").toLowerCase();
 return t[0] !== "_" ? t : "private" + t;
 }
-function
+function Hn(e) {
 return e.length <= 1 || e.indexOf("_") === -1 ? e : e.replace(/[_]+(\w|$)/g, (n, t) => t.toUpperCase());
 }
 let m = {};
@@ -593,8 +593,8 @@ function ot(e) {
  * https://opensource.org/licenses/MIT.
  * =============================================================================
  */
-const
-function
+const N = /* @__PURE__ */ new Map();
+function bn(e) {
 v(Dn, "DataFormat", e);
 }
 function ut(e) {
@@ -616,22 +616,22 @@ function it(e, n) {
 throw F.pop(), t;
 }
 }
-function
+function Nn() {
 return F.length === 0 ? "" : F.join(ge) + ge;
 }
 function lt(e) {
 if (!Oe(e))
 throw new Error("Not a valid tensor name: '" + e + "'");
-return
+return Nn() + e;
 }
 function ft(e) {
 if (!Oe(e))
 throw new Error("Not a valid tensor name: '" + e + "'");
-
-const n =
-if (
+N.has(e) || N.set(e, 0);
+const n = N.get(e);
+if (N.set(e, N.get(e) + 1), n > 0) {
 const t = `${e}_${n}`;
-return
+return N.set(t, 1), t;
 } else
 return e;
 }
@@ -835,7 +835,7 @@ function yt(e, n = -1) {
 function Ot(e, n) {
 switch (e.rank) {
 case 1:
-return
+return He([e, n]);
 case 2:
 return ze([e, n], 0);
 case 3:
@@ -868,7 +868,7 @@ function Dt(e, n, t, r) {
 b: n,
 transposeA: !1,
 transposeB: !1,
-bias: r ?
+bias: r ? Q(e.rank, r, Y()) : null,
 activation: t
 });
 {
@@ -882,7 +882,7 @@ function Dt(e, n, t, r) {
 b: n,
 transposeA: !1,
 transposeB: !1,
-bias: r ?
+bias: r ? Q(e.rank, r, Y()) : null,
 activation: t
 }), g);
 }
@@ -891,9 +891,9 @@ function Tt(e, n, t) {
 return S(() => (Array.isArray(n) ? n = Ke(n, "int32") : n = $e(n, "int32"), qe(e, n, t)));
 }
 function Et(e) {
-return
+return H(e, e);
 }
-function
+function Q(e, n, t) {
 const r = n.shape;
 if (n.rank !== 1 && n.rank !== e)
 throw new l(`Unexpected bias dimensions: ${n.rank}; expected it to be 1 or ${e}`);
@@ -917,7 +917,7 @@ function H(e, n, t) {
 throw new l(`Unsupported input rank by biasAdd: ${n.rank}`);
 }
 function xt(e, n, t) {
-return S(() => (t == null && (t = Y()),
+return S(() => (t == null && (t = Y()), bn(t), U(e, Q(e.rank, n, t))));
 }
 function It(e, n = 1) {
 if (n !== 1)
@@ -927,12 +927,12 @@ function It(e, n = 1) {
 function Lt(e) {
 return S(() => Me(e, U(Be(e), 1)));
 }
-function
+function bt(e, n, t, r) {
 return S(() => Ve(e, n, t, r));
 }
-function
+function Nt(e) {
 return S(() => {
-const n = U(0.5,
+const n = U(0.5, H(0.2, e));
 return Ye(n, 0, 1);
 });
 }
@@ -943,7 +943,7 @@ export {
 Ln as $,
 Ae as A,
 $t as B,
-
+Hn as C,
 nt as D,
 et as E,
 Jn as F,
@@ -955,7 +955,7 @@ export {
 Zn as L,
 It as M,
 j as N,
-
+Nt as O,
 Lt as P,
 un as Q,
 Se as R,
@@ -976,7 +976,7 @@ export {
 pt as a4,
 Ot as a5,
 Ct as a6,
-
+bt as a7,
 yt as a8,
 At as a9,
 kt as aa,
@@ -985,7 +985,7 @@ export {
 Sn as b,
 v as c,
 Dt as d,
-
+bn as e,
 _e as f,
 An as g,
 zn as h,
@@ -1000,7 +1000,7 @@ export {
 Ye as q,
 _t as r,
 On as s,
-
+Qn as t,
 gt as u,
 fn as v,
 st as w,
@@ -10,14 +10,14 @@ class b extends k {
 constructor(t) {
 if (super(), Array.isArray(t)) {
 if (this.vocab = t, this.vocab.length > 0)
-this.vocabSize = this.vocab.length, this.eosToken = this.vocab.indexOf("<eos>"), this.unkToken = this.vocab.indexOf("<unk>"), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf("<pad>")), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf("_")), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf(" ")), this.unkToken === -1 && (this.unkToken = this.eosToken), this.vocab.forEach((
-this.cache.set(
+this.vocabSize = this.vocab.length, this.eosToken = this.vocab.indexOf("<eos>"), this.unkToken = this.vocab.indexOf(""), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf("<unk>")), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf("<pad>")), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf("_")), this.unkToken === -1 && (this.unkToken = this.vocab.indexOf(" ")), this.unkToken === -1 && (this.unkToken = this.eosToken), this.vocab = this.vocab.map((e) => e === "<pad>" ? "" : e), this.vocab.forEach((e, n) => {
+this.cache.set(e, n);
 });
 else
 throw new Error("Vocab cannot be empty");
 this._trained = !0;
 } else
-this.vocabSize = t, this.vocab = new Array(this.vocabSize).fill("
+this.vocabSize = t, this.vocab = new Array(this.vocabSize).fill(""), this.vocab[0] = "<eos>", this.vocab[1] = "", this.eosToken = 0, this.unkToken = 1, this.cache.set("<eos>", 0), this.cache.set("", 1);
 }
 get trained() {
 return this.vocab.length === this.vocabSize && this._trained;
@@ -25,36 +25,36 @@ class b extends k {
 destroy() {
 }
 async train(t) {
-const
-if (
+const e = t.map((i) => i.split("")).flat(), n = new Set(e), s = Array.from(n), h = this.vocab.indexOf("", this.unkToken + 1), o = this.vocabSize - u.length;
+if (h === -1)
 return this.vocabSize;
-if (this._trained = !0,
-const
-
-
-}),
+if (this._trained = !0, s.length > o) {
+const i = /* @__PURE__ */ new Map();
+e.forEach((a) => {
+i.set(a, (i.get(a) || 0) + 1);
+}), s.sort((a, r) => (i.get(a) || 0) - (i.get(r) || 0)), s.splice(0, s.length - o);
 }
-let
-if (
-const
-for (const
-if (!
+let c = h;
+if (c !== -1) {
+const i = new Set(this.vocab);
+for (const a of s)
+if (!i.has(a) && (this.vocab[c] = a, i.add(a), c = this.vocab.indexOf("", c + 1), c === -1))
 break;
 }
-return this.cache.clear(), this.vocab.forEach((
-this.cache.set(
+return this.cache.clear(), this.vocab.forEach((i, a) => {
+this.cache.set(i, a);
 }), this.emit("trainStatus", "trained"), this.vocabSize;
 }
-async tokenise(t,
+async tokenise(t, e) {
 if (!this.trained)
 throw new Error("Tokeniser not trained");
-return t.map((
-const
-return
+return t.map((s) => e ? s.split("").map((h) => this.cache.get(h) ?? this.unkToken) : s.split("").map((h) => {
+const o = this.cache.get(h);
+return o !== void 0 ? this.vocab[o] : "";
 }));
 }
 async detokenise(t) {
-return t.map((n) => n.map((
+return t.map((n) => n.map((s) => this.vocab[s]).join(""));
 }
 async encode(t) {
 return (await this.tokenise([t], !0))[0];
@@ -68,11 +68,11 @@ class b extends k {
 async getMerges() {
 return [];
 }
-async createTrainingData(t,
-const n = await this.tokenise(t, !0),
-for (let
-
-return [
+async createTrainingData(t, e = 5) {
+const n = await this.tokenise(t, !0), s = [], h = [];
+for (let o = 0; o < n.length - e; o++)
+s.push(...n[o].slice(0, e)), h.push(n[o + 1][0]);
+return [s, h];
 }
 }
 export {
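The CharTokeniser hunks above switch the unknown token from `<unk>` to the empty string and rewrite any `<pad>` entries to `""` before the lookup cache is built. A minimal sketch of the fallback order the new constructor appears to use; `resolveUnkToken` is a hypothetical helper for illustration, not part of the package:

```ts
// Sketch only: mirrors the unknown-token fallback visible in the 0.5.2 constructor.
// `resolveUnkToken` is a hypothetical helper, not an export of @genai-fi/nanogpt.
function resolveUnkToken(vocab: string[], eosToken: number): number {
  for (const candidate of ["", "<unk>", "<pad>", "_", " "]) {
    const index = vocab.indexOf(candidate);
    if (index !== -1) return index;
  }
  return eosToken; // last resort: reuse the <eos> slot, as the constructor does
}

const vocab = ["<eos>", "<pad>", "a", "b", "c"];
const unkToken = resolveUnkToken(vocab, vocab.indexOf("<eos>")); // 1 (the old <pad> slot)
const cleaned = vocab.map((entry) => (entry === "<pad>" ? "" : entry)); // "<pad>" becomes ""
console.log({ unkToken, cleaned });
```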
package/dist/tokeniser/bpe.d.ts CHANGED
@@ -12,6 +12,7 @@ export default class BPETokeniser extends EE<'trainStatus'> implements ITokenise
 get trained(): boolean;
 get vocabSize(): number;
 get eosToken(): number;
+get unkToken(): number;
 train(text: string[]): Promise<number>;
 getVocab(): string[];
 getMerges(): Promise<[string, string][]>;
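The only declaration change here is the new public `unkToken` getter. A hedged usage sketch, assuming the import path below and the numeric (target-vocab-size) constructor form visible in `bpe.js`; tokens missing from the learned vocabulary encode to this id:

```ts
// Assumed import path; adjust to however your bundler resolves the package.
import BPETokeniser from "@genai-fi/nanogpt/dist/tokeniser/bpe.js";

async function demo(): Promise<void> {
  const tokeniser = new BPETokeniser(64); // numeric argument = target vocab size
  await tokeniser.train(["hello world", "hello there"]);

  const ids = await tokeniser.encode("hello zebra");
  const unknownCount = ids.filter((id) => id === tokeniser.unkToken).length;
  console.log({ unkToken: tokeniser.unkToken, unknownCount });
}

demo().catch(console.error);
```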
package/dist/tokeniser/bpe.js CHANGED
@@ -1,5 +1,5 @@
-import
-import { E as
+import l from "../utilities/tokenParse.js";
+import { E as f } from "../index-Dwqa6Zy2.js";
 function u(o, e) {
 return `${o}-::-${e}`;
 }
@@ -7,25 +7,25 @@ function k(o) {
 const e = /* @__PURE__ */ new Map();
 for (let s = 0; s < o.length; s++) {
 const t = o[s];
-for (let
-const
-a: t[
-b: t[
+for (let r = 0; r < t.length - 1; r++) {
+const n = u(t[r], t[r + 1]), a = e.get(n) || {
+a: t[r],
+b: t[r + 1],
 count: 0,
 instances: /* @__PURE__ */ new Set()
 };
-a.count += 1, a.instances.add(s), e.set(
+a.count += 1, a.instances.add(s), e.set(n, a);
 }
 }
 return { pairs: e, tokens: o };
 }
-function h(o, e, s, t,
-const
-if (o.pairs.has(
-const a = o.pairs.get(
-a.count +=
+function h(o, e, s, t, r) {
+const n = u(e, s);
+if (o.pairs.has(n)) {
+const a = o.pairs.get(n);
+a.count += r, r > 0 ? a.instances.add(t) : a.count <= 0 ? o.pairs.delete(n) : a.instances.delete(t);
 } else
-o.pairs.set(
+o.pairs.set(n, { a: e, b: s, count: r, instances: /* @__PURE__ */ new Set([t]) });
 }
 function b(o) {
 let e = null, s = 0;
@@ -36,33 +36,33 @@ function b(o) {
 function d(o, e) {
 return o.map((s) => {
 const t = [];
-for (let
-
+for (let r = 0; r < s.length; r++)
+r < s.length - 1 && s[r] === e[0] && s[r + 1] === e[1] ? (t.push(e[0] + e[1]), r++) : t.push(s[r]);
 return t;
 });
 }
 function m(o, e) {
 e.instances.forEach((s) => {
-const t = o.tokens[s],
-for (let
-if (
+const t = o.tokens[s], r = [];
+for (let n = 0; n < t.length; n++)
+if (n < t.length - 1 && t[n] === e.a && t[n + 1] === e.b) {
 const a = e.a + e.b;
-
+r.push(a), n > 0 && (h(o, t[n - 1], e.a, s, -1), h(o, t[n - 1], a, s, 1)), n++, n < t.length - 1 && (h(o, e.b, t[n + 1], s, -1), h(o, a, t[n + 1], s, 1));
 } else
-
-o.tokens[s] =
+r.push(t[n]);
+o.tokens[s] = r;
 }), o.pairs.delete(u(e.a, e.b));
 }
-class S extends
+class S extends f {
 targetSize;
 vocab = /* @__PURE__ */ new Set();
 vocabIndex = /* @__PURE__ */ new Map();
 merges = [];
 pretokenMap = /* @__PURE__ */ new Map();
 constructor(e, s) {
-super(), Array.isArray(e) ? (e.forEach((t,
-this.vocab.add(t), this.vocabIndex.set(t,
-}), s && (this.merges = s), this.targetSize = e.length) : (this.vocab.add("<eos>"), this.vocab.add("
+super(), Array.isArray(e) ? (e.forEach((t, r) => {
+this.vocab.add(t), this.vocabIndex.set(t, r);
+}), s && (this.merges = s), this.targetSize = e.length) : (this.vocab.add("<eos>"), this.vocab.add(""), this.targetSize = e);
 }
 destroy() {
 this.vocab.clear(), this.vocabIndex.clear(), this.merges = [], this.pretokenMap.clear();
@@ -76,23 +76,26 @@ class S extends g {
 get eosToken() {
 return this.vocabIndex.get("<eos>") ?? 0;
 }
+get unkToken() {
+return this.vocabIndex.get("") ?? 1;
+}
 async train(e) {
-const s = e.map((i) =>
-this.vocab = /* @__PURE__ */ new Set(), this.pretokenMap.clear(), this.merges = [], this.vocab.add("<eos>");
-const
+const s = e.map((i) => l(i)).flat(1), t = new Set(s);
+this.vocab = /* @__PURE__ */ new Set(), this.pretokenMap.clear(), this.merges = [], this.vocab.add("<eos>"), this.vocab.add("");
+const r = Array.from(t), n = r.map((i) => Array.from(i).map((c) => (this.vocab.add(c), c))), a = k(n);
 for (; this.vocab.size < this.targetSize && this.merges.length < this.targetSize; ) {
 const i = b(a);
 if (!i)
 break;
 this.merges.push([i.a, i.b]), this.vocab.add(i.a + i.b), m(a, i);
 }
-
-const c =
+r.forEach((i, p) => {
+const c = n[p];
 this.pretokenMap.set(i, c);
 }), this.vocabIndex.clear();
-let
+let g = 0;
 for (const i of this.vocab.keys())
-this.vocabIndex.set(i,
+this.vocabIndex.set(i, g++);
 return this.emit("trainStatus", "trained"), this.vocab.size;
 }
 getVocab() {
@@ -108,15 +111,15 @@ class S extends g {
 }), this.pretokenMap.set(e, s), s;
 }
 tokeniseStrings(e) {
-return e.map((s) =>
+return e.map((s) => l(s).map((n) => this.pretokenMap.has(n) ? this.pretokenMap.get(n) : this.tokeniseWord(n)).flat(1));
 }
 async tokenise(e, s) {
 const t = this.tokeniseStrings(e);
-return s ? t.map((
+return s ? t.map((r) => r.map((n) => this.vocabIndex.get(n) ?? this.unkToken)) : t.map((r) => r.map((n) => this.vocab.has(n) ? n : ""));
 }
 async detokenise(e) {
 const s = this.getVocab();
-return e.map((
+return e.map((r) => r.map((n) => s[n]).join(""));
 }
 async encode(e) {
 return (await this.tokenise([e], !0))[0];
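Most of the `bpe.js` hunks are variable renames from re-minification, but the pair bookkeeping is worth noting: when a merge is applied, only the words recorded for that pair are revisited, and the counts of neighbouring pairs are adjusted by ±1 instead of recounting the corpus. A standalone sketch of that bookkeeping, in the style of the minified `h` helper above, under assumed names (`PairEntry`, `bumpPair`); the package's internal shapes may differ:

```ts
// Incremental pair-count bookkeeping, as used while applying BPE merges.
// Names here are illustrative, not the package's internals.
interface PairEntry {
  a: string;
  b: string;
  count: number;
  instances: Set<number>; // indices of the words that currently contain this pair
}

type PairStats = { pairs: Map<string, PairEntry>; tokens: string[][] };

const pairKey = (a: string, b: string): string => `${a}-::-${b}`;

function bumpPair(stats: PairStats, a: string, b: string, word: number, delta: number): void {
  const key = pairKey(a, b);
  const entry = stats.pairs.get(key);
  if (entry) {
    entry.count += delta;
    if (delta > 0) entry.instances.add(word);
    else if (entry.count <= 0) stats.pairs.delete(key); // pair no longer occurs anywhere
    else entry.instances.delete(word);
  } else {
    stats.pairs.set(key, { a, b, count: delta, instances: new Set([word]) });
  }
}
```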
package/dist/training/AdamExt.js CHANGED
@@ -1,4 +1,4 @@
-import { A as r, b as c, f as h, s as g, e as o } from "../index-
+import { A as r, b as c, f as h, s as g, e as o } from "../index-CnHyhpKc.js";
 class u extends r {
 constructor(t, e, s, a, i) {
 super(t, e, s, a), this.config = i, this.startLearningRate = t;