@genai-fi/nanogpt 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +6 -6
- package/dist/NanoGPTModel.js +9 -9
- package/dist/{RealDiv-7xu-pkZN.js → RealDiv-BYViZwhN.js} +6 -6
- package/dist/{Reshape-BYC1oUku.js → Reshape-t7Kcikjk.js} +2 -2
- package/dist/TeachableLLM.js +5 -5
- package/dist/{TiedEmbedding-C1HBot-5.js → TiedEmbedding-9WeDwvjO.js} +4 -4
- package/dist/{axis_util-CCNL7jea.js → axis_util-Bu4h7XWV.js} +1 -1
- package/dist/{broadcast_to-CddAF879.js → broadcast_to-DARN-DBD.js} +2 -2
- package/dist/{concat-XOK9ANZu.js → concat-5aPGqw3Z.js} +8 -8
- package/dist/{dataset-BFFipD1c.js → dataset-pgqp-YfL.js} +5 -5
- package/dist/{dropout-xlKRoJyU.js → dropout-Bciw46HT.js} +10 -10
- package/dist/{gather-DKtUaTtA.js → gather-DjyCjmOD.js} +1 -1
- package/dist/{gpgpu_math-B_ycgZ4W.js → gpgpu_math-CNslybmD.js} +31 -31
- package/dist/{index-CamYe_M8.js → index-BAzbokzv.js} +31 -31
- package/dist/{kernel_funcs_utils-D5MS0JFg.js → kernel_funcs_utils-CUxJCg0g.js} +5 -5
- package/dist/layers/BaseLayer.js +2 -2
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/MLP.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +4 -4
- package/dist/layers/TiedEmbedding.js +5 -5
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/loader/load.d.ts +13 -0
- package/dist/loader/load.js +27 -0
- package/dist/loader/loadHF.d.ts +7 -0
- package/dist/loader/loadHF.js +22 -0
- package/dist/{utilities/load.d.ts → loader/loadTransformers.d.ts} +11 -11
- package/dist/loader/loadTransformers.js +28 -0
- package/dist/loader/newZipLoad.d.ts +8 -0
- package/dist/loader/newZipLoad.js +21 -0
- package/dist/loader/oldZipLoad.d.ts +7 -0
- package/dist/loader/oldZipLoad.js +76 -0
- package/dist/{log_sum_exp-CV_5-TTu.js → log_sum_exp-YEo2h3gb.js} +16 -16
- package/dist/main.js +4 -4
- package/dist/{mat_mul-CAbRFWUj.js → mat_mul-7121rsJk.js} +4 -4
- package/dist/{max-JBBv7aUf.js → max-DtlIuVeW.js} +3 -3
- package/dist/{mulmat_packed_gpu-DW4doKL_.js → mulmat_packed_gpu-D4nKF7Je.js} +1 -1
- package/dist/{norm-B9dQTFYn.js → norm-CzltS9Fz.js} +10 -10
- package/dist/{ones-CMHNqMr6.js → ones-BBlSRqn1.js} +2 -2
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +6 -6
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +9 -9
- package/dist/ops/cpu/gelu.js +1 -1
- package/dist/ops/cpu/matMulGelu.js +1 -1
- package/dist/ops/cpu/matMulMul.js +1 -1
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +14 -14
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +1 -1
- package/dist/ops/grads/attentionMask.js +1 -1
- package/dist/ops/grads/fusedSoftmax.js +4 -4
- package/dist/ops/grads/gelu.js +1 -1
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/normRMS.js +1 -1
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/matMulGelu.js +1 -1
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/rope.js +4 -4
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +4 -4
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.js +3 -3
- package/dist/ops/webgl/matMulGelu.js +17 -17
- package/dist/ops/webgl/matMulMul.js +1 -1
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/{ops-DqtYemmV.js → ops-C0sQEcPw.js} +78 -78
- package/dist/{random_width-CLMQG5Jn.js → random_width-DWzaOgrn.js} +22 -22
- package/dist/{range-DqYjKnuG.js → range-DYsrnfiy.js} +1 -1
- package/dist/{reciprocal-z49filta.js → reciprocal-CJQeasVa.js} +1 -1
- package/dist/{register_all_kernels-COt6wLD0.js → register_all_kernels-BfFCQAqs.js} +28 -28
- package/dist/{reshape-C45vIIRU.js → reshape-krWGKraP.js} +1 -1
- package/dist/{scatter_nd_util-qgtnviTE.js → scatter_nd_util-93ln7Hut.js} +3 -3
- package/dist/{selu_util-4QV_GXTB.js → selu_util-sntGesxr.js} +41 -41
- package/dist/{shared-ByfrGA97.js → shared-Ca6iDobD.js} +6 -6
- package/dist/{sin-9JBrfVaB.js → sin-D_h-qCSx.js} +1 -1
- package/dist/{softmax-DvMvui-_.js → softmax-fsdtf6JC.js} +1 -1
- package/dist/{split-DxrHrPFK.js → split-eiktj-6L.js} +4 -4
- package/dist/{stack-DgaoDmnF.js → stack-dfEEz2OY.js} +2 -2
- package/dist/{sum-BpcpxNEh.js → sum-BE_Irnim.js} +3 -3
- package/dist/{tensor-CDz5x1mP.js → tensor-Xyi595sG.js} +1 -1
- package/dist/{tensor2d-jO8JY5Jd.js → tensor2d-CPEkynbH.js} +1 -1
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +3 -3
- package/dist/training/sparseCrossEntropy.js +5 -5
- package/dist/utilities/dummy.js +2 -2
- package/dist/utilities/generate.js +3 -3
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/save.js +1 -1
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-CLVXjN7F.js → variable-wSS22xj5.js} +1 -1
- package/dist/{zeros-DUkkVccu.js → zeros-YJDE7oRb.js} +10 -10
- package/package.json +3 -3
- package/dist/utilities/load.js +0 -99
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { r as ce } from "./reshape-
|
|
3
|
-
import { s as ae } from "./sum-
|
|
1
|
+
import { i as N, ao as H, o as _, q as S, E as L, ap as te, aq as ne, al as se, an as re, ar as ie, as as oe, at as le, a as ue, au as fe, av as O } from "./index-BAzbokzv.js";
|
|
2
|
+
import { r as ce } from "./reshape-krWGKraP.js";
|
|
3
|
+
import { s as ae } from "./sum-BE_Irnim.js";
|
|
4
4
|
/**
|
|
5
5
|
* @license
|
|
6
6
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -51,7 +51,7 @@ function X(e, t, n, s, r, u, i = !1, o = "channelsLast") {
|
|
|
51
51
|
[c, g, a, p] = e;
|
|
52
52
|
else
|
|
53
53
|
throw new Error(`Unknown dataFormat ${o}`);
|
|
54
|
-
const [l, h, , d] = t, [A, m] = T(n), [b,
|
|
54
|
+
const [l, h, , d] = t, [A, m] = T(n), [b, k] = T(s), f = G(l, b), E = G(h, k), { padInfo: w, outHeight: I, outWidth: x } = de(r, a, p, A, m, f, E, u, o), y = i ? d * g : d;
|
|
55
55
|
let $;
|
|
56
56
|
return o === "channelsFirst" ? $ = [c, y, I, x] : o === "channelsLast" && ($ = [c, I, x, y]), {
|
|
57
57
|
batchSize: c,
|
|
@@ -62,15 +62,15 @@ function X(e, t, n, s, r, u, i = !1, o = "channelsLast") {
|
|
|
62
62
|
outHeight: I,
|
|
63
63
|
outWidth: x,
|
|
64
64
|
outChannels: y,
|
|
65
|
-
padInfo:
|
|
65
|
+
padInfo: w,
|
|
66
66
|
strideHeight: A,
|
|
67
67
|
strideWidth: m,
|
|
68
68
|
filterHeight: l,
|
|
69
69
|
filterWidth: h,
|
|
70
70
|
effectiveFilterHeight: f,
|
|
71
|
-
effectiveFilterWidth:
|
|
71
|
+
effectiveFilterWidth: E,
|
|
72
72
|
dilationHeight: b,
|
|
73
|
-
dilationWidth:
|
|
73
|
+
dilationWidth: k,
|
|
74
74
|
inShape: e,
|
|
75
75
|
outShape: $,
|
|
76
76
|
filterShape: t
|
|
@@ -84,22 +84,22 @@ function he(e, t, n, s, r, u = !1, i = "channelsLast", o) {
|
|
|
84
84
|
[c, l, a, p, g] = e;
|
|
85
85
|
else
|
|
86
86
|
throw new Error(`Unknown dataFormat ${i}`);
|
|
87
|
-
const [h, d, A, , m] = t, [b,
|
|
87
|
+
const [h, d, A, , m] = t, [b, k, f] = W(n), [E, w, I] = W(s), x = G(h, E), y = G(d, w), $ = G(A, I), { padInfo: C, outDepth: M, outHeight: D, outWidth: F } = me(r, a, p, g, b, k, f, x, y, $, o), V = u ? m * l : m;
|
|
88
88
|
let U;
|
|
89
|
-
return i === "channelsFirst" ? U = [c, V,
|
|
89
|
+
return i === "channelsFirst" ? U = [c, V, M, D, F] : i === "channelsLast" && (U = [c, M, D, F, V]), {
|
|
90
90
|
batchSize: c,
|
|
91
91
|
dataFormat: i,
|
|
92
92
|
inDepth: a,
|
|
93
93
|
inHeight: p,
|
|
94
94
|
inWidth: g,
|
|
95
95
|
inChannels: l,
|
|
96
|
-
outDepth:
|
|
96
|
+
outDepth: M,
|
|
97
97
|
outHeight: D,
|
|
98
98
|
outWidth: F,
|
|
99
99
|
outChannels: V,
|
|
100
100
|
padInfo: C,
|
|
101
101
|
strideDepth: b,
|
|
102
|
-
strideHeight:
|
|
102
|
+
strideHeight: k,
|
|
103
103
|
strideWidth: f,
|
|
104
104
|
filterDepth: h,
|
|
105
105
|
filterHeight: d,
|
|
@@ -107,8 +107,8 @@ function he(e, t, n, s, r, u = !1, i = "channelsLast", o) {
|
|
|
107
107
|
effectiveFilterDepth: x,
|
|
108
108
|
effectiveFilterHeight: y,
|
|
109
109
|
effectiveFilterWidth: $,
|
|
110
|
-
dilationDepth:
|
|
111
|
-
dilationHeight:
|
|
110
|
+
dilationDepth: E,
|
|
111
|
+
dilationHeight: w,
|
|
112
112
|
dilationWidth: I,
|
|
113
113
|
inShape: e,
|
|
114
114
|
outShape: U,
|
|
@@ -175,8 +175,8 @@ function me(e, t, n, s, r, u, i, o, c, a, p) {
|
|
|
175
175
|
l = m[0], h = m[1], d = m[2];
|
|
176
176
|
} else if (e === "same") {
|
|
177
177
|
l = Math.ceil(t / r), h = Math.ceil(n / u), d = Math.ceil(s / i);
|
|
178
|
-
const A = (l - 1) * r + o - t, m = (h - 1) * u + c - n, b = (d - 1) * i + a - s,
|
|
179
|
-
g = { top:
|
|
178
|
+
const A = (l - 1) * r + o - t, m = (h - 1) * u + c - n, b = (d - 1) * i + a - s, k = Math.floor(A / 2), f = A - k, E = Math.floor(m / 2), w = m - E, I = Math.floor(b / 2), x = b - I;
|
|
179
|
+
g = { top: E, bottom: w, left: I, right: x, front: k, back: f, type: "SAME" };
|
|
180
180
|
} else
|
|
181
181
|
throw Error(`Unknown padding parameter: ${e}`);
|
|
182
182
|
return { padInfo: g, outDepth: l, outHeight: h, outWidth: d };
|
|
@@ -244,11 +244,11 @@ function Qe(e, t, n) {
|
|
|
244
244
|
* limitations under the License.
|
|
245
245
|
* =============================================================================
|
|
246
246
|
*/
|
|
247
|
-
function
|
|
247
|
+
function Ee(e) {
|
|
248
248
|
const n = { x: S(e, "x", "sigmoid", "float32") };
|
|
249
249
|
return L.runKernel(te, n);
|
|
250
250
|
}
|
|
251
|
-
const
|
|
251
|
+
const we = /* @__PURE__ */ _({ sigmoid_: Ee });
|
|
252
252
|
/**
|
|
253
253
|
* @license
|
|
254
254
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -311,7 +311,7 @@ function ye(e, t) {
|
|
|
311
311
|
const n = S(e, "x", "prelu"), s = S(t, "alpha", "prelu"), r = { x: n, alpha: s };
|
|
312
312
|
return L.runKernel(re, r);
|
|
313
313
|
}
|
|
314
|
-
const
|
|
314
|
+
const Me = /* @__PURE__ */ _({ prelu_: ye });
|
|
315
315
|
/**
|
|
316
316
|
* @license
|
|
317
317
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -328,11 +328,11 @@ const ke = /* @__PURE__ */ _({ prelu_: ye });
|
|
|
328
328
|
* limitations under the License.
|
|
329
329
|
* =============================================================================
|
|
330
330
|
*/
|
|
331
|
-
function
|
|
331
|
+
function ke(e) {
|
|
332
332
|
const n = { x: S(e, "x", "relu") };
|
|
333
333
|
return L.runKernel(ie, n);
|
|
334
334
|
}
|
|
335
|
-
const Se = /* @__PURE__ */ _({ relu_:
|
|
335
|
+
const Se = /* @__PURE__ */ _({ relu_: ke });
|
|
336
336
|
/**
|
|
337
337
|
* @license
|
|
338
338
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -413,11 +413,11 @@ function ze(e, t, n, s) {
|
|
|
413
413
|
if (t === "relu6")
|
|
414
414
|
return Ne(e);
|
|
415
415
|
if (t === "prelu")
|
|
416
|
-
return
|
|
416
|
+
return Me(e, n);
|
|
417
417
|
if (t === "leakyrelu")
|
|
418
418
|
return xe(e, s);
|
|
419
419
|
if (t === "sigmoid")
|
|
420
|
-
return
|
|
420
|
+
return we(e);
|
|
421
421
|
throw new Error(`Unknown fused activation ${t}.`);
|
|
422
422
|
}
|
|
423
423
|
const et = (e, t) => !(e > 0) || t === "linear";
|
|
@@ -600,43 +600,43 @@ function We(e, t, n, s, r, u, i, o, c) {
|
|
|
600
600
|
for (let f = 0; f < e.length; ++f) {
|
|
601
601
|
if (l.strides[f] === 0)
|
|
602
602
|
throw Error(`strides[${f}] must be non-zero`);
|
|
603
|
-
const
|
|
604
|
-
if (
|
|
605
|
-
m.push(
|
|
603
|
+
const E = !!(l.shrinkAxisMask & 1 << f), w = e[f];
|
|
604
|
+
if (w === -1) {
|
|
605
|
+
m.push(E ? 1 : -1);
|
|
606
606
|
continue;
|
|
607
607
|
}
|
|
608
608
|
const I = [l.beginMask & 1 << f, l.endMask & 1 << f], x = [
|
|
609
609
|
l.strides[f] > 0 ? 0 : -1,
|
|
610
|
-
l.strides[f] > 0 ?
|
|
610
|
+
l.strides[f] > 0 ? w : w - 1
|
|
611
611
|
];
|
|
612
|
-
if (
|
|
612
|
+
if (E && l.strides[f] <= 0)
|
|
613
613
|
throw Error("only stride 1 allowed on non-range indexing.");
|
|
614
614
|
A = A && l.strides[f] === 1;
|
|
615
615
|
const y = !!(l.beginMask & 1 << f && l.endMask & 1 << f);
|
|
616
616
|
if (l.beginValid && l.endValid) {
|
|
617
|
-
if (
|
|
618
|
-
const D = l.begin[f] < 0 ?
|
|
619
|
-
if (l.begin[f] = D, l.end[f] = l.begin[f] + 1, D < 0 || D >=
|
|
617
|
+
if (E) {
|
|
618
|
+
const D = l.begin[f] < 0 ? w + l.begin[f] : l.begin[f];
|
|
619
|
+
if (l.begin[f] = D, l.end[f] = l.begin[f] + 1, D < 0 || D >= w)
|
|
620
620
|
throw Error(`slice index ${l.begin[f]} of dimension ${f} out of bounds.`);
|
|
621
621
|
} else
|
|
622
|
-
l.begin[f] = K(l.begin[f], 0, l.strides[f],
|
|
623
|
-
const
|
|
624
|
-
h = h &&
|
|
622
|
+
l.begin[f] = K(l.begin[f], 0, l.strides[f], w, I, x), l.end[f] = K(l.end[f], 1, l.strides[f], w, I, x);
|
|
623
|
+
const M = l.strides[f] === 1 && l.begin[f] === 0 && l.end[f] === w;
|
|
624
|
+
h = h && M, d = d && (f === 0 && l.strides[f] === 1 || M);
|
|
625
625
|
} else
|
|
626
626
|
h = h && l.strides[f] === 1 && y, d = d && (f === 0 && l.strides[f] === 1 || y);
|
|
627
627
|
let $, C = !1;
|
|
628
|
-
if (l.beginValid && l.endValid ? ($ = l.end[f] - l.begin[f], C = !0) :
|
|
629
|
-
let
|
|
630
|
-
$ === 0 || $ < 0 != l.strides[f] < 0 ?
|
|
628
|
+
if (l.beginValid && l.endValid ? ($ = l.end[f] - l.begin[f], C = !0) : E ? ($ = 1, C = !0) : y && w >= 0 && (l.strides[f] < 0 ? $ = -w : $ = w, C = !0), C) {
|
|
629
|
+
let M;
|
|
630
|
+
$ === 0 || $ < 0 != l.strides[f] < 0 ? M = 0 : M = Math.trunc($ / l.strides[f]) + ($ % l.strides[f] !== 0 ? 1 : 0), m.push(M);
|
|
631
631
|
} else
|
|
632
632
|
m.push(-1);
|
|
633
633
|
}
|
|
634
634
|
for (let f = 0; f < l.finalShapeGatherIndices.length; ++f) {
|
|
635
|
-
const
|
|
636
|
-
|
|
635
|
+
const E = l.finalShapeGatherIndices[f];
|
|
636
|
+
E >= 0 ? b.push(m[E]) : E === v && b.push(1);
|
|
637
637
|
}
|
|
638
638
|
return {
|
|
639
|
-
finalShapeSparse: b.filter((f,
|
|
639
|
+
finalShapeSparse: b.filter((f, E) => l.finalShapeGatherIndices[E] !== v),
|
|
640
640
|
finalShape: b,
|
|
641
641
|
isIdentity: h,
|
|
642
642
|
sliceDim0: d,
|
|
@@ -706,7 +706,7 @@ const tt = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
|
|
|
706
706
|
*/
|
|
707
707
|
const nt = 1.7580993408473768, st = 1.0507009873554805;
|
|
708
708
|
export {
|
|
709
|
-
|
|
709
|
+
Me as A,
|
|
710
710
|
Ne as B,
|
|
711
711
|
We as C,
|
|
712
712
|
Re as D,
|
|
@@ -733,7 +733,7 @@ export {
|
|
|
733
733
|
P as t,
|
|
734
734
|
Ve as u,
|
|
735
735
|
Le as v,
|
|
736
|
-
|
|
736
|
+
we as w,
|
|
737
737
|
$e as x,
|
|
738
738
|
Se as y,
|
|
739
739
|
xe as z
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { h as it, m as kt, k as Je, l as Qe, R as tn, v as en, n as nn, o as sn, p as on, q as rn, r as an, s as ln, t as cn, w as un, x as hn, y as fn, z as Nt, A as gn, B as dn, C as mn } from "./gpgpu_math-
|
|
3
|
-
import { g as pn, a as In, e as wn, c as bn } from "./axis_util-
|
|
4
|
-
import { b as xn } from "./broadcast_to-
|
|
5
|
-
import { r as En } from "./reshape-
|
|
6
|
-
import { p as Fn, q as yn, r as kn, u as Nn } from "./selu_util-
|
|
1
|
+
import { i as qt, bc as _e, g as A, h as Ut, W as K, bd as et, aH as ht, be as gt, bf as at, _ as Ve, $ as st, ak as De, a$ as We, bg as $e, bh as ze, bi as Be, bj as Mt, ah as H, bk as Ot, bl as D, bm as Lt, bn as At, a8 as _t, bo as Vt, ag as Dt, bp as Wt, aT as $t, aU as zt, aW as Bt, aX as Gt, a9 as jt, bq as Zt, a_ as Ht, br as Kt, aY as Ge, am as je, bs as Xt, b2 as Ze, bt as He, p as Yt, u as Ke, t as Xe, bu as Jt, bv as wt, ap as Qt, aO as Ye, bw as te, bx as ee, by as ne, aG as pt, bz as se, bA as ot } from "./index-BAzbokzv.js";
|
|
2
|
+
import { h as it, m as kt, k as Je, l as Qe, R as tn, v as en, n as nn, o as sn, p as on, q as rn, r as an, s as ln, t as cn, w as un, x as hn, y as fn, z as Nt, A as gn, B as dn, C as mn } from "./gpgpu_math-CNslybmD.js";
|
|
3
|
+
import { g as pn, a as In, e as wn, c as bn } from "./axis_util-Bu4h7XWV.js";
|
|
4
|
+
import { b as xn } from "./broadcast_to-DARN-DBD.js";
|
|
5
|
+
import { r as En } from "./reshape-krWGKraP.js";
|
|
6
|
+
import { p as Fn, q as yn, r as kn, u as Nn } from "./selu_util-sntGesxr.js";
|
|
7
7
|
import { g as Sn } from "./_commonjsHelpers-ByX85dGu.js";
|
|
8
8
|
function vn(e, t) {
|
|
9
9
|
for (var n = 0; n < t.length; n++) {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { o as p, q as i, E as a, F as c } from "./index-BAzbokzv.js";
|
|
2
2
|
/**
|
|
3
3
|
* @license
|
|
4
4
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -15,9 +15,9 @@ import { q as p, w as i, E as a, H as c } from "./index-CamYe_M8.js";
|
|
|
15
15
|
* limitations under the License.
|
|
16
16
|
* =============================================================================
|
|
17
17
|
*/
|
|
18
|
-
function e(t, s,
|
|
19
|
-
const
|
|
20
|
-
return a.runKernel(c,
|
|
18
|
+
function e(t, s, o = 0) {
|
|
19
|
+
const n = { x: i(t, "x", "split") }, r = { numOrSizeSplits: s, axis: o };
|
|
20
|
+
return a.runKernel(c, n, r);
|
|
21
21
|
}
|
|
22
22
|
const u = /* @__PURE__ */ p({ split_: e });
|
|
23
23
|
export {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { o as e, w as c, i as n, E as i, P as k } from "./index-BAzbokzv.js";
|
|
2
2
|
/**
|
|
3
3
|
* @license
|
|
4
4
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -19,7 +19,7 @@ function u(r, t = 0) {
|
|
|
19
19
|
const s = c(r, "tensors", "stack", "string_or_numeric");
|
|
20
20
|
n(s.length >= 1, () => "Pass at least one tensor to tf.stack"), s.length > 0 && n(t <= s[0].rank, () => "Axis must be <= rank of the tensor");
|
|
21
21
|
const o = s, a = { axis: t };
|
|
22
|
-
return
|
|
22
|
+
return i.runKernel(k, o, a);
|
|
23
23
|
}
|
|
24
24
|
const l = /* @__PURE__ */ e({ stack_: u });
|
|
25
25
|
export {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { o as e, q as u, L as c, E as l, N as m } from "./index-BAzbokzv.js";
|
|
2
2
|
/**
|
|
3
3
|
* @license
|
|
4
4
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -15,10 +15,10 @@ import { q as e, w as u, N as c, E as l, O as m } from "./index-CamYe_M8.js";
|
|
|
15
15
|
* limitations under the License.
|
|
16
16
|
* =============================================================================
|
|
17
17
|
*/
|
|
18
|
-
function i(t,
|
|
18
|
+
function i(t, o = null, n = !1) {
|
|
19
19
|
let s = u(t, "x", "sum");
|
|
20
20
|
s.dtype === "bool" && (s = c(s, "int32"));
|
|
21
|
-
const r = { x: s }, a = { axis:
|
|
21
|
+
const r = { x: s }, a = { axis: o, keepDims: n };
|
|
22
22
|
return l.runKernel(m, r, a);
|
|
23
23
|
}
|
|
24
24
|
const f = /* @__PURE__ */ e({ sum_: i });
|
package/dist/training/AdamExt.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as r,
|
|
1
|
+
import { A as r, a as c, s as h, b as g, e as o } from "../index-BAzbokzv.js";
|
|
2
2
|
class u extends r {
|
|
3
3
|
constructor(t, e, s, a, i) {
|
|
4
4
|
super(t, e, s, a), this.config = i, this.startLearningRate = t;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { generateText as T } from "../utilities/generate.js";
|
|
2
2
|
import L from "./Trainer.js";
|
|
3
3
|
import x from "./Evaluator.js";
|
|
4
|
-
import {
|
|
4
|
+
import { d as h } from "../index-BAzbokzv.js";
|
|
5
5
|
import y from "../utilities/profile.js";
|
|
6
6
|
const D = {
|
|
7
7
|
desiredLoss: 0.01,
|
package/dist/training/Trainer.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { DatasetBuilder as g, flattenTokens as m, PAGE_FACTOR as u } from "./DatasetBuilder.js";
|
|
2
2
|
import f from "./AdamExt.js";
|
|
3
|
-
import { t as y, v as z,
|
|
4
|
-
import { n as S } from "../norm-
|
|
5
|
-
import { z as p } from "../zeros-
|
|
3
|
+
import { t as y, v as z, d as c } from "../index-BAzbokzv.js";
|
|
4
|
+
import { n as S } from "../norm-CzltS9Fz.js";
|
|
5
|
+
import { z as p } from "../zeros-YJDE7oRb.js";
|
|
6
6
|
class R {
|
|
7
7
|
constructor(t, e, s = 1e-3) {
|
|
8
8
|
this.tokenizer = e, this.model = t, this.learningRate = s, this.resetOptimizer(), this.datasetBuilder = new g(e, t.config.gpt.blockSize);
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { gatherSub as L } from "../ops/gatherSub.js";
|
|
2
2
|
import { scatterSub as y } from "../ops/scatterSub.js";
|
|
3
|
-
import { e as u,
|
|
4
|
-
import { s as v } from "../softmax-
|
|
5
|
-
import { m as z } from "../max-
|
|
6
|
-
import { l as k } from "../log_sum_exp-
|
|
3
|
+
import { e as u, l as i, z as S, t as f, b as G } from "../index-BAzbokzv.js";
|
|
4
|
+
import { s as v } from "../softmax-fsdtf6JC.js";
|
|
5
|
+
import { m as z } from "../max-DtlIuVeW.js";
|
|
6
|
+
import { l as k } from "../log_sum_exp-YEo2h3gb.js";
|
|
7
7
|
function F(a, s) {
|
|
8
8
|
return f(() => {
|
|
9
9
|
const e = a.shape[a.shape.length - 1], o = a.shape.slice(0, -1).reduce((d, c) => d * c, 1), p = a.shape.length > 2 ? a.reshape([o, e]) : a, n = s.shape.length > 1 ? s.reshape([o]).cast("int32") : s.cast("int32"), t = z(p, -1, !0), r = G(p, t), h = k(r, -1);
|
|
@@ -23,7 +23,7 @@ function j() {
|
|
|
23
23
|
(s, e, m) => {
|
|
24
24
|
const o = s.shape[s.shape.length - 1], n = s.shape.slice(0, -1).reduce((d, c) => d * c, 1), t = s.reshape([n, o]), r = e.reshape([n]).cast("int32"), h = F(t, r);
|
|
25
25
|
return m([t, r]), t.dispose(), r.dispose(), { value: h, gradFunc: (d, c) => f(() => {
|
|
26
|
-
const
|
|
26
|
+
const g = c[0], b = c[1], x = v(g), C = y(x, b, d), E = S(e);
|
|
27
27
|
return [C.reshape(s.shape), E];
|
|
28
28
|
}) };
|
|
29
29
|
}
|
package/dist/utilities/dummy.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { m as f, v as S, e as w } from "../index-
|
|
2
|
-
import { z as i } from "../zeros-
|
|
1
|
+
import { m as f, v as S, e as w } from "../index-BAzbokzv.js";
|
|
2
|
+
import { z as i } from "../zeros-YJDE7oRb.js";
|
|
3
3
|
async function P(s) {
|
|
4
4
|
const t = i([1, s.config.gpt.blockSize], "int32"), [e, n] = s.forward({ training: !1 }, t);
|
|
5
5
|
await e.data(), e.dispose(), n && n.dispose(), t.dispose();
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { t as y } from "../index-
|
|
2
|
-
import { t as x } from "../tensor2d-
|
|
3
|
-
import { c as f } from "../concat-
|
|
1
|
+
import { t as y } from "../index-BAzbokzv.js";
|
|
2
|
+
import { t as x } from "../tensor2d-CPEkynbH.js";
|
|
3
|
+
import { c as f } from "../concat-5aPGqw3Z.js";
|
|
4
4
|
async function A(o, r, a, c, T) {
|
|
5
5
|
if (c <= 0)
|
|
6
6
|
throw new Error("Length must be a positive integer");
|
package/dist/utilities/save.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { j as p } from "../jszip.min-CjP2V1VV.js";
|
|
2
2
|
import b from "../tokeniser/CharTokeniser.js";
|
|
3
3
|
import { save_safetensors as l } from "./safetensors.js";
|
|
4
|
-
import { VERSION as y } from "
|
|
4
|
+
import { VERSION as y } from "../loader/load.js";
|
|
5
5
|
async function N(e, a, n) {
|
|
6
6
|
const f = n?.includeLog ?? !0, s = /* @__PURE__ */ new Map();
|
|
7
7
|
e.saveWeights(s);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import "../index-
|
|
2
|
-
import { t as p } from "../tensor-
|
|
1
|
+
import "../index-BAzbokzv.js";
|
|
2
|
+
import { t as p } from "../tensor-Xyi595sG.js";
|
|
3
3
|
function h(n) {
|
|
4
4
|
const e = n.reduce((s, o) => s + o.length, 0), a = new Float32Array(e);
|
|
5
5
|
let t = 0;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { o as m, q as r, Z as l, E as c, _ as i, x as p, $ as u, g as x } from "./index-BAzbokzv.js";
|
|
2
2
|
/**
|
|
3
3
|
* @license
|
|
4
4
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -15,13 +15,13 @@ import { q as m, w as r, Z as l, E as c, _ as i, y as p, $ as u, i as f } from "
|
|
|
15
15
|
* limitations under the License.
|
|
16
16
|
* =============================================================================
|
|
17
17
|
*/
|
|
18
|
-
function
|
|
19
|
-
const
|
|
20
|
-
l(
|
|
21
|
-
const n = { real:
|
|
18
|
+
function f(a, e) {
|
|
19
|
+
const o = r(a, "real", "complex"), s = r(e, "imag", "complex");
|
|
20
|
+
l(o.shape, s.shape, `real and imag shapes, ${o.shape} and ${s.shape}, must match in call to tf.complex().`);
|
|
21
|
+
const n = { real: o, imag: s };
|
|
22
22
|
return c.runKernel(i, n);
|
|
23
23
|
}
|
|
24
|
-
const g = /* @__PURE__ */ m({ complex_:
|
|
24
|
+
const g = /* @__PURE__ */ m({ complex_: f });
|
|
25
25
|
/**
|
|
26
26
|
* @license
|
|
27
27
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -40,11 +40,11 @@ const g = /* @__PURE__ */ m({ complex_: x });
|
|
|
40
40
|
*/
|
|
41
41
|
function t(a, e = "float32") {
|
|
42
42
|
if (p(a), e === "complex64") {
|
|
43
|
-
const
|
|
44
|
-
return g(
|
|
43
|
+
const s = t(a, "float32"), n = t(a, "float32");
|
|
44
|
+
return g(s, n);
|
|
45
45
|
}
|
|
46
|
-
const
|
|
47
|
-
return c.makeTensor(
|
|
46
|
+
const o = u(x(a), e);
|
|
47
|
+
return c.makeTensor(o, a, e);
|
|
48
48
|
}
|
|
49
49
|
export {
|
|
50
50
|
g as c,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@genai-fi/nanogpt",
|
|
3
|
-
"version": "0.6.
|
|
3
|
+
"version": "0.6.2",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "dist/main.js",
|
|
6
6
|
"types": "dist/main.d.ts",
|
|
@@ -49,10 +49,10 @@
|
|
|
49
49
|
},
|
|
50
50
|
"dependencies": {
|
|
51
51
|
"@dsnp/parquetjs": "^1.8.7",
|
|
52
|
+
"@tensorflow/tfjs": "^4.22.0",
|
|
52
53
|
"eventemitter3": "^5.0.1",
|
|
53
54
|
"jszip": "^3.10.1",
|
|
54
55
|
"papaparse": "^5.5.3",
|
|
55
|
-
"pdfjs-dist": "^5.4.149"
|
|
56
|
-
"@tensorflow/tfjs": "^4.22.0"
|
|
56
|
+
"pdfjs-dist": "^5.4.149"
|
|
57
57
|
}
|
|
58
58
|
}
|
package/dist/utilities/load.js
DELETED
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
import { j as v } from "../jszip.min-CjP2V1VV.js";
|
|
2
|
-
import { importWeights as F } from "./weights.js";
|
|
3
|
-
import h from "../tokeniser/CharTokeniser.js";
|
|
4
|
-
import b from "../NanoGPTModel.js";
|
|
5
|
-
import { dummyPassAsync as u } from "./dummy.js";
|
|
6
|
-
import { d as k } from "../index-CamYe_M8.js";
|
|
7
|
-
import j from "../tokeniser/bpe.js";
|
|
8
|
-
import { load_safetensors as N } from "./safetensors.js";
|
|
9
|
-
const I = 2;
|
|
10
|
-
async function O(t) {
|
|
11
|
-
const s = await fetch(t);
|
|
12
|
-
if (!s.ok)
|
|
13
|
-
throw new Error(`Failed to fetch ${t}: ${s.statusText}`);
|
|
14
|
-
return s.arrayBuffer();
|
|
15
|
-
}
|
|
16
|
-
async function S(t) {
|
|
17
|
-
const s = /* @__PURE__ */ new Map(), r = await t.file("manifest.json")?.async("string");
|
|
18
|
-
if (!r)
|
|
19
|
-
throw new Error("Manifest file not found in the zip archive");
|
|
20
|
-
const p = JSON.parse(r);
|
|
21
|
-
for (const [o, a] of Object.entries(p.weightSpec))
|
|
22
|
-
s.set(o, { spec: a, data: new Float32Array() });
|
|
23
|
-
const e = await t.file("tokeniser.json")?.async("string");
|
|
24
|
-
if (!e)
|
|
25
|
-
throw new Error("Tokeniser file not found in the zip archive");
|
|
26
|
-
const i = JSON.parse(e), c = (i.type ?? "char") === "char" ? new h(i.vocab) : new j(i.vocab, i.merges), d = /* @__PURE__ */ new Map();
|
|
27
|
-
for (const o of Object.keys(t.files))
|
|
28
|
-
if (o.endsWith(".bin")) {
|
|
29
|
-
const a = o.replace(".bin", ""), w = await t.file(o).async("arraybuffer"), g = new Float32Array(w), l = s.get(a) || { spec: [], data: new Float32Array() };
|
|
30
|
-
l.data = g, s.set(a, l);
|
|
31
|
-
const n = await F(l);
|
|
32
|
-
d.set(a, n);
|
|
33
|
-
}
|
|
34
|
-
k();
|
|
35
|
-
const f = new b(p.config);
|
|
36
|
-
await u(f), f.loadWeights(d);
|
|
37
|
-
const m = await t.file("log.json")?.async("string");
|
|
38
|
-
if (m)
|
|
39
|
-
try {
|
|
40
|
-
const o = JSON.parse(m);
|
|
41
|
-
f.log = o;
|
|
42
|
-
} catch (o) {
|
|
43
|
-
throw console.error("Error parsing training log:", o), new Error(`Failed to parse training log: ${o}`);
|
|
44
|
-
}
|
|
45
|
-
return { model: f, tokeniser: c };
|
|
46
|
-
}
|
|
47
|
-
async function R(t) {
|
|
48
|
-
const s = typeof t == "string" ? await O(t) : t, r = await v.loadAsync(s);
|
|
49
|
-
if (r.file("manifest.json"))
|
|
50
|
-
return S(r);
|
|
51
|
-
{
|
|
52
|
-
const p = await r.file("config.json")?.async("string");
|
|
53
|
-
if (!p)
|
|
54
|
-
throw new Error("Config file not found in the zip archive");
|
|
55
|
-
const e = JSON.parse(p), i = {
|
|
56
|
-
vocabSize: e.vocab_size,
|
|
57
|
-
blockSize: e.block_size,
|
|
58
|
-
nLayer: e.num_hidden_layers,
|
|
59
|
-
nHead: e.num_attention_heads,
|
|
60
|
-
nEmbed: e.hidden_size,
|
|
61
|
-
dropout: e.dropout,
|
|
62
|
-
biasInLinear: e.biasInLinear,
|
|
63
|
-
biasInLayerNorm: e.biasInLayerNorm,
|
|
64
|
-
mlpFactor: e.mlpFactor,
|
|
65
|
-
useRope: e.useRope
|
|
66
|
-
}, y = await r.file("tokeniser.json")?.async("string");
|
|
67
|
-
if (!y)
|
|
68
|
-
throw new Error("Tokeniser file not found in the zip archive");
|
|
69
|
-
const c = JSON.parse(y), f = (c.type ?? "char") === "char" ? new h(c.vocab) : new j(c.vocab, c.merges), m = await N(await r.file("model.safetensors").async("arraybuffer")), o = /* @__PURE__ */ new Map();
|
|
70
|
-
for (const [n, E] of Object.entries(m))
|
|
71
|
-
o.set(n, [E]);
|
|
72
|
-
k();
|
|
73
|
-
const a = new b(i);
|
|
74
|
-
await u(a), a.loadWeights(o);
|
|
75
|
-
const w = await r.file("meta.json")?.async("string");
|
|
76
|
-
let g;
|
|
77
|
-
if (w)
|
|
78
|
-
try {
|
|
79
|
-
const n = JSON.parse(w);
|
|
80
|
-
n.name && (g = n.name);
|
|
81
|
-
} catch (n) {
|
|
82
|
-
console.error("Error parsing meta file:", n);
|
|
83
|
-
}
|
|
84
|
-
const l = await r.file("log.json")?.async("string");
|
|
85
|
-
if (l)
|
|
86
|
-
try {
|
|
87
|
-
const n = JSON.parse(l);
|
|
88
|
-
a.log = n;
|
|
89
|
-
} catch (n) {
|
|
90
|
-
throw console.error("Error parsing training log:", n), new Error(`Failed to parse training log: ${n}`);
|
|
91
|
-
}
|
|
92
|
-
return { model: a, tokeniser: f, name: g };
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
export {
|
|
96
|
-
I as VERSION,
|
|
97
|
-
R as loadModel,
|
|
98
|
-
S as loadOldModel
|
|
99
|
-
};
|