@genai-fi/nanogpt 0.6.1 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +6 -6
- package/dist/NanoGPTModel.js +9 -9
- package/dist/{RealDiv-7xu-pkZN.js → RealDiv-BYViZwhN.js} +6 -6
- package/dist/{Reshape-BYC1oUku.js → Reshape-t7Kcikjk.js} +2 -2
- package/dist/TeachableLLM.d.ts +3 -0
- package/dist/TeachableLLM.js +49 -46
- package/dist/{TiedEmbedding-C1HBot-5.js → TiedEmbedding-9WeDwvjO.js} +4 -4
- package/dist/{axis_util-CCNL7jea.js → axis_util-Bu4h7XWV.js} +1 -1
- package/dist/{broadcast_to-CddAF879.js → broadcast_to-DARN-DBD.js} +2 -2
- package/dist/{concat-XOK9ANZu.js → concat-5aPGqw3Z.js} +8 -8
- package/dist/{dataset-BFFipD1c.js → dataset-pgqp-YfL.js} +5 -5
- package/dist/{dropout-xlKRoJyU.js → dropout-Bciw46HT.js} +10 -10
- package/dist/{gather-DKtUaTtA.js → gather-DjyCjmOD.js} +1 -1
- package/dist/{gpgpu_math-B_ycgZ4W.js → gpgpu_math-CNslybmD.js} +31 -31
- package/dist/{index-CamYe_M8.js → index-BAzbokzv.js} +31 -31
- package/dist/{kernel_funcs_utils-D5MS0JFg.js → kernel_funcs_utils-CUxJCg0g.js} +5 -5
- package/dist/layers/BaseLayer.js +2 -2
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/MLP.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +4 -4
- package/dist/layers/TiedEmbedding.js +5 -5
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/loader/load.d.ts +13 -0
- package/dist/loader/load.js +27 -0
- package/dist/loader/loadHF.d.ts +7 -0
- package/dist/loader/loadHF.js +22 -0
- package/dist/{utilities/load.d.ts → loader/loadTransformers.d.ts} +11 -11
- package/dist/loader/loadTransformers.js +28 -0
- package/dist/loader/newZipLoad.d.ts +8 -0
- package/dist/loader/newZipLoad.js +21 -0
- package/dist/loader/oldZipLoad.d.ts +7 -0
- package/dist/loader/oldZipLoad.js +76 -0
- package/dist/{log_sum_exp-CV_5-TTu.js → log_sum_exp-YEo2h3gb.js} +16 -16
- package/dist/main.js +4 -4
- package/dist/{mat_mul-CAbRFWUj.js → mat_mul-7121rsJk.js} +4 -4
- package/dist/{max-JBBv7aUf.js → max-DtlIuVeW.js} +3 -3
- package/dist/{mulmat_packed_gpu-DW4doKL_.js → mulmat_packed_gpu-D4nKF7Je.js} +1 -1
- package/dist/{norm-B9dQTFYn.js → norm-CzltS9Fz.js} +10 -10
- package/dist/{ones-CMHNqMr6.js → ones-BBlSRqn1.js} +2 -2
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +6 -6
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +9 -9
- package/dist/ops/cpu/gelu.js +1 -1
- package/dist/ops/cpu/matMulGelu.js +1 -1
- package/dist/ops/cpu/matMulMul.js +1 -1
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +14 -14
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +1 -1
- package/dist/ops/grads/attentionMask.js +1 -1
- package/dist/ops/grads/fusedSoftmax.js +4 -4
- package/dist/ops/grads/gelu.js +1 -1
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/normRMS.js +1 -1
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/matMulGelu.js +1 -1
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/rope.js +4 -4
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +4 -4
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.js +3 -3
- package/dist/ops/webgl/matMulGelu.js +17 -17
- package/dist/ops/webgl/matMulMul.js +1 -1
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/{ops-DqtYemmV.js → ops-C0sQEcPw.js} +78 -78
- package/dist/{random_width-CLMQG5Jn.js → random_width-DWzaOgrn.js} +22 -22
- package/dist/{range-DqYjKnuG.js → range-DYsrnfiy.js} +1 -1
- package/dist/{reciprocal-z49filta.js → reciprocal-CJQeasVa.js} +1 -1
- package/dist/{register_all_kernels-COt6wLD0.js → register_all_kernels-BfFCQAqs.js} +28 -28
- package/dist/{reshape-C45vIIRU.js → reshape-krWGKraP.js} +1 -1
- package/dist/{scatter_nd_util-qgtnviTE.js → scatter_nd_util-93ln7Hut.js} +3 -3
- package/dist/{selu_util-4QV_GXTB.js → selu_util-sntGesxr.js} +41 -41
- package/dist/{shared-ByfrGA97.js → shared-Ca6iDobD.js} +6 -6
- package/dist/{sin-9JBrfVaB.js → sin-D_h-qCSx.js} +1 -1
- package/dist/{softmax-DvMvui-_.js → softmax-fsdtf6JC.js} +1 -1
- package/dist/{split-DxrHrPFK.js → split-eiktj-6L.js} +4 -4
- package/dist/{stack-DgaoDmnF.js → stack-dfEEz2OY.js} +2 -2
- package/dist/{sum-BpcpxNEh.js → sum-BE_Irnim.js} +3 -3
- package/dist/{tensor-CDz5x1mP.js → tensor-Xyi595sG.js} +1 -1
- package/dist/{tensor2d-jO8JY5Jd.js → tensor2d-CPEkynbH.js} +1 -1
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +3 -3
- package/dist/training/sparseCrossEntropy.js +19 -26
- package/dist/utilities/dummy.js +2 -2
- package/dist/utilities/generate.js +3 -3
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/save.js +1 -1
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-CLVXjN7F.js → variable-wSS22xj5.js} +1 -1
- package/dist/{zeros-DUkkVccu.js → zeros-YJDE7oRb.js} +10 -10
- package/package.json +3 -3
- package/dist/ops/node/sparseCrossEntropy.d.ts +0 -1
- package/dist/ops/node/sparseCrossEntropy.js +0 -11
- package/dist/utilities/load.js +0 -99
|
@@ -1,32 +1,32 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { e as Dr, s as Er, l as Fr, a as Pr, b as Or, m as Ar, c as _r, r as Gs, f as $l, g as kl, h as wl, i as Nl, j as vl, k as Sl, n as Rl, o as Tl, q as Dl, t as El, u as Fl, v as Pl, w as Ol, x as Al, y as _l, z as Ll, A as Vl, B as Wl, C as Ml, p as Bl, F as Gl, G as zl, H as Ul, I as Hl, J as Xl, K as Kl, L as ql } from "./random_width-
|
|
3
|
-
import { r as re } from "./reshape-
|
|
4
|
-
import { b as jl } from "./broadcast_to-
|
|
5
|
-
import { c as os } from "./concat-
|
|
6
|
-
import { c as Yl, s as Ql } from "./sin-
|
|
7
|
-
import { w as Lr, n as ss, a as zs, r as Us, o as Vr, s as as, h as Zl, g as Jl, e as eu, b as tu, f as su, m as nu, p as ou, q as au, v as ru, j as iu, d as cu, c as lu, u as uu, x as Wr, y as Mr, z as Br } from "./ops-
|
|
8
|
-
import { m as Tt } from "./mat_mul-
|
|
9
|
-
import { w as Gr, k as Ye, h as Ve, x as du, z as pu, A as hu, y as fu, B as mu, v as gu, i as ot, j as lt, d as De, e as $t, g as Rs, S as zr, a as Ur, C as Hr, D as Xr, p as xu, q as Cu, r as bu, u as Iu } from "./selu_util-
|
|
10
|
-
import { n as Kr, m as yu } from "./norm-
|
|
11
|
-
import { e as $u, l as ku, a as wu } from "./log_sum_exp-
|
|
12
|
-
import { f as Nu } from "./dropout-
|
|
13
|
-
import { g as vu } from "./gather-
|
|
14
|
-
import { c as Hs, z as qr } from "./zeros-
|
|
15
|
-
import { m as Su } from "./max-
|
|
16
|
-
import { r as Ru } from "./reciprocal-
|
|
17
|
-
import { s as Xs } from "./split-
|
|
18
|
-
import { s as Tu } from "./softmax-
|
|
19
|
-
import { s as Du } from "./stack-
|
|
20
|
-
import { s as Eu } from "./sum-
|
|
21
|
-
import { a as j, u as ue, c as ut, i as dt, s as Fu, b as Pt, d as Pe, e as kt, f as gt, g as jr, r as Ot, h as Be, j as Pu, k as Ou, l as On, z as Au, n as An, o as Yr, p as _u, q as Lu, v as Vu, w as Wu, x as Mu, y as Bu, A as Gu, B as zu, C as Uu, D as Hu, E as ft, F as Xu, G as Ku, H as Qr, I as qu, J as ju, K as Yu, L as Qu, M as Zu, N as Ju, O as ed, P as td, Q as sd, R as nd, S as od, T as ad, U as rd, V as id, W as cd, X as ld, Y as ud, Z as dd, _ as pd, $ as hd, a0 as fd, a1 as md, a2 as gd, a3 as xd, a4 as Cd, a5 as bd, a6 as Id, a7 as yd, a8 as $d, a9 as kd, aa as wd, ab as Nd, ac as vd, ad as Sd, ae as Rd, af as Td, ag as Dd, ah as Ed, ai as Fd, aj as Pd, ak as Zr, al as Od, t as Jr, am as Ad, an as _d, ao as ei, ap as Ld, aq as Vd, ar as Wd, as as Md, at as Bd, au as Gd, av as zd, aw as Ud, ax as Hd, ay as Xd, az as Kd, aA as qd, aB as jd, aC as Yd, aD as Qd, aE as Zd, aF as Jd, aG as ep, aH as tp, aI as sp, aJ as np, aK as op, aL as ap, aM as rp, aN as ip, aO as cp, aP as lp, aQ as up, aR as dp, aS as pp, aT as hp, aU as ti, aV as fp, aW as mp, aX as gp, aY as xp, aZ as Cp, a_ as bp, a$ as Ip, b0 as yp, b1 as $p, b2 as kp } from "./shared-
|
|
22
|
-
import { m as xt, D as Ts, E as Ds, F as Es, G as si, H as ni, I as oi, J as je, K as ai, L as ri, M as ii, N as ci, O as li, P as ui, Q as di, S as pi, T as hi, U as fi, V as mi, W as gi, X as wp, Y as Np, Z as vp, _ as Sp, $ as Rp, a0 as Tp, a1 as xi, a2 as Ci, a3 as bi, a4 as Ii, a5 as Qn, a6 as Dp, a7 as Ep, a8 as Fp, a9 as Pp, aa as Op, ab as Ap, ac as _n, ad as ze, u as We, ae as yi, c as $i, af as _e, ag as ki, g as wi, a as Ni, ah as _p, ai as Lp, aj as Z, ak as Mt, al as Vp, am as Zn, an as Wp, ao as Mp, ap as Bt, aq as Bp, ar as Gp, as as Ks, at as jt, au as Yt, av as zp, aw as Up, ax as Jn, ay as Hp, az as Xp, aA as Vs, aB as Kp, aC as qp, aD as jp, aE as Qt, aF as Ws, aG as Yp, f as Ce, aH as we, aI as Zt, aJ as Jt, aK as Qp, d as eo, e as to, i as vi, aL as Zp, aM as Jp, aN as eh, aO as th, aP as sh, aQ as nh, j as Si, aR as Gt, h as oh, aS as ah, aT as rh } from "./gpgpu_math-
|
|
23
|
-
import { g as ye, a as $e, b as Re, c as Ee, e as Oe, h as Fs } from "./axis_util-
|
|
1
|
+
import { o as oe, q as se, E as le, ch as ps, ci as hs, ck as en, cl as fs, cm as ms, co as gs, B as xs, cn as Cs, cp as bs, bm as To, i as Y, c$ as tn, cv as sn, aj as Do, z as nn, bo as Eo, cz as Is, cA as ys, cB as $s, ao as Nc, cE as on, l as vc, a as an, h as Ne, d0 as ks, cH as rn, cI as ws, L as It, bt as Fo, cM as Ns, d1 as cn, d2 as ln, s as Po, bx as Oo, d3 as un, cO as vs, d4 as dn, d5 as pn, ah as xe, d6 as S, a3 as Sc, a7 as Rc, T as Ao, d7 as Tc, cP as Dc, j as Ec, a4 as Fc, a5 as Pc, b as Oc, ac as L, d8 as _o, d9 as Lo, e as Me, aJ as Vt, da as Vo, aG as Wo, db as Ac, aF as Wt, af as Ke, dc as Mo, aq as hn, al as _c, g as z, bd as Ge, an as Lc, ar as fn, as as mn, D as Vc, aa as Wc, C as Bo, W as ne, ca as Go, cj as zo, bB as Uo, p as me, $ as st, bC as Ho, bD as Xo, bE as Ko, ai as ct, bF as qo, cq as jo, cr as Yo, bI as Qo, bH as Zo, aP as Jo, dd as ea, bJ as gn, ct as ta, aV as sa, J as na, bK as oa, bv as Te, c6 as aa, bL as ra, bM as ia, cu as ca, bN as la, a0 as xn, bO as Cn, b3 as ua, bP as da, u as nt, y as Mc, bQ as pa, bR as ha, bS as fa, c8 as ma, c9 as ga, de as xa, bl as Ss, bf as Ft, bi as bn, cw as Kn, df as mt, dg as Ca, cx as qn, dh as Bc, N as Gc, aR as ba, cy as Ia, bT as In, aS as ya, ax as $a, am as yt, di as ka, dj as wa, b4 as Na, c7 as va, dk as Sa, Y as Ra, G as Ta, dl as Da, bU as yn, aZ as $n, bW as kn, cD as Ea, M as zc, bX as Fa, bY as Pa, cF as Oa, cG as Aa, dm as _a, bZ as La, a2 as Va, be as Wa, S as Ma, a6 as Ba, b6 as Ga, b8 as za, b7 as Ua, b_ as Ha, cQ as Xa, b$ as Ka, P as qa, Z as ja, c0 as Ya, cJ as wn, dn as Qa, dp as Za, dq as Ja, R as er, K as Nn, b9 as tr, cK as sr, ba as nr, cL as or, c2 as ar, b5 as rr, b0 as vn, X as ir, dr as cr, aQ as lr, c3 as Sn, a1 as Rn, c4 as Tn, bV as Dn, c1 as ur, ds as dr, dt as pr, du as hr, dv as fr, dw as mr, F as gr, cN as xr, at as En, dx as Cr, dy as br, dz as Ir, bG as Fn, dA as yr, I as $r, bb as kr, av as Pn, b1 as wr, c5 as Nr, r as vr, dB as Sr, dC as jn, dD as Yn, dE as Uc, t as Hc, dF as Xc, dG as Kc, br as qc, b2 as jc, bc as Yc, bj as Qc, aO as Zc, bk as Jc, bs as el, a$ as tl, bg as sl, bh as nl, aM as Rr, bn as ol, a8 as al, ag as rl, bp as il, dH as cl, aT as ll, aU as ul, aW as dl, aX as pl, a9 as hl, bq as fl, a_ as ml, bz as gl, aY as xl, aw as Tr, bu as Cl, ap as bl, bw as Il, by as yl } from "./index-BAzbokzv.js";
|
|
2
|
+
import { e as Dr, s as Er, l as Fr, a as Pr, b as Or, m as Ar, c as _r, r as Gs, f as $l, g as kl, h as wl, i as Nl, j as vl, k as Sl, n as Rl, o as Tl, q as Dl, t as El, u as Fl, v as Pl, w as Ol, x as Al, y as _l, z as Ll, A as Vl, B as Wl, C as Ml, p as Bl, F as Gl, G as zl, H as Ul, I as Hl, J as Xl, K as Kl, L as ql } from "./random_width-DWzaOgrn.js";
|
|
3
|
+
import { r as re } from "./reshape-krWGKraP.js";
|
|
4
|
+
import { b as jl } from "./broadcast_to-DARN-DBD.js";
|
|
5
|
+
import { c as os } from "./concat-5aPGqw3Z.js";
|
|
6
|
+
import { c as Yl, s as Ql } from "./sin-D_h-qCSx.js";
|
|
7
|
+
import { w as Lr, n as ss, a as zs, r as Us, o as Vr, s as as, h as Zl, g as Jl, e as eu, b as tu, f as su, m as nu, p as ou, q as au, v as ru, j as iu, d as cu, c as lu, u as uu, x as Wr, y as Mr, z as Br } from "./ops-C0sQEcPw.js";
|
|
8
|
+
import { m as Tt } from "./mat_mul-7121rsJk.js";
|
|
9
|
+
import { w as Gr, k as Ye, h as Ve, x as du, z as pu, A as hu, y as fu, B as mu, v as gu, i as ot, j as lt, d as De, e as $t, g as Rs, S as zr, a as Ur, C as Hr, D as Xr, p as xu, q as Cu, r as bu, u as Iu } from "./selu_util-sntGesxr.js";
|
|
10
|
+
import { n as Kr, m as yu } from "./norm-CzltS9Fz.js";
|
|
11
|
+
import { e as $u, l as ku, a as wu } from "./log_sum_exp-YEo2h3gb.js";
|
|
12
|
+
import { f as Nu } from "./dropout-Bciw46HT.js";
|
|
13
|
+
import { g as vu } from "./gather-DjyCjmOD.js";
|
|
14
|
+
import { c as Hs, z as qr } from "./zeros-YJDE7oRb.js";
|
|
15
|
+
import { m as Su } from "./max-DtlIuVeW.js";
|
|
16
|
+
import { r as Ru } from "./reciprocal-CJQeasVa.js";
|
|
17
|
+
import { s as Xs } from "./split-eiktj-6L.js";
|
|
18
|
+
import { s as Tu } from "./softmax-fsdtf6JC.js";
|
|
19
|
+
import { s as Du } from "./stack-dfEEz2OY.js";
|
|
20
|
+
import { s as Eu } from "./sum-BE_Irnim.js";
|
|
21
|
+
import { a as j, u as ue, c as ut, i as dt, s as Fu, b as Pt, d as Pe, e as kt, f as gt, g as jr, r as Ot, h as Be, j as Pu, k as Ou, l as On, z as Au, n as An, o as Yr, p as _u, q as Lu, v as Vu, w as Wu, x as Mu, y as Bu, A as Gu, B as zu, C as Uu, D as Hu, E as ft, F as Xu, G as Ku, H as Qr, I as qu, J as ju, K as Yu, L as Qu, M as Zu, N as Ju, O as ed, P as td, Q as sd, R as nd, S as od, T as ad, U as rd, V as id, W as cd, X as ld, Y as ud, Z as dd, _ as pd, $ as hd, a0 as fd, a1 as md, a2 as gd, a3 as xd, a4 as Cd, a5 as bd, a6 as Id, a7 as yd, a8 as $d, a9 as kd, aa as wd, ab as Nd, ac as vd, ad as Sd, ae as Rd, af as Td, ag as Dd, ah as Ed, ai as Fd, aj as Pd, ak as Zr, al as Od, t as Jr, am as Ad, an as _d, ao as ei, ap as Ld, aq as Vd, ar as Wd, as as Md, at as Bd, au as Gd, av as zd, aw as Ud, ax as Hd, ay as Xd, az as Kd, aA as qd, aB as jd, aC as Yd, aD as Qd, aE as Zd, aF as Jd, aG as ep, aH as tp, aI as sp, aJ as np, aK as op, aL as ap, aM as rp, aN as ip, aO as cp, aP as lp, aQ as up, aR as dp, aS as pp, aT as hp, aU as ti, aV as fp, aW as mp, aX as gp, aY as xp, aZ as Cp, a_ as bp, a$ as Ip, b0 as yp, b1 as $p, b2 as kp } from "./shared-Ca6iDobD.js";
|
|
22
|
+
import { m as xt, D as Ts, E as Ds, F as Es, G as si, H as ni, I as oi, J as je, K as ai, L as ri, M as ii, N as ci, O as li, P as ui, Q as di, S as pi, T as hi, U as fi, V as mi, W as gi, X as wp, Y as Np, Z as vp, _ as Sp, $ as Rp, a0 as Tp, a1 as xi, a2 as Ci, a3 as bi, a4 as Ii, a5 as Qn, a6 as Dp, a7 as Ep, a8 as Fp, a9 as Pp, aa as Op, ab as Ap, ac as _n, ad as ze, u as We, ae as yi, c as $i, af as _e, ag as ki, g as wi, a as Ni, ah as _p, ai as Lp, aj as Z, ak as Mt, al as Vp, am as Zn, an as Wp, ao as Mp, ap as Bt, aq as Bp, ar as Gp, as as Ks, at as jt, au as Yt, av as zp, aw as Up, ax as Jn, ay as Hp, az as Xp, aA as Vs, aB as Kp, aC as qp, aD as jp, aE as Qt, aF as Ws, aG as Yp, f as Ce, aH as we, aI as Zt, aJ as Jt, aK as Qp, d as eo, e as to, i as vi, aL as Zp, aM as Jp, aN as eh, aO as th, aP as sh, aQ as nh, j as Si, aR as Gt, h as oh, aS as ah, aT as rh } from "./gpgpu_math-CNslybmD.js";
|
|
23
|
+
import { g as ye, a as $e, b as Re, c as Ee, e as Oe, h as Fs } from "./axis_util-Bu4h7XWV.js";
|
|
24
24
|
import { s as ih } from "./index-C4L8Cm77.js";
|
|
25
|
-
import { c as wt } from "./scatter_nd_util-
|
|
26
|
-
import { a as Se, c as ch, U as it, d as tt, e as qe, A as so, f as Nt, B as Ln, h as Vn, m as At, u as ae, C as Ue, b as ke, i as Le, j as Wn, k as pt, l as vt, n as lh, o as uh, p as dh, q as ph } from "./kernel_funcs_utils-
|
|
27
|
-
import { R as hh, r as H, a as fh } from "./Reshape-
|
|
28
|
-
import { M as Ri } from "./mulmat_packed_gpu-
|
|
29
|
-
import { t as Ti, s as Mn, a as zt, m as mh, r as gh, b as xh, c as Ch, d as bh } from "./RealDiv-
|
|
25
|
+
import { c as wt } from "./scatter_nd_util-93ln7Hut.js";
|
|
26
|
+
import { a as Se, c as ch, U as it, d as tt, e as qe, A as so, f as Nt, B as Ln, h as Vn, m as At, u as ae, C as Ue, b as ke, i as Le, j as Wn, k as pt, l as vt, n as lh, o as uh, p as dh, q as ph } from "./kernel_funcs_utils-CUxJCg0g.js";
|
|
27
|
+
import { R as hh, r as H, a as fh } from "./Reshape-t7Kcikjk.js";
|
|
28
|
+
import { M as Ri } from "./mulmat_packed_gpu-D4nKF7Je.js";
|
|
29
|
+
import { t as Ti, s as Mn, a as zt, m as mh, r as gh, b as xh, c as Ch, d as bh } from "./RealDiv-BYViZwhN.js";
|
|
30
30
|
/**
|
|
31
31
|
* @license
|
|
32
32
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { g as p, W as w } from "./index-BAzbokzv.js";
|
|
2
2
|
function k(o, t, r) {
|
|
3
3
|
const n = t.rank > 1 ? t.shape[t.rank - 1] : 1, e = t.rank > 1 ? t.rank - 1 : 1, h = `Must have updates.shape = indices.shape[:batchDim] + shape[sliceDim:], got updates.shape: ${r.shape}, indices.shape: ${t.shape}, shape: ${o}, sliceDim: ${n}, and batchDim: ${e}.`;
|
|
4
4
|
if (r.rank < e)
|
|
@@ -31,7 +31,7 @@ function $(o, t, r) {
|
|
|
31
31
|
}
|
|
32
32
|
k(r, t, o);
|
|
33
33
|
}
|
|
34
|
-
function
|
|
34
|
+
function g(o, t, r) {
|
|
35
35
|
const n = t.shape.length, e = n > 1 ? t.shape[n - 1] : 1, h = r.length;
|
|
36
36
|
let a = 1;
|
|
37
37
|
for (let s = e; s < h; ++s)
|
|
@@ -41,6 +41,6 @@ function m(o, t, r) {
|
|
|
41
41
|
}
|
|
42
42
|
export {
|
|
43
43
|
k as a,
|
|
44
|
-
|
|
44
|
+
g as c,
|
|
45
45
|
$ as v
|
|
46
46
|
};
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { r as ce } from "./reshape-
|
|
3
|
-
import { s as ae } from "./sum-
|
|
1
|
+
import { i as N, ao as H, o as _, q as S, E as L, ap as te, aq as ne, al as se, an as re, ar as ie, as as oe, at as le, a as ue, au as fe, av as O } from "./index-BAzbokzv.js";
|
|
2
|
+
import { r as ce } from "./reshape-krWGKraP.js";
|
|
3
|
+
import { s as ae } from "./sum-BE_Irnim.js";
|
|
4
4
|
/**
|
|
5
5
|
* @license
|
|
6
6
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -51,7 +51,7 @@ function X(e, t, n, s, r, u, i = !1, o = "channelsLast") {
|
|
|
51
51
|
[c, g, a, p] = e;
|
|
52
52
|
else
|
|
53
53
|
throw new Error(`Unknown dataFormat ${o}`);
|
|
54
|
-
const [l, h, , d] = t, [A, m] = T(n), [b,
|
|
54
|
+
const [l, h, , d] = t, [A, m] = T(n), [b, k] = T(s), f = G(l, b), E = G(h, k), { padInfo: w, outHeight: I, outWidth: x } = de(r, a, p, A, m, f, E, u, o), y = i ? d * g : d;
|
|
55
55
|
let $;
|
|
56
56
|
return o === "channelsFirst" ? $ = [c, y, I, x] : o === "channelsLast" && ($ = [c, I, x, y]), {
|
|
57
57
|
batchSize: c,
|
|
@@ -62,15 +62,15 @@ function X(e, t, n, s, r, u, i = !1, o = "channelsLast") {
|
|
|
62
62
|
outHeight: I,
|
|
63
63
|
outWidth: x,
|
|
64
64
|
outChannels: y,
|
|
65
|
-
padInfo:
|
|
65
|
+
padInfo: w,
|
|
66
66
|
strideHeight: A,
|
|
67
67
|
strideWidth: m,
|
|
68
68
|
filterHeight: l,
|
|
69
69
|
filterWidth: h,
|
|
70
70
|
effectiveFilterHeight: f,
|
|
71
|
-
effectiveFilterWidth:
|
|
71
|
+
effectiveFilterWidth: E,
|
|
72
72
|
dilationHeight: b,
|
|
73
|
-
dilationWidth:
|
|
73
|
+
dilationWidth: k,
|
|
74
74
|
inShape: e,
|
|
75
75
|
outShape: $,
|
|
76
76
|
filterShape: t
|
|
@@ -84,22 +84,22 @@ function he(e, t, n, s, r, u = !1, i = "channelsLast", o) {
|
|
|
84
84
|
[c, l, a, p, g] = e;
|
|
85
85
|
else
|
|
86
86
|
throw new Error(`Unknown dataFormat ${i}`);
|
|
87
|
-
const [h, d, A, , m] = t, [b,
|
|
87
|
+
const [h, d, A, , m] = t, [b, k, f] = W(n), [E, w, I] = W(s), x = G(h, E), y = G(d, w), $ = G(A, I), { padInfo: C, outDepth: M, outHeight: D, outWidth: F } = me(r, a, p, g, b, k, f, x, y, $, o), V = u ? m * l : m;
|
|
88
88
|
let U;
|
|
89
|
-
return i === "channelsFirst" ? U = [c, V,
|
|
89
|
+
return i === "channelsFirst" ? U = [c, V, M, D, F] : i === "channelsLast" && (U = [c, M, D, F, V]), {
|
|
90
90
|
batchSize: c,
|
|
91
91
|
dataFormat: i,
|
|
92
92
|
inDepth: a,
|
|
93
93
|
inHeight: p,
|
|
94
94
|
inWidth: g,
|
|
95
95
|
inChannels: l,
|
|
96
|
-
outDepth:
|
|
96
|
+
outDepth: M,
|
|
97
97
|
outHeight: D,
|
|
98
98
|
outWidth: F,
|
|
99
99
|
outChannels: V,
|
|
100
100
|
padInfo: C,
|
|
101
101
|
strideDepth: b,
|
|
102
|
-
strideHeight:
|
|
102
|
+
strideHeight: k,
|
|
103
103
|
strideWidth: f,
|
|
104
104
|
filterDepth: h,
|
|
105
105
|
filterHeight: d,
|
|
@@ -107,8 +107,8 @@ function he(e, t, n, s, r, u = !1, i = "channelsLast", o) {
|
|
|
107
107
|
effectiveFilterDepth: x,
|
|
108
108
|
effectiveFilterHeight: y,
|
|
109
109
|
effectiveFilterWidth: $,
|
|
110
|
-
dilationDepth:
|
|
111
|
-
dilationHeight:
|
|
110
|
+
dilationDepth: E,
|
|
111
|
+
dilationHeight: w,
|
|
112
112
|
dilationWidth: I,
|
|
113
113
|
inShape: e,
|
|
114
114
|
outShape: U,
|
|
@@ -175,8 +175,8 @@ function me(e, t, n, s, r, u, i, o, c, a, p) {
|
|
|
175
175
|
l = m[0], h = m[1], d = m[2];
|
|
176
176
|
} else if (e === "same") {
|
|
177
177
|
l = Math.ceil(t / r), h = Math.ceil(n / u), d = Math.ceil(s / i);
|
|
178
|
-
const A = (l - 1) * r + o - t, m = (h - 1) * u + c - n, b = (d - 1) * i + a - s,
|
|
179
|
-
g = { top:
|
|
178
|
+
const A = (l - 1) * r + o - t, m = (h - 1) * u + c - n, b = (d - 1) * i + a - s, k = Math.floor(A / 2), f = A - k, E = Math.floor(m / 2), w = m - E, I = Math.floor(b / 2), x = b - I;
|
|
179
|
+
g = { top: E, bottom: w, left: I, right: x, front: k, back: f, type: "SAME" };
|
|
180
180
|
} else
|
|
181
181
|
throw Error(`Unknown padding parameter: ${e}`);
|
|
182
182
|
return { padInfo: g, outDepth: l, outHeight: h, outWidth: d };
|
|
@@ -244,11 +244,11 @@ function Qe(e, t, n) {
|
|
|
244
244
|
* limitations under the License.
|
|
245
245
|
* =============================================================================
|
|
246
246
|
*/
|
|
247
|
-
function
|
|
247
|
+
function Ee(e) {
|
|
248
248
|
const n = { x: S(e, "x", "sigmoid", "float32") };
|
|
249
249
|
return L.runKernel(te, n);
|
|
250
250
|
}
|
|
251
|
-
const
|
|
251
|
+
const we = /* @__PURE__ */ _({ sigmoid_: Ee });
|
|
252
252
|
/**
|
|
253
253
|
* @license
|
|
254
254
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -311,7 +311,7 @@ function ye(e, t) {
|
|
|
311
311
|
const n = S(e, "x", "prelu"), s = S(t, "alpha", "prelu"), r = { x: n, alpha: s };
|
|
312
312
|
return L.runKernel(re, r);
|
|
313
313
|
}
|
|
314
|
-
const
|
|
314
|
+
const Me = /* @__PURE__ */ _({ prelu_: ye });
|
|
315
315
|
/**
|
|
316
316
|
* @license
|
|
317
317
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -328,11 +328,11 @@ const ke = /* @__PURE__ */ _({ prelu_: ye });
|
|
|
328
328
|
* limitations under the License.
|
|
329
329
|
* =============================================================================
|
|
330
330
|
*/
|
|
331
|
-
function
|
|
331
|
+
function ke(e) {
|
|
332
332
|
const n = { x: S(e, "x", "relu") };
|
|
333
333
|
return L.runKernel(ie, n);
|
|
334
334
|
}
|
|
335
|
-
const Se = /* @__PURE__ */ _({ relu_:
|
|
335
|
+
const Se = /* @__PURE__ */ _({ relu_: ke });
|
|
336
336
|
/**
|
|
337
337
|
* @license
|
|
338
338
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -413,11 +413,11 @@ function ze(e, t, n, s) {
|
|
|
413
413
|
if (t === "relu6")
|
|
414
414
|
return Ne(e);
|
|
415
415
|
if (t === "prelu")
|
|
416
|
-
return
|
|
416
|
+
return Me(e, n);
|
|
417
417
|
if (t === "leakyrelu")
|
|
418
418
|
return xe(e, s);
|
|
419
419
|
if (t === "sigmoid")
|
|
420
|
-
return
|
|
420
|
+
return we(e);
|
|
421
421
|
throw new Error(`Unknown fused activation ${t}.`);
|
|
422
422
|
}
|
|
423
423
|
const et = (e, t) => !(e > 0) || t === "linear";
|
|
@@ -600,43 +600,43 @@ function We(e, t, n, s, r, u, i, o, c) {
|
|
|
600
600
|
for (let f = 0; f < e.length; ++f) {
|
|
601
601
|
if (l.strides[f] === 0)
|
|
602
602
|
throw Error(`strides[${f}] must be non-zero`);
|
|
603
|
-
const
|
|
604
|
-
if (
|
|
605
|
-
m.push(
|
|
603
|
+
const E = !!(l.shrinkAxisMask & 1 << f), w = e[f];
|
|
604
|
+
if (w === -1) {
|
|
605
|
+
m.push(E ? 1 : -1);
|
|
606
606
|
continue;
|
|
607
607
|
}
|
|
608
608
|
const I = [l.beginMask & 1 << f, l.endMask & 1 << f], x = [
|
|
609
609
|
l.strides[f] > 0 ? 0 : -1,
|
|
610
|
-
l.strides[f] > 0 ?
|
|
610
|
+
l.strides[f] > 0 ? w : w - 1
|
|
611
611
|
];
|
|
612
|
-
if (
|
|
612
|
+
if (E && l.strides[f] <= 0)
|
|
613
613
|
throw Error("only stride 1 allowed on non-range indexing.");
|
|
614
614
|
A = A && l.strides[f] === 1;
|
|
615
615
|
const y = !!(l.beginMask & 1 << f && l.endMask & 1 << f);
|
|
616
616
|
if (l.beginValid && l.endValid) {
|
|
617
|
-
if (
|
|
618
|
-
const D = l.begin[f] < 0 ?
|
|
619
|
-
if (l.begin[f] = D, l.end[f] = l.begin[f] + 1, D < 0 || D >=
|
|
617
|
+
if (E) {
|
|
618
|
+
const D = l.begin[f] < 0 ? w + l.begin[f] : l.begin[f];
|
|
619
|
+
if (l.begin[f] = D, l.end[f] = l.begin[f] + 1, D < 0 || D >= w)
|
|
620
620
|
throw Error(`slice index ${l.begin[f]} of dimension ${f} out of bounds.`);
|
|
621
621
|
} else
|
|
622
|
-
l.begin[f] = K(l.begin[f], 0, l.strides[f],
|
|
623
|
-
const
|
|
624
|
-
h = h &&
|
|
622
|
+
l.begin[f] = K(l.begin[f], 0, l.strides[f], w, I, x), l.end[f] = K(l.end[f], 1, l.strides[f], w, I, x);
|
|
623
|
+
const M = l.strides[f] === 1 && l.begin[f] === 0 && l.end[f] === w;
|
|
624
|
+
h = h && M, d = d && (f === 0 && l.strides[f] === 1 || M);
|
|
625
625
|
} else
|
|
626
626
|
h = h && l.strides[f] === 1 && y, d = d && (f === 0 && l.strides[f] === 1 || y);
|
|
627
627
|
let $, C = !1;
|
|
628
|
-
if (l.beginValid && l.endValid ? ($ = l.end[f] - l.begin[f], C = !0) :
|
|
629
|
-
let
|
|
630
|
-
$ === 0 || $ < 0 != l.strides[f] < 0 ?
|
|
628
|
+
if (l.beginValid && l.endValid ? ($ = l.end[f] - l.begin[f], C = !0) : E ? ($ = 1, C = !0) : y && w >= 0 && (l.strides[f] < 0 ? $ = -w : $ = w, C = !0), C) {
|
|
629
|
+
let M;
|
|
630
|
+
$ === 0 || $ < 0 != l.strides[f] < 0 ? M = 0 : M = Math.trunc($ / l.strides[f]) + ($ % l.strides[f] !== 0 ? 1 : 0), m.push(M);
|
|
631
631
|
} else
|
|
632
632
|
m.push(-1);
|
|
633
633
|
}
|
|
634
634
|
for (let f = 0; f < l.finalShapeGatherIndices.length; ++f) {
|
|
635
|
-
const
|
|
636
|
-
|
|
635
|
+
const E = l.finalShapeGatherIndices[f];
|
|
636
|
+
E >= 0 ? b.push(m[E]) : E === v && b.push(1);
|
|
637
637
|
}
|
|
638
638
|
return {
|
|
639
|
-
finalShapeSparse: b.filter((f,
|
|
639
|
+
finalShapeSparse: b.filter((f, E) => l.finalShapeGatherIndices[E] !== v),
|
|
640
640
|
finalShape: b,
|
|
641
641
|
isIdentity: h,
|
|
642
642
|
sliceDim0: d,
|
|
@@ -706,7 +706,7 @@ const tt = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
|
|
|
706
706
|
*/
|
|
707
707
|
const nt = 1.7580993408473768, st = 1.0507009873554805;
|
|
708
708
|
export {
|
|
709
|
-
|
|
709
|
+
Me as A,
|
|
710
710
|
Ne as B,
|
|
711
711
|
We as C,
|
|
712
712
|
Re as D,
|
|
@@ -733,7 +733,7 @@ export {
|
|
|
733
733
|
P as t,
|
|
734
734
|
Ve as u,
|
|
735
735
|
Le as v,
|
|
736
|
-
|
|
736
|
+
we as w,
|
|
737
737
|
$e as x,
|
|
738
738
|
Se as y,
|
|
739
739
|
xe as z
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { h as it, m as kt, k as Je, l as Qe, R as tn, v as en, n as nn, o as sn, p as on, q as rn, r as an, s as ln, t as cn, w as un, x as hn, y as fn, z as Nt, A as gn, B as dn, C as mn } from "./gpgpu_math-
|
|
3
|
-
import { g as pn, a as In, e as wn, c as bn } from "./axis_util-
|
|
4
|
-
import { b as xn } from "./broadcast_to-
|
|
5
|
-
import { r as En } from "./reshape-
|
|
6
|
-
import { p as Fn, q as yn, r as kn, u as Nn } from "./selu_util-
|
|
1
|
+
import { i as qt, bc as _e, g as A, h as Ut, W as K, bd as et, aH as ht, be as gt, bf as at, _ as Ve, $ as st, ak as De, a$ as We, bg as $e, bh as ze, bi as Be, bj as Mt, ah as H, bk as Ot, bl as D, bm as Lt, bn as At, a8 as _t, bo as Vt, ag as Dt, bp as Wt, aT as $t, aU as zt, aW as Bt, aX as Gt, a9 as jt, bq as Zt, a_ as Ht, br as Kt, aY as Ge, am as je, bs as Xt, b2 as Ze, bt as He, p as Yt, u as Ke, t as Xe, bu as Jt, bv as wt, ap as Qt, aO as Ye, bw as te, bx as ee, by as ne, aG as pt, bz as se, bA as ot } from "./index-BAzbokzv.js";
|
|
2
|
+
import { h as it, m as kt, k as Je, l as Qe, R as tn, v as en, n as nn, o as sn, p as on, q as rn, r as an, s as ln, t as cn, w as un, x as hn, y as fn, z as Nt, A as gn, B as dn, C as mn } from "./gpgpu_math-CNslybmD.js";
|
|
3
|
+
import { g as pn, a as In, e as wn, c as bn } from "./axis_util-Bu4h7XWV.js";
|
|
4
|
+
import { b as xn } from "./broadcast_to-DARN-DBD.js";
|
|
5
|
+
import { r as En } from "./reshape-krWGKraP.js";
|
|
6
|
+
import { p as Fn, q as yn, r as kn, u as Nn } from "./selu_util-sntGesxr.js";
|
|
7
7
|
import { g as Sn } from "./_commonjsHelpers-ByX85dGu.js";
|
|
8
8
|
function vn(e, t) {
|
|
9
9
|
for (var n = 0; n < t.length; n++) {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { o as p, q as i, E as a, F as c } from "./index-BAzbokzv.js";
|
|
2
2
|
/**
|
|
3
3
|
* @license
|
|
4
4
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -15,9 +15,9 @@ import { q as p, w as i, E as a, H as c } from "./index-CamYe_M8.js";
|
|
|
15
15
|
* limitations under the License.
|
|
16
16
|
* =============================================================================
|
|
17
17
|
*/
|
|
18
|
-
function e(t, s,
|
|
19
|
-
const
|
|
20
|
-
return a.runKernel(c,
|
|
18
|
+
function e(t, s, o = 0) {
|
|
19
|
+
const n = { x: i(t, "x", "split") }, r = { numOrSizeSplits: s, axis: o };
|
|
20
|
+
return a.runKernel(c, n, r);
|
|
21
21
|
}
|
|
22
22
|
const u = /* @__PURE__ */ p({ split_: e });
|
|
23
23
|
export {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { o as e, w as c, i as n, E as i, P as k } from "./index-BAzbokzv.js";
|
|
2
2
|
/**
|
|
3
3
|
* @license
|
|
4
4
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -19,7 +19,7 @@ function u(r, t = 0) {
|
|
|
19
19
|
const s = c(r, "tensors", "stack", "string_or_numeric");
|
|
20
20
|
n(s.length >= 1, () => "Pass at least one tensor to tf.stack"), s.length > 0 && n(t <= s[0].rank, () => "Axis must be <= rank of the tensor");
|
|
21
21
|
const o = s, a = { axis: t };
|
|
22
|
-
return
|
|
22
|
+
return i.runKernel(k, o, a);
|
|
23
23
|
}
|
|
24
24
|
const l = /* @__PURE__ */ e({ stack_: u });
|
|
25
25
|
export {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { o as e, q as u, L as c, E as l, N as m } from "./index-BAzbokzv.js";
|
|
2
2
|
/**
|
|
3
3
|
* @license
|
|
4
4
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -15,10 +15,10 @@ import { q as e, w as u, N as c, E as l, O as m } from "./index-CamYe_M8.js";
|
|
|
15
15
|
* limitations under the License.
|
|
16
16
|
* =============================================================================
|
|
17
17
|
*/
|
|
18
|
-
function i(t,
|
|
18
|
+
function i(t, o = null, n = !1) {
|
|
19
19
|
let s = u(t, "x", "sum");
|
|
20
20
|
s.dtype === "bool" && (s = c(s, "int32"));
|
|
21
|
-
const r = { x: s }, a = { axis:
|
|
21
|
+
const r = { x: s }, a = { axis: o, keepDims: n };
|
|
22
22
|
return l.runKernel(m, r, a);
|
|
23
23
|
}
|
|
24
24
|
const f = /* @__PURE__ */ e({ sum_: i });
|
package/dist/training/AdamExt.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as r,
|
|
1
|
+
import { A as r, a as c, s as h, b as g, e as o } from "../index-BAzbokzv.js";
|
|
2
2
|
class u extends r {
|
|
3
3
|
constructor(t, e, s, a, i) {
|
|
4
4
|
super(t, e, s, a), this.config = i, this.startLearningRate = t;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { generateText as T } from "../utilities/generate.js";
|
|
2
2
|
import L from "./Trainer.js";
|
|
3
3
|
import x from "./Evaluator.js";
|
|
4
|
-
import {
|
|
4
|
+
import { d as h } from "../index-BAzbokzv.js";
|
|
5
5
|
import y from "../utilities/profile.js";
|
|
6
6
|
const D = {
|
|
7
7
|
desiredLoss: 0.01,
|
package/dist/training/Trainer.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { DatasetBuilder as g, flattenTokens as m, PAGE_FACTOR as u } from "./DatasetBuilder.js";
|
|
2
2
|
import f from "./AdamExt.js";
|
|
3
|
-
import { t as y, v as z,
|
|
4
|
-
import { n as S } from "../norm-
|
|
5
|
-
import { z as p } from "../zeros-
|
|
3
|
+
import { t as y, v as z, d as c } from "../index-BAzbokzv.js";
|
|
4
|
+
import { n as S } from "../norm-CzltS9Fz.js";
|
|
5
|
+
import { z as p } from "../zeros-YJDE7oRb.js";
|
|
6
6
|
class R {
|
|
7
7
|
constructor(t, e, s = 1e-3) {
|
|
8
8
|
this.tokenizer = e, this.model = t, this.learningRate = s, this.resetOptimizer(), this.datasetBuilder = new g(e, t.config.gpt.blockSize);
|
|
@@ -1,35 +1,28 @@
|
|
|
1
|
-
import { gatherSub as
|
|
2
|
-
import { scatterSub as
|
|
3
|
-
import {
|
|
4
|
-
import { s as
|
|
5
|
-
import { m as z } from "../max-
|
|
6
|
-
import { l as
|
|
7
|
-
function
|
|
8
|
-
return
|
|
9
|
-
const
|
|
10
|
-
return
|
|
1
|
+
import { gatherSub as x } from "../ops/gatherSub.js";
|
|
2
|
+
import { scatterSub as L } from "../ops/scatterSub.js";
|
|
3
|
+
import { l as C, t as u, z as E, b as G } from "../index-BAzbokzv.js";
|
|
4
|
+
import { s as y } from "../softmax-fsdtf6JC.js";
|
|
5
|
+
import { m as z } from "../max-DtlIuVeW.js";
|
|
6
|
+
import { l as v } from "../log_sum_exp-YEo2h3gb.js";
|
|
7
|
+
function k(t, s) {
|
|
8
|
+
return u(() => {
|
|
9
|
+
const n = t.shape[t.shape.length - 1], c = t.shape.slice(0, -1).reduce((o, e) => o * e, 1), h = t.shape.length > 2 ? t.reshape([c, n]) : t, p = s.shape.length > 1 ? s.reshape([c]).cast("int32") : s.cast("int32"), r = z(h, -1, !0), a = G(h, r), m = v(a, -1);
|
|
10
|
+
return x(m, p, a);
|
|
11
11
|
});
|
|
12
12
|
}
|
|
13
|
-
function
|
|
14
|
-
return
|
|
15
|
-
const o = s.shape.length > 2 ? s.reshape([-1, s.shape[s.shape.length - 1]]) : s, p = e.shape.length > 1 ? e.reshape([-1]).cast("int32") : e.cast("int32"), [n, t] = u().runKernel(
|
|
16
|
-
"NativeSparseSoftmaxCrossEntropy",
|
|
17
|
-
{ logits: o, labels: p },
|
|
18
|
-
{}
|
|
19
|
-
);
|
|
20
|
-
return m([t.reshape(s.shape)]), { value: n, gradFunc: (r, h) => [h[0], S(e)] };
|
|
21
|
-
}) : i(
|
|
13
|
+
function A() {
|
|
14
|
+
return C(
|
|
22
15
|
// @ts-expect-error Invalid params
|
|
23
|
-
(s,
|
|
24
|
-
const
|
|
25
|
-
return
|
|
26
|
-
const
|
|
27
|
-
return [
|
|
16
|
+
(s, n, d) => {
|
|
17
|
+
const c = s.shape[s.shape.length - 1], p = s.shape.slice(0, -1).reduce((o, e) => o * e, 1), r = s.reshape([p, c]), a = n.reshape([p]).cast("int32"), m = k(r, a);
|
|
18
|
+
return d([r, a]), r.dispose(), a.dispose(), { value: m, gradFunc: (o, e) => u(() => {
|
|
19
|
+
const S = e[0], f = e[1], b = y(S), l = L(b, f, o), g = E(n);
|
|
20
|
+
return [l.reshape(s.shape), g];
|
|
28
21
|
}) };
|
|
29
22
|
}
|
|
30
23
|
);
|
|
31
24
|
}
|
|
32
25
|
export {
|
|
33
|
-
|
|
34
|
-
|
|
26
|
+
A as createSoftmaxCrossEntropyWithGrad,
|
|
27
|
+
k as sparseSoftmaxCrossEntropy
|
|
35
28
|
};
|
package/dist/utilities/dummy.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { m as f, v as S, e as w } from "../index-
|
|
2
|
-
import { z as i } from "../zeros-
|
|
1
|
+
import { m as f, v as S, e as w } from "../index-BAzbokzv.js";
|
|
2
|
+
import { z as i } from "../zeros-YJDE7oRb.js";
|
|
3
3
|
async function P(s) {
|
|
4
4
|
const t = i([1, s.config.gpt.blockSize], "int32"), [e, n] = s.forward({ training: !1 }, t);
|
|
5
5
|
await e.data(), e.dispose(), n && n.dispose(), t.dispose();
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { t as y } from "../index-
|
|
2
|
-
import { t as x } from "../tensor2d-
|
|
3
|
-
import { c as f } from "../concat-
|
|
1
|
+
import { t as y } from "../index-BAzbokzv.js";
|
|
2
|
+
import { t as x } from "../tensor2d-CPEkynbH.js";
|
|
3
|
+
import { c as f } from "../concat-5aPGqw3Z.js";
|
|
4
4
|
async function A(o, r, a, c, T) {
|
|
5
5
|
if (c <= 0)
|
|
6
6
|
throw new Error("Length must be a positive integer");
|
package/dist/utilities/save.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { j as p } from "../jszip.min-CjP2V1VV.js";
|
|
2
2
|
import b from "../tokeniser/CharTokeniser.js";
|
|
3
3
|
import { save_safetensors as l } from "./safetensors.js";
|
|
4
|
-
import { VERSION as y } from "
|
|
4
|
+
import { VERSION as y } from "../loader/load.js";
|
|
5
5
|
async function N(e, a, n) {
|
|
6
6
|
const f = n?.includeLog ?? !0, s = /* @__PURE__ */ new Map();
|
|
7
7
|
e.saveWeights(s);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import "../index-
|
|
2
|
-
import { t as p } from "../tensor-
|
|
1
|
+
import "../index-BAzbokzv.js";
|
|
2
|
+
import { t as p } from "../tensor-Xyi595sG.js";
|
|
3
3
|
function h(n) {
|
|
4
4
|
const e = n.reduce((s, o) => s + o.length, 0), a = new Float32Array(e);
|
|
5
5
|
let t = 0;
|