@genai-fi/nanogpt 0.15.1 → 0.15.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +44 -42
- package/dist/{RealDiv-BVigXOzk.js → RealDiv-Blil1IAl.js} +11 -11
- package/dist/{Reshape-rTOwpsUF.js → Reshape-CZVlscuS.js} +2 -2
- package/dist/{Reshape-Bt5FRw1J.js → Reshape-rNDnWBJS.js} +1 -1
- package/dist/TeachableLLM.d.ts +5 -0
- package/dist/TeachableLLM.js +29 -18
- package/dist/{axis_util-TthehkGs.js → axis_util-BTeTGnZF.js} +1 -1
- package/dist/backend.js +2 -2
- package/dist/backend_util-DGV1tIji.js +425 -0
- package/dist/{backend_webgpu-D0Qxzbf-.js → backend_webgpu-WReHKYZJ.js} +10 -10
- package/dist/{broadcast_to-B8hSZdb5.js → broadcast_to-BHFjzMSF.js} +2 -2
- package/dist/checks/appendCache.js +2 -2
- package/dist/checks/attentionMask.js +3 -3
- package/dist/checks/gelu.js +2 -2
- package/dist/checks/matMulGelu.js +2 -2
- package/dist/checks/normRMS.js +4 -4
- package/dist/checks/normRMSGrad.js +3 -3
- package/dist/checks/packUnpack.js +2 -2
- package/dist/checks/qkv.js +2 -2
- package/dist/checks/rope.js +2 -2
- package/dist/{clip_by_value-X6lmCZs0.js → clip_by_value-q6njgxYg.js} +1 -1
- package/dist/{complex-B3Lwk4IM.js → complex-W1ugWfNb.js} +1 -1
- package/dist/{concat-bX2wJt_F.js → concat-ByAvdM_j.js} +1 -1
- package/dist/{concat_util-DW7FiHui.js → concat_util-Bk6ay4Ob.js} +1 -1
- package/dist/data/parquet.js +1 -1
- package/dist/{dataset-DPU3hN0T.js → dataset-CtujTjY_.js} +3 -3
- package/dist/{dropout_util-8MaBQ4OF.js → dropout_util-D1mAkyan.js} +5 -5
- package/dist/{expand_dims-DK8hSYQw.js → expand_dims-B_9x_xWm.js} +1 -1
- package/dist/{exports_initializers-D04twsp1.js → exports_initializers-Br2S2v4K.js} +1 -1
- package/dist/{floor-CIzB2pwc.js → floor-DJ37xPdi.js} +1 -1
- package/dist/{gather-DzuXM3Xs.js → gather-DvRBFTfQ.js} +1 -1
- package/dist/{gelu-CX4aUwca.js → gelu-Bq6zXPEw.js} +1 -1
- package/dist/{gpgpu_math-xeTvCd6P.js → gpgpu_math-C0Hfh1As.js} +7 -7
- package/dist/{index-BF0-PdRD.js → index-BSjeovee.js} +105 -104
- package/dist/{index-uoQNhva6.js → index-BU-PZJ0l.js} +316 -320
- package/dist/{index-CSl8jhsR.js → index-CynxJ6AT.js} +83 -83
- package/dist/{index-lJWNfe6P.js → index-SkD4n5bj.js} +1 -1
- package/dist/{kernel_funcs_utils-D1Nso99H.js → kernel_funcs_utils-BNsajF26.js} +3 -3
- package/dist/layers/BaseLayer.js +1 -1
- package/dist/layers/CausalSelfAttention.js +7 -7
- package/dist/layers/LoRA.js +5 -5
- package/dist/layers/MLP.js +5 -5
- package/dist/layers/PositionEmbedding.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +4 -4
- package/dist/layers/TiedEmbedding.js +6 -6
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/layers/WeightStore.js +3 -3
- package/dist/loader/loadTransformers.js +4 -4
- package/dist/loader/oldZipLoad.js +22 -21
- package/dist/loader/save.js +6 -5
- package/dist/loader/types.d.ts +2 -0
- package/dist/main.js +9 -9
- package/dist/{matMul16-C9Aeua4l.js → matMul16-qjE_mKX8.js} +6 -6
- package/dist/{matMulGelu-De0BMbE7.js → matMulGelu-BM8qerU0.js} +4 -4
- package/dist/{mat_mul-3Czif3jo.js → mat_mul-dDBRouCf.js} +1 -1
- package/dist/{mod-DPeZbGKJ.js → mod-COvikySw.js} +1 -1
- package/dist/models/NanoGPTV1.js +2 -2
- package/dist/models/NanoGPTV2.js +2 -2
- package/dist/models/model.d.ts +1 -1
- package/dist/models/model.js +14 -13
- package/dist/{not_equal-DG_EBN1g.js → not_equal-C7_nnOW4.js} +5 -5
- package/dist/{ones-B6BIjz6m.js → ones-B0Fr32MP.js} +3 -3
- package/dist/ops/adamAdjust.js +1 -1
- package/dist/ops/adamMoments.js +1 -1
- package/dist/ops/add16.js +1 -1
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/concat16.js +2 -2
- package/dist/ops/cpu/adamAdjust.js +1 -1
- package/dist/ops/cpu/adamMoments.js +2 -2
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +6 -6
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +3 -3
- package/dist/ops/cpu/gelu.js +1 -1
- package/dist/ops/cpu/matMul16.js +2 -2
- package/dist/ops/cpu/matMulGelu.js +3 -3
- package/dist/ops/cpu/matMulMul.js +1 -1
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +5 -5
- package/dist/ops/dot16.js +2 -2
- package/dist/ops/dropout.js +3 -3
- package/dist/ops/dropout16.d.ts +2 -0
- package/dist/ops/dropout16.js +25 -0
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/globalNorm.js +2 -2
- package/dist/ops/grads/add16.js +1 -1
- package/dist/ops/grads/attentionMask.js +2 -2
- package/dist/ops/grads/dropout16.d.ts +1 -0
- package/dist/ops/grads/dropout16.js +2 -0
- package/dist/ops/grads/gelu.js +2 -2
- package/dist/ops/grads/matMul16.js +3 -3
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/mul16.js +1 -1
- package/dist/ops/grads/normRMS.js +1 -1
- package/dist/ops/grads/pack16.js +3 -3
- package/dist/ops/grads/qkv.js +3 -3
- package/dist/ops/grads/rope.js +2 -2
- package/dist/ops/grads/softmax16.js +1 -1
- package/dist/ops/grads/unpack16.js +2 -2
- package/dist/ops/matMul16.js +3 -3
- package/dist/ops/matMulGelu.js +2 -2
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mul16.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/pack16.js +2 -2
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/reshape16.js +2 -2
- package/dist/ops/rope.js +2 -2
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/slice16.js +2 -2
- package/dist/ops/softmax16.js +1 -1
- package/dist/ops/sub16.js +1 -1
- package/dist/ops/sum16.js +2 -2
- package/dist/ops/transpose16.js +3 -3
- package/dist/ops/unpack16.js +2 -2
- package/dist/ops/webgl/adamAdjust.js +2 -2
- package/dist/ops/webgl/adamMoments.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/dropout16.d.ts +1 -0
- package/dist/ops/webgl/dropout16.js +11 -0
- package/dist/ops/webgl/fusedSoftmax.js +4 -4
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.js +3 -3
- package/dist/ops/webgl/matMul16.js +5 -5
- package/dist/ops/webgl/matMulGelu.js +4 -4
- package/dist/ops/webgl/matMulMul.js +2 -2
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/ops/webgpu/adamAdjust.js +3 -3
- package/dist/ops/webgpu/adamMoments.js +3 -3
- package/dist/ops/webgpu/add16.js +1 -1
- package/dist/ops/webgpu/appendCache.js +3 -3
- package/dist/ops/webgpu/attentionMask.js +2 -2
- package/dist/ops/webgpu/attentionMask32_program.js +2 -2
- package/dist/ops/webgpu/clipScale.js +1 -1
- package/dist/ops/webgpu/concat16.js +5 -5
- package/dist/ops/webgpu/dropout16.d.ts +1 -0
- package/dist/ops/webgpu/dropout16.js +51 -0
- package/dist/ops/webgpu/gatherSub.js +3 -3
- package/dist/ops/webgpu/gelu.js +3 -3
- package/dist/ops/webgpu/index.js +1 -0
- package/dist/ops/webgpu/matMul16.js +5 -5
- package/dist/ops/webgpu/matMul16_program.js +2 -2
- package/dist/ops/webgpu/mul16.js +1 -1
- package/dist/ops/webgpu/norm2.js +1 -1
- package/dist/ops/webgpu/normRMS.js +2 -2
- package/dist/ops/webgpu/normRMSGrad.js +4 -4
- package/dist/ops/webgpu/pack16.js +1 -1
- package/dist/ops/webgpu/pack16_program.js +2 -2
- package/dist/ops/webgpu/qkv.js +2 -2
- package/dist/ops/webgpu/rope.js +3 -3
- package/dist/ops/webgpu/scatterSub.js +3 -3
- package/dist/ops/webgpu/slice16.js +4 -4
- package/dist/ops/webgpu/softmax16.js +2 -2
- package/dist/ops/webgpu/softmax16_program.js +2 -2
- package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
- package/dist/ops/webgpu/softmax16grad.js +1 -1
- package/dist/ops/webgpu/sub16.js +1 -1
- package/dist/ops/webgpu/sum16.js +3 -3
- package/dist/ops/webgpu/transpose16.js +2 -2
- package/dist/ops/webgpu/transpose16_program.js +2 -2
- package/dist/ops/webgpu/transpose16_shared_program.js +3 -3
- package/dist/ops/webgpu/unpack16.js +3 -3
- package/dist/ops/webgpu/utils/binary_op.js +3 -3
- package/dist/ops/webgpu/utils/reductions.js +5 -5
- package/dist/{ops-Bn8FhWHY.js → ops-DJqZQEpR.js} +86 -86
- package/dist/{pack16-EZQxclg7.js → pack16-CQUYvE0Y.js} +2 -2
- package/dist/{parquet-BNNuofNs.js → parquet-CXSmguvD.js} +2065 -2065
- package/dist/patches/webgpu_backend.js +7 -7
- package/dist/patches/webgpu_base.js +1 -1
- package/dist/patches/webgpu_program.js +2 -2
- package/dist/{random_normal-BfFeLAg5.js → random_normal-m_v8bCWs.js} +1 -1
- package/dist/{random_width-WzwiRvPZ.js → random_width-DvNaD5W3.js} +131 -131
- package/dist/{range-BuNQxFZs.js → range-HI-DFEwl.js} +1 -1
- package/dist/{readers-DE9M89q1.js → readers-EC5q8HtM.js} +2 -2
- package/dist/{relu-DJDnFXBA.js → relu-CdxfMLya.js} +1 -1
- package/dist/{reshape-pwjcwOms.js → reshape-Dxch7IXb.js} +1 -1
- package/dist/{resize_nearest_neighbor-WhYlfAoZ.js → resize_nearest_neighbor-B8XxmKCX.js} +34 -34
- package/dist/{rope-Cks0wXgt.js → rope-DaRtCQFa.js} +1 -1
- package/dist/{scatter_nd_util-BlJWgCmU.js → scatter_nd_util-DJ8GpL1I.js} +1 -1
- package/dist/segment_util-CXELxApp.js +43 -0
- package/dist/{selu_util-DAcisF1E.js → selu_util-CPAAI9PC.js} +5 -5
- package/dist/{shared-dHG6fzNB.js → shared-DORJR6eU.js} +45 -45
- package/dist/{shared-Io-D1asH.js → shared-DVZEsT-0.js} +1 -1
- package/dist/{slice-AX2p6pAj.js → slice-DR1nQEmX.js} +1 -1
- package/dist/slice_util-DDbd6eyd.js +153 -0
- package/dist/{softmax-BYwgW-oF.js → softmax-CRzm-wNL.js} +1 -1
- package/dist/{split-Cctn4mjQ.js → split-B3dYPTIQ.js} +1 -1
- package/dist/{squeeze-CuZyQBC-.js → squeeze-D8r4RiDQ.js} +2 -2
- package/dist/{stack-B_KoxuTQ.js → stack-CVsp4-gn.js} +1 -1
- package/dist/{step-CzAily5b.js → step-A_sBP7En.js} +1 -1
- package/dist/{sum-DZtIEO1I.js → sum-DfuUvBem.js} +1 -1
- package/dist/{tensor-DO_P4mBQ.js → tensor-Gz_zmSjb.js} +1 -1
- package/dist/{tensor1d-C-kSHWyt.js → tensor1d-D218DD0-.js} +1 -1
- package/dist/{tensor2d-B-WkUbVR.js → tensor2d-BieJDjvb.js} +1 -1
- package/dist/{tensor4d-7IMAE-Er.js → tensor4d-Bptdqjb1.js} +1 -1
- package/dist/{tfjs_backend-BuIjDUKa.js → tfjs_backend-DmRY5T6A.js} +29 -29
- package/dist/{tile-Ci5sjpxd.js → tile-ksYNlaAo.js} +1 -1
- package/dist/training/AdamW.js +2 -2
- package/dist/training/BasicTrainer.js +2 -2
- package/dist/training/DatasetBuilder.js +3 -3
- package/dist/training/Evaluator.js +2 -2
- package/dist/training/SFTDatasetBuilder.js +3 -3
- package/dist/training/orthoGrad.js +1 -1
- package/dist/training/sparseCrossEntropy.js +4 -4
- package/dist/training/validation.js +13 -12
- package/dist/{transpose-Bx8ZPHPT.js → transpose-C4a4Zcdu.js} +2 -2
- package/dist/{unsorted_segment_sum-CxaDDiSg.js → unsorted_segment_sum-Cx23B-vz.js} +14 -14
- package/dist/utilities/dummy.js +2 -2
- package/dist/utilities/multinomialCPU.js +2 -2
- package/dist/utilities/packed.js +1 -1
- package/dist/utilities/parameters.d.ts +1 -0
- package/dist/utilities/parameters.js +19 -13
- package/dist/utilities/performance.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/sentences.js +5 -5
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-Cw7ATbHx.js → variable-BEihhVWu.js} +1 -1
- package/dist/{webgpu_program-CKjlqSop.js → webgpu_program-BhNO5Knw.js} +1 -1
- package/dist/{webgpu_util-AdSNBZTn.js → webgpu_util-CASYKKsD.js} +1 -1
- package/dist/{zeros-if5itJPI.js → zeros-CkHJh2fz.js} +2 -2
- package/dist/{zeros_like-9qjyI2vq.js → zeros_like-CGnKYJEe.js} +62 -62
- package/package.json +1 -1
- package/dist/backend_util--hCDOKBW.js +0 -473
- package/dist/slice_util-4i3k5EwD.js +0 -261
package/dist/Generator.js
CHANGED
|
@@ -1,49 +1,51 @@
|
|
|
1
1
|
import { E as Ui } from "./index-DvYrXKkX.js";
|
|
2
|
-
import { o as Hi, q as Xi, E as Ki,
|
|
3
|
-
import { n as Mc } from "./random_width-
|
|
4
|
-
import { t as Bc } from "./zeros_like-
|
|
2
|
+
import { o as Hi, q as Xi, E as Ki, dl as Ss, a5 as pe, ab as _, as as oo, at as ao, e as Oe, aY as Dt, ay as ro, az as io, au as ji, av as Ft, aD as Ge, ae as co, aG as ws, aH as qi, U as G, af as _e, aI as Yi, H as Ns, aJ as Rs, R as Qi, aj as Zi, x as te, D as lo, _ as Ne, a9 as ee, bb as uo, c7 as Ts, c8 as Es, cO as po, bm as ho, ah as ue, Q as Ye, bn as fo, bo as mo, c9 as go, ca as Ds, cb as Fs, cc as Ps, cd as Os, ce as As, bp as xo, ac as nt, cy as Co, cP as bo, cQ as Io, bs as yo, br as ko, bd as $o, de as vo, C as _s, cS as So, ao as wo, z as No, bt as Ro, c2 as $e, cD as To, bu as Eo, cz as Do, cT as Fo, cA as Po, bv as Ls, bw as Vs, bf as Oo, bx as Ao, am as Qe, V as Ji, by as _o, cB as Lo, cf as Vo, bz as Wo, cF as Mo, cG as Bo, df as Go, cg as zo, bX as ns, bR as St, bU as Ws, cU as yn, dt as ut, du as Uo, cV as kn, dg as ec, N as tc, be as Ho, cW as Xo, bB as Ms, A as Ko, aX as jo, aV as mt, cp as qo, dc as Yo, dd as Qo, bg as Zo, cE as Jo, di as ea, al as ta, G as sa, cq as na, ch as Bs, ci as Gs, cj as zs, dj as oa, b3 as Us, b4 as Hs, bD as Xs, cl as Ks, ck as aa, cY as ra, aM as sc, bE as ia, cC as ca, cZ as la, c_ as ua, dk as da, aP as pa, aZ as ha, cm as fa, bQ as ma, M as js, I as ga, bi as xa, bk as Ca, bj as ba, bF as Ia, d3 as ya, bG as ka, P as $a, a6 as va, bH as Sa, c$ as qs, dv as wa, dw as Na, dx as Ra, Z as Ta, cn as Ys, b9 as Ea, d0 as Da, ba as Fa, d1 as Pa, bJ as Oa, bh as Aa, b6 as Qs, ak as _a, dm as La, aL as Va, bL as Zs, co as Js, bM as en, bN as tn, bC as sn, bI as Wa, dy as Ma, dz as Ba, dn as Ga, dp as za, dq as Ua, J as Ha, d2 as Xa, aK as nn, cr as Ka, dr as ja, dA as qa, dB as Ya, cs as on, bq as an, ds as Qa, T as Za, ct as Ja, bl as er, dC as rn, cu as tr, b8 as sr, bO as nr, c as or, dD as ar, dE as $n, aw as vn, ax as nc, t as rr, a as oc, dF as ac, dG as rc, c0 as ic, ar as cc, bP as lc, bV as uc, S as dc, bW as pc, aQ as hc, aq as fc, bS as mc, bT as gc, bY as xc, aS as ir, bA as Cc, aN as bc, bZ as Ic, F as yc, b_ as kc, dh as $c, a$ as vc, b0 as Sc, b1 as wc, b2 as Nc, aO as Rc, b$ as Tc, b5 as Ec, c5 as Dc, ap as Fc, c1 as Pc, aW as cr, bK as Oc, aF as Ac, c3 as _c, b7 as Lc, c4 as Vc, k as Wc } from "./index-BU-PZJ0l.js";
|
|
3
|
+
import { n as Mc } from "./random_width-DvNaD5W3.js";
|
|
4
|
+
import { t as Bc } from "./zeros_like-CGnKYJEe.js";
|
|
5
5
|
import "./index-Cp39cXWe.js";
|
|
6
|
-
import "./dataset-
|
|
7
|
-
import { a as j, u as ae, c as ot, i as at, b as Gc, d as wt, t as Re, e as gt, f as dt, g as lr, r as Nt, h as Ae, j as zc, k as Uc, l as cn, z as Hc, m as ln, n as ur, o as Xc, p as Kc, q as jc, v as qc, w as Yc, x as Qc, y as Zc, A as Jc, B as el, C as tl, D as lt, E as sl, F as nl, G as dr, H as ol, I as al, J as rl, K as il, L as cl, M as ll, N as ul, O as dl, P as pl, Q as hl, R as fl, S as ml, T as gl, U as xl, V as Cl, W as bl, X as Il, Y as yl, Z as kl, _ as $l, $ as vl, a0 as Sl, a1 as wl, a2 as Nl, a3 as Rl, a4 as Tl, a5 as El, a6 as Dl, a7 as Fl, a8 as Pl, a9 as Ol, aa as Al, ab as _l, ac as Ll, ad as Vl, ae as Wl, af as Ml, ag as Bl, ah as Gl, ai as zl } from "./shared-
|
|
6
|
+
import "./dataset-CtujTjY_.js";
|
|
7
|
+
import { a as j, u as ae, c as ot, i as at, b as Gc, d as wt, t as Re, e as gt, f as dt, g as lr, r as Nt, h as Ae, j as zc, k as Uc, l as cn, z as Hc, m as ln, n as ur, o as Xc, p as Kc, q as jc, v as qc, w as Yc, x as Qc, y as Zc, A as Jc, B as el, C as tl, D as lt, E as sl, F as nl, G as dr, H as ol, I as al, J as rl, K as il, L as cl, M as ll, N as ul, O as dl, P as pl, Q as hl, R as fl, S as ml, T as gl, U as xl, V as Cl, W as bl, X as Il, Y as yl, Z as kl, _ as $l, $ as vl, a0 as Sl, a1 as wl, a2 as Nl, a3 as Rl, a4 as Tl, a5 as El, a6 as Dl, a7 as Fl, a8 as Pl, a9 as Ol, aa as Al, ab as _l, ac as Ll, ad as Vl, ae as Wl, af as Ml, ag as Bl, ah as Gl, ai as zl } from "./shared-DORJR6eU.js";
|
|
8
8
|
import { m as pt, g as pr, s as Ul, c as Hl, b as Xl, d as Kl, a as jl, e as ql } from "./complex_util-Yc1A_gV1.js";
|
|
9
|
-
import { a as ge, b as xe, d as ke, c as ve, e as Te, g as os } from "./axis_util-
|
|
10
|
-
import { k as Ze, h as Le, i as Je, j as rt, b as Se, d as xt, g as as } from "./step-
|
|
11
|
-
import { z as rs, A as is, B as cs, C as hr, D as fr, F as mr, G as gr, H as xr, I as Cr, J as br, y as Ir, x as yr, w as kr, u as $r, t as vr, E as Sr, K as wr, L as Nr, M as Rr,
|
|
12
|
-
import { a as
|
|
9
|
+
import { a as ge, b as xe, d as ke, c as ve, e as Te, g as os } from "./axis_util-BTeTGnZF.js";
|
|
10
|
+
import { k as Ze, h as Le, i as Je, j as rt, b as Se, d as xt, g as as } from "./step-A_sBP7En.js";
|
|
11
|
+
import { z as rs, A as is, B as cs, C as hr, D as fr, F as mr, G as gr, H as xr, I as Cr, J as br, y as Ir, x as yr, w as kr, u as $r, t as vr, E as Sr, K as wr, L as Nr, M as Rr, c as Tr, f as Yl } from "./backend_util-DGV1tIji.js";
|
|
12
|
+
import { a as Er, c as Ue } from "./concat_util-Bk6ay4Ob.js";
|
|
13
|
+
import { c as Dr, a as Ql, s as Zl } from "./segment_util-CXELxApp.js";
|
|
13
14
|
import { s as Jl } from "./index-CieiGp4Y.js";
|
|
14
15
|
import { n as Fr, b as Pr, a as Or } from "./non_max_suppression_impl-B2W7YjZB.js";
|
|
15
|
-
import { c as Ct } from "./scatter_nd_util-
|
|
16
|
-
import { S as Ar, a as _r } from "./selu_util-
|
|
17
|
-
import {
|
|
18
|
-
import { h as Sn, j as ou, k as au, l as ru, m as iu, n as cu, o as lu, P as un, p as Ve, u as Pe, q as Wr, c as Mr, T as De, E as Br, g as Gr, a as zr, r as uu, s as du, t as Y, v as
|
|
19
|
-
import { s as Hr, a as Fu, t as Xr, b as Pu, c as Ou, d as Kr, e as Au, n as _u, f as Lu, g as Vu, h as Wu, i as Mu, j as Bu, k as Gu, l as zu, o as Uu, p as Hu, q as Xu, r as Ku, u as ju, v as qu, w as Yu, x as Qu, y as Zu, z as Ju, A as ed, B as td, C as sd, D as nd, E as od, F as ad, G as rd, H as id, I as cd, J as ld, K as ud, L as dd, M as jr, N as pd, O as hd, P as fd, Q as md, R as gd, S as xd, T as Cd, U as bd, V as Id, W as yd } from "./shared-
|
|
20
|
-
import { a as ye, c as kd, U as st, d as qe, e as ze, A as En, f as bt, B as dn, h as pn, m as Rt, u as se, C as We, b as Ce, i as Fe, j as hn, k as it, l as It, n as $d, o as vd, p as Sd, q as wd } from "./kernel_funcs_utils-
|
|
21
|
-
import { R as Nd, r as U, a as Rd } from "./Reshape-
|
|
22
|
-
import { M as qr } from "./matMulGelu-
|
|
23
|
-
import { t as Yr, s as fn, a as _t, m as Td, r as Ed, b as Dd, c as Fd, d as Pd } from "./RealDiv-
|
|
24
|
-
import { z as Od } from "./zeros-
|
|
16
|
+
import { c as Ct } from "./scatter_nd_util-DJ8GpL1I.js";
|
|
17
|
+
import { S as Ar, a as _r } from "./selu_util-CPAAI9PC.js";
|
|
18
|
+
import { s as Lr, b as Vr, p as eu, a as tu, i as su, c as nu } from "./slice_util-DDbd6eyd.js";
|
|
19
|
+
import { h as Sn, j as ou, k as au, l as ru, m as iu, n as cu, o as lu, P as un, p as Ve, u as Pe, q as Wr, c as Mr, T as De, E as Br, g as Gr, a as zr, r as uu, s as du, t as Y, v as pu, w as wn, x as hu, y as fu, z as Pt, A as Ot, B as mu, C as gu, D as bs, F as Gt, G as zt, H as xu, I as Cu, J as Nn, K as bu, L as Iu, M as fs, N as yu, O as ku, Q as $u, R as Ut, S as ms, U as vu, f as he, V as be, W as Ht, X as Xt, Y as Su, d as Rn, e as Tn, i as Ur, Z as wu, _ as Nu, $ as Ru, a0 as Tu, a1 as Eu, a2 as Du, a3 as At } from "./gpgpu_math-C0Hfh1As.js";
|
|
20
|
+
import { s as Hr, a as Fu, t as Xr, b as Pu, c as Ou, d as Kr, e as Au, n as _u, f as Lu, g as Vu, h as Wu, i as Mu, j as Bu, k as Gu, l as zu, o as Uu, p as Hu, q as Xu, r as Ku, u as ju, v as qu, w as Yu, x as Qu, y as Zu, z as Ju, A as ed, B as td, C as sd, D as nd, E as od, F as ad, G as rd, H as id, I as cd, J as ld, K as ud, L as dd, M as jr, N as pd, O as hd, P as fd, Q as md, R as gd, S as xd, T as Cd, U as bd, V as Id, W as yd } from "./shared-DVZEsT-0.js";
|
|
21
|
+
import { a as ye, c as kd, U as st, d as qe, e as ze, A as En, f as bt, B as dn, h as pn, m as Rt, u as se, C as We, b as Ce, i as Fe, j as hn, k as it, l as It, n as $d, o as vd, p as Sd, q as wd } from "./kernel_funcs_utils-BNsajF26.js";
|
|
22
|
+
import { R as Nd, r as U, a as Rd } from "./Reshape-CZVlscuS.js";
|
|
23
|
+
import { M as qr } from "./matMulGelu-BM8qerU0.js";
|
|
24
|
+
import { t as Yr, s as fn, a as _t, m as Td, r as Ed, b as Dd, c as Fd, d as Pd } from "./RealDiv-Blil1IAl.js";
|
|
25
|
+
import { z as Od } from "./zeros-CkHJh2fz.js";
|
|
25
26
|
import "./ops/cpu/attentionMask.js";
|
|
26
27
|
import "./ops/webgl/attentionMask.js";
|
|
27
28
|
import "./ops/grads/attentionMask.js";
|
|
28
29
|
import "./ops/cpu/rope.js";
|
|
29
30
|
import "./ops/webgl/rope.js";
|
|
30
|
-
import "./rope-
|
|
31
|
+
import "./rope-DaRtCQFa.js";
|
|
31
32
|
import "./ops/cpu/appendCache.js";
|
|
32
33
|
import "./ops/webgl/appendCache.js";
|
|
33
34
|
import "./ops/grads/softmax16.js";
|
|
34
|
-
import "./matMul16-
|
|
35
|
+
import "./matMul16-qjE_mKX8.js";
|
|
35
36
|
import "./ops/webgl/matMul16.js";
|
|
36
37
|
import "./ops/cpu/matMul16.js";
|
|
37
|
-
import "./pack16-
|
|
38
|
+
import "./pack16-CQUYvE0Y.js";
|
|
38
39
|
import "./ops/transpose16.js";
|
|
39
40
|
import "./ops/reshape16.js";
|
|
40
41
|
import "./ops/cpu/qkv.js";
|
|
41
42
|
import "./ops/webgl/qkv.js";
|
|
42
43
|
import "./ops/grads/qkv.js";
|
|
43
|
-
import "./ops/mul16.js";
|
|
44
44
|
import "./ops/cpu/normRMS.js";
|
|
45
45
|
import "./ops/webgl/normRMS.js";
|
|
46
46
|
import "./ops/grads/normRMS.js";
|
|
47
|
+
import "./ops/dropout16.js";
|
|
48
|
+
import "./ops/webgl/dropout16.js";
|
|
47
49
|
import "./ops/grads/add16.js";
|
|
48
50
|
import "./jszip.min-Bz5-11Bk.js";
|
|
49
51
|
import Ad from "./tokeniser/CharTokeniser.js";
|
|
@@ -63,17 +65,17 @@ import "./ops/cpu/matMulGelu.js";
|
|
|
63
65
|
import "./ops/grads/matMulGelu.js";
|
|
64
66
|
import "./ops/cpu/gelu.js";
|
|
65
67
|
import "./ops/webgl/gelu.js";
|
|
66
|
-
import "./gelu-
|
|
68
|
+
import "./gelu-Bq6zXPEw.js";
|
|
67
69
|
import "./ops/webgl/log.js";
|
|
68
70
|
import "./checks/normRMS.js";
|
|
69
71
|
import "./checks/normRMSGrad.js";
|
|
70
72
|
import Wd from "./utilities/multinomialCPU.js";
|
|
71
|
-
import { r as Dn } from "./reshape-
|
|
72
|
-
import { t as Kt } from "./tensor2d-
|
|
73
|
-
import { z as Md } from "./unsorted_segment_sum-
|
|
74
|
-
import { s as gs } from "./softmax-
|
|
75
|
-
import { g as Bd } from "./gather-
|
|
76
|
-
import { c as Gd } from "./concat-
|
|
73
|
+
import { r as Dn } from "./reshape-Dxch7IXb.js";
|
|
74
|
+
import { t as Kt } from "./tensor2d-BieJDjvb.js";
|
|
75
|
+
import { z as Md } from "./unsorted_segment_sum-Cx23B-vz.js";
|
|
76
|
+
import { s as gs } from "./softmax-CRzm-wNL.js";
|
|
77
|
+
import { g as Bd } from "./gather-DvRBFTfQ.js";
|
|
78
|
+
import { c as Gd } from "./concat-ByAvdM_j.js";
|
|
77
79
|
function zd(a, t, e, n = !1) {
|
|
78
80
|
const s = Xi(a, "logits", "multinomial"), o = s.size, r = s.rank;
|
|
79
81
|
if (o < 2)
|
|
@@ -846,7 +848,7 @@ const jp = {
|
|
|
846
848
|
};
|
|
847
849
|
function ft(a) {
|
|
848
850
|
const { inputs: t, backend: e, attrs: n } = a, { axis: s } = n, o = ue(s, t[0].shape)[0], r = t.map((m) => m.shape);
|
|
849
|
-
|
|
851
|
+
Er(r, o);
|
|
850
852
|
let c = Ue(t.map((m) => m.shape), o);
|
|
851
853
|
if (G(c) === 0)
|
|
852
854
|
return e.makeTensorInfo(c, t[0].dtype, []);
|
|
@@ -1856,7 +1858,7 @@ function lf(a) {
|
|
|
1856
1858
|
}
|
|
1857
1859
|
let p = c;
|
|
1858
1860
|
c == null && (p = 0);
|
|
1859
|
-
const d = G(o.shape), h =
|
|
1861
|
+
const d = G(o.shape), h = Dr(s, o, i, p), f = ce({
|
|
1860
1862
|
inputs: { x: s },
|
|
1861
1863
|
backend: e,
|
|
1862
1864
|
attrs: {
|
|
@@ -2590,7 +2592,7 @@ const Gm = {
|
|
|
2590
2592
|
kernelName: Aa,
|
|
2591
2593
|
backendName: "cpu",
|
|
2592
2594
|
kernelFunc: ({ inputs: a, attrs: t, backend: e }) => {
|
|
2593
|
-
const { image: n } = a, { radians: s, fillValue: o, center: r } = t, c = e, i = _e(n.dtype, G(n.shape)), [l, u, p, d] = n.shape, [h, f] =
|
|
2595
|
+
const { image: n } = a, { radians: s, fillValue: o, center: r } = t, c = e, i = _e(n.dtype, G(n.shape)), [l, u, p, d] = n.shape, [h, f] = Nr(r, u, p), m = 255, x = Math.sin(s), g = Math.cos(s), C = c.data.get(n.dataId).values;
|
|
2594
2596
|
for (let b = 0; b < l; b++) {
|
|
2595
2597
|
const y = b * p * u * d;
|
|
2596
2598
|
for (let k = 0; k < u; k++) {
|
|
@@ -2849,7 +2851,7 @@ const vg = {
|
|
|
2849
2851
|
kernelFunc: $g
|
|
2850
2852
|
};
|
|
2851
2853
|
function Sg(a) {
|
|
2852
|
-
const { inputs: t, backend: e, attrs: n } = a, { x: s } = t, { numOrSizeSplits: o, axis: r } = n, c = ue(r, s.shape)[0], i =
|
|
2854
|
+
const { inputs: t, backend: e, attrs: n } = a, { x: s } = t, { numOrSizeSplits: o, axis: r } = n, c = ue(r, s.shape)[0], i = Rr(s, o, c), l = new Array(s.shape.length).fill(0), u = s.shape.slice();
|
|
2853
2855
|
return i.map((p) => {
|
|
2854
2856
|
const d = [...u];
|
|
2855
2857
|
d[c] = p;
|
|
@@ -5055,7 +5057,7 @@ class fC {
|
|
|
5055
5057
|
function Ii(a, t, e, n = null) {
|
|
5056
5058
|
let s = t.shape[0], o = t.shape[1];
|
|
5057
5059
|
n != null && (s = n.shape[0], o = n.shape[1]);
|
|
5058
|
-
const r =
|
|
5060
|
+
const r = Tr(o), c = { windowSize: r, inSize: o, batchSize: s, outSize: Math.ceil(o / r) }, i = new hC(c, e, n == null), l = [t];
|
|
5059
5061
|
n != null && l.push(n);
|
|
5060
5062
|
const u = a.runWebGLProgram(i, l, "int32");
|
|
5061
5063
|
if (u.shape[1] === 1)
|
|
@@ -5064,7 +5066,7 @@ function Ii(a, t, e, n = null) {
|
|
|
5064
5066
|
return a.disposeIntermediateTensorInfo(u), p;
|
|
5065
5067
|
}
|
|
5066
5068
|
function yi(a, t, e, n = null) {
|
|
5067
|
-
const s = n != null ? n.shape : t.shape, o = s[s.length - 1], r =
|
|
5069
|
+
const s = n != null ? n.shape : t.shape, o = s[s.length - 1], r = Tr(o), c = new fC(s, r, e, n == null), i = n == null ? [t] : [t, n], l = a.runWebGLProgram(c, i, "int32");
|
|
5068
5070
|
if (l.shape.length === t.shape.length) {
|
|
5069
5071
|
const u = yi(a, t, e, l);
|
|
5070
5072
|
return a.disposeIntermediateTensorInfo(l), u;
|
|
@@ -6178,7 +6180,7 @@ function Eb(a, t, e) {
|
|
|
6178
6180
|
}
|
|
6179
6181
|
function vi(a) {
|
|
6180
6182
|
const { inputs: t, backend: e, attrs: n } = a, { axis: s } = n, o = ue(s, t[0].shape)[0], r = t.map((l) => l.shape);
|
|
6181
|
-
|
|
6183
|
+
Er(r, o);
|
|
6182
6184
|
const c = Ue(t.map((l) => l.shape), o);
|
|
6183
6185
|
if (G(c) === 0)
|
|
6184
6186
|
return e.makeTensorInfo(c, t[0].dtype, []);
|
|
@@ -8581,7 +8583,7 @@ function Oi(a) {
|
|
|
8581
8583
|
te(y <= I - 1 && y >= 0, () => `GatherV2: the index value ${y} is not in [0, ${I - 1}]`);
|
|
8582
8584
|
}
|
|
8583
8585
|
}
|
|
8584
|
-
const l =
|
|
8586
|
+
const l = Dr(s, o, i, c), u = G(o.shape), p = [], d = U({
|
|
8585
8587
|
inputs: { x: s },
|
|
8586
8588
|
backend: e,
|
|
8587
8589
|
attrs: {
|
|
@@ -10356,7 +10358,7 @@ const Ik = {
|
|
|
10356
10358
|
kernelName: Aa,
|
|
10357
10359
|
backendName: "webgl",
|
|
10358
10360
|
kernelFunc: ({ inputs: a, attrs: t, backend: e }) => {
|
|
10359
|
-
const { image: n } = a, { radians: s, fillValue: o, center: r } = t, c = e, i = new bk(n.shape, o), [l, u] =
|
|
10361
|
+
const { image: n } = a, { radians: s, fillValue: o, center: r } = t, c = e, i = new bk(n.shape, o), [l, u] = Nr(r, n.shape[1], n.shape[2]), p = [[l, u, Math.sin(s), Math.cos(s)]];
|
|
10360
10362
|
return c.runWebGLProgram(i, [n], n.dtype, p);
|
|
10361
10363
|
}
|
|
10362
10364
|
};
|
|
@@ -10771,7 +10773,7 @@ const h$ = {
|
|
|
10771
10773
|
kernelFunc: p$
|
|
10772
10774
|
};
|
|
10773
10775
|
function f$(a) {
|
|
10774
|
-
const { inputs: t, backend: e, attrs: n } = a, { x: s } = t, { numOrSizeSplits: o, axis: r } = n, c = ue(r, s.shape)[0], i =
|
|
10776
|
+
const { inputs: t, backend: e, attrs: n } = a, { x: s } = t, { numOrSizeSplits: o, axis: r } = n, c = ue(r, s.shape)[0], i = Rr(s, o, c), l = s.shape.length, u = new Array(l).fill(0), p = s.shape.slice();
|
|
10775
10777
|
return i.map((d) => {
|
|
10776
10778
|
const h = [...p];
|
|
10777
10779
|
h[c] = d;
|
|
@@ -11677,7 +11679,7 @@ const lv = [
|
|
|
11677
11679
|
function uv(a, t) {
|
|
11678
11680
|
return a.length === t ? a : a.length > t ? a.slice(0, t) : a.concat(Array(t - a.length).fill(""));
|
|
11679
11681
|
}
|
|
11680
|
-
class
|
|
11682
|
+
class AS extends Ui {
|
|
11681
11683
|
constructor(t, e) {
|
|
11682
11684
|
super(), this.model = t, this.tokeniser = e, this.actualTokeniser = e;
|
|
11683
11685
|
}
|
|
@@ -11873,6 +11875,6 @@ class PS extends Ui {
|
|
|
11873
11875
|
}
|
|
11874
11876
|
}
|
|
11875
11877
|
export {
|
|
11876
|
-
|
|
11878
|
+
AS as default,
|
|
11877
11879
|
cv as isConversation
|
|
11878
11880
|
};
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import { aE as E,
|
|
2
|
-
import { r as $ } from "./Reshape-
|
|
3
|
-
import { a as A, b as k, d as C, c as N, e as R } from "./axis_util-
|
|
4
|
-
import { t as K, m as _ } from "./shared-
|
|
5
|
-
import { c as j } from "./backend_util
|
|
6
|
-
import { f as y } from "./gpgpu_math-
|
|
7
|
-
import { g as G, b as L } from "./kernel_funcs_utils-
|
|
1
|
+
import { aE as E, ab as T, ah as O, U as V, aW as B, N as F, aM as U, aX as W } from "./index-BU-PZJ0l.js";
|
|
2
|
+
import { r as $ } from "./Reshape-CZVlscuS.js";
|
|
3
|
+
import { a as A, b as k, d as C, c as N, e as R } from "./axis_util-BTeTGnZF.js";
|
|
4
|
+
import { t as K, m as _ } from "./shared-DVZEsT-0.js";
|
|
5
|
+
import { c as j } from "./backend_util-DGV1tIji.js";
|
|
6
|
+
import { f as y } from "./gpgpu_math-C0Hfh1As.js";
|
|
7
|
+
import { g as G, b as L } from "./kernel_funcs_utils-BNsajF26.js";
|
|
8
8
|
class w {
|
|
9
9
|
constructor(s, e) {
|
|
10
10
|
this.variableNames = ["x"];
|
|
@@ -273,7 +273,7 @@ function Q(a, s, e, t) {
|
|
|
273
273
|
const [p, h] = N(u.shape, i);
|
|
274
274
|
let d = p;
|
|
275
275
|
e && (d = R(p, r));
|
|
276
|
-
const f = V(h), g = V(a.shape) / f, x = $({ inputs: { x: u }, attrs: { shape: [g, f] }, backend: t }),
|
|
276
|
+
const f = V(h), g = V(a.shape) / f, x = $({ inputs: { x: u }, attrs: { shape: [g, f] }, backend: t }), S = B(a.dtype), I = M(x, S, "sum", t), m = $({ inputs: { x: I }, attrs: { shape: d }, backend: t });
|
|
277
277
|
return t.disposeIntermediateTensorInfo(x), t.disposeIntermediateTensorInfo(I), o && t.disposeIntermediateTensorInfo(u), m;
|
|
278
278
|
}
|
|
279
279
|
function Z(a) {
|
|
@@ -308,17 +308,17 @@ function te(a) {
|
|
|
308
308
|
o = k(o.length, i);
|
|
309
309
|
}
|
|
310
310
|
C("max", o, i);
|
|
311
|
-
const [f,
|
|
311
|
+
const [f, b] = N(d.shape, o);
|
|
312
312
|
let g = f;
|
|
313
313
|
r && (g = R(f, c));
|
|
314
314
|
let x;
|
|
315
315
|
if (h) {
|
|
316
|
-
const I = e.texData.get(d.dataId).values, m = _(I, V(
|
|
316
|
+
const I = e.texData.get(d.dataId).values, m = _(I, V(b), g, n.dtype);
|
|
317
317
|
x = e.makeTensorInfo(g, n.dtype);
|
|
318
318
|
const z = e.texData.get(x.dataId);
|
|
319
319
|
z.values = m;
|
|
320
320
|
} else
|
|
321
|
-
x = ee(d,
|
|
321
|
+
x = ee(d, b, g, e);
|
|
322
322
|
return p && e.disposeIntermediateTensorInfo(d), x;
|
|
323
323
|
}
|
|
324
324
|
const he = {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { R as C, U as c, aj as R, x as f } from "./index-
|
|
2
|
-
import { u as g, g as I, a as x, b as F, c as $, d as u, e as m, i as l } from "./gpgpu_math-
|
|
1
|
+
import { R as C, U as c, aj as R, x as f } from "./index-BU-PZJ0l.js";
|
|
2
|
+
import { u as g, g as I, a as x, b as F, c as $, d as u, e as m, i as l } from "./gpgpu_math-C0Hfh1As.js";
|
|
3
3
|
class S {
|
|
4
4
|
constructor(t, i) {
|
|
5
5
|
this.variableNames = ["A"], this.packedInputs = !0, this.packedOutput = !0, this.customUniforms = [{ name: "inputShape", type: "ivec3" }], this.outputShape = t, this.enableShapeUniforms = g(this.outputShape.length);
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { U as h, aj as d, x as c, R as m } from "./index-
|
|
1
|
+
import { U as h, aj as d, x as c, R as m } from "./index-BU-PZJ0l.js";
|
|
2
2
|
function i(n) {
|
|
3
3
|
const { inputs: p, attrs: o } = n, { x: e } = p, { shape: r } = o, a = h(e.shape), s = d(r, a), t = h(s);
|
|
4
4
|
return c(a === t, () => `The new shape (${s}) has ${t} elements and the old shape (${e.shape}) has ${a} elements. The new shape and old shape must have the same number of elements.`), n.backend.incRef(e.dataId), { dataId: e.dataId, shape: s, dtype: e.dtype };
|
package/dist/TeachableLLM.d.ts
CHANGED
|
@@ -8,6 +8,7 @@ import { default as MemoryProfiler } from './utilities/profile';
|
|
|
8
8
|
import { default as Model, ModelForwardAttributes } from './models/model';
|
|
9
9
|
import { Task } from './training/tasks/Task';
|
|
10
10
|
import { TrainingLogEntry, TrainingOptions } from './training/types';
|
|
11
|
+
import { ModelPhase } from './loader/types';
|
|
11
12
|
type TeachableLLMStatus = 'warmup' | 'awaitingTokens' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
|
|
12
13
|
interface TeachableLLMMeta {
|
|
13
14
|
name?: string;
|
|
@@ -26,6 +27,8 @@ export default class TeachableLLM {
|
|
|
26
27
|
private _trainer;
|
|
27
28
|
constructor(tokeniser?: ITokeniser, model?: Model<ModelForwardAttributes, GPTConfig>);
|
|
28
29
|
get vocab(): string[];
|
|
30
|
+
get phase(): ModelPhase;
|
|
31
|
+
set phase(phase: ModelPhase);
|
|
29
32
|
/** Model is fully loaded */
|
|
30
33
|
get loaded(): boolean;
|
|
31
34
|
get config(): GPTConfig;
|
|
@@ -52,10 +55,12 @@ export default class TeachableLLM {
|
|
|
52
55
|
generateText(options?: IGenerateOptions): Promise<Conversation[]>;
|
|
53
56
|
dispose(): void;
|
|
54
57
|
on(event: 'status', listener: (status: TeachableLLMStatus) => void): void;
|
|
58
|
+
on(event: 'phase', listener: (phase: ModelPhase) => void): void;
|
|
55
59
|
on(event: 'error', listener: (error: Error) => void): void;
|
|
56
60
|
on(event: 'trainStep', listener: (step: TrainingLogEntry) => void): void;
|
|
57
61
|
on(event: 'loaded', listener: () => void): void;
|
|
58
62
|
off(event: 'status', listener: (status: TeachableLLMStatus) => void): void;
|
|
63
|
+
off(event: 'phase', listener: (phase: ModelPhase) => void): void;
|
|
59
64
|
off(event: 'error', listener: (error: Error) => void): void;
|
|
60
65
|
off(event: 'trainStep', listener: (step: TrainingLogEntry) => void): void;
|
|
61
66
|
off(event: 'loaded', listener: () => void): void;
|
package/dist/TeachableLLM.js
CHANGED
|
@@ -1,37 +1,38 @@
|
|
|
1
1
|
import { validateConfig as m } from "./models/config.js";
|
|
2
2
|
import { saveModel as d } from "./loader/save.js";
|
|
3
|
-
import { loadModel as
|
|
4
|
-
import
|
|
3
|
+
import { loadModel as p } from "./loader/load.js";
|
|
4
|
+
import u from "./Generator.js";
|
|
5
5
|
import h from "./Trainer.js";
|
|
6
6
|
import { E as f } from "./index-DvYrXKkX.js";
|
|
7
7
|
import { dummyPassTrainAsync as l } from "./utilities/dummy.js";
|
|
8
|
-
import "./index-
|
|
9
|
-
import "./random_width-
|
|
10
|
-
import "./zeros_like-
|
|
8
|
+
import "./index-BU-PZJ0l.js";
|
|
9
|
+
import "./random_width-DvNaD5W3.js";
|
|
10
|
+
import "./zeros_like-CGnKYJEe.js";
|
|
11
11
|
import "./index-Cp39cXWe.js";
|
|
12
|
-
import "./dataset-
|
|
12
|
+
import "./dataset-CtujTjY_.js";
|
|
13
13
|
import "./ops/cpu/attentionMask.js";
|
|
14
14
|
import "./ops/webgl/attentionMask.js";
|
|
15
15
|
import "./ops/grads/attentionMask.js";
|
|
16
16
|
import "./ops/cpu/rope.js";
|
|
17
17
|
import "./ops/webgl/rope.js";
|
|
18
|
-
import "./rope-
|
|
18
|
+
import "./rope-DaRtCQFa.js";
|
|
19
19
|
import "./ops/cpu/appendCache.js";
|
|
20
20
|
import "./ops/webgl/appendCache.js";
|
|
21
21
|
import "./ops/grads/softmax16.js";
|
|
22
|
-
import "./matMul16-
|
|
22
|
+
import "./matMul16-qjE_mKX8.js";
|
|
23
23
|
import "./ops/webgl/matMul16.js";
|
|
24
24
|
import "./ops/cpu/matMul16.js";
|
|
25
|
-
import "./pack16-
|
|
25
|
+
import "./pack16-CQUYvE0Y.js";
|
|
26
26
|
import "./ops/transpose16.js";
|
|
27
27
|
import "./ops/reshape16.js";
|
|
28
28
|
import "./ops/cpu/qkv.js";
|
|
29
29
|
import "./ops/webgl/qkv.js";
|
|
30
30
|
import "./ops/grads/qkv.js";
|
|
31
|
-
import "./ops/mul16.js";
|
|
32
31
|
import "./ops/cpu/normRMS.js";
|
|
33
32
|
import "./ops/webgl/normRMS.js";
|
|
34
33
|
import "./ops/grads/normRMS.js";
|
|
34
|
+
import "./ops/dropout16.js";
|
|
35
|
+
import "./ops/webgl/dropout16.js";
|
|
35
36
|
import "./ops/grads/add16.js";
|
|
36
37
|
import c from "./tokeniser/CharTokeniser.js";
|
|
37
38
|
import g from "./tokeniser/bpe.js";
|
|
@@ -42,11 +43,11 @@ import "./ops/webgl/gatherSub.js";
|
|
|
42
43
|
import "./ops/cpu/scatterSub.js";
|
|
43
44
|
import "./ops/webgl/scatterSub.js";
|
|
44
45
|
import "./ops/cpu/matMulGelu.js";
|
|
45
|
-
import "./matMulGelu-
|
|
46
|
+
import "./matMulGelu-BM8qerU0.js";
|
|
46
47
|
import "./ops/grads/matMulGelu.js";
|
|
47
48
|
import "./ops/cpu/gelu.js";
|
|
48
49
|
import "./ops/webgl/gelu.js";
|
|
49
|
-
import "./gelu-
|
|
50
|
+
import "./gelu-Bq6zXPEw.js";
|
|
50
51
|
import "./ops/webgl/log.js";
|
|
51
52
|
import "./ops/cpu/adamMoments.js";
|
|
52
53
|
import "./ops/webgl/adamMoments.js";
|
|
@@ -71,6 +72,14 @@ class a {
|
|
|
71
72
|
get vocab() {
|
|
72
73
|
return this._tokeniser?.getVocab() || [];
|
|
73
74
|
}
|
|
75
|
+
get phase() {
|
|
76
|
+
return this._model?.metaData?.phase ?? "untrained";
|
|
77
|
+
}
|
|
78
|
+
set phase(t) {
|
|
79
|
+
if (!this._model)
|
|
80
|
+
throw new Error("model_not_initialized.");
|
|
81
|
+
this._model.metaData.phase = t, this.ee.emit("phase", t);
|
|
82
|
+
}
|
|
74
83
|
/** Model is fully loaded */
|
|
75
84
|
get loaded() {
|
|
76
85
|
return !!this._model && !!this._tokeniser && !!this._config;
|
|
@@ -117,9 +126,9 @@ class a {
|
|
|
117
126
|
}
|
|
118
127
|
static loadModel(t, r) {
|
|
119
128
|
const e = new a();
|
|
120
|
-
return
|
|
129
|
+
return p(t, r).then(({ model: o, tokeniser: n, metaData: i }) => {
|
|
121
130
|
m(o.config), e._model = o, e._tokeniser = n, e._config = o.config, i?.name && (e.meta.name = i.name), e.setStatus("warmup"), l(o).then((s) => {
|
|
122
|
-
e._memoryRequirements = s, e.setStatus("ready"), e.ee.emit("loaded");
|
|
131
|
+
e._memoryRequirements = s, e.setStatus("ready"), e.ee.emit("loaded"), e.ee.emit("phase", e.phase);
|
|
123
132
|
}).catch((s) => {
|
|
124
133
|
e.setStatus("error"), e.ee.emit("error", s), console.error("Error during warmup:", s);
|
|
125
134
|
});
|
|
@@ -131,7 +140,7 @@ class a {
|
|
|
131
140
|
m(r);
|
|
132
141
|
const e = r, o = t === "char" ? new c(e.vocabSize) : new g(e.vocabSize), n = k(e), i = new a(o, n);
|
|
133
142
|
return i.setStatus("warmup"), l(n).then((s) => {
|
|
134
|
-
i._memoryRequirements = s, i.tokeniser.trained ? (i.setStatus("ready"), i.ee.emit("loaded")) : (i.setStatus("awaitingTokens"), i.ee.emit("loaded"), i.tokeniser.once("trainStatus", (_) => {
|
|
143
|
+
i._memoryRequirements = s, i.tokeniser.trained ? (i.setStatus("ready"), i.ee.emit("loaded"), i.ee.emit("phase", i.phase)) : (i.setStatus("awaitingTokens"), i.ee.emit("loaded"), i.ee.emit("phase", i.phase), i.tokeniser.once("trainStatus", (_) => {
|
|
135
144
|
_ === "trained" && i.setStatus("ready");
|
|
136
145
|
}));
|
|
137
146
|
}).catch((s) => {
|
|
@@ -160,11 +169,13 @@ class a {
|
|
|
160
169
|
throw new Error("model_or_tokeniser_not_initialized.");
|
|
161
170
|
this._trainer && t && this._trainer.trainingType !== t && (this._trainer.dispose(), this._trainer = null);
|
|
162
171
|
const e = this._trainer === null ? new h(this._model, this._tokeniser, t, r) : new h(this._trainer, r);
|
|
163
|
-
return e.on("start", () =>
|
|
172
|
+
return e.on("start", () => {
|
|
173
|
+
this.setStatus("training"), this.phase = t === "sft" ? "finetuned" : "pretrained";
|
|
174
|
+
}), e.on("stop", () => this.setStatus("ready")), e.on("log", async (o) => {
|
|
164
175
|
const n = this.ee.listeners("trainStep");
|
|
165
176
|
for (const i of n)
|
|
166
177
|
await i(o);
|
|
167
|
-
}), this._trainer && this._trainer !== e && this._trainer.
|
|
178
|
+
}), this._trainer && this._trainer !== e && this._trainer.removeAllListeners(), this._trainer = e, e;
|
|
168
179
|
}
|
|
169
180
|
async train(t, r, e) {
|
|
170
181
|
const o = this.trainer(e, r);
|
|
@@ -179,7 +190,7 @@ class a {
|
|
|
179
190
|
generator() {
|
|
180
191
|
if (!this._model || !this._tokeniser)
|
|
181
192
|
throw new Error("model_or_tokeniser_not_initialized.");
|
|
182
|
-
const t = new
|
|
193
|
+
const t = new u(this._model, this._tokeniser);
|
|
183
194
|
return t.on("start", () => {
|
|
184
195
|
this.status === "ready" && this.setStatus("busy");
|
|
185
196
|
}), t.on("stop", () => {
|
package/dist/backend.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { g as o, s as e, r as s } from "./index-
|
|
1
|
+
import { g as o, s as e, r as s } from "./index-BU-PZJ0l.js";
|
|
2
2
|
async function c(t, a) {
|
|
3
3
|
if (o() !== t) {
|
|
4
4
|
if (t === "webgpu") {
|
|
5
5
|
const { registerWebGPUBackend: i } = await import("./patches/webgpu_base.js");
|
|
6
|
-
i(a), await import("./index-
|
|
6
|
+
i(a), await import("./index-BSjeovee.js"), await import("./ops/webgpu/index.js");
|
|
7
7
|
}
|
|
8
8
|
await e(t), await s(), console.log(`Backend set to ${t}`);
|
|
9
9
|
}
|