npm - @genai-fi/nanogpt - Versions diffs - 0.15.0 → 0.15.2 - Mend

@genai-fi/nanogpt 0.15.0 → 0.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (248) hide show

package/dist/Generator.js +33 -31
package/dist/{RealDiv-B2Tyc34U.js → RealDiv-CJpH9Bif.js} +13 -13
package/dist/{Reshape-Bqk-z_7-.js → Reshape-C4ZzbS5c.js} +3 -3
package/dist/{Reshape-D973Ba8R.js → Reshape-CKzb2DIN.js} +4 -4
package/dist/TeachableLLM.d.ts +5 -0
package/dist/TeachableLLM.js +30 -18
package/dist/Trainer.d.ts +1 -0
package/dist/Trainer.js +65 -62
package/dist/{axis_util-RrJzDQJc.js → axis_util-BBaWKQoo.js} +1 -1
package/dist/backend.js +2 -2
package/dist/{backend_util-9wV3yg0r.js → backend_util-DLIicY0X.js} +50 -50
package/dist/{backend_webgpu-CnFoGvzK.js → backend_webgpu-BwfUOSiJ.js} +21 -21
package/dist/{broadcast_to-hAMmZJpr.js → broadcast_to-CxKUM6zp.js} +2 -2
package/dist/checks/appendCache.js +2 -2
package/dist/checks/attentionMask.js +3 -3
package/dist/checks/gelu.js +2 -2
package/dist/checks/matMulGelu.js +2 -2
package/dist/checks/normRMS.js +6 -6
package/dist/checks/normRMSGrad.js +3 -3
package/dist/checks/packUnpack.js +2 -2
package/dist/checks/qkv.js +2 -2
package/dist/checks/rope.js +2 -2
package/dist/clip_by_value-lDwNWeyI.js +12 -0
package/dist/{complex-BDvCF_r9.js → complex-NXAORdbW.js} +1 -1
package/dist/{concat-B9WckkXa.js → concat-DCm6KW65.js} +1 -1
package/dist/{concat_util-DVNU-Nn3.js → concat_util-DT0Mofs3.js} +1 -1
package/dist/{dataset-ZUdlBUXV.js → dataset-Bwcib9pp.js} +3 -3
package/dist/dropout_util-Crmm4aOV.js +27 -0
package/dist/{expand_dims-DoiHvcDw.js → expand_dims-DgU0Vlpg.js} +1 -1
package/dist/{exports_initializers-8SQOHjAF.js → exports_initializers-VKuLTIiX.js} +1 -1
package/dist/floor-Bhmfrtly.js +9 -0
package/dist/{gather-BYhIiO5e.js → gather-FIoUa4Zd.js} +1 -1
package/dist/{gelu-9_DFp2Q5.js → gelu-CmkPheOK.js} +1 -1
package/dist/{gpgpu_math-Dzx_EUJa.js → gpgpu_math-D83bWKYw.js} +25 -25
package/dist/{index-3FfEY3tm.js → index-D0b5F1JD.js} +58 -58
package/dist/{index-B8eBIyjS.js → index-nwvWLdRt.js} +89 -89
package/dist/{kernel_funcs_utils-BLvDeLPe.js → kernel_funcs_utils-Bu6bS4D_.js} +11 -11
package/dist/layers/BaseLayer.d.ts +4 -0
package/dist/layers/BaseLayer.js +11 -7
package/dist/layers/CausalSelfAttention.js +55 -51
package/dist/layers/LoRA.js +4 -4
package/dist/layers/MLP.d.ts +1 -1
package/dist/layers/MLP.js +20 -19
package/dist/layers/PositionEmbedding.js +10 -10
package/dist/layers/RMSNorm.js +3 -3
package/dist/layers/RoPECache.js +4 -4
package/dist/layers/TiedEmbedding.js +6 -6
package/dist/layers/TransformerBlock.js +1 -1
package/dist/layers/WeightStore.js +3 -3
package/dist/loader/loadTransformers.js +1 -1
package/dist/loader/oldZipLoad.js +20 -18
package/dist/loader/save.js +6 -5
package/dist/loader/types.d.ts +2 -0
package/dist/main.js +9 -9
package/dist/{matMul16-Bp17gt56.js → matMul16-bI7XM831.js} +3 -3
package/dist/{matMulGelu-Bdxn3VPX.js → matMulGelu-Cbtq3pxJ.js} +21 -21
package/dist/{mat_mul-BUuYg3qo.js → mat_mul-BQY_GSqm.js} +1 -1
package/dist/{mod-4q-X1J5l.js → mod-ChddM4vN.js} +1 -1
package/dist/models/NanoGPTV1.js +9 -9
package/dist/models/NanoGPTV2.js +12 -10
package/dist/models/model.d.ts +1 -1
package/dist/models/model.js +14 -12
package/dist/not_equal-duCIyEXv.js +64 -0
package/dist/{ones-aGZXepq3.js → ones-Piv0gZxv.js} +3 -3
package/dist/ops/adamAdjust.js +1 -1
package/dist/ops/adamMoments.js +1 -1
package/dist/ops/add16.js +1 -1
package/dist/ops/appendCache.js +3 -3
package/dist/ops/attentionMask.js +1 -1
package/dist/ops/concat16.js +2 -2
package/dist/ops/cpu/adamAdjust.js +1 -1
package/dist/ops/cpu/adamMoments.js +2 -2
package/dist/ops/cpu/appendCache.js +2 -2
package/dist/ops/cpu/attentionMask.js +6 -6
package/dist/ops/cpu/fusedSoftmax.js +2 -2
package/dist/ops/cpu/gatherSub.js +6 -6
package/dist/ops/cpu/gelu.js +1 -1
package/dist/ops/cpu/matMul16.js +2 -2
package/dist/ops/cpu/matMulGelu.js +3 -3
package/dist/ops/cpu/matMulMul.js +1 -1
package/dist/ops/cpu/mulDropout.js +1 -1
package/dist/ops/cpu/normRMS.js +1 -1
package/dist/ops/cpu/qkv.js +3 -3
package/dist/ops/cpu/rope.js +5 -5
package/dist/ops/cpu/scatterSub.js +9 -9
package/dist/ops/dot16.js +2 -2
package/dist/ops/dropout.d.ts +2 -0
package/dist/ops/dropout.js +14 -0
package/dist/ops/dropout16.d.ts +2 -0
package/dist/ops/dropout16.js +25 -0
package/dist/ops/gatherSub.js +1 -1
package/dist/ops/gelu.js +2 -2
package/dist/ops/globalNorm.js +2 -2
package/dist/ops/grads/add16.js +1 -1
package/dist/ops/grads/attentionMask.js +2 -2
package/dist/ops/grads/dropout16.d.ts +1 -0
package/dist/ops/grads/dropout16.js +2 -0
package/dist/ops/grads/gelu.js +2 -2
package/dist/ops/grads/matMul16.js +3 -3
package/dist/ops/grads/matMulGelu.js +1 -1
package/dist/ops/grads/mul16.d.ts +1 -0
package/dist/ops/grads/mul16.js +4 -0
package/dist/ops/grads/normRMS.js +1 -1
package/dist/ops/grads/pack16.js +3 -3
package/dist/ops/grads/qkv.js +3 -3
package/dist/ops/grads/rope.js +2 -2
package/dist/ops/grads/softmax16.js +1 -1
package/dist/ops/grads/unpack16.js +2 -2
package/dist/ops/matMul16.js +3 -3
package/dist/ops/matMulGelu.js +2 -2
package/dist/ops/matMulMul.js +1 -1
package/dist/ops/mul16.js +36 -5
package/dist/ops/mulDrop.js +1 -1
package/dist/ops/normRMS.js +13 -4
package/dist/ops/pack16.js +2 -2
package/dist/ops/qkv.js +1 -1
package/dist/ops/reshape16.js +2 -2
package/dist/ops/rope.js +2 -2
package/dist/ops/scatterSub.js +1 -1
package/dist/ops/slice16.js +2 -2
package/dist/ops/softmax16.js +1 -1
package/dist/ops/sub16.js +1 -1
package/dist/ops/sum16.js +2 -2
package/dist/ops/transpose16.js +3 -3
package/dist/ops/unpack16.js +2 -2
package/dist/ops/webgl/adamAdjust.js +2 -2
package/dist/ops/webgl/adamMoments.js +1 -1
package/dist/ops/webgl/appendCache.js +1 -1
package/dist/ops/webgl/attentionMask.js +1 -1
package/dist/ops/webgl/dropout16.d.ts +1 -0
package/dist/ops/webgl/dropout16.js +11 -0
package/dist/ops/webgl/fusedSoftmax.js +6 -6
package/dist/ops/webgl/gatherSub.js +1 -1
package/dist/ops/webgl/gelu.js +2 -2
package/dist/ops/webgl/log.js +3 -3
package/dist/ops/webgl/matMul16.js +5 -5
package/dist/ops/webgl/matMulGelu.js +4 -4
package/dist/ops/webgl/matMulMul.js +2 -2
package/dist/ops/webgl/mulDropout.js +1 -1
package/dist/ops/webgl/normRMS.js +2 -2
package/dist/ops/webgl/qkv.js +1 -1
package/dist/ops/webgl/rope.js +1 -1
package/dist/ops/webgl/scatterSub.js +1 -1
package/dist/ops/webgpu/adamAdjust.js +3 -3
package/dist/ops/webgpu/adamMoments.js +3 -3
package/dist/ops/webgpu/add16.js +1 -1
package/dist/ops/webgpu/appendCache.js +3 -3
package/dist/ops/webgpu/attentionMask.js +2 -2
package/dist/ops/webgpu/attentionMask32_program.js +2 -2
package/dist/ops/webgpu/clipScale.js +1 -1
package/dist/ops/webgpu/concat16.js +12 -12
package/dist/ops/webgpu/dropout16.d.ts +1 -0
package/dist/ops/webgpu/dropout16.js +51 -0
package/dist/ops/webgpu/gatherSub.js +3 -3
package/dist/ops/webgpu/gelu.js +3 -3
package/dist/ops/webgpu/index.js +1 -0
package/dist/ops/webgpu/matMul16.js +14 -14
package/dist/ops/webgpu/matMul16_program.js +2 -2
package/dist/ops/webgpu/mul16.js +9 -9
package/dist/ops/webgpu/norm2.js +1 -1
package/dist/ops/webgpu/normRMS.js +2 -2
package/dist/ops/webgpu/normRMSGrad.js +4 -4
package/dist/ops/webgpu/pack16.js +1 -1
package/dist/ops/webgpu/pack16_program.js +2 -2
package/dist/ops/webgpu/qkv.js +2 -2
package/dist/ops/webgpu/rope.js +3 -3
package/dist/ops/webgpu/scatterSub.js +3 -3
package/dist/ops/webgpu/slice16.js +4 -4
package/dist/ops/webgpu/softmax16.js +2 -2
package/dist/ops/webgpu/softmax16_program.js +2 -2
package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
package/dist/ops/webgpu/softmax16grad.js +1 -1
package/dist/ops/webgpu/sub16.js +1 -1
package/dist/ops/webgpu/sum16.js +5 -5
package/dist/ops/webgpu/transpose16.js +2 -2
package/dist/ops/webgpu/transpose16_program.js +2 -2
package/dist/ops/webgpu/transpose16_shared_program.js +3 -3
package/dist/ops/webgpu/unpack16.js +3 -3
package/dist/ops/webgpu/utils/binary_op.d.ts +16 -0
package/dist/ops/webgpu/utils/binary_op.js +74 -13
package/dist/ops/webgpu/utils/reductions.js +5 -5
package/dist/{ops-BLDakU_V.js → ops-BXr-37bF.js} +30 -30
package/dist/{pack16-F9gxcBrq.js → pack16-DO9GrRdk.js} +2 -2
package/dist/patches/webgpu_backend.js +9 -9
package/dist/patches/webgpu_base.js +1 -1
package/dist/patches/webgpu_program.js +2 -2
package/dist/rand_util-CZ7yLoUm.js +50 -0
package/dist/random_normal-CO9xf9dz.js +14 -0
package/dist/{random_width-DSeITIFc.js → random_width-CliSj-et.js} +164 -162
package/dist/{range-BvA7g6TS.js → range-Dx4PwA2-.js} +1 -1
package/dist/{readers-lNVRVUDO.js → readers-DwZhCW0C.js} +2 -2
package/dist/{relu-DyGjd4UV.js → relu-BnpM8PVa.js} +1 -1
package/dist/{reshape-3ugLpT-p.js → reshape-DVh8yLpI.js} +1 -1
package/dist/{resize_nearest_neighbor-DBPfHMkZ.js → resize_nearest_neighbor-Dl7ehaQl.js} +39 -39
package/dist/{rope-D5BJXlc7.js → rope-DjON_IMj.js} +1 -1
package/dist/{scatter_nd_util-6lhBuxGa.js → scatter_nd_util-SSoGmfpx.js} +1 -1
package/dist/{selu_util-emNhirms.js → selu_util-C0DN3KhX.js} +5 -5
package/dist/{shared-Wn4Lkf40.js → shared-CefTy5O1.js} +1 -1
package/dist/{shared-DeC0UJkK.js → shared-DgNUoqSc.js} +35 -35
package/dist/{slice-C1VU5kjs.js → slice-BluUPHKL.js} +1 -1
package/dist/{slice_util-5UIO9Akz.js → slice_util-DK4kHJjN.js} +1 -1
package/dist/{softmax-BSXRSMAA.js → softmax-HULrSwJC.js} +1 -1
package/dist/{split-Z_OF59mV.js → split-QwVeUPZt.js} +1 -1
package/dist/{squeeze-DuB_IYFY.js → squeeze-Brkwo5OI.js} +2 -2
package/dist/{stack-CdjLGyjr.js → stack-C_8ubcjt.js} +1 -1
package/dist/{step-CA-PdcE1.js → step-wz0MZ7BP.js} +1 -1
package/dist/{sum-CX6lFpfv.js → sum-iKJXG43N.js} +1 -1
package/dist/{tensor-BLWBtdey.js → tensor-Dfy8cN1y.js} +1 -1
package/dist/{tensor1d-Dp80hTtj.js → tensor1d-CoOFcAZs.js} +1 -1
package/dist/{tensor2d-DryAvP1o.js → tensor2d-C8gFDiIC.js} +1 -1
package/dist/{tensor4d-BR5YioKH.js → tensor4d-Bvqzr_Wu.js} +1 -1
package/dist/{tfjs_backend-BuO7pU2h.js → tfjs_backend-9QO-TAAZ.js} +275 -295
package/dist/{tile-CB7Cg2Cm.js → tile-CcpklBqG.js} +1 -1
package/dist/training/AdamW.js +2 -2
package/dist/training/BasicTrainer.d.ts +6 -0
package/dist/training/BasicTrainer.js +74 -60
package/dist/training/DatasetBuilder.js +3 -3
package/dist/training/Evaluator.js +2 -2
package/dist/training/SFTDatasetBuilder.js +3 -3
package/dist/training/SFTTrainer.js +6 -6
package/dist/training/loss.d.ts +1 -1
package/dist/training/loss.js +12 -8
package/dist/training/orthoGrad.js +1 -1
package/dist/training/sparseCrossEntropy.d.ts +2 -2
package/dist/training/sparseCrossEntropy.js +54 -31
package/dist/training/types.d.ts +4 -0
package/dist/training/validation.js +19 -17
package/dist/{transpose-COw0-lqd.js → transpose-CwEYsCv1.js} +2 -2
package/dist/{unsorted_segment_sum-C23hrdi0.js → unsorted_segment_sum-DRVX2bX2.js} +22 -22
package/dist/utilities/dummy.js +2 -2
package/dist/utilities/multinomialCPU.js +2 -2
package/dist/utilities/packed.js +1 -1
package/dist/utilities/parameters.d.ts +1 -0
package/dist/utilities/parameters.js +20 -15
package/dist/utilities/performance.js +1 -1
package/dist/utilities/profile.js +1 -1
package/dist/utilities/safetensors.js +2 -2
package/dist/utilities/sentences.js +5 -5
package/dist/utilities/weights.js +2 -2
package/dist/{variable-lnPOlwsK.js → variable-CqrRzzxM.js} +1 -1
package/dist/{webgpu_program-CuMK2hhh.js → webgpu_program-BlAY4Q29.js} +1 -1
package/dist/{webgpu_util-DWXgz54K.js → webgpu_util-D1Ynuktt.js} +1 -1
package/dist/{zeros-BJogAj4Z.js → zeros-B8VPk-mx.js} +2 -2
package/dist/{zeros_like-WQK7VrX-.js → zeros_like-DfWM-ezN.js} +90 -89
package/package.json +1 -1
package/dist/floor-B6EO3Z6x.js +0 -18
package/dist/not_equal-BO_DB61m.js +0 -64
package/dist/random_normal-dxcPUb9x.js +0 -61

package/dist/Generator.js CHANGED Viewed

@@ -1,40 +1,40 @@
 import { E as Ui } from "./index-DvYrXKkX.js";
-import { o as Hi, q as Xi, E as Ki, dn as Ss, am as pe, a8 as _, ar as oo, as as ao, e as Oe, a_ as Dt, ax as ro, ay as io, at as ji, au as Ft, aD as Ge, ab as co, aG as ws, aH as qi, N as G, ac as _e, aI as Yi, D as Ns, aJ as Rs, R as Qi, af as Zi, x as te, B as lo, Y as Ne, a6 as ee, bd as uo, c9 as Ts, ca as Es, cQ as po, bo as ho, ad as ue, L as Ye, bp as fo, bq as mo, cb as go, cc as Ds, cd as Fs, ce as Ps, cf as Os, cg as As, br as xo, a9 as nt, cA as Co, cR as bo, cS as Io, bu as yo, bt as ko, bf as $o, dg as vo, aj as _s, cU as So, an as wo, C as No, bv as Ro, c4 as $e, cF as To, bw as Eo, cB as Do, cV as Fo, cC as Po, bx as Ls, by as Vs, bh as Oo, bz as Ao, ai as Qe, Q as Ji, bA as _o, cD as Lo, ch as Vo, bB as Wo, cH as Mo, cI as Bo, dh as Go, ci as zo, bZ as ns, bT as St, bW as Ws, cW as yn, dv as ut, dw as Uo, cX as kn, di as ec, K as tc, bg as Ho, cY as Xo, bD as Ms, z as Ko, aX as jo, aV as mt, cr as qo, de as Yo, df as Qo, bi as Zo, cG as Jo, dk as ea, ah as ta, G as sa, cs as na, cj as Bs, ck as Gs, cl as zs, dl as oa, b5 as Us, b6 as Hs, bF as Xs, cn as Ks, cm as aa, c_ as ra, aM as sc, bG as ia, cE as ca, c$ as la, d0 as ua, dm as da, aP as pa, a$ as ha, co as fa, bS as ma, M as js, F as ga, bk as xa, bm as Ca, bl as ba, bH as Ia, d5 as ya, bI as ka, P as $a, a4 as va, bJ as Sa, d1 as qs, dx as wa, dy as Na, dz as Ra, X as Ta, cp as Ys, bb as Ea, d2 as Da, bc as Fa, d3 as Pa, bL as Oa, bj as Aa, b8 as Qs, ag as _a, dp as La, aL as Va, bN as Zs, cq as Js, bO as en, bP as tn, bE as sn, bK as Wa, dA as Ma, dB as Ba, dq as Ga, dr as za, ds as Ua, H as Ha, d4 as Xa, aK as nn, ct as Ka, dt as ja, dC as qa, dD as Ya, cu as on, bs as an, du as Qa, T as Za, cv as Ja, bn as er, a3 as rn, cw as tr, ba as sr, bQ as nr, c as or, dE as ar, dF as $n, av as vn, aw as nc, t as rr, a as oc, dG as ac, dH as rc, c2 as ic, aq as cc, bR as lc, bX as uc, S as dc, bY as pc, aQ as hc, ap as fc, bU as mc, bV as gc, b_ as xc, aS as ir, bC as Cc, aN as bc, b$ as Ic, ak as yc, c0 as kc, dj as $c, b1 as vc, b2 as Sc, b3 as wc, b4 as Nc, aO as Rc, c1 as Tc, b7 as Ec, c7 as Dc, ao as Fc, c3 as Pc, aW as cr, bM as Oc, aF as Ac, c5 as _c, b9 as Lc, c6 as Vc, k as Wc } from "./index-3FfEY3tm.js";
-import { n as Mc } from "./random_width-DSeITIFc.js";
-import { t as Bc } from "./zeros_like-WQK7VrX-.js";
+import { o as Hi, q as Xi, E as Ki, dn as Ss, a5 as pe, ab as _, as as oo, at as ao, e as Oe, a_ as Dt, ay as ro, az as io, au as ji, av as Ft, aD as Ge, ae as co, aG as ws, aH as qi, U as G, af as _e, aI as Yi, H as Ns, aJ as Rs, R as Qi, aj as Zi, x as te, D as lo, _ as Ne, a9 as ee, bd as uo, c9 as Ts, ca as Es, cQ as po, bo as ho, ah as ue, Q as Ye, bp as fo, bq as mo, cb as go, cc as Ds, cd as Fs, ce as Ps, cf as Os, cg as As, br as xo, ac as nt, cA as Co, cR as bo, cS as Io, bu as yo, bt as ko, bf as $o, dg as vo, C as _s, cU as So, ao as wo, z as No, bv as Ro, c4 as $e, cF as To, bw as Eo, cB as Do, cV as Fo, cC as Po, bx as Ls, by as Vs, bh as Oo, bz as Ao, am as Qe, V as Ji, bA as _o, cD as Lo, ch as Vo, bB as Wo, cH as Mo, cI as Bo, dh as Go, ci as zo, bZ as ns, bT as St, bW as Ws, cW as yn, dv as ut, dw as Uo, cX as kn, di as ec, N as tc, bg as Ho, cY as Xo, bD as Ms, A as Ko, aX as jo, aV as mt, cr as qo, de as Yo, df as Qo, bi as Zo, cG as Jo, dk as ea, al as ta, G as sa, cs as na, cj as Bs, ck as Gs, cl as zs, dl as oa, b5 as Us, b6 as Hs, bF as Xs, cn as Ks, cm as aa, c_ as ra, aM as sc, bG as ia, cE as ca, c$ as la, d0 as ua, dm as da, aP as pa, a$ as ha, co as fa, bS as ma, M as js, I as ga, bk as xa, bm as Ca, bl as ba, bH as Ia, d5 as ya, bI as ka, P as $a, a7 as va, bJ as Sa, d1 as qs, dx as wa, dy as Na, dz as Ra, Z as Ta, cp as Ys, bb as Ea, d2 as Da, bc as Fa, d3 as Pa, bL as Oa, bj as Aa, b8 as Qs, ak as _a, dp as La, aL as Va, bN as Zs, cq as Js, bO as en, bP as tn, bE as sn, bK as Wa, dA as Ma, dB as Ba, dq as Ga, dr as za, ds as Ua, J as Ha, d4 as Xa, aK as nn, ct as Ka, dt as ja, dC as qa, dD as Ya, cu as on, bs as an, du as Qa, T as Za, cv as Ja, bn as er, a6 as rn, cw as tr, ba as sr, bQ as nr, c as or, dE as ar, dF as $n, aw as vn, ax as nc, t as rr, a as oc, dG as ac, dH as rc, c2 as ic, ar as cc, bR as lc, bX as uc, S as dc, bY as pc, aQ as hc, aq as fc, bU as mc, bV as gc, b_ as xc, aS as ir, bC as Cc, aN as bc, b$ as Ic, F as yc, c0 as kc, dj as $c, b1 as vc, b2 as Sc, b3 as wc, b4 as Nc, aO as Rc, c1 as Tc, b7 as Ec, c7 as Dc, ap as Fc, c3 as Pc, aW as cr, bM as Oc, aF as Ac, c5 as _c, b9 as Lc, c6 as Vc, k as Wc } from "./index-D0b5F1JD.js";
+import { n as Mc } from "./random_width-CliSj-et.js";
+import { t as Bc } from "./zeros_like-DfWM-ezN.js";
 import "./index-Cp39cXWe.js";
-import "./dataset-ZUdlBUXV.js";
-import { a as j, u as ae, c as ot, i as at, b as Gc, d as wt, t as Re, e as gt, f as dt, g as lr, r as Nt, h as Ae, j as zc, k as Uc, l as cn, z as Hc, m as ln, n as ur, o as Xc, p as Kc, q as jc, v as qc, w as Yc, x as Qc, y as Zc, A as Jc, B as el, C as tl, D as lt, E as sl, F as nl, G as dr, H as ol, I as al, J as rl, K as il, L as cl, M as ll, N as ul, O as dl, P as pl, Q as hl, R as fl, S as ml, T as gl, U as xl, V as Cl, W as bl, X as Il, Y as yl, Z as kl, _ as $l, $ as vl, a0 as Sl, a1 as wl, a2 as Nl, a3 as Rl, a4 as Tl, a5 as El, a6 as Dl, a7 as Fl, a8 as Pl, a9 as Ol, aa as Al, ab as _l, ac as Ll, ad as Vl, ae as Wl, af as Ml, ag as Bl, ah as Gl, ai as zl } from "./shared-DeC0UJkK.js";
+import "./dataset-Bwcib9pp.js";
+import { a as j, u as ae, c as ot, i as at, b as Gc, d as wt, t as Re, e as gt, f as dt, g as lr, r as Nt, h as Ae, j as zc, k as Uc, l as cn, z as Hc, m as ln, n as ur, o as Xc, p as Kc, q as jc, v as qc, w as Yc, x as Qc, y as Zc, A as Jc, B as el, C as tl, D as lt, E as sl, F as nl, G as dr, H as ol, I as al, J as rl, K as il, L as cl, M as ll, N as ul, O as dl, P as pl, Q as hl, R as fl, S as ml, T as gl, U as xl, V as Cl, W as bl, X as Il, Y as yl, Z as kl, _ as $l, $ as vl, a0 as Sl, a1 as wl, a2 as Nl, a3 as Rl, a4 as Tl, a5 as El, a6 as Dl, a7 as Fl, a8 as Pl, a9 as Ol, aa as Al, ab as _l, ac as Ll, ad as Vl, ae as Wl, af as Ml, ag as Bl, ah as Gl, ai as zl } from "./shared-DgNUoqSc.js";
 import { m as pt, g as pr, s as Ul, c as Hl, b as Xl, d as Kl, a as jl, e as ql } from "./complex_util-Yc1A_gV1.js";
-import { a as ge, b as xe, d as ke, c as ve, e as Te, g as os } from "./axis_util-RrJzDQJc.js";
-import { k as Ze, h as Le, i as Je, j as rt, b as Se, d as xt, g as as } from "./step-CA-PdcE1.js";
-import { z as rs, A as is, B as cs, C as hr, D as fr, F as mr, G as gr, H as xr, I as Cr, J as br, y as Ir, x as yr, w as kr, u as $r, t as vr, E as Sr, K as wr, L as Nr, M as Rr, N as Tr, c as Er, f as Yl, O as Ql, P as Zl } from "./backend_util-9wV3yg0r.js";
-import { a as Dr, c as Ue } from "./concat_util-DVNU-Nn3.js";
+import { a as ge, b as xe, d as ke, c as ve, e as Te, g as os } from "./axis_util-BBaWKQoo.js";
+import { k as Ze, h as Le, i as Je, j as rt, b as Se, d as xt, g as as } from "./step-wz0MZ7BP.js";
+import { z as rs, A as is, B as cs, C as hr, D as fr, F as mr, G as gr, H as xr, I as Cr, J as br, y as Ir, x as yr, w as kr, u as $r, t as vr, E as Sr, K as wr, L as Nr, M as Rr, N as Tr, c as Er, f as Yl, O as Ql, P as Zl } from "./backend_util-DLIicY0X.js";
+import { a as Dr, c as Ue } from "./concat_util-DT0Mofs3.js";
 import { s as Jl } from "./index-CieiGp4Y.js";
 import { n as Fr, b as Pr, a as Or } from "./non_max_suppression_impl-B2W7YjZB.js";
-import { c as Ct } from "./scatter_nd_util-6lhBuxGa.js";
-import { S as Ar, a as _r } from "./selu_util-emNhirms.js";
-import { b as Lr, d as Vr, p as eu, a as tu, i as su, c as nu } from "./slice_util-5UIO9Akz.js";
-import { h as Sn, j as ou, k as au, l as ru, m as iu, n as cu, o as lu, P as un, p as Ve, u as Pe, q as Wr, c as Mr, T as De, E as Br, g as Gr, a as zr, r as uu, s as du, t as Y, v as Pt, w as pu, x as wn, y as hu, z as fu, A as Ot, B as mu, C as gu, D as bs, F as Gt, G as zt, H as xu, I as Cu, J as Nn, K as bu, L as Iu, M as fs, N as yu, O as ku, Q as $u, R as Ut, S as ms, U as vu, f as he, V as be, W as Ht, X as Xt, Y as Su, d as Rn, e as Tn, i as Ur, Z as wu, _ as Nu, $ as Ru, a0 as Tu, a1 as Eu, a2 as Du, a3 as At } from "./gpgpu_math-Dzx_EUJa.js";
-import { s as Hr, a as Fu, t as Xr, b as Pu, c as Ou, d as Kr, e as Au, n as _u, f as Lu, g as Vu, h as Wu, i as Mu, j as Bu, k as Gu, l as zu, o as Uu, p as Hu, q as Xu, r as Ku, u as ju, v as qu, w as Yu, x as Qu, y as Zu, z as Ju, A as ed, B as td, C as sd, D as nd, E as od, F as ad, G as rd, H as id, I as cd, J as ld, K as ud, L as dd, M as jr, N as pd, O as hd, P as fd, Q as md, R as gd, S as xd, T as Cd, U as bd, V as Id, W as yd } from "./shared-Wn4Lkf40.js";
-import { a as ye, c as kd, U as st, d as qe, e as ze, A as En, f as bt, B as dn, h as pn, m as Rt, u as se, C as We, b as Ce, i as Fe, j as hn, k as it, l as It, n as $d, o as vd, p as Sd, q as wd } from "./kernel_funcs_utils-BLvDeLPe.js";
-import { R as Nd, r as U, a as Rd } from "./Reshape-D973Ba8R.js";
-import { M as qr } from "./matMulGelu-Bdxn3VPX.js";
-import { t as Yr, s as fn, a as _t, m as Td, r as Ed, b as Dd, c as Fd, d as Pd } from "./RealDiv-B2Tyc34U.js";
-import { z as Od } from "./zeros-BJogAj4Z.js";
+import { c as Ct } from "./scatter_nd_util-SSoGmfpx.js";
+import { S as Ar, a as _r } from "./selu_util-C0DN3KhX.js";
+import { b as Lr, d as Vr, p as eu, a as tu, i as su, c as nu } from "./slice_util-DK4kHJjN.js";
+import { h as Sn, j as ou, k as au, l as ru, m as iu, n as cu, o as lu, P as un, p as Ve, u as Pe, q as Wr, c as Mr, T as De, E as Br, g as Gr, a as zr, r as uu, s as du, t as Y, v as Pt, w as pu, x as wn, y as hu, z as fu, A as Ot, B as mu, C as gu, D as bs, F as Gt, G as zt, H as xu, I as Cu, J as Nn, K as bu, L as Iu, M as fs, N as yu, O as ku, Q as $u, R as Ut, S as ms, U as vu, f as he, V as be, W as Ht, X as Xt, Y as Su, d as Rn, e as Tn, i as Ur, Z as wu, _ as Nu, $ as Ru, a0 as Tu, a1 as Eu, a2 as Du, a3 as At } from "./gpgpu_math-D83bWKYw.js";
+import { s as Hr, a as Fu, t as Xr, b as Pu, c as Ou, d as Kr, e as Au, n as _u, f as Lu, g as Vu, h as Wu, i as Mu, j as Bu, k as Gu, l as zu, o as Uu, p as Hu, q as Xu, r as Ku, u as ju, v as qu, w as Yu, x as Qu, y as Zu, z as Ju, A as ed, B as td, C as sd, D as nd, E as od, F as ad, G as rd, H as id, I as cd, J as ld, K as ud, L as dd, M as jr, N as pd, O as hd, P as fd, Q as md, R as gd, S as xd, T as Cd, U as bd, V as Id, W as yd } from "./shared-CefTy5O1.js";
+import { a as ye, c as kd, U as st, d as qe, e as ze, A as En, f as bt, B as dn, h as pn, m as Rt, u as se, C as We, b as Ce, i as Fe, j as hn, k as it, l as It, n as $d, o as vd, p as Sd, q as wd } from "./kernel_funcs_utils-Bu6bS4D_.js";
+import { R as Nd, r as U, a as Rd } from "./Reshape-CKzb2DIN.js";
+import { M as qr } from "./matMulGelu-Cbtq3pxJ.js";
+import { t as Yr, s as fn, a as _t, m as Td, r as Ed, b as Dd, c as Fd, d as Pd } from "./RealDiv-CJpH9Bif.js";
+import { z as Od } from "./zeros-B8VPk-mx.js";
 import "./ops/cpu/attentionMask.js";
 import "./ops/webgl/attentionMask.js";
 import "./ops/grads/attentionMask.js";
 import "./ops/cpu/rope.js";
 import "./ops/webgl/rope.js";
-import "./rope-D5BJXlc7.js";
+import "./rope-DjON_IMj.js";
 import "./ops/cpu/appendCache.js";
 import "./ops/webgl/appendCache.js";
 import "./ops/grads/softmax16.js";
-import "./matMul16-Bp17gt56.js";
+import "./matMul16-bI7XM831.js";
 import "./ops/webgl/matMul16.js";
 import "./ops/cpu/matMul16.js";
-import "./pack16-F9gxcBrq.js";
+import "./pack16-DO9GrRdk.js";
 import "./ops/transpose16.js";
 import "./ops/reshape16.js";
 import "./ops/cpu/qkv.js";
@@ -43,6 +43,8 @@ import "./ops/grads/qkv.js";
 import "./ops/cpu/normRMS.js";
 import "./ops/webgl/normRMS.js";
 import "./ops/grads/normRMS.js";
+import "./ops/dropout16.js";
+import "./ops/webgl/dropout16.js";
 import "./ops/grads/add16.js";
 import "./jszip.min-Bz5-11Bk.js";
 import Ad from "./tokeniser/CharTokeniser.js";
@@ -62,17 +64,17 @@ import "./ops/cpu/matMulGelu.js";
 import "./ops/grads/matMulGelu.js";
 import "./ops/cpu/gelu.js";
 import "./ops/webgl/gelu.js";
-import "./gelu-9_DFp2Q5.js";
+import "./gelu-CmkPheOK.js";
 import "./ops/webgl/log.js";
 import "./checks/normRMS.js";
 import "./checks/normRMSGrad.js";
 import Wd from "./utilities/multinomialCPU.js";
-import { r as Dn } from "./reshape-3ugLpT-p.js";
-import { t as Kt } from "./tensor2d-DryAvP1o.js";
-import { z as Md } from "./unsorted_segment_sum-C23hrdi0.js";
-import { s as gs } from "./softmax-BSXRSMAA.js";
-import { g as Bd } from "./gather-BYhIiO5e.js";
-import { c as Gd } from "./concat-B9WckkXa.js";
+import { r as Dn } from "./reshape-DVh8yLpI.js";
+import { t as Kt } from "./tensor2d-C8gFDiIC.js";
+import { z as Md } from "./unsorted_segment_sum-DRVX2bX2.js";
+import { s as gs } from "./softmax-HULrSwJC.js";
+import { g as Bd } from "./gather-FIoUa4Zd.js";
+import { c as Gd } from "./concat-DCm6KW65.js";
 function zd(a, t, e, n = !1) {
   const s = Xi(a, "logits", "multinomial"), o = s.size, r = s.rank;
   if (o < 2)
@@ -11676,7 +11678,7 @@ const lv = [
 function uv(a, t) {
   return a.length === t ? a : a.length > t ? a.slice(0, t) : a.concat(Array(t - a.length).fill(""));
 }
-class FS extends Ui {
+class OS extends Ui {
   constructor(t, e) {
     super(), this.model = t, this.tokeniser = e, this.actualTokeniser = e;
   }
@@ -11872,6 +11874,6 @@ class FS extends Ui {
   }
 }
 export {
-  FS as default,
+  OS as default,
   cv as isConversation
 };

package/dist/{RealDiv-B2Tyc34U.js → RealDiv-CJpH9Bif.js} RENAMED Viewed

@@ -1,10 +1,10 @@
-import { aE as E, a8 as T, ad as O, N as V, aW as B, K as F, aM as K, aX as W } from "./index-3FfEY3tm.js";
-import { r as $ } from "./Reshape-D973Ba8R.js";
-import { a as A, b as k, d as C, c as N, e as R } from "./axis_util-RrJzDQJc.js";
-import { t as U, m as _ } from "./shared-Wn4Lkf40.js";
-import { c as j } from "./backend_util-9wV3yg0r.js";
-import { f as y } from "./gpgpu_math-Dzx_EUJa.js";
-import { g as G, b as L } from "./kernel_funcs_utils-BLvDeLPe.js";
+import { aE as E, ab as T, ah as O, U as V, aW as B, N as F, aM as U, aX as W } from "./index-D0b5F1JD.js";
+import { r as $ } from "./Reshape-CKzb2DIN.js";
+import { a as A, b as k, d as C, c as N, e as R } from "./axis_util-BBaWKQoo.js";
+import { t as K, m as _ } from "./shared-CefTy5O1.js";
+import { c as j } from "./backend_util-DLIicY0X.js";
+import { f as y } from "./gpgpu_math-D83bWKYw.js";
+import { g as G, b as L } from "./kernel_funcs_utils-Bu6bS4D_.js";
 class w {
   constructor(s, e) {
     this.variableNames = ["x"];
@@ -273,7 +273,7 @@ function Q(a, s, e, t) {
   const [p, h] = N(u.shape, i);
   let d = p;
   e && (d = R(p, r));
-  const f = V(h), g = V(a.shape) / f, x = $({ inputs: { x: u }, attrs: { shape: [g, f] }, backend: t }), b = B(a.dtype), I = M(x, b, "sum", t), m = $({ inputs: { x: I }, attrs: { shape: d }, backend: t });
+  const f = V(h), g = V(a.shape) / f, x = $({ inputs: { x: u }, attrs: { shape: [g, f] }, backend: t }), S = B(a.dtype), I = M(x, S, "sum", t), m = $({ inputs: { x: I }, attrs: { shape: d }, backend: t });
   return t.disposeIntermediateTensorInfo(x), t.disposeIntermediateTensorInfo(I), o && t.disposeIntermediateTensorInfo(u), m;
 }
 function Z(a) {
@@ -299,7 +299,7 @@ function te(a) {
       const I = e.texData.get(d.dataId).values, m = new Array(i);
       for (let v = 0; v < m.length; v++)
         m[v] = n.shape[u[v]];
-      const z = U(I, n.shape, n.dtype, u, m);
+      const z = K(I, n.shape, n.dtype, u, m);
       d = e.makeTensorInfo(m, n.dtype);
       const D = e.texData.get(d.dataId);
       D.values = z;
@@ -308,21 +308,21 @@ function te(a) {
     o = k(o.length, i);
   }
   C("max", o, i);
-  const [f, S] = N(d.shape, o);
+  const [f, b] = N(d.shape, o);
   let g = f;
   r && (g = R(f, c));
   let x;
   if (h) {
-    const I = e.texData.get(d.dataId).values, m = _(I, V(S), g, n.dtype);
+    const I = e.texData.get(d.dataId).values, m = _(I, V(b), g, n.dtype);
     x = e.makeTensorInfo(g, n.dtype);
     const z = e.texData.get(x.dataId);
     z.values = m;
   } else
-    x = ee(d, S, g, e);
+    x = ee(d, b, g, e);
   return p && e.disposeIntermediateTensorInfo(d), x;
 }
 const he = {
-  kernelName: K,
+  kernelName: U,
   backendName: "webgl",
   kernelFunc: te
 };

package/dist/{Reshape-Bqk-z_7-.js → Reshape-C4ZzbS5c.js} RENAMED Viewed

@@ -1,14 +1,14 @@
-import { N as h, af as d, x as c, R as m } from "./index-3FfEY3tm.js";
+import { U as h, aj as d, x as c, R as m } from "./index-D0b5F1JD.js";
 function i(n) {
   const { inputs: p, attrs: o } = n, { x: e } = p, { shape: r } = o, a = h(e.shape), s = d(r, a), t = h(s);
   return c(a === t, () => `The new shape (${s}) has ${t} elements and the old shape (${e.shape}) has ${a} elements. The new shape and old shape must have the same number of elements.`), n.backend.incRef(e.dataId), { dataId: e.dataId, shape: s, dtype: e.dtype };
 }
-const f = {
+const u = {
   kernelName: m,
   backendName: "webgpu",
   kernelFunc: i
 };
 export {
-  f as a,
+  u as a,
   i as r
 };

package/dist/{Reshape-D973Ba8R.js → Reshape-CKzb2DIN.js} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { R as C, N as c, af as f, x as R } from "./index-3FfEY3tm.js";
-import { u as g, g as I, a as x, b as F, c as $, d as u, e as m, i as l } from "./gpgpu_math-Dzx_EUJa.js";
+import { R as C, U as c, aj as R, x as f } from "./index-D0b5F1JD.js";
+import { u as g, g as I, a as x, b as F, c as $, d as u, e as m, i as l } from "./gpgpu_math-D83bWKYw.js";
 class S {
   constructor(t, i) {
     this.variableNames = ["A"], this.packedInputs = !0, this.packedOutput = !0, this.customUniforms = [{ name: "inputShape", type: "ivec3" }], this.outputShape = t, this.enableShapeUniforms = g(this.outputShape.length);
@@ -62,8 +62,8 @@ function b(s, t, i) {
   return { dataId: h.dataId, shape: t, dtype: h.dtype };
 }
 function y(s) {
-  const { inputs: t, backend: i, attrs: a } = s, { x: e } = t, { shape: o } = a, r = i, p = c(e.shape), n = f(o, p), h = c(n);
-  R(p === h, () => `The new shape (${n}) has ${h} elements and the old shape (${e.shape}) has ${p} elements. The new shape and old shape must have the same number of elements.`);
+  const { inputs: t, backend: i, attrs: a } = s, { x: e } = t, { shape: o } = a, r = i, p = c(e.shape), n = R(o, p), h = c(n);
+  f(p === h, () => `The new shape (${n}) has ${h} elements and the old shape (${e.shape}) has ${p} elements. The new shape and old shape must have the same number of elements.`);
   const d = r.texData.get(e.dataId);
   return d.isPacked && !l(e.shape, n) && !(d.texture !== null && l(d.shape, n)) ? b(e, n, r) : (r.incRef(e.dataId), { dataId: e.dataId, shape: n, dtype: e.dtype });
 }

package/dist/TeachableLLM.d.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import { default as MemoryProfiler } from './utilities/profile';
 import { default as Model, ModelForwardAttributes } from './models/model';
 import { Task } from './training/tasks/Task';
 import { TrainingLogEntry, TrainingOptions } from './training/types';
+import { ModelPhase } from './loader/types';
 type TeachableLLMStatus = 'warmup' | 'awaitingTokens' | 'ready' | 'training' | 'loading' | 'busy' | 'error';
 interface TeachableLLMMeta {
     name?: string;
@@ -26,6 +27,8 @@ export default class TeachableLLM {
     private _trainer;
     constructor(tokeniser?: ITokeniser, model?: Model<ModelForwardAttributes, GPTConfig>);
     get vocab(): string[];
+    get phase(): ModelPhase;
+    set phase(phase: ModelPhase);
     /** Model is fully loaded */
     get loaded(): boolean;
     get config(): GPTConfig;
@@ -52,10 +55,12 @@ export default class TeachableLLM {
     generateText(options?: IGenerateOptions): Promise<Conversation[]>;
     dispose(): void;
     on(event: 'status', listener: (status: TeachableLLMStatus) => void): void;
+    on(event: 'phase', listener: (phase: ModelPhase) => void): void;
     on(event: 'error', listener: (error: Error) => void): void;
     on(event: 'trainStep', listener: (step: TrainingLogEntry) => void): void;
     on(event: 'loaded', listener: () => void): void;
     off(event: 'status', listener: (status: TeachableLLMStatus) => void): void;
+    off(event: 'phase', listener: (phase: ModelPhase) => void): void;
     off(event: 'error', listener: (error: Error) => void): void;
     off(event: 'trainStep', listener: (step: TrainingLogEntry) => void): void;
     off(event: 'loaded', listener: () => void): void;

package/dist/TeachableLLM.js CHANGED Viewed

@@ -1,28 +1,28 @@
 import { validateConfig as m } from "./models/config.js";
 import { saveModel as d } from "./loader/save.js";
-import { loadModel as u } from "./loader/load.js";
-import p from "./Generator.js";
+import { loadModel as p } from "./loader/load.js";
+import u from "./Generator.js";
 import h from "./Trainer.js";
 import { E as f } from "./index-DvYrXKkX.js";
 import { dummyPassTrainAsync as l } from "./utilities/dummy.js";
-import "./index-3FfEY3tm.js";
-import "./random_width-DSeITIFc.js";
-import "./zeros_like-WQK7VrX-.js";
+import "./index-D0b5F1JD.js";
+import "./random_width-CliSj-et.js";
+import "./zeros_like-DfWM-ezN.js";
 import "./index-Cp39cXWe.js";
-import "./dataset-ZUdlBUXV.js";
+import "./dataset-Bwcib9pp.js";
 import "./ops/cpu/attentionMask.js";
 import "./ops/webgl/attentionMask.js";
 import "./ops/grads/attentionMask.js";
 import "./ops/cpu/rope.js";
 import "./ops/webgl/rope.js";
-import "./rope-D5BJXlc7.js";
+import "./rope-DjON_IMj.js";
 import "./ops/cpu/appendCache.js";
 import "./ops/webgl/appendCache.js";
 import "./ops/grads/softmax16.js";
-import "./matMul16-Bp17gt56.js";
+import "./matMul16-bI7XM831.js";
 import "./ops/webgl/matMul16.js";
 import "./ops/cpu/matMul16.js";
-import "./pack16-F9gxcBrq.js";
+import "./pack16-DO9GrRdk.js";
 import "./ops/transpose16.js";
 import "./ops/reshape16.js";
 import "./ops/cpu/qkv.js";
@@ -31,6 +31,8 @@ import "./ops/grads/qkv.js";
 import "./ops/cpu/normRMS.js";
 import "./ops/webgl/normRMS.js";
 import "./ops/grads/normRMS.js";
+import "./ops/dropout16.js";
+import "./ops/webgl/dropout16.js";
 import "./ops/grads/add16.js";
 import c from "./tokeniser/CharTokeniser.js";
 import g from "./tokeniser/bpe.js";
@@ -41,11 +43,11 @@ import "./ops/webgl/gatherSub.js";
 import "./ops/cpu/scatterSub.js";
 import "./ops/webgl/scatterSub.js";
 import "./ops/cpu/matMulGelu.js";
-import "./matMulGelu-Bdxn3VPX.js";
+import "./matMulGelu-Cbtq3pxJ.js";
 import "./ops/grads/matMulGelu.js";
 import "./ops/cpu/gelu.js";
 import "./ops/webgl/gelu.js";
-import "./gelu-9_DFp2Q5.js";
+import "./gelu-CmkPheOK.js";
 import "./ops/webgl/log.js";
 import "./ops/cpu/adamMoments.js";
 import "./ops/webgl/adamMoments.js";
@@ -70,6 +72,14 @@ class a {
   get vocab() {
     return this._tokeniser?.getVocab() || [];
   }
+  get phase() {
+    return this._model?.metaData?.phase ?? "untrained";
+  }
+  set phase(t) {
+    if (!this._model)
+      throw new Error("model_not_initialized.");
+    this._model.metaData.phase = t, this.ee.emit("phase", t);
+  }
   /** Model is fully loaded */
   get loaded() {
     return !!this._model && !!this._tokeniser && !!this._config;
@@ -116,9 +126,9 @@ class a {
   }
   static loadModel(t, r) {
     const e = new a();
-    return u(t, r).then(({ model: o, tokeniser: n, metaData: i }) => {
+    return p(t, r).then(({ model: o, tokeniser: n, metaData: i }) => {
       m(o.config), e._model = o, e._tokeniser = n, e._config = o.config, i?.name && (e.meta.name = i.name), e.setStatus("warmup"), l(o).then((s) => {
-        e._memoryRequirements = s, e.setStatus("ready"), e.ee.emit("loaded");
+        e._memoryRequirements = s, e.setStatus("ready"), e.ee.emit("loaded"), e.ee.emit("phase", e.phase);
       }).catch((s) => {
         e.setStatus("error"), e.ee.emit("error", s), console.error("Error during warmup:", s);
       });
@@ -130,7 +140,7 @@ class a {
     m(r);
     const e = r, o = t === "char" ? new c(e.vocabSize) : new g(e.vocabSize), n = k(e), i = new a(o, n);
     return i.setStatus("warmup"), l(n).then((s) => {
-      i._memoryRequirements = s, i.tokeniser.trained ? (i.setStatus("ready"), i.ee.emit("loaded")) : (i.setStatus("awaitingTokens"), i.ee.emit("loaded"), i.tokeniser.once("trainStatus", (_) => {
+      i._memoryRequirements = s, i.tokeniser.trained ? (i.setStatus("ready"), i.ee.emit("loaded"), i.ee.emit("phase", i.phase)) : (i.setStatus("awaitingTokens"), i.ee.emit("loaded"), i.ee.emit("phase", i.phase), i.tokeniser.once("trainStatus", (_) => {
         _ === "trained" && i.setStatus("ready");
       }));
     }).catch((s) => {
@@ -159,11 +169,13 @@ class a {
       throw new Error("model_or_tokeniser_not_initialized.");
     this._trainer && t && this._trainer.trainingType !== t && (this._trainer.dispose(), this._trainer = null);
     const e = this._trainer === null ? new h(this._model, this._tokeniser, t, r) : new h(this._trainer, r);
-    return e.on("start", () => this.setStatus("training")), e.on("stop", () => this.setStatus("ready")), e.on("log", async (o) => {
+    return e.on("start", () => {
+      this.setStatus("training"), this.phase = t === "sft" ? "finetuned" : "pretrained";
+    }), e.on("stop", () => this.setStatus("ready")), e.on("log", async (o) => {
       const n = this.ee.listeners("trainStep");
       for (const i of n)
         await i(o);
-    }), this._trainer = e, e;
+    }), this._trainer && this._trainer !== e && this._trainer.dispose(), this._trainer = e, e;
   }
   async train(t, r, e) {
     const o = this.trainer(e, r);
@@ -178,7 +190,7 @@ class a {
   generator() {
     if (!this._model || !this._tokeniser)
       throw new Error("model_or_tokeniser_not_initialized.");
-    const t = new p(this._model, this._tokeniser);
+    const t = new u(this._model, this._tokeniser);
     return t.on("start", () => {
       this.status === "ready" && this.setStatus("busy");
     }), t.on("stop", () => {
@@ -189,7 +201,7 @@ class a {
     return Array.isArray(t) ? this.generator().generate(t, r) : this.generator().generate([], r);
   }
   dispose() {
-    this._model?.dispose(), this.ee.removeAllListeners();
+    this._trainer && (this._trainer.dispose(), this._trainer = null), this._model?.dispose(), this.ee.removeAllListeners();
   }
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
   on(t, r) {

package/dist/Trainer.d.ts CHANGED Viewed

@@ -20,6 +20,7 @@ export default class Trainer extends EE<'start' | 'stop' | 'log'> {
     log: TrainingLogEntry[];
     private progress;
     options: TrainingOptions;
+    protected tokenizer: ITokeniser;
     constructor(model: Model<ModelForwardAttributes>, tokeniser: ITokeniser, trainingType?: TrainingType, options?: TrainingOptions);
     constructor(trainer: Trainer, options?: TrainingOptions);
     get model(): Model<ModelForwardAttributes>;

package/dist/Trainer.js CHANGED Viewed

@@ -1,8 +1,8 @@
 import { E as g } from "./index-DvYrXKkX.js";
-import s from "./training/PreTrainer.js";
+import o from "./training/PreTrainer.js";
 import { createTrainValidationSplit as p } from "./training/validation.js";
-import n from "./training/SFTTrainer.js";
-class o extends g {
+import h from "./training/SFTTrainer.js";
+class l extends g {
   trainer;
   trainingType = "pretraining";
   hasTrained = !1;
@@ -16,19 +16,22 @@ class o extends g {
     sftMode: "full",
     logInterval: 10
   };
-  constructor(i, t, e = "pretraining", a) {
-    if (super(), i instanceof o) {
-      this.trainer = i.trainer, this.trainingType = i.trainingType, this.options = t ?? i.options, this.trainer.updateOptimizer(this.options), this.log = i.log, this.progress = i.progress, this.totalSamples = i.totalSamples;
+  tokenizer;
+  constructor(t, i, e = "pretraining", a) {
+    if (super(), t instanceof l) {
+      const r = i || t.options, n = t.options;
+      let s = !1;
+      t.trainingType === "sft" && r.sftMode !== n.sftMode && (s = !0), e !== t.trainingType && (s = !0), s ? (t.trainingType === "sft" ? this.trainer = new h(t.model, t.tokenizer, r) : this.trainer = new o(t.model, t.tokenizer, r), this.trainingType = e, this.options = r, this.tokenizer = t.tokenizer) : (this.trainer = t.trainer, this.trainingType = e, this.options = r, this.trainer.updateOptimizer(this.options), this.log = t.log, this.progress = t.progress, this.totalSamples = t.totalSamples, this.tokenizer = t.tokenizer, r.batchSize === n.batchSize && (this.trainDataset = t.trainDataset, this.validationDataset = t.validationDataset));
       return;
     }
-    if (!t)
-      throw new Error("Tokeniser must be provided when initializing Trainer with a model");
     if (!i)
+      throw new Error("Tokeniser must be provided when initializing Trainer with a model");
+    if (!t)
       throw new Error("Model must be provided when initializing Trainer");
     this.options = a || {
       batchSize: 32,
       sftMode: "full"
-    }, e === "sft" ? this.trainer = new n(i, t, a) : this.trainer = new s(i, t, a), this.trainingType = e;
+    }, e === "sft" ? this.trainer = new h(t, i, a) : this.trainer = new o(t, i, a), this.trainingType = e, this.tokenizer = i;
   }
   get model() {
     return this.trainer.model;
@@ -48,110 +51,110 @@ class o extends g {
   getTotalSamples() {
     return this.totalSamples;
   }
-  setOptions(i) {
-    const t = new Set(
-      Object.keys(i).filter(
-        (e) => i[e] !== this.options[e]
+  setOptions(t) {
+    const i = new Set(
+      Object.keys(t).filter(
+        (e) => t[e] !== this.options[e]
       )
     );
     if (this.trainer.isRunning) {
-      if (t.has("batchSize"))
+      if (i.has("batchSize"))
         throw new Error("Cannot change batch size during training");
-      if (t.has("sftMode"))
+      if (i.has("sftMode"))
         throw new Error("Cannot change SFT mode during training");
-      if (t.has("loraConfig"))
+      if (i.has("loraConfig"))
         throw new Error("Cannot change LoRA configuration during training");
-      if (t.has("validationSplit"))
+      if (i.has("validationSplit"))
         throw new Error("Cannot change validation split during training");
-      if (t.has("trainableWeights"))
+      if (i.has("trainableWeights"))
         throw new Error("Cannot change trainable weights during training");
-      if (t.has("mixedPrecision"))
+      if (i.has("mixedPrecision"))
         throw new Error("Cannot change mixed precision setting during training");
-      if (t.has("gradientCheckpointing"))
+      if (i.has("gradientCheckpointing"))
         throw new Error("Cannot change gradient checkpointing setting during training");
     }
     this.options = {
       ...this.options,
-      ...i
-    }, this.trainer.updateOptimizer(this.options), t.has("metrics") && this.trainer.setMetrics(i.metrics || []);
+      ...t
+    }, this.trainer.updateOptimizer(this.options), i.has("metrics") && this.trainer.setMetrics(t.metrics || []);
   }
-  async prepare(i = []) {
-    const t = this.options;
-    if (this.trainingType === "pretraining" && this.trainer instanceof s) {
-      const { trainDataset: e, validationDataset: a, size: r, trainState: h } = await p(
-        i,
+  async prepare(t = []) {
+    const i = this.options;
+    if (this.trainingType === "pretraining" && this.trainer instanceof o) {
+      const { trainDataset: e, validationDataset: a, size: r, trainState: n } = await p(
+        t,
         this.trainer.tokenizer,
         this.trainer.datasetBuilder,
-        t?.batchSize || 32,
-        t?.validationSplit || 0.1
-      ), l = r * (1 - (t?.validationSplit || 0));
-      this.trainDataset = e, this.validationDataset = a, this.totalSamples = l, this.options.epochSteps = Math.ceil(h.shuffledIndexes.length / (t?.batchSize || 32)), this.trainer.updateOptimizer(this.options);
-    } else if (this.trainingType === "sft" && this.trainer instanceof n) {
-      if (i instanceof Uint16Array)
+        i?.batchSize || 32,
+        i?.validationSplit || 0.1
+      ), s = r * (1 - (i?.validationSplit || 0));
+      this.trainDataset = e, this.validationDataset = a, this.totalSamples = s, this.options.epochSteps = Math.ceil(n.shuffledIndexes.length / (i?.batchSize || 32)), this.trainer.updateOptimizer(this.options);
+    } else if (this.trainingType === "sft" && this.trainer instanceof h) {
+      if (t instanceof Uint16Array)
         throw new Error("SFT training requires Task[] input");
       const e = await this.trainer.datasetBuilder.createSFTDataset(
-        i,
-        t?.batchSize || 32,
+        t,
+        i?.batchSize || 32,
         -100
       );
-      this.trainDataset = e, this.totalSamples = i.reduce((a, r) => a + r.length, 0), this.options.epochSteps = Math.ceil(this.totalSamples / (t?.batchSize || 32)), this.trainer.updateOptimizer(this.options);
+      this.trainDataset = e, this.totalSamples = t.reduce((a, r) => a + r.length, 0), this.options.epochSteps = Math.ceil(this.totalSamples / (i?.batchSize || 32)), this.trainer.updateOptimizer(this.options);
     }
   }
-  configureModel(i) {
-    const t = i?.sftMode || "full";
+  configureModel(t) {
+    const i = t?.sftMode || "full";
     if (this.trainingType === "pretraining" && (this.trainer.model.hasLoRA() && this.trainer.model.detachLoRA(), this.trainer.model.weightStore.setTrainable(["*"])), this.trainingType === "sft") {
-      if (t === "lora") {
-        if (!i?.loraConfig)
+      if (i === "lora") {
+        if (!t?.loraConfig)
           throw new Error("LoRA configuration must be provided for lora mode");
         if (this.trainer.model.hasLoRA()) {
           const e = this.trainer.model.lora;
-          (e.alpha !== i.loraConfig.alpha || e.rank !== i.loraConfig.rank) && (this.trainer.model.detachLoRA(), this.trainer.model.attachLoRA(i.loraConfig));
+          (e.alpha !== t.loraConfig.alpha || e.rank !== t.loraConfig.rank) && (this.trainer.model.detachLoRA(), this.trainer.model.attachLoRA(t.loraConfig));
         } else
-          this.trainer.model.attachLoRA(i.loraConfig);
+          this.trainer.model.attachLoRA(t.loraConfig);
       } else
         this.trainer.model.hasLoRA() && this.trainer.model.detachLoRA();
-      t === "last-layer" ? this.trainer.model.weightStore.setTrainable([
+      i === "last-layer" ? this.trainer.model.weightStore.setTrainable([
         `block_${this.trainer.model.config.nLayer - 1}_*`,
         "token_embedding"
-      ]) : t === "full" && this.trainer.model.weightStore.setTrainable(["*"]);
+      ]) : i === "full" && this.trainer.model.weightStore.setTrainable(["*"]);
     }
-    i?.trainableWeights && this.trainer.model.weightStore.setTrainable(i.trainableWeights);
+    t?.trainableWeights && this.trainer.model.weightStore.setTrainable(t.trainableWeights);
   }
   async train() {
-    const i = this.options;
+    const t = this.options;
     if (!this.trainDataset)
       throw new Error("Dataset not prepared");
-    this.hasTrained || this.trainer.setLearningRate(i?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start"), this.trainer.setGradientCheckpointing(i?.gradientCheckpointing || !1), this.trainer.setMixedPrecision(i?.mixedPrecision || !1), this.configureModel(i), await this.trainer.trainOnDataset(
+    this.hasTrained || this.trainer.setLearningRate(t?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start"), this.trainer.setGradientCheckpointing(t?.gradientCheckpointing || !1), this.trainer.setMixedPrecision(t?.mixedPrecision || !1), this.trainer.setLabelSmoothing(t?.labelSmoothing || 0), this.trainer.setDropout(t?.dropout || 0), this.trainer.setLayerDrop(t?.layerDrop || 0), this.configureModel(t), await this.trainer.trainOnDataset(
       this.trainDataset,
       {
-        ...i,
-        onStep: async (t) => {
-          this.log.push(t), this.progress = {
-            lastLog: t,
-            progress: t.totalSamples / this.totalSamples,
+        ...t,
+        onStep: async (i) => {
+          this.log.push(i), this.progress = {
+            lastLog: i,
+            progress: i.totalSamples / this.totalSamples,
             remaining: Math.max(
               0,
-              (this.totalSamples - t.totalSamples) / t.totalSamples * t.duration
+              (this.totalSamples - i.totalSamples) / i.totalSamples * i.duration
             )
           };
           const e = this.listeners("log");
           for (const a of e)
-            await a(t, this.progress);
+            await a(i, this.progress);
         }
       },
       this.validationDataset
     ), this.emit("stop");
   }
-  async step(i) {
+  async step(t) {
     if (!this.trainDataset)
       throw new Error("Dataset not prepared");
-    this.hasTrained || this.trainer.setLearningRate(i?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start");
-    const { log: t } = await this.trainer.stepDataset(this.trainDataset, i || {}, this.validationDataset), e = this.listeners("log");
+    this.hasTrained || this.trainer.setLearningRate(t?.learningRate || 1e-3), this.hasTrained = !0, this.emit("start");
+    const { log: i } = await this.trainer.stepDataset(this.trainDataset, t || {}, this.validationDataset), e = this.listeners("log");
     for (const a of e)
-      await a(t, {
-        lastLog: t,
-        progress: t.totalSamples / this.totalSamples,
-        remaining: Math.max(0, (this.totalSamples - t.totalSamples) / t.totalSamples * t.duration)
+      await a(i, {
+        lastLog: i,
+        progress: i.totalSamples / this.totalSamples,
+        remaining: Math.max(0, (this.totalSamples - i.totalSamples) / i.totalSamples * i.duration)
       });
     this.emit("stop");
   }
@@ -166,5 +169,5 @@ class o extends g {
   }
 }
 export {
-  o as default
+  l as default
 };

package/dist/{axis_util-RrJzDQJc.js → axis_util-BBaWKQoo.js} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { x as c } from "./index-3FfEY3tm.js";
+import { x as c } from "./index-D0b5F1JD.js";
 function i(e, n) {
   for (let t = 0; t < e.length; ++t)
     if (e[e.length - t - 1] !== n - 1 - t)

package/dist/backend.js CHANGED Viewed

@@ -1,9 +1,9 @@
-import { g as o, s as e, r as s } from "./index-3FfEY3tm.js";
+import { g as o, s as e, r as s } from "./index-D0b5F1JD.js";
 async function c(t, a) {
   if (o() !== t) {
     if (t === "webgpu") {
       const { registerWebGPUBackend: i } = await import("./patches/webgpu_base.js");
-      i(a), await import("./index-B8eBIyjS.js"), await import("./ops/webgpu/index.js");
+      i(a), await import("./index-nwvWLdRt.js"), await import("./ops/webgpu/index.js");
     }
     await e(t), await s(), console.log(`Backend set to ${t}`);
   }