@genai-fi/nanogpt 0.18.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +30 -30
- package/dist/{RealDiv-ioj6Z-ox.js → RealDiv-CGwv0liw.js} +9 -9
- package/dist/{Reshape-BZC-ebeR.js → Reshape-BW__R4mZ.js} +7 -7
- package/dist/{Reshape-pwprEaej.js → Reshape-CPBkTIH2.js} +1 -1
- package/dist/TeachableLLM.js +17 -17
- package/dist/Trainer.js +97 -95
- package/dist/{axis_util-QWWgLjut.js → axis_util-GTVlo58H.js} +1 -1
- package/dist/backend.js +2 -2
- package/dist/{backend_util-qwSFfxYx.js → backend_util-GaFarB78.js} +21 -21
- package/dist/{backend_webgpu-DI2wXEC2.js → backend_webgpu-BqASlsbV.js} +8 -8
- package/dist/{broadcast_to-C_EJTVTZ.js → broadcast_to-eS93CCN_.js} +2 -2
- package/dist/checks/appendCache.js +2 -2
- package/dist/checks/attentionMask.js +5 -5
- package/dist/checks/gelu.js +2 -2
- package/dist/checks/matMulGelu.js +2 -2
- package/dist/checks/normRMS.js +6 -6
- package/dist/checks/normRMSGrad.js +3 -3
- package/dist/checks/packUnpack.js +6 -6
- package/dist/checks/qkv.js +2 -2
- package/dist/checks/rope.js +2 -2
- package/dist/{clip_by_value-CLAD4h_I.js → clip_by_value-DDA7rrcT.js} +1 -1
- package/dist/complex-DI35Q-gW.js +11 -0
- package/dist/{concat-Dqk7Xk7h.js → concat-CAQpCret.js} +5 -5
- package/dist/{concat_util-C1Mxe27t.js → concat_util-D18dJ4fD.js} +1 -1
- package/dist/{dataset-DlqAN81i.js → dataset-CGGp1z9P.js} +3 -3
- package/dist/{dropout_util-N0z8Os-K.js → dropout_util--NxWuYg2.js} +1 -1
- package/dist/{expand_dims-D0rBtgT1.js → expand_dims-Bkd1YD5x.js} +4 -4
- package/dist/{exports_initializers-DIOZQt_L.js → exports_initializers-CYzKLjN7.js} +1 -1
- package/dist/{floor-CymuCmTO.js → floor-BQtb-Azg.js} +1 -1
- package/dist/{gather-DEyjXNb1.js → gather-qIqEqaGn.js} +1 -1
- package/dist/{gelu-DpTCC3eB.js → gelu-B220X1Go.js} +1 -1
- package/dist/{gpgpu_math-3bCb5ooU.js → gpgpu_math-BwvV12df.js} +25 -25
- package/dist/{index-DSGwv2Yx.js → index-CUXkjxiT.js} +33 -33
- package/dist/{index-BQvB7LCC.js → index-CjOWnMXP.js} +15 -15
- package/dist/{kernel_funcs_utils-DGqzNlHT.js → kernel_funcs_utils-pq0CK9co.js} +6 -6
- package/dist/layers/BaseLayer.js +4 -4
- package/dist/layers/CausalSelfAttention.d.ts +1 -0
- package/dist/layers/CausalSelfAttention.js +14 -14
- package/dist/layers/LoRA.js +4 -4
- package/dist/layers/MLP.js +4 -4
- package/dist/layers/PositionEmbedding.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +4 -4
- package/dist/layers/TiedEmbedding.js +6 -6
- package/dist/layers/TransformerBlock.d.ts +1 -0
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/layers/WeightStore.js +2 -2
- package/dist/loader/loadTransformers.js +9 -9
- package/dist/loader/oldZipLoad.js +9 -9
- package/dist/loader/save.js +38 -30
- package/dist/loader/types.d.ts +1 -0
- package/dist/main.js +9 -9
- package/dist/{matMul16-BIT70Vya.js → matMul16-BcVC_E62.js} +3 -3
- package/dist/{matMulGelu-CsZnh18H.js → matMulGelu-JNLZqKQp.js} +18 -18
- package/dist/mat_mul-DhG0Newp.js +11 -0
- package/dist/mod-CSdCpRjf.js +11 -0
- package/dist/models/NanoGPTV1.js +2 -2
- package/dist/models/NanoGPTV2.js +2 -2
- package/dist/models/model.d.ts +1 -0
- package/dist/models/model.js +9 -9
- package/dist/{not_equal-CkQKkKZy.js → not_equal-hurPF26l.js} +15 -15
- package/dist/{ones-DbVB5N58.js → ones-BytntneX.js} +3 -3
- package/dist/ops/adamAdjust.js +3 -3
- package/dist/ops/adamMoments.js +3 -3
- package/dist/ops/add16.js +1 -1
- package/dist/ops/appendCache.js +6 -6
- package/dist/ops/attentionMask.js +3 -3
- package/dist/ops/concat16.js +3 -3
- package/dist/ops/cpu/adamAdjust.js +9 -9
- package/dist/ops/cpu/adamMoments.js +5 -5
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +6 -6
- package/dist/ops/cpu/fusedSoftmax.js +4 -4
- package/dist/ops/cpu/gatherSub.js +5 -5
- package/dist/ops/cpu/gelu.js +4 -4
- package/dist/ops/cpu/matMul16.js +2 -2
- package/dist/ops/cpu/matMulGelu.js +7 -7
- package/dist/ops/cpu/matMulMul.js +2 -2
- package/dist/ops/cpu/mulDropout.js +5 -5
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +5 -5
- package/dist/ops/dot16.js +2 -2
- package/dist/ops/dropout.js +6 -6
- package/dist/ops/dropout16.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/globalNorm.js +7 -7
- package/dist/ops/grads/add16.js +1 -1
- package/dist/ops/grads/attentionMask.js +2 -2
- package/dist/ops/grads/dropout16.js +1 -1
- package/dist/ops/grads/gelu.js +2 -2
- package/dist/ops/grads/matMul16.js +3 -3
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/mul16.js +1 -1
- package/dist/ops/grads/normRMS.js +7 -7
- package/dist/ops/grads/pack16.js +3 -3
- package/dist/ops/grads/qkv.js +11 -11
- package/dist/ops/grads/rope.js +2 -2
- package/dist/ops/grads/softmax16.js +1 -1
- package/dist/ops/grads/unpack16.js +2 -2
- package/dist/ops/matMul16.js +3 -3
- package/dist/ops/matMulGelu.js +6 -6
- package/dist/ops/matMulMul.js +3 -3
- package/dist/ops/mul16.js +1 -1
- package/dist/ops/mulDrop.js +3 -3
- package/dist/ops/normRMS.js +4 -4
- package/dist/ops/pack16.js +2 -2
- package/dist/ops/qkv.js +3 -3
- package/dist/ops/reshape16.js +6 -6
- package/dist/ops/rope.js +2 -2
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/slice16.js +2 -2
- package/dist/ops/softmax16.js +1 -1
- package/dist/ops/sub16.js +1 -1
- package/dist/ops/sum16.js +6 -6
- package/dist/ops/transpose16.js +3 -3
- package/dist/ops/unpack16.js +2 -2
- package/dist/ops/webgl/adamAdjust.js +2 -2
- package/dist/ops/webgl/adamMoments.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/dropout16.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +7 -7
- package/dist/ops/webgl/gatherSub.js +3 -3
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.js +3 -3
- package/dist/ops/webgl/matMul16.js +13 -13
- package/dist/ops/webgl/matMulGelu.js +4 -4
- package/dist/ops/webgl/matMulMul.js +2 -2
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +2 -2
- package/dist/ops/webgpu/adamAdjust.js +3 -3
- package/dist/ops/webgpu/adamMoments.js +3 -3
- package/dist/ops/webgpu/add16.js +6 -6
- package/dist/ops/webgpu/appendCache.js +3 -3
- package/dist/ops/webgpu/attentionMask.js +2 -2
- package/dist/ops/webgpu/attentionMask32_program.js +2 -2
- package/dist/ops/webgpu/clipScale.js +7 -7
- package/dist/ops/webgpu/concat16.js +5 -5
- package/dist/ops/webgpu/dropout16.js +6 -6
- package/dist/ops/webgpu/gatherSub.js +3 -3
- package/dist/ops/webgpu/gelu.js +8 -8
- package/dist/ops/webgpu/matMul16.js +16 -16
- package/dist/ops/webgpu/matMul16_program.js +2 -2
- package/dist/ops/webgpu/mul16.js +5 -5
- package/dist/ops/webgpu/norm2.js +1 -1
- package/dist/ops/webgpu/normRMS.js +2 -2
- package/dist/ops/webgpu/normRMSGrad.js +4 -4
- package/dist/ops/webgpu/pack16.js +4 -4
- package/dist/ops/webgpu/pack16_program.js +2 -2
- package/dist/ops/webgpu/qkv.js +2 -2
- package/dist/ops/webgpu/rope.js +3 -3
- package/dist/ops/webgpu/scatterSub.js +3 -3
- package/dist/ops/webgpu/slice16.js +4 -4
- package/dist/ops/webgpu/softmax16.js +4 -4
- package/dist/ops/webgpu/softmax16_program.js +2 -2
- package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
- package/dist/ops/webgpu/softmax16grad.js +4 -4
- package/dist/ops/webgpu/sub16.js +6 -6
- package/dist/ops/webgpu/sum16.js +3 -3
- package/dist/ops/webgpu/transpose16.js +8 -8
- package/dist/ops/webgpu/transpose16_program.js +2 -2
- package/dist/ops/webgpu/transpose16_shared_program.js +3 -3
- package/dist/ops/webgpu/unpack16.js +3 -3
- package/dist/ops/webgpu/utils/binary_op.js +3 -3
- package/dist/ops/webgpu/utils/reductions.js +5 -5
- package/dist/{ops-CURIZSVt.js → ops-CsXeTq1P.js} +100 -100
- package/dist/{pack16-WlOSOuZA.js → pack16-bqltoUlR.js} +2 -2
- package/dist/patches/webgpu_backend.js +6 -6
- package/dist/patches/webgpu_base.js +1 -1
- package/dist/patches/webgpu_program.js +2 -2
- package/dist/{random_normal-CIm8lk2-.js → random_normal-IBRrha8a.js} +1 -1
- package/dist/{random_width-B_fVXhGx.js → random_width-DN5ZtQkM.js} +131 -131
- package/dist/{range-BDxO73mk.js → range-C-CjF-LI.js} +1 -1
- package/dist/relu-J_X6MUzx.js +9 -0
- package/dist/{reshape-BIN71H3p.js → reshape-BDOuCSNW.js} +1 -1
- package/dist/{resize_nearest_neighbor-C6_0dAnK.js → resize_nearest_neighbor-BojqlfRe.js} +41 -41
- package/dist/{rope-CC5RjmKU.js → rope-DcrZM_e6.js} +5 -5
- package/dist/{scatter_nd_util-C-x73Cj6.js → scatter_nd_util-ByNJaL6I.js} +1 -1
- package/dist/{segment_util-4zuHV5IG.js → segment_util-Dasb2Zaf.js} +2 -2
- package/dist/{selu_util-BXdhy_W6.js → selu_util-BLhIqRkw.js} +5 -5
- package/dist/{shared-zTaJ5siv.js → shared-3agzAqQ_.js} +1 -1
- package/dist/{shared-DRWDyk9w.js → shared-CagdqkLh.js} +6 -6
- package/dist/slice-BzS11Qh0.js +12 -0
- package/dist/{slice_util-DPY56GzQ.js → slice_util-CC35pLmT.js} +5 -5
- package/dist/{softmax-BLGJqdwx.js → softmax-D4q1LJN7.js} +1 -1
- package/dist/split-C2Sj255c.js +9 -0
- package/dist/{squeeze-O_YWJpw_.js → squeeze-ho4wLUek.js} +2 -2
- package/dist/{stack-z6QE7kmP.js → stack-DudVrtmG.js} +1 -1
- package/dist/{step-DQY6_ABw.js → step-BTxPtq1r.js} +4 -4
- package/dist/{sum-D39FeU5h.js → sum-BpiwSWvg.js} +3 -3
- package/dist/{tensor-D8e0Gd7c.js → tensor-BWFldCso.js} +1 -1
- package/dist/{tensor1d-BMl0eZYV.js → tensor1d-LMGMIUlr.js} +1 -1
- package/dist/{tensor2d-DTtQ1QcT.js → tensor2d-BnXMKScO.js} +1 -1
- package/dist/{tensor4d-Dj4rDssL.js → tensor4d-C6UCG_u8.js} +1 -1
- package/dist/{tfjs_backend-Bk3PmK91.js → tfjs_backend-BGnG-ppu.js} +65 -65
- package/dist/{tile-CsWlVKKz.js → tile-CFy-xTO6.js} +1 -1
- package/dist/tokeniser/BaseTokeniser.d.ts +5 -1
- package/dist/tokeniser/BaseTokeniser.js +62 -53
- package/dist/tokeniser/type.d.ts +8 -0
- package/dist/training/AdamW.js +2 -2
- package/dist/training/BasicTrainer.d.ts +1 -0
- package/dist/training/BasicTrainer.js +95 -79
- package/dist/training/DatasetBuilder.d.ts +6 -2
- package/dist/training/DatasetBuilder.js +60 -41
- package/dist/training/Evaluator.d.ts +1 -2
- package/dist/training/Evaluator.js +21 -31
- package/dist/training/SFTTrainer.d.ts +3 -2
- package/dist/training/SFTTrainer.js +4 -3
- package/dist/training/orthoGrad.js +1 -1
- package/dist/training/sparseCrossEntropy.js +38 -38
- package/dist/training/tasks/ConversationTask.d.ts +4 -0
- package/dist/training/tasks/ConversationTask.js +7 -7
- package/dist/training/tasks/PretrainingTask.d.ts +4 -0
- package/dist/training/tasks/PretrainingTask.js +11 -7
- package/dist/training/tasks/StartSentenceTask.d.ts +4 -0
- package/dist/training/tasks/StartSentenceTask.js +5 -5
- package/dist/training/tasks/Task.d.ts +12 -0
- package/dist/training/tasks/Task.js +55 -31
- package/dist/training/types.d.ts +1 -0
- package/dist/training/validation.d.ts +1 -1
- package/dist/training/validation.js +33 -32
- package/dist/{transpose-Qxz-4os3.js → transpose-9kRxIXWR.js} +7 -7
- package/dist/{unsorted_segment_sum-BfFVV9Zm.js → unsorted_segment_sum-DJvk5xnh.js} +20 -20
- package/dist/utilities/dummy.js +6 -6
- package/dist/utilities/multinomialCPU.js +2 -2
- package/dist/utilities/packed.js +1 -1
- package/dist/utilities/performance.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/sentences.js +5 -5
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-SSATClyt.js → variable-Ck482e3n.js} +1 -1
- package/dist/{webgpu_program-CbjdYLYk.js → webgpu_program-B4HmApL1.js} +1 -1
- package/dist/{webgpu_util-DuofJBMo.js → webgpu_util-DYlGSwOJ.js} +7 -7
- package/dist/{zeros-Bw0puq_w.js → zeros-DvZpK8s6.js} +2 -2
- package/dist/{zeros_like-rOHr54NY.js → zeros_like-CWjDdwr-.js} +69 -69
- package/package.json +1 -1
- package/dist/complex-3DpPEG9B.js +0 -11
- package/dist/mat_mul-DP86qZtZ.js +0 -11
- package/dist/mod-BXjLYwvM.js +0 -11
- package/dist/readers-17HLdxVM.js +0 -12
- package/dist/relu-DTvZKBsZ.js +0 -9
- package/dist/slice-BvItlgXu.js +0 -12
- package/dist/split-BN9LkEgS.js +0 -9
- package/dist/training/SFTDatasetBuilder.d.ts +0 -23
- package/dist/training/SFTDatasetBuilder.js +0 -85
|
@@ -3226,34 +3226,34 @@ function vs() {
|
|
|
3226
3226
|
vs();
|
|
3227
3227
|
export {
|
|
3228
3228
|
di as $,
|
|
3229
|
-
|
|
3230
|
-
|
|
3229
|
+
Kr as A,
|
|
3230
|
+
K as B,
|
|
3231
3231
|
yr as C,
|
|
3232
|
-
|
|
3232
|
+
ur as D,
|
|
3233
3233
|
f as E,
|
|
3234
3234
|
Yr as F,
|
|
3235
3235
|
Qr as G,
|
|
3236
|
-
|
|
3237
|
-
|
|
3238
|
-
|
|
3239
|
-
|
|
3240
|
-
|
|
3236
|
+
Ja as H,
|
|
3237
|
+
So as I,
|
|
3238
|
+
wo as J,
|
|
3239
|
+
Gs as K,
|
|
3240
|
+
Rt as L,
|
|
3241
3241
|
Na as M,
|
|
3242
|
-
|
|
3242
|
+
yo as N,
|
|
3243
3243
|
H as O,
|
|
3244
3244
|
Oa as P,
|
|
3245
|
-
|
|
3245
|
+
se as Q,
|
|
3246
3246
|
Xa as R,
|
|
3247
3247
|
uo as S,
|
|
3248
3248
|
Co as T,
|
|
3249
|
-
|
|
3250
|
-
|
|
3251
|
-
|
|
3252
|
-
|
|
3253
|
-
|
|
3254
|
-
|
|
3249
|
+
z as U,
|
|
3250
|
+
Ue as V,
|
|
3251
|
+
Cs as W,
|
|
3252
|
+
Tn as X,
|
|
3253
|
+
vn as Y,
|
|
3254
|
+
qa as Z,
|
|
3255
3255
|
os as _,
|
|
3256
|
-
|
|
3256
|
+
q as a,
|
|
3257
3257
|
ea as a$,
|
|
3258
3258
|
gi as a0,
|
|
3259
3259
|
tt as a1,
|
|
@@ -3318,7 +3318,7 @@ export {
|
|
|
3318
3318
|
$s as ax,
|
|
3319
3319
|
xt as ay,
|
|
3320
3320
|
he as az,
|
|
3321
|
-
|
|
3321
|
+
ni as b,
|
|
3322
3322
|
Qe as b$,
|
|
3323
3323
|
na as b0,
|
|
3324
3324
|
la as b1,
|
|
@@ -3383,7 +3383,7 @@ export {
|
|
|
3383
3383
|
Fr as bx,
|
|
3384
3384
|
Mr as by,
|
|
3385
3385
|
xr as bz,
|
|
3386
|
-
|
|
3386
|
+
Ho as c,
|
|
3387
3387
|
en as c$,
|
|
3388
3388
|
tn as c0,
|
|
3389
3389
|
za as c1,
|
|
@@ -3448,7 +3448,7 @@ export {
|
|
|
3448
3448
|
gs as cx,
|
|
3449
3449
|
cr as cy,
|
|
3450
3450
|
Tr as cz,
|
|
3451
|
-
|
|
3451
|
+
M as d,
|
|
3452
3452
|
to as d0,
|
|
3453
3453
|
Za as d1,
|
|
3454
3454
|
Bo as d2,
|
|
@@ -3491,26 +3491,26 @@ export {
|
|
|
3491
3491
|
ja as dx,
|
|
3492
3492
|
ko as dy,
|
|
3493
3493
|
Io as dz,
|
|
3494
|
-
|
|
3494
|
+
Jo as e,
|
|
3495
3495
|
as as f,
|
|
3496
3496
|
ai as g,
|
|
3497
|
-
|
|
3498
|
-
|
|
3499
|
-
|
|
3497
|
+
ei as h,
|
|
3498
|
+
$ as i,
|
|
3499
|
+
S as j,
|
|
3500
3500
|
Fn as k,
|
|
3501
|
-
|
|
3501
|
+
nt as l,
|
|
3502
3502
|
b as m,
|
|
3503
|
-
|
|
3503
|
+
V as n,
|
|
3504
3504
|
N as o,
|
|
3505
3505
|
Qt as p,
|
|
3506
|
-
|
|
3506
|
+
T as q,
|
|
3507
3507
|
ri as r,
|
|
3508
3508
|
si as s,
|
|
3509
3509
|
A as t,
|
|
3510
|
-
|
|
3511
|
-
|
|
3512
|
-
|
|
3513
|
-
|
|
3514
|
-
|
|
3515
|
-
|
|
3510
|
+
Ct as u,
|
|
3511
|
+
hs as v,
|
|
3512
|
+
Jn as w,
|
|
3513
|
+
p as x,
|
|
3514
|
+
An as y,
|
|
3515
|
+
wr as z
|
|
3516
3516
|
};
|
|
@@ -1,20 +1,20 @@
|
|
|
1
|
-
import { W as Mt } from "./backend_webgpu-
|
|
2
|
-
import { ae as Et, ab as X,
|
|
3
|
-
import { i as Qi, a as Zi, c as b, f as v, M as Y, b as nt, d as ut, e as dt } from "./webgpu_util-
|
|
1
|
+
import { W as Mt } from "./backend_webgpu-BqASlsbV.js";
|
|
2
|
+
import { ae as Et, ab as X, x as L, dc as Ut, dd as Ht, bX as Gt, U as D, _ as j, bb as Xt, ai as Ye, aU as Kt, a7 as qt, am as fe, bP as Yt, c7 as jt, c8 as Qt, bV as Zt, cO as Jt, ar as es, ac as De, ah as te, aW as ts, bm as ss, bn as os, bo as is, c9 as as, ca as rs, cb as ns, cc as us, cd as ds, ce as ls, aM as cs, aP as hs, bp as ps, cy as fs, cP as ms, cQ as gs, D as xs, S as Cs, br as ws, bd as ys, de as Ss, aQ as bs, aq as vs, bS as ks, bT as Is, af as Rs, bY as Ps, C as $s, cS as Ds, ao as Ns, z as zs, bt as As, cD as Fs, bu as Ws, cz as Ls, cT as Vs, cA as Bs, bv as Ts, bw as _s, bf as Os, bx as Ms, by as Es, cB as Us, cf as Hs, bz as Gs, cF as Xs, cG as Ks, df as qs, cg as Ys, cU as js, cV as Qs, dg as Zs, c0 as Js, N as eo, be as to, aG as so, cW as oo, bA as io, bB as ao, aN as ro, A as no, bZ as uo, cp as lo, bg as co, F as ho, b_ as po, dh as fo, a9 as at, bs as mo, cE as go, di as xo, al as Co, G as wo, a5 as ke, a$ as yo, b0 as So, cq as bo, ch as vo, ci as ko, cj as Io, aH as Ro, b1 as Po, b2 as $o, dj as Do, aO as No, b3 as zo, b4 as Ao, bD as Fo, cl as Wo, ck as Lo, cY as Vo, b$ as Bo, bE as To, cC as _o, cZ as Oo, c_ as Mo, dk as Eo, aZ as Uo, b5 as Ho, cm as Go, M as Xo, I as Ko, dl as qo, ap as Yo, bi as jo, bj as Qo, bF as Zo, d3 as Jo, bG as ei, P as ti, a6 as si, bH as oi, c$ as ii, aI as ai, c1 as ri, Z as ni, aX as ui, cn as di, H as li, aJ as ci, b9 as hi, d0 as pi, ba as fi, d1 as mi, bJ as gi, bh as xi, b6 as Ci, bK as wi, ak as yi, dm as Si, aL as bi, bL as vi, aF as ki, co as Ii, bM as Ri, bN as Pi, bC as $i, bI as Di, dn as Ni, dp as zi, T as Ai, av as rt, dq as Fi, Q as Wi, J as Li, c3 as Vi, d2 as Bi, b7 as Ti, aK as _i, cr as Oi, dr as Mi, c5 as Ei, cs as Ui, bq as Hi, ds as Gi, ct as Xi, bl as Ki, b8 as qi, bO as Yi, c as ji } from "./index-CUXkjxiT.js";
|
|
3
|
+
import { i as Qi, a as Zi, c as b, f as v, M as Y, b as nt, d as ut, e as dt } from "./webgpu_util-DYlGSwOJ.js";
|
|
4
4
|
import { g as _e, B as F } from "./binary_op_util-pKXltfxI.js";
|
|
5
|
-
import { S as Ji, a as ea } from "./selu_util-
|
|
6
|
-
import { E as ta, t as sa, u as oa, w as ia, x as aa, y as ra, f as je, z as lt, A as ct, B as ht, C as na, D as ua, F as da, G as la, H as ca, I as ha, J as pa, K as fa, L as ma, M as ga } from "./backend_util-
|
|
7
|
-
import { t as W, e as S, h as Q, b as G, c as Ie, P as pt, d as xa, a as Ca } from "./webgpu_program-
|
|
8
|
-
import { r as R, a as wa } from "./Reshape-
|
|
9
|
-
import { s as ya } from "./shared-
|
|
10
|
-
import { c as Oe, a as Ce, b as we, d as Me, e as Sa, g as ft } from "./axis_util-
|
|
11
|
-
import { h as ye, i as Ne, j as Se, b as Z, d as Ee, g as Ue, k as mt } from "./step-
|
|
12
|
-
import { p as ba, a as va, s as ka, b as Ia } from "./slice_util-
|
|
13
|
-
import { z as Ra } from "./zeros-
|
|
14
|
-
import { c as me, a as Pa } from "./concat_util-
|
|
15
|
-
import { c as $a, a as Da } from "./segment_util-
|
|
5
|
+
import { S as Ji, a as ea } from "./selu_util-BLhIqRkw.js";
|
|
6
|
+
import { E as ta, t as sa, u as oa, w as ia, x as aa, y as ra, f as je, z as lt, A as ct, B as ht, C as na, D as ua, F as da, G as la, H as ca, I as ha, J as pa, K as fa, L as ma, M as ga } from "./backend_util-GaFarB78.js";
|
|
7
|
+
import { t as W, e as S, h as Q, b as G, c as Ie, P as pt, d as xa, a as Ca } from "./webgpu_program-B4HmApL1.js";
|
|
8
|
+
import { r as R, a as wa } from "./Reshape-CPBkTIH2.js";
|
|
9
|
+
import { s as ya } from "./shared-CagdqkLh.js";
|
|
10
|
+
import { c as Oe, a as Ce, b as we, d as Me, e as Sa, g as ft } from "./axis_util-GTVlo58H.js";
|
|
11
|
+
import { h as ye, i as Ne, j as Se, b as Z, d as Ee, g as Ue, k as mt } from "./step-BTxPtq1r.js";
|
|
12
|
+
import { p as ba, a as va, s as ka, b as Ia } from "./slice_util-CC35pLmT.js";
|
|
13
|
+
import { z as Ra } from "./zeros-DvZpK8s6.js";
|
|
14
|
+
import { c as me, a as Pa } from "./concat_util-D18dJ4fD.js";
|
|
15
|
+
import { c as $a, a as Da } from "./segment_util-Dasb2Zaf.js";
|
|
16
16
|
import { n as Na, a as za } from "./non_max_suppression_impl-B2W7YjZB.js";
|
|
17
|
-
import { c as He } from "./scatter_nd_util-
|
|
17
|
+
import { c as He } from "./scatter_nd_util-ByNJaL6I.js";
|
|
18
18
|
Qi() && Et(
|
|
19
19
|
"webgpu",
|
|
20
20
|
async () => {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { _ as B,
|
|
2
|
-
import { u as O, f as H } from "./gpgpu_math-
|
|
3
|
-
import { f as v } from "./backend_util-
|
|
1
|
+
import { _ as B, U as G, aU as K, a7 as W, aH as z, aV as V, ab as N, aI as F, am as S } from "./index-CUXkjxiT.js";
|
|
2
|
+
import { u as O, f as H } from "./gpgpu_math-BwvV12df.js";
|
|
3
|
+
import { f as v } from "./backend_util-GaFarB78.js";
|
|
4
4
|
function Y(t, e) {
|
|
5
5
|
return ["x", "y", "z", "w", "u", "v"].slice(0, e).map((s) => `${t}.${s}`);
|
|
6
6
|
}
|
|
@@ -200,12 +200,12 @@ const be = {
|
|
|
200
200
|
backendName: "webgl",
|
|
201
201
|
kernelFunc: oe
|
|
202
202
|
};
|
|
203
|
-
const
|
|
203
|
+
const U = "return (a < 0.) ? b * a : a;", k = `
|
|
204
204
|
vec4 aLessThanZero = vec4(lessThan(a, vec4(0.)));
|
|
205
205
|
return (aLessThanZero * (b * a)) + ((vec4(1.0) - aLessThanZero) * a);
|
|
206
206
|
`;
|
|
207
207
|
function ue(t) {
|
|
208
|
-
const { inputs: e, backend: s } = t, { x: r, alpha: u } = e, n = N().getBool("WEBGL_PACK_BINARY_OPERATIONS") ? new E(
|
|
208
|
+
const { inputs: e, backend: s } = t, { x: r, alpha: u } = e, n = N().getBool("WEBGL_PACK_BINARY_OPERATIONS") ? new E(k, r.shape, u.shape) : new b(U, r.shape, u.shape);
|
|
209
209
|
return s.runWebGLProgram(n, [r, u], "float32");
|
|
210
210
|
}
|
|
211
211
|
const Ne = {
|
|
@@ -273,7 +273,7 @@ function Ae(t, e = !1) {
|
|
|
273
273
|
if (t === "relu6")
|
|
274
274
|
return e ? ae : Q;
|
|
275
275
|
if (t === "prelu")
|
|
276
|
-
return e ?
|
|
276
|
+
return e ? k : U;
|
|
277
277
|
if (t === "leakyrelu")
|
|
278
278
|
return e ? R : w;
|
|
279
279
|
if (t === "sigmoid")
|
package/dist/layers/BaseLayer.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { a2 as p,
|
|
1
|
+
import { a2 as p, h as s, a4 as g } from "../index-CUXkjxiT.js";
|
|
2
2
|
import b from "./WeightStore.js";
|
|
3
3
|
class T {
|
|
4
4
|
parent;
|
|
@@ -59,11 +59,11 @@ class T {
|
|
|
59
59
|
checkpointingFn(t, ...e) {
|
|
60
60
|
const r = this.trainableVariables;
|
|
61
61
|
return p((...i) => {
|
|
62
|
-
const o = i[i.length - 1], a = i.slice(0, e.length),
|
|
63
|
-
return o(a), { value:
|
|
62
|
+
const o = i[i.length - 1], a = i.slice(0, e.length), h = this.forward(t, ...a);
|
|
63
|
+
return o(a), { value: h, gradFunc: (n, l) => {
|
|
64
64
|
const c = s().state.activeTape;
|
|
65
65
|
s().state.activeTape = [];
|
|
66
|
-
const d = g((...u) => this.forward(t, ...u.slice(0, a.length)))([...l, ...r],
|
|
66
|
+
const d = g((...u) => this.forward(t, ...u.slice(0, a.length)))([...l, ...r], n);
|
|
67
67
|
return s().state.activeTape = c, d;
|
|
68
68
|
} };
|
|
69
69
|
})(...e, ...r);
|
|
@@ -1,20 +1,20 @@
|
|
|
1
1
|
import { attentionMask as R } from "../ops/attentionMask.js";
|
|
2
2
|
import J from "./BaseLayer.js";
|
|
3
|
-
import { r as v } from "../rope-
|
|
3
|
+
import { r as v } from "../rope-DcrZM_e6.js";
|
|
4
4
|
import { appendCache as A } from "../ops/appendCache.js";
|
|
5
|
-
import { k as c, t as L } from "../index-
|
|
5
|
+
import { k as c, t as L } from "../index-CUXkjxiT.js";
|
|
6
6
|
import { softmax16 as y } from "../ops/softmax16.js";
|
|
7
|
-
import { b as M } from "../matMul16-
|
|
8
|
-
import { p as K } from "../pack16-
|
|
7
|
+
import { b as M } from "../matMul16-BcVC_E62.js";
|
|
8
|
+
import { p as K } from "../pack16-bqltoUlR.js";
|
|
9
9
|
import { transpose16 as j } from "../ops/transpose16.js";
|
|
10
10
|
import { dot16 as E } from "../ops/dot16.js";
|
|
11
11
|
import { reshape16 as _ } from "../ops/reshape16.js";
|
|
12
|
-
import { isPackedTensor as
|
|
12
|
+
import { isPackedTensor as f } from "../utilities/packed.js";
|
|
13
13
|
import { qkv as q } from "../ops/qkv.js";
|
|
14
14
|
import { normRMS as O } from "../ops/normRMS.js";
|
|
15
15
|
import { dropout16 as x } from "../ops/dropout16.js";
|
|
16
|
-
import { v as P } from "../variable-
|
|
17
|
-
import { r as S } from "../random_normal-
|
|
16
|
+
import { v as P } from "../variable-Ck482e3n.js";
|
|
17
|
+
import { r as S } from "../random_normal-IBRrha8a.js";
|
|
18
18
|
class it extends J {
|
|
19
19
|
constructor(t, o, s, i) {
|
|
20
20
|
super(o, i), this.attentionConfig = s, this.index = t, this.units = o.nEmbed * 3, this.projUnits = o.nEmbed, this.ATTN = `block_${this.index}_cAttn`, this.PROJ = `block_${this.index}_cProj`, this.addVariable(this.ATTN), this.addVariable(this.PROJ), this.divisor = 1 / Math.sqrt(o.nEmbed / o.nHead);
|
|
@@ -39,11 +39,11 @@ class it extends J {
|
|
|
39
39
|
return i.dispose(), e;
|
|
40
40
|
}
|
|
41
41
|
getQKV(t) {
|
|
42
|
-
const o =
|
|
43
|
-
return
|
|
42
|
+
const o = f(t) ? K(this.getVariable(this.ATTN)) : this.getVariable(this.ATTN), s = q(t, o, this.config.nHead);
|
|
43
|
+
return f(t) && o.dispose(), s;
|
|
44
44
|
}
|
|
45
45
|
getOutputProjection(t) {
|
|
46
|
-
const o = t.shape[0], s = t.shape[2], i = this.config.nEmbed, e =
|
|
46
|
+
const o = t.shape[0], s = t.shape[2], i = this.config.nEmbed, e = f(t), r = j(t, [0, 2, 1, 3]), n = _(r, [o, s, e ? i / 2 : i]);
|
|
47
47
|
r.dispose();
|
|
48
48
|
const p = e ? K(this.getVariable(this.PROJ)) : this.getVariable(this.PROJ), a = E(n, p);
|
|
49
49
|
return e && p.dispose(), n.dispose(), a;
|
|
@@ -59,13 +59,13 @@ class it extends J {
|
|
|
59
59
|
forward(t, o) {
|
|
60
60
|
return L(() => {
|
|
61
61
|
this.startMemory();
|
|
62
|
-
const [s, i, e] = this.getQKV(o), r = t.pastKV ? t.pastKV.cumulativeLength : 0, n = t.ropeCache, p = n ? v(s, n, r) : s, a = n ? v(i, n, r) : i, h = this.attentionConfig.useQKNorm ?? !1, m = h ? O(p) : p;
|
|
62
|
+
const [s, i, e] = this.getQKV(o), r = t.pastKV ? t.pastKV.cumulativeLength : t.ropePositionOffset || 0, n = t.ropeCache, p = n ? v(s, n, r) : s, a = n ? v(i, n, r) : i, h = this.attentionConfig.useQKNorm ?? !1, m = h ? O(p) : p;
|
|
63
63
|
h && p.dispose();
|
|
64
|
-
const
|
|
64
|
+
const l = h ? O(a) : a;
|
|
65
65
|
h && a.dispose(), n && (s.dispose(), i.dispose());
|
|
66
66
|
const T = t.pastKV ? t.pastKV.length : 0;
|
|
67
|
-
t.pastKV && !t.training && this.updateCache(
|
|
68
|
-
const u = t.pastKV?.k ? t.pastKV.k :
|
|
67
|
+
t.pastKV && !t.training && this.updateCache(l, e, t.pastKV);
|
|
68
|
+
const u = t.pastKV?.k ? t.pastKV.k : l, V = t.pastKV?.v ? t.pastKV.v : e;
|
|
69
69
|
let d;
|
|
70
70
|
T > 0 ? d = this.getAttentionScores(m, u, T) : d = this.getAttentionScores(m, u), m.dispose(), t.pastKV || u.dispose();
|
|
71
71
|
const g = M(d, V), b = t.attentionScores !== void 0 && t.attentionScores.attentionOut !== void 0;
|
package/dist/layers/LoRA.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { a as m, t as n } from "../index-CUXkjxiT.js";
|
|
2
2
|
import { p } from "../index-DmeWGGmS.js";
|
|
3
|
-
import { v as g } from "../variable-
|
|
4
|
-
import { r as S } from "../random_normal-
|
|
5
|
-
import { z as _ } from "../zeros-
|
|
3
|
+
import { v as g } from "../variable-Ck482e3n.js";
|
|
4
|
+
import { r as S } from "../random_normal-IBRrha8a.js";
|
|
5
|
+
import { z as _ } from "../zeros-DvZpK8s6.js";
|
|
6
6
|
class B {
|
|
7
7
|
weightStore;
|
|
8
8
|
alpha;
|
package/dist/layers/MLP.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import { t as M } from "../index-
|
|
1
|
+
import { t as M } from "../index-CUXkjxiT.js";
|
|
2
2
|
import f from "./BaseLayer.js";
|
|
3
|
-
import { b as h } from "../matMul16-
|
|
3
|
+
import { b as h } from "../matMul16-BcVC_E62.js";
|
|
4
4
|
import { reshape16 as d } from "../ops/reshape16.js";
|
|
5
5
|
import { dropout16 as L } from "../ops/dropout16.js";
|
|
6
|
-
import { v as n } from "../variable-
|
|
7
|
-
import { r as m } from "../random_normal-
|
|
6
|
+
import { v as n } from "../variable-Ck482e3n.js";
|
|
7
|
+
import { r as m } from "../random_normal-IBRrha8a.js";
|
|
8
8
|
class N extends f {
|
|
9
9
|
index;
|
|
10
10
|
hiddenUnits;
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { t as c, an as p,
|
|
1
|
+
import { t as c, an as p, a as s } from "../index-CUXkjxiT.js";
|
|
2
2
|
import f from "./BaseLayer.js";
|
|
3
|
-
import { E as u } from "../random_width-
|
|
4
|
-
import { r as b } from "../exports_initializers-
|
|
5
|
-
import { m as g } from "../mod-
|
|
6
|
-
import { r as l } from "../range-
|
|
3
|
+
import { E as u } from "../random_width-DN5ZtQkM.js";
|
|
4
|
+
import { r as b } from "../exports_initializers-CYzKLjN7.js";
|
|
5
|
+
import { m as g } from "../mod-CSdCpRjf.js";
|
|
6
|
+
import { r as l } from "../range-C-CjF-LI.js";
|
|
7
7
|
function h(e) {
|
|
8
8
|
return new u(e);
|
|
9
9
|
}
|
package/dist/layers/RMSNorm.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { t as i } from "../index-
|
|
1
|
+
import { t as i } from "../index-CUXkjxiT.js";
|
|
2
2
|
import e from "./BaseLayer.js";
|
|
3
3
|
import { normRMS as m } from "../ops/normRMS.js";
|
|
4
|
-
import { v as a } from "../variable-
|
|
5
|
-
import { o as M } from "../ones-
|
|
4
|
+
import { v as a } from "../variable-Ck482e3n.js";
|
|
5
|
+
import { o as M } from "../ones-BytntneX.js";
|
|
6
6
|
class l extends e {
|
|
7
7
|
GAMMA;
|
|
8
8
|
rmsConfig;
|
package/dist/layers/RoPECache.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { r as l } from "../zeros_like-
|
|
3
|
-
import { c as f, s as m } from "../unsorted_segment_sum-
|
|
4
|
-
import { r as h } from "../range-
|
|
1
|
+
import { i as a, a as n, p as c, t as p, k as r } from "../index-CUXkjxiT.js";
|
|
2
|
+
import { r as l } from "../zeros_like-CWjDdwr-.js";
|
|
3
|
+
import { c as f, s as m } from "../unsorted_segment_sum-DJvk5xnh.js";
|
|
4
|
+
import { r as h } from "../range-C-CjF-LI.js";
|
|
5
5
|
class x {
|
|
6
6
|
constructor(s) {
|
|
7
7
|
this.config = s;
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
import "../random_width-
|
|
2
|
-
import "../index-
|
|
3
|
-
import { r as s } from "../exports_initializers-
|
|
1
|
+
import "../random_width-DN5ZtQkM.js";
|
|
2
|
+
import "../index-CUXkjxiT.js";
|
|
3
|
+
import { r as s } from "../exports_initializers-CYzKLjN7.js";
|
|
4
4
|
import a from "./BaseLayer.js";
|
|
5
5
|
import { dot16 as o } from "../ops/dot16.js";
|
|
6
6
|
import { isPackedTensor as r } from "../utilities/packed.js";
|
|
7
|
-
import { p as m } from "../pack16-
|
|
7
|
+
import { p as m } from "../pack16-bqltoUlR.js";
|
|
8
8
|
import { transpose16 as d } from "../ops/transpose16.js";
|
|
9
|
-
import { v as p } from "../variable-
|
|
10
|
-
import { g as h } from "../gather-
|
|
9
|
+
import { v as p } from "../variable-Ck482e3n.js";
|
|
10
|
+
import { g as h } from "../gather-qIqEqaGn.js";
|
|
11
11
|
class g extends a {
|
|
12
12
|
vocabSize;
|
|
13
13
|
embedDim;
|
|
@@ -8,6 +8,7 @@ interface BlockAttributes extends ForwardAttributes {
|
|
|
8
8
|
pastKV?: KVCache;
|
|
9
9
|
seed?: number;
|
|
10
10
|
attentionScores?: AttentionScores;
|
|
11
|
+
ropePositionOffset?: number;
|
|
11
12
|
}
|
|
12
13
|
export type TransformerBlockConfig = MLPConfig & RMSNormConfig & CausalSelfAttentionConfig;
|
|
13
14
|
export default class Block extends BaseLayer<BlockAttributes> {
|
|
@@ -2,7 +2,7 @@ import p from "./CausalSelfAttention.js";
|
|
|
2
2
|
import h from "./MLP.js";
|
|
3
3
|
import l from "./RMSNorm.js";
|
|
4
4
|
import m from "./BaseLayer.js";
|
|
5
|
-
import { k as n, t as u } from "../index-
|
|
5
|
+
import { k as n, t as u } from "../index-CUXkjxiT.js";
|
|
6
6
|
import { add16 as d } from "../ops/add16.js";
|
|
7
7
|
class _ extends m {
|
|
8
8
|
ln1;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { w as o } from "../index-CUXkjxiT.js";
|
|
2
2
|
import { p as h } from "../index-DmeWGGmS.js";
|
|
3
|
-
import { v as b } from "../variable-
|
|
3
|
+
import { v as b } from "../variable-Ck482e3n.js";
|
|
4
4
|
class d {
|
|
5
5
|
_variables = /* @__PURE__ */ new Map();
|
|
6
6
|
touchedVariables = /* @__PURE__ */ new Set();
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import p from "../tokeniser/CharTokeniser.js";
|
|
2
2
|
import _ from "../tokeniser/bpe.js";
|
|
3
3
|
import { load_safetensors as b } from "../utilities/safetensors.js";
|
|
4
|
-
import { aa as c } from "../index-
|
|
4
|
+
import { aa as c } from "../index-CUXkjxiT.js";
|
|
5
5
|
import { dummyPassAsync as u } from "../utilities/dummy.js";
|
|
6
6
|
import y from "../models/factory.js";
|
|
7
7
|
function h(e) {
|
|
@@ -28,15 +28,15 @@ function h(e) {
|
|
|
28
28
|
windowSize: e.windowSize
|
|
29
29
|
}, a;
|
|
30
30
|
}
|
|
31
|
-
async function G(e, a,
|
|
32
|
-
const m = h(e),
|
|
33
|
-
a.datasetID && (
|
|
34
|
-
const
|
|
35
|
-
for (const [
|
|
36
|
-
s.set(
|
|
31
|
+
async function G(e, a, r, n) {
|
|
32
|
+
const m = h(e), o = (a.type ?? "char") === "char" ? new p(a.vocab) : new _(a.vocab, a.merges);
|
|
33
|
+
a.datasetID && (o.datasetID = a.datasetID), a.id ? o.id = a.id : o.generateID();
|
|
34
|
+
const d = await b(n), s = /* @__PURE__ */ new Map();
|
|
35
|
+
for (const [i, l] of Object.entries(d))
|
|
36
|
+
s.set(i, [l]);
|
|
37
37
|
c();
|
|
38
|
-
const
|
|
39
|
-
return
|
|
38
|
+
const t = y(m);
|
|
39
|
+
return t.metaData = r, await u(t), t.weightStore.loadWeights(s, !!r.url), { model: t, tokeniser: o, metaData: r };
|
|
40
40
|
}
|
|
41
41
|
export {
|
|
42
42
|
G as default,
|
|
@@ -1,22 +1,22 @@
|
|
|
1
|
-
import { aa as y } from "../index-
|
|
2
|
-
import "../random_width-
|
|
3
|
-
import "../zeros_like-
|
|
1
|
+
import { aa as y } from "../index-CUXkjxiT.js";
|
|
2
|
+
import "../random_width-DN5ZtQkM.js";
|
|
3
|
+
import "../zeros_like-CWjDdwr-.js";
|
|
4
4
|
import "../Generator.js";
|
|
5
5
|
import "../index-Cp39cXWe.js";
|
|
6
|
-
import "../dataset-
|
|
6
|
+
import "../dataset-CGGp1z9P.js";
|
|
7
7
|
import "../ops/cpu/attentionMask.js";
|
|
8
8
|
import "../ops/webgl/attentionMask.js";
|
|
9
9
|
import "../ops/grads/attentionMask.js";
|
|
10
10
|
import "../ops/cpu/rope.js";
|
|
11
11
|
import "../ops/webgl/rope.js";
|
|
12
|
-
import "../rope-
|
|
12
|
+
import "../rope-DcrZM_e6.js";
|
|
13
13
|
import "../ops/cpu/appendCache.js";
|
|
14
14
|
import "../ops/webgl/appendCache.js";
|
|
15
15
|
import "../ops/grads/softmax16.js";
|
|
16
|
-
import "../matMul16-
|
|
16
|
+
import "../matMul16-BcVC_E62.js";
|
|
17
17
|
import "../ops/webgl/matMul16.js";
|
|
18
18
|
import "../ops/cpu/matMul16.js";
|
|
19
|
-
import "../pack16-
|
|
19
|
+
import "../pack16-bqltoUlR.js";
|
|
20
20
|
import "../ops/transpose16.js";
|
|
21
21
|
import "../ops/reshape16.js";
|
|
22
22
|
import "../ops/cpu/qkv.js";
|
|
@@ -44,11 +44,11 @@ import "../ops/cpu/scatterSub.js";
|
|
|
44
44
|
import "../ops/webgl/scatterSub.js";
|
|
45
45
|
import "../papaparse.min-C0cScC2i.js";
|
|
46
46
|
import "../ops/cpu/matMulGelu.js";
|
|
47
|
-
import "../matMulGelu-
|
|
47
|
+
import "../matMulGelu-JNLZqKQp.js";
|
|
48
48
|
import "../ops/grads/matMulGelu.js";
|
|
49
49
|
import "../ops/cpu/gelu.js";
|
|
50
50
|
import "../ops/webgl/gelu.js";
|
|
51
|
-
import "../gelu-
|
|
51
|
+
import "../gelu-B220X1Go.js";
|
|
52
52
|
import "../ops/webgl/log.js";
|
|
53
53
|
import "../checks/normRMS.js";
|
|
54
54
|
import "../checks/normRMSGrad.js";
|
package/dist/loader/save.js
CHANGED
|
@@ -1,31 +1,38 @@
|
|
|
1
1
|
import { z as y } from "../jszip.min-BZhlzntC.js";
|
|
2
|
-
import
|
|
3
|
-
import { save_safetensors as
|
|
4
|
-
import { VERSION as
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
2
|
+
import _ from "../tokeniser/CharTokeniser.js";
|
|
3
|
+
import { save_safetensors as b } from "../utilities/safetensors.js";
|
|
4
|
+
import { VERSION as h } from "./load.js";
|
|
5
|
+
function m(i) {
|
|
6
|
+
if (i.length > 1e3) {
|
|
7
|
+
const n = Math.ceil(i.length / 1e3);
|
|
8
|
+
return i.filter((o, a) => a % n === 0 || a === i.length - 1);
|
|
9
|
+
}
|
|
10
|
+
return i;
|
|
11
|
+
}
|
|
12
|
+
async function u(i, n, o, a) {
|
|
13
|
+
const s = /* @__PURE__ */ new Map();
|
|
14
|
+
i.weightStore.saveWeights(s);
|
|
15
|
+
const e = new y();
|
|
16
|
+
if (a?.optimizer) {
|
|
17
|
+
const t = await a.optimizer.saveMoments();
|
|
18
|
+
e.file("optimizer.safetensors", t, { binary: !0 }), e.file("optimizer_config.json", JSON.stringify(a.optimizer.serializeConfig()), {
|
|
12
19
|
binary: !1
|
|
13
20
|
});
|
|
14
21
|
}
|
|
15
|
-
|
|
22
|
+
a?.trainingLog && e.file("training_log.json", JSON.stringify(m(a.trainingLog), void 0, 4), {
|
|
16
23
|
binary: !1
|
|
17
24
|
});
|
|
18
|
-
const
|
|
19
|
-
|
|
20
|
-
|
|
25
|
+
const g = {};
|
|
26
|
+
s.forEach((t, f) => {
|
|
27
|
+
t.length === 1 && (g[f] = t[0]);
|
|
21
28
|
});
|
|
22
|
-
const c = await
|
|
23
|
-
|
|
29
|
+
const c = await b(g);
|
|
30
|
+
e.file("model.safetensors", c, { binary: !0 });
|
|
24
31
|
const p = i.config.modelType;
|
|
25
32
|
let r;
|
|
26
33
|
p === "GenAI_NanoGPT_v1" ? r = {
|
|
27
34
|
model_type: "GenAI_NanoGPT_v1",
|
|
28
|
-
vocab_size:
|
|
35
|
+
vocab_size: n.getVocab().length,
|
|
29
36
|
hidden_size: i.config.nEmbed,
|
|
30
37
|
num_hidden_layers: i.config.nLayer,
|
|
31
38
|
num_attention_heads: i.config.nHead,
|
|
@@ -34,7 +41,7 @@ async function d(i, e, o, t) {
|
|
|
34
41
|
useRope: i.config.useRope
|
|
35
42
|
} : r = {
|
|
36
43
|
model_type: "GenAI_NanoGPT_v2",
|
|
37
|
-
vocab_size:
|
|
44
|
+
vocab_size: n.getVocab().length,
|
|
38
45
|
hidden_size: i.config.nEmbed,
|
|
39
46
|
num_hidden_layers: i.config.nLayer,
|
|
40
47
|
num_attention_heads: i.config.nHead,
|
|
@@ -43,11 +50,11 @@ async function d(i, e, o, t) {
|
|
|
43
50
|
loraConfig: i.config.loraConfig ? Object.fromEntries(i.config.loraConfig) : void 0,
|
|
44
51
|
loraName: i.config.loraName,
|
|
45
52
|
windowSize: i.config.windowSize
|
|
46
|
-
},
|
|
53
|
+
}, e.file("config.json", JSON.stringify(r, void 0, 4), {
|
|
47
54
|
binary: !1
|
|
48
55
|
});
|
|
49
56
|
const l = {
|
|
50
|
-
version:
|
|
57
|
+
version: h,
|
|
51
58
|
application: "@genai-fi/nanogpt",
|
|
52
59
|
meta: o?.metadata,
|
|
53
60
|
name: o?.name,
|
|
@@ -59,24 +66,25 @@ async function d(i, e, o, t) {
|
|
|
59
66
|
generationSettings: i.metaData?.generationSettings || void 0,
|
|
60
67
|
actionLog: i.metaData?.actionLog || void 0
|
|
61
68
|
};
|
|
62
|
-
if (
|
|
69
|
+
if (e.file("meta.json", JSON.stringify(l, void 0, 4), {
|
|
63
70
|
binary: !1
|
|
64
|
-
}),
|
|
71
|
+
}), e.file(
|
|
65
72
|
"tokeniser.json",
|
|
66
73
|
JSON.stringify({
|
|
67
|
-
type:
|
|
68
|
-
vocab:
|
|
69
|
-
merges:
|
|
70
|
-
datasetID:
|
|
74
|
+
type: n instanceof _ ? "char" : "bpe",
|
|
75
|
+
vocab: n.getVocab(),
|
|
76
|
+
merges: n.getMerges(),
|
|
77
|
+
datasetID: n.datasetID,
|
|
78
|
+
id: n.id
|
|
71
79
|
}),
|
|
72
80
|
{
|
|
73
81
|
binary: !1
|
|
74
82
|
}
|
|
75
83
|
), o?.files)
|
|
76
|
-
for (const [
|
|
77
|
-
|
|
78
|
-
return
|
|
84
|
+
for (const [t, f] of Object.entries(o.files))
|
|
85
|
+
e.file(t, JSON.stringify(f), { binary: !1 });
|
|
86
|
+
return e.generateAsync({ type: "blob" });
|
|
79
87
|
}
|
|
80
88
|
export {
|
|
81
|
-
|
|
89
|
+
u as saveModel
|
|
82
90
|
};
|
package/dist/loader/types.d.ts
CHANGED