@genai-fi/nanogpt 0.10.3 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.d.ts +10 -5
- package/dist/Generator.js +1789 -1765
- package/dist/{RealDiv-KAPDe8zB.js → RealDiv-Ds-jvL09.js} +22 -22
- package/dist/{Reshape-BYkmUnAv.js → Reshape-Cd6e-Otn.js} +1 -1
- package/dist/{Reshape-Zt6eb7yh.js → Reshape-Ct266DEk.js} +9 -9
- package/dist/TeachableLLM.d.ts +4 -3
- package/dist/TeachableLLM.js +14 -14
- package/dist/Trainer.d.ts +2 -2
- package/dist/Trainer.js +6 -6
- package/dist/{axis_util-BaG7mf5A.js → axis_util-DofAuy0p.js} +3 -3
- package/dist/backend.js +2 -2
- package/dist/{backend_util-RCe-rHaj.js → backend_util-C7NWHpv7.js} +7 -7
- package/dist/{backend_webgpu-DE3ACOLx.js → backend_webgpu-B0Vls736.js} +10 -10
- package/dist/{broadcast_to-B3eYlZm7.js → broadcast_to-DDaNMbX7.js} +2 -2
- package/dist/checks/appendCache.js +2 -2
- package/dist/checks/attentionMask.js +3 -3
- package/dist/checks/gelu.js +2 -2
- package/dist/checks/matMulGelu.js +2 -2
- package/dist/checks/normRMS.js +4 -4
- package/dist/checks/normRMSGrad.js +3 -3
- package/dist/checks/packUnpack.js +2 -2
- package/dist/checks/qkv.js +4 -4
- package/dist/checks/rope.js +2 -2
- package/dist/{clip_by_value-BnO7-a88.js → clip_by_value-Dn5tzexi.js} +4 -4
- package/dist/complex-DClmWqJt.js +11 -0
- package/dist/{concat-BV8bt5H-.js → concat-C6X3AAlQ.js} +1 -1
- package/dist/{concat_util-DpW8mL_l.js → concat_util-CHsJFZJJ.js} +1 -1
- package/dist/{dataset-BcwmTGYc.js → dataset-DcjWqUVQ.js} +7 -7
- package/dist/{dropout-BcvN9JYi.js → dropout-OxuaJz6z.js} +11 -11
- package/dist/{expand_dims-DT4tEPwA.js → expand_dims-BzfJK2uc.js} +3 -3
- package/dist/{exports_initializers-Hta_rEnm.js → exports_initializers-eS9QJ6ut.js} +1 -1
- package/dist/{floor-D5QdR_le.js → floor-DIb-lN_u.js} +1 -1
- package/dist/gather-BcO5UQNJ.js +9 -0
- package/dist/{gelu-CjNPL4OH.js → gelu-DqTbCx5x.js} +1 -1
- package/dist/{gpgpu_math-DAOmgtXR.js → gpgpu_math-CJcbnKPC.js} +2 -2
- package/dist/{index-DOvlwCh-.js → index-D0RBWjq8.js} +52 -52
- package/dist/{index-BwexR4lA.js → index-Dj5TkmPY.js} +89 -89
- package/dist/{kernel_funcs_utils-CCzYdUZg.js → kernel_funcs_utils-CSaumNDs.js} +11 -11
- package/dist/layers/BaseLayer.js +2 -2
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/MLP.js +4 -4
- package/dist/layers/PositionEmbedding.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +4 -4
- package/dist/layers/TiedEmbedding.js +6 -6
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/loader/loadTransformers.js +1 -1
- package/dist/loader/oldZipLoad.js +17 -17
- package/dist/log_sum_exp-VLZgbFAH.js +39 -0
- package/dist/main.d.ts +1 -1
- package/dist/main.js +9 -9
- package/dist/{matMul16-BWRSOCWB.js → matMul16-cDxwemKj.js} +7 -7
- package/dist/{matMulGelu-CzfgT6Wq.js → matMulGelu-B2s_80-H.js} +18 -18
- package/dist/{mat_mul-SjpJRLyL.js → mat_mul-DxpNTCRz.js} +3 -3
- package/dist/{mod-AnXEvvpo.js → mod-PrOKlFxH.js} +1 -1
- package/dist/models/NanoGPTV1.js +2 -2
- package/dist/models/model.js +9 -9
- package/dist/{ones-D2rT0xk2.js → ones-BX_wEgzB.js} +3 -3
- package/dist/ops/adamAdjust.js +1 -1
- package/dist/ops/adamMoments.js +1 -1
- package/dist/ops/add16.js +1 -1
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/concat16.js +2 -2
- package/dist/ops/cpu/adamAdjust.js +6 -6
- package/dist/ops/cpu/adamMoments.js +2 -2
- package/dist/ops/cpu/appendCache.js +5 -5
- package/dist/ops/cpu/attentionMask.js +10 -10
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +6 -6
- package/dist/ops/cpu/gelu.js +9 -9
- package/dist/ops/cpu/matMul16.js +2 -2
- package/dist/ops/cpu/matMulGelu.js +3 -3
- package/dist/ops/cpu/matMulMul.js +1 -1
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +3 -3
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +9 -9
- package/dist/ops/cpu/scatterSub.js +11 -11
- package/dist/ops/dot16.js +2 -2
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/grads/add16.js +4 -4
- package/dist/ops/grads/attentionMask.js +2 -2
- package/dist/ops/grads/gelu.js +2 -2
- package/dist/ops/grads/matMul16.js +3 -3
- package/dist/ops/grads/matMulGelu.js +3 -3
- package/dist/ops/grads/normRMS.js +7 -7
- package/dist/ops/grads/pack16.js +3 -3
- package/dist/ops/grads/qkv.js +6 -6
- package/dist/ops/grads/rope.js +2 -2
- package/dist/ops/grads/softmax16.js +1 -1
- package/dist/ops/grads/unpack16.js +2 -2
- package/dist/ops/matMul16.js +3 -3
- package/dist/ops/matMulGelu.js +2 -2
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mul16.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/pack16.js +2 -2
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/reshape16.js +6 -6
- package/dist/ops/rope.js +2 -2
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/slice16.js +2 -2
- package/dist/ops/softmax16.js +1 -1
- package/dist/ops/sub16.js +1 -1
- package/dist/ops/sum16.js +2 -2
- package/dist/ops/transpose16.js +3 -3
- package/dist/ops/unpack16.js +2 -2
- package/dist/ops/webgl/adamAdjust.js +2 -2
- package/dist/ops/webgl/adamMoments.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +4 -4
- package/dist/ops/webgl/fusedSoftmax.js +6 -6
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.js +3 -3
- package/dist/ops/webgl/matMul16.js +11 -11
- package/dist/ops/webgl/matMulGelu.js +4 -4
- package/dist/ops/webgl/matMulMul.js +7 -7
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +7 -7
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +4 -4
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/ops/webgpu/adamAdjust.js +3 -3
- package/dist/ops/webgpu/adamMoments.js +3 -3
- package/dist/ops/webgpu/add16.js +1 -1
- package/dist/ops/webgpu/appendCache.js +3 -3
- package/dist/ops/webgpu/attentionMask.js +5 -5
- package/dist/ops/webgpu/attentionMask32_program.js +2 -2
- package/dist/ops/webgpu/concat16.js +5 -5
- package/dist/ops/webgpu/gatherSub.js +5 -5
- package/dist/ops/webgpu/gelu.js +3 -3
- package/dist/ops/webgpu/matMul16.js +18 -18
- package/dist/ops/webgpu/matMul16_program.js +2 -2
- package/dist/ops/webgpu/mul16.js +4 -4
- package/dist/ops/webgpu/normRMS.js +6 -6
- package/dist/ops/webgpu/normRMSGrad.js +4 -4
- package/dist/ops/webgpu/pack16.js +1 -1
- package/dist/ops/webgpu/pack16_program.js +2 -2
- package/dist/ops/webgpu/qkv.js +6 -6
- package/dist/ops/webgpu/rope.js +3 -3
- package/dist/ops/webgpu/scatterSub.js +3 -3
- package/dist/ops/webgpu/slice16.js +4 -4
- package/dist/ops/webgpu/softmax16.js +2 -2
- package/dist/ops/webgpu/softmax16_program.js +2 -2
- package/dist/ops/webgpu/softmax16_subgroup_program.js +2 -2
- package/dist/ops/webgpu/softmax16grad.js +1 -1
- package/dist/ops/webgpu/sub16.js +4 -4
- package/dist/ops/webgpu/sum16.js +6 -6
- package/dist/ops/webgpu/transpose16.js +2 -2
- package/dist/ops/webgpu/transpose16_program.js +2 -2
- package/dist/ops/webgpu/transpose16_shared_program.js +3 -3
- package/dist/ops/webgpu/unpack16.js +3 -3
- package/dist/ops/webgpu/utils/binary_op.js +3 -3
- package/dist/ops/webgpu/utils/reductions.js +4 -4
- package/dist/{ops-B5yanEdW.js → ops-FJapAPfm.js} +56 -56
- package/dist/{pack16-nQ6JaLo-.js → pack16-k4jq6aMX.js} +7 -7
- package/dist/patches/webgpu_backend.js +7 -7
- package/dist/patches/webgpu_base.js +1 -1
- package/dist/patches/webgpu_program.js +8 -8
- package/dist/{random_width-or-CEftb.js → random_width-UGQn4OWb.js} +33 -33
- package/dist/range-CuGvVN2c.js +10 -0
- package/dist/{relu-CP0ZcxWO.js → relu-Cf80uA2p.js} +1 -1
- package/dist/{reshape-ByE68wS9.js → reshape-CkjKPPqB.js} +1 -1
- package/dist/{resize_nearest_neighbor-B19mCEg2.js → resize_nearest_neighbor-DB8k9KN_.js} +43 -43
- package/dist/{rope-Ir4mTyD1.js → rope-BmZmp9uP.js} +1 -1
- package/dist/{scatter_nd_util-lvSiX8q4.js → scatter_nd_util-BY22Cc-C.js} +1 -1
- package/dist/{selu_util-kbhpTdYD.js → selu_util-BuLbmbrl.js} +5 -5
- package/dist/{shared-DT1TkE6w.js → shared-B7USJZgw.js} +1 -1
- package/dist/{shared-dntlHIDQ.js → shared-BQboIImQ.js} +86 -86
- package/dist/{slice-BfEGSH82.js → slice-Aqy7KbJh.js} +3 -3
- package/dist/{slice_util-uTKwiEpW.js → slice_util-D8CQRenR.js} +7 -7
- package/dist/{softmax-CA5jFsLR.js → softmax-faLoUZVT.js} +1 -1
- package/dist/{split-CVLc0w--.js → split-BNz5jcGc.js} +3 -3
- package/dist/{squeeze-C7Z2srUo.js → squeeze--YMgaAAf.js} +2 -2
- package/dist/{stack-Cf4n9h0N.js → stack-WJK22CFn.js} +1 -1
- package/dist/{step-CINUs5QB.js → step-dXR33iOg.js} +32 -32
- package/dist/sum-BdplSvq_.js +11 -0
- package/dist/tensor-BQqrDvpx.js +8 -0
- package/dist/tensor1d-LxP9asMm.js +11 -0
- package/dist/{tensor2d-Bs9wZRc7.js → tensor2d-BN1sSfQO.js} +3 -3
- package/dist/{tensor4d-BARPdTaS.js → tensor4d-DVwr7pLF.js} +1 -1
- package/dist/{tfjs_backend-y1cvNhLA.js → tfjs_backend-Vi4JfLzT.js} +28 -28
- package/dist/{tile-mbfagpsB.js → tile-CvN_LyVr.js} +4 -4
- package/dist/tokeniser/BaseTokeniser.d.ts +27 -0
- package/dist/tokeniser/BaseTokeniser.js +94 -0
- package/dist/tokeniser/CharTokeniser.d.ts +4 -3
- package/dist/tokeniser/CharTokeniser.js +46 -32
- package/dist/tokeniser/bpe.d.ts +4 -3
- package/dist/tokeniser/bpe.js +60 -45
- package/dist/tokeniser/type.d.ts +11 -0
- package/dist/training/Adam.js +2 -2
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.d.ts +2 -2
- package/dist/training/DatasetBuilder.js +32 -36
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.d.ts +3 -3
- package/dist/training/Trainer.js +2 -2
- package/dist/training/sparseCrossEntropy.js +3 -3
- package/dist/{transpose-ClWiBS_b.js → transpose-JawVKyZy.js} +5 -5
- package/dist/{unsorted_segment_sum-BDDhB_E6.js → unsorted_segment_sum-LAbmE9G4.js} +78 -78
- package/dist/utilities/dummy.js +3 -3
- package/dist/utilities/multinomialCPU.js +2 -2
- package/dist/utilities/packed.js +1 -1
- package/dist/utilities/performance.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/sentences.js +5 -5
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-WawDEaAb.js → variable-DQ9yYgEU.js} +1 -1
- package/dist/{webgpu_program-DuOXPQol.js → webgpu_program-CAE4RICo.js} +3 -3
- package/dist/{webgpu_util-RxEF33Rj.js → webgpu_util-BdovYhXr.js} +1 -1
- package/dist/{zeros-KnWaWf-X.js → zeros-DeiE2zTa.js} +2 -2
- package/dist/{zeros_like-DvE73F4e.js → zeros_like-BAz3iKru.js} +77 -77
- package/package.json +1 -1
- package/dist/complex-DjxcVmoX.js +0 -11
- package/dist/gather-D3JcZUaI.js +0 -9
- package/dist/log_sum_exp-ngO0-4pK.js +0 -39
- package/dist/range-BklejeeW.js +0 -10
- package/dist/sum-DWAtNGez.js +0 -11
- package/dist/tensor-DJoc7gJU.js +0 -8
- package/dist/tensor1d-D11P_7Dp.js +0 -11
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { t as c, al as u, b as i } from "../index-
|
|
1
|
+
import { t as c, al as u, b as i } from "../index-D0RBWjq8.js";
|
|
2
2
|
import f from "./BaseLayer.js";
|
|
3
|
-
import { E as g, D as h } from "../random_width-
|
|
4
|
-
import { r as l } from "../exports_initializers-
|
|
5
|
-
import { m as b } from "../mod-
|
|
6
|
-
import { r as w } from "../range-
|
|
3
|
+
import { E as g, D as h } from "../random_width-UGQn4OWb.js";
|
|
4
|
+
import { r as l } from "../exports_initializers-eS9QJ6ut.js";
|
|
5
|
+
import { m as b } from "../mod-PrOKlFxH.js";
|
|
6
|
+
import { r as w } from "../range-CuGvVN2c.js";
|
|
7
7
|
function E(t) {
|
|
8
8
|
return new h(t);
|
|
9
9
|
}
|
package/dist/layers/RMSNorm.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { t as s } from "../index-
|
|
1
|
+
import { t as s } from "../index-D0RBWjq8.js";
|
|
2
2
|
import e from "./BaseLayer.js";
|
|
3
3
|
import { normRMS as a } from "../ops/normRMS.js";
|
|
4
|
-
import { v as i } from "../variable-
|
|
5
|
-
import { o as m } from "../ones-
|
|
4
|
+
import { v as i } from "../variable-DQ9yYgEU.js";
|
|
5
|
+
import { o as m } from "../ones-BX_wEgzB.js";
|
|
6
6
|
class l extends e {
|
|
7
7
|
GAMMA;
|
|
8
8
|
constructor(r, t = "", o) {
|
package/dist/layers/RoPECache.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { r as l } from "../zeros_like-
|
|
3
|
-
import { c as f, s as m } from "../unsorted_segment_sum-
|
|
4
|
-
import { r as h } from "../range-
|
|
1
|
+
import { l as a, b as n, p as c, t as p, k as r } from "../index-D0RBWjq8.js";
|
|
2
|
+
import { r as l } from "../zeros_like-BAz3iKru.js";
|
|
3
|
+
import { c as f, s as m } from "../unsorted_segment_sum-LAbmE9G4.js";
|
|
4
|
+
import { r as h } from "../range-CuGvVN2c.js";
|
|
5
5
|
class D {
|
|
6
6
|
constructor(s) {
|
|
7
7
|
this.config = s;
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
import "../random_width-
|
|
2
|
-
import "../index-
|
|
3
|
-
import { r as s } from "../exports_initializers-
|
|
1
|
+
import "../random_width-UGQn4OWb.js";
|
|
2
|
+
import "../index-D0RBWjq8.js";
|
|
3
|
+
import { r as s } from "../exports_initializers-eS9QJ6ut.js";
|
|
4
4
|
import a from "./BaseLayer.js";
|
|
5
5
|
import { dot16 as o } from "../ops/dot16.js";
|
|
6
6
|
import { isPackedTensor as r } from "../utilities/packed.js";
|
|
7
|
-
import { p as m } from "../pack16-
|
|
7
|
+
import { p as m } from "../pack16-k4jq6aMX.js";
|
|
8
8
|
import { transpose16 as d } from "../ops/transpose16.js";
|
|
9
|
-
import { v as p } from "../variable-
|
|
10
|
-
import { g as h } from "../gather-
|
|
9
|
+
import { v as p } from "../variable-DQ9yYgEU.js";
|
|
10
|
+
import { g as h } from "../gather-BcO5UQNJ.js";
|
|
11
11
|
class g extends a {
|
|
12
12
|
vocabSize;
|
|
13
13
|
embedDim;
|
|
@@ -2,7 +2,7 @@ import p from "./CausalSelfAttention.js";
|
|
|
2
2
|
import m from "./MLP.js";
|
|
3
3
|
import d from "./RMSNorm.js";
|
|
4
4
|
import h from "./BaseLayer.js";
|
|
5
|
-
import { k as n, t as u } from "../index-
|
|
5
|
+
import { k as n, t as u } from "../index-D0RBWjq8.js";
|
|
6
6
|
import { add16 as l } from "../ops/add16.js";
|
|
7
7
|
class g extends h {
|
|
8
8
|
ln1;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import l from "../tokeniser/CharTokeniser.js";
|
|
2
2
|
import c from "../tokeniser/bpe.js";
|
|
3
3
|
import { load_safetensors as b } from "../utilities/safetensors.js";
|
|
4
|
-
import {
|
|
4
|
+
import { aa as y } from "../index-D0RBWjq8.js";
|
|
5
5
|
import { dummyPassAsync as u } from "../utilities/dummy.js";
|
|
6
6
|
import _ from "../models/factory.js";
|
|
7
7
|
async function L(e, a, r, t) {
|
|
@@ -1,22 +1,22 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import "../random_width-
|
|
3
|
-
import "../zeros_like-
|
|
1
|
+
import { aa as y } from "../index-D0RBWjq8.js";
|
|
2
|
+
import "../random_width-UGQn4OWb.js";
|
|
3
|
+
import "../zeros_like-BAz3iKru.js";
|
|
4
4
|
import "../Generator.js";
|
|
5
5
|
import "../index-Cp39cXWe.js";
|
|
6
|
-
import "../dataset-
|
|
6
|
+
import "../dataset-DcjWqUVQ.js";
|
|
7
7
|
import "../ops/cpu/attentionMask.js";
|
|
8
8
|
import "../ops/webgl/attentionMask.js";
|
|
9
9
|
import "../ops/grads/attentionMask.js";
|
|
10
10
|
import "../ops/cpu/rope.js";
|
|
11
11
|
import "../ops/webgl/rope.js";
|
|
12
|
-
import "../rope-
|
|
12
|
+
import "../rope-BmZmp9uP.js";
|
|
13
13
|
import "../ops/cpu/appendCache.js";
|
|
14
14
|
import "../ops/webgl/appendCache.js";
|
|
15
15
|
import "../ops/grads/softmax16.js";
|
|
16
|
-
import "../matMul16-
|
|
16
|
+
import "../matMul16-cDxwemKj.js";
|
|
17
17
|
import "../ops/webgl/matMul16.js";
|
|
18
18
|
import "../ops/cpu/matMul16.js";
|
|
19
|
-
import "../pack16-
|
|
19
|
+
import "../pack16-k4jq6aMX.js";
|
|
20
20
|
import "../ops/transpose16.js";
|
|
21
21
|
import "../ops/reshape16.js";
|
|
22
22
|
import "../ops/cpu/qkv.js";
|
|
@@ -42,26 +42,26 @@ import "../ops/cpu/adamMoments.js";
|
|
|
42
42
|
import "../ops/webgl/adamMoments.js";
|
|
43
43
|
import "../papaparse.min-C0cScC2i.js";
|
|
44
44
|
import "../ops/cpu/matMulGelu.js";
|
|
45
|
-
import "../matMulGelu-
|
|
45
|
+
import "../matMulGelu-B2s_80-H.js";
|
|
46
46
|
import "../ops/grads/matMulGelu.js";
|
|
47
47
|
import "../ops/cpu/gelu.js";
|
|
48
48
|
import "../ops/webgl/gelu.js";
|
|
49
|
-
import "../gelu-
|
|
49
|
+
import "../gelu-DqTbCx5x.js";
|
|
50
50
|
import "../ops/webgl/log.js";
|
|
51
51
|
import "../checks/normRMS.js";
|
|
52
52
|
import "../checks/normRMSGrad.js";
|
|
53
53
|
import { importWeights as u } from "../utilities/weights.js";
|
|
54
54
|
async function Mt(r) {
|
|
55
|
-
const e = /* @__PURE__ */ new Map(),
|
|
56
|
-
if (!
|
|
55
|
+
const e = /* @__PURE__ */ new Map(), a = await r.file("manifest.json")?.async("string");
|
|
56
|
+
if (!a)
|
|
57
57
|
throw new Error("Manifest file not found in the zip archive");
|
|
58
|
-
const
|
|
59
|
-
for (const [t, o] of Object.entries(
|
|
58
|
+
const p = JSON.parse(a);
|
|
59
|
+
for (const [t, o] of Object.entries(p.weightSpec))
|
|
60
60
|
e.set(t, { spec: o, data: new Float32Array() });
|
|
61
|
-
const
|
|
62
|
-
if (!
|
|
61
|
+
const s = await r.file("tokeniser.json")?.async("string");
|
|
62
|
+
if (!s)
|
|
63
63
|
throw new Error("Tokeniser file not found in the zip archive");
|
|
64
|
-
const i = JSON.parse(
|
|
64
|
+
const i = JSON.parse(s), f = (i.type ?? "char") === "char" ? new h(i.vocab) : new k(i.vocab, i.merges), c = /* @__PURE__ */ new Map();
|
|
65
65
|
for (const t of Object.keys(r.files))
|
|
66
66
|
if (t.endsWith(".bin")) {
|
|
67
67
|
const o = t.replace(".bin", ""), l = await r.file(t).async("arraybuffer"), w = new Float32Array(l), n = e.get(o) || { spec: [], data: new Float32Array() };
|
|
@@ -70,7 +70,7 @@ async function Mt(r) {
|
|
|
70
70
|
c.set(o, d);
|
|
71
71
|
}
|
|
72
72
|
y();
|
|
73
|
-
const m = b(
|
|
73
|
+
const m = b(p.config);
|
|
74
74
|
return await g(m), m.loadWeights(c), { model: m, tokeniser: f };
|
|
75
75
|
}
|
|
76
76
|
export {
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { q as r, u as x, E as p, am as E, an as $, ao as d, af as h, c as S, n as K } from "./index-D0RBWjq8.js";
|
|
2
|
+
import { e as _ } from "./axis_util-DofAuy0p.js";
|
|
3
|
+
import { r as m } from "./reshape-CkjKPPqB.js";
|
|
4
|
+
import { s as T } from "./sum-BdplSvq_.js";
|
|
5
|
+
function b(s, o = null, n = !1) {
|
|
6
|
+
const a = { x: x(s, "x", "max") }, e = { reductionIndices: o, keepDims: n };
|
|
7
|
+
return p.runKernel(E, a, e);
|
|
8
|
+
}
|
|
9
|
+
const I = /* @__PURE__ */ r({ max_: b });
|
|
10
|
+
function M(s) {
|
|
11
|
+
const n = { x: x(s, "x", "exp") };
|
|
12
|
+
return p.runKernel($, n);
|
|
13
|
+
}
|
|
14
|
+
const N = /* @__PURE__ */ r({ exp_: M });
|
|
15
|
+
function q(s) {
|
|
16
|
+
const n = { x: x(s, "x", "log", "float32") };
|
|
17
|
+
return p.runKernel(d, n);
|
|
18
|
+
}
|
|
19
|
+
const v = /* @__PURE__ */ r({ log_: q });
|
|
20
|
+
function w(s, o = null, n = !1) {
|
|
21
|
+
const t = x(s, "x", "logSumExp"), a = h(o, t.shape), e = I(
|
|
22
|
+
t,
|
|
23
|
+
a,
|
|
24
|
+
!0
|
|
25
|
+
/* keepDims */
|
|
26
|
+
), l = S(t, e), i = N(l), f = T(i, a), u = v(f), c = K(m(e, u.shape), u);
|
|
27
|
+
if (n) {
|
|
28
|
+
const g = _(c.shape, a);
|
|
29
|
+
return m(c, g);
|
|
30
|
+
}
|
|
31
|
+
return c;
|
|
32
|
+
}
|
|
33
|
+
const P = /* @__PURE__ */ r({ logSumExp_: w });
|
|
34
|
+
export {
|
|
35
|
+
v as a,
|
|
36
|
+
N as e,
|
|
37
|
+
P as l,
|
|
38
|
+
I as m
|
|
39
|
+
};
|
package/dist/main.d.ts
CHANGED
|
@@ -14,7 +14,7 @@ export { default as Generator } from './Generator';
|
|
|
14
14
|
export type { ITrainerOptions } from './Trainer';
|
|
15
15
|
export type { IGenerateOptions } from './Generator';
|
|
16
16
|
export { type ModelForwardAttributes, default as Model } from './models/model';
|
|
17
|
-
export type { ITokeniser } from './tokeniser/type';
|
|
17
|
+
export type { ITokeniser, Conversation, Roles } from './tokeniser/type';
|
|
18
18
|
export type { TrainingProgress, TrainingLogEntry } from './training/Trainer';
|
|
19
19
|
export type { GPTConfig } from './models/config';
|
|
20
20
|
export { estimateParameterCount, estimateMemoryUsage, estimateTrainingMemoryUsage, estimateResources, validateConfig, } from './utilities/parameters';
|
package/dist/main.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import "./index-
|
|
2
|
-
import "./random_width-
|
|
3
|
-
import "./zeros_like-
|
|
1
|
+
import "./index-D0RBWjq8.js";
|
|
2
|
+
import "./random_width-UGQn4OWb.js";
|
|
3
|
+
import "./zeros_like-BAz3iKru.js";
|
|
4
4
|
import { default as oo } from "./Generator.js";
|
|
5
5
|
import "./index-Cp39cXWe.js";
|
|
6
|
-
import "./dataset-
|
|
6
|
+
import "./dataset-DcjWqUVQ.js";
|
|
7
7
|
import { default as to } from "./models/NanoGPTV1.js";
|
|
8
8
|
import { default as mo } from "./TeachableLLM.js";
|
|
9
9
|
import { default as io } from "./tokeniser/CharTokeniser.js";
|
|
@@ -25,15 +25,15 @@ import "./ops/webgl/qkv.js";
|
|
|
25
25
|
import "./ops/grads/qkv.js";
|
|
26
26
|
import "./ops/cpu/rope.js";
|
|
27
27
|
import "./ops/webgl/rope.js";
|
|
28
|
-
import "./rope-
|
|
28
|
+
import "./rope-BmZmp9uP.js";
|
|
29
29
|
import "./ops/cpu/appendCache.js";
|
|
30
30
|
import "./ops/webgl/appendCache.js";
|
|
31
31
|
import "./ops/cpu/matMulGelu.js";
|
|
32
|
-
import "./matMulGelu-
|
|
32
|
+
import "./matMulGelu-B2s_80-H.js";
|
|
33
33
|
import "./ops/grads/matMulGelu.js";
|
|
34
34
|
import "./ops/cpu/gelu.js";
|
|
35
35
|
import "./ops/webgl/gelu.js";
|
|
36
|
-
import "./gelu-
|
|
36
|
+
import "./gelu-DqTbCx5x.js";
|
|
37
37
|
import "./ops/cpu/normRMS.js";
|
|
38
38
|
import "./ops/webgl/normRMS.js";
|
|
39
39
|
import "./ops/grads/normRMS.js";
|
|
@@ -42,9 +42,9 @@ import "./ops/cpu/adamMoments.js";
|
|
|
42
42
|
import "./ops/webgl/adamMoments.js";
|
|
43
43
|
import "./ops/cpu/adamAdjust.js";
|
|
44
44
|
import "./ops/webgl/adamAdjust.js";
|
|
45
|
-
import { u as o, p as r } from "./pack16-
|
|
45
|
+
import { u as o, p as r } from "./pack16-k4jq6aMX.js";
|
|
46
46
|
import "./ops/grads/softmax16.js";
|
|
47
|
-
import "./matMul16-
|
|
47
|
+
import "./matMul16-cDxwemKj.js";
|
|
48
48
|
import "./ops/webgl/matMul16.js";
|
|
49
49
|
import "./ops/cpu/matMul16.js";
|
|
50
50
|
import "./ops/transpose16.js";
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { j as y, e as h } from "./index-D0RBWjq8.js";
|
|
2
2
|
import "./ops/webgl/matMul16.js";
|
|
3
3
|
import "./ops/cpu/matMul16.js";
|
|
4
4
|
import { isPackedTensor as g } from "./utilities/packed.js";
|
|
5
|
-
import { p as v } from "./pack16-
|
|
6
|
-
import { d as k } from "./gelu-
|
|
5
|
+
import { p as v } from "./pack16-k4jq6aMX.js";
|
|
6
|
+
import { d as k } from "./gelu-DqTbCx5x.js";
|
|
7
7
|
import { transpose16 as S } from "./ops/transpose16.js";
|
|
8
8
|
import { reshape16 as w } from "./ops/reshape16.js";
|
|
9
9
|
const G = {
|
|
@@ -54,7 +54,7 @@ function l(r, o, n = !1, s = !1, t = {}) {
|
|
|
54
54
|
const e = g(r), f = g(o), i = e || f, a = !i || e ? r : v(r), p = !i || f ? o : v(o), c = h().runKernel("MatMul16", { A: a, B: p }, { transposeA: n, transposeB: s, ...t });
|
|
55
55
|
return i && !e && a.dispose(), i && !f && p.dispose(), c;
|
|
56
56
|
}
|
|
57
|
-
function
|
|
57
|
+
function F(r, o, n, s = !1, t = !1) {
|
|
58
58
|
return l(r, o, s, t, { scale: n });
|
|
59
59
|
}
|
|
60
60
|
function B(r, o, n, s = !1, t = !1) {
|
|
@@ -63,14 +63,14 @@ function B(r, o, n, s = !1, t = !1) {
|
|
|
63
63
|
function M(r, o, n, s = !1, t = !1) {
|
|
64
64
|
return l(r, o, s, t, { scaleB: n });
|
|
65
65
|
}
|
|
66
|
-
function
|
|
66
|
+
function K(r, o, n = !1, s = !1) {
|
|
67
67
|
return l(r, o, n, s, { activation: "gelu" });
|
|
68
68
|
}
|
|
69
69
|
export {
|
|
70
70
|
G as a,
|
|
71
71
|
l as b,
|
|
72
|
-
|
|
72
|
+
K as c,
|
|
73
73
|
B as d,
|
|
74
74
|
M as e,
|
|
75
|
-
|
|
75
|
+
F as m
|
|
76
76
|
};
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { r as M } from "./Reshape-
|
|
3
|
-
import { u as H } from "./gpgpu_math-
|
|
4
|
-
import { m as B } from "./mat_mul-
|
|
1
|
+
import { h as C, t as R, e as I, V as G, $ as L, y as F, ak as U } from "./index-D0RBWjq8.js";
|
|
2
|
+
import { r as M } from "./Reshape-Ct266DEk.js";
|
|
3
|
+
import { u as H } from "./gpgpu_math-CJcbnKPC.js";
|
|
4
|
+
import { m as B } from "./mat_mul-DxpNTCRz.js";
|
|
5
5
|
class W {
|
|
6
6
|
constructor(e, s, a, n = !1, o = !1, r = !1, i = null, u = !1, l = !1) {
|
|
7
7
|
this.variableNames = ["matrixA", "matrixB"], this.packedInputs = !0, this.packedOutput = !0, this.outputShape = a, this.enableShapeUniforms = H(this.outputShape.length);
|
|
8
8
|
const p = n ? e[1] : e[2], h = Math.ceil(p / 2), d = n ? "i * 2, rc.y" : "rc.y, i * 2", x = o ? "rc.z, i * 2" : "i * 2, rc.z", b = n ? ["a.xxyy", "a.zzww"] : ["a.xxzz", "a.yyww"], m = o ? ["b.xzxz", "b.ywyw"] : ["b.xyxy", "b.zwzw"];
|
|
9
|
-
let c = "",
|
|
9
|
+
let c = "", $ = "";
|
|
10
10
|
i && (u ? c = `vec4 activation(vec4 a) {
|
|
11
11
|
vec4 b = getPreluActivationWeightsAtOutCoords();
|
|
12
12
|
${i}
|
|
@@ -15,8 +15,8 @@ class W {
|
|
|
15
15
|
${i}
|
|
16
16
|
}` : c = `vec4 activation(vec4 x) {
|
|
17
17
|
${i}
|
|
18
|
-
}`,
|
|
19
|
-
const
|
|
18
|
+
}`, $ = "result = activation(result);");
|
|
19
|
+
const g = r ? "result += getBiasAtOutCoords();" : "";
|
|
20
20
|
r && this.variableNames.push("bias"), u && this.variableNames.push("preluActivationWeights"), l && this.variableNames.push("leakyreluAlpha");
|
|
21
21
|
let f = "rc.x", v = "rc.x";
|
|
22
22
|
e[0] < s[0] ? f = `imod(rc.x, ${e[0]})` : s[0] < e[0] && (v = `imod(rc.x, ${s[0]})`), this.userCode = `
|
|
@@ -44,10 +44,10 @@ class W {
|
|
|
44
44
|
ivec3 rc = getOutputCoords();
|
|
45
45
|
vec4 result = dot2x2ARowBCol(rc);
|
|
46
46
|
|
|
47
|
-
${$}
|
|
48
|
-
|
|
49
47
|
${g}
|
|
50
48
|
|
|
49
|
+
${$}
|
|
50
|
+
|
|
51
51
|
setOutput(result);
|
|
52
52
|
}
|
|
53
53
|
`;
|
|
@@ -90,7 +90,7 @@ function O({
|
|
|
90
90
|
activationSnippet: o,
|
|
91
91
|
multiplier: r
|
|
92
92
|
}) {
|
|
93
|
-
const i = t.shape.length, u = e.shape.length, l = s ? t.shape[i - 2] : t.shape[i - 1], p = a ? e.shape[u - 1] : e.shape[u - 2], h = s ? t.shape[i - 1] : t.shape[i - 2], d = a ? e.shape[u - 2] : e.shape[u - 1], x = t.shape.slice(0, -2), b = e.shape.slice(0, -2), m = G(x), c = G(b),
|
|
93
|
+
const i = t.shape.length, u = e.shape.length, l = s ? t.shape[i - 2] : t.shape[i - 1], p = a ? e.shape[u - 1] : e.shape[u - 2], h = s ? t.shape[i - 1] : t.shape[i - 2], d = a ? e.shape[u - 2] : e.shape[u - 1], x = t.shape.slice(0, -2), b = e.shape.slice(0, -2), m = G(x), c = G(b), g = L(t.shape.slice(0, -2), e.shape.slice(0, -2)).concat([h, d]);
|
|
94
94
|
F(
|
|
95
95
|
l === p,
|
|
96
96
|
() => `Error in matMul: inner shapes (${l}) and (${p}) of Tensors with shapes ${t.shape} and ${e.shape} and transposeA=${s} and transposeB=${a} must match.`
|
|
@@ -107,13 +107,13 @@ function O({
|
|
|
107
107
|
!1
|
|
108
108
|
), D = [A, y];
|
|
109
109
|
r && D.push(r);
|
|
110
|
-
const z = n.runWebGLProgram(_, D, T), K = M({ inputs: { x: z }, backend: n, attrs: { shape:
|
|
110
|
+
const z = n.runWebGLProgram(_, D, T), K = M({ inputs: { x: z }, backend: n, attrs: { shape: g } });
|
|
111
111
|
k.push(z);
|
|
112
112
|
for (const P of k)
|
|
113
113
|
n.disposeIntermediateTensorInfo(P);
|
|
114
114
|
return K;
|
|
115
115
|
}
|
|
116
|
-
function
|
|
116
|
+
function V(t) {
|
|
117
117
|
const { inputs: e, backend: s } = t, { x: a, kernel: n } = e;
|
|
118
118
|
if (a === void 0 || n === void 0)
|
|
119
119
|
throw new Error("BatchMatMul requires two input tensors.");
|
|
@@ -126,13 +126,13 @@ function J(t) {
|
|
|
126
126
|
activationSnippet: j
|
|
127
127
|
});
|
|
128
128
|
}
|
|
129
|
-
const
|
|
129
|
+
const J = {
|
|
130
130
|
kernelName: "MatMulGelu",
|
|
131
131
|
backendName: "webgl",
|
|
132
|
-
kernelFunc:
|
|
132
|
+
kernelFunc: V
|
|
133
133
|
};
|
|
134
|
-
C(
|
|
135
|
-
function
|
|
134
|
+
C(J);
|
|
135
|
+
function Q(t) {
|
|
136
136
|
const { dy: e, x: s, kernel: a } = t.inputs, n = t.backend;
|
|
137
137
|
return R(() => {
|
|
138
138
|
const o = I().makeTensorFromTensorInfo(
|
|
@@ -152,12 +152,12 @@ function V(t) {
|
|
|
152
152
|
const X = {
|
|
153
153
|
kernelName: "MatMulGeluGrad",
|
|
154
154
|
backendName: "webgl",
|
|
155
|
-
kernelFunc:
|
|
155
|
+
kernelFunc: Q
|
|
156
156
|
};
|
|
157
157
|
C(X);
|
|
158
158
|
export {
|
|
159
159
|
W as M,
|
|
160
160
|
se as a,
|
|
161
161
|
O as b,
|
|
162
|
-
|
|
162
|
+
V as c
|
|
163
163
|
};
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { q as m, u as s, D as c, E as M, H as p } from "./index-D0RBWjq8.js";
|
|
2
2
|
function f(e, n, o = !1, l = !1) {
|
|
3
3
|
let a = s(e, "a", "matMul"), t = s(n, "b", "matMul");
|
|
4
|
-
[a, t] =
|
|
4
|
+
[a, t] = c(a, t);
|
|
5
5
|
const r = { a, b: t }, u = { transposeA: o, transposeB: l };
|
|
6
|
-
return
|
|
6
|
+
return M.runKernel(p, r, u);
|
|
7
7
|
}
|
|
8
8
|
const i = /* @__PURE__ */ m({ matMul_: f });
|
|
9
9
|
export {
|
package/dist/models/NanoGPTV1.js
CHANGED
|
@@ -3,11 +3,11 @@ import b from "../layers/TransformerBlock.js";
|
|
|
3
3
|
import k from "../layers/TiedEmbedding.js";
|
|
4
4
|
import w from "../layers/RoPECache.js";
|
|
5
5
|
import E from "../layers/RMSNorm.js";
|
|
6
|
-
import { t as l, k as u } from "../index-
|
|
6
|
+
import { t as l, k as u } from "../index-D0RBWjq8.js";
|
|
7
7
|
import C from "./model.js";
|
|
8
8
|
import P from "../layers/PositionEmbedding.js";
|
|
9
9
|
import { packingSupported as _ } from "../utilities/packed.js";
|
|
10
|
-
import { p as y, u as M } from "../pack16-
|
|
10
|
+
import { p as y, u as M } from "../pack16-k4jq6aMX.js";
|
|
11
11
|
class I extends C {
|
|
12
12
|
wte;
|
|
13
13
|
// Token embeddings
|
package/dist/models/model.js
CHANGED
|
@@ -1,23 +1,23 @@
|
|
|
1
1
|
import m from "../layers/BaseLayer.js";
|
|
2
|
-
import "../index-
|
|
3
|
-
import "../random_width-
|
|
4
|
-
import "../zeros_like-
|
|
2
|
+
import "../index-D0RBWjq8.js";
|
|
3
|
+
import "../random_width-UGQn4OWb.js";
|
|
4
|
+
import "../zeros_like-BAz3iKru.js";
|
|
5
5
|
import "../Generator.js";
|
|
6
6
|
import "../index-Cp39cXWe.js";
|
|
7
|
-
import "../dataset-
|
|
7
|
+
import "../dataset-DcjWqUVQ.js";
|
|
8
8
|
import "../ops/cpu/attentionMask.js";
|
|
9
9
|
import "../ops/webgl/attentionMask.js";
|
|
10
10
|
import "../ops/grads/attentionMask.js";
|
|
11
11
|
import "../ops/cpu/rope.js";
|
|
12
12
|
import "../ops/webgl/rope.js";
|
|
13
|
-
import "../rope-
|
|
13
|
+
import "../rope-BmZmp9uP.js";
|
|
14
14
|
import "../ops/cpu/appendCache.js";
|
|
15
15
|
import "../ops/webgl/appendCache.js";
|
|
16
16
|
import "../ops/grads/softmax16.js";
|
|
17
|
-
import "../matMul16-
|
|
17
|
+
import "../matMul16-cDxwemKj.js";
|
|
18
18
|
import "../ops/webgl/matMul16.js";
|
|
19
19
|
import "../ops/cpu/matMul16.js";
|
|
20
|
-
import "../pack16-
|
|
20
|
+
import "../pack16-k4jq6aMX.js";
|
|
21
21
|
import "../ops/transpose16.js";
|
|
22
22
|
import "../ops/reshape16.js";
|
|
23
23
|
import "../ops/cpu/qkv.js";
|
|
@@ -40,11 +40,11 @@ import "../ops/webgl/scatterSub.js";
|
|
|
40
40
|
import "../ops/cpu/gatherSub.js";
|
|
41
41
|
import "../ops/webgl/gatherSub.js";
|
|
42
42
|
import "../ops/cpu/matMulGelu.js";
|
|
43
|
-
import "../matMulGelu-
|
|
43
|
+
import "../matMulGelu-B2s_80-H.js";
|
|
44
44
|
import "../ops/grads/matMulGelu.js";
|
|
45
45
|
import "../ops/cpu/gelu.js";
|
|
46
46
|
import "../ops/webgl/gelu.js";
|
|
47
|
-
import "../gelu-
|
|
47
|
+
import "../gelu-DqTbCx5x.js";
|
|
48
48
|
import "../ops/webgl/log.js";
|
|
49
49
|
import "../checks/normRMS.js";
|
|
50
50
|
import "../checks/normRMSGrad.js";
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { c as f } from "./complex-
|
|
3
|
-
import { z as c } from "./zeros-
|
|
1
|
+
import { w as n, W as t, V as m, E as i } from "./index-D0RBWjq8.js";
|
|
2
|
+
import { c as f } from "./complex-DClmWqJt.js";
|
|
3
|
+
import { z as c } from "./zeros-DeiE2zTa.js";
|
|
4
4
|
function l(o, r = "float32") {
|
|
5
5
|
if (n(o), r === "complex64") {
|
|
6
6
|
const s = l(o, "float32"), a = c(o, "float32");
|
package/dist/ops/adamAdjust.js
CHANGED
package/dist/ops/adamMoments.js
CHANGED
package/dist/ops/add16.js
CHANGED
package/dist/ops/appendCache.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { e as a } from "../index-
|
|
1
|
+
import { e as a } from "../index-D0RBWjq8.js";
|
|
2
2
|
import "./cpu/appendCache.js";
|
|
3
3
|
import "./webgl/appendCache.js";
|
|
4
4
|
import { isPackedTensor as c } from "../utilities/packed.js";
|
|
5
|
-
import { c as t } from "../concat-
|
|
6
|
-
import { z as f } from "../zeros-
|
|
5
|
+
import { c as t } from "../concat-C6X3AAlQ.js";
|
|
6
|
+
import { z as f } from "../zeros-DeiE2zTa.js";
|
|
7
7
|
function C(r, o, n, p) {
|
|
8
8
|
if (!p) {
|
|
9
9
|
const e = r.shape[2], s = c(r);
|
package/dist/ops/concat16.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { isPackedTensor as o } from "../utilities/packed.js";
|
|
2
|
-
import { e } from "../index-
|
|
3
|
-
import { c } from "../concat-
|
|
2
|
+
import { e } from "../index-D0RBWjq8.js";
|
|
3
|
+
import { c } from "../concat-C6X3AAlQ.js";
|
|
4
4
|
function p(r, n) {
|
|
5
5
|
return o(r[0]) ? e().runKernel("Concat16", r, { axis: n ?? -1 }) : c(r, n);
|
|
6
6
|
}
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { h, l as t, n as i, m as k, o as z } from "../../index-D0RBWjq8.js";
|
|
2
2
|
function A(c) {
|
|
3
|
-
const { moments:
|
|
4
|
-
|
|
3
|
+
const { moments: n, value: r } = c.inputs, { beta1: l, beta2: m, epsilon: u, learningRate: d } = c.attrs, e = n.shape.length, a = new Array(e).fill(0), s = n.shape.slice();
|
|
4
|
+
s[e - 1] = 1;
|
|
5
5
|
const o = a.slice();
|
|
6
6
|
o[e - 1] = 1;
|
|
7
|
-
const
|
|
7
|
+
const b = s.slice(), p = n.slice(a, s).squeeze([e - 1]), M = n.slice(o, b).squeeze([e - 1]), g = t(p, l), f = t(M, m);
|
|
8
8
|
return i(
|
|
9
|
-
|
|
9
|
+
k(t(g, i(z(f), u ?? 1e-8)), -d),
|
|
10
10
|
r
|
|
11
11
|
);
|
|
12
12
|
}
|
|
@@ -15,4 +15,4 @@ const C = {
|
|
|
15
15
|
backendName: "cpu",
|
|
16
16
|
kernelFunc: A
|
|
17
17
|
};
|
|
18
|
-
|
|
18
|
+
h(C);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { p } from "../../index-
|
|
2
|
-
import { s as b } from "../../stack-
|
|
1
|
+
import { h as p } from "../../index-D0RBWjq8.js";
|
|
2
|
+
import { s as b } from "../../stack-WJK22CFn.js";
|
|
3
3
|
function f(t) {
|
|
4
4
|
const { moments: n, gradient: c } = t.inputs, { beta1: o, beta2: m } = t.attrs, e = n.shape.length, a = new Array(e).fill(0), s = n.shape.slice();
|
|
5
5
|
s[e - 1] = 1;
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { c as
|
|
1
|
+
import { h as d } from "../../index-D0RBWjq8.js";
|
|
2
|
+
import { c as m } from "../../concat-C6X3AAlQ.js";
|
|
3
3
|
function u(p) {
|
|
4
4
|
const { cache: n, item: s } = p.inputs, { maxSize: i, pastLen: c } = p.attrs, t = n.shape[0], o = n.shape[1], a = n.shape[3], e = s.shape[2];
|
|
5
5
|
if (c + e <= i) {
|
|
6
|
-
const f = n.slice([0, 0, 0, 0], [t, o, c, a]),
|
|
7
|
-
return f.dispose(),
|
|
6
|
+
const f = n.slice([0, 0, 0, 0], [t, o, c, a]), h = n.slice([0, 0, c + e, 0], [t, o, i - c - e, a]), r = e < e ? s.slice([0, 0, 0, 0], [t, o, e, a]) : s, k = m([f, r, h], 2);
|
|
7
|
+
return f.dispose(), h.dispose(), r !== s && r.dispose(), k;
|
|
8
8
|
}
|
|
9
|
-
const l = n.slice([0, 0, e, 0], [t, o, i - e, a]), C =
|
|
9
|
+
const l = n.slice([0, 0, e, 0], [t, o, i - e, a]), C = m([l, s], 2);
|
|
10
10
|
return l.dispose(), C;
|
|
11
11
|
}
|
|
12
12
|
const w = {
|
|
@@ -1,22 +1,22 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { l as N } from "../../ops-
|
|
3
|
-
import { o as b } from "../../ones-
|
|
4
|
-
import { z as A } from "../../zeros-
|
|
5
|
-
import { w as I } from "../../resize_nearest_neighbor-
|
|
6
|
-
import { m as g } from "../../mat_mul-
|
|
1
|
+
import { h as o, i as d, b as u } from "../../index-D0RBWjq8.js";
|
|
2
|
+
import { l as N } from "../../ops-FJapAPfm.js";
|
|
3
|
+
import { o as b } from "../../ones-BX_wEgzB.js";
|
|
4
|
+
import { z as A } from "../../zeros-DeiE2zTa.js";
|
|
5
|
+
import { w as I } from "../../resize_nearest_neighbor-DB8k9KN_.js";
|
|
6
|
+
import { m as g } from "../../mat_mul-DxpNTCRz.js";
|
|
7
7
|
function a(n) {
|
|
8
8
|
const { q: s, k: e } = n.inputs, { divisor: r } = n.attrs, c = s.shape[2], t = e.shape[2], m = N.bandPart(b([t, t]), -1, 0).cast("bool"), i = A([t, t]), l = d([t, t], Number.NEGATIVE_INFINITY), f = I(m, i, l), k = g(s, e, !1, !0).mul(u(r)), p = f.slice([0, 0], [c, t]).expandDims(0).expandDims(0);
|
|
9
9
|
return k.add(p);
|
|
10
10
|
}
|
|
11
|
-
const
|
|
11
|
+
const h = {
|
|
12
12
|
kernelName: "AttentionMask",
|
|
13
13
|
backendName: "cpu",
|
|
14
14
|
kernelFunc: a
|
|
15
15
|
};
|
|
16
|
-
o(
|
|
17
|
-
const
|
|
16
|
+
o(h);
|
|
17
|
+
const w = {
|
|
18
18
|
kernelName: "AttentionMask",
|
|
19
19
|
backendName: "tensorflow",
|
|
20
20
|
kernelFunc: a
|
|
21
21
|
};
|
|
22
|
-
o(
|
|
22
|
+
o(w);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { s as m } from "../../softmax-
|
|
1
|
+
import { h as e } from "../../index-D0RBWjq8.js";
|
|
2
|
+
import { s as m } from "../../softmax-faLoUZVT.js";
|
|
3
3
|
function n(t) {
|
|
4
4
|
const { inputs: s, attrs: a } = t, { logits: o } = s, { dim: i, dropoutRate: r } = a;
|
|
5
5
|
if (!o)
|