@genai-fi/nanogpt 0.8.0 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.d.ts +2 -1
- package/dist/Generator.js +44 -37
- package/dist/{RealDiv-N8TpOMYv.js → RealDiv-D_q39E3A.js} +14 -14
- package/dist/{Reshape-B-lWQRnF.js → Reshape-41YpQqEo.js} +1 -1
- package/dist/{Reshape-Bo8HzP8V.js → Reshape-Bh_jzKzV.js} +2 -2
- package/dist/TeachableLLM.js +7 -5
- package/dist/{axis_util-DubwyOhW.js → axis_util-Did9235A.js} +1 -1
- package/dist/backend.js +2 -2
- package/dist/{backend_util-BJ-_jSeK.js → backend_util-yC3YH1jo.js} +17 -17
- package/dist/{broadcast_to-BYfCp5iL.js → broadcast_to-CUvOdOT5.js} +2 -2
- package/dist/checks/appendCache.d.ts +1 -0
- package/dist/checks/appendCache.js +22 -0
- package/dist/checks/attentionMask.d.ts +1 -0
- package/dist/checks/attentionMask.js +37 -0
- package/dist/checks/check.d.ts +9 -0
- package/dist/checks/check.js +20 -0
- package/dist/checks/gelu.d.ts +1 -0
- package/dist/checks/gelu.js +18 -0
- package/dist/checks/index.d.ts +22 -0
- package/dist/checks/index.js +24 -0
- package/dist/checks/normRMS.d.ts +1 -0
- package/dist/checks/normRMS.js +16 -0
- package/dist/checks/normRMSGrad.d.ts +1 -0
- package/dist/checks/normRMSGrad.js +12 -0
- package/dist/checks/qkv.d.ts +1 -0
- package/dist/checks/qkv.js +50 -0
- package/dist/checks/rope.d.ts +1 -0
- package/dist/checks/rope.js +38 -0
- package/dist/checks/weights.d.ts +16 -0
- package/dist/checks/weights.js +29 -0
- package/dist/{concat-BmDqqFsa.js → concat-pHiVqR3L.js} +1 -1
- package/dist/{dataset-CJmEGu6D.js → dataset-DPPl-iLT.js} +7 -7
- package/dist/{dropout-sx0sjVAT.js → dropout-CcKSfOYE.js} +11 -11
- package/dist/{exports_initializers-DAKM8UO9.js → exports_initializers-DKk7-bsx.js} +1 -1
- package/dist/{gather-C1siEkdp.js → gather-CPg6ZlQA.js} +1 -1
- package/dist/{gelu-Bd3UBBxg.js → gelu-BkcmEEyD.js} +1 -1
- package/dist/{gpgpu_math-TFLxaLkw.js → gpgpu_math-D_ODOLix.js} +2 -2
- package/dist/{index-CUQrfsw_.js → index-DdmHGZjq.js} +655 -647
- package/dist/{index-BaPo_0H8.js → index-evZ57wr4.js} +10 -10
- package/dist/{kernel_funcs_utils-P9aFa232.js → kernel_funcs_utils-CDfFpUab.js} +15 -15
- package/dist/layers/BaseLayer.js +2 -2
- package/dist/layers/CausalSelfAttention.js +29 -29
- package/dist/layers/MLP.js +18 -18
- package/dist/layers/PositionEmbedding.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +4 -4
- package/dist/layers/TiedEmbedding.js +11 -11
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/loader/loadTransformers.js +1 -1
- package/dist/loader/oldZipLoad.js +9 -7
- package/dist/{log_sum_exp-C142qZqY.js → log_sum_exp-C8yFJfZz.js} +45 -24
- package/dist/main.d.ts +2 -0
- package/dist/main.js +9 -7
- package/dist/{mat_mul-DMkduNJu.js → mat_mul-Dpy2mMRu.js} +1 -1
- package/dist/{mod-uUuj4gSb.js → mod-CbibJi3D.js} +1 -1
- package/dist/models/NanoGPTV1.js +1 -1
- package/dist/models/model.js +9 -7
- package/dist/{mulmat_packed_gpu-Cm2gw-c8.js → mulmat_packed_gpu-q_Gmwyld.js} +1 -1
- package/dist/{ones-ZdgQGBCP.js → ones-BAqVh-eA.js} +2 -2
- package/dist/ops/adamAdjust.js +1 -1
- package/dist/ops/adamMoments.js +1 -1
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/adamAdjust.js +1 -1
- package/dist/ops/cpu/adamMoments.js +2 -2
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +5 -5
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +5 -5
- package/dist/ops/cpu/gelu.js +1 -1
- package/dist/ops/cpu/matMulGelu.js +2 -2
- package/dist/ops/cpu/matMulMul.js +1 -1
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +13 -13
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/grads/attentionMask.js +1 -1
- package/dist/ops/grads/fusedSoftmax.js +2 -2
- package/dist/ops/grads/gelu.js +2 -2
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/normRMS.js +1 -1
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/matMulGelu.js +1 -1
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/rope.js +4 -4
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/adamAdjust.js +2 -2
- package/dist/ops/webgl/adamMoments.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +4 -4
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.js +3 -3
- package/dist/ops/webgl/matMulGelu.js +4 -4
- package/dist/ops/webgl/matMulMul.js +1 -1
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/ops/webgpu/adamAdjust.js +3 -3
- package/dist/ops/webgpu/adamMoments.js +3 -3
- package/dist/ops/webgpu/appendCache.js +3 -3
- package/dist/ops/webgpu/attentionMask.js +3 -3
- package/dist/ops/webgpu/gatherSub.js +3 -3
- package/dist/ops/webgpu/gelu.js +3 -3
- package/dist/ops/webgpu/normRMS.js +2 -2
- package/dist/ops/webgpu/normRMSGrad.js +5 -5
- package/dist/ops/webgpu/qkv.js +3 -3
- package/dist/ops/webgpu/rope.js +3 -3
- package/dist/ops/webgpu/scatterSub.js +3 -3
- package/dist/ops/webgpu/utils/reductions.js +4 -4
- package/dist/ops-542ai2vG.js +1525 -0
- package/dist/{random_width-D8Pwy_na.js → random_width-DKGeiFuR.js} +1514 -1581
- package/dist/{range-LVHrSLdi.js → range-BcUvLuf5.js} +1 -1
- package/dist/{reciprocal-CaR9e67G.js → reciprocal-DhDWSKiD.js} +1 -1
- package/dist/{register_all_kernels-DUshvVWP.js → register_all_kernels-Do9VvZmo.js} +2312 -2335
- package/dist/{max-B3JOcNGb.js → relu-B1AXs7p5.js} +6 -6
- package/dist/{reshape-DEfQGSin.js → reshape-WeJkT3ja.js} +1 -1
- package/dist/{scatter_nd_util-CUPPNLaA.js → scatter_nd_util-B7yDhiQr.js} +1 -1
- package/dist/{selu_util-8vv5JxQV.js → selu_util-BgUO9gHY.js} +125 -146
- package/dist/{shared-D1elLckx.js → shared-CZiWmQCI.js} +1 -1
- package/dist/{shared-CkNorDcU.js → shared-V6D_md-c.js} +120 -120
- package/dist/{sin-D2CKKmyR.js → sin-CPxad7Am.js} +1 -1
- package/dist/{slice-BnyE-M_7.js → slice-B7jXtPnp.js} +1 -1
- package/dist/{softmax-DLoZWYBx.js → softmax-BfsyI4As.js} +1 -1
- package/dist/{split-By_n4TKP.js → split-BPxr8_8m.js} +1 -1
- package/dist/{stack-DkdFLq37.js → stack-BNwLzE43.js} +1 -1
- package/dist/{sum-l_0SqM4h.js → sum-ByFINZgi.js} +1 -1
- package/dist/{tensor-BAQdLqoU.js → tensor-DbqgIV9B.js} +1 -1
- package/dist/tensor1d-CtJq5BOv.js +27 -0
- package/dist/{tensor2d-BHy261cI.js → tensor2d-CObBWBkW.js} +1 -1
- package/dist/tensor4d-DLtk7Nxh.js +30 -0
- package/dist/training/Adam.js +2 -2
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +2 -2
- package/dist/training/sparseCrossEntropy.js +8 -9
- package/dist/utilities/arrayClose.d.ts +1 -1
- package/dist/utilities/arrayClose.js +16 -7
- package/dist/utilities/dummy.js +2 -2
- package/dist/utilities/multinomialCPU.js +2 -2
- package/dist/utilities/performance.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-C9hihzDB.js → variable-DPFOJyRG.js} +1 -1
- package/dist/{webgpu_program-dFEVbDPL.js → webgpu_program-Dhk9R5aG.js} +1 -1
- package/dist/{webgpu_util-DLImlSc6.js → webgpu_util-BqGnZg8t.js} +1 -1
- package/dist/{zeros-VZ72lWXM.js → zeros-Dnwix0p4.js} +1 -1
- package/package.json +1 -1
- package/dist/ops-C_1K_-35.js +0 -1202
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { m as hr, c as pr, P as He, t as B, g as y, a as J, b as q, d as Pe, e as fr, f as mr } from "./webgpu_program-
|
|
3
|
-
import { i as wt, G as it, a as gr, c as S, f as I, M as j, b as bt, d as yt, e as St } from "./webgpu_util-
|
|
4
|
-
import { m as rt, E as xr, u as Cr, w as wr, x as br, y as yr, z as Sr, f as at, A as vt, B as It, C as kt, D as vr, F as Ir, G as xe, H as kr, I as Rr, J as Pr, K as Dr, L as Nr, M as $r, N as zr, O as Ar, P as Fr, Q as Lr, S as Br } from "./backend_util-
|
|
5
|
-
import { S as Er, a as Tr, h as be, i as Ae, p as Wr, q as _r, j as ye, d as ee, e as Xe, g as Ke, k as Rt,
|
|
6
|
-
import { r as R, a as Mr } from "./Reshape-
|
|
7
|
-
import { s as Or } from "./shared-
|
|
8
|
-
import { c as qe, g as Se, a as ve, b as Ye, e as Gr, h as Pt } from "./axis_util-
|
|
9
|
-
import { z as Hr } from "./zeros-
|
|
1
|
+
import { ag as U, d4 as Jt, d5 as es, e as We, n as L, d6 as _e, j as $, aO as Ge, ap as de, d7 as st, d8 as ts, d9 as ss, aP as os, bn as Ct, a3 as Re, da as is, db as rs, dc as as, bv as ns, l as Z, cg as us, dd as ot, az as ds, aa as ls, u as ge, bm as cs, co as hs, cp as ps, bt as fs, cq as ms, bc as gs, af as ze, p as se, aB as xs, bJ as Cs, bK as ws, bL as bs, cr as ys, cs as Ss, ct as vs, cv as Is, cu as ks, cw as Rs, ah as Ps, b5 as Ds, bM as Ns, bN as $s, cx as zs, cy as As, U as Fs, $ as Ls, bP as Bs, aS as Es, de as Ts, b7 as Ws, b8 as _s, bq as Vs, br as Us, bw as Ms, bR as Os, cA as Gs, a$ as Hs, I as Xs, bS as Ks, cc as qs, bT as Ys, bU as Qs, cB as js, bV as Zs, ac as Js, bW as eo, bd as to, bX as so, bY as oo, bZ as io, df as ro, b_ as ao, ce as no, cf as uo, dg as lo, cC as co, cD as ho, cE as po, dh as fo, bB as mo, a1 as go, aU as xo, as as Co, cF as wo, bx as bo, b$ as yo, ai as So, aY as vo, by as Io, di as ko, be as Ro, ao as Po, bz as Do, dj as No, bQ as $o, cd as zo, dk as Ao, a8 as Fo, G as Lo, aZ as Bo, a_ as Eo, dl as To, cG as Wo, cH as _o, cI as Vo, at as Uo, b0 as Mo, b1 as Oo, dm as Go, aj as Ho, b2 as Xo, b4 as Ko, c1 as qo, dn as Yo, cL as Qo, cK as jo, bA as Zo, c2 as Jo, c3 as ei, cM as ti, cN as si, dp as oi, aV as ii, b6 as ri, cO as ai, Y as ni, S as ui, a6 as di, b3 as li, bg as ci, bh as hi, c4 as pi, cW as fi, c5 as mi, P as gi, a4 as xi, c6 as Ci, cP as wi, au as bi, bC as yi, R as Si, aC as vi, Z as Ii, _ as ki, av as Ri, bj as Pi, cQ as Di, bk as Ni, cR as $i, c8 as zi, bf as Ai, b9 as Fi, bD as Li, a7 as Bi, dq as Ei, aT as Ti, c9 as Wi, ar as _i, cS as Vi, ad as Ui, ca as Mi, c0 as Oi, c7 as Gi, dr as Hi, ds as Xi, X as Ki, dt as qi, ab as Yi, W as Qi, bF as ji, cT as Zi, ba as Ji, aw as er, du as tr, dv as sr, bH as or, cU as ir, bO as rr, dw as ar, dx as nr, bl as ur, bb as dr, cb as lr, f as cr } from "./index-DdmHGZjq.js";
|
|
2
|
+
import { m as hr, c as pr, P as He, t as B, g as y, a as J, b as q, d as Pe, e as fr, f as mr } from "./webgpu_program-Dhk9R5aG.js";
|
|
3
|
+
import { i as wt, G as it, a as gr, c as S, f as I, M as j, b as bt, d as yt, e as St } from "./webgpu_util-BqGnZg8t.js";
|
|
4
|
+
import { m as rt, E as xr, u as Cr, w as wr, x as br, y as yr, z as Sr, f as at, A as vt, B as It, C as kt, D as vr, F as Ir, G as xe, H as kr, I as Rr, J as Pr, K as Dr, L as Nr, M as $r, N as zr, O as Ar, P as Fr, Q as Lr, S as Br } from "./backend_util-yC3YH1jo.js";
|
|
5
|
+
import { S as Er, a as Tr, h as be, i as Ae, p as Wr, q as _r, j as ye, d as ee, e as Xe, g as Ke, k as Rt, A as Vr, B as Ur } from "./selu_util-BgUO9gHY.js";
|
|
6
|
+
import { r as R, a as Mr } from "./Reshape-41YpQqEo.js";
|
|
7
|
+
import { s as Or } from "./shared-V6D_md-c.js";
|
|
8
|
+
import { c as qe, g as Se, a as ve, b as Ye, e as Gr, h as Pt } from "./axis_util-Did9235A.js";
|
|
9
|
+
import { z as Hr } from "./zeros-Dnwix0p4.js";
|
|
10
10
|
import { n as Xr, a as Kr } from "./non_max_suppression_impl-CsEgBuMA.js";
|
|
11
|
-
import { c as Qe } from "./scatter_nd_util-
|
|
11
|
+
import { c as Qe } from "./scatter_nd_util-B7yDhiQr.js";
|
|
12
12
|
/**
|
|
13
13
|
* @license
|
|
14
14
|
* Copyright 2019 Google LLC. All Rights Reserved.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { l as B, j as G,
|
|
2
|
-
import { u as O, f as Y } from "./gpgpu_math-
|
|
3
|
-
import { f as v } from "./backend_util-
|
|
1
|
+
import { l as B, j as G, az as K, aa as z, at as W, aA as V, ag as N, au as F, u as S } from "./index-DdmHGZjq.js";
|
|
2
|
+
import { u as O, f as Y } from "./gpgpu_math-D_ODOLix.js";
|
|
3
|
+
import { f as v } from "./backend_util-yC3YH1jo.js";
|
|
4
4
|
/**
|
|
5
5
|
* @license
|
|
6
6
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -295,7 +295,7 @@ function L(t) {
|
|
|
295
295
|
return o.complexTensorInfos = { real: i, imag: a }, n;
|
|
296
296
|
}
|
|
297
297
|
const me = {
|
|
298
|
-
kernelName:
|
|
298
|
+
kernelName: z,
|
|
299
299
|
backendName: "webgl",
|
|
300
300
|
kernelFunc: L
|
|
301
301
|
};
|
|
@@ -324,7 +324,7 @@ function oe(t) {
|
|
|
324
324
|
return s.disposeIntermediateTensorInfo(o), a;
|
|
325
325
|
}
|
|
326
326
|
const be = {
|
|
327
|
-
kernelName:
|
|
327
|
+
kernelName: W,
|
|
328
328
|
backendName: "webgl",
|
|
329
329
|
kernelFunc: oe
|
|
330
330
|
};
|
|
@@ -386,7 +386,7 @@ function ye({ opSnippet: t, packedOpSnippet: e, cpuKernelImpl: s, dtype: r }) {
|
|
|
386
386
|
return c ? l = new ne(o.shape, e) : l = new q(o.shape, t), i.runWebGLProgram(l, [o], a);
|
|
387
387
|
};
|
|
388
388
|
}
|
|
389
|
-
function
|
|
389
|
+
function Ae({ opSnippet: t, packedOpSnippet: e, checkOutOfBounds: s = !1, supportsComplex: r = !1, cpuKernelImpl: u, dtype: n }) {
|
|
390
390
|
return ({ inputs: o, backend: i }) => {
|
|
391
391
|
const { a, b: c } = o, l = i;
|
|
392
392
|
if (r && a.dtype === "complex64") {
|
|
@@ -404,8 +404,8 @@ function Ie({ opSnippet: t, packedOpSnippet: e, checkOutOfBounds: s = !1, suppor
|
|
|
404
404
|
shape: c.shape
|
|
405
405
|
}, D = new b(t, a.shape, c.shape);
|
|
406
406
|
return l.runWebGLProgram(D, [$, _], S(p.dtype, x.dtype));
|
|
407
|
-
}),
|
|
408
|
-
return l.disposeIntermediateTensorInfo(g), l.disposeIntermediateTensorInfo(m),
|
|
407
|
+
}), I = L({ inputs: { real: g, imag: m }, backend: l });
|
|
408
|
+
return l.disposeIntermediateTensorInfo(g), l.disposeIntermediateTensorInfo(m), I;
|
|
409
409
|
}
|
|
410
410
|
const d = n || S(a.dtype, c.dtype);
|
|
411
411
|
if ((a.dtype === "string" || c.dtype === "string" || l.shouldExecuteOnCPU([a, c])) && u != null) {
|
|
@@ -415,15 +415,15 @@ function Ie({ opSnippet: t, packedOpSnippet: e, checkOutOfBounds: s = !1, suppor
|
|
|
415
415
|
) : h, m = a.dtype === "string" ? (
|
|
416
416
|
// tslint:disable-next-line: no-any
|
|
417
417
|
v(f)
|
|
418
|
-
) : f, [
|
|
419
|
-
return x.values =
|
|
418
|
+
) : f, [I, C] = u(a.shape, c.shape, g, m, d), p = l.makeTensorInfo(C, d), x = l.texData.get(p.dataId);
|
|
419
|
+
return x.values = I, p;
|
|
420
420
|
}
|
|
421
421
|
const y = N().getBool("WEBGL_PACK_BINARY_OPERATIONS") && e != null;
|
|
422
|
-
let
|
|
423
|
-
return y ?
|
|
422
|
+
let A;
|
|
423
|
+
return y ? A = new E(e, a.shape, c.shape, s) : A = new b(t, a.shape, c.shape), l.runWebGLProgram(A, [a, c], d);
|
|
424
424
|
};
|
|
425
425
|
}
|
|
426
|
-
function
|
|
426
|
+
function Ie(t, e = !1) {
|
|
427
427
|
if (t === "linear")
|
|
428
428
|
return e ? ee : j;
|
|
429
429
|
if (t === "relu")
|
|
@@ -446,7 +446,7 @@ export {
|
|
|
446
446
|
T as C,
|
|
447
447
|
ne as U,
|
|
448
448
|
Z as a,
|
|
449
|
-
|
|
449
|
+
Ae as b,
|
|
450
450
|
pe as c,
|
|
451
451
|
he as d,
|
|
452
452
|
q as e,
|
|
@@ -457,7 +457,7 @@ export {
|
|
|
457
457
|
fe as j,
|
|
458
458
|
xe as k,
|
|
459
459
|
Oe as l,
|
|
460
|
-
|
|
460
|
+
Ie as m,
|
|
461
461
|
me as n,
|
|
462
462
|
ge as o,
|
|
463
463
|
be as p,
|
package/dist/layers/BaseLayer.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { T as p,
|
|
2
|
-
import { v as _ } from "../variable-
|
|
1
|
+
import { T as p, J as g, e as o, K as v } from "../index-DdmHGZjq.js";
|
|
2
|
+
import { v as _ } from "../variable-DPFOJyRG.js";
|
|
3
3
|
class T {
|
|
4
4
|
parent;
|
|
5
5
|
config;
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import { attentionMask as g } from "../ops/attentionMask.js";
|
|
2
2
|
import O from "./BaseLayer.js";
|
|
3
|
-
import { qkv as
|
|
3
|
+
import { qkv as _ } from "../ops/qkv.js";
|
|
4
4
|
import { rope as v } from "../ops/rope.js";
|
|
5
5
|
import { appendCache as V } from "../ops/appendCache.js";
|
|
6
|
-
import { k as c, t as
|
|
7
|
-
import { fusedSoftmax as
|
|
8
|
-
import { d as
|
|
9
|
-
import { v as
|
|
10
|
-
import { r as
|
|
11
|
-
import { r as
|
|
12
|
-
import { m as
|
|
13
|
-
class
|
|
6
|
+
import { k as c, t as P } from "../index-DdmHGZjq.js";
|
|
7
|
+
import { fusedSoftmax as b } from "../ops/fusedSoftmax.js";
|
|
8
|
+
import { d as C } from "../random_width-DKGeiFuR.js";
|
|
9
|
+
import { v as k } from "../variable-DPFOJyRG.js";
|
|
10
|
+
import { r as T, d as L } from "../dropout-CcKSfOYE.js";
|
|
11
|
+
import { r as j } from "../reshape-WeJkT3ja.js";
|
|
12
|
+
import { m as x } from "../mat_mul-Dpy2mMRu.js";
|
|
13
|
+
class W extends O {
|
|
14
14
|
divisor;
|
|
15
15
|
index;
|
|
16
16
|
units;
|
|
@@ -23,34 +23,34 @@ class $ extends O {
|
|
|
23
23
|
build() {
|
|
24
24
|
this.hasVariable(this.ATTN) === !1 && this.setVariable(
|
|
25
25
|
this.ATTN,
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
!0
|
|
29
|
-
|
|
26
|
+
k(
|
|
27
|
+
T([this.config.nEmbed, this.units], 0, 0.02),
|
|
28
|
+
!0,
|
|
29
|
+
`block_${this.index}_attn_cAttn_kernel`
|
|
30
30
|
)
|
|
31
31
|
), this.hasVariable(this.PROJ) === !1 && this.setVariable(
|
|
32
32
|
this.PROJ,
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
!0
|
|
36
|
-
|
|
33
|
+
k(
|
|
34
|
+
T([this.projUnits, this.config.nEmbed], 0, 0.02),
|
|
35
|
+
!0,
|
|
36
|
+
`block_${this.index}_attn_cProj_kernel`
|
|
37
37
|
)
|
|
38
38
|
);
|
|
39
39
|
}
|
|
40
40
|
getAttentionScores(t, i, s, o) {
|
|
41
|
-
const e = g(t, i, this.divisor), n =
|
|
41
|
+
const e = g(t, i, this.divisor), n = b(e, s ? this.config.dropout : 0, o);
|
|
42
42
|
return e.dispose(), n;
|
|
43
43
|
}
|
|
44
44
|
// Attention with optional past. If pastLen > 0 and T_cur == 1, no mask needed.
|
|
45
45
|
getAttentionScoresWithPast(t, i, s) {
|
|
46
|
-
const o = g(t, i, this.divisor, s), e =
|
|
46
|
+
const o = g(t, i, this.divisor, s), e = b(o, 0, 0);
|
|
47
47
|
return o.dispose(), e;
|
|
48
48
|
}
|
|
49
49
|
getQKV(t) {
|
|
50
|
-
return
|
|
50
|
+
return _(t, this.getVariable(this.ATTN), this.config.nHead);
|
|
51
51
|
}
|
|
52
52
|
getOutputProjection(t) {
|
|
53
|
-
const i = t.shape[0], s = t.shape[2], o = this.config.nEmbed, e = t.transpose([0, 2, 1, 3]), n =
|
|
53
|
+
const i = t.shape[0], s = t.shape[2], o = this.config.nEmbed, e = t.transpose([0, 2, 1, 3]), n = j(e, [i, s, o]), r = C(n, this.getVariable(this.PROJ));
|
|
54
54
|
return n.dispose(), e.dispose(), r;
|
|
55
55
|
}
|
|
56
56
|
updateCache(t, i, s) {
|
|
@@ -62,19 +62,19 @@ class $ extends O {
|
|
|
62
62
|
s.length = d, s.cumulativeLength = h, s.k = c(r), s.v = c(p);
|
|
63
63
|
}
|
|
64
64
|
forward(t, i) {
|
|
65
|
-
return
|
|
65
|
+
return P(() => {
|
|
66
66
|
this.startMemory();
|
|
67
67
|
const [s, o, e] = this.getQKV(i), n = t.pastKV ? t.pastKV.cumulativeLength : 0, r = t.ropeCache, p = r ? v(s, r, n) : s, d = r ? v(o, r, n) : o;
|
|
68
68
|
r && (s.dispose(), o.dispose());
|
|
69
69
|
const h = t.pastKV ? t.pastKV.length : 0;
|
|
70
70
|
t.pastKV && !t.training && this.updateCache(d, e, t.pastKV);
|
|
71
|
-
const u = t.pastKV?.k ? t.pastKV.k : d,
|
|
71
|
+
const u = t.pastKV?.k ? t.pastKV.k : d, l = t.pastKV?.v ? t.pastKV.v : e;
|
|
72
72
|
let a;
|
|
73
73
|
h > 0 ? a = this.getAttentionScoresWithPast(p, u, h) : a = this.getAttentionScores(p, u, t.training, t.seed || 0), p.dispose(), t.pastKV || u.dispose();
|
|
74
|
-
const
|
|
75
|
-
f || a.dispose(), t.pastKV ||
|
|
76
|
-
const A = this.getOutputProjection(
|
|
77
|
-
if (
|
|
74
|
+
const m = x(a, l), f = t.attentionScores !== void 0 && t.attentionScores.attentionOut !== void 0;
|
|
75
|
+
f || a.dispose(), t.pastKV || l.dispose();
|
|
76
|
+
const A = this.getOutputProjection(m);
|
|
77
|
+
if (m.dispose(), f && t.attentionScores && t.attentionScores.attentionOut !== void 0) {
|
|
78
78
|
const K = a.shape[1], S = a.shape[2];
|
|
79
79
|
t.attentionScores.attentionOut?.push(
|
|
80
80
|
c(a.slice([0, 0, 0, 0], [1, -1, -1, -1]).reshape([K, S, -1]))
|
|
@@ -85,12 +85,12 @@ class $ extends O {
|
|
|
85
85
|
}
|
|
86
86
|
dropout(t) {
|
|
87
87
|
if (this.config.dropout > 0) {
|
|
88
|
-
const i =
|
|
88
|
+
const i = L(t, this.config.dropout);
|
|
89
89
|
return t.dispose(), i;
|
|
90
90
|
} else
|
|
91
91
|
return t;
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
94
|
export {
|
|
95
|
-
|
|
95
|
+
W as default
|
|
96
96
|
};
|
package/dist/layers/MLP.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import { t as p } from "../index-
|
|
1
|
+
import { t as p } from "../index-DdmHGZjq.js";
|
|
2
2
|
import u from "./BaseLayer.js";
|
|
3
3
|
import { matMulGelu as M } from "../ops/matMulGelu.js";
|
|
4
|
-
import { v as
|
|
5
|
-
import { r as
|
|
6
|
-
import { r as
|
|
7
|
-
import { m as
|
|
4
|
+
import { v as a } from "../variable-DPFOJyRG.js";
|
|
5
|
+
import { r as d, d as c } from "../dropout-CcKSfOYE.js";
|
|
6
|
+
import { r as h } from "../reshape-WeJkT3ja.js";
|
|
7
|
+
import { m as b } from "../mat_mul-Dpy2mMRu.js";
|
|
8
8
|
class H extends u {
|
|
9
9
|
index;
|
|
10
10
|
hiddenUnits;
|
|
@@ -16,32 +16,32 @@ class H extends u {
|
|
|
16
16
|
build() {
|
|
17
17
|
this.hasVariable(this.MLPHIDDEN) === !1 && this.setVariable(
|
|
18
18
|
this.MLPHIDDEN,
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
!0
|
|
22
|
-
|
|
19
|
+
a(
|
|
20
|
+
d([this.config.nEmbed, this.hiddenUnits], 0, 0.02),
|
|
21
|
+
!0,
|
|
22
|
+
`block_${this.index}_mlpHidden_kernel`
|
|
23
23
|
)
|
|
24
24
|
), this.hasVariable(this.MLPOUT) === !1 && this.setVariable(
|
|
25
25
|
this.MLPOUT,
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
!0
|
|
29
|
-
|
|
26
|
+
a(
|
|
27
|
+
d([this.hiddenUnits, this.config.nEmbed], 0, 0.02 / Math.sqrt(2 * this.config.nLayer)),
|
|
28
|
+
!0,
|
|
29
|
+
`block_${this.index}_mlpOut_kernel`
|
|
30
30
|
)
|
|
31
31
|
);
|
|
32
32
|
}
|
|
33
33
|
forward(i, t) {
|
|
34
34
|
return p(() => {
|
|
35
35
|
this.startMemory();
|
|
36
|
-
const [s,
|
|
37
|
-
|
|
38
|
-
const
|
|
39
|
-
return this.endMemory("MLP"),
|
|
36
|
+
const [s, e, r] = t.shape, n = h(t, [s * e, r]), o = M(n, this.getVariable(this.MLPHIDDEN)), l = b(o, this.getVariable(this.MLPOUT));
|
|
37
|
+
o.dispose();
|
|
38
|
+
const m = h(l, [s, e, r]);
|
|
39
|
+
return this.endMemory("MLP"), m;
|
|
40
40
|
});
|
|
41
41
|
}
|
|
42
42
|
dropout(i) {
|
|
43
43
|
if (this.config.dropout > 0) {
|
|
44
|
-
const t =
|
|
44
|
+
const t = c(i, this.config.dropout);
|
|
45
45
|
return i.dispose(), t;
|
|
46
46
|
}
|
|
47
47
|
return i;
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { t as c,
|
|
1
|
+
import { t as c, a9 as u, b as i } from "../index-DdmHGZjq.js";
|
|
2
2
|
import f from "./BaseLayer.js";
|
|
3
|
-
import { E as g, D as h } from "../random_width-
|
|
4
|
-
import { r as b } from "../exports_initializers-
|
|
5
|
-
import { m as l } from "../mod-
|
|
6
|
-
import { r as w } from "../range-
|
|
3
|
+
import { E as g, D as h } from "../random_width-DKGeiFuR.js";
|
|
4
|
+
import { r as b } from "../exports_initializers-DKk7-bsx.js";
|
|
5
|
+
import { m as l } from "../mod-CbibJi3D.js";
|
|
6
|
+
import { r as w } from "../range-BcUvLuf5.js";
|
|
7
7
|
/**
|
|
8
8
|
* @license
|
|
9
9
|
* Copyright 2018 Google LLC
|
package/dist/layers/RMSNorm.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { t as s } from "../index-
|
|
1
|
+
import { t as s } from "../index-DdmHGZjq.js";
|
|
2
2
|
import e from "./BaseLayer.js";
|
|
3
3
|
import { normRMS as a } from "../ops/normRMS.js";
|
|
4
|
-
import { v as i } from "../variable-
|
|
5
|
-
import { o as m } from "../ones-
|
|
4
|
+
import { v as i } from "../variable-DPFOJyRG.js";
|
|
5
|
+
import { o as m } from "../ones-BAqVh-eA.js";
|
|
6
6
|
class l extends e {
|
|
7
7
|
GAMMA;
|
|
8
8
|
constructor(r, t = "", o) {
|
package/dist/layers/RoPECache.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { b as t, x as h, t as n, k as p } from "../index-
|
|
2
|
-
import { r as c } from "../reciprocal-
|
|
3
|
-
import { c as f, s as m } from "../sin-
|
|
4
|
-
import { r as a } from "../range-
|
|
1
|
+
import { b as t, x as h, t as n, k as p } from "../index-DdmHGZjq.js";
|
|
2
|
+
import { r as c } from "../reciprocal-DhDWSKiD.js";
|
|
3
|
+
import { c as f, s as m } from "../sin-CPxad7Am.js";
|
|
4
|
+
import { r as a } from "../range-BcUvLuf5.js";
|
|
5
5
|
class D {
|
|
6
6
|
constructor(o) {
|
|
7
7
|
this.config = o;
|
|
@@ -1,29 +1,29 @@
|
|
|
1
|
-
import { d as r } from "../random_width-
|
|
2
|
-
import "../index-
|
|
3
|
-
import { r as a } from "../exports_initializers-
|
|
1
|
+
import { d as r } from "../random_width-DKGeiFuR.js";
|
|
2
|
+
import "../index-DdmHGZjq.js";
|
|
3
|
+
import { r as a } from "../exports_initializers-DKk7-bsx.js";
|
|
4
4
|
import s from "./BaseLayer.js";
|
|
5
|
-
import { v as
|
|
6
|
-
import { g as
|
|
5
|
+
import { v as o } from "../variable-DPFOJyRG.js";
|
|
6
|
+
import { g as m } from "../gather-CPg6ZlQA.js";
|
|
7
7
|
class S extends s {
|
|
8
8
|
vocabSize;
|
|
9
9
|
embedDim;
|
|
10
10
|
initializer;
|
|
11
11
|
WEIGHTS;
|
|
12
|
-
constructor(i,
|
|
13
|
-
super(i,
|
|
12
|
+
constructor(i, t, e) {
|
|
13
|
+
super(i, e), this.WEIGHTS = t, this.vocabSize = i.vocabSize, this.embedDim = i.nEmbed, this.initializer = a({
|
|
14
14
|
mean: 0,
|
|
15
15
|
stddev: 0.02
|
|
16
|
-
}), this.addVariable(this.WEIGHTS,
|
|
16
|
+
}), this.addVariable(this.WEIGHTS, o(this.initializer.apply([this.vocabSize, this.embedDim]), !0, t));
|
|
17
17
|
}
|
|
18
18
|
embed(i) {
|
|
19
|
-
return
|
|
19
|
+
return m(this.getVariable(this.WEIGHTS), i, 0);
|
|
20
20
|
}
|
|
21
21
|
project(i) {
|
|
22
22
|
return r(i, this.getVariable(this.WEIGHTS).transpose());
|
|
23
23
|
}
|
|
24
24
|
// Dummy, should not be used.
|
|
25
|
-
forward(i,
|
|
26
|
-
return this.project(
|
|
25
|
+
forward(i, t) {
|
|
26
|
+
return this.project(t);
|
|
27
27
|
}
|
|
28
28
|
}
|
|
29
29
|
export {
|
|
@@ -2,7 +2,7 @@ import l from "./CausalSelfAttention.js";
|
|
|
2
2
|
import r from "./MLP.js";
|
|
3
3
|
import o from "./RMSNorm.js";
|
|
4
4
|
import d from "./BaseLayer.js";
|
|
5
|
-
import { t as p } from "../index-
|
|
5
|
+
import { t as p } from "../index-DdmHGZjq.js";
|
|
6
6
|
class k extends d {
|
|
7
7
|
ln1;
|
|
8
8
|
attn;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import l from "../tokeniser/CharTokeniser.js";
|
|
2
2
|
import c from "../tokeniser/bpe.js";
|
|
3
3
|
import { load_safetensors as b } from "../utilities/safetensors.js";
|
|
4
|
-
import {
|
|
4
|
+
import { a2 as y } from "../index-DdmHGZjq.js";
|
|
5
5
|
import { dummyPassAsync as u } from "../utilities/dummy.js";
|
|
6
6
|
import _ from "../models/factory.js";
|
|
7
7
|
async function L(e, a, r, t) {
|
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { a2 as y } from "../index-DdmHGZjq.js";
|
|
2
2
|
import "../ops/cpu/attentionMask.js";
|
|
3
3
|
import "../ops/webgl/attentionMask.js";
|
|
4
4
|
import "../ops/grads/attentionMask.js";
|
|
5
5
|
import "../ops/cpu/qkv.js";
|
|
6
6
|
import "../ops/webgl/qkv.js";
|
|
7
7
|
import "../ops/grads/qkv.js";
|
|
8
|
-
import "../random_width-
|
|
9
|
-
import "../register_all_kernels-
|
|
8
|
+
import "../random_width-DKGeiFuR.js";
|
|
9
|
+
import "../register_all_kernels-Do9VvZmo.js";
|
|
10
10
|
import "../index-Tf7vU29b.js";
|
|
11
|
-
import "../dataset-
|
|
11
|
+
import "../dataset-DPPl-iLT.js";
|
|
12
12
|
import "../ops/cpu/rope.js";
|
|
13
13
|
import "../ops/webgl/rope.js";
|
|
14
14
|
import "../ops/grads/rope.js";
|
|
@@ -41,10 +41,12 @@ import "../ops/webgl/adamMoments.js";
|
|
|
41
41
|
import "../papaparse.min-C8l2Kvo1.js";
|
|
42
42
|
import "../ops/cpu/gelu.js";
|
|
43
43
|
import "../ops/webgl/gelu.js";
|
|
44
|
-
import "../gelu-
|
|
44
|
+
import "../gelu-BkcmEEyD.js";
|
|
45
45
|
import "../ops/webgl/log.js";
|
|
46
|
+
import "../checks/normRMS.js";
|
|
47
|
+
import "../checks/normRMSGrad.js";
|
|
46
48
|
import { importWeights as u } from "../utilities/weights.js";
|
|
47
|
-
async function
|
|
49
|
+
async function ht(r) {
|
|
48
50
|
const e = /* @__PURE__ */ new Map(), a = await r.file("manifest.json")?.async("string");
|
|
49
51
|
if (!a)
|
|
50
52
|
throw new Error("Manifest file not found in the zip archive");
|
|
@@ -67,5 +69,5 @@ async function dt(r) {
|
|
|
67
69
|
return await g(n), n.loadWeights(c), { model: n, tokeniser: f };
|
|
68
70
|
}
|
|
69
71
|
export {
|
|
70
|
-
|
|
72
|
+
ht as default
|
|
71
73
|
};
|
|
@@ -1,8 +1,28 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { e as
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
|
|
1
|
+
import { C as r, D as x, E as p, ah as h, ai as E, aj as $, p as d, c as S, q as K } from "./index-DdmHGZjq.js";
|
|
2
|
+
import { e as _ } from "./axis_util-Did9235A.js";
|
|
3
|
+
import { r as m } from "./reshape-WeJkT3ja.js";
|
|
4
|
+
import { s as T } from "./sum-ByFINZgi.js";
|
|
5
|
+
/**
|
|
6
|
+
* @license
|
|
7
|
+
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
8
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
* you may not use this file except in compliance with the License.
|
|
10
|
+
* You may obtain a copy of the License at
|
|
11
|
+
*
|
|
12
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
13
|
+
*
|
|
14
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
15
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
16
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
17
|
+
* See the License for the specific language governing permissions and
|
|
18
|
+
* limitations under the License.
|
|
19
|
+
* =============================================================================
|
|
20
|
+
*/
|
|
21
|
+
function b(s, o = null, n = !1) {
|
|
22
|
+
const t = { x: x(s, "x", "max") }, e = { reductionIndices: o, keepDims: n };
|
|
23
|
+
return p.runKernel(h, t, e);
|
|
24
|
+
}
|
|
25
|
+
const D = /* @__PURE__ */ r({ max_: b });
|
|
6
26
|
/**
|
|
7
27
|
* @license
|
|
8
28
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -19,11 +39,11 @@ import { s as _ } from "./sum-l_0SqM4h.js";
|
|
|
19
39
|
* limitations under the License.
|
|
20
40
|
* =============================================================================
|
|
21
41
|
*/
|
|
22
|
-
function
|
|
23
|
-
const
|
|
24
|
-
return
|
|
42
|
+
function I(s) {
|
|
43
|
+
const n = { x: x(s, "x", "exp") };
|
|
44
|
+
return p.runKernel(E, n);
|
|
25
45
|
}
|
|
26
|
-
const
|
|
46
|
+
const M = /* @__PURE__ */ r({ exp_: I });
|
|
27
47
|
/**
|
|
28
48
|
* @license
|
|
29
49
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -40,11 +60,11 @@ const N = /* @__PURE__ */ r({ exp_: b });
|
|
|
40
60
|
* limitations under the License.
|
|
41
61
|
* =============================================================================
|
|
42
62
|
*/
|
|
43
|
-
function
|
|
44
|
-
const
|
|
45
|
-
return
|
|
63
|
+
function N(s) {
|
|
64
|
+
const n = { x: x(s, "x", "log", "float32") };
|
|
65
|
+
return p.runKernel($, n);
|
|
46
66
|
}
|
|
47
|
-
const
|
|
67
|
+
const j = /* @__PURE__ */ r({ log_: N });
|
|
48
68
|
/**
|
|
49
69
|
* @license
|
|
50
70
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -61,22 +81,23 @@ const v = /* @__PURE__ */ r({ log_: q });
|
|
|
61
81
|
* limitations under the License.
|
|
62
82
|
* =============================================================================
|
|
63
83
|
*/
|
|
64
|
-
function
|
|
65
|
-
const a =
|
|
84
|
+
function q(s, o = null, n = !1) {
|
|
85
|
+
const a = x(s, "x", "logSumExp"), t = d(o, a.shape), e = D(
|
|
66
86
|
a,
|
|
67
87
|
t,
|
|
68
88
|
!0
|
|
69
89
|
/* keepDims */
|
|
70
|
-
), i =
|
|
71
|
-
if (
|
|
72
|
-
const g =
|
|
73
|
-
return m(
|
|
90
|
+
), i = S(a, e), l = M(i), f = T(l, t), u = j(f), c = K(m(e, u.shape), u);
|
|
91
|
+
if (n) {
|
|
92
|
+
const g = _(c.shape, t);
|
|
93
|
+
return m(c, g);
|
|
74
94
|
}
|
|
75
|
-
return
|
|
95
|
+
return c;
|
|
76
96
|
}
|
|
77
|
-
const
|
|
97
|
+
const G = /* @__PURE__ */ r({ logSumExp_: q });
|
|
78
98
|
export {
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
99
|
+
j as a,
|
|
100
|
+
M as e,
|
|
101
|
+
G as l,
|
|
102
|
+
D as m
|
|
82
103
|
};
|
package/dist/main.d.ts
CHANGED
package/dist/main.js
CHANGED
|
@@ -7,7 +7,7 @@ import { default as eo } from "./data/textLoader.js";
|
|
|
7
7
|
import { default as io } from "./Generator.js";
|
|
8
8
|
import { default as fo } from "./models/model.js";
|
|
9
9
|
import { estimateMemoryUsage as lo, estimateParameterCount as xo, estimateResources as no, estimateTrainingMemoryUsage as uo, validateConfig as co } from "./utilities/parameters.js";
|
|
10
|
-
import "./index-
|
|
10
|
+
import "./index-DdmHGZjq.js";
|
|
11
11
|
import "./ops/cpu/scatterSub.js";
|
|
12
12
|
import "./ops/webgl/scatterSub.js";
|
|
13
13
|
import "./ops/cpu/gatherSub.js";
|
|
@@ -18,10 +18,10 @@ import "./ops/grads/attentionMask.js";
|
|
|
18
18
|
import "./ops/cpu/qkv.js";
|
|
19
19
|
import "./ops/webgl/qkv.js";
|
|
20
20
|
import "./ops/grads/qkv.js";
|
|
21
|
-
import "./random_width-
|
|
22
|
-
import "./register_all_kernels-
|
|
21
|
+
import "./random_width-DKGeiFuR.js";
|
|
22
|
+
import "./register_all_kernels-Do9VvZmo.js";
|
|
23
23
|
import "./index-Tf7vU29b.js";
|
|
24
|
-
import "./dataset-
|
|
24
|
+
import "./dataset-DPPl-iLT.js";
|
|
25
25
|
import "./ops/cpu/rope.js";
|
|
26
26
|
import "./ops/webgl/rope.js";
|
|
27
27
|
import "./ops/grads/rope.js";
|
|
@@ -35,7 +35,7 @@ import "./ops/webgl/matMulGelu.js";
|
|
|
35
35
|
import "./ops/grads/matMulGelu.js";
|
|
36
36
|
import "./ops/cpu/gelu.js";
|
|
37
37
|
import "./ops/webgl/gelu.js";
|
|
38
|
-
import "./gelu-
|
|
38
|
+
import "./gelu-BkcmEEyD.js";
|
|
39
39
|
import "./ops/cpu/normRMS.js";
|
|
40
40
|
import "./ops/webgl/normRMS.js";
|
|
41
41
|
import "./ops/grads/normRMS.js";
|
|
@@ -44,13 +44,14 @@ import "./ops/cpu/adamMoments.js";
|
|
|
44
44
|
import "./ops/webgl/adamMoments.js";
|
|
45
45
|
import "./ops/cpu/adamAdjust.js";
|
|
46
46
|
import "./ops/webgl/adamAdjust.js";
|
|
47
|
-
import { selectBackend as
|
|
47
|
+
import { selectBackend as ko } from "./backend.js";
|
|
48
48
|
import { default as Co } from "./utilities/performance.js";
|
|
49
49
|
import o from "./layers/CausalSelfAttention.js";
|
|
50
50
|
import r from "./layers/MLP.js";
|
|
51
51
|
import t from "./layers/TransformerBlock.js";
|
|
52
52
|
import m from "./layers/RoPECache.js";
|
|
53
53
|
import { default as go } from "./training/AdamExt.js";
|
|
54
|
+
import { default as Bo } from "./checks/index.js";
|
|
54
55
|
const O = {
|
|
55
56
|
CausalSelfAttention: o,
|
|
56
57
|
MLP: r,
|
|
@@ -65,6 +66,7 @@ export {
|
|
|
65
66
|
fo as Model,
|
|
66
67
|
W as NanoGPT,
|
|
67
68
|
Y as TeachableLLM,
|
|
69
|
+
Bo as checks,
|
|
68
70
|
lo as estimateMemoryUsage,
|
|
69
71
|
xo as estimateParameterCount,
|
|
70
72
|
no as estimateResources,
|
|
@@ -72,7 +74,7 @@ export {
|
|
|
72
74
|
O as layers,
|
|
73
75
|
eo as loadTextData,
|
|
74
76
|
Co as performanceTest,
|
|
75
|
-
|
|
77
|
+
ko as selectBackend,
|
|
76
78
|
co as validateConfig,
|
|
77
79
|
to as waitForModel
|
|
78
80
|
};
|