@genai-fi/nanogpt 0.8.0 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.d.ts +2 -1
- package/dist/Generator.js +44 -37
- package/dist/{RealDiv-N8TpOMYv.js → RealDiv-D_q39E3A.js} +14 -14
- package/dist/{Reshape-B-lWQRnF.js → Reshape-41YpQqEo.js} +1 -1
- package/dist/{Reshape-Bo8HzP8V.js → Reshape-Bh_jzKzV.js} +2 -2
- package/dist/TeachableLLM.js +7 -5
- package/dist/{axis_util-DubwyOhW.js → axis_util-Did9235A.js} +1 -1
- package/dist/backend.js +2 -2
- package/dist/{backend_util-BJ-_jSeK.js → backend_util-yC3YH1jo.js} +17 -17
- package/dist/{broadcast_to-BYfCp5iL.js → broadcast_to-CUvOdOT5.js} +2 -2
- package/dist/checks/appendCache.d.ts +1 -0
- package/dist/checks/appendCache.js +22 -0
- package/dist/checks/attentionMask.d.ts +1 -0
- package/dist/checks/attentionMask.js +37 -0
- package/dist/checks/check.d.ts +9 -0
- package/dist/checks/check.js +20 -0
- package/dist/checks/gelu.d.ts +1 -0
- package/dist/checks/gelu.js +18 -0
- package/dist/checks/index.d.ts +22 -0
- package/dist/checks/index.js +24 -0
- package/dist/checks/normRMS.d.ts +1 -0
- package/dist/checks/normRMS.js +16 -0
- package/dist/checks/normRMSGrad.d.ts +1 -0
- package/dist/checks/normRMSGrad.js +12 -0
- package/dist/checks/qkv.d.ts +1 -0
- package/dist/checks/qkv.js +50 -0
- package/dist/checks/rope.d.ts +1 -0
- package/dist/checks/rope.js +38 -0
- package/dist/checks/weights.d.ts +16 -0
- package/dist/checks/weights.js +29 -0
- package/dist/{concat-BmDqqFsa.js → concat-pHiVqR3L.js} +1 -1
- package/dist/{dataset-CJmEGu6D.js → dataset-DPPl-iLT.js} +7 -7
- package/dist/{dropout-sx0sjVAT.js → dropout-CcKSfOYE.js} +11 -11
- package/dist/{exports_initializers-DAKM8UO9.js → exports_initializers-DKk7-bsx.js} +1 -1
- package/dist/{gather-C1siEkdp.js → gather-CPg6ZlQA.js} +1 -1
- package/dist/{gelu-Bd3UBBxg.js → gelu-BkcmEEyD.js} +1 -1
- package/dist/{gpgpu_math-TFLxaLkw.js → gpgpu_math-D_ODOLix.js} +2 -2
- package/dist/{index-CUQrfsw_.js → index-DdmHGZjq.js} +655 -647
- package/dist/{index-BaPo_0H8.js → index-evZ57wr4.js} +10 -10
- package/dist/{kernel_funcs_utils-P9aFa232.js → kernel_funcs_utils-CDfFpUab.js} +15 -15
- package/dist/layers/BaseLayer.js +2 -2
- package/dist/layers/CausalSelfAttention.js +29 -29
- package/dist/layers/MLP.js +18 -18
- package/dist/layers/PositionEmbedding.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +4 -4
- package/dist/layers/TiedEmbedding.js +11 -11
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/loader/loadTransformers.js +1 -1
- package/dist/loader/oldZipLoad.js +9 -7
- package/dist/{log_sum_exp-C142qZqY.js → log_sum_exp-C8yFJfZz.js} +45 -24
- package/dist/main.d.ts +2 -0
- package/dist/main.js +9 -7
- package/dist/{mat_mul-DMkduNJu.js → mat_mul-Dpy2mMRu.js} +1 -1
- package/dist/{mod-uUuj4gSb.js → mod-CbibJi3D.js} +1 -1
- package/dist/models/NanoGPTV1.js +1 -1
- package/dist/models/model.js +9 -7
- package/dist/{mulmat_packed_gpu-Cm2gw-c8.js → mulmat_packed_gpu-q_Gmwyld.js} +1 -1
- package/dist/{ones-ZdgQGBCP.js → ones-BAqVh-eA.js} +2 -2
- package/dist/ops/adamAdjust.js +1 -1
- package/dist/ops/adamMoments.js +1 -1
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/adamAdjust.js +1 -1
- package/dist/ops/cpu/adamMoments.js +2 -2
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +5 -5
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +5 -5
- package/dist/ops/cpu/gelu.js +1 -1
- package/dist/ops/cpu/matMulGelu.js +2 -2
- package/dist/ops/cpu/matMulMul.js +1 -1
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +13 -13
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/grads/attentionMask.js +1 -1
- package/dist/ops/grads/fusedSoftmax.js +2 -2
- package/dist/ops/grads/gelu.js +2 -2
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/normRMS.js +1 -1
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/matMulGelu.js +1 -1
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/rope.js +4 -4
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/adamAdjust.js +2 -2
- package/dist/ops/webgl/adamMoments.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +4 -4
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.js +3 -3
- package/dist/ops/webgl/matMulGelu.js +4 -4
- package/dist/ops/webgl/matMulMul.js +1 -1
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/ops/webgpu/adamAdjust.js +3 -3
- package/dist/ops/webgpu/adamMoments.js +3 -3
- package/dist/ops/webgpu/appendCache.js +3 -3
- package/dist/ops/webgpu/attentionMask.js +3 -3
- package/dist/ops/webgpu/gatherSub.js +3 -3
- package/dist/ops/webgpu/gelu.js +3 -3
- package/dist/ops/webgpu/normRMS.js +2 -2
- package/dist/ops/webgpu/normRMSGrad.js +5 -5
- package/dist/ops/webgpu/qkv.js +3 -3
- package/dist/ops/webgpu/rope.js +3 -3
- package/dist/ops/webgpu/scatterSub.js +3 -3
- package/dist/ops/webgpu/utils/reductions.js +4 -4
- package/dist/ops-542ai2vG.js +1525 -0
- package/dist/{random_width-D8Pwy_na.js → random_width-DKGeiFuR.js} +1514 -1581
- package/dist/{range-LVHrSLdi.js → range-BcUvLuf5.js} +1 -1
- package/dist/{reciprocal-CaR9e67G.js → reciprocal-DhDWSKiD.js} +1 -1
- package/dist/{register_all_kernels-DUshvVWP.js → register_all_kernels-Do9VvZmo.js} +2312 -2335
- package/dist/{max-B3JOcNGb.js → relu-B1AXs7p5.js} +6 -6
- package/dist/{reshape-DEfQGSin.js → reshape-WeJkT3ja.js} +1 -1
- package/dist/{scatter_nd_util-CUPPNLaA.js → scatter_nd_util-B7yDhiQr.js} +1 -1
- package/dist/{selu_util-8vv5JxQV.js → selu_util-BgUO9gHY.js} +125 -146
- package/dist/{shared-D1elLckx.js → shared-CZiWmQCI.js} +1 -1
- package/dist/{shared-CkNorDcU.js → shared-V6D_md-c.js} +120 -120
- package/dist/{sin-D2CKKmyR.js → sin-CPxad7Am.js} +1 -1
- package/dist/{slice-BnyE-M_7.js → slice-B7jXtPnp.js} +1 -1
- package/dist/{softmax-DLoZWYBx.js → softmax-BfsyI4As.js} +1 -1
- package/dist/{split-By_n4TKP.js → split-BPxr8_8m.js} +1 -1
- package/dist/{stack-DkdFLq37.js → stack-BNwLzE43.js} +1 -1
- package/dist/{sum-l_0SqM4h.js → sum-ByFINZgi.js} +1 -1
- package/dist/{tensor-BAQdLqoU.js → tensor-DbqgIV9B.js} +1 -1
- package/dist/tensor1d-CtJq5BOv.js +27 -0
- package/dist/{tensor2d-BHy261cI.js → tensor2d-CObBWBkW.js} +1 -1
- package/dist/tensor4d-DLtk7Nxh.js +30 -0
- package/dist/training/Adam.js +2 -2
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +2 -2
- package/dist/training/sparseCrossEntropy.js +8 -9
- package/dist/utilities/arrayClose.d.ts +1 -1
- package/dist/utilities/arrayClose.js +16 -7
- package/dist/utilities/dummy.js +2 -2
- package/dist/utilities/multinomialCPU.js +2 -2
- package/dist/utilities/performance.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-C9hihzDB.js → variable-DPFOJyRG.js} +1 -1
- package/dist/{webgpu_program-dFEVbDPL.js → webgpu_program-Dhk9R5aG.js} +1 -1
- package/dist/{webgpu_util-DLImlSc6.js → webgpu_util-BqGnZg8t.js} +1 -1
- package/dist/{zeros-VZ72lWXM.js → zeros-Dnwix0p4.js} +1 -1
- package/package.json +1 -1
- package/dist/ops-C_1K_-35.js +0 -1202
package/dist/Generator.d.ts
CHANGED
|
@@ -45,7 +45,8 @@ export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
|
|
|
45
45
|
generate(prompt?: string, options?: IGenerateOptions): Promise<string>;
|
|
46
46
|
stop(): void;
|
|
47
47
|
getText(): string;
|
|
48
|
-
getAttentionData(): number[][][][];
|
|
48
|
+
getAttentionData(): number[][][][][];
|
|
49
49
|
getProbabilitiesData(): number[][][];
|
|
50
|
+
getEmbeddingsData(): number[][][][];
|
|
50
51
|
getTokens(): number[];
|
|
51
52
|
}
|
package/dist/Generator.js
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
import { E as z } from "./index-Dwqa6Zy2.js";
|
|
2
|
-
import {
|
|
2
|
+
import { C as A, D as L, E as C, a6 as I, t as O, k as R } from "./index-DdmHGZjq.js";
|
|
3
3
|
import "./ops/cpu/attentionMask.js";
|
|
4
4
|
import "./ops/webgl/attentionMask.js";
|
|
5
5
|
import "./ops/grads/attentionMask.js";
|
|
6
6
|
import "./ops/cpu/qkv.js";
|
|
7
7
|
import "./ops/webgl/qkv.js";
|
|
8
8
|
import "./ops/grads/qkv.js";
|
|
9
|
-
import { p as _ } from "./random_width-
|
|
10
|
-
import { t as K } from "./register_all_kernels-
|
|
9
|
+
import { p as _ } from "./random_width-DKGeiFuR.js";
|
|
10
|
+
import { t as K } from "./register_all_kernels-Do9VvZmo.js";
|
|
11
11
|
import "./index-Tf7vU29b.js";
|
|
12
|
-
import "./dataset-
|
|
12
|
+
import "./dataset-DPPl-iLT.js";
|
|
13
13
|
import "./ops/cpu/rope.js";
|
|
14
14
|
import "./ops/webgl/rope.js";
|
|
15
15
|
import "./ops/grads/rope.js";
|
|
@@ -37,14 +37,16 @@ import "./ops/webgl/adamMoments.js";
|
|
|
37
37
|
import "./papaparse.min-C8l2Kvo1.js";
|
|
38
38
|
import "./ops/cpu/gelu.js";
|
|
39
39
|
import "./ops/webgl/gelu.js";
|
|
40
|
-
import "./gelu-
|
|
40
|
+
import "./gelu-BkcmEEyD.js";
|
|
41
41
|
import "./ops/webgl/log.js";
|
|
42
|
+
import "./checks/normRMS.js";
|
|
43
|
+
import "./checks/normRMSGrad.js";
|
|
42
44
|
import $ from "./utilities/multinomialCPU.js";
|
|
43
|
-
import { r as x } from "./reshape-
|
|
44
|
-
import { t as P } from "./tensor2d-
|
|
45
|
-
import { s as v } from "./softmax-
|
|
46
|
-
import { g as q } from "./gather-
|
|
47
|
-
import { c as G } from "./concat-
|
|
45
|
+
import { r as x } from "./reshape-WeJkT3ja.js";
|
|
46
|
+
import { t as P } from "./tensor2d-CObBWBkW.js";
|
|
47
|
+
import { s as v } from "./softmax-BfsyI4As.js";
|
|
48
|
+
import { g as q } from "./gather-CPg6ZlQA.js";
|
|
49
|
+
import { c as G } from "./concat-pHiVqR3L.js";
|
|
48
50
|
/**
|
|
49
51
|
* @license
|
|
50
52
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -61,8 +63,8 @@ import { c as G } from "./concat-BmDqqFsa.js";
|
|
|
61
63
|
* limitations under the License.
|
|
62
64
|
* =============================================================================
|
|
63
65
|
*/
|
|
64
|
-
function N(
|
|
65
|
-
const o = L(
|
|
66
|
+
function N(m, t, e, i = !1) {
|
|
67
|
+
const o = L(m, "logits", "multinomial"), s = o.size, n = o.rank;
|
|
66
68
|
if (s < 2)
|
|
67
69
|
throw new Error(`Error in multinomial: you need at least 2 outcomes, but got ${s}.`);
|
|
68
70
|
if (n > 2)
|
|
@@ -71,8 +73,8 @@ function N(h, t, e, i = !1) {
|
|
|
71
73
|
const a = { logits: n === 1 ? x(o, [1, -1]) : o }, p = { numSamples: t, seed: e, normalized: i }, l = C.runKernel(I, a, p);
|
|
72
74
|
return n === 1 ? x(l, [l.size]) : l;
|
|
73
75
|
}
|
|
74
|
-
const
|
|
75
|
-
...Array.from({ length: 95 }, (
|
|
76
|
+
const D = /* @__PURE__ */ A({ multinomial_: N }), H = [
|
|
77
|
+
...Array.from({ length: 95 }, (m, t) => String.fromCharCode(t + 32)),
|
|
76
78
|
// ASCII
|
|
77
79
|
// Spanish accented letters and punctuation
|
|
78
80
|
..."áéíóúüñ¿¡",
|
|
@@ -83,10 +85,10 @@ const S = /* @__PURE__ */ A({ multinomial_: N }), B = [
|
|
|
83
85
|
// Cyrillic letters
|
|
84
86
|
..."абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ"
|
|
85
87
|
];
|
|
86
|
-
function
|
|
87
|
-
return
|
|
88
|
+
function U(m, t) {
|
|
89
|
+
return m.length === t ? m : m.length > t ? m.slice(0, t) : m.concat(Array(t - m.length).fill(""));
|
|
88
90
|
}
|
|
89
|
-
class
|
|
91
|
+
class qt extends z {
|
|
90
92
|
constructor(t, e) {
|
|
91
93
|
super(), this.model = t, this.tokeniser = e, this.actualTokeniser = e;
|
|
92
94
|
}
|
|
@@ -110,7 +112,9 @@ class Mt extends z {
|
|
|
110
112
|
return null;
|
|
111
113
|
const n = await t.decode([s]);
|
|
112
114
|
if (i) {
|
|
113
|
-
const d = await Promise.all(
|
|
115
|
+
const d = await Promise.all(
|
|
116
|
+
i.map((a) => a.array().then((p) => p))
|
|
117
|
+
);
|
|
114
118
|
i.forEach((a) => a.dispose()), this.attentionData.push(d);
|
|
115
119
|
}
|
|
116
120
|
if (o) {
|
|
@@ -129,13 +133,13 @@ class Mt extends z {
|
|
|
129
133
|
cache: e,
|
|
130
134
|
outputEmbeddings: i?.embeddings ?? !1
|
|
131
135
|
}, p = O(() => {
|
|
132
|
-
const r = t,
|
|
133
|
-
[0,
|
|
136
|
+
const r = t, h = r.shape[1], u = h <= this.model.config.blockSize ? r : r.slice(
|
|
137
|
+
[0, h - this.model.config.blockSize],
|
|
134
138
|
[r.shape[0], this.model.config.blockSize]
|
|
135
|
-
),
|
|
139
|
+
), g = d ? this.model.config.blockSize - u.shape[1] : 0, b = g > 0 ? _(u, [
|
|
136
140
|
[0, 0],
|
|
137
|
-
[0,
|
|
138
|
-
]) : u, [f] = this.model.forward(a, b), y = f.shape[1] - 1 -
|
|
141
|
+
[0, g]
|
|
142
|
+
]) : u, [f] = this.model.forward(a, b), y = f.shape[1] - 1 - g, c = f.slice([0, y, 0], [f.shape[0], 1, f.shape[2]]);
|
|
139
143
|
return a.attentionScores?.attentionOut && a.attentionScores.attentionOut.forEach((T, E) => {
|
|
140
144
|
T.shape[1] !== 1 && (a.attentionScores.attentionOut[E] = R(
|
|
141
145
|
T.slice([0, y, 0], [T.shape[0], 1, T.shape[2]])
|
|
@@ -144,32 +148,32 @@ class Mt extends z {
|
|
|
144
148
|
});
|
|
145
149
|
let l;
|
|
146
150
|
if (n) {
|
|
147
|
-
const r = v(p),
|
|
151
|
+
const r = v(p), h = await r.array();
|
|
148
152
|
r.dispose();
|
|
149
|
-
const u =
|
|
150
|
-
let
|
|
153
|
+
const u = h[0].map((c, k) => ({ prob: c, index: k })).sort((c, k) => k.prob - c.prob);
|
|
154
|
+
let g = 0;
|
|
151
155
|
const b = new Array(u.length).fill(0);
|
|
152
156
|
for (const c of u)
|
|
153
|
-
if (
|
|
157
|
+
if (g += c.prob, b[c.index] = c.prob, g >= n)
|
|
154
158
|
break;
|
|
155
|
-
const f = b.reduce((c,
|
|
159
|
+
const f = b.reduce((c, k) => c + k, 0), y = b.map((c) => c / f);
|
|
156
160
|
l = $(y);
|
|
157
161
|
} else if (s) {
|
|
158
|
-
const { values: r, indices:
|
|
159
|
-
l = q(
|
|
162
|
+
const { values: r, indices: h } = K(p, s), u = D(r, 1);
|
|
163
|
+
l = q(h, u, 1), r.dispose(), h.dispose(), u.dispose();
|
|
160
164
|
} else
|
|
161
|
-
l =
|
|
165
|
+
l = D(p, 1);
|
|
162
166
|
let w;
|
|
163
167
|
i?.includeProbabilities && (w = v(p)), a.embeddings && this.embeddingsData.push(
|
|
164
168
|
await Promise.all(
|
|
165
169
|
a.embeddings.map(async (r) => {
|
|
166
|
-
const
|
|
167
|
-
return r.dispose(),
|
|
170
|
+
const h = await r.array();
|
|
171
|
+
return r.dispose(), h;
|
|
168
172
|
})
|
|
169
173
|
)
|
|
170
174
|
);
|
|
171
|
-
const
|
|
172
|
-
return l.dispose(), l =
|
|
175
|
+
const S = l.reshape([1, 1]);
|
|
176
|
+
return l.dispose(), l = S, p.dispose(), { output: l, probabilities: w, attention: a.attentionScores?.attentionOut };
|
|
173
177
|
}
|
|
174
178
|
/** Generate multiple tokens in a loop and produce text */
|
|
175
179
|
async _generate(t) {
|
|
@@ -213,7 +217,7 @@ class Mt extends z {
|
|
|
213
217
|
s[n] = { k: void 0, v: void 0, length: 0, cumulativeLength: 0 };
|
|
214
218
|
this.cache = s, this.lastToken = -1;
|
|
215
219
|
}
|
|
216
|
-
const o = this.tokeniser.trained ? this.tokeniser : new M(H
|
|
220
|
+
const o = this.tokeniser.trained ? this.tokeniser : new M(U(H, this.tokeniser.vocabSize));
|
|
217
221
|
this.actualTokeniser = o;
|
|
218
222
|
}
|
|
219
223
|
async step(t, e) {
|
|
@@ -237,10 +241,13 @@ class Mt extends z {
|
|
|
237
241
|
getProbabilitiesData() {
|
|
238
242
|
return this.probabilitiesData;
|
|
239
243
|
}
|
|
244
|
+
getEmbeddingsData() {
|
|
245
|
+
return this.embeddingsData;
|
|
246
|
+
}
|
|
240
247
|
getTokens() {
|
|
241
248
|
return this.tokens;
|
|
242
249
|
}
|
|
243
250
|
}
|
|
244
251
|
export {
|
|
245
|
-
|
|
252
|
+
qt as default
|
|
246
253
|
};
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { r as $ } from "./Reshape-
|
|
3
|
-
import { g as A, a as
|
|
4
|
-
import { t as U, m as W } from "./shared-
|
|
5
|
-
import { c as _ } from "./backend_util-
|
|
6
|
-
import { f as y } from "./gpgpu_math-
|
|
7
|
-
import { g as G, b as L } from "./kernel_funcs_utils-
|
|
1
|
+
import { aq as T, ag as E, p as O, j as V, aB as B, a1 as F, ah as j, aC as K } from "./index-DdmHGZjq.js";
|
|
2
|
+
import { r as $ } from "./Reshape-Bh_jzKzV.js";
|
|
3
|
+
import { g as A, a as C, b as k, c as N, e as R } from "./axis_util-Did9235A.js";
|
|
4
|
+
import { t as U, m as W } from "./shared-CZiWmQCI.js";
|
|
5
|
+
import { c as _ } from "./backend_util-yC3YH1jo.js";
|
|
6
|
+
import { f as y } from "./gpgpu_math-D_ODOLix.js";
|
|
7
|
+
import { g as G, b as L } from "./kernel_funcs_utils-CDfFpUab.js";
|
|
8
8
|
/**
|
|
9
9
|
* @license
|
|
10
10
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -105,7 +105,7 @@ class w {
|
|
|
105
105
|
* limitations under the License.
|
|
106
106
|
* =============================================================================
|
|
107
107
|
*/
|
|
108
|
-
class
|
|
108
|
+
class q {
|
|
109
109
|
constructor(s, e) {
|
|
110
110
|
this.variableNames = ["x"];
|
|
111
111
|
const { windowSize: t, batchSize: n, inSize: l, outSize: r } = s;
|
|
@@ -229,7 +229,7 @@ class X {
|
|
|
229
229
|
* limitations under the License.
|
|
230
230
|
* =============================================================================
|
|
231
231
|
*/
|
|
232
|
-
function
|
|
232
|
+
function X(a) {
|
|
233
233
|
const s = [];
|
|
234
234
|
for (; s.length === 0 || s[s.length - 1].outSize !== 1; ) {
|
|
235
235
|
const e = s.length ? s[s.length - 1].outSize : a[1], t = _(e);
|
|
@@ -242,12 +242,12 @@ function q(a) {
|
|
|
242
242
|
return s;
|
|
243
243
|
}
|
|
244
244
|
function P(a, s, e, t) {
|
|
245
|
-
const n =
|
|
245
|
+
const n = X(a.shape);
|
|
246
246
|
let l = a;
|
|
247
247
|
for (let r = 0; r < n.length; r++) {
|
|
248
248
|
const { inSize: i, windowSize: c, outSize: o } = n[r];
|
|
249
249
|
let u, p;
|
|
250
|
-
e === "mean" ? u = r === 0 ? new w({ windowSize: c, inSize: i, batchSize: a.shape[0], outSize: o }, i) : new w({ windowSize: c, inSize: i, batchSize: a.shape[0], outSize: o }) : u = new
|
|
250
|
+
e === "mean" ? u = r === 0 ? new w({ windowSize: c, inSize: i, batchSize: a.shape[0], outSize: o }, i) : new w({ windowSize: c, inSize: i, batchSize: a.shape[0], outSize: o }) : u = new q({ windowSize: c, inSize: i, batchSize: a.shape[0], outSize: o }, e), p = l, l = t.runWebGLProgram(u, [l], s), p.dataId !== a.dataId && t.disposeIntermediateTensorInfo(p);
|
|
251
251
|
}
|
|
252
252
|
return l;
|
|
253
253
|
}
|
|
@@ -381,7 +381,7 @@ function Q(a, s, e, t) {
|
|
|
381
381
|
let i = r;
|
|
382
382
|
const c = A(i, l), o = c != null;
|
|
383
383
|
let u = a;
|
|
384
|
-
o && (u = D(a, c, t), i =
|
|
384
|
+
o && (u = D(a, c, t), i = C(i.length, l)), k("sum", i, l);
|
|
385
385
|
const [p, h] = N(u.shape, i);
|
|
386
386
|
let d = p;
|
|
387
387
|
e && (d = R(p, r));
|
|
@@ -465,9 +465,9 @@ function te(a) {
|
|
|
465
465
|
M.values = z;
|
|
466
466
|
} else
|
|
467
467
|
d = D(n, u, e);
|
|
468
|
-
o =
|
|
468
|
+
o = C(o.length, i);
|
|
469
469
|
}
|
|
470
|
-
|
|
470
|
+
k("max", o, i);
|
|
471
471
|
const [f, S] = N(d.shape, o);
|
|
472
472
|
let g = f;
|
|
473
473
|
r && (g = R(f, c));
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { j as c,
|
|
2
|
-
import { u as g, g as I, a as x, b as F, c as $, d as u, e as m, i as l } from "./gpgpu_math-
|
|
1
|
+
import { j as c, a5 as C, n as f, V as R } from "./index-DdmHGZjq.js";
|
|
2
|
+
import { u as g, g as I, a as x, b as F, c as $, d as u, e as m, i as l } from "./gpgpu_math-D_ODOLix.js";
|
|
3
3
|
/**
|
|
4
4
|
* @license
|
|
5
5
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
package/dist/TeachableLLM.js
CHANGED
|
@@ -5,17 +5,17 @@ import u from "./Generator.js";
|
|
|
5
5
|
import f from "./Trainer.js";
|
|
6
6
|
import { E as p } from "./index-Dwqa6Zy2.js";
|
|
7
7
|
import { dummyPassTrainAsync as m } from "./utilities/dummy.js";
|
|
8
|
-
import "./index-
|
|
8
|
+
import "./index-DdmHGZjq.js";
|
|
9
9
|
import "./ops/cpu/attentionMask.js";
|
|
10
10
|
import "./ops/webgl/attentionMask.js";
|
|
11
11
|
import "./ops/grads/attentionMask.js";
|
|
12
12
|
import "./ops/cpu/qkv.js";
|
|
13
13
|
import "./ops/webgl/qkv.js";
|
|
14
14
|
import "./ops/grads/qkv.js";
|
|
15
|
-
import "./random_width-
|
|
16
|
-
import "./register_all_kernels-
|
|
15
|
+
import "./random_width-DKGeiFuR.js";
|
|
16
|
+
import "./register_all_kernels-Do9VvZmo.js";
|
|
17
17
|
import "./index-Tf7vU29b.js";
|
|
18
|
-
import "./dataset-
|
|
18
|
+
import "./dataset-DPPl-iLT.js";
|
|
19
19
|
import "./ops/cpu/rope.js";
|
|
20
20
|
import "./ops/webgl/rope.js";
|
|
21
21
|
import "./ops/grads/rope.js";
|
|
@@ -40,12 +40,14 @@ import "./papaparse.min-C8l2Kvo1.js";
|
|
|
40
40
|
import "./jszip.min-CjP2V1VV.js";
|
|
41
41
|
import "./ops/cpu/gelu.js";
|
|
42
42
|
import "./ops/webgl/gelu.js";
|
|
43
|
-
import "./gelu-
|
|
43
|
+
import "./gelu-BkcmEEyD.js";
|
|
44
44
|
import "./ops/webgl/log.js";
|
|
45
45
|
import "./ops/cpu/adamMoments.js";
|
|
46
46
|
import "./ops/webgl/adamMoments.js";
|
|
47
47
|
import "./ops/cpu/adamAdjust.js";
|
|
48
48
|
import "./ops/webgl/adamAdjust.js";
|
|
49
|
+
import "./checks/normRMS.js";
|
|
50
|
+
import "./checks/normRMSGrad.js";
|
|
49
51
|
import k from "./utilities/profile.js";
|
|
50
52
|
import w from "./models/factory.js";
|
|
51
53
|
class a {
|
package/dist/backend.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { g as a, s as i, r as o } from "./index-
|
|
1
|
+
import { g as a, s as i, r as o } from "./index-DdmHGZjq.js";
|
|
2
2
|
async function e(t) {
|
|
3
|
-
a() !== t && (t === "webgpu" && (await import("./index-
|
|
3
|
+
a() !== t && (t === "webgpu" && (await import("./index-evZ57wr4.js"), await import("./ops/webgpu/index.js")), await i(t), await o(), console.log(`Backend set to ${t}`));
|
|
4
4
|
}
|
|
5
5
|
export {
|
|
6
6
|
e as selectBackend
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { j as m,
|
|
2
|
-
import { b as L, d as W, f as v, c as N, e as x, g as P, a as C, h as z } from "./axis_util-
|
|
3
|
-
import { S as U, a as B, b as V, c as j, d as G, e as H, f as k, g as q, h as Z, i as X, j as J, k as K, l as Q, m as Y, s as ee, n as te, o as ne, t as se } from "./selu_util-
|
|
4
|
-
import { c as re, v as oe, a as ae } from "./scatter_nd_util-
|
|
1
|
+
import { j as m, a3 as R, n as g, aN as $, aO as O, aP as _, l as M, ae as y, ax as D, aQ as T, u as b, aR as F } from "./index-DdmHGZjq.js";
|
|
2
|
+
import { b as L, d as W, f as v, c as N, e as x, g as P, a as C, h as z } from "./axis_util-Did9235A.js";
|
|
3
|
+
import { S as U, a as B, b as V, c as j, d as G, e as H, f as k, g as q, h as Z, i as X, j as J, k as K, l as Q, m as Y, s as ee, n as te, o as ne, t as se } from "./selu_util-BgUO9gHY.js";
|
|
4
|
+
import { c as re, v as oe, a as ae } from "./scatter_nd_util-B7yDhiQr.js";
|
|
5
5
|
function ie(e, n) {
|
|
6
6
|
const r = e.shape.length, t = n.shape.length;
|
|
7
7
|
if (r < 1)
|
|
@@ -24,7 +24,7 @@ function ie(e, n) {
|
|
|
24
24
|
for (let i = o; i < r; ++i)
|
|
25
25
|
h *= u[i], c.push(u[i]);
|
|
26
26
|
const d = [
|
|
27
|
-
...
|
|
27
|
+
...R(e.shape).map((i) => i / h),
|
|
28
28
|
1
|
|
29
29
|
].slice(0, o);
|
|
30
30
|
return [c, a, h, d];
|
|
@@ -255,7 +255,7 @@ function Se(e, n, r) {
|
|
|
255
255
|
* limitations under the License.
|
|
256
256
|
* =============================================================================
|
|
257
257
|
*/
|
|
258
|
-
const we = 0.3275911, Ae = 0.254829592,
|
|
258
|
+
const we = 0.3275911, Ae = 0.254829592, Re = -0.284496736, Oe = 1.421413741, _e = -1.453152027, Me = 1.061405429;
|
|
259
259
|
/**
|
|
260
260
|
* @license
|
|
261
261
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -593,21 +593,21 @@ const rt = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
|
|
|
593
593
|
*/
|
|
594
594
|
function ot(e) {
|
|
595
595
|
try {
|
|
596
|
-
return e.map((n) =>
|
|
596
|
+
return e.map((n) => O(n));
|
|
597
597
|
} catch (n) {
|
|
598
598
|
throw new Error(`Failed to decode encoded string bytes into utf-8, error: ${n}`);
|
|
599
599
|
}
|
|
600
600
|
}
|
|
601
601
|
function at(e) {
|
|
602
|
-
return e.map((n) =>
|
|
602
|
+
return e.map((n) => _(n));
|
|
603
603
|
}
|
|
604
604
|
const ht = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
|
|
605
605
|
__proto__: null,
|
|
606
606
|
ERF_A1: Ae,
|
|
607
|
-
ERF_A2:
|
|
608
|
-
ERF_A3:
|
|
609
|
-
ERF_A4:
|
|
610
|
-
ERF_A5:
|
|
607
|
+
ERF_A2: Re,
|
|
608
|
+
ERF_A3: Oe,
|
|
609
|
+
ERF_A4: _e,
|
|
610
|
+
ERF_A5: Me,
|
|
611
611
|
ERF_P: we,
|
|
612
612
|
PARALLELIZE_THRESHOLD: I,
|
|
613
613
|
get RowPartitionType() {
|
|
@@ -616,7 +616,7 @@ const ht = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
|
|
|
616
616
|
SELU_SCALE: U,
|
|
617
617
|
SELU_SCALEALPHA: B,
|
|
618
618
|
applyActivation: V,
|
|
619
|
-
assertAndGetBroadcastShape:
|
|
619
|
+
assertAndGetBroadcastShape: M,
|
|
620
620
|
assertAxesAreInnerMostDims: L,
|
|
621
621
|
assertParamsConsistent: ue,
|
|
622
622
|
assignToTypedArray: Le,
|
|
@@ -742,8 +742,8 @@ export {
|
|
|
742
742
|
et as t,
|
|
743
743
|
Ae as u,
|
|
744
744
|
pe as v,
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
745
|
+
Re as w,
|
|
746
|
+
Oe as x,
|
|
747
|
+
_e as y,
|
|
748
|
+
Me as z
|
|
749
749
|
};
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { r as T } from "./reshape-
|
|
1
|
+
import { C as h, D as f, M as p, H as g, E as u, X as b } from "./index-DdmHGZjq.js";
|
|
2
|
+
import { r as T } from "./reshape-WeJkT3ja.js";
|
|
3
3
|
/**
|
|
4
4
|
* @license
|
|
5
5
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function execute(backend: string): Promise<number | number[] | number[][] | number[][][] | number[][][][] | number[][][][][] | number[][][][][][]>;
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { s, e as a } from "../index-DdmHGZjq.js";
|
|
2
|
+
import { t } from "../tensor4d-DLtk7Nxh.js";
|
|
3
|
+
async function u(e) {
|
|
4
|
+
await s(e);
|
|
5
|
+
const n = t(
|
|
6
|
+
[
|
|
7
|
+
[
|
|
8
|
+
[
|
|
9
|
+
[0.1, 0.2, 0, 0],
|
|
10
|
+
[0.1, 0.2, 0, 0],
|
|
11
|
+
[0, 0, 0, 0],
|
|
12
|
+
[0, 0, 0, 0]
|
|
13
|
+
]
|
|
14
|
+
]
|
|
15
|
+
],
|
|
16
|
+
[1, 1, 4, 4]
|
|
17
|
+
), r = t([[[[0.1, 0.2, 0.3, 0.4]]]], [1, 1, 1, 4]);
|
|
18
|
+
return await a().runKernel("AppendCache", { cache: n, item: r }, { maxSize: 4, pastLen: 2 }).array();
|
|
19
|
+
}
|
|
20
|
+
export {
|
|
21
|
+
u as execute
|
|
22
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function execute(backend: string): Promise<number | number[] | number[][] | number[][][] | number[][][][] | number[][][][][] | number[][][][][][]>;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { s as i, e } from "../index-DdmHGZjq.js";
|
|
2
|
+
import { t } from "../tensor4d-DLtk7Nxh.js";
|
|
3
|
+
import { t as a } from "../tensor2d-CObBWBkW.js";
|
|
4
|
+
async function k(n) {
|
|
5
|
+
await i(n);
|
|
6
|
+
const s = t(
|
|
7
|
+
[
|
|
8
|
+
[
|
|
9
|
+
[
|
|
10
|
+
[0.1, 0.2, 0.3, 0.4],
|
|
11
|
+
[0.3, 0.4, 0.5, 0.6]
|
|
12
|
+
]
|
|
13
|
+
]
|
|
14
|
+
],
|
|
15
|
+
[1, 1, 2, 4]
|
|
16
|
+
), o = t(
|
|
17
|
+
[
|
|
18
|
+
[
|
|
19
|
+
[
|
|
20
|
+
[0.5, 0.6, 0.5, 0.6],
|
|
21
|
+
[0.7, 0.8, 0.7, 0.8]
|
|
22
|
+
]
|
|
23
|
+
]
|
|
24
|
+
],
|
|
25
|
+
[1, 1, 2, 4]
|
|
26
|
+
), r = a(
|
|
27
|
+
[
|
|
28
|
+
[0, -1 / 0, -1 / 0, -1 / 0],
|
|
29
|
+
[0, 0, 0, -1 / 0]
|
|
30
|
+
],
|
|
31
|
+
[2, 4]
|
|
32
|
+
);
|
|
33
|
+
return await e().runKernel("AttentionMask", { q: s, k: o, mask: r }, { divisor: 0.5, pastLen: 0 }).array();
|
|
34
|
+
}
|
|
35
|
+
export {
|
|
36
|
+
k as execute
|
|
37
|
+
};
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { arraysClose as l } from "../utilities/arrayClose.js";
|
|
2
|
+
async function f(c, a) {
|
|
3
|
+
const n = ["cpu", "webgl", "webgpu"], t = [];
|
|
4
|
+
for (const e of n)
|
|
5
|
+
try {
|
|
6
|
+
const r = await c(e);
|
|
7
|
+
t.push({ backend: e, result: r, passed: !0 });
|
|
8
|
+
} catch (r) {
|
|
9
|
+
t.push({ backend: e, error: r.message, result: [], passed: !1 });
|
|
10
|
+
}
|
|
11
|
+
const s = await Promise.all(t), u = s[0].result;
|
|
12
|
+
for (let e = 1; e < s.length; e++) {
|
|
13
|
+
const r = s[e].result, o = l(u, r);
|
|
14
|
+
s[e].passed = o <= (a ?? 1e-6), s[e].maxError = o;
|
|
15
|
+
}
|
|
16
|
+
return s;
|
|
17
|
+
}
|
|
18
|
+
export {
|
|
19
|
+
f as default
|
|
20
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function execute(backend: string): Promise<number | number[] | number[][] | number[][][] | number[][][][] | number[][][][][] | number[][][][][][]>;
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { s as e, e as o } from "../index-DdmHGZjq.js";
|
|
2
|
+
import { t as s } from "../tensor2d-CObBWBkW.js";
|
|
3
|
+
async function m(t) {
|
|
4
|
+
await e(t);
|
|
5
|
+
const r = s(
|
|
6
|
+
[
|
|
7
|
+
[0.1, 0.2, 0, 0],
|
|
8
|
+
[0.1, 0.2, 0, 0],
|
|
9
|
+
[0, 0, 0, 0],
|
|
10
|
+
[0, 0, 0, 0]
|
|
11
|
+
],
|
|
12
|
+
[4, 4]
|
|
13
|
+
);
|
|
14
|
+
return await o().runKernel("Gelu", { x: r }).array();
|
|
15
|
+
}
|
|
16
|
+
export {
|
|
17
|
+
m as execute
|
|
18
|
+
};
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { execute as rope } from './rope';
|
|
2
|
+
import { execute as normRMS } from './normRMS';
|
|
3
|
+
import { execute as qkv } from './qkv';
|
|
4
|
+
import { execute as gelu } from './gelu';
|
|
5
|
+
import { execute as normRMSGrad } from './normRMSGrad';
|
|
6
|
+
import { execute as appendCache } from './appendCache';
|
|
7
|
+
import { execute as attentionMask } from './attentionMask';
|
|
8
|
+
import { default as runCheck } from './check';
|
|
9
|
+
import { createWeightStatistics, createTensorStatistics } from './weights';
|
|
10
|
+
declare const checks: {
|
|
11
|
+
rope: typeof rope;
|
|
12
|
+
qkv: typeof qkv;
|
|
13
|
+
gelu: typeof gelu;
|
|
14
|
+
normRMS: typeof normRMS;
|
|
15
|
+
normRMSGrad: typeof normRMSGrad;
|
|
16
|
+
appendCache: typeof appendCache;
|
|
17
|
+
attentionMask: typeof attentionMask;
|
|
18
|
+
runCheck: typeof runCheck;
|
|
19
|
+
createLayerWeightStatistics: typeof createWeightStatistics;
|
|
20
|
+
createWeightStatistics: typeof createTensorStatistics;
|
|
21
|
+
};
|
|
22
|
+
export default checks;
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { execute as e } from "./rope.js";
|
|
2
|
+
import { execute as t } from "./normRMS.js";
|
|
3
|
+
import { execute as r } from "./qkv.js";
|
|
4
|
+
import { execute as c } from "./gelu.js";
|
|
5
|
+
import { execute as o } from "./normRMSGrad.js";
|
|
6
|
+
import { execute as a } from "./appendCache.js";
|
|
7
|
+
import { execute as i } from "./attentionMask.js";
|
|
8
|
+
import m from "./check.js";
|
|
9
|
+
import { createTensorStatistics as s, createWeightStatistics as u } from "./weights.js";
|
|
10
|
+
const d = {
|
|
11
|
+
rope: e,
|
|
12
|
+
qkv: r,
|
|
13
|
+
gelu: c,
|
|
14
|
+
normRMS: t,
|
|
15
|
+
normRMSGrad: o,
|
|
16
|
+
appendCache: a,
|
|
17
|
+
attentionMask: i,
|
|
18
|
+
runCheck: m,
|
|
19
|
+
createLayerWeightStatistics: u,
|
|
20
|
+
createWeightStatistics: s
|
|
21
|
+
};
|
|
22
|
+
export {
|
|
23
|
+
d as default
|
|
24
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function execute(backend: string): Promise<(number | number[] | number[][] | number[][][] | number[][][][] | number[][][][][] | number[][][][][][])[]>;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { s as u, y as A, e as y } from "../index-DdmHGZjq.js";
|
|
2
|
+
import { a as h } from "../ops-542ai2vG.js";
|
|
3
|
+
import { t as p } from "../tensor1d-CtJq5BOv.js";
|
|
4
|
+
import { t as a } from "../tensor-DbqgIV9B.js";
|
|
5
|
+
const w = Array.from({ length: 2048 * 192 }, () => Math.random()), x = Array.from({ length: 192 }, () => Math.random()), M = Array.from({ length: 2048 * 192 }, () => Math.random());
|
|
6
|
+
async function k(t) {
|
|
7
|
+
await u(t);
|
|
8
|
+
const o = p(x, "float32"), n = a(w, [16, 128, 192], "float32"), s = a(M, [16, 128, 192], "float32"), e = (d, g) => {
|
|
9
|
+
const i = y().runKernel("RMSNorm", { x: d, gamma: g });
|
|
10
|
+
return h.meanSquaredError(i, s);
|
|
11
|
+
}, { value: m, grads: r } = A(e)([n, o]), c = await m.array(), f = await r[0].array(), l = await r[1].array();
|
|
12
|
+
return [c, f, l];
|
|
13
|
+
}
|
|
14
|
+
export {
|
|
15
|
+
k as execute
|
|
16
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function execute(backend: string): Promise<(number | number[] | number[][] | number[][][] | number[][][][] | number[][][][][] | number[][][][][][])[]>;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { s as c, e as d } from "../index-DdmHGZjq.js";
|
|
2
|
+
import { t as f } from "../tensor1d-CtJq5BOv.js";
|
|
3
|
+
import { t as r } from "../tensor-DbqgIV9B.js";
|
|
4
|
+
const y = Array.from({ length: 2048 * 192 }, () => Math.random()), i = Array.from({ length: 192 }, () => Math.random()), l = Array.from({ length: 2048 * 192 }, () => Math.random());
|
|
5
|
+
async function x(t) {
|
|
6
|
+
await c(t);
|
|
7
|
+
const o = f(i, "float32"), n = r(y, [16, 128, 192], "float32"), m = r(l, [16, 128, 192], "float32"), a = d().runKernel("RMSNormGrad", { x: n, gamma: o, dy: m }), s = await a[0].array(), e = await a[1].array();
|
|
8
|
+
return [s, e];
|
|
9
|
+
}
|
|
10
|
+
export {
|
|
11
|
+
x as execute
|
|
12
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function execute(backend: string): Promise<(number | number[] | number[][] | number[][][] | number[][][][] | number[][][][][] | number[][][][][][])[]>;
|