@genai-fi/nanogpt 0.8.5 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.d.ts +4 -1
- package/dist/Generator.js +144 -124
- package/dist/{RealDiv-D_q39E3A.js → RealDiv-D4EzDsC0.js} +7 -7
- package/dist/{Reshape-Bh_jzKzV.js → Reshape-Bowtk9BP.js} +2 -2
- package/dist/{Reshape-41YpQqEo.js → Reshape-DUqYftGC.js} +1 -1
- package/dist/TeachableLLM.js +5 -5
- package/dist/Trainer.d.ts +1 -0
- package/dist/Trainer.js +3 -0
- package/dist/{axis_util-Did9235A.js → axis_util-TbGYJ208.js} +1 -1
- package/dist/backend.js +2 -2
- package/dist/{backend_util-yC3YH1jo.js → backend_util-CJIiDoV1.js} +4 -4
- package/dist/{broadcast_to-CUvOdOT5.js → broadcast_to-DzlNweb8.js} +2 -2
- package/dist/checks/appendCache.js +2 -2
- package/dist/checks/attentionMask.js +3 -3
- package/dist/checks/gelu.js +2 -2
- package/dist/checks/matMulGelu.js +5 -5
- package/dist/checks/normRMS.js +4 -4
- package/dist/checks/normRMSGrad.js +3 -3
- package/dist/checks/qkv.js +2 -2
- package/dist/checks/rope.js +2 -2
- package/dist/{concat-pHiVqR3L.js → concat-B912vBbo.js} +1 -1
- package/dist/{dataset-DPPl-iLT.js → dataset-DlZtKmBq.js} +3 -3
- package/dist/{dropout-CcKSfOYE.js → dropout-C-csYCLj.js} +6 -6
- package/dist/{exports_initializers-DKk7-bsx.js → exports_initializers-B8iZMgQ0.js} +1 -1
- package/dist/{gather-CPg6ZlQA.js → gather-Dnpgw-YQ.js} +1 -1
- package/dist/{gelu-BkcmEEyD.js → gelu-Bp_-935b.js} +1 -1
- package/dist/{gpgpu_math-D_ODOLix.js → gpgpu_math-CDaYiyE_.js} +2 -2
- package/dist/{index-DdmHGZjq.js → index-BzFyqcy-.js} +13 -13
- package/dist/{index-evZ57wr4.js → index-C1rx_Ajs.js} +10 -10
- package/dist/{kernel_funcs_utils-CDfFpUab.js → kernel_funcs_utils-DKLK0Mg3.js} +3 -3
- package/dist/layers/BaseLayer.js +2 -2
- package/dist/layers/CausalSelfAttention.js +6 -6
- package/dist/layers/MLP.js +5 -5
- package/dist/layers/PositionEmbedding.js +5 -5
- package/dist/layers/RMSNorm.js +3 -3
- package/dist/layers/RoPECache.js +4 -4
- package/dist/layers/TiedEmbedding.js +5 -5
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/loader/loadTransformers.js +1 -1
- package/dist/loader/oldZipLoad.js +5 -5
- package/dist/{log_sum_exp-C8yFJfZz.js → log_sum_exp-DO6z8tSE.js} +9 -9
- package/dist/main.d.ts +1 -0
- package/dist/main.js +18 -16
- package/dist/{mat_mul-Dpy2mMRu.js → mat_mul-DzjTFx-u.js} +1 -1
- package/dist/{mod-CbibJi3D.js → mod-Dobti4j4.js} +1 -1
- package/dist/models/NanoGPTV1.d.ts +1 -0
- package/dist/models/NanoGPTV1.js +12 -9
- package/dist/models/model.d.ts +1 -0
- package/dist/models/model.js +5 -5
- package/dist/{mulmat_packed_gpu-q_Gmwyld.js → mulmat_packed_gpu-BT60jmzP.js} +1 -1
- package/dist/{ones-BAqVh-eA.js → ones-tIJeHlq-.js} +2 -2
- package/dist/ops/adamAdjust.js +1 -1
- package/dist/ops/adamMoments.js +1 -1
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/adamAdjust.js +1 -1
- package/dist/ops/cpu/adamMoments.js +2 -2
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +5 -5
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +5 -5
- package/dist/ops/cpu/gelu.js +1 -1
- package/dist/ops/cpu/matMulGelu.js +2 -2
- package/dist/ops/cpu/matMulMul.js +1 -1
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +13 -13
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/grads/attentionMask.js +1 -1
- package/dist/ops/grads/fusedSoftmax.js +2 -2
- package/dist/ops/grads/gelu.js +2 -2
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/normRMS.js +1 -1
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/matMulGelu.js +1 -1
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/rope.js +4 -4
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/adamAdjust.js +2 -2
- package/dist/ops/webgl/adamMoments.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +4 -4
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.js +3 -3
- package/dist/ops/webgl/matMulGelu.js +4 -4
- package/dist/ops/webgl/matMulMul.js +1 -1
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/ops/webgpu/adamAdjust.js +3 -3
- package/dist/ops/webgpu/adamMoments.js +3 -3
- package/dist/ops/webgpu/appendCache.js +3 -3
- package/dist/ops/webgpu/attentionMask.js +3 -3
- package/dist/ops/webgpu/gatherSub.js +3 -3
- package/dist/ops/webgpu/gelu.js +37 -35
- package/dist/ops/webgpu/normRMS.js +2 -2
- package/dist/ops/webgpu/normRMSGrad.js +5 -5
- package/dist/ops/webgpu/qkv.js +3 -3
- package/dist/ops/webgpu/rope.js +3 -3
- package/dist/ops/webgpu/scatterSub.js +3 -3
- package/dist/ops/webgpu/utils/reductions.js +4 -4
- package/dist/{ops-542ai2vG.js → ops-LuCMAnmM.js} +65 -65
- package/dist/{random_width-DKGeiFuR.js → random_width-CXVRloNK.js} +23 -23
- package/dist/{range-BcUvLuf5.js → range-CWcz7xFA.js} +3 -3
- package/dist/{reciprocal-DhDWSKiD.js → reciprocal-C4rNcM-S.js} +1 -1
- package/dist/{register_all_kernels-Do9VvZmo.js → register_all_kernels-DIGpEwcf.js} +31 -31
- package/dist/{relu-B1AXs7p5.js → relu-BjCh_SYb.js} +1 -1
- package/dist/{reshape-WeJkT3ja.js → reshape-CnIwVG1c.js} +1 -1
- package/dist/{scatter_nd_util-B7yDhiQr.js → scatter_nd_util-BQdz--Gn.js} +1 -1
- package/dist/{selu_util-BgUO9gHY.js → selu_util-OtRzVwW5.js} +23 -23
- package/dist/{shared-V6D_md-c.js → shared-DmRsFyaJ.js} +6 -6
- package/dist/{shared-CZiWmQCI.js → shared-DuP7ue-R.js} +1 -1
- package/dist/{sin-CPxad7Am.js → sin-gpDNRxE0.js} +1 -1
- package/dist/{slice-B7jXtPnp.js → slice-d0Vo9XTN.js} +1 -1
- package/dist/{softmax-BfsyI4As.js → softmax-D7Jj3p_P.js} +1 -1
- package/dist/{split-BPxr8_8m.js → split-DK2k5eHf.js} +1 -1
- package/dist/{stack-BNwLzE43.js → stack-DFatutCx.js} +1 -1
- package/dist/{sum-ByFINZgi.js → sum-CJ0ULhmt.js} +1 -1
- package/dist/{tensor-DbqgIV9B.js → tensor-CZr4dh61.js} +1 -1
- package/dist/{tensor1d-CtJq5BOv.js → tensor1d-vML0r3q6.js} +1 -1
- package/dist/{tensor2d-CObBWBkW.js → tensor2d-D76QGjF3.js} +1 -1
- package/dist/{tensor4d-DLtk7Nxh.js → tensor4d-Df1WlVDY.js} +1 -1
- package/dist/training/Adam.js +2 -2
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +2 -2
- package/dist/training/sparseCrossEntropy.js +3 -3
- package/dist/utilities/dummy.js +2 -2
- package/dist/utilities/multinomialCPU.js +2 -2
- package/dist/utilities/performance.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/topP.d.ts +1 -0
- package/dist/utilities/topP.js +13 -0
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-DPFOJyRG.js → variable-Bm2OFwGI.js} +1 -1
- package/dist/{webgpu_program-Dhk9R5aG.js → webgpu_program-DkQJOJSd.js} +1 -1
- package/dist/{webgpu_util-BqGnZg8t.js → webgpu_util-pLEV9tks.js} +1 -1
- package/dist/{zeros-Dnwix0p4.js → zeros-Bj5rMYA7.js} +1 -1
- package/package.json +1 -1
package/dist/Generator.d.ts
CHANGED
|
@@ -8,7 +8,8 @@ export interface GenerateOptions {
|
|
|
8
8
|
usePadding?: boolean;
|
|
9
9
|
attentionScores?: boolean;
|
|
10
10
|
includeProbabilities?: boolean;
|
|
11
|
-
embeddings?:
|
|
11
|
+
embeddings?: 'embedding' | 'logits' | 'softmax' | 'all';
|
|
12
|
+
targets?: number[];
|
|
12
13
|
}
|
|
13
14
|
export interface IGenerateOptions extends GenerateOptions {
|
|
14
15
|
maxLength?: number;
|
|
@@ -31,6 +32,7 @@ export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
|
|
|
31
32
|
private probabilitiesData;
|
|
32
33
|
private embeddingsData;
|
|
33
34
|
private tokens;
|
|
35
|
+
private lastLoss;
|
|
34
36
|
constructor(model: Model<ModelForwardAttributes>, tokeniser: ITokeniser);
|
|
35
37
|
private tokenisePrompt;
|
|
36
38
|
private processResponse;
|
|
@@ -52,4 +54,5 @@ export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
|
|
|
52
54
|
tensor: number[][];
|
|
53
55
|
}[][];
|
|
54
56
|
getTokens(): number[];
|
|
57
|
+
getLastLoss(): number | null;
|
|
55
58
|
}
|
package/dist/Generator.js
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
import { E as
|
|
2
|
-
import {
|
|
1
|
+
import { E as C } from "./index-Dwqa6Zy2.js";
|
|
2
|
+
import { E as _, F as I, G as O, a6 as R, t as q, k as K } from "./index-BzFyqcy-.js";
|
|
3
3
|
import "./ops/cpu/attentionMask.js";
|
|
4
4
|
import "./ops/webgl/attentionMask.js";
|
|
5
5
|
import "./ops/grads/attentionMask.js";
|
|
6
6
|
import "./ops/cpu/qkv.js";
|
|
7
7
|
import "./ops/webgl/qkv.js";
|
|
8
8
|
import "./ops/grads/qkv.js";
|
|
9
|
-
import { p as
|
|
10
|
-
import { t as
|
|
9
|
+
import { p as j } from "./random_width-CXVRloNK.js";
|
|
10
|
+
import { t as G } from "./register_all_kernels-DIGpEwcf.js";
|
|
11
11
|
import "./index-Tf7vU29b.js";
|
|
12
|
-
import "./dataset-
|
|
12
|
+
import "./dataset-DlZtKmBq.js";
|
|
13
13
|
import "./ops/cpu/rope.js";
|
|
14
14
|
import "./ops/webgl/rope.js";
|
|
15
15
|
import "./ops/grads/rope.js";
|
|
@@ -24,29 +24,31 @@ import "./ops/grads/matMulGelu.js";
|
|
|
24
24
|
import "./ops/cpu/normRMS.js";
|
|
25
25
|
import "./ops/webgl/normRMS.js";
|
|
26
26
|
import "./ops/grads/normRMS.js";
|
|
27
|
-
import "./
|
|
28
|
-
import "./ops/webgl/gatherSub.js";
|
|
29
|
-
import "./ops/cpu/scatterSub.js";
|
|
30
|
-
import "./ops/webgl/scatterSub.js";
|
|
27
|
+
import { sparseSoftmaxCrossEntropy as V } from "./training/sparseCrossEntropy.js";
|
|
31
28
|
import "./jszip.min-CjP2V1VV.js";
|
|
32
|
-
import
|
|
29
|
+
import $ from "./tokeniser/CharTokeniser.js";
|
|
33
30
|
import "./ops/cpu/adamAdjust.js";
|
|
34
31
|
import "./ops/webgl/adamAdjust.js";
|
|
35
32
|
import "./ops/cpu/adamMoments.js";
|
|
36
33
|
import "./ops/webgl/adamMoments.js";
|
|
37
34
|
import "./papaparse.min-C8l2Kvo1.js";
|
|
35
|
+
import M from "./utilities/topP.js";
|
|
36
|
+
import "./ops/cpu/scatterSub.js";
|
|
37
|
+
import "./ops/webgl/scatterSub.js";
|
|
38
|
+
import "./ops/cpu/gatherSub.js";
|
|
39
|
+
import "./ops/webgl/gatherSub.js";
|
|
38
40
|
import "./ops/cpu/gelu.js";
|
|
39
41
|
import "./ops/webgl/gelu.js";
|
|
40
|
-
import "./gelu-
|
|
42
|
+
import "./gelu-Bp_-935b.js";
|
|
41
43
|
import "./ops/webgl/log.js";
|
|
42
44
|
import "./checks/normRMS.js";
|
|
43
45
|
import "./checks/normRMSGrad.js";
|
|
44
|
-
import
|
|
45
|
-
import { r as
|
|
46
|
-
import { t as P } from "./tensor2d-
|
|
47
|
-
import { s as
|
|
48
|
-
import { g as
|
|
49
|
-
import { c as
|
|
46
|
+
import N from "./utilities/multinomialCPU.js";
|
|
47
|
+
import { r as E } from "./reshape-CnIwVG1c.js";
|
|
48
|
+
import { t as P } from "./tensor2d-D76QGjF3.js";
|
|
49
|
+
import { s as S } from "./softmax-D7Jj3p_P.js";
|
|
50
|
+
import { g as F } from "./gather-Dnpgw-YQ.js";
|
|
51
|
+
import { c as H } from "./concat-B912vBbo.js";
|
|
50
52
|
/**
|
|
51
53
|
* @license
|
|
52
54
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -63,18 +65,18 @@ import { c as G } from "./concat-pHiVqR3L.js";
|
|
|
63
65
|
* limitations under the License.
|
|
64
66
|
* =============================================================================
|
|
65
67
|
*/
|
|
66
|
-
function
|
|
67
|
-
const o =
|
|
68
|
-
if (
|
|
69
|
-
throw new Error(`Error in multinomial: you need at least 2 outcomes, but got ${
|
|
70
|
-
if (
|
|
71
|
-
throw new Error(`Rank of probabilities must be 1 or 2, but is ${
|
|
72
|
-
|
|
73
|
-
const
|
|
74
|
-
return
|
|
68
|
+
function U(p, t, s, e = !1) {
|
|
69
|
+
const o = I(p, "logits", "multinomial"), i = o.size, c = o.rank;
|
|
70
|
+
if (i < 2)
|
|
71
|
+
throw new Error(`Error in multinomial: you need at least 2 outcomes, but got ${i}.`);
|
|
72
|
+
if (c > 2)
|
|
73
|
+
throw new Error(`Rank of probabilities must be 1 or 2, but is ${c}`);
|
|
74
|
+
s = s || Math.random();
|
|
75
|
+
const n = { logits: c === 1 ? E(o, [1, -1]) : o }, l = { numSamples: t, seed: s, normalized: e }, d = O.runKernel(R, n, l);
|
|
76
|
+
return c === 1 ? E(d, [d.size]) : d;
|
|
75
77
|
}
|
|
76
|
-
const
|
|
77
|
-
...Array.from({ length: 95 }, (
|
|
78
|
+
const z = /* @__PURE__ */ _({ multinomial_: U }), W = [
|
|
79
|
+
...Array.from({ length: 95 }, (p, t) => String.fromCharCode(t + 32)),
|
|
78
80
|
// ASCII
|
|
79
81
|
// Spanish accented letters and punctuation
|
|
80
82
|
..."áéíóúüñ¿¡",
|
|
@@ -85,12 +87,12 @@ const D = /* @__PURE__ */ A({ multinomial_: N }), H = [
|
|
|
85
87
|
// Cyrillic letters
|
|
86
88
|
..."абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ"
|
|
87
89
|
];
|
|
88
|
-
function
|
|
89
|
-
return
|
|
90
|
+
function B(p, t) {
|
|
91
|
+
return p.length === t ? p : p.length > t ? p.slice(0, t) : p.concat(Array(t - p.length).fill(""));
|
|
90
92
|
}
|
|
91
|
-
class
|
|
92
|
-
constructor(t,
|
|
93
|
-
super(), this.model = t, this.tokeniser =
|
|
93
|
+
class Wt extends C {
|
|
94
|
+
constructor(t, s) {
|
|
95
|
+
super(), this.model = t, this.tokeniser = s, this.actualTokeniser = s;
|
|
94
96
|
}
|
|
95
97
|
active = !1;
|
|
96
98
|
cache = null;
|
|
@@ -102,130 +104,145 @@ class qt extends z {
|
|
|
102
104
|
probabilitiesData = [];
|
|
103
105
|
embeddingsData = [];
|
|
104
106
|
tokens = [];
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
107
|
+
lastLoss = null;
|
|
108
|
+
async tokenisePrompt(t, s) {
|
|
109
|
+
const e = s ? await t.tokenise([s], !0) : [[t.eosToken]];
|
|
110
|
+
return e[0].length > this.model.config.blockSize && (e[0] = e[0].slice(-this.model.config.blockSize)), P(e, [1, e[0].length], "int32");
|
|
108
111
|
}
|
|
109
|
-
async processResponse(t,
|
|
110
|
-
const
|
|
111
|
-
if (this.lastToken =
|
|
112
|
+
async processResponse(t, s, e, o) {
|
|
113
|
+
const i = (await s.array())[0][0];
|
|
114
|
+
if (this.lastToken = i, i === this.tokeniser.eosToken)
|
|
112
115
|
return null;
|
|
113
|
-
const
|
|
114
|
-
if (
|
|
115
|
-
const
|
|
116
|
-
|
|
116
|
+
const c = await t.decode([i]);
|
|
117
|
+
if (e) {
|
|
118
|
+
const T = await Promise.all(
|
|
119
|
+
e.map((n) => n.array().then((l) => l))
|
|
117
120
|
);
|
|
118
|
-
|
|
119
|
-
}
|
|
120
|
-
if (o) {
|
|
121
|
-
const d = await o.array();
|
|
122
|
-
o.dispose(), this.probabilitiesData.push(d);
|
|
121
|
+
e.forEach((n) => n.dispose()), this.attentionData.push(T);
|
|
123
122
|
}
|
|
124
|
-
return this.tokens.push(
|
|
123
|
+
return o && this.probabilitiesData.push(o), this.tokens.push(i), this.emit("tokens", [i], c), c;
|
|
125
124
|
}
|
|
126
125
|
/** Generate logits and select a token. */
|
|
127
|
-
async _generateToken(t,
|
|
128
|
-
const o =
|
|
126
|
+
async _generateToken(t, s, e) {
|
|
127
|
+
const o = e?.temperature ?? 1, i = e?.topK, c = e?.topP, T = e?.usePadding ?? !1, n = {
|
|
129
128
|
training: !1,
|
|
130
|
-
attentionScores:
|
|
129
|
+
attentionScores: e?.attentionScores ? {
|
|
131
130
|
attentionOut: []
|
|
132
131
|
} : void 0,
|
|
133
|
-
cache:
|
|
134
|
-
outputEmbeddings:
|
|
135
|
-
}, l =
|
|
136
|
-
const
|
|
137
|
-
[0,
|
|
138
|
-
[
|
|
139
|
-
),
|
|
132
|
+
cache: s,
|
|
133
|
+
outputEmbeddings: !!e?.embeddings
|
|
134
|
+
}, [l, d] = q(() => {
|
|
135
|
+
const a = t, m = a.shape[1], h = m <= this.model.config.blockSize ? a : a.slice(
|
|
136
|
+
[0, m - this.model.config.blockSize],
|
|
137
|
+
[a.shape[0], this.model.config.blockSize]
|
|
138
|
+
), r = T ? this.model.config.blockSize - h.shape[1] : 0, v = r > 0 ? j(h, [
|
|
140
139
|
[0, 0],
|
|
141
|
-
[0,
|
|
142
|
-
]) :
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
140
|
+
[0, r]
|
|
141
|
+
]) : h, [g] = this.model.forward(n, v), u = g.shape[1] - 1 - r, f = g.slice([0, u, 0], [g.shape[0], 1, g.shape[2]]);
|
|
142
|
+
let y;
|
|
143
|
+
if (e?.targets) {
|
|
144
|
+
const k = e.targets.shift();
|
|
145
|
+
if (k !== void 0) {
|
|
146
|
+
const w = P([[k]], [1, 1], "int32"), D = V(f, w);
|
|
147
|
+
y = D.mean(), w.dispose(), D.dispose();
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
return n.attentionScores?.attentionOut && n.attentionScores.attentionOut.forEach((k, w) => {
|
|
151
|
+
k.shape[1] !== 1 && (n.attentionScores.attentionOut[w] = K(
|
|
152
|
+
k.slice([0, u, 0], [k.shape[0], 1, k.shape[2]])
|
|
153
|
+
), k.dispose());
|
|
154
|
+
}), g.dispose(), [f.div(o).squeeze([1]), y];
|
|
148
155
|
});
|
|
149
|
-
let
|
|
150
|
-
if (
|
|
151
|
-
const
|
|
152
|
-
|
|
153
|
-
const
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
const
|
|
160
|
-
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
m =
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
156
|
+
let b, x;
|
|
157
|
+
if (c) {
|
|
158
|
+
const a = S(l), m = await a.array();
|
|
159
|
+
a.dispose();
|
|
160
|
+
const h = M(m, c);
|
|
161
|
+
e?.includeProbabilities && (x = m), b = N(h);
|
|
162
|
+
} else if (i) {
|
|
163
|
+
const { values: a, indices: m } = G(l, i), h = z(a, 1);
|
|
164
|
+
b = F(m, h, 1), a.dispose(), m.dispose(), h.dispose();
|
|
165
|
+
} else if (b = z(l, 1), e?.includeProbabilities) {
|
|
166
|
+
const a = S(l);
|
|
167
|
+
x = await a.array(), a.dispose();
|
|
168
|
+
}
|
|
169
|
+
if (n.embeddings) {
|
|
170
|
+
const m = (e?.embeddings === "all" ? n.embeddings : n.embeddings.filter((r) => r.name.startsWith("block_output_"))).map(async (r) => {
|
|
171
|
+
const v = r.tensor.shape[1], g = r.tensor.slice([0, v - 1, 0], [r.tensor.shape[0], 1, r.tensor.shape[2]]);
|
|
172
|
+
r.tensor.dispose();
|
|
173
|
+
const u = g.squeeze([1]);
|
|
174
|
+
if (g.dispose(), e?.embeddings === "softmax") {
|
|
175
|
+
const f = this.model.project(u);
|
|
176
|
+
u.dispose();
|
|
177
|
+
const y = S(f, -1);
|
|
178
|
+
return f.dispose(), { name: r.name, tensor: await y.array() };
|
|
179
|
+
} else if (e?.embeddings === "logits") {
|
|
180
|
+
const f = this.model.project(u);
|
|
181
|
+
return u.dispose(), { name: r.name, tensor: await f.array() };
|
|
182
|
+
} else {
|
|
183
|
+
const f = await u.array();
|
|
184
|
+
return u.dispose(), { name: r.name, tensor: f };
|
|
185
|
+
}
|
|
186
|
+
}), h = await Promise.all(m);
|
|
187
|
+
this.embeddingsData.push(h);
|
|
173
188
|
}
|
|
174
|
-
const
|
|
175
|
-
|
|
189
|
+
const A = b.reshape([1, 1]);
|
|
190
|
+
b.dispose(), b = A, l.dispose();
|
|
191
|
+
let L;
|
|
192
|
+
return d && (L = await d.array(), d.dispose()), { output: b, probabilities: x, attention: n.attentionScores?.attentionOut, loss: L };
|
|
176
193
|
}
|
|
177
194
|
/** Generate multiple tokens in a loop and produce text */
|
|
178
195
|
async _generate(t) {
|
|
179
|
-
let
|
|
180
|
-
const
|
|
181
|
-
for (let o = 0; o <
|
|
196
|
+
let s = this.lastToken >= 0 && this.cache ? P([this.lastToken], [1, 1], "int32") : await this.tokenisePrompt(this.actualTokeniser, this.outputText);
|
|
197
|
+
const e = t?.maxLength ?? 1e3;
|
|
198
|
+
for (let o = 0; o < e && this.active; o++) {
|
|
182
199
|
const {
|
|
183
|
-
output:
|
|
184
|
-
probabilities:
|
|
185
|
-
attention:
|
|
186
|
-
|
|
200
|
+
output: i,
|
|
201
|
+
probabilities: c,
|
|
202
|
+
attention: T,
|
|
203
|
+
loss: n
|
|
204
|
+
} = await this._generateToken(s, this.cache ? this.cache : void 0, {
|
|
187
205
|
...t,
|
|
188
206
|
usePadding: !this.cache
|
|
189
207
|
});
|
|
190
|
-
if (this.cache)
|
|
191
|
-
|
|
208
|
+
if (n !== void 0 && (this.lastLoss = n), this.cache)
|
|
209
|
+
s.dispose(), s = i;
|
|
192
210
|
else {
|
|
193
|
-
const
|
|
194
|
-
|
|
211
|
+
const d = s;
|
|
212
|
+
s = H([s, i], 1), d.dispose();
|
|
195
213
|
}
|
|
196
|
-
const
|
|
197
|
-
if (this.cache ||
|
|
214
|
+
const l = await this.processResponse(this.actualTokeniser, i, T, c);
|
|
215
|
+
if (this.cache || i.dispose(), l === null)
|
|
198
216
|
break;
|
|
199
|
-
this.outputText +=
|
|
217
|
+
this.outputText += l;
|
|
200
218
|
}
|
|
201
|
-
return
|
|
219
|
+
return s.dispose(), this.outputText;
|
|
202
220
|
}
|
|
203
221
|
reset() {
|
|
204
222
|
this.cache && (this.cache.forEach((t) => {
|
|
205
223
|
t && (t.k && t.k.dispose(), t.v && t.v.dispose());
|
|
206
|
-
}), this.cache = null), this.outputText = "", this.initialPrompt = null, this.lastToken = -1, this.attentionData = [], this.probabilitiesData = [], this.tokens = [];
|
|
224
|
+
}), this.cache = null), this.outputText = "", this.initialPrompt = null, this.lastToken = -1, this.attentionData = [], this.probabilitiesData = [], this.tokens = [], this.lastLoss = null;
|
|
207
225
|
}
|
|
208
226
|
dispose() {
|
|
209
227
|
this.reset();
|
|
210
228
|
}
|
|
211
|
-
initialise(t,
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
this.cache = s, this.lastToken = -1;
|
|
229
|
+
initialise(t, s) {
|
|
230
|
+
if (this.cache && s?.noCache && this.reset(), this.initialPrompt = t || null, this.lastToken === -1 && (this.outputText = this.initialPrompt || ""), !this.cache && !s?.noCache && this.model.config.useRope) {
|
|
231
|
+
const o = new Array(this.model.config.nLayer);
|
|
232
|
+
for (let i = 0; i < this.model.config.nLayer; i++)
|
|
233
|
+
o[i] = { k: void 0, v: void 0, length: 0, cumulativeLength: 0 };
|
|
234
|
+
this.cache = o, this.lastToken = -1;
|
|
218
235
|
}
|
|
219
|
-
const
|
|
220
|
-
this.actualTokeniser =
|
|
236
|
+
const e = this.tokeniser.trained ? this.tokeniser : new $(B(W, this.tokeniser.vocabSize));
|
|
237
|
+
this.actualTokeniser = e;
|
|
221
238
|
}
|
|
222
|
-
async step(t,
|
|
223
|
-
const
|
|
224
|
-
return this.generate(t,
|
|
239
|
+
async step(t, s) {
|
|
240
|
+
const e = { ...s, maxLength: 1 };
|
|
241
|
+
return this.generate(t, e);
|
|
225
242
|
}
|
|
226
|
-
async generate(t,
|
|
227
|
-
this.initialise(t,
|
|
228
|
-
const o = await this._generate(
|
|
243
|
+
async generate(t, s) {
|
|
244
|
+
this.initialise(t, s), this.active = !0, s?.maxLength !== 1 && this.emit("start");
|
|
245
|
+
const o = await this._generate(s);
|
|
229
246
|
return this.active = !1, this.emit("stop"), o;
|
|
230
247
|
}
|
|
231
248
|
stop() {
|
|
@@ -246,7 +263,10 @@ class qt extends z {
|
|
|
246
263
|
getTokens() {
|
|
247
264
|
return this.tokens;
|
|
248
265
|
}
|
|
266
|
+
getLastLoss() {
|
|
267
|
+
return this.lastLoss;
|
|
268
|
+
}
|
|
249
269
|
}
|
|
250
270
|
export {
|
|
251
|
-
|
|
271
|
+
Wt as default
|
|
252
272
|
};
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import { aq as T, ag as E, p as O, j as V, aB as B, a1 as F, ah as j, aC as K } from "./index-
|
|
2
|
-
import { r as $ } from "./Reshape-
|
|
3
|
-
import { g as A, a as C, b as k, c as N, e as R } from "./axis_util-
|
|
4
|
-
import { t as U, m as W } from "./shared-
|
|
5
|
-
import { c as _ } from "./backend_util-
|
|
6
|
-
import { f as y } from "./gpgpu_math-
|
|
7
|
-
import { g as G, b as L } from "./kernel_funcs_utils-
|
|
1
|
+
import { aq as T, ag as E, p as O, j as V, aB as B, a1 as F, ah as j, aC as K } from "./index-BzFyqcy-.js";
|
|
2
|
+
import { r as $ } from "./Reshape-Bowtk9BP.js";
|
|
3
|
+
import { g as A, a as C, b as k, c as N, e as R } from "./axis_util-TbGYJ208.js";
|
|
4
|
+
import { t as U, m as W } from "./shared-DuP7ue-R.js";
|
|
5
|
+
import { c as _ } from "./backend_util-CJIiDoV1.js";
|
|
6
|
+
import { f as y } from "./gpgpu_math-CDaYiyE_.js";
|
|
7
|
+
import { g as G, b as L } from "./kernel_funcs_utils-DKLK0Mg3.js";
|
|
8
8
|
/**
|
|
9
9
|
* @license
|
|
10
10
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { j as c, a5 as C, n as f, V as R } from "./index-
|
|
2
|
-
import { u as g, g as I, a as x, b as F, c as $, d as u, e as m, i as l } from "./gpgpu_math-
|
|
1
|
+
import { j as c, a5 as C, n as f, V as R } from "./index-BzFyqcy-.js";
|
|
2
|
+
import { u as g, g as I, a as x, b as F, c as $, d as u, e as m, i as l } from "./gpgpu_math-CDaYiyE_.js";
|
|
3
3
|
/**
|
|
4
4
|
* @license
|
|
5
5
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
package/dist/TeachableLLM.js
CHANGED
|
@@ -5,17 +5,17 @@ import u from "./Generator.js";
|
|
|
5
5
|
import f from "./Trainer.js";
|
|
6
6
|
import { E as p } from "./index-Dwqa6Zy2.js";
|
|
7
7
|
import { dummyPassTrainAsync as m } from "./utilities/dummy.js";
|
|
8
|
-
import "./index-
|
|
8
|
+
import "./index-BzFyqcy-.js";
|
|
9
9
|
import "./ops/cpu/attentionMask.js";
|
|
10
10
|
import "./ops/webgl/attentionMask.js";
|
|
11
11
|
import "./ops/grads/attentionMask.js";
|
|
12
12
|
import "./ops/cpu/qkv.js";
|
|
13
13
|
import "./ops/webgl/qkv.js";
|
|
14
14
|
import "./ops/grads/qkv.js";
|
|
15
|
-
import "./random_width-
|
|
16
|
-
import "./register_all_kernels-
|
|
15
|
+
import "./random_width-CXVRloNK.js";
|
|
16
|
+
import "./register_all_kernels-DIGpEwcf.js";
|
|
17
17
|
import "./index-Tf7vU29b.js";
|
|
18
|
-
import "./dataset-
|
|
18
|
+
import "./dataset-DlZtKmBq.js";
|
|
19
19
|
import "./ops/cpu/rope.js";
|
|
20
20
|
import "./ops/webgl/rope.js";
|
|
21
21
|
import "./ops/grads/rope.js";
|
|
@@ -40,7 +40,7 @@ import "./papaparse.min-C8l2Kvo1.js";
|
|
|
40
40
|
import "./jszip.min-CjP2V1VV.js";
|
|
41
41
|
import "./ops/cpu/gelu.js";
|
|
42
42
|
import "./ops/webgl/gelu.js";
|
|
43
|
-
import "./gelu-
|
|
43
|
+
import "./gelu-Bp_-935b.js";
|
|
44
44
|
import "./ops/webgl/log.js";
|
|
45
45
|
import "./ops/cpu/adamMoments.js";
|
|
46
46
|
import "./ops/webgl/adamMoments.js";
|
package/dist/Trainer.d.ts
CHANGED
package/dist/Trainer.js
CHANGED
package/dist/backend.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { g as a, s as i, r as o } from "./index-
|
|
1
|
+
import { g as a, s as i, r as o } from "./index-BzFyqcy-.js";
|
|
2
2
|
async function e(t) {
|
|
3
|
-
a() !== t && (t === "webgpu" && (await import("./index-
|
|
3
|
+
a() !== t && (t === "webgpu" && (await import("./index-C1rx_Ajs.js"), await import("./ops/webgpu/index.js")), await i(t), await o(), console.log(`Backend set to ${t}`));
|
|
4
4
|
}
|
|
5
5
|
export {
|
|
6
6
|
e as selectBackend
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { j as m, a3 as R, n as g, aN as $, aO as O, aP as _, l as M, ae as y, ax as D, aQ as T, u as b, aR as F } from "./index-
|
|
2
|
-
import { b as L, d as W, f as v, c as N, e as x, g as P, a as C, h as z } from "./axis_util-
|
|
3
|
-
import { S as U, a as B, b as V, c as j, d as G, e as H, f as k, g as q, h as Z, i as X, j as J, k as K, l as Q, m as Y, s as ee, n as te, o as ne, t as se } from "./selu_util-
|
|
4
|
-
import { c as re, v as oe, a as ae } from "./scatter_nd_util-
|
|
1
|
+
import { j as m, a3 as R, n as g, aN as $, aO as O, aP as _, l as M, ae as y, ax as D, aQ as T, u as b, aR as F } from "./index-BzFyqcy-.js";
|
|
2
|
+
import { b as L, d as W, f as v, c as N, e as x, g as P, a as C, h as z } from "./axis_util-TbGYJ208.js";
|
|
3
|
+
import { S as U, a as B, b as V, c as j, d as G, e as H, f as k, g as q, h as Z, i as X, j as J, k as K, l as Q, m as Y, s as ee, n as te, o as ne, t as se } from "./selu_util-OtRzVwW5.js";
|
|
4
|
+
import { c as re, v as oe, a as ae } from "./scatter_nd_util-BQdz--Gn.js";
|
|
5
5
|
function ie(e, n) {
|
|
6
6
|
const r = e.shape.length, t = n.shape.length;
|
|
7
7
|
if (r < 1)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { r as T } from "./reshape-
|
|
1
|
+
import { E as h, F as f, M as p, J as g, G as u, X as b } from "./index-BzFyqcy-.js";
|
|
2
|
+
import { r as T } from "./reshape-CnIwVG1c.js";
|
|
3
3
|
/**
|
|
4
4
|
* @license
|
|
5
5
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { s as i, e } from "../index-
|
|
2
|
-
import { t } from "../tensor4d-
|
|
3
|
-
import { t as a } from "../tensor2d-
|
|
1
|
+
import { s as i, e } from "../index-BzFyqcy-.js";
|
|
2
|
+
import { t } from "../tensor4d-Df1WlVDY.js";
|
|
3
|
+
import { t as a } from "../tensor2d-D76QGjF3.js";
|
|
4
4
|
async function k(n) {
|
|
5
5
|
await i(n);
|
|
6
6
|
const s = t(
|
package/dist/checks/gelu.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { s as e, e as o } from "../index-
|
|
2
|
-
import { t as s } from "../tensor2d-
|
|
1
|
+
import { s as e, e as o } from "../index-BzFyqcy-.js";
|
|
2
|
+
import { t as s } from "../tensor2d-D76QGjF3.js";
|
|
3
3
|
async function m(t) {
|
|
4
4
|
await e(t);
|
|
5
5
|
const r = s(
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { s as n, e as s } from "../index-
|
|
2
|
-
import "../random_width-
|
|
3
|
-
import "../register_all_kernels-
|
|
1
|
+
import { s as n, e as s } from "../index-BzFyqcy-.js";
|
|
2
|
+
import "../random_width-CXVRloNK.js";
|
|
3
|
+
import "../register_all_kernels-DIGpEwcf.js";
|
|
4
4
|
import "../index-Tf7vU29b.js";
|
|
5
|
-
import "../dataset-
|
|
6
|
-
import { t as e } from "../tensor2d-
|
|
5
|
+
import "../dataset-DlZtKmBq.js";
|
|
6
|
+
import { t as e } from "../tensor2d-D76QGjF3.js";
|
|
7
7
|
async function f(t) {
|
|
8
8
|
await n(t);
|
|
9
9
|
const r = e(
|
package/dist/checks/normRMS.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { s as u, y as A, e as y } from "../index-
|
|
2
|
-
import { a as h } from "../ops-
|
|
3
|
-
import { t as p } from "../tensor1d-
|
|
4
|
-
import { t as a } from "../tensor-
|
|
1
|
+
import { s as u, y as A, e as y } from "../index-BzFyqcy-.js";
|
|
2
|
+
import { a as h } from "../ops-LuCMAnmM.js";
|
|
3
|
+
import { t as p } from "../tensor1d-vML0r3q6.js";
|
|
4
|
+
import { t as a } from "../tensor-CZr4dh61.js";
|
|
5
5
|
const w = Array.from({ length: 2048 * 192 }, () => Math.random()), x = Array.from({ length: 192 }, () => Math.random()), M = Array.from({ length: 2048 * 192 }, () => Math.random());
|
|
6
6
|
async function k(t) {
|
|
7
7
|
await u(t);
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { s as c, e as d } from "../index-
|
|
2
|
-
import { t as f } from "../tensor1d-
|
|
3
|
-
import { t as r } from "../tensor-
|
|
1
|
+
import { s as c, e as d } from "../index-BzFyqcy-.js";
|
|
2
|
+
import { t as f } from "../tensor1d-vML0r3q6.js";
|
|
3
|
+
import { t as r } from "../tensor-CZr4dh61.js";
|
|
4
4
|
const y = Array.from({ length: 2048 * 192 }, () => Math.random()), i = Array.from({ length: 192 }, () => Math.random()), l = Array.from({ length: 2048 * 192 }, () => Math.random());
|
|
5
5
|
async function x(t) {
|
|
6
6
|
await c(t);
|
package/dist/checks/qkv.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { t as f } from "../tensor2d-
|
|
1
|
+
import { B as i, C as u, D as c, s as l, e as h } from "../index-BzFyqcy-.js";
|
|
2
|
+
import { t as f } from "../tensor2d-D76QGjF3.js";
|
|
3
3
|
/**
|
|
4
4
|
* @license
|
|
5
5
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
package/dist/checks/rope.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import t from "../layers/RoPECache.js";
|
|
2
|
-
import { s as c, e as i } from "../index-
|
|
3
|
-
import { t as p } from "../tensor4d-
|
|
2
|
+
import { s as c, e as i } from "../index-BzFyqcy-.js";
|
|
3
|
+
import { t as p } from "../tensor4d-Df1WlVDY.js";
|
|
4
4
|
async function y(a) {
|
|
5
5
|
await c(a);
|
|
6
6
|
const o = p(
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { ak as S, T as h, ag as k, d as v, al as o, am as p, an as g, n as N, t as y } from "./index-
|
|
1
|
+
import { ak as S, T as h, ag as k, d as v, al as o, am as p, an as g, n as N, t as y } from "./index-BzFyqcy-.js";
|
|
2
2
|
import { s as R } from "./index-C4L8Cm77.js";
|
|
3
|
-
import { s as $ } from "./stack-
|
|
4
|
-
import { t as B } from "./tensor-
|
|
3
|
+
import { s as $ } from "./stack-DFatutCx.js";
|
|
4
|
+
import { t as B } from "./tensor-CZr4dh61.js";
|
|
5
5
|
/**
|
|
6
6
|
* @license
|
|
7
7
|
* Copyright 2018 Google LLC. All Rights Reserved.
|