@genai-fi/nanogpt 0.2.12 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +30 -25
- package/dist/NanoGPTModel.d.ts +13 -14
- package/dist/NanoGPTModel.js +142 -70
- package/dist/TeachableLLM.d.ts +16 -7
- package/dist/TeachableLLM.js +81 -44
- package/dist/Trainer.js +8 -8
- package/dist/concat-BIZS_td9.js +33 -0
- package/dist/data/parquet.js +1 -1
- package/dist/exports_layers-tbTBcwMM.js +25 -0
- package/dist/{sum-D7fu15XL.js → gather-BPGW8RsB.js} +6 -8
- package/dist/index-C4L8Cm77.js +349 -0
- package/dist/{index-YPKosni4.js → index-pWA4_lUh.js} +1020 -782
- package/dist/layers/CausalSelfAttention.d.ts +11 -11
- package/dist/layers/CausalSelfAttention.js +71 -63
- package/dist/layers/MLP.d.ts +6 -7
- package/dist/layers/MLP.js +18 -16
- package/dist/layers/RMSNorm.d.ts +6 -7
- package/dist/layers/RMSNorm.js +15 -13
- package/dist/layers/RoPECache.d.ts +4 -5
- package/dist/layers/RoPECache.js +36 -12
- package/dist/layers/TiedEmbedding.d.ts +7 -8
- package/dist/layers/TiedEmbedding.js +16 -418
- package/dist/layers/TransformerBlock.d.ts +8 -9
- package/dist/layers/TransformerBlock.js +12 -12
- package/dist/main.d.ts +2 -0
- package/dist/main.js +35 -21
- package/dist/{mat_mul-Bu7bhLms.js → mat_mul-D7_a4KJn.js} +5 -5
- package/dist/moments-DfcpfwKi.js +132 -0
- package/dist/ones-Cog-G2ag.js +29 -0
- package/dist/ops/appendCache.d.ts +2 -0
- package/dist/ops/appendCache.js +9 -0
- package/dist/ops/attentionMask.d.ts +1 -1
- package/dist/ops/attentionMask.js +7 -85
- package/dist/ops/cpu/appendCache.d.ts +2 -0
- package/dist/ops/cpu/appendCache.js +28 -0
- package/dist/ops/cpu/attentionMask.js +18 -0
- package/dist/ops/cpu/gatherSub.d.ts +1 -0
- package/dist/ops/cpu/gatherSub.js +34 -0
- package/dist/ops/cpu/qkv.d.ts +5 -0
- package/dist/ops/cpu/qkv.js +38 -0
- package/dist/ops/cpu/rope.d.ts +6 -0
- package/dist/ops/cpu/rope.js +38 -0
- package/dist/ops/cpu/scatterSub.d.ts +1 -0
- package/dist/ops/cpu/scatterSub.js +70 -0
- package/dist/ops/gatherSub.d.ts +1 -1
- package/dist/ops/gatherSub.js +6 -63
- package/dist/ops/grads/attentionMask.d.ts +1 -0
- package/dist/ops/grads/attentionMask.js +21 -0
- package/dist/ops/grads/qkv.d.ts +1 -0
- package/dist/ops/grads/qkv.js +20 -0
- package/dist/ops/grads/rope.d.ts +1 -0
- package/dist/ops/grads/rope.js +14 -0
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/qkv.d.ts +1 -6
- package/dist/ops/qkv.js +7 -124
- package/dist/ops/rope.d.ts +0 -5
- package/dist/ops/rope.js +7 -151
- package/dist/ops/scatterSub.d.ts +1 -1
- package/dist/ops/scatterSub.js +6 -147
- package/dist/ops/webgl/appendCache.d.ts +1 -0
- package/dist/ops/webgl/appendCache.js +43 -0
- package/dist/ops/webgl/attentionMask.d.ts +1 -0
- package/dist/ops/webgl/attentionMask.js +43 -0
- package/dist/ops/webgl/gatherSub.d.ts +1 -0
- package/dist/ops/webgl/gatherSub.js +27 -0
- package/dist/ops/webgl/qkv.d.ts +1 -0
- package/dist/ops/webgl/qkv.js +46 -0
- package/dist/ops/webgl/rope.d.ts +1 -0
- package/dist/ops/webgl/rope.js +56 -0
- package/dist/ops/webgl/scatterSub.d.ts +1 -0
- package/dist/ops/webgl/scatterSub.js +27 -0
- package/dist/{parquet-BRl5lE_I.js → parquet-C0Tlmv9c.js} +3045 -3048
- package/dist/random_width-oeUIlUZj.js +15487 -0
- package/dist/range-CcDl05lo.js +26 -0
- package/dist/{reshape-DmnmKT6r.js → reshape-C8CR_Bad.js} +3 -3
- package/dist/sin-BJIrfnj7.js +47 -0
- package/dist/softmax-Be_lsqUc.js +105 -0
- package/dist/{complex-CJ-qCcLB.js → split-DZbvruEP.js} +6 -8
- package/dist/stack-BMm-efee.js +27 -0
- package/dist/sum-C7Mgy9Bw.js +104 -0
- package/dist/tensor-DJVbYhh1.js +24 -0
- package/dist/tensor2d-ZuQSh2D-.js +30 -0
- package/dist/tokeniser/bpe.d.ts +17 -6
- package/dist/tokeniser/bpe.js +89 -61
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.d.ts +6 -6
- package/dist/training/DatasetBuilder.js +1262 -17
- package/dist/training/Evaluator.d.ts +3 -2
- package/dist/training/FullTrainer.d.ts +9 -8
- package/dist/training/FullTrainer.js +26 -25
- package/dist/training/LayerTrainer.d.ts +9 -8
- package/dist/training/LayerTrainer.js +34 -33
- package/dist/training/Trainer.d.ts +22 -21
- package/dist/training/Trainer.js +21 -18
- package/dist/training/sparseCrossEntropy.js +22 -166
- package/dist/utilities/dummy.js +10 -8
- package/dist/utilities/generate.js +14 -11
- package/dist/utilities/load.d.ts +1 -2
- package/dist/utilities/load.js +37 -35
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/save.js +14 -9
- package/dist/utilities/tokenParse.d.ts +1 -1
- package/dist/utilities/tokenParse.js +7 -61
- package/dist/utilities/weights.d.ts +3 -3
- package/dist/utilities/weights.js +21 -19
- package/dist/variable-Dl_ub3pk.js +23 -0
- package/dist/{stack-BtKpB0Ry.js → zeros-CCy9C3uU.js} +18 -16
- package/package.json +2 -1
- package/dist/assets/worker-BYeSPNkq.js +0 -1
- package/dist/tokeniser/NodeTokeniser.d.ts +0 -20
- package/dist/tokeniser/NodeTokeniser.js +0 -46
- package/dist/tokeniser/WebTokeniser.d.ts +0 -18
- package/dist/tokeniser/WebTokeniser.js +0 -96
- package/dist/tokeniser/worker.js +0 -53
- /package/dist/{tokeniser/worker.d.ts → ops/cpu/attentionMask.d.ts} +0 -0
|
@@ -1,438 +1,36 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
|
|
6
|
-
/**
|
|
7
|
-
* @license
|
|
8
|
-
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
9
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
10
|
-
* you may not use this file except in compliance with the License.
|
|
11
|
-
* You may obtain a copy of the License at
|
|
12
|
-
*
|
|
13
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
14
|
-
*
|
|
15
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
16
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
17
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
18
|
-
* See the License for the specific language governing permissions and
|
|
19
|
-
* limitations under the License.
|
|
20
|
-
* =============================================================================
|
|
21
|
-
*/
|
|
22
|
-
function he(t) {
|
|
23
|
-
const s = { x: i(t, "x", "sigmoid", "float32") };
|
|
24
|
-
return o.runKernel(X, s);
|
|
25
|
-
}
|
|
26
|
-
const fe = /* @__PURE__ */ h({ sigmoid_: he });
|
|
27
|
-
/**
|
|
28
|
-
* @license
|
|
29
|
-
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
30
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
31
|
-
* you may not use this file except in compliance with the License.
|
|
32
|
-
* You may obtain a copy of the License at
|
|
33
|
-
*
|
|
34
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
35
|
-
*
|
|
36
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
37
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
38
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
39
|
-
* See the License for the specific language governing permissions and
|
|
40
|
-
* limitations under the License.
|
|
41
|
-
* =============================================================================
|
|
42
|
-
*/
|
|
43
|
-
function de(t) {
|
|
44
|
-
const s = { x: i(t, "x", "elu", "float32") };
|
|
45
|
-
return o.runKernel(Y, s);
|
|
46
|
-
}
|
|
47
|
-
const me = /* @__PURE__ */ h({ elu_: de });
|
|
48
|
-
/**
|
|
49
|
-
* @license
|
|
50
|
-
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
51
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
52
|
-
* you may not use this file except in compliance with the License.
|
|
53
|
-
* You may obtain a copy of the License at
|
|
54
|
-
*
|
|
55
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
56
|
-
*
|
|
57
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
58
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
59
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
60
|
-
* See the License for the specific language governing permissions and
|
|
61
|
-
* limitations under the License.
|
|
62
|
-
* =============================================================================
|
|
63
|
-
*/
|
|
64
|
-
function ge(t) {
|
|
65
|
-
const s = { input: i(t, "input", "imag") };
|
|
66
|
-
return o.runKernel(Z, s);
|
|
67
|
-
}
|
|
68
|
-
const $e = /* @__PURE__ */ h({ imag_: ge });
|
|
69
|
-
/**
|
|
70
|
-
* @license
|
|
71
|
-
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
72
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
73
|
-
* you may not use this file except in compliance with the License.
|
|
74
|
-
* You may obtain a copy of the License at
|
|
75
|
-
*
|
|
76
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
77
|
-
*
|
|
78
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
79
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
80
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
81
|
-
* See the License for the specific language governing permissions and
|
|
82
|
-
* limitations under the License.
|
|
83
|
-
* =============================================================================
|
|
84
|
-
*/
|
|
85
|
-
function xe(t, e = 0.2) {
|
|
86
|
-
const n = { x: i(t, "x", "leakyRelu") }, r = { alpha: e };
|
|
87
|
-
return o.runKernel(J, n, r);
|
|
88
|
-
}
|
|
89
|
-
const ke = /* @__PURE__ */ h({ leakyRelu_: xe });
|
|
90
|
-
/**
|
|
91
|
-
* @license
|
|
92
|
-
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
93
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
94
|
-
* you may not use this file except in compliance with the License.
|
|
95
|
-
* You may obtain a copy of the License at
|
|
96
|
-
*
|
|
97
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
98
|
-
*
|
|
99
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
100
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
101
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
102
|
-
* See the License for the specific language governing permissions and
|
|
103
|
-
* limitations under the License.
|
|
104
|
-
* =============================================================================
|
|
105
|
-
*/
|
|
106
|
-
function De(t) {
|
|
107
|
-
const s = { x: i(t, "x", "neg") };
|
|
108
|
-
return o.runKernel(ee, s);
|
|
109
|
-
}
|
|
110
|
-
const be = /* @__PURE__ */ h({ neg_: De });
|
|
111
|
-
/**
|
|
112
|
-
* @license
|
|
113
|
-
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
114
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
115
|
-
* you may not use this file except in compliance with the License.
|
|
116
|
-
* You may obtain a copy of the License at
|
|
117
|
-
*
|
|
118
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
119
|
-
*
|
|
120
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
121
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
122
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
123
|
-
* See the License for the specific language governing permissions and
|
|
124
|
-
* limitations under the License.
|
|
125
|
-
* =============================================================================
|
|
126
|
-
*/
|
|
127
|
-
function ye(t, e) {
|
|
128
|
-
const s = i(t, "x", "prelu"), n = i(e, "alpha", "prelu"), r = { x: s, alpha: n };
|
|
129
|
-
return o.runKernel(te, r);
|
|
130
|
-
}
|
|
131
|
-
const Se = /* @__PURE__ */ h({ prelu_: ye });
|
|
132
|
-
/**
|
|
133
|
-
* @license
|
|
134
|
-
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
135
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
136
|
-
* you may not use this file except in compliance with the License.
|
|
137
|
-
* You may obtain a copy of the License at
|
|
138
|
-
*
|
|
139
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
140
|
-
*
|
|
141
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
142
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
143
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
144
|
-
* See the License for the specific language governing permissions and
|
|
145
|
-
* limitations under the License.
|
|
146
|
-
* =============================================================================
|
|
147
|
-
*/
|
|
148
|
-
function Ke(t) {
|
|
149
|
-
const s = { input: i(t, "input", "real") };
|
|
150
|
-
return o.runKernel(se, s);
|
|
151
|
-
}
|
|
152
|
-
const _e = /* @__PURE__ */ h({ real_: Ke });
|
|
153
|
-
/**
|
|
154
|
-
* @license
|
|
155
|
-
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
156
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
157
|
-
* you may not use this file except in compliance with the License.
|
|
158
|
-
* You may obtain a copy of the License at
|
|
159
|
-
*
|
|
160
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
161
|
-
*
|
|
162
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
163
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
164
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
165
|
-
* See the License for the specific language governing permissions and
|
|
166
|
-
* limitations under the License.
|
|
167
|
-
* =============================================================================
|
|
168
|
-
*/
|
|
169
|
-
function Me(t) {
|
|
170
|
-
const s = { x: i(t, "x", "relu") };
|
|
171
|
-
return o.runKernel(ne, s);
|
|
172
|
-
}
|
|
173
|
-
const We = /* @__PURE__ */ h({ relu_: Me });
|
|
174
|
-
/**
|
|
175
|
-
* @license
|
|
176
|
-
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
177
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
178
|
-
* you may not use this file except in compliance with the License.
|
|
179
|
-
* You may obtain a copy of the License at
|
|
180
|
-
*
|
|
181
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
182
|
-
*
|
|
183
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
184
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
185
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
186
|
-
* See the License for the specific language governing permissions and
|
|
187
|
-
* limitations under the License.
|
|
188
|
-
* =============================================================================
|
|
189
|
-
*/
|
|
190
|
-
function we(t) {
|
|
191
|
-
const s = { x: i(t, "x", "relu6") };
|
|
192
|
-
return o.runKernel(re, s);
|
|
193
|
-
}
|
|
194
|
-
const ze = /* @__PURE__ */ h({ relu6_: we });
|
|
195
|
-
/**
|
|
196
|
-
* @license
|
|
197
|
-
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
198
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
199
|
-
* you may not use this file except in compliance with the License.
|
|
200
|
-
* You may obtain a copy of the License at
|
|
201
|
-
*
|
|
202
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
203
|
-
*
|
|
204
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
205
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
206
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
207
|
-
* See the License for the specific language governing permissions and
|
|
208
|
-
* limitations under the License.
|
|
209
|
-
* =============================================================================
|
|
210
|
-
*/
|
|
211
|
-
function Ee(t, e = 0) {
|
|
212
|
-
const n = { x: i(t, "x", "step") }, r = { alpha: e };
|
|
213
|
-
return o.runKernel(ue, n, r);
|
|
214
|
-
}
|
|
215
|
-
const Oe = /* @__PURE__ */ h({ step_: Ee });
|
|
216
|
-
/**
|
|
217
|
-
* @license
|
|
218
|
-
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
219
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
220
|
-
* you may not use this file except in compliance with the License.
|
|
221
|
-
* You may obtain a copy of the License at
|
|
222
|
-
*
|
|
223
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
224
|
-
*
|
|
225
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
226
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
227
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
228
|
-
* See the License for the specific language governing permissions and
|
|
229
|
-
* limitations under the License.
|
|
230
|
-
* =============================================================================
|
|
231
|
-
*/
|
|
232
|
-
function Fe(t, e, s) {
|
|
233
|
-
const n = i(t, "x", "transpose");
|
|
234
|
-
if (e == null && (e = n.shape.map((l, p) => p).reverse()), L(n.rank === e.length, () => `Error in transpose: rank of input ${n.rank} must match length of perm ${e}.`), e.forEach((l) => {
|
|
235
|
-
L(l >= 0 && l < n.rank, () => `All entries in 'perm' must be between 0 and ${n.rank - 1} but got ${e}`);
|
|
236
|
-
}), n.rank <= 1)
|
|
237
|
-
return n.clone();
|
|
238
|
-
const r = { x: n }, c = { perm: e };
|
|
239
|
-
return n.dtype === "complex64" ? ae(() => {
|
|
240
|
-
let l = _e(n), p = $e(n);
|
|
241
|
-
return l = o.runKernel(A, { x: l }, c), p = o.runKernel(A, { x: p }, c), s && (p = be(p)), pe(l, p);
|
|
242
|
-
}) : o.runKernel(A, r, c);
|
|
243
|
-
}
|
|
244
|
-
const Re = /* @__PURE__ */ h({ transpose_: Fe });
|
|
245
|
-
/**
|
|
246
|
-
* @license
|
|
247
|
-
* Copyright 2019 Google LLC. All Rights Reserved.
|
|
248
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
249
|
-
* you may not use this file except in compliance with the License.
|
|
250
|
-
* You may obtain a copy of the License at
|
|
251
|
-
*
|
|
252
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
253
|
-
*
|
|
254
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
255
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
256
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
257
|
-
* See the License for the specific language governing permissions and
|
|
258
|
-
* limitations under the License.
|
|
259
|
-
* =============================================================================
|
|
260
|
-
*/
|
|
261
|
-
function Ae(t, e, s) {
|
|
262
|
-
if (s == null || s === "linear")
|
|
263
|
-
return t;
|
|
264
|
-
if (s === "relu")
|
|
265
|
-
return ie(t, Oe(e));
|
|
266
|
-
throw new Error(`Cannot compute gradient for fused activation ${s}.`);
|
|
267
|
-
}
|
|
268
|
-
function Le(t, e) {
|
|
269
|
-
let s = e;
|
|
270
|
-
const n = oe(t.shape, e.shape);
|
|
271
|
-
return n.length > 0 && (s = ce(s, n)), f(s, t.shape);
|
|
272
|
-
}
|
|
273
|
-
function Te(t, e, s, n) {
|
|
274
|
-
if (e === "linear")
|
|
275
|
-
return t;
|
|
276
|
-
if (e === "relu")
|
|
277
|
-
return We(t);
|
|
278
|
-
if (e === "elu")
|
|
279
|
-
return me(t);
|
|
280
|
-
if (e === "relu6")
|
|
281
|
-
return ze(t);
|
|
282
|
-
if (e === "prelu")
|
|
283
|
-
return Se(t, s);
|
|
284
|
-
if (e === "leakyrelu")
|
|
285
|
-
return ke(t, n);
|
|
286
|
-
if (e === "sigmoid")
|
|
287
|
-
return fe(t);
|
|
288
|
-
throw new Error(`Unknown fused activation ${e}.`);
|
|
289
|
-
}
|
|
290
|
-
const Be = (t, e) => !(t > 0) || e === "linear";
|
|
291
|
-
/**
|
|
292
|
-
* @license
|
|
293
|
-
* Copyright 2019 Google LLC. All Rights Reserved.
|
|
294
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
295
|
-
* you may not use this file except in compliance with the License.
|
|
296
|
-
* You may obtain a copy of the License at
|
|
297
|
-
*
|
|
298
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
299
|
-
*
|
|
300
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
301
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
302
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
303
|
-
* See the License for the specific language governing permissions and
|
|
304
|
-
* limitations under the License.
|
|
305
|
-
* =============================================================================
|
|
306
|
-
*/
|
|
307
|
-
function Ne({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activation: c = "linear", preluActivationWeights: l, leakyreluAlpha: p = 0.2 }) {
|
|
308
|
-
if (Be(o.state.gradientDepth, c) === !1) {
|
|
309
|
-
let x = m(t, e, s, n);
|
|
310
|
-
return r != null && (x = le(x, r)), Te(x, c, l, p);
|
|
311
|
-
}
|
|
312
|
-
let u = i(t, "a", "fused matMul"), a = i(e, "b", "fused matMul");
|
|
313
|
-
[u, a] = q(u, a);
|
|
314
|
-
const D = s ? u.shape[u.rank - 2] : u.shape[u.rank - 1], b = n ? a.shape[a.rank - 1] : a.shape[a.rank - 2], W = s ? u.shape[u.rank - 1] : u.shape[u.rank - 2], w = n ? a.shape[a.rank - 2] : a.shape[a.rank - 1], T = u.shape.slice(0, -2), y = a.shape.slice(0, -2), B = C(T), N = C(y);
|
|
315
|
-
L(D === b, () => `Error in fused matMul: inner shapes (${D}) and (${b}) of Tensors with shapes ${u.shape} and ${a.shape} and transposeA=${s} and transposeB=${n} must match.`);
|
|
316
|
-
const O = P(u.shape.slice(0, -2), a.shape.slice(0, -2)).concat([W, w]), F = s ? f(u, [B, D, W]) : f(u, [B, W, D]), R = n ? f(a, [N, w, b]) : f(a, [N, b, w]);
|
|
317
|
-
let S;
|
|
318
|
-
r != null && (S = i(r, "bias", "fused matMul"), [S] = q(S, u), P(O, S.shape));
|
|
319
|
-
let v;
|
|
320
|
-
l != null && (v = i(l, "prelu weights", "fused matMul"));
|
|
321
|
-
const G = (x, M) => {
|
|
322
|
-
const [g, $, k, z] = M, d = Ae(f(x, k.shape), k, c);
|
|
323
|
-
let K, _;
|
|
324
|
-
if (!s && !n ? (K = m(d, $, !1, !0), _ = m(g, d, !0, !1)) : !s && n ? (K = m(d, $, !1, !1), _ = m(d, g, !0, !1)) : s && !n ? (K = m($, d, !1, !0), _ = m(g, d, !1, !1)) : (K = m($, d, !0, !0), _ = m(d, g, !0, !0)), r != null) {
|
|
325
|
-
const V = Le(z, d);
|
|
326
|
-
return [K, _, V];
|
|
327
|
-
} else
|
|
328
|
-
return [K, _];
|
|
329
|
-
}, I = {
|
|
330
|
-
a: F,
|
|
331
|
-
b: R,
|
|
332
|
-
bias: S,
|
|
333
|
-
preluActivationWeights: v
|
|
334
|
-
}, j = { transposeA: s, transposeB: n, activation: c, leakyreluAlpha: p };
|
|
335
|
-
return r == null ? U((M, g, $) => {
|
|
336
|
-
const k = (
|
|
337
|
-
// tslint:disable-next-line: no-unnecessary-type-assertion
|
|
338
|
-
o.runKernel(H, I, j)
|
|
339
|
-
);
|
|
340
|
-
return $([M, g, k]), { value: f(k, O), gradFunc: G };
|
|
341
|
-
})(F, R) : U((M, g, $, k) => {
|
|
342
|
-
const z = (
|
|
343
|
-
// tslint:disable-next-line: no-unnecessary-type-assertion
|
|
344
|
-
o.runKernel(H, I, j)
|
|
345
|
-
);
|
|
346
|
-
return k([M, g, z, $]), { value: f(z, O), gradFunc: G };
|
|
347
|
-
})(F, R, S);
|
|
348
|
-
}
|
|
349
|
-
const Q = /* @__PURE__ */ h({ fusedMatMul_: Ne });
|
|
350
|
-
/**
|
|
351
|
-
* @license
|
|
352
|
-
* Copyright 2018 Google LLC
|
|
353
|
-
*
|
|
354
|
-
* Use of this source code is governed by an MIT-style
|
|
355
|
-
* license that can be found in the LICENSE file or at
|
|
356
|
-
* https://opensource.org/licenses/MIT.
|
|
357
|
-
* =============================================================================
|
|
358
|
-
*/
|
|
359
|
-
class E extends Error {
|
|
360
|
-
constructor(e) {
|
|
361
|
-
super(e), Object.setPrototypeOf(this, E.prototype);
|
|
362
|
-
}
|
|
363
|
-
}
|
|
364
|
-
/**
|
|
365
|
-
* @license
|
|
366
|
-
* Copyright 2018 Google LLC
|
|
367
|
-
*
|
|
368
|
-
* Use of this source code is governed by an MIT-style
|
|
369
|
-
* license that can be found in the LICENSE file or at
|
|
370
|
-
* https://opensource.org/licenses/MIT.
|
|
371
|
-
* =============================================================================
|
|
372
|
-
*/
|
|
373
|
-
function ve(t, e, s, n) {
|
|
374
|
-
if (t.rank < 2 || e.rank < 2)
|
|
375
|
-
throw new E(`dot requires both inputs to be rank >= 2 but got x shape = ${t.shape} and y shape = ${e.shape}`);
|
|
376
|
-
if (e.rank >= 3) {
|
|
377
|
-
const r = t.shape.slice(-1)[0], c = e.shape.slice(-2)[0];
|
|
378
|
-
if (r !== c)
|
|
379
|
-
throw new E(`If rank y >= 3, then the second last dim of y must equal the last dim of x but got x shape = ${t.shape} and y shape = ${e.shape}`);
|
|
380
|
-
}
|
|
381
|
-
if (t.rank === 2 && e.rank === 2)
|
|
382
|
-
return Q({
|
|
383
|
-
a: t,
|
|
384
|
-
b: e,
|
|
385
|
-
transposeA: !1,
|
|
386
|
-
transposeB: !1,
|
|
387
|
-
bias: null,
|
|
388
|
-
activation: s
|
|
389
|
-
});
|
|
390
|
-
{
|
|
391
|
-
const r = t.shape.slice(), c = r.pop();
|
|
392
|
-
t = f(t, [-1, c]);
|
|
393
|
-
const l = e.shape.slice(), p = l.pop(), u = l.pop(), a = [...l, p], D = Array.from({ length: e.rank }, (T, y) => y === 0 ? e.rank - 2 : y <= e.rank - 2 ? y - 1 : y);
|
|
394
|
-
e = f(Re(e, D), [u, -1]);
|
|
395
|
-
const b = [...r, ...a];
|
|
396
|
-
return f(Q({
|
|
397
|
-
a: t,
|
|
398
|
-
b: e,
|
|
399
|
-
transposeA: !1,
|
|
400
|
-
transposeB: !1,
|
|
401
|
-
bias: null,
|
|
402
|
-
activation: s
|
|
403
|
-
}), b);
|
|
404
|
-
}
|
|
405
|
-
}
|
|
406
|
-
class Ue {
|
|
1
|
+
import { r as t, d as s } from "../random_width-oeUIlUZj.js";
|
|
2
|
+
import "../index-pWA4_lUh.js";
|
|
3
|
+
import { v as r } from "../variable-Dl_ub3pk.js";
|
|
4
|
+
import { g as d } from "../gather-BPGW8RsB.js";
|
|
5
|
+
class b {
|
|
407
6
|
vocabSize;
|
|
408
7
|
embedDim;
|
|
409
|
-
tf;
|
|
410
8
|
tiedWeights;
|
|
411
9
|
initializer;
|
|
412
|
-
constructor(
|
|
413
|
-
this.vocabSize =
|
|
10
|
+
constructor(i, e) {
|
|
11
|
+
this.vocabSize = i.vocabSize, this.embedDim = i.embedDim, this.initializer = t({
|
|
414
12
|
mean: 0,
|
|
415
13
|
stddev: 0.02
|
|
416
|
-
}), this.tiedWeights =
|
|
14
|
+
}), this.tiedWeights = r(
|
|
417
15
|
this.initializer.apply([this.vocabSize, this.embedDim]),
|
|
418
16
|
!0,
|
|
419
|
-
|
|
17
|
+
e || "tied_embedding"
|
|
420
18
|
);
|
|
421
19
|
}
|
|
422
20
|
get variables() {
|
|
423
21
|
return [this.tiedWeights];
|
|
424
22
|
}
|
|
425
|
-
embed(
|
|
426
|
-
return
|
|
23
|
+
embed(i) {
|
|
24
|
+
return d(this.tiedWeights, i, 0);
|
|
427
25
|
}
|
|
428
|
-
project(
|
|
429
|
-
return
|
|
26
|
+
project(i) {
|
|
27
|
+
return s(i, this.tiedWeights.transpose());
|
|
430
28
|
}
|
|
431
29
|
getWeights() {
|
|
432
30
|
return [this.tiedWeights];
|
|
433
31
|
}
|
|
434
|
-
setWeights(
|
|
435
|
-
this.tiedWeights.assign(
|
|
32
|
+
setWeights(i) {
|
|
33
|
+
this.tiedWeights.assign(i[0]);
|
|
436
34
|
}
|
|
437
35
|
getConfig() {
|
|
438
36
|
return {
|
|
@@ -445,5 +43,5 @@ class Ue {
|
|
|
445
43
|
}
|
|
446
44
|
}
|
|
447
45
|
export {
|
|
448
|
-
|
|
46
|
+
b as default
|
|
449
47
|
};
|
|
@@ -1,29 +1,28 @@
|
|
|
1
|
-
import { default as TF } from '@tensorflow/tfjs';
|
|
2
1
|
import { GPTConfig } from '../config';
|
|
3
2
|
import { KVCache } from './CausalSelfAttention';
|
|
4
3
|
import { default as RoPECache } from './RoPECache';
|
|
5
4
|
import { default as MemoryProfiler } from '../utilities/profile';
|
|
6
5
|
import { default as BaseLayer } from './BaseLayer';
|
|
6
|
+
import { Tensor, Variable } from '@tensorflow/tfjs-core';
|
|
7
7
|
export default class Block extends BaseLayer {
|
|
8
8
|
private ln1;
|
|
9
9
|
private attn;
|
|
10
10
|
private ln2;
|
|
11
11
|
private mlp;
|
|
12
|
-
private tf;
|
|
13
12
|
private index;
|
|
14
13
|
private _trainable;
|
|
15
14
|
skipped: boolean;
|
|
16
|
-
constructor(
|
|
15
|
+
constructor(index: number, config: GPTConfig, ropeCache?: RoPECache);
|
|
17
16
|
setProfiler(value: MemoryProfiler | undefined): void;
|
|
18
|
-
get variables():
|
|
17
|
+
get variables(): Variable[];
|
|
19
18
|
get trainable(): boolean;
|
|
20
19
|
set trainable(value: boolean);
|
|
21
|
-
saveWeights(map: Map<string,
|
|
22
|
-
loadWeights(weights: Map<string,
|
|
20
|
+
saveWeights(map: Map<string, Tensor[]>): void;
|
|
21
|
+
loadWeights(weights: Map<string, Tensor[]>): void;
|
|
23
22
|
private getMLPOutput;
|
|
24
|
-
call(x:
|
|
25
|
-
output:
|
|
26
|
-
attention?:
|
|
23
|
+
call(x: Tensor, training?: boolean, includeAttention?: boolean, cache?: KVCache): {
|
|
24
|
+
output: Tensor;
|
|
25
|
+
attention?: Tensor;
|
|
27
26
|
cache?: KVCache;
|
|
28
27
|
};
|
|
29
28
|
dispose(): void;
|
|
@@ -1,18 +1,18 @@
|
|
|
1
|
-
import
|
|
1
|
+
import h from "./CausalSelfAttention.js";
|
|
2
2
|
import o from "./MLP.js";
|
|
3
3
|
import r from "./RMSNorm.js";
|
|
4
4
|
import p from "./BaseLayer.js";
|
|
5
|
-
|
|
5
|
+
import { t as d } from "../index-pWA4_lUh.js";
|
|
6
|
+
class g extends p {
|
|
6
7
|
ln1;
|
|
7
8
|
attn;
|
|
8
9
|
ln2;
|
|
9
10
|
mlp;
|
|
10
|
-
tf;
|
|
11
11
|
index;
|
|
12
12
|
_trainable = !0;
|
|
13
13
|
skipped = !1;
|
|
14
|
-
constructor(t,
|
|
15
|
-
super(), this.
|
|
14
|
+
constructor(t, s, i) {
|
|
15
|
+
super(), this.index = t, this.ln1 = new r([s.nEmbed], 1e-8, `block_${this.index}_rms1`), this.attn = new h(this.index, s, i), this.ln2 = new r([s.nEmbed], 1e-8, `block_${this.index}_rms2`), this.mlp = new o(this.index, s);
|
|
16
16
|
}
|
|
17
17
|
setProfiler(t) {
|
|
18
18
|
this._profiler = t, this.attn.setProfiler(t), this.mlp.setProfiler(t), this.ln1.setProfiler(t), this.ln2.setProfiler(t);
|
|
@@ -37,17 +37,17 @@ class f extends p {
|
|
|
37
37
|
loadWeights(t) {
|
|
38
38
|
this.attn.loadWeights(t), this.mlp.loadWeights(t), this.ln1.setWeights(t.get(`block_${this.index}_rms1`) || []), this.ln2.setWeights(t.get(`block_${this.index}_rms2`) || []);
|
|
39
39
|
}
|
|
40
|
-
getMLPOutput(t,
|
|
41
|
-
const
|
|
40
|
+
getMLPOutput(t, s) {
|
|
41
|
+
const i = this.ln2.apply(t), e = this.mlp.call(i, s);
|
|
42
42
|
return t.add(e);
|
|
43
43
|
}
|
|
44
|
-
call(t,
|
|
45
|
-
return
|
|
44
|
+
call(t, s = !1, i = !1, e) {
|
|
45
|
+
return d(() => {
|
|
46
46
|
if (this.skipped)
|
|
47
47
|
return { output: t };
|
|
48
|
-
const l = this.ln1.apply(t), n = this.attn.call(l,
|
|
48
|
+
const l = this.ln1.apply(t), n = this.attn.call(l, s, i, e), a = t.add(n.output);
|
|
49
49
|
return {
|
|
50
|
-
output: this.getMLPOutput(
|
|
50
|
+
output: this.getMLPOutput(a, s),
|
|
51
51
|
attention: n.attention,
|
|
52
52
|
cache: n.presentKV
|
|
53
53
|
};
|
|
@@ -58,5 +58,5 @@ class f extends p {
|
|
|
58
58
|
}
|
|
59
59
|
}
|
|
60
60
|
export {
|
|
61
|
-
|
|
61
|
+
g as default
|
|
62
62
|
};
|
package/dist/main.d.ts
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
export { default as NanoGPT } from './NanoGPTModel';
|
|
2
2
|
export { default as TeachableLLM } from './TeachableLLM';
|
|
3
3
|
export { default as CharTokeniser } from './tokeniser/CharTokeniser';
|
|
4
|
+
export { default as BPETokeniser } from './tokeniser/bpe';
|
|
4
5
|
export { default as waitForModel } from './utilities/waitForModel';
|
|
5
6
|
export { default as loadTextData } from './data/textLoader';
|
|
6
7
|
export type { ITrainerOptions } from './Trainer';
|
|
7
8
|
export type { IGenerateOptions } from './Generator';
|
|
8
9
|
export type { TrainingLogEntry } from './NanoGPTModel';
|
|
10
|
+
export type { ITokeniser } from './tokeniser/type';
|
|
9
11
|
export type { GPTConfig } from './config';
|
|
10
12
|
export { estimateParameterCount, estimateMemoryUsage, estimateTrainingMemoryUsage, estimateResources, validateConfig, } from './utilities/parameters';
|
package/dist/main.js
CHANGED
|
@@ -1,23 +1,37 @@
|
|
|
1
|
-
import { default as
|
|
2
|
-
import { default as
|
|
3
|
-
import { default as
|
|
4
|
-
import { default as
|
|
5
|
-
import { default as
|
|
6
|
-
import {
|
|
7
|
-
import "./
|
|
8
|
-
import "./
|
|
9
|
-
import "./ops/
|
|
10
|
-
import "./ops/
|
|
11
|
-
import "./ops/
|
|
1
|
+
import { default as P } from "./NanoGPTModel.js";
|
|
2
|
+
import { default as h } from "./TeachableLLM.js";
|
|
3
|
+
import { default as y } from "./tokeniser/CharTokeniser.js";
|
|
4
|
+
import { default as U } from "./tokeniser/bpe.js";
|
|
5
|
+
import { default as v } from "./utilities/waitForModel.js";
|
|
6
|
+
import { default as B } from "./data/textLoader.js";
|
|
7
|
+
import { estimateMemoryUsage as E, estimateParameterCount as F, estimateResources as G, estimateTrainingMemoryUsage as N, validateConfig as R } from "./utilities/parameters.js";
|
|
8
|
+
import "./index-pWA4_lUh.js";
|
|
9
|
+
import "./ops/cpu/scatterSub.js";
|
|
10
|
+
import "./ops/webgl/scatterSub.js";
|
|
11
|
+
import "./ops/cpu/gatherSub.js";
|
|
12
|
+
import "./ops/webgl/gatherSub.js";
|
|
13
|
+
import "./ops/cpu/attentionMask.js";
|
|
14
|
+
import "./ops/webgl/attentionMask.js";
|
|
15
|
+
import "./ops/grads/attentionMask.js";
|
|
16
|
+
import "./ops/cpu/qkv.js";
|
|
17
|
+
import "./ops/webgl/qkv.js";
|
|
18
|
+
import "./ops/grads/qkv.js";
|
|
19
|
+
import "@tensorflow/tfjs";
|
|
20
|
+
import "./ops/cpu/rope.js";
|
|
21
|
+
import "./ops/webgl/rope.js";
|
|
22
|
+
import "./ops/grads/rope.js";
|
|
23
|
+
import "./ops/cpu/appendCache.js";
|
|
24
|
+
import "./ops/webgl/appendCache.js";
|
|
12
25
|
export {
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
26
|
+
U as BPETokeniser,
|
|
27
|
+
y as CharTokeniser,
|
|
28
|
+
P as NanoGPT,
|
|
29
|
+
h as TeachableLLM,
|
|
30
|
+
E as estimateMemoryUsage,
|
|
31
|
+
F as estimateParameterCount,
|
|
32
|
+
G as estimateResources,
|
|
33
|
+
N as estimateTrainingMemoryUsage,
|
|
34
|
+
B as loadTextData,
|
|
35
|
+
R as validateConfig,
|
|
36
|
+
v as waitForModel
|
|
23
37
|
};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { o as m,
|
|
1
|
+
import { o as m, h as s, p as c, E as M, B as p } from "./index-pWA4_lUh.js";
|
|
2
2
|
/**
|
|
3
3
|
* @license
|
|
4
4
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -15,13 +15,13 @@ import { o as m, d as s, f as c, E as M, B as f } from "./index-YPKosni4.js";
|
|
|
15
15
|
* limitations under the License.
|
|
16
16
|
* =============================================================================
|
|
17
17
|
*/
|
|
18
|
-
function
|
|
18
|
+
function f(e, o, n = !1, l = !1) {
|
|
19
19
|
let a = s(e, "a", "matMul"), t = s(o, "b", "matMul");
|
|
20
20
|
[a, t] = c(a, t);
|
|
21
21
|
const r = { a, b: t }, u = { transposeA: n, transposeB: l };
|
|
22
|
-
return M.runKernel(
|
|
22
|
+
return M.runKernel(p, r, u);
|
|
23
23
|
}
|
|
24
|
-
const
|
|
24
|
+
const h = /* @__PURE__ */ m({ matMul_: f });
|
|
25
25
|
export {
|
|
26
|
-
|
|
26
|
+
h as m
|
|
27
27
|
};
|