@genai-fi/nanogpt 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +7 -0
- package/README.md +20 -0
- package/dist/Generator.d.ts +14 -0
- package/dist/Generator.js +39 -0
- package/dist/NanoGPTModel.d.ts +35 -0
- package/dist/NanoGPTModel.js +129 -0
- package/dist/TeachableLLM.d.ts +21 -0
- package/dist/TeachableLLM.js +47 -0
- package/dist/Trainer.d.ts +19 -0
- package/dist/Trainer.js +34 -0
- package/dist/_commonjsHelpers-DaMA6jEr.js +8 -0
- package/dist/assets/worker-BYeSPNkq.js +1 -0
- package/dist/config.d.ts +11 -0
- package/dist/config.js +19 -0
- package/dist/index-B8nyc6IR.js +3899 -0
- package/dist/index-SOhdqzHq.js +113 -0
- package/dist/jszip.min-BLbRbbKt.js +2324 -0
- package/dist/layers/CausalSelfAttention.d.ts +22 -0
- package/dist/layers/CausalSelfAttention.js +75 -0
- package/dist/layers/LayerNorm.d.ts +12 -0
- package/dist/layers/LayerNorm.js +30 -0
- package/dist/layers/MLP.d.ts +17 -0
- package/dist/layers/MLP.js +57 -0
- package/dist/layers/TiedEmbedding.d.ts +22 -0
- package/dist/layers/TiedEmbedding.js +532 -0
- package/dist/layers/TransformerBlock.d.ts +19 -0
- package/dist/layers/TransformerBlock.js +47 -0
- package/dist/main.d.ts +6 -0
- package/dist/main.js +8 -0
- package/dist/tokeniser/CharTokeniser.d.ts +20 -0
- package/dist/tokeniser/CharTokeniser.js +52 -0
- package/dist/tokeniser/NodeTokeniser.d.ts +19 -0
- package/dist/tokeniser/NodeTokeniser.js +46 -0
- package/dist/tokeniser/WebTokeniser.d.ts +18 -0
- package/dist/tokeniser/WebTokeniser.js +96 -0
- package/dist/tokeniser/bpe.d.ts +14 -0
- package/dist/tokeniser/bpe.js +102 -0
- package/dist/tokeniser/messages.d.ts +61 -0
- package/dist/tokeniser/messages.js +1 -0
- package/dist/tokeniser/type.d.ts +14 -0
- package/dist/tokeniser/type.js +1 -0
- package/dist/tokeniser/worker.d.ts +1 -0
- package/dist/tokeniser/worker.js +53 -0
- package/dist/training/AdamExt.d.ts +23 -0
- package/dist/training/AdamExt.js +43 -0
- package/dist/training/DatasetBuilder.d.ts +12 -0
- package/dist/training/DatasetBuilder.js +27 -0
- package/dist/training/FullTrainer.d.ts +17 -0
- package/dist/training/FullTrainer.js +75 -0
- package/dist/training/LayerTrainer.d.ts +28 -0
- package/dist/training/LayerTrainer.js +108 -0
- package/dist/training/Trainer.d.ts +73 -0
- package/dist/training/Trainer.js +87 -0
- package/dist/training/lwSchedule.d.ts +7 -0
- package/dist/training/lwSchedule.js +162 -0
- package/dist/utilities/generate.d.ts +3 -0
- package/dist/utilities/generate.js +22 -0
- package/dist/utilities/load.d.ts +7 -0
- package/dist/utilities/load.js +47 -0
- package/dist/utilities/save.d.ts +3 -0
- package/dist/utilities/save.js +21 -0
- package/dist/utilities/textLoader.d.ts +1 -0
- package/dist/utilities/textLoader.js +438 -0
- package/dist/utilities/tokenParse.d.ts +1 -0
- package/dist/utilities/tokenParse.js +66 -0
- package/dist/utilities/weights.d.ts +12 -0
- package/dist/utilities/weights.js +43 -0
- package/package.json +59 -0
|
@@ -0,0 +1,532 @@
|
|
|
1
|
+
import { o as p, c as a, b as V, E as u, C as X, R as Y, d as A, B as Z, S as ee, f as te, g as se, h as ne, I as re, L as ae, N as ue, P as ie, i as oe, j as le, k as ce, l as pe, n as B, t as he, T as L, m as fe, p as me, q as de, r as q, u as P, v as U, _ as H } from "../index-B8nyc6IR.js";
|
|
2
|
+
/**
|
|
3
|
+
* @license
|
|
4
|
+
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
16
|
+
* =============================================================================
|
|
17
|
+
*/
|
|
18
|
+
/**
 * Implementation registered as the `complex_` op: pairs a real part with an
 * imaginary part and dispatches kernel `X`.
 *
 * @param t Real part; normalised through `a(...)` under the argument name "real".
 * @param e Imaginary part; normalised through `a(...)` under the argument name "imag".
 * @returns The result of `u.runKernel(X, { real, imag })`.
 */
function ge(t, e) {
  const realPart = a(t, "real", "complex");
  const imagPart = a(e, "imag", "complex");
  // `V` is handed both shapes plus the mismatch message (presumably a
  // shape-equality assertion — confirm against the bundled index module).
  V(
    realPart.shape,
    imagPart.shape,
    `real and imag shapes, ${realPart.shape} and ${imagPart.shape}, must match in call to tf.complex().`
  );
  return u.runKernel(X, { real: realPart, imag: imagPart });
}
const $e = /* @__PURE__ */ p({ complex_: ge });
|
|
25
|
+
/**
|
|
26
|
+
* @license
|
|
27
|
+
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
28
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
29
|
+
* you may not use this file except in compliance with the License.
|
|
30
|
+
* You may obtain a copy of the License at
|
|
31
|
+
*
|
|
32
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
33
|
+
*
|
|
34
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
35
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
36
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
37
|
+
* See the License for the specific language governing permissions and
|
|
38
|
+
* limitations under the License.
|
|
39
|
+
* =============================================================================
|
|
40
|
+
*/
|
|
41
|
+
/**
 * Implementation registered as the `reshape_` op.
 *
 * @param t Input; normalised through `a(...)` as argument "x", accepting "string_or_numeric".
 * @param e Target shape, forwarded to the kernel as the `shape` attribute.
 * @returns The result of dispatching kernel `Y`.
 */
function xe(t, e) {
  const input = a(t, "x", "reshape", "string_or_numeric");
  return u.runKernel(Y, { x: input }, { shape: e });
}
const f = /* @__PURE__ */ p({ reshape_: xe });
|
|
46
|
+
/**
|
|
47
|
+
* @license
|
|
48
|
+
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
49
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
50
|
+
* you may not use this file except in compliance with the License.
|
|
51
|
+
* You may obtain a copy of the License at
|
|
52
|
+
*
|
|
53
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
54
|
+
*
|
|
55
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
56
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
57
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
58
|
+
* See the License for the specific language governing permissions and
|
|
59
|
+
* limitations under the License.
|
|
60
|
+
* =============================================================================
|
|
61
|
+
*/
|
|
62
|
+
/**
 * Implementation registered as the `matMul_` op.
 *
 * @param t Left operand ("a").
 * @param e Right operand ("b").
 * @param s Forwarded as the `transposeA` attribute (default false).
 * @param n Forwarded as the `transposeB` attribute (default false).
 * @returns The result of dispatching kernel `Z`.
 */
function ke(t, e, s = !1, n = !1) {
  const rawA = a(t, "a", "matMul");
  const rawB = a(e, "b", "matMul");
  // `A` returns the (possibly adjusted) pair; both outputs replace the inputs.
  const [lhs, rhs] = A(rawA, rawB);
  return u.runKernel(Z, { a: lhs, b: rhs }, { transposeA: s, transposeB: n });
}
const d = /* @__PURE__ */ p({ matMul_: ke });
|
|
69
|
+
/**
|
|
70
|
+
* @license
|
|
71
|
+
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
72
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
73
|
+
* you may not use this file except in compliance with the License.
|
|
74
|
+
* You may obtain a copy of the License at
|
|
75
|
+
*
|
|
76
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
77
|
+
*
|
|
78
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
79
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
80
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
81
|
+
* See the License for the specific language governing permissions and
|
|
82
|
+
* limitations under the License.
|
|
83
|
+
* =============================================================================
|
|
84
|
+
*/
|
|
85
|
+
/**
 * Implementation registered as the `sigmoid_` op.
 *
 * @param t Input; normalised through `a(...)` as argument "x" with "float32".
 * @returns The result of dispatching kernel `ee`.
 */
function be(t) {
  const input = a(t, "x", "sigmoid", "float32");
  return u.runKernel(ee, { x: input });
}
const De = /* @__PURE__ */ p({ sigmoid_: be });
|
|
90
|
+
/**
|
|
91
|
+
* @license
|
|
92
|
+
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
93
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
94
|
+
* you may not use this file except in compliance with the License.
|
|
95
|
+
* You may obtain a copy of the License at
|
|
96
|
+
*
|
|
97
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
98
|
+
*
|
|
99
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
100
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
101
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
102
|
+
* See the License for the specific language governing permissions and
|
|
103
|
+
* limitations under the License.
|
|
104
|
+
* =============================================================================
|
|
105
|
+
*/
|
|
106
|
+
/**
 * Implementation registered as the `elu_` op.
 *
 * @param t Input; normalised through `a(...)` as argument "x" with "float32".
 * @returns The result of dispatching kernel `te`.
 */
function Se(t) {
  const input = a(t, "x", "elu", "float32");
  return u.runKernel(te, { x: input });
}
const ye = /* @__PURE__ */ p({ elu_: Se });
|
|
111
|
+
/**
|
|
112
|
+
* @license
|
|
113
|
+
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
114
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
115
|
+
* you may not use this file except in compliance with the License.
|
|
116
|
+
* You may obtain a copy of the License at
|
|
117
|
+
*
|
|
118
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
119
|
+
*
|
|
120
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
121
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
122
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
123
|
+
* See the License for the specific language governing permissions and
|
|
124
|
+
* limitations under the License.
|
|
125
|
+
* =============================================================================
|
|
126
|
+
*/
|
|
127
|
+
/**
 * Implementation registered as the `sum_` op.
 *
 * @param t Input; normalised through `a(...)` as argument "x".
 * @param e Forwarded as the `axis` attribute (default null).
 * @param s Forwarded as the `keepDims` attribute (default false).
 * @returns The result of dispatching kernel `ne`.
 */
function _e(t, e = null, s = !1) {
  let input = a(t, "x", "sum");
  // Boolean inputs are converted with `se(..., "int32")` before the kernel runs.
  if (input.dtype === "bool") {
    input = se(input, "int32");
  }
  return u.runKernel(ne, { x: input }, { axis: e, keepDims: s });
}
const Me = /* @__PURE__ */ p({ sum_: _e });
|
|
134
|
+
/**
|
|
135
|
+
* @license
|
|
136
|
+
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
137
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
138
|
+
* you may not use this file except in compliance with the License.
|
|
139
|
+
* You may obtain a copy of the License at
|
|
140
|
+
*
|
|
141
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
142
|
+
*
|
|
143
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
144
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
145
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
146
|
+
* See the License for the specific language governing permissions and
|
|
147
|
+
* limitations under the License.
|
|
148
|
+
* =============================================================================
|
|
149
|
+
*/
|
|
150
|
+
/**
 * Implementation registered as the `imag_` op.
 *
 * @param t Input; normalised through `a(...)` as argument "input".
 * @returns The result of dispatching kernel `re`.
 */
function Ke(t) {
  const input = a(t, "input", "imag");
  return u.runKernel(re, { input });
}
const we = /* @__PURE__ */ p({ imag_: Ke });
|
|
155
|
+
/**
|
|
156
|
+
* @license
|
|
157
|
+
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
158
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
159
|
+
* you may not use this file except in compliance with the License.
|
|
160
|
+
* You may obtain a copy of the License at
|
|
161
|
+
*
|
|
162
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
163
|
+
*
|
|
164
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
165
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
166
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
167
|
+
* See the License for the specific language governing permissions and
|
|
168
|
+
* limitations under the License.
|
|
169
|
+
* =============================================================================
|
|
170
|
+
*/
|
|
171
|
+
/**
 * Implementation registered as the `leakyRelu_` op.
 *
 * @param t Input; normalised through `a(...)` as argument "x".
 * @param e Forwarded as the `alpha` attribute (default 0.2).
 * @returns The result of dispatching kernel `ae`.
 */
function ze(t, e = 0.2) {
  const input = a(t, "x", "leakyRelu");
  return u.runKernel(ae, { x: input }, { alpha: e });
}
const Ee = /* @__PURE__ */ p({ leakyRelu_: ze });
|
|
176
|
+
/**
|
|
177
|
+
* @license
|
|
178
|
+
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
179
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
180
|
+
* you may not use this file except in compliance with the License.
|
|
181
|
+
* You may obtain a copy of the License at
|
|
182
|
+
*
|
|
183
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
184
|
+
*
|
|
185
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
186
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
187
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
188
|
+
* See the License for the specific language governing permissions and
|
|
189
|
+
* limitations under the License.
|
|
190
|
+
* =============================================================================
|
|
191
|
+
*/
|
|
192
|
+
/**
 * Implementation registered as the `neg_` op.
 *
 * @param t Input; normalised through `a(...)` as argument "x".
 * @returns The result of dispatching kernel `ue`.
 */
function We(t) {
  const input = a(t, "x", "neg");
  return u.runKernel(ue, { x: input });
}
const Oe = /* @__PURE__ */ p({ neg_: We });
|
|
197
|
+
/**
|
|
198
|
+
* @license
|
|
199
|
+
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
200
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
201
|
+
* you may not use this file except in compliance with the License.
|
|
202
|
+
* You may obtain a copy of the License at
|
|
203
|
+
*
|
|
204
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
205
|
+
*
|
|
206
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
207
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
208
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
209
|
+
* See the License for the specific language governing permissions and
|
|
210
|
+
* limitations under the License.
|
|
211
|
+
* =============================================================================
|
|
212
|
+
*/
|
|
213
|
+
/**
 * Implementation registered as the `prelu_` op.
 *
 * @param t Input; normalised through `a(...)` as argument "x".
 * @param e Slope parameter; normalised through `a(...)` as argument "alpha".
 * @returns The result of dispatching kernel `ie` with `{ x, alpha }`.
 */
function Re(t, e) {
  const input = a(t, "x", "prelu");
  const slope = a(e, "alpha", "prelu");
  return u.runKernel(ie, { x: input, alpha: slope });
}
const Fe = /* @__PURE__ */ p({ prelu_: Re });
|
|
218
|
+
/**
|
|
219
|
+
* @license
|
|
220
|
+
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
221
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
222
|
+
* you may not use this file except in compliance with the License.
|
|
223
|
+
* You may obtain a copy of the License at
|
|
224
|
+
*
|
|
225
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
226
|
+
*
|
|
227
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
228
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
229
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
230
|
+
* See the License for the specific language governing permissions and
|
|
231
|
+
* limitations under the License.
|
|
232
|
+
* =============================================================================
|
|
233
|
+
*/
|
|
234
|
+
/**
 * Implementation registered as the `real_` op.
 *
 * @param t Input; normalised through `a(...)` as argument "input".
 * @returns The result of dispatching kernel `oe`.
 */
function Le(t) {
  const input = a(t, "input", "real");
  return u.runKernel(oe, { input });
}
const Ae = /* @__PURE__ */ p({ real_: Le });
|
|
239
|
+
/**
|
|
240
|
+
* @license
|
|
241
|
+
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
242
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
243
|
+
* you may not use this file except in compliance with the License.
|
|
244
|
+
* You may obtain a copy of the License at
|
|
245
|
+
*
|
|
246
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
247
|
+
*
|
|
248
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
249
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
250
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
251
|
+
* See the License for the specific language governing permissions and
|
|
252
|
+
* limitations under the License.
|
|
253
|
+
* =============================================================================
|
|
254
|
+
*/
|
|
255
|
+
/**
 * Implementation registered as the `relu_` op.
 *
 * @param t Input; normalised through `a(...)` as argument "x".
 * @returns The result of dispatching kernel `le`.
 */
function Be(t) {
  const input = a(t, "x", "relu");
  return u.runKernel(le, { x: input });
}
const Te = /* @__PURE__ */ p({ relu_: Be });
|
|
260
|
+
/**
|
|
261
|
+
* @license
|
|
262
|
+
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
263
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
264
|
+
* you may not use this file except in compliance with the License.
|
|
265
|
+
* You may obtain a copy of the License at
|
|
266
|
+
*
|
|
267
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
268
|
+
*
|
|
269
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
270
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
271
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
272
|
+
* See the License for the specific language governing permissions and
|
|
273
|
+
* limitations under the License.
|
|
274
|
+
* =============================================================================
|
|
275
|
+
*/
|
|
276
|
+
/**
 * Implementation registered as the `relu6_` op.
 *
 * @param t Input; normalised through `a(...)` as argument "x".
 * @returns The result of dispatching kernel `ce`.
 */
function Ne(t) {
  const input = a(t, "x", "relu6");
  return u.runKernel(ce, { x: input });
}
const ve = /* @__PURE__ */ p({ relu6_: Ne });
|
|
281
|
+
/**
|
|
282
|
+
* @license
|
|
283
|
+
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
284
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
285
|
+
* you may not use this file except in compliance with the License.
|
|
286
|
+
* You may obtain a copy of the License at
|
|
287
|
+
*
|
|
288
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
289
|
+
*
|
|
290
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
291
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
292
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
293
|
+
* See the License for the specific language governing permissions and
|
|
294
|
+
* limitations under the License.
|
|
295
|
+
* =============================================================================
|
|
296
|
+
*/
|
|
297
|
+
/**
 * Implementation registered as the `step_` op.
 *
 * @param t Input; normalised through `a(...)` as argument "x".
 * @param e Forwarded as the `alpha` attribute (default 0).
 * @returns The result of dispatching kernel `pe`.
 */
function Ce(t, e = 0) {
  const input = a(t, "x", "step");
  return u.runKernel(pe, { x: input }, { alpha: e });
}
const Ge = /* @__PURE__ */ p({ step_: Ce });
|
|
302
|
+
/**
|
|
303
|
+
* @license
|
|
304
|
+
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
305
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
306
|
+
* you may not use this file except in compliance with the License.
|
|
307
|
+
* You may obtain a copy of the License at
|
|
308
|
+
*
|
|
309
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
310
|
+
*
|
|
311
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
312
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
313
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
314
|
+
* See the License for the specific language governing permissions and
|
|
315
|
+
* limitations under the License.
|
|
316
|
+
* =============================================================================
|
|
317
|
+
*/
|
|
318
|
+
/**
 * Implementation registered as the `transpose_` op.
 *
 * @param t Input; normalised through `a(...)` as argument "x".
 * @param e Axis permutation. When null/undefined, the reversed axis order is used.
 * @param s When truthy and the input dtype is "complex64", the imaginary part
 *          is negated (via `Oe`) after being transposed.
 * @returns A clone of the input for rank <= 1, otherwise the result of kernel `L`.
 */
function Ie(t, e, s) {
  const input = a(t, "x", "transpose");
  if (e == null) {
    e = input.shape.map((_dim, axis) => axis).reverse();
  }
  B(input.rank === e.length, () => `Error in transpose: rank of input ${input.rank} must match length of perm ${e}.`);
  e.forEach((axis) => {
    B(axis >= 0 && axis < input.rank, () => `All entries in 'perm' must be between 0 and ${input.rank - 1} but got ${e}`);
  });
  // Validation runs before this shortcut, exactly as in the bundled original.
  if (input.rank <= 1) {
    return input.clone();
  }
  const attrs = { perm: e };
  if (input.dtype !== "complex64") {
    return u.runKernel(L, { x: input }, attrs);
  }
  // Complex input: split into parts, transpose each, then recombine.
  return he(() => {
    let realPart = Ae(input);
    let imagPart = we(input);
    realPart = u.runKernel(L, { x: realPart }, attrs);
    imagPart = u.runKernel(L, { x: imagPart }, attrs);
    if (s) {
      imagPart = Oe(imagPart);
    }
    return $e(realPart, imagPart);
  });
}
const je = /* @__PURE__ */ p({ transpose_: Ie });
|
|
331
|
+
/**
|
|
332
|
+
* @license
|
|
333
|
+
* Copyright 2019 Google LLC. All Rights Reserved.
|
|
334
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
335
|
+
* you may not use this file except in compliance with the License.
|
|
336
|
+
* You may obtain a copy of the License at
|
|
337
|
+
*
|
|
338
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
339
|
+
*
|
|
340
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
341
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
342
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
343
|
+
* See the License for the specific language governing permissions and
|
|
344
|
+
* limitations under the License.
|
|
345
|
+
* =============================================================================
|
|
346
|
+
*/
|
|
347
|
+
/**
 * Maps an incoming gradient through a fused activation.
 *
 * @param t Incoming gradient.
 * @param e Value handed to `Ge` (the step op) for the "relu" case.
 * @param s Activation name; null/undefined is treated like "linear".
 * @returns `t` unchanged for linear, `fe(t, Ge(e))` for "relu".
 * @throws {Error} For any other activation name.
 */
function qe(t, e, s) {
  const isLinear = s == null || s === "linear";
  if (isLinear) {
    return t;
  }
  if (s !== "relu") {
    throw new Error(`Cannot compute gradient for fused activation ${s}.`);
  }
  return fe(t, Ge(e));
}
|
|
354
|
+
/**
 * Produces a value shaped like `t` from `e`.
 *
 * `me(t.shape, e.shape)` yields a list of axes; when it is non-empty, `e` is
 * summed over those axes (via `Me`) before being reshaped to `t.shape`.
 * NOTE(review): this looks like the bias-gradient reduction used by the fused
 * matMul gradient — confirm against the bundled index module.
 *
 * @param t Object whose `shape` is the target shape.
 * @param e Value to reduce and reshape.
 */
function Pe(t, e) {
  const reduceAxes = me(t.shape, e.shape);
  const reduced = reduceAxes.length > 0 ? Me(e, reduceAxes) : e;
  return f(reduced, t.shape);
}
|
|
359
|
+
/**
 * Applies the activation named by `e` to `t`.
 *
 * @param t Input value.
 * @param e Activation name: "linear", "relu", "elu", "relu6", "prelu",
 *          "leakyrelu" or "sigmoid".
 * @param s Second argument for the "prelu" op; ignored otherwise.
 * @param n Second argument for the "leakyrelu" op; ignored otherwise.
 * @returns `t` itself for "linear", otherwise the result of the matching op.
 * @throws {Error} When `e` is not one of the names above.
 */
function Ue(t, e, s, n) {
  switch (e) {
    case "linear":
      return t;
    case "relu":
      return Te(t);
    case "elu":
      return ye(t);
    case "relu6":
      return ve(t);
    case "prelu":
      return Fe(t, s);
    case "leakyrelu":
      return Ee(t, n);
    case "sigmoid":
      return De(t);
    default:
      throw new Error(`Unknown fused activation ${e}.`);
  }
}
|
|
376
|
+
/**
 * Predicate consulted by the fused matMul before choosing its code path.
 * Returns true when `t` is not greater than zero, or when the activation `e`
 * is "linear"; the caller takes the unfused path only when this is false.
 */
const He = (t, e) => {
  if (!(t > 0)) {
    return true;
  }
  return e === "linear";
};
|
|
377
|
+
/**
|
|
378
|
+
* @license
|
|
379
|
+
* Copyright 2019 Google LLC. All Rights Reserved.
|
|
380
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
381
|
+
* you may not use this file except in compliance with the License.
|
|
382
|
+
* You may obtain a copy of the License at
|
|
383
|
+
*
|
|
384
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
385
|
+
*
|
|
386
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
387
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
388
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
389
|
+
* See the License for the specific language governing permissions and
|
|
390
|
+
* limitations under the License.
|
|
391
|
+
* =============================================================================
|
|
392
|
+
*/
|
|
393
|
+
/**
 * Implementation registered as the `fusedMatMul_` op: matrix multiply with an
 * optional bias and activation, dispatched as one kernel (`H`) with a custom
 * gradient.
 *
 * When `He(u.state.gradientDepth, activation)` is false, the work is done with
 * separate calls instead: `d` (matMul), then `de` for the bias, then `Ue` for
 * the activation.
 *
 * @param {object} args
 * @param args.a Left operand.
 * @param args.b Right operand.
 * @param {boolean} [args.transposeA=false]
 * @param {boolean} [args.transposeB=false]
 * @param [args.bias] Optional bias.
 * @param {string} [args.activation="linear"]
 * @param [args.preluActivationWeights] Optional weights for the "prelu" activation.
 * @param {number} [args.leakyreluAlpha=0.2]
 * @returns The product reshaped to the combined batch shape plus the two outer dims.
 */
function Je({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activation: i = "linear", preluActivationWeights: l, leakyreluAlpha: h = 0.2 }) {
  if (He(u.state.gradientDepth, i) === false) {
    let unfused = d(t, e, s, n);
    if (r != null) {
      unfused = de(unfused, r);
    }
    return Ue(unfused, i, l, h);
  }

  let lhs = a(t, "a", "fused matMul");
  let rhs = a(e, "b", "fused matMul");
  [lhs, rhs] = A(lhs, rhs);

  // Contracted ("inner") and surviving ("outer") dimensions of each operand.
  const innerA = s ? lhs.shape[lhs.rank - 2] : lhs.shape[lhs.rank - 1];
  const innerB = n ? rhs.shape[rhs.rank - 1] : rhs.shape[rhs.rank - 2];
  const outerA = s ? lhs.shape[lhs.rank - 1] : lhs.shape[lhs.rank - 2];
  const outerB = n ? rhs.shape[rhs.rank - 2] : rhs.shape[rhs.rank - 1];
  const batchDimsA = lhs.shape.slice(0, -2);
  const batchDimsB = rhs.shape.slice(0, -2);
  const batchA = q(batchDimsA);
  const batchB = q(batchDimsB);

  B(innerA === innerB, () => `Error in fused matMul: inner shapes (${innerA}) and (${innerB}) of Tensors with shapes ${lhs.shape} and ${rhs.shape} and transposeA=${s} and transposeB=${n} must match.`);

  const outShape = P(lhs.shape.slice(0, -2), rhs.shape.slice(0, -2)).concat([outerA, outerB]);
  // Both operands are flattened to rank 3 before reaching the kernel.
  const a3D = s ? f(lhs, [batchA, innerA, outerA]) : f(lhs, [batchA, outerA, innerA]);
  const b3D = n ? f(rhs, [batchB, outerB, innerB]) : f(rhs, [batchB, innerB, outerB]);

  let biasTensor;
  if (r != null) {
    biasTensor = a(r, "bias", "fused matMul");
    [biasTensor] = A(biasTensor, lhs);
    // Return value is discarded; presumably called only to validate that the
    // bias broadcasts against the output shape — confirm.
    P(outShape, biasTensor.shape);
  }

  let preluWeights;
  if (l != null) {
    preluWeights = a(l, "prelu weights", "fused matMul");
  }

  // Gradient: `saved` is [a3D, b3D, kernelOutput] plus the bias when one was given.
  const gradFunc = (dy, saved) => {
    const [savedA, savedB, savedY, savedBias] = saved;
    const dyAct = qe(f(dy, savedY.shape), savedY, i);
    let aDer;
    let bDer;
    if (!s && !n) {
      aDer = d(dyAct, savedB, false, true);
      bDer = d(savedA, dyAct, true, false);
    } else if (!s && n) {
      aDer = d(dyAct, savedB, false, false);
      bDer = d(dyAct, savedA, true, false);
    } else if (s && !n) {
      aDer = d(savedB, dyAct, false, true);
      bDer = d(savedA, dyAct, false, false);
    } else {
      aDer = d(savedB, dyAct, true, true);
      bDer = d(dyAct, savedA, true, true);
    }
    if (r != null) {
      const biasDer = Pe(savedBias, dyAct);
      return [aDer, bDer, biasDer];
    }
    return [aDer, bDer];
  };

  const kernelInputs = {
    a: a3D,
    b: b3D,
    bias: biasTensor,
    preluActivationWeights: preluWeights
  };
  const kernelAttrs = { transposeA: s, transposeB: n, activation: i, leakyreluAlpha: h };

  if (r == null) {
    const withoutBias = U((argA, argB, save) => {
      const result = u.runKernel(H, kernelInputs, kernelAttrs);
      save([argA, argB, result]);
      return { value: f(result, outShape), gradFunc };
    });
    return withoutBias(a3D, b3D);
  }

  const withBias = U((argA, argB, argBias, save) => {
    const result = u.runKernel(H, kernelInputs, kernelAttrs);
    save([argA, argB, result, argBias]);
    return { value: f(result, outShape), gradFunc };
  });
  return withBias(a3D, b3D, biasTensor);
}
const J = /* @__PURE__ */ p({ fusedMatMul_: Je });
|
|
436
|
+
/**
|
|
437
|
+
* @license
|
|
438
|
+
* Copyright 2018 Google LLC
|
|
439
|
+
*
|
|
440
|
+
* Use of this source code is governed by an MIT-style
|
|
441
|
+
* license that can be found in the LICENSE file or at
|
|
442
|
+
* https://opensource.org/licenses/MIT.
|
|
443
|
+
* =============================================================================
|
|
444
|
+
*/
|
|
445
|
+
/**
 * Error subclass thrown by the `dot` helper in this module.
 * The prototype is re-set explicitly so `instanceof W` keeps working even when
 * the class is down-levelled by a transpiler.
 */
class W extends Error {
  constructor(e) {
    super(e);
    Object.setPrototypeOf(this, W.prototype);
  }
}
|
|
450
|
+
/**
|
|
451
|
+
* @license
|
|
452
|
+
* Copyright 2018 Google LLC
|
|
453
|
+
*
|
|
454
|
+
* Use of this source code is governed by an MIT-style
|
|
455
|
+
* license that can be found in the LICENSE file or at
|
|
456
|
+
* https://opensource.org/licenses/MIT.
|
|
457
|
+
* =============================================================================
|
|
458
|
+
*/
|
|
459
|
+
/**
 * Dot product of `t` and `e` built on the fused matMul (`J`).
 *
 * Two rank-2 inputs go straight to `J`. Otherwise `t` is flattened to
 * `[-1, lastDim]`, `e` has its second-to-last axis moved to the front and is
 * flattened to `[thatDim, -1]`, and the product is reshaped to
 * `[...t.shape minus last, ...e.shape minus second-to-last]`.
 *
 * @param t Left operand (rank >= 2).
 * @param e Right operand (rank >= 2).
 * @param s Activation name forwarded to `J`.
 * @param n Unused in this function.
 * @throws {W} When either rank is below 2, or when `e` has rank >= 3 and its
 *         second-to-last dim differs from the last dim of `t`.
 */
function Qe(t, e, s, n) {
  if (t.rank < 2 || e.rank < 2) {
    throw new W(`dot requires both inputs to be rank >= 2 but got x shape = ${t.shape} and y shape = ${e.shape}`);
  }
  if (e.rank >= 3) {
    const xLastDim = t.shape.slice(-1)[0];
    const ySecondLastDim = e.shape.slice(-2)[0];
    if (xLastDim !== ySecondLastDim) {
      throw new W(`If rank y >= 3, then the second last dim of y must equal the last dim of x but got x shape = ${t.shape} and y shape = ${e.shape}`);
    }
  }
  if (t.rank === 2 && e.rank === 2) {
    return J({
      a: t,
      b: e,
      transposeA: false,
      transposeB: false,
      bias: null,
      activation: s
    });
  }

  const xLeadDims = t.shape.slice();
  const xInnerDim = xLeadDims.pop();
  const x2D = f(t, [-1, xInnerDim]);

  const yLeadDims = e.shape.slice();
  const yLastDim = yLeadDims.pop();
  const yInnerDim = yLeadDims.pop();
  const yOutDims = [...yLeadDims, yLastDim];
  // Permutation that brings axis (rank - 2) to the front and shifts the
  // earlier axes right by one; the last axis stays in place.
  const perm = Array.from({ length: e.rank }, (_unused, axis) => {
    if (axis === 0) {
      return e.rank - 2;
    }
    return axis <= e.rank - 2 ? axis - 1 : axis;
  });
  const y2D = f(je(e, perm), [yInnerDim, -1]);

  const outShape = [...xLeadDims, ...yOutDims];
  const product = J({
    a: x2D,
    b: y2D,
    transposeA: false,
    transposeB: false,
    bias: null,
    activation: s
  });
  return f(product, outShape);
}
|
|
492
|
+
/**
 * Embedding layer whose single weight matrix is used both to look up token
 * embeddings (`embed`) and, transposed, to project back out (`project`).
 */
class Ye {
  vocabSize;
  embedDim;
  tf;
  tiedWeights;
  initializer;

  /**
   * @param e TensorFlow.js namespace; `initializers.randomNormal`, `variable`
   *          and `gather` are used.
   * @param s Config object providing `vocabSize` and `embedDim`.
   * @param n Optional variable name; falls back to "tied_embedding" when falsy.
   */
  constructor(e, s, n) {
    this.vocabSize = s.vocabSize;
    this.embedDim = s.embedDim;
    this.tf = e;
    this.initializer = this.tf.initializers.randomNormal({
      mean: 0,
      stddev: 0.02
    });
    const initialValue = this.initializer.apply([this.vocabSize, this.embedDim]);
    // Second argument `true` is the variable's trainable flag.
    this.tiedWeights = this.tf.variable(initialValue, true, n || "tied_embedding");
  }

  /** The layer's variables: just the shared weight matrix. */
  get variables() {
    return [this.tiedWeights];
  }

  /** Gathers rows of the weight matrix along axis 0 for the indices in `e`. */
  embed(e) {
    return this.tf.gather(this.tiedWeights, e, 0);
  }

  /** Multiplies `e` by the transposed weight matrix via the `Qe` dot helper. */
  project(e) {
    const transposed = this.tiedWeights.transpose();
    return Qe(e, transposed);
  }

  /** Returns the weight matrix wrapped in a one-element array. */
  getWeights() {
    return [this.tiedWeights];
  }

  /** Assigns the first element of `e` to the weight variable. */
  setWeights(e) {
    const [weights] = [e[0]];
    this.tiedWeights.assign(weights);
  }

  /** Returns `{ vocabSize, embedDim }`. */
  getConfig() {
    const { vocabSize, embedDim } = this;
    return { vocabSize, embedDim };
  }
}
|
|
530
|
+
export {
|
|
531
|
+
Ye as default
|
|
532
|
+
};
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { default as TF } from '@tensorflow/tfjs';
|
|
2
|
+
import { GPTConfig } from '../config';
|
|
3
|
+
/**
 * One transformer block: layer norm, causal self-attention, a second layer
 * norm and an MLP, with a residual addition after the attention and after
 * the MLP.
 */
export default class Block {
    /** Layer norm applied to the block input before attention. */
    private ln1;
    /** Causal self-attention sub-layer. */
    private attn;
    /** Layer norm applied to the attention residual before the MLP. */
    private ln2;
    /** Feed-forward sub-layer. */
    private mlp;
    /** TensorFlow.js namespace handed to the constructor. */
    private tf;
    /** Position of this block; used in the weight names `block_<index>_ln1` / `_ln2`. */
    private index;
    /** Backing field for the `trainable` accessor. */
    private _trainable;
    /** When true, `call` returns its input unchanged. */
    skipped: boolean;
    constructor(tf: typeof TF, index: number, config: GPTConfig);
    /** Variables of both layer norms, the attention layer and the MLP. */
    get variables(): TF.Variable[];
    get trainable(): boolean;
    /** Setting this also sets `trainable` on every sub-layer. */
    set trainable(value: boolean);
    /** Writes this block's weights into `map`. */
    saveWeights(map: Map<string, TF.Tensor[]>): void;
    /** Restores this block's weights from `weights`. */
    loadWeights(weights: Map<string, TF.Tensor[]>): void;
    /** Runs the block on `x`; `training` is forwarded to the attention and MLP layers. */
    call(x: TF.Tensor, training?: boolean): TF.Tensor;
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import r from "./CausalSelfAttention.js";
|
|
2
|
+
import d from "./MLP.js";
|
|
3
|
+
import n from "./LayerNorm.js";
|
|
4
|
+
/**
 * Transformer block: `x + attn(ln1(x))` followed by `h + mlp(ln2(h))`.
 */
class _ {
  ln1;
  attn;
  ln2;
  mlp;
  tf;
  index;
  _trainable = true;
  skipped = false;

  /**
   * @param t TensorFlow.js namespace.
   * @param s Index of this block; used to name the layer norms.
   * @param i Model config; `nEmbed` sizes the layer norms and the whole object
   *          is passed on to the attention and MLP layers.
   */
  constructor(t, s, i) {
    this.tf = t;
    this.index = s;
    this.ln1 = new n(t, [i.nEmbed], 1e-5, `block_${this.index}_ln1`);
    this.attn = new r(this.tf, this.index, i);
    this.ln2 = new n(t, [i.nEmbed], 1e-5, `block_${this.index}_ln2`);
    this.mlp = new d(this.tf, this.index, i);
  }

  /** All variables of the block, in the order ln1, attention, ln2, MLP. */
  get variables() {
    const firstNorm = this.ln1.trainableWeights.map((weight) => weight);
    const attention = this.attn.variables;
    const secondNorm = this.ln2.trainableWeights.map((weight) => weight);
    const feedForward = this.mlp.variables;
    return [...firstNorm, ...attention, ...secondNorm, ...feedForward];
  }

  get trainable() {
    return this._trainable;
  }

  /** Propagates the flag to both layer norms, the attention layer and the MLP. */
  set trainable(t) {
    this._trainable = t;
    this.ln1.trainable = t;
    this.ln2.trainable = t;
    this.attn.trainable = t;
    this.mlp.trainable = t;
  }

  /** Stores sub-layer weights in `t`; layer norms use the keys `block_<index>_ln1/_ln2`. */
  saveWeights(t) {
    this.attn.saveWeights(t);
    this.mlp.saveWeights(t);
    t.set(`block_${this.index}_ln1`, this.ln1.getWeights());
    t.set(`block_${this.index}_ln2`, this.ln2.getWeights());
  }

  /** Restores sub-layer weights from `t`; a missing layer-norm entry becomes `[]`. */
  loadWeights(t) {
    this.attn.loadWeights(t);
    this.mlp.loadWeights(t);
    const firstNorm = t.get(`block_${this.index}_ln1`) || [];
    this.ln1.setWeights(firstNorm);
    const secondNorm = t.get(`block_${this.index}_ln2`) || [];
    this.ln2.setWeights(secondNorm);
  }

  /**
   * Runs the block inside `tf.tidy`.
   *
   * @param t Input tensor; returned unchanged when `skipped` is set.
   * @param s Training flag forwarded to the attention and MLP layers.
   */
  call(t, s = !1) {
    return this.tf.tidy(() => {
      if (this.skipped) {
        return t;
      }
      const normedInput = this.ln1.apply(t);
      const attended = this.attn.call(normedInput, s);
      const afterAttention = t.add(attended);
      const normedHidden = this.ln2.apply(afterAttention);
      const fedForward = this.mlp.call(normedHidden, s);
      return afterAttention.add(fedForward);
    });
  }
}
|
|
45
|
+
export {
|
|
46
|
+
_ as default
|
|
47
|
+
};
|
package/dist/main.d.ts
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
export { default as NanoGPT } from './NanoGPTModel';
|
|
2
|
+
export { default as TeachableLLM } from './TeachableLLM';
|
|
3
|
+
export { default as CharTokeniser } from './tokeniser/CharTokeniser';
|
|
4
|
+
export type { ITrainerOptions } from './Trainer';
|
|
5
|
+
export type { IGenerateOptions } from './Generator';
|
|
6
|
+
export type { TrainingLogEntry } from './NanoGPTModel';
|
package/dist/main.js
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { default as EE } from 'eventemitter3';
|
|
2
|
+
import { ITokeniser } from './type';
|
|
3
|
+
/**
 * Character-level tokeniser: each distinct character seen in training is one
 * token, plus an "<eos>" entry.
 */
export default class CharTokeniser extends EE<'trainStatus'> implements ITokeniser {
    /** Number of vocabulary entries; 0 until a vocabulary exists. */
    vocabSize: number;
    /** Index of "<eos>" in `vocab`. */
    eosToken: number;
    /** Vocabulary, indexed by token id. */
    vocab: string[];
    /** Lookup from vocabulary entry to token id. */
    private cache;
    /** @param vocab Optional existing vocabulary to start from. */
    constructor(vocab?: string[]);
    /** True once the vocabulary is non-empty. */
    get trained(): boolean;
    destroy(): void;
    /** Builds the vocabulary from `text` and resolves to its size. */
    train(text: string[]): Promise<number>;
    /** Numeric form: one array of token ids per input string. */
    tokenise(text: string[], numeric: true): Promise<number[][]>;
    /** String form: one array of single-character tokens per input string. */
    tokenise(text: string[]): Promise<string[][]>;
    /** Joins the vocabulary entries for each array of token ids. */
    detokenise(tokens: number[][]): Promise<string[]>;
    /** Token ids for a single string. */
    encode(text: string): Promise<number[]>;
    /** String for a single array of token ids. */
    decode(tokens: number[]): Promise<string>;
    getVocab(): string[];
    /** Always resolves to an empty list for this tokeniser. */
    getMerges(): Promise<[string, string][]>;
    /** Resolves to `[inputs, targets]` as flat arrays of token ids. */
    createTrainingData(text: string[], windowSize?: number): Promise<[number[], number[]]>;
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { E as r } from "../index-SOhdqzHq.js";
|
|
2
|
+
/**
 * Character-level tokeniser. Every distinct UTF-16 code unit seen during
 * training becomes one token, and an "<eos>" entry is appended at the end of
 * the vocabulary. Characters outside the vocabulary encode to -1.
 */
class h extends r {
  /** Number of entries in `vocab`; 0 until a vocabulary exists. */
  vocabSize = 0;
  /** Index of "<eos>" in `vocab` (-1 when a supplied vocabulary lacks it). */
  eosToken = 0;
  /** Vocabulary, indexed by token id. */
  vocab = [];
  /** Reverse lookup from vocabulary entry to token id. */
  cache = /* @__PURE__ */ new Map();

  /**
   * @param {string[]} [t] Existing vocabulary. When non-empty the tokeniser is
   *        immediately usable without calling `train`.
   */
  constructor(t) {
    super();
    this.vocab = t || [];
    if (this.vocab.length > 0) {
      this.vocabSize = this.vocab.length;
      this.eosToken = this.vocab.indexOf("<eos>");
      this.#rebuildCache();
    }
  }

  /**
   * Rebuilds `cache` from the current `vocab`.
   * The map is cleared first so entries from an earlier vocabulary cannot
   * survive a retrain and resolve to indices that now mean something else.
   */
  #rebuildCache() {
    this.cache.clear();
    this.vocab.forEach((entry, index) => {
      this.cache.set(entry, index);
    });
  }

  /** True once the vocabulary is non-empty. */
  get trained() {
    return this.vocabSize > 0;
  }

  /** No resources to release; present to satisfy the tokeniser interface. */
  destroy() {
  }

  /**
   * Builds the vocabulary from the given texts, replacing any previous one.
   *
   * @param {string[]} t Training texts.
   * @returns {Promise<number>} The new vocabulary size (distinct characters + 1 for "<eos>").
   */
  async train(t) {
    // Collect code units straight into the Set rather than materialising one
    // flattened array of every character first. Indexing (not for...of) keeps
    // the original per-code-unit granularity of `split("")`.
    const seen = new Set();
    for (const text of t) {
      for (let i = 0; i < text.length; i++) {
        seen.add(text[i]);
      }
    }
    const chars = Array.from(seen);
    chars.sort((x, y) => x.charCodeAt(0) - y.charCodeAt(0));
    this.vocab = [...chars, "<eos>"];
    this.eosToken = this.vocab.indexOf("<eos>");
    this.vocabSize = this.vocab.length;
    this.#rebuildCache();
    return this.vocabSize;
  }

  /**
   * Splits each text into characters.
   *
   * @param {string[]} t Texts to tokenise.
   * @param {boolean} [a] When truthy, return token ids (unknown characters
   *        become -1) instead of the characters themselves.
   * @returns {Promise<number[][] | string[][]>} One token array per input text.
   * @throws {Error} When no vocabulary exists yet.
   */
  async tokenise(t, a) {
    if (!this.trained) {
      throw new Error("Tokeniser not trained");
    }
    return t.map((text) => {
      const chars = text.split("");
      return a ? chars.map((ch) => this.cache.get(ch) ?? -1) : chars;
    });
  }

  /**
   * Turns arrays of token ids back into strings. Ids with no vocabulary entry
   * contribute nothing to the output.
   *
   * @param {number[][]} t Token id arrays.
   * @returns {Promise<string[]>} One string per input array.
   */
  async detokenise(t) {
    return t.map((ids) => ids.map((id) => this.vocab[id]).join(""));
  }

  /** Token ids for a single string. */
  async encode(t) {
    const [ids] = await this.tokenise([t], true);
    return ids;
  }

  /** String for a single array of token ids. */
  async decode(t) {
    const [text] = await this.detokenise([t]);
    return text;
  }

  /** Returns the vocabulary array held by this instance (not a copy). */
  getVocab() {
    return this.vocab;
  }

  /** A character tokeniser has no merges; always resolves to an empty list. */
  async getMerges() {
    return [];
  }

  /**
   * Builds flat input/target arrays from the given texts.
   *
   * For each index `i` below `text.length - windowSize`, the first
   * `windowSize` tokens of text `i` are appended to the inputs and the first
   * token of text `i + 1` is appended to the targets.
   *
   * NOTE(review): the loop bound compares the number of texts with the window
   * size and the target comes from the next text, which does not look like a
   * sliding window over one token stream. Behaviour is kept as-is because the
   * intent cannot be established here — confirm against callers.
   *
   * @param {string[]} t Texts to convert.
   * @param {number} [a=5] Window size.
   * @returns {Promise<[number[], number[]]>} `[inputs, targets]`.
   */
  async createTrainingData(t, a = 5) {
    const tokenised = await this.tokenise(t, true);
    const inputs = [];
    const targets = [];
    for (let i = 0; i < tokenised.length - a; i++) {
      inputs.push(...tokenised[i].slice(0, a));
      targets.push(tokenised[i + 1][0]);
    }
    return [inputs, targets];
  }
}
|
|
50
|
+
export {
|
|
51
|
+
h as default
|
|
52
|
+
};
|