@genai-fi/nanogpt 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/NanoGPTModel.js +43 -44
- package/dist/TeachableLLM.js +2 -0
- package/dist/complex-D6Bq1XDf.js +27 -0
- package/dist/{index-DcaSvB38.js → index-D1SlunD-.js} +553 -522
- package/dist/layers/TiedEmbedding.js +113 -178
- package/dist/main.d.ts +2 -0
- package/dist/main.js +18 -10
- package/dist/ops/gatherSub.d.ts +2 -0
- package/dist/ops/gatherSub.js +66 -0
- package/dist/ops/node/sparseCrossEntropy.d.ts +1 -0
- package/dist/ops/node/sparseCrossEntropy.js +11 -0
- package/dist/ops/scatterSub.d.ts +2 -0
- package/dist/ops/scatterSub.js +150 -0
- package/dist/stack-DB2YLlAs.js +50 -0
- package/dist/sum-02UQ5Eaq.js +49 -0
- package/dist/tokeniser/CharTokeniser.d.ts +1 -0
- package/dist/tokeniser/CharTokeniser.js +48 -39
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +3 -2
- package/dist/training/Trainer.js +3 -3
- package/dist/training/sparseCrossEntropy.d.ts +11 -0
- package/dist/training/sparseCrossEntropy.js +177 -0
- package/dist/utilities/parameters.d.ts +10 -0
- package/dist/utilities/parameters.js +52 -0
- package/package.json +3 -2
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
import { o as
|
|
1
|
+
import { o as h, c as u, x as B, E as c, B as V, y as X, D as Y, I as Z, F as ee, N as te, H as se, J as ne, K as re, O as ae, Q as ue, f as L, w as ie, T as A, m as oe, U as le, t as ce, k as C, V as P, v as U, _ as H } from "../index-D1SlunD-.js";
|
|
2
|
+
import { s as pe, r as f } from "../sum-02UQ5Eaq.js";
|
|
3
|
+
import { c as he } from "../complex-D6Bq1XDf.js";
|
|
2
4
|
/**
|
|
3
5
|
* @license
|
|
4
6
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -15,57 +17,13 @@ import { o as p, c as a, b as V, E as u, C as X, R as Y, d as A, B as Z, S as ee
|
|
|
15
17
|
* limitations under the License.
|
|
16
18
|
* =============================================================================
|
|
17
19
|
*/
|
|
18
|
-
function
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
const r = {
|
|
22
|
-
return
|
|
20
|
+
function fe(t, e, s = !1, n = !1) {
|
|
21
|
+
let r = u(t, "a", "matMul"), i = u(e, "b", "matMul");
|
|
22
|
+
[r, i] = B(r, i);
|
|
23
|
+
const o = { a: r, b: i }, p = { transposeA: s, transposeB: n };
|
|
24
|
+
return c.runKernel(V, o, p);
|
|
23
25
|
}
|
|
24
|
-
const
|
|
25
|
-
/**
|
|
26
|
-
* @license
|
|
27
|
-
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
28
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
29
|
-
* you may not use this file except in compliance with the License.
|
|
30
|
-
* You may obtain a copy of the License at
|
|
31
|
-
*
|
|
32
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
33
|
-
*
|
|
34
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
35
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
36
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
37
|
-
* See the License for the specific language governing permissions and
|
|
38
|
-
* limitations under the License.
|
|
39
|
-
* =============================================================================
|
|
40
|
-
*/
|
|
41
|
-
function xe(t, e) {
|
|
42
|
-
const n = { x: a(t, "x", "reshape", "string_or_numeric") }, r = { shape: e };
|
|
43
|
-
return u.runKernel(Y, n, r);
|
|
44
|
-
}
|
|
45
|
-
const f = /* @__PURE__ */ p({ reshape_: xe });
|
|
46
|
-
/**
|
|
47
|
-
* @license
|
|
48
|
-
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
49
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
50
|
-
* you may not use this file except in compliance with the License.
|
|
51
|
-
* You may obtain a copy of the License at
|
|
52
|
-
*
|
|
53
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
54
|
-
*
|
|
55
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
56
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
57
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
58
|
-
* See the License for the specific language governing permissions and
|
|
59
|
-
* limitations under the License.
|
|
60
|
-
* =============================================================================
|
|
61
|
-
*/
|
|
62
|
-
function ke(t, e, s = !1, n = !1) {
|
|
63
|
-
let r = a(t, "a", "matMul"), i = a(e, "b", "matMul");
|
|
64
|
-
[r, i] = A(r, i);
|
|
65
|
-
const l = { a: r, b: i }, h = { transposeA: s, transposeB: n };
|
|
66
|
-
return u.runKernel(Z, l, h);
|
|
67
|
-
}
|
|
68
|
-
const d = /* @__PURE__ */ p({ matMul_: ke });
|
|
26
|
+
const m = /* @__PURE__ */ h({ matMul_: fe });
|
|
69
27
|
/**
|
|
70
28
|
* @license
|
|
71
29
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -82,11 +40,11 @@ const d = /* @__PURE__ */ p({ matMul_: ke });
|
|
|
82
40
|
* limitations under the License.
|
|
83
41
|
* =============================================================================
|
|
84
42
|
*/
|
|
85
|
-
function
|
|
86
|
-
const s = { x:
|
|
87
|
-
return
|
|
43
|
+
function de(t) {
|
|
44
|
+
const s = { x: u(t, "x", "sigmoid", "float32") };
|
|
45
|
+
return c.runKernel(X, s);
|
|
88
46
|
}
|
|
89
|
-
const
|
|
47
|
+
const me = /* @__PURE__ */ h({ sigmoid_: de });
|
|
90
48
|
/**
|
|
91
49
|
* @license
|
|
92
50
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -103,34 +61,11 @@ const De = /* @__PURE__ */ p({ sigmoid_: be });
|
|
|
103
61
|
* limitations under the License.
|
|
104
62
|
* =============================================================================
|
|
105
63
|
*/
|
|
106
|
-
function
|
|
107
|
-
const s = { x:
|
|
108
|
-
return
|
|
109
|
-
}
|
|
110
|
-
const ye = /* @__PURE__ */ p({ elu_: Se });
|
|
111
|
-
/**
|
|
112
|
-
* @license
|
|
113
|
-
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
114
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
115
|
-
* you may not use this file except in compliance with the License.
|
|
116
|
-
* You may obtain a copy of the License at
|
|
117
|
-
*
|
|
118
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
119
|
-
*
|
|
120
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
121
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
122
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
123
|
-
* See the License for the specific language governing permissions and
|
|
124
|
-
* limitations under the License.
|
|
125
|
-
* =============================================================================
|
|
126
|
-
*/
|
|
127
|
-
function _e(t, e = null, s = !1) {
|
|
128
|
-
let n = a(t, "x", "sum");
|
|
129
|
-
n.dtype === "bool" && (n = se(n, "int32"));
|
|
130
|
-
const r = { x: n }, i = { axis: e, keepDims: s };
|
|
131
|
-
return u.runKernel(ne, r, i);
|
|
64
|
+
function ge(t) {
|
|
65
|
+
const s = { x: u(t, "x", "elu", "float32") };
|
|
66
|
+
return c.runKernel(Y, s);
|
|
132
67
|
}
|
|
133
|
-
const
|
|
68
|
+
const $e = /* @__PURE__ */ h({ elu_: ge });
|
|
134
69
|
/**
|
|
135
70
|
* @license
|
|
136
71
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -147,11 +82,11 @@ const Me = /* @__PURE__ */ p({ sum_: _e });
|
|
|
147
82
|
* limitations under the License.
|
|
148
83
|
* =============================================================================
|
|
149
84
|
*/
|
|
150
|
-
function
|
|
151
|
-
const s = { input:
|
|
152
|
-
return
|
|
85
|
+
function xe(t) {
|
|
86
|
+
const s = { input: u(t, "input", "imag") };
|
|
87
|
+
return c.runKernel(Z, s);
|
|
153
88
|
}
|
|
154
|
-
const
|
|
89
|
+
const ke = /* @__PURE__ */ h({ imag_: xe });
|
|
155
90
|
/**
|
|
156
91
|
* @license
|
|
157
92
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -168,11 +103,11 @@ const we = /* @__PURE__ */ p({ imag_: Ke });
|
|
|
168
103
|
* limitations under the License.
|
|
169
104
|
* =============================================================================
|
|
170
105
|
*/
|
|
171
|
-
function
|
|
172
|
-
const n = { x:
|
|
173
|
-
return
|
|
106
|
+
function De(t, e = 0.2) {
|
|
107
|
+
const n = { x: u(t, "x", "leakyRelu") }, r = { alpha: e };
|
|
108
|
+
return c.runKernel(ee, n, r);
|
|
174
109
|
}
|
|
175
|
-
const
|
|
110
|
+
const be = /* @__PURE__ */ h({ leakyRelu_: De });
|
|
176
111
|
/**
|
|
177
112
|
* @license
|
|
178
113
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -189,11 +124,11 @@ const ze = /* @__PURE__ */ p({ leakyRelu_: We });
|
|
|
189
124
|
* limitations under the License.
|
|
190
125
|
* =============================================================================
|
|
191
126
|
*/
|
|
192
|
-
function
|
|
193
|
-
const s = { x:
|
|
194
|
-
return
|
|
127
|
+
function ye(t) {
|
|
128
|
+
const s = { x: u(t, "x", "neg") };
|
|
129
|
+
return c.runKernel(te, s);
|
|
195
130
|
}
|
|
196
|
-
const
|
|
131
|
+
const Se = /* @__PURE__ */ h({ neg_: ye });
|
|
197
132
|
/**
|
|
198
133
|
* @license
|
|
199
134
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -210,11 +145,11 @@ const Oe = /* @__PURE__ */ p({ neg_: Ee });
|
|
|
210
145
|
* limitations under the License.
|
|
211
146
|
* =============================================================================
|
|
212
147
|
*/
|
|
213
|
-
function
|
|
214
|
-
const s =
|
|
215
|
-
return
|
|
148
|
+
function Me(t, e) {
|
|
149
|
+
const s = u(t, "x", "prelu"), n = u(e, "alpha", "prelu"), r = { x: s, alpha: n };
|
|
150
|
+
return c.runKernel(se, r);
|
|
216
151
|
}
|
|
217
|
-
const
|
|
152
|
+
const Ke = /* @__PURE__ */ h({ prelu_: Me });
|
|
218
153
|
/**
|
|
219
154
|
* @license
|
|
220
155
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -231,11 +166,11 @@ const Fe = /* @__PURE__ */ p({ prelu_: Re });
|
|
|
231
166
|
* limitations under the License.
|
|
232
167
|
* =============================================================================
|
|
233
168
|
*/
|
|
234
|
-
function
|
|
235
|
-
const s = { input:
|
|
236
|
-
return
|
|
169
|
+
function _e(t) {
|
|
170
|
+
const s = { input: u(t, "input", "real") };
|
|
171
|
+
return c.runKernel(ne, s);
|
|
237
172
|
}
|
|
238
|
-
const
|
|
173
|
+
const we = /* @__PURE__ */ h({ real_: _e });
|
|
239
174
|
/**
|
|
240
175
|
* @license
|
|
241
176
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -252,11 +187,11 @@ const Ae = /* @__PURE__ */ p({ real_: Le });
|
|
|
252
187
|
* limitations under the License.
|
|
253
188
|
* =============================================================================
|
|
254
189
|
*/
|
|
255
|
-
function
|
|
256
|
-
const s = { x:
|
|
257
|
-
return
|
|
190
|
+
function We(t) {
|
|
191
|
+
const s = { x: u(t, "x", "relu") };
|
|
192
|
+
return c.runKernel(re, s);
|
|
258
193
|
}
|
|
259
|
-
const
|
|
194
|
+
const ze = /* @__PURE__ */ h({ relu_: We });
|
|
260
195
|
/**
|
|
261
196
|
* @license
|
|
262
197
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
|
@@ -273,11 +208,11 @@ const Te = /* @__PURE__ */ p({ relu_: Be });
|
|
|
273
208
|
* limitations under the License.
|
|
274
209
|
* =============================================================================
|
|
275
210
|
*/
|
|
276
|
-
function
|
|
277
|
-
const s = { x:
|
|
278
|
-
return
|
|
211
|
+
function Ee(t) {
|
|
212
|
+
const s = { x: u(t, "x", "relu6") };
|
|
213
|
+
return c.runKernel(ae, s);
|
|
279
214
|
}
|
|
280
|
-
const
|
|
215
|
+
const Oe = /* @__PURE__ */ h({ relu6_: Ee });
|
|
281
216
|
/**
|
|
282
217
|
* @license
|
|
283
218
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -294,11 +229,11 @@ const ve = /* @__PURE__ */ p({ relu6_: Ne });
|
|
|
294
229
|
* limitations under the License.
|
|
295
230
|
* =============================================================================
|
|
296
231
|
*/
|
|
297
|
-
function
|
|
298
|
-
const n = { x:
|
|
299
|
-
return
|
|
232
|
+
function Fe(t, e = 0) {
|
|
233
|
+
const n = { x: u(t, "x", "step") }, r = { alpha: e };
|
|
234
|
+
return c.runKernel(ue, n, r);
|
|
300
235
|
}
|
|
301
|
-
const
|
|
236
|
+
const Re = /* @__PURE__ */ h({ step_: Fe });
|
|
302
237
|
/**
|
|
303
238
|
* @license
|
|
304
239
|
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
@@ -315,19 +250,19 @@ const Ge = /* @__PURE__ */ p({ step_: Ce });
|
|
|
315
250
|
* limitations under the License.
|
|
316
251
|
* =============================================================================
|
|
317
252
|
*/
|
|
318
|
-
function
|
|
319
|
-
const n =
|
|
320
|
-
if (e == null && (e = n.shape.map((
|
|
321
|
-
|
|
253
|
+
function Ae(t, e, s) {
|
|
254
|
+
const n = u(t, "x", "transpose");
|
|
255
|
+
if (e == null && (e = n.shape.map((o, p) => p).reverse()), L(n.rank === e.length, () => `Error in transpose: rank of input ${n.rank} must match length of perm ${e}.`), e.forEach((o) => {
|
|
256
|
+
L(o >= 0 && o < n.rank, () => `All entries in 'perm' must be between 0 and ${n.rank - 1} but got ${e}`);
|
|
322
257
|
}), n.rank <= 1)
|
|
323
258
|
return n.clone();
|
|
324
259
|
const r = { x: n }, i = { perm: e };
|
|
325
|
-
return n.dtype === "complex64" ?
|
|
326
|
-
let
|
|
327
|
-
return
|
|
328
|
-
}) :
|
|
260
|
+
return n.dtype === "complex64" ? ie(() => {
|
|
261
|
+
let o = we(n), p = ke(n);
|
|
262
|
+
return o = c.runKernel(A, { x: o }, i), p = c.runKernel(A, { x: p }, i), s && (p = Se(p)), he(o, p);
|
|
263
|
+
}) : c.runKernel(A, r, i);
|
|
329
264
|
}
|
|
330
|
-
const
|
|
265
|
+
const Be = /* @__PURE__ */ h({ transpose_: Ae });
|
|
331
266
|
/**
|
|
332
267
|
* @license
|
|
333
268
|
* Copyright 2019 Google LLC. All Rights Reserved.
|
|
@@ -344,36 +279,36 @@ const je = /* @__PURE__ */ p({ transpose_: Ie });
|
|
|
344
279
|
* limitations under the License.
|
|
345
280
|
* =============================================================================
|
|
346
281
|
*/
|
|
347
|
-
function
|
|
282
|
+
function Le(t, e, s) {
|
|
348
283
|
if (s == null || s === "linear")
|
|
349
284
|
return t;
|
|
350
285
|
if (s === "relu")
|
|
351
|
-
return
|
|
286
|
+
return oe(t, Re(e));
|
|
352
287
|
throw new Error(`Cannot compute gradient for fused activation ${s}.`);
|
|
353
288
|
}
|
|
354
|
-
function
|
|
289
|
+
function Te(t, e) {
|
|
355
290
|
let s = e;
|
|
356
|
-
const n =
|
|
357
|
-
return n.length > 0 && (s =
|
|
291
|
+
const n = le(t.shape, e.shape);
|
|
292
|
+
return n.length > 0 && (s = pe(s, n)), f(s, t.shape);
|
|
358
293
|
}
|
|
359
|
-
function
|
|
294
|
+
function Ne(t, e, s, n) {
|
|
360
295
|
if (e === "linear")
|
|
361
296
|
return t;
|
|
362
297
|
if (e === "relu")
|
|
363
|
-
return
|
|
298
|
+
return ze(t);
|
|
364
299
|
if (e === "elu")
|
|
365
|
-
return
|
|
300
|
+
return $e(t);
|
|
366
301
|
if (e === "relu6")
|
|
367
|
-
return
|
|
302
|
+
return Oe(t);
|
|
368
303
|
if (e === "prelu")
|
|
369
|
-
return
|
|
304
|
+
return Ke(t, s);
|
|
370
305
|
if (e === "leakyrelu")
|
|
371
|
-
return
|
|
306
|
+
return be(t, n);
|
|
372
307
|
if (e === "sigmoid")
|
|
373
|
-
return
|
|
308
|
+
return me(t);
|
|
374
309
|
throw new Error(`Unknown fused activation ${e}.`);
|
|
375
310
|
}
|
|
376
|
-
const
|
|
311
|
+
const ve = (t, e) => !(t > 0) || e === "linear";
|
|
377
312
|
/**
|
|
378
313
|
* @license
|
|
379
314
|
* Copyright 2019 Google LLC. All Rights Reserved.
|
|
@@ -390,49 +325,49 @@ const He = (t, e) => !(t > 0) || e === "linear";
|
|
|
390
325
|
* limitations under the License.
|
|
391
326
|
* =============================================================================
|
|
392
327
|
*/
|
|
393
|
-
function
|
|
394
|
-
if (
|
|
395
|
-
let x =
|
|
396
|
-
return r != null && (x =
|
|
328
|
+
function Ge({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activation: i = "linear", preluActivationWeights: o, leakyreluAlpha: p = 0.2 }) {
|
|
329
|
+
if (ve(c.state.gradientDepth, i) === !1) {
|
|
330
|
+
let x = m(t, e, s, n);
|
|
331
|
+
return r != null && (x = ce(x, r)), Ne(x, i, o, p);
|
|
397
332
|
}
|
|
398
|
-
let
|
|
399
|
-
[
|
|
400
|
-
const
|
|
401
|
-
|
|
402
|
-
const O = P(
|
|
403
|
-
let
|
|
404
|
-
r != null && (
|
|
405
|
-
let
|
|
406
|
-
|
|
407
|
-
const
|
|
408
|
-
const [g, $, k, z] =
|
|
409
|
-
let
|
|
410
|
-
if (!s && !n ? (
|
|
411
|
-
const Q =
|
|
412
|
-
return [
|
|
333
|
+
let a = u(t, "a", "fused matMul"), l = u(e, "b", "fused matMul");
|
|
334
|
+
[a, l] = B(a, l);
|
|
335
|
+
const D = s ? a.shape[a.rank - 2] : a.shape[a.rank - 1], b = n ? l.shape[l.rank - 1] : l.shape[l.rank - 2], w = s ? a.shape[a.rank - 1] : a.shape[a.rank - 2], W = n ? l.shape[l.rank - 2] : l.shape[l.rank - 1], T = a.shape.slice(0, -2), y = l.shape.slice(0, -2), N = C(T), v = C(y);
|
|
336
|
+
L(D === b, () => `Error in fused matMul: inner shapes (${D}) and (${b}) of Tensors with shapes ${a.shape} and ${l.shape} and transposeA=${s} and transposeB=${n} must match.`);
|
|
337
|
+
const O = P(a.shape.slice(0, -2), l.shape.slice(0, -2)).concat([w, W]), F = s ? f(a, [N, D, w]) : f(a, [N, w, D]), R = n ? f(l, [v, W, b]) : f(l, [v, b, W]);
|
|
338
|
+
let S;
|
|
339
|
+
r != null && (S = u(r, "bias", "fused matMul"), [S] = B(S, a), P(O, S.shape));
|
|
340
|
+
let G;
|
|
341
|
+
o != null && (G = u(o, "prelu weights", "fused matMul"));
|
|
342
|
+
const I = (x, _) => {
|
|
343
|
+
const [g, $, k, z] = _, d = Le(f(x, k.shape), k, i);
|
|
344
|
+
let M, K;
|
|
345
|
+
if (!s && !n ? (M = m(d, $, !1, !0), K = m(g, d, !0, !1)) : !s && n ? (M = m(d, $, !1, !1), K = m(d, g, !0, !1)) : s && !n ? (M = m($, d, !1, !0), K = m(g, d, !1, !1)) : (M = m($, d, !0, !0), K = m(d, g, !0, !0)), r != null) {
|
|
346
|
+
const Q = Te(z, d);
|
|
347
|
+
return [M, K, Q];
|
|
413
348
|
} else
|
|
414
|
-
return [
|
|
415
|
-
},
|
|
416
|
-
a:
|
|
417
|
-
b:
|
|
418
|
-
bias:
|
|
419
|
-
preluActivationWeights:
|
|
420
|
-
},
|
|
421
|
-
return r == null ? U((
|
|
349
|
+
return [M, K];
|
|
350
|
+
}, j = {
|
|
351
|
+
a: F,
|
|
352
|
+
b: R,
|
|
353
|
+
bias: S,
|
|
354
|
+
preluActivationWeights: G
|
|
355
|
+
}, q = { transposeA: s, transposeB: n, activation: i, leakyreluAlpha: p };
|
|
356
|
+
return r == null ? U((_, g, $) => {
|
|
422
357
|
const k = (
|
|
423
358
|
// tslint:disable-next-line: no-unnecessary-type-assertion
|
|
424
|
-
|
|
359
|
+
c.runKernel(H, j, q)
|
|
425
360
|
);
|
|
426
|
-
return $([
|
|
427
|
-
})(
|
|
361
|
+
return $([_, g, k]), { value: f(k, O), gradFunc: I };
|
|
362
|
+
})(F, R) : U((_, g, $, k) => {
|
|
428
363
|
const z = (
|
|
429
364
|
// tslint:disable-next-line: no-unnecessary-type-assertion
|
|
430
|
-
|
|
365
|
+
c.runKernel(H, j, q)
|
|
431
366
|
);
|
|
432
|
-
return k([
|
|
433
|
-
})(
|
|
367
|
+
return k([_, g, z, $]), { value: f(z, O), gradFunc: I };
|
|
368
|
+
})(F, R, S);
|
|
434
369
|
}
|
|
435
|
-
const J = /* @__PURE__ */
|
|
370
|
+
const J = /* @__PURE__ */ h({ fusedMatMul_: Ge });
|
|
436
371
|
/**
|
|
437
372
|
* @license
|
|
438
373
|
* Copyright 2018 Google LLC
|
|
@@ -456,7 +391,7 @@ class E extends Error {
|
|
|
456
391
|
* https://opensource.org/licenses/MIT.
|
|
457
392
|
* =============================================================================
|
|
458
393
|
*/
|
|
459
|
-
function
|
|
394
|
+
function Ie(t, e, s, n) {
|
|
460
395
|
if (t.rank < 2 || e.rank < 2)
|
|
461
396
|
throw new E(`dot requires both inputs to be rank >= 2 but got x shape = ${t.shape} and y shape = ${e.shape}`);
|
|
462
397
|
if (e.rank >= 3) {
|
|
@@ -476,9 +411,9 @@ function Qe(t, e, s, n) {
|
|
|
476
411
|
{
|
|
477
412
|
const r = t.shape.slice(), i = r.pop();
|
|
478
413
|
t = f(t, [-1, i]);
|
|
479
|
-
const
|
|
480
|
-
e = f(
|
|
481
|
-
const
|
|
414
|
+
const o = e.shape.slice(), p = o.pop(), a = o.pop(), l = [...o, p], D = Array.from({ length: e.rank }, (T, y) => y === 0 ? e.rank - 2 : y <= e.rank - 2 ? y - 1 : y);
|
|
415
|
+
e = f(Be(e, D), [a, -1]);
|
|
416
|
+
const b = [...r, ...l];
|
|
482
417
|
return f(J({
|
|
483
418
|
a: t,
|
|
484
419
|
b: e,
|
|
@@ -486,10 +421,10 @@ function Qe(t, e, s, n) {
|
|
|
486
421
|
transposeB: !1,
|
|
487
422
|
bias: null,
|
|
488
423
|
activation: s
|
|
489
|
-
}),
|
|
424
|
+
}), b);
|
|
490
425
|
}
|
|
491
426
|
}
|
|
492
|
-
class
|
|
427
|
+
class Ue {
|
|
493
428
|
vocabSize;
|
|
494
429
|
embedDim;
|
|
495
430
|
tf;
|
|
@@ -512,7 +447,7 @@ class Ye {
|
|
|
512
447
|
return this.tf.gather(this.tiedWeights, e, 0);
|
|
513
448
|
}
|
|
514
449
|
project(e) {
|
|
515
|
-
return
|
|
450
|
+
return Ie(e, this.tiedWeights.transpose());
|
|
516
451
|
}
|
|
517
452
|
getWeights() {
|
|
518
453
|
return [this.tiedWeights];
|
|
@@ -531,5 +466,5 @@ class Ye {
|
|
|
531
466
|
}
|
|
532
467
|
}
|
|
533
468
|
export {
|
|
534
|
-
|
|
469
|
+
Ue as default
|
|
535
470
|
};
|
package/dist/main.d.ts
CHANGED
|
@@ -6,3 +6,5 @@ export { default as loadTextData } from './data/textLoader';
|
|
|
6
6
|
export type { ITrainerOptions } from './Trainer';
|
|
7
7
|
export type { IGenerateOptions } from './Generator';
|
|
8
8
|
export type { TrainingLogEntry } from './NanoGPTModel';
|
|
9
|
+
export type { GPTConfig } from './config';
|
|
10
|
+
export { estimateParameterCount, estimateMemoryUsage, estimateTrainingMemoryUsage, estimateResources, validateConfig, } from './utilities/parameters';
|
package/dist/main.js
CHANGED
|
@@ -1,12 +1,20 @@
|
|
|
1
|
-
import { default as
|
|
2
|
-
import { default as
|
|
3
|
-
import { default as
|
|
4
|
-
import { default as
|
|
5
|
-
import { default as
|
|
1
|
+
import { default as r } from "./NanoGPTModel.js";
|
|
2
|
+
import { default as s } from "./TeachableLLM.js";
|
|
3
|
+
import { default as i } from "./tokeniser/CharTokeniser.js";
|
|
4
|
+
import { default as d } from "./utilities/waitForModel.js";
|
|
5
|
+
import { default as u } from "./data/textLoader.js";
|
|
6
|
+
import { estimateMemoryUsage as n, estimateParameterCount as T, estimateResources as g, estimateTrainingMemoryUsage as M, validateConfig as C } from "./utilities/parameters.js";
|
|
7
|
+
import "./ops/scatterSub.js";
|
|
8
|
+
import "./ops/gatherSub.js";
|
|
6
9
|
export {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
10
|
+
i as CharTokeniser,
|
|
11
|
+
r as NanoGPT,
|
|
12
|
+
s as TeachableLLM,
|
|
13
|
+
n as estimateMemoryUsage,
|
|
14
|
+
T as estimateParameterCount,
|
|
15
|
+
g as estimateResources,
|
|
16
|
+
M as estimateTrainingMemoryUsage,
|
|
17
|
+
u as loadTextData,
|
|
18
|
+
C as validateConfig,
|
|
19
|
+
d as waitForModel
|
|
12
20
|
};
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { engine as l } from "@tensorflow/tfjs";
|
|
2
|
+
import { o as g, c as i, E as b, G as d, r as c, a as h } from "../index-D1SlunD-.js";
|
|
3
|
+
import { r as p, s as f } from "../stack-DB2YLlAs.js";
|
|
4
|
+
/**
|
|
5
|
+
* @license
|
|
6
|
+
* Copyright 2018 Google LLC. All Rights Reserved.
|
|
7
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
8
|
+
* you may not use this file except in compliance with the License.
|
|
9
|
+
* You may obtain a copy of the License at
|
|
10
|
+
*
|
|
11
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
*
|
|
13
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
14
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
15
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
|
+
* See the License for the specific language governing permissions and
|
|
17
|
+
* limitations under the License.
|
|
18
|
+
* =============================================================================
|
|
19
|
+
*/
|
|
20
|
+
function m(e, t) {
|
|
21
|
+
const n = i(t, "indices", "gatherND", "int32"), r = { params: i(e, "x", "gatherND", "string_or_numeric"), indices: n };
|
|
22
|
+
return b.runKernel(d, r);
|
|
23
|
+
}
|
|
24
|
+
const N = /* @__PURE__ */ g({ gatherND_: m });
|
|
25
|
+
class S {
|
|
26
|
+
variableNames = ["labels", "logits", "values"];
|
|
27
|
+
outputShape;
|
|
28
|
+
userCode;
|
|
29
|
+
constructor(t) {
|
|
30
|
+
this.outputShape = [t], this.userCode = `
|
|
31
|
+
void main() {
|
|
32
|
+
int coords = getOutputCoords();
|
|
33
|
+
int index = int(getLabelsAtOutCoords());
|
|
34
|
+
float val = getValuesAtOutCoords();
|
|
35
|
+
float logit = getLogits(coords, index);
|
|
36
|
+
setOutput(val - logit);
|
|
37
|
+
}
|
|
38
|
+
`;
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
function k(e) {
|
|
42
|
+
const { logits: t, labels: n, values: s } = e.inputs, r = e.backend, o = n.shape[0], a = new S(o);
|
|
43
|
+
return r.runWebGLProgram(a, [n, t, s], "float32");
|
|
44
|
+
}
|
|
45
|
+
const G = {
|
|
46
|
+
kernelName: "EfficientGatherSub",
|
|
47
|
+
backendName: "webgl",
|
|
48
|
+
kernelFunc: k
|
|
49
|
+
};
|
|
50
|
+
c(G);
|
|
51
|
+
function v(e) {
|
|
52
|
+
const { values: t, labels: n, logits: s } = e.inputs, r = n.shape[0], o = p(0, r, 1, "int32"), a = f([o, n], 1), u = N(s, a);
|
|
53
|
+
return h(t, u);
|
|
54
|
+
}
|
|
55
|
+
const C = {
|
|
56
|
+
kernelName: "EfficientGatherSub",
|
|
57
|
+
backendName: "cpu",
|
|
58
|
+
kernelFunc: v
|
|
59
|
+
};
|
|
60
|
+
c(C);
|
|
61
|
+
function K(e, t, n) {
|
|
62
|
+
return l().runKernel("EfficientGatherSub", { logits: n, labels: t, values: e }, {});
|
|
63
|
+
}
|
|
64
|
+
export {
|
|
65
|
+
K as gatherSub
|
|
66
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { r as o } from "../../index-D1SlunD-.js";
|
|
2
|
+
function r(e) {
|
|
3
|
+
const { logits: t, labels: n } = e.inputs;
|
|
4
|
+
return e.backend.executeMultipleOutputs("SparseSoftmaxCrossEntropyWithLogits", [], [t, n], 2);
|
|
5
|
+
}
|
|
6
|
+
const s = {
|
|
7
|
+
kernelName: "NativeSparseSoftmaxCrossEntropy",
|
|
8
|
+
backendName: "tensorflow",
|
|
9
|
+
kernelFunc: r
|
|
10
|
+
};
|
|
11
|
+
o(s);
|