@genai-fi/nanogpt 0.4.3 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.js +3 -3
- package/dist/NanoGPTModel.js +8 -8
- package/dist/Reshape-CiAY8ltP.js +212 -0
- package/dist/TeachableLLM.js +14 -5
- package/dist/{TiedEmbedding-CnJ1bx4q.js → TiedEmbedding-DznFwzcB.js} +244 -244
- package/dist/{axis_util-BgTGy5w8.js → axis_util-QP0LdI1v.js} +1 -1
- package/dist/{concat-CuRsVY-K.js → concat-DvWM7HGZ.js} +1 -1
- package/dist/data/parquet.js +9 -6
- package/dist/data/textLoader.js +6 -5
- package/dist/{dropout-DfDdklfL.js → dropout-DFEXTPV0.js} +4 -4
- package/dist/{gather-ZYRWhmXR.js → gather-C5D8PxwA.js} +1 -1
- package/dist/gpgpu_math-CUzjlO9A.js +23 -0
- package/dist/{index-C4JCoBvj.js → index--6vO-cOz.js} +87 -87
- package/dist/{kernel_funcs_utils-CAd1h9X1.js → kernel_funcs_utils-C6YBCuOt.js} +72 -91
- package/dist/layers/CausalSelfAttention.js +47 -46
- package/dist/layers/MLP.js +31 -33
- package/dist/layers/RMSNorm.d.ts +1 -2
- package/dist/layers/RMSNorm.js +10 -10
- package/dist/layers/RoPECache.js +3 -3
- package/dist/layers/TiedEmbedding.js +5 -5
- package/dist/layers/TransformerBlock.js +2 -2
- package/dist/{log_sum_exp-BswFnwOb.js → log_sum_exp-CiEy1aUe.js} +7 -7
- package/dist/main.js +28 -19
- package/dist/{mat_mul-415y5Qn2.js → mat_mul-BEHRPMh0.js} +1 -1
- package/dist/{max-CP_9O2Yd.js → max-BUShNgfh.js} +1 -1
- package/dist/{moments-CjeIaVdp.js → moments-DYOHXoRV.js} +5 -5
- package/dist/{norm-CZM380I3.js → norm-DSva3hI3.js} +13 -13
- package/dist/{ones-Bf3YR48P.js → ones-D6kB8bdY.js} +2 -2
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +2 -2
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +4 -4
- package/dist/ops/cpu/gelu.js +1 -1
- package/dist/ops/cpu/matMulGelu.d.ts +1 -0
- package/dist/ops/cpu/matMulGelu.js +40 -0
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.d.ts +1 -0
- package/dist/ops/cpu/normRMS.js +39 -0
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +4 -4
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/grads/attentionMask.js +1 -1
- package/dist/ops/grads/fusedSoftmax.js +2 -2
- package/dist/ops/grads/gelu.js +24 -3
- package/dist/ops/grads/matMulGelu.d.ts +1 -0
- package/dist/ops/grads/matMulGelu.js +17 -0
- package/dist/ops/grads/normRMS.d.ts +2 -0
- package/dist/ops/grads/normRMS.js +20 -0
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/matMulGelu.d.ts +3 -0
- package/dist/ops/matMulGelu.js +14 -0
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/node/sparseCrossEntropy.js +1 -1
- package/dist/ops/normRMS.d.ts +2 -0
- package/dist/ops/normRMS.js +10 -0
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +689 -895
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/matMulGelu.d.ts +21 -0
- package/dist/ops/webgl/matMulGelu.js +168 -0
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.d.ts +1 -0
- package/dist/ops/webgl/normRMS.js +78 -0
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/{range-9AzeApCc.js → range-C_vpUjBu.js} +1 -1
- package/dist/{reshape-Boe4DuIO.js → reshape-z51Eu-re.js} +1 -1
- package/dist/{sin-KmhiDuMa.js → sin-H567uayl.js} +1 -1
- package/dist/{slice_util-19zDNNSn.js → slice_util-BdhYwFY_.js} +2 -2
- package/dist/{softmax-Cujsg4ay.js → softmax-Dsxflvdl.js} +1 -1
- package/dist/{split-DbcNm1-i.js → split-B_k_jwud.js} +1 -1
- package/dist/{stack-D1YjmgKN.js → stack-CmqSdsfs.js} +1 -1
- package/dist/{sum-R28pucR5.js → sum-DdkDf2MG.js} +1 -1
- package/dist/{tensor-BVeHdl7V.js → tensor-BGYi41cj.js} +1 -1
- package/dist/{tensor2d-DqFGNs_K.js → tensor2d-DUr_htjt.js} +1 -1
- package/dist/{tfjs_backend-Cug-PH75.js → tfjs_backend-DuKis_xG.js} +46 -46
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +18 -18
- package/dist/training/FullTrainer.js +1 -1
- package/dist/training/Trainer.js +5 -5
- package/dist/training/sparseCrossEntropy.js +4 -4
- package/dist/utilities/dummy.js +2 -2
- package/dist/utilities/generate.js +3 -3
- package/dist/utilities/load.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-LJT9Ld63.js → variable-BJTZ3jOy.js} +1 -1
- package/dist/{zeros-dnQxFgAD.js → zeros-8xl-W2DC.js} +1 -1
- package/package.json +1 -1
- package/dist/gelu-CnCt17Lk.js +0 -26
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { aj as $, ah as d, L as M, a as R, ak as f, al as v, am as z, j as _, t as x } from "../index--6vO-cOz.js";
|
|
2
2
|
import { s as E } from "../index-C4L8Cm77.js";
|
|
3
|
-
import { s as P } from "../stack-
|
|
4
|
-
import { t as D } from "../tensor-
|
|
3
|
+
import { s as P } from "../stack-CmqSdsfs.js";
|
|
4
|
+
import { t as D } from "../tensor-BGYi41cj.js";
|
|
5
5
|
import "../index-Tf7vU29b.js";
|
|
6
6
|
/**
|
|
7
7
|
* @license
|
|
@@ -82,10 +82,10 @@ function p(s) {
|
|
|
82
82
|
const { StringDecoder: e } = require("string_decoder");
|
|
83
83
|
t = s instanceof e;
|
|
84
84
|
}
|
|
85
|
-
return s != null && !ArrayBuffer.isView(s) && (Array.isArray(s) || typeof s == "object" && !(s instanceof
|
|
85
|
+
return s != null && !ArrayBuffer.isView(s) && (Array.isArray(s) || typeof s == "object" && !(s instanceof d) && !(s instanceof Promise) && !t);
|
|
86
86
|
}
|
|
87
87
|
function H(s) {
|
|
88
|
-
return s == null || q(s) || Array.isArray(s) || typeof s == "object" && s instanceof
|
|
88
|
+
return s == null || q(s) || Array.isArray(s) || typeof s == "object" && s instanceof d || $(s);
|
|
89
89
|
}
|
|
90
90
|
function q(s) {
|
|
91
91
|
return s === null || typeof s != "object" && typeof s != "function";
|
|
@@ -111,7 +111,7 @@ function Q(s) {
|
|
|
111
111
|
return L(s, G);
|
|
112
112
|
}
|
|
113
113
|
function G(s) {
|
|
114
|
-
return s instanceof
|
|
114
|
+
return s instanceof d ? { value: s.clone(), recurse: !1 } : p(s) ? { value: null, recurse: !0 } : { value: s, recurse: !1 };
|
|
115
115
|
}
|
|
116
116
|
/**
|
|
117
117
|
* @license
|
|
@@ -477,7 +477,7 @@ class i {
|
|
|
477
477
|
* of the original element type.
|
|
478
478
|
*/
|
|
479
479
|
rowMajorBatch(t, e = !0) {
|
|
480
|
-
return new
|
|
480
|
+
return new K(this, t, e);
|
|
481
481
|
}
|
|
482
482
|
/**
|
|
483
483
|
* Groups elements into batches, represented in column-major form.
|
|
@@ -535,7 +535,7 @@ class i {
|
|
|
535
535
|
* unaltered.
|
|
536
536
|
*/
|
|
537
537
|
take(t) {
|
|
538
|
-
return t < 0 || t == null ? this : new
|
|
538
|
+
return t < 0 || t == null ? this : new j(this, t);
|
|
539
539
|
}
|
|
540
540
|
/**
|
|
541
541
|
* Skips the first `count` items in this stream.
|
|
@@ -641,7 +641,7 @@ class X extends i {
|
|
|
641
641
|
return this.upstream.next();
|
|
642
642
|
}
|
|
643
643
|
}
|
|
644
|
-
class
|
|
644
|
+
class j extends i {
|
|
645
645
|
constructor(t, e) {
|
|
646
646
|
super(), this.upstream = t, this.maxCount = e, this.count = 0;
|
|
647
647
|
}
|
|
@@ -652,7 +652,7 @@ class K extends i {
|
|
|
652
652
|
return this.count++ >= this.maxCount ? { value: null, done: !0 } : this.upstream.next();
|
|
653
653
|
}
|
|
654
654
|
}
|
|
655
|
-
class
|
|
655
|
+
class K extends i {
|
|
656
656
|
constructor(t, e, r = !0) {
|
|
657
657
|
super(), this.upstream = t, this.batchSize = e, this.enableSmallLastBatch = r, this.lastRead = Promise.resolve({ value: null, done: !1 });
|
|
658
658
|
}
|
|
@@ -1219,7 +1219,7 @@ function at(s) {
|
|
|
1219
1219
|
function it(s) {
|
|
1220
1220
|
if (s.length === 0)
|
|
1221
1221
|
throw new Error("Can't make a batch of zero elements.");
|
|
1222
|
-
return s[0] instanceof
|
|
1222
|
+
return s[0] instanceof d ? P(s) : D(s);
|
|
1223
1223
|
}
|
|
1224
1224
|
/**
|
|
1225
1225
|
* @license
|
|
@@ -1244,7 +1244,7 @@ function ut(s) {
|
|
|
1244
1244
|
return k(() => t.next());
|
|
1245
1245
|
});
|
|
1246
1246
|
}
|
|
1247
|
-
class
|
|
1247
|
+
class mt {
|
|
1248
1248
|
tokenizer;
|
|
1249
1249
|
blockSize;
|
|
1250
1250
|
constructor(t, e = 128) {
|
|
@@ -1257,20 +1257,20 @@ class dt {
|
|
|
1257
1257
|
n === 1 ? void 0 : Math.floor(n * h.length)
|
|
1258
1258
|
), w = (function* () {
|
|
1259
1259
|
for (; ; ) {
|
|
1260
|
-
const u = Math.floor(Math.random() * (c.length - this.blockSize - 1)),
|
|
1261
|
-
yield { xs:
|
|
1260
|
+
const u = Math.floor(Math.random() * (c.length - this.blockSize - 1)), m = c.slice(u, u + this.blockSize), B = c.slice(u + 1, u + this.blockSize + 1);
|
|
1261
|
+
yield { xs: m, ys: B };
|
|
1262
1262
|
}
|
|
1263
1263
|
}).bind(this);
|
|
1264
1264
|
return ut(w).batch(e).map((u) => {
|
|
1265
|
-
const
|
|
1265
|
+
const m = u;
|
|
1266
1266
|
return x(() => ({
|
|
1267
|
-
xs:
|
|
1268
|
-
ys:
|
|
1267
|
+
xs: m.xs.cast("int32"),
|
|
1268
|
+
ys: m.ys.cast("int32")
|
|
1269
1269
|
// this.tf.oneHot(batchData.ys.cast('int32'), this.tokenizer.vocabSize),
|
|
1270
1270
|
}));
|
|
1271
1271
|
}).prefetch(2);
|
|
1272
1272
|
}
|
|
1273
1273
|
}
|
|
1274
1274
|
export {
|
|
1275
|
-
|
|
1275
|
+
mt as DatasetBuilder
|
|
1276
1276
|
};
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { generateText as v } from "../utilities/generate.js";
|
|
2
2
|
import L from "./Trainer.js";
|
|
3
3
|
import x from "./Evaluator.js";
|
|
4
|
-
import { a as h } from "../index-
|
|
4
|
+
import { a as h } from "../index--6vO-cOz.js";
|
|
5
5
|
const D = {
|
|
6
6
|
desiredLoss: 0.01,
|
|
7
7
|
logInterval: 1,
|
package/dist/training/Trainer.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { DatasetBuilder as d } from "./DatasetBuilder.js";
|
|
2
2
|
import h from "./AdamExt.js";
|
|
3
|
-
import { t as g, v as u, a as o } from "../index-
|
|
4
|
-
import { m as y, n as f } from "../norm-
|
|
5
|
-
import { m as S, a as z } from "../moments-
|
|
6
|
-
import { m as b } from "../max-
|
|
7
|
-
import { z as n } from "../zeros-
|
|
3
|
+
import { t as g, v as u, a as o } from "../index--6vO-cOz.js";
|
|
4
|
+
import { m as y, n as f } from "../norm-DSva3hI3.js";
|
|
5
|
+
import { m as S, a as z } from "../moments-DYOHXoRV.js";
|
|
6
|
+
import { m as b } from "../max-BUShNgfh.js";
|
|
7
|
+
import { z as n } from "../zeros-8xl-W2DC.js";
|
|
8
8
|
class G {
|
|
9
9
|
constructor(t, s, e = 1e-3) {
|
|
10
10
|
this.tokenizer = s, this.model = t, this.learningRate = e, this.resetOptimizer(), this.datasetBuilder = new d(s, t.config.gpt.blockSize);
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { gatherSub as L } from "../ops/gatherSub.js";
|
|
2
2
|
import { scatterSub as y } from "../ops/scatterSub.js";
|
|
3
|
-
import { e as u, c as i, z as S, t as f, s as G } from "../index-
|
|
4
|
-
import { s as v } from "../softmax-
|
|
5
|
-
import { m as z } from "../max-
|
|
6
|
-
import { l as k } from "../log_sum_exp-
|
|
3
|
+
import { e as u, c as i, z as S, t as f, s as G } from "../index--6vO-cOz.js";
|
|
4
|
+
import { s as v } from "../softmax-Dsxflvdl.js";
|
|
5
|
+
import { m as z } from "../max-BUShNgfh.js";
|
|
6
|
+
import { l as k } from "../log_sum_exp-CiEy1aUe.js";
|
|
7
7
|
function F(a, s) {
|
|
8
8
|
return f(() => {
|
|
9
9
|
const e = a.shape[a.shape.length - 1], o = a.shape.slice(0, -1).reduce((d, c) => d * c, 1), p = a.shape.length > 2 ? a.reshape([o, e]) : a, n = s.shape.length > 1 ? s.reshape([o]).cast("int32") : s.cast("int32"), t = z(p, -1, !0), r = G(p, t), h = k(r, -1);
|
package/dist/utilities/dummy.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import "../index-
|
|
2
|
-
import { z as n } from "../zeros-
|
|
1
|
+
import "../index--6vO-cOz.js";
|
|
2
|
+
import { z as n } from "../zeros-8xl-W2DC.js";
|
|
3
3
|
async function a(s) {
|
|
4
4
|
const o = n([1, s.config.gpt.blockSize], "int32"), { logits: t, loss: i } = s.forward(o, void 0, !1);
|
|
5
5
|
await t.data(), t.dispose(), i && i.dispose(), o.dispose();
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { t as y } from "../index-
|
|
2
|
-
import { t as x } from "../tensor2d-
|
|
3
|
-
import { c as f } from "../concat-
|
|
1
|
+
import { t as y } from "../index--6vO-cOz.js";
|
|
2
|
+
import { t as x } from "../tensor2d-DUr_htjt.js";
|
|
3
|
+
import { c as f } from "../concat-DvWM7HGZ.js";
|
|
4
4
|
async function A(o, r, a, c, T) {
|
|
5
5
|
if (c <= 0)
|
|
6
6
|
throw new Error("Length must be a positive integer");
|
package/dist/utilities/load.js
CHANGED
|
@@ -3,7 +3,7 @@ import { importWeights as b } from "./weights.js";
|
|
|
3
3
|
import u from "../tokeniser/CharTokeniser.js";
|
|
4
4
|
import F from "../NanoGPTModel.js";
|
|
5
5
|
import { dummyPassAsync as j } from "./dummy.js";
|
|
6
|
-
import { d as T } from "../index-
|
|
6
|
+
import { d as T } from "../index--6vO-cOz.js";
|
|
7
7
|
import E from "../tokeniser/bpe.js";
|
|
8
8
|
async function A(t) {
|
|
9
9
|
const o = await fetch(t);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import "../index-
|
|
2
|
-
import { t as p } from "../tensor-
|
|
1
|
+
import "../index--6vO-cOz.js";
|
|
2
|
+
import { t as p } from "../tensor-BGYi41cj.js";
|
|
3
3
|
function h(n) {
|
|
4
4
|
const e = n.reduce((s, o) => s + o.length, 0), a = new Float32Array(e);
|
|
5
5
|
let t = 0;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { o as m, h as r,
|
|
1
|
+
import { o as m, h as r, X as l, E as c, Y as i, k as p, Z as u, n as f } from "./index--6vO-cOz.js";
|
|
2
2
|
/**
|
|
3
3
|
* @license
|
|
4
4
|
* Copyright 2020 Google LLC. All Rights Reserved.
|
package/package.json
CHANGED
package/dist/gelu-CnCt17Lk.js
DELETED
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
import { g as t, e as n } from "./index-C4JCoBvj.js";
|
|
2
|
-
import "./ops/cpu/gelu.js";
|
|
3
|
-
import "./ops/webgl/gelu.js";
|
|
4
|
-
const a = {
|
|
5
|
-
kernelName: "Gelu",
|
|
6
|
-
inputsToSave: ["x"],
|
|
7
|
-
outputsToSave: [],
|
|
8
|
-
gradFunc: (e, r) => {
|
|
9
|
-
const [u] = r;
|
|
10
|
-
return {
|
|
11
|
-
x: () => o(e, u)
|
|
12
|
-
};
|
|
13
|
-
}
|
|
14
|
-
};
|
|
15
|
-
t(a);
|
|
16
|
-
function g(e) {
|
|
17
|
-
return n().runKernel("Gelu", { x: e });
|
|
18
|
-
}
|
|
19
|
-
function o(e, r) {
|
|
20
|
-
return n().runKernel("GeluGrad", { dy: e, x: r });
|
|
21
|
-
}
|
|
22
|
-
export {
|
|
23
|
-
a,
|
|
24
|
-
o as d,
|
|
25
|
-
g
|
|
26
|
-
};
|