@genai-fi/nanogpt 0.6.2 → 0.6.3
This diff shows the changes between package versions as published to their public registries. It is provided for informational purposes only.
package/dist/TeachableLLM.d.ts  CHANGED

@@ -22,12 +22,15 @@ export default class TeachableLLM {
     meta: TeachableLLMMeta;
     constructor(tokeniser?: ITokeniser, model?: NanoGPT);
     get vocab(): string[];
+    /** Model is fully loaded */
     get loaded(): boolean;
     get config(): GPTConfig;
     get model(): NanoGPT;
     get tokeniser(): ITokeniser;
     get status(): TeachableLLMStatus;
+    /** Model is both ready and not busy */
     get ready(): boolean;
+    get busy(): boolean;
     estimateTrainingMemoryUsage(batchSize: number): number;
     private setStatus;
     saveModel(options?: SaveOptions): Promise<Blob>;
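
The new surface here is the `busy` getter alongside the existing `ready`. A minimal consumer sketch; the import specifier and the `canStartJob` helper are illustrative assumptions, while the `"char"` argument and the `ready`/`busy` semantics come from the implementation below:

import TeachableLLM from "@genai-fi/nanogpt";

const llm = TeachableLLM.create("char");

// ready: status === "ready" and a model + tokeniser are present.
// busy:  status is "busy" or "training".
function canStartJob(m: TeachableLLM): boolean {
  return m.ready && !m.busy;
}
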
package/dist/TeachableLLM.js  CHANGED

@@ -1,13 +1,13 @@
-import { defaultConfig as
-import
-import { saveModel as
-import { loadModel as
-import
-import
-import { E as
+import { defaultConfig as _ } from "./config.js";
+import f from "./NanoGPTModel.js";
+import { saveModel as u } from "./utilities/save.js";
+import { loadModel as d } from "./loader/load.js";
+import l from "./Generator.js";
+import p from "./Trainer.js";
+import { E as g } from "./index-Dwqa6Zy2.js";
 import { dummyPassTrainAsync as m } from "./utilities/dummy.js";
-import
-import
+import c from "./tokeniser/CharTokeniser.js";
+import k from "./tokeniser/bpe.js";
 import "./papaparse.min-C8l2Kvo1.js";
 import "./index-Tf7vU29b.js";
 import "./jszip.min-CjP2V1VV.js";

@@ -43,9 +43,9 @@ import "./ops/cpu/normRMS.js";
 import "./ops/webgl/normRMS.js";
 import "./ops/grads/normRMS.js";
 import "./ops/webgl/log.js";
-import
+import w from "./utilities/profile.js";
 class a {
-  ee = new
+  ee = new g();
   _config;
   _model;
   _tokeniser;

@@ -58,66 +58,71 @@ class a {
   get vocab() {
     return this._tokeniser?.getVocab() || [];
   }
+  /** Model is fully loaded */
   get loaded() {
     return !!this._model && !!this._tokeniser && !!this._config;
   }
   get config() {
     if (!this._config)
-      throw new Error("
+      throw new Error("configuration_not_initialized.");
     return this._config.gpt;
   }
   get model() {
     if (!this._model)
-      throw new Error("
+      throw new Error("model_not_initialized.");
     return this._model;
   }
   get tokeniser() {
     if (!this._tokeniser)
-      throw new Error("
+      throw new Error("tokeniser_not_initialized.");
     return this._tokeniser;
   }
   get status() {
     return this._status;
   }
+  /** Model is both ready and not busy */
   get ready() {
-    return this._status === "ready" && !!this._model && !!this._tokeniser
+    return this._status === "ready" && !!this._model && !!this._tokeniser;
+  }
+  get busy() {
+    return this._status === "busy" || this._status === "training";
   }
   estimateTrainingMemoryUsage(t) {
-    const e = this._memoryRequirements ?? { perBatch: 0, gradients: 0 },
-    return
+    const e = this._memoryRequirements ?? { perBatch: 0, gradients: 0 }, i = e.perBatch * t, o = e.gradients;
+    return i * 0.66 + o * 4;
   }
   setStatus(t) {
     this._status !== t && (this._status = t, this.ee.emit("status", t));
   }
   saveModel(t) {
     if (!this._model || !this._tokeniser)
-      throw new Error("
-    return
+      throw new Error("model_or_tokeniser_not_initialized.");
+    return u(this._model, this._tokeniser, {
       ...t,
       name: t?.name || this.meta.name
     });
   }
   static loadModel(t) {
     const e = new a();
-    return
-    e._model =
-    e._memoryRequirements =
-    }).catch((
-      e.setStatus("error"), e.ee.emit("error",
+    return d(t).then(({ model: i, tokeniser: o, name: s }) => {
+      e._model = i, e._tokeniser = o, e._config = i.config, s && (e.meta.name = s), e.setStatus("warmup"), m(i).then((r) => {
+        e._memoryRequirements = r, e.setStatus("ready"), e.ee.emit("loaded");
+      }).catch((r) => {
+        e.setStatus("error"), e.ee.emit("error", r);
       });
-    }).catch((
-      e.setStatus("error"), e.ee.emit("error",
+    }).catch((i) => {
+      e.setStatus("error"), e.ee.emit("error", i);
     }), e;
   }
   static create(t, e = {}) {
-    const
-    return
-
-      h === "trained" &&
+    const i = { ..._, ...e }, o = t === "char" ? new c(i.vocabSize) : new k(i.vocabSize), s = new f(i), r = new a(o, s);
+    return r.setStatus("warmup"), m(s).then((n) => {
+      r._memoryRequirements = n, r.tokeniser.trained ? (r.setStatus("ready"), r.ee.emit("loaded")) : (r.setStatus("awaitingTokens"), r.ee.emit("loaded"), r.tokeniser.once("trainStatus", (h) => {
+        h === "trained" && r.setStatus("ready");
       }));
     }).catch((n) => {
-
-    }),
+      r.setStatus("error"), r.ee.emit("error", n);
+    }), r;
   }
   getProfiler() {
     return this._model?.getProfiler();
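
Two behavioural notes on this hunk. `loadModel()` and `create()` now pass through a "warmup" status while `m` (`dummyPassTrainAsync`) measures memory requirements before the status becomes "ready", and `estimateTrainingMemoryUsage` now returns `perBatch * batchSize * 0.66 + gradients * 4`. A worked example with hypothetical figures (units are whatever the warm-up pass reports; bytes assumed here):

const mem = { perBatch: 50e6, gradients: 100e6 }; // hypothetical warm-up results
const batchSize = 8;
const estimate = mem.perBatch * batchSize * 0.66 + mem.gradients * 4;
// 50e6 * 8 * 0.66 + 100e6 * 4 = 664e6, i.e. about 664 MB
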
@@ -128,24 +133,22 @@ class a {
   set enableProfiler(t) {
     if (t) {
       if (!this._config)
-
-      this._config.layerConfig.profiler || (this._config.layerConfig.profiler = new
+        return;
+      this._config.layerConfig.profiler || (this._config.layerConfig.profiler = new w());
     } else
       this._config?.layerConfig.profiler && (this._config.layerConfig.profiler = void 0);
   }
   getNumParams() {
-
-    throw new Error("Model is not initialized.");
-    return this._model.getNumParams();
+    return this._model ? this._model.getNumParams() : 0;
   }
   trainer() {
     if (!this._model || !this._tokeniser)
-      throw new Error("
-    const t = new
-    return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e,
+      throw new Error("model_or_tokeniser_not_initialized.");
+    const t = new p(this._model, this._tokeniser);
+    return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e, i) => {
       const o = this.ee.listeners("trainStep");
       for (const s of o)
-        await s(e,
+        await s(e, i);
     }), t;
   }
   train(t, e) {
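
The rewritten "log" handler awaits each `trainStep` listener in sequence, so a slow listener (for example one updating a UI) back-pressures training rather than piling up unresolved promises. The generic shape of that pattern, with illustrative names:

type StepListener = (step: number, logs: unknown) => void | Promise<void>;

// Sequential dispatch: each listener finishes before the next starts,
// unlike a fire-and-forget emit or Promise.all.
async function emitSequentially(listeners: StepListener[], step: number, logs: unknown): Promise<void> {
  for (const fn of listeners) await fn(step, logs);
}
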
@@ -160,7 +163,7 @@
   generator() {
     if (!this._model || !this._tokeniser)
       throw new Error("model_or_tokeniser_not_initialized.");
-    const t = new
+    const t = new l(this._model, this._tokeniser);
     return t.on("start", () => {
       this.status === "ready" && this.setStatus("busy");
     }), t.on("stop", () => {

@@ -1,35 +1,28 @@
-import { gatherSub as
-import { scatterSub as
-import {
-import { s as
+import { gatherSub as x } from "../ops/gatherSub.js";
+import { scatterSub as L } from "../ops/scatterSub.js";
+import { l as C, t as u, z as E, b as G } from "../index-BAzbokzv.js";
+import { s as y } from "../softmax-fsdtf6JC.js";
 import { m as z } from "../max-DtlIuVeW.js";
-import { l as
-function
-  return
-    const
-    return
+import { l as v } from "../log_sum_exp-YEo2h3gb.js";
+function k(t, s) {
+  return u(() => {
+    const n = t.shape[t.shape.length - 1], c = t.shape.slice(0, -1).reduce((o, e) => o * e, 1), h = t.shape.length > 2 ? t.reshape([c, n]) : t, p = s.shape.length > 1 ? s.reshape([c]).cast("int32") : s.cast("int32"), r = z(h, -1, !0), a = G(h, r), m = v(a, -1);
+    return x(m, p, a);
   });
 }
-function
-  return
-    const o = s.shape.length > 2 ? s.reshape([-1, s.shape[s.shape.length - 1]]) : s, p = e.shape.length > 1 ? e.reshape([-1]).cast("int32") : e.cast("int32"), [n, t] = u().runKernel(
-      "NativeSparseSoftmaxCrossEntropy",
-      { logits: o, labels: p },
-      {}
-    );
-    return m([t.reshape(s.shape)]), { value: n, gradFunc: (r, h) => [h[0], S(e)] };
-  }) : i(
+function A() {
+  return C(
     // @ts-expect-error Invalid params
-    (s,
-      const
-      return
-      const
-      return [
+    (s, n, d) => {
+      const c = s.shape[s.shape.length - 1], p = s.shape.slice(0, -1).reduce((o, e) => o * e, 1), r = s.reshape([p, c]), a = n.reshape([p]).cast("int32"), m = k(r, a);
+      return d([r, a]), r.dispose(), a.dispose(), { value: m, gradFunc: (o, e) => u(() => {
+        const S = e[0], f = e[1], b = y(S), l = L(b, f, o), g = E(n);
+        return [l.reshape(s.shape), g];
       }) };
     }
   );
 }
 export {
-
-
+  A as createSoftmaxCrossEntropyWithGrad,
+  k as sparseSoftmaxCrossEntropy
 };
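
This hunk (its file header is missing from the extract; it is the module exporting `sparseSoftmaxCrossEntropy`) drops the `NativeSparseSoftmaxCrossEntropy` kernel call in favour of building the loss from generic ops: subtract the row max, take the log-sum-exp, and gather-subtract the label logit, with a custom gradient built from `softmax` and `scatterSub`. A plain-number reference for one row of logits (names illustrative; the mapping to the minified ops is inferred):

function sparseSoftmaxXentRow(logits: number[], label: number): number {
  const max = Math.max(...logits);                    // row max, as in z(h, -1, !0)
  const shifted = logits.map((value) => value - max); // shift for numerical stability
  const sumExp = shifted.reduce((acc, value) => acc + Math.exp(value), 0);
  const lse = Math.log(sumExp);                       // log-sum-exp of the row
  return lse - shifted[label];                        // gatherSub: lse minus label logit
}
// Standard gradient: d(loss)/d(logits) = softmax(logits) - onehot(label),
// scaled by the upstream gradient; gradFunc realises this with softmax (y)
// and scatterSub (L).
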
package/package.json  CHANGED

@@ -1 +0,0 @@
-export {};

@@ -1,11 +0,0 @@
-import { r as o } from "../../index-BAzbokzv.js";
-function r(e) {
-  const { logits: t, labels: n } = e.inputs;
-  return e.backend.executeMultipleOutputs("SparseSoftmaxCrossEntropyWithLogits", [], [t, n], 2);
-}
-const s = {
-  kernelName: "NativeSparseSoftmaxCrossEntropy",
-  backendName: "tensorflow",
-  kernelFunc: r
-};
-o(s);