@genai-fi/nanogpt 0.6.2 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,12 +22,15 @@ export default class TeachableLLM {
  meta: TeachableLLMMeta;
  constructor(tokeniser?: ITokeniser, model?: NanoGPT);
  get vocab(): string[];
+ /** Model is fully loaded */
  get loaded(): boolean;
  get config(): GPTConfig;
  get model(): NanoGPT;
  get tokeniser(): ITokeniser;
  get status(): TeachableLLMStatus;
+ /** Model is both ready and not busy */
  get ready(): boolean;
+ get busy(): boolean;
  estimateTrainingMemoryUsage(batchSize: number): number;
  private setStatus;
  saveModel(options?: SaveOptions): Promise<Blob>;
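The declaration change above adds a busy getter alongside ready. A minimal usage sketch of the new getters, assuming the package's default export is this TeachableLLM class and the bare package specifier resolves to it ("main": "dist/main.js"); the import path is an assumption, not taken from the package docs:

    import TeachableLLM from "@genai-fi/nanogpt"; // assumed entry point

    // Hypothetical gate before starting generation or training:
    // `ready` means status is "ready" with a model and tokeniser present;
    // `busy` (new in 0.6.3) means status is "busy" or "training".
    function canStartWork(llm: TeachableLLM): boolean {
      return llm.loaded && llm.ready && !llm.busy;
    }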
@@ -1,13 +1,13 @@
- import { defaultConfig as d } from "./config.js";
- import l from "./NanoGPTModel.js";
- import { saveModel as f } from "./utilities/save.js";
- import { loadModel as u } from "./loader/load.js";
- import p from "./Generator.js";
- import _ from "./Trainer.js";
- import { E as c } from "./index-Dwqa6Zy2.js";
+ import { defaultConfig as _ } from "./config.js";
+ import f from "./NanoGPTModel.js";
+ import { saveModel as u } from "./utilities/save.js";
+ import { loadModel as d } from "./loader/load.js";
+ import l from "./Generator.js";
+ import p from "./Trainer.js";
+ import { E as g } from "./index-Dwqa6Zy2.js";
  import { dummyPassTrainAsync as m } from "./utilities/dummy.js";
- import g from "./tokeniser/CharTokeniser.js";
- import w from "./tokeniser/bpe.js";
+ import c from "./tokeniser/CharTokeniser.js";
+ import k from "./tokeniser/bpe.js";
  import "./papaparse.min-C8l2Kvo1.js";
  import "./index-Tf7vU29b.js";
  import "./jszip.min-CjP2V1VV.js";
@@ -43,9 +43,9 @@ import "./ops/cpu/normRMS.js";
  import "./ops/webgl/normRMS.js";
  import "./ops/grads/normRMS.js";
  import "./ops/webgl/log.js";
- import k from "./utilities/profile.js";
+ import w from "./utilities/profile.js";
  class a {
- ee = new c();
+ ee = new g();
  _config;
  _model;
  _tokeniser;
@@ -58,66 +58,71 @@ class a {
  get vocab() {
  return this._tokeniser?.getVocab() || [];
  }
+ /** Model is fully loaded */
  get loaded() {
  return !!this._model && !!this._tokeniser && !!this._config;
  }
  get config() {
  if (!this._config)
- throw new Error("Model configuration is not initialized.");
+ throw new Error("configuration_not_initialized.");
  return this._config.gpt;
  }
  get model() {
  if (!this._model)
- throw new Error("Model is not initialized.");
+ throw new Error("model_not_initialized.");
  return this._model;
  }
  get tokeniser() {
  if (!this._tokeniser)
- throw new Error("Tokeniser is not initialized.");
+ throw new Error("tokeniser_not_initialized.");
  return this._tokeniser;
  }
  get status() {
  return this._status;
  }
+ /** Model is both ready and not busy */
  get ready() {
- return this._status === "ready" && !!this._model && !!this._tokeniser && this.tokeniser.trained;
+ return this._status === "ready" && !!this._model && !!this._tokeniser;
+ }
+ get busy() {
+ return this._status === "busy" || this._status === "training";
  }
  estimateTrainingMemoryUsage(t) {
- const e = this._memoryRequirements ?? { perBatch: 0, gradients: 0 }, r = e.perBatch * t, o = e.gradients;
- return r * 0.66 + o * 4;
+ const e = this._memoryRequirements ?? { perBatch: 0, gradients: 0 }, i = e.perBatch * t, o = e.gradients;
+ return i * 0.66 + o * 4;
  }
  setStatus(t) {
  this._status !== t && (this._status = t, this.ee.emit("status", t));
  }
  saveModel(t) {
  if (!this._model || !this._tokeniser)
- throw new Error("Model or tokeniser is not initialized.");
- return f(this._model, this._tokeniser, {
+ throw new Error("model_or_tokeniser_not_initialized.");
+ return u(this._model, this._tokeniser, {
  ...t,
  name: t?.name || this.meta.name
  });
  }
  static loadModel(t) {
  const e = new a();
- return u(t).then(({ model: r, tokeniser: o, name: s }) => {
- e._model = r, e._tokeniser = o, e._config = r.config, s && (e.meta.name = s), e.setStatus("warmup"), m(r).then((i) => {
- e._memoryRequirements = i, e.setStatus("ready"), e.ee.emit("loaded");
- }).catch((i) => {
- e.setStatus("error"), e.ee.emit("error", i);
+ return d(t).then(({ model: i, tokeniser: o, name: s }) => {
+ e._model = i, e._tokeniser = o, e._config = i.config, s && (e.meta.name = s), e.setStatus("warmup"), m(i).then((r) => {
+ e._memoryRequirements = r, e.setStatus("ready"), e.ee.emit("loaded");
+ }).catch((r) => {
+ e.setStatus("error"), e.ee.emit("error", r);
  });
- }).catch((r) => {
- e.setStatus("error"), e.ee.emit("error", r);
+ }).catch((i) => {
+ e.setStatus("error"), e.ee.emit("error", i);
  }), e;
  }
  static create(t, e = {}) {
- const r = { ...d, ...e }, o = t === "char" ? new g(r.vocabSize) : new w(r.vocabSize), s = new l(r), i = new a(o, s);
- return i.setStatus("warmup"), m(s).then((n) => {
- i._memoryRequirements = n, i.tokeniser.trained ? (i.setStatus("ready"), i.ee.emit("loaded")) : (i.setStatus("awaitingTokens"), i.ee.emit("loaded"), i.tokeniser.once("trainStatus", (h) => {
- h === "trained" && i.setStatus("ready");
+ const i = { ..._, ...e }, o = t === "char" ? new c(i.vocabSize) : new k(i.vocabSize), s = new f(i), r = new a(o, s);
+ return r.setStatus("warmup"), m(s).then((n) => {
+ r._memoryRequirements = n, r.tokeniser.trained ? (r.setStatus("ready"), r.ee.emit("loaded")) : (r.setStatus("awaitingTokens"), r.ee.emit("loaded"), r.tokeniser.once("trainStatus", (h) => {
+ h === "trained" && r.setStatus("ready");
  }));
  }).catch((n) => {
- i.setStatus("error"), i.ee.emit("error", n);
- }), i;
+ r.setStatus("error"), r.ee.emit("error", n);
+ }), r;
  }
  getProfiler() {
  return this._model?.getProfiler();
@@ -128,24 +133,22 @@ class a {
  set enableProfiler(t) {
  if (t) {
  if (!this._config)
- throw new Error("Model is not initialized.");
- this._config.layerConfig.profiler || (this._config.layerConfig.profiler = new k());
+ return;
+ this._config.layerConfig.profiler || (this._config.layerConfig.profiler = new w());
  } else
  this._config?.layerConfig.profiler && (this._config.layerConfig.profiler = void 0);
  }
  getNumParams() {
- if (!this._model)
- throw new Error("Model is not initialized.");
- return this._model.getNumParams();
+ return this._model ? this._model.getNumParams() : 0;
  }
  trainer() {
  if (!this._model || !this._tokeniser)
- throw new Error("Model or tokeniser is not initialized.");
- const t = new _(this._model, this._tokeniser);
- return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e, r) => {
+ throw new Error("model_or_tokeniser_not_initialized.");
+ const t = new p(this._model, this._tokeniser);
+ return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (e, i) => {
  const o = this.ee.listeners("trainStep");
  for (const s of o)
- await s(e, r);
+ await s(e, i);
  }), t;
  }
  train(t, e) {
@@ -160,7 +163,7 @@ class a {
  generator() {
  if (!this._model || !this._tokeniser)
  throw new Error("model_or_tokeniser_not_initialized.");
- const t = new p(this._model, this._tokeniser);
+ const t = new l(this._model, this._tokeniser);
  return t.on("start", () => {
  this.status === "ready" && this.setStatus("busy");
  }), t.on("stop", () => {
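The hunks above change several failure modes in this class: initialization errors now carry key-style messages ("configuration_not_initialized.", "model_not_initialized.", "tokeniser_not_initialized.", "model_or_tokeniser_not_initialized.") instead of prose, getNumParams() returns 0 rather than throwing when no model is loaded, and enabling the profiler without a config silently returns. A minimal caller-side sketch, assuming the same default export and import path as above (an assumption, not from the package docs):

    import TeachableLLM from "@genai-fi/nanogpt"; // assumed entry point

    async function trySave(llm: TeachableLLM): Promise<Blob | undefined> {
      try {
        return await llm.saveModel();
      } catch (err) {
        // 0.6.2 threw "Model or tokeniser is not initialized." here;
        // 0.6.3 throws the key-style message instead, so message matching must change.
        if (err instanceof Error && err.message === "model_or_tokeniser_not_initialized.") return undefined;
        throw err;
      }
    }

    // getNumParams() no longer throws: a result of 0 now simply means no model is loaded yet.
    const paramCount = (llm: TeachableLLM): number => llm.getNumParams();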
@@ -1,35 +1,28 @@
- import { gatherSub as L } from "../ops/gatherSub.js";
- import { scatterSub as y } from "../ops/scatterSub.js";
- import { e as u, l as i, z as S, t as f, b as G } from "../index-BAzbokzv.js";
- import { s as v } from "../softmax-fsdtf6JC.js";
+ import { gatherSub as x } from "../ops/gatherSub.js";
+ import { scatterSub as L } from "../ops/scatterSub.js";
+ import { l as C, t as u, z as E, b as G } from "../index-BAzbokzv.js";
+ import { s as y } from "../softmax-fsdtf6JC.js";
  import { m as z } from "../max-DtlIuVeW.js";
- import { l as k } from "../log_sum_exp-YEo2h3gb.js";
- function F(a, s) {
- return f(() => {
- const e = a.shape[a.shape.length - 1], o = a.shape.slice(0, -1).reduce((d, c) => d * c, 1), p = a.shape.length > 2 ? a.reshape([o, e]) : a, n = s.shape.length > 1 ? s.reshape([o]).cast("int32") : s.cast("int32"), t = z(p, -1, !0), r = G(p, t), h = k(r, -1);
- return L(h, n, r);
+ import { l as v } from "../log_sum_exp-YEo2h3gb.js";
+ function k(t, s) {
+ return u(() => {
+ const n = t.shape[t.shape.length - 1], c = t.shape.slice(0, -1).reduce((o, e) => o * e, 1), h = t.shape.length > 2 ? t.reshape([c, n]) : t, p = s.shape.length > 1 ? s.reshape([c]).cast("int32") : s.cast("int32"), r = z(h, -1, !0), a = G(h, r), m = v(a, -1);
+ return x(m, p, a);
  });
  }
- function j() {
- return u().backendName === "tensorflow" ? i((s, e, m) => {
- const o = s.shape.length > 2 ? s.reshape([-1, s.shape[s.shape.length - 1]]) : s, p = e.shape.length > 1 ? e.reshape([-1]).cast("int32") : e.cast("int32"), [n, t] = u().runKernel(
- "NativeSparseSoftmaxCrossEntropy",
- { logits: o, labels: p },
- {}
- );
- return m([t.reshape(s.shape)]), { value: n, gradFunc: (r, h) => [h[0], S(e)] };
- }) : i(
+ function A() {
+ return C(
  // @ts-expect-error Invalid params
- (s, e, m) => {
- const o = s.shape[s.shape.length - 1], n = s.shape.slice(0, -1).reduce((d, c) => d * c, 1), t = s.reshape([n, o]), r = e.reshape([n]).cast("int32"), h = F(t, r);
- return m([t, r]), t.dispose(), r.dispose(), { value: h, gradFunc: (d, c) => f(() => {
- const g = c[0], b = c[1], x = v(g), C = y(x, b, d), E = S(e);
- return [C.reshape(s.shape), E];
+ (s, n, d) => {
+ const c = s.shape[s.shape.length - 1], p = s.shape.slice(0, -1).reduce((o, e) => o * e, 1), r = s.reshape([p, c]), a = n.reshape([p]).cast("int32"), m = k(r, a);
+ return d([r, a]), r.dispose(), a.dispose(), { value: m, gradFunc: (o, e) => u(() => {
+ const S = e[0], f = e[1], b = y(S), l = L(b, f, o), g = E(n);
+ return [l.reshape(s.shape), g];
  }) };
  }
  );
  }
  export {
- j as createSoftmaxCrossEntropyWithGrad,
- F as sparseSoftmaxCrossEntropy
+ A as createSoftmaxCrossEntropyWithGrad,
+ k as sparseSoftmaxCrossEntropy
  };
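The rewritten loss module above drops the backend-specific branch but keeps the same computation. As a plain reference for what the exported sparseSoftmaxCrossEntropy appears to compute (my reading of the minified code; the function and variable names below are my own, not part of the package):

    // Per row i over logits z and integer labels y:
    //   loss[i] = logSumExp(z[i]) - z[i][y[i]]
    // computed with a max shift for numerical stability, mirroring the
    // max -> subtract -> log_sum_exp -> gatherSub sequence in the diff.
    function sparseSoftmaxCrossEntropyRef(logits: number[][], labels: number[]): number[] {
      return logits.map((row, i) => {
        const max = Math.max(...row); // stability shift
        const lse = Math.log(row.reduce((acc, z) => acc + Math.exp(z - max), 0)) + max;
        return lse - row[labels[i]]; // subtract the logit of the true label
      });
    }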
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@genai-fi/nanogpt",
- "version": "0.6.2",
+ "version": "0.6.3",
  "type": "module",
  "main": "dist/main.js",
  "types": "dist/main.d.ts",
@@ -1 +0,0 @@
- export {};
@@ -1,11 +0,0 @@
- import { r as o } from "../../index-BAzbokzv.js";
- function r(e) {
- const { logits: t, labels: n } = e.inputs;
- return e.backend.executeMultipleOutputs("SparseSoftmaxCrossEntropyWithLogits", [], [t, n], 2);
- }
- const s = {
- kernelName: "NativeSparseSoftmaxCrossEntropy",
- backendName: "tensorflow",
- kernelFunc: r
- };
- o(s);