@genai-fi/nanogpt 0.1.9 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,17 @@
1
- import { default as NanoGPT } from './NanoGPTModel';
1
+ import { default as NanoGPT, GenerateOptions } from './NanoGPTModel';
2
2
  import { ITokeniser } from './tokeniser/type';
3
3
  import { default as EE } from 'eventemitter3';
4
- export interface IGenerateOptions {
4
+ export interface IGenerateOptions extends GenerateOptions {
5
5
  maxLength?: number;
6
- temperature?: number;
7
- topK?: number;
8
- usePadding?: boolean;
9
- includeAttention?: boolean;
10
- includeProbabilities?: boolean;
6
+ noCache?: boolean;
11
7
  }
12
8
  export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
13
9
  private readonly model;
14
10
  private readonly tokeniser;
15
11
  constructor(model: NanoGPT, tokeniser: ITokeniser);
16
- private generateBlockOfTokens;
12
+ private tokenisePrompt;
13
+ private generateNoCache;
14
+ private processResponse;
15
+ private generateCache;
17
16
  generate(prompt?: string, options?: IGenerateOptions): Promise<string>;
18
17
  }
package/dist/Generator.js CHANGED
@@ -1,62 +1,65 @@
1
- import { E as m } from "./index-SOhdqzHq.js";
2
- const b = 4;
3
- class x extends m {
4
- constructor(a, t) {
5
- super(), this.model = a, this.tokeniser = t;
1
+ import { E as u } from "./index-SOhdqzHq.js";
2
+ class p extends u {
3
+ constructor(s, e) {
4
+ super(), this.model = s, this.tokeniser = e;
6
5
  }
7
- generateBlockOfTokens(a, t) {
8
- const g = t?.temperature ?? 1, c = t?.topK, d = t?.usePadding ?? t?.includeAttention ?? !1, k = t?.includeAttention ?? !1, h = t?.includeProbabilities ?? !1;
9
- let i = a, n, s;
10
- for (let e = 0; e < b; e++) {
6
+ async tokenisePrompt(s) {
7
+ const e = s ? await this.tokeniser.tokenise([s], !0) : [[this.tokeniser.eosToken]];
8
+ return this.model.tf.tensor2d(e, [1, e[0].length], "int32");
9
+ }
10
+ async generateNoCache(s, e) {
11
+ let t = await this.tokenisePrompt(s), n = s || "";
12
+ const a = e?.maxLength ?? 1e3;
13
+ for (let i = 0; i < a; i++) {
11
14
  const {
12
- output: u,
13
- attention: l,
14
- probabilities: r
15
- } = this.model.generate(i, {
16
- temperature: g,
17
- topK: c,
18
- usePadding: d,
19
- includeAttention: k,
20
- includeProbabilities: h
21
- }), p = i;
22
- if (i = this.model.tf.concat([i, u], 1), n && l) {
23
- const o = n;
24
- n = this.model.tf.concat([n, l], 0), o.dispose();
25
- } else l && (n = l);
26
- if (s && r) {
27
- const o = s;
28
- s = this.model.tf.concat([s, r], 0), o.dispose();
29
- } else r && (s = r);
30
- p.dispose(), u.dispose();
15
+ output: o,
16
+ attention: c,
17
+ probabilities: h
18
+ } = this.model.generate(t, void 0, e), l = t;
19
+ t = this.model.tf.concat([t, o], 1), l.dispose();
20
+ const r = await this.processResponse(o, c, h);
21
+ if (o.dispose(), r === null)
22
+ break;
23
+ n += r;
31
24
  }
32
- return { output: i, attention: n, probabilities: s };
25
+ return t.dispose(), n;
33
26
  }
34
- async generate(a, t) {
35
- const g = a ? await this.tokeniser.tokenise([a], !0) : [[this.tokeniser.eosToken]];
36
- let c = this.model.tf.tensor2d(g, [1, g[0].length], "int32");
37
- this.emit("start");
38
- let d = a || "";
39
- for (; ; ) {
40
- const { output: k, attention: h, probabilities: i } = this.generateBlockOfTokens(c, t), n = c;
41
- c = k;
42
- const s = k.slice([0, n.shape[1]], [1, b]), e = (await s.array())[0];
43
- n.dispose(), s.dispose();
44
- let u = !1, l = !1;
45
- const r = e.indexOf(this.tokeniser.eosToken);
46
- r !== -1 && (u = !0, e.splice(r)), e.length + d.length >= (t?.maxLength ?? 1e3) && (l = !0, e.splice(
47
- t?.maxLength ? t.maxLength - d.length : e.length
48
- ));
49
- const p = await this.tokeniser.decode(e);
50
- d += p;
51
- let o;
52
- h && (o = await h.array(), h.dispose(), o.length > e.length && (o = o.slice(0, e.length)));
53
- let f;
54
- if (i && (f = await i.array(), i.dispose(), f.length > e.length && (f = f.slice(0, e.length))), this.emit("tokens", e, p, o, f), u || l)
27
+ async processResponse(s, e, t) {
28
+ const n = (await s.array())[0][0];
29
+ if (n === this.tokeniser.eosToken)
30
+ return null;
31
+ const a = await this.tokeniser.decode([n]);
32
+ let i;
33
+ e && (i = await e.array(), e.dispose());
34
+ let o;
35
+ return t && (o = await t.array(), t.dispose()), this.emit("tokens", [n], a, i, o), a;
36
+ }
37
+ async generateCache(s, e) {
38
+ let t = await this.tokenisePrompt(s), n = s || "";
39
+ const a = new Array(this.model.config.nLayer).fill(void 0), i = e?.maxLength ?? 1e3;
40
+ for (let o = 0; o < i; o++) {
41
+ const {
42
+ output: c,
43
+ attention: h,
44
+ probabilities: l
45
+ } = this.model.generate(t, a, {
46
+ ...e,
47
+ usePadding: !1
48
+ });
49
+ t.dispose(), t = c;
50
+ const r = await this.processResponse(c, h, l);
51
+ if (r === null)
55
52
  break;
53
+ n += r;
56
54
  }
57
- return c.dispose(), this.emit("stop"), d;
55
+ return t.dispose(), n;
56
+ }
57
+ async generate(s, e) {
58
+ this.emit("start");
59
+ const t = this.model.config.useRope && !e?.noCache ? this.generateCache(s, e) : this.generateNoCache(s, e);
60
+ return this.emit("stop"), t;
58
61
  }
59
62
  }
60
63
  export {
61
- x as default
64
+ p as default
62
65
  };
@@ -1,5 +1,6 @@
1
1
  import { default as TF } from '@tensorflow/tfjs';
2
2
  import { GPTConfig } from './config';
3
+ import { KVCache } from './layers/CausalSelfAttention';
3
4
  export interface TrainingLogEntry {
4
5
  loss: number;
5
6
  valLoss?: number;
@@ -18,10 +19,11 @@ export interface GenerateOptions {
18
19
  export default class NanoGPT {
19
20
  readonly config: GPTConfig;
20
21
  private wte;
21
- private wpe;
22
+ private wpe?;
22
23
  private drop;
23
24
  private blocks;
24
25
  private lnF;
26
+ private ropeCache?;
25
27
  readonly tf: typeof TF;
26
28
  log: TrainingLogEntry[];
27
29
  constructor(tf: typeof TF, config?: Partial<GPTConfig>);
@@ -35,12 +37,12 @@ export default class NanoGPT {
35
37
  private validateInput;
36
38
  private calculateLoss;
37
39
  private computeAttentionRollout;
38
- forward(idx: TF.Tensor, targets?: TF.Tensor, training?: boolean, includeAttention?: boolean): {
40
+ forward(idx: TF.Tensor, targets?: TF.Tensor, training?: boolean, includeAttention?: boolean, cache?: (KVCache | undefined)[]): {
39
41
  logits: TF.Tensor;
40
42
  loss?: TF.Tensor;
41
43
  attention?: TF.Tensor;
42
44
  };
43
- generate(idx: TF.Tensor, options?: GenerateOptions): {
45
+ generate(idx: TF.Tensor, cache?: (KVCache | undefined)[], options?: GenerateOptions): {
44
46
  output: TF.Tensor;
45
47
  attention?: TF.Tensor;
46
48
  probabilities?: TF.Tensor;
@@ -1,8 +1,9 @@
1
1
  import { defaultConfig as z } from "./config.js";
2
- import v from "./layers/TransformerBlock.js";
2
+ import $ from "./layers/TransformerBlock.js";
3
3
  import S from "./layers/TiedEmbedding.js";
4
- import _ from "./layers/LayerNorm.js";
5
- class $ {
4
+ import I from "./layers/RoPECache.js";
5
+ import _ from "./layers/RMSNorm.js";
6
+ class M {
6
7
  config;
7
8
  wte;
8
9
  // Token embeddings
@@ -13,6 +14,7 @@ class $ {
13
14
  blocks;
14
15
  lnF;
15
16
  // Final layer norm
17
+ ropeCache;
16
18
  tf;
17
19
  log = [];
18
20
  // Training log
@@ -21,19 +23,19 @@ class $ {
21
23
  vocabSize: this.config.vocabSize,
22
24
  embedDim: this.config.nEmbed,
23
25
  name: "token_embedding"
24
- }), this.wpe = this.tf.layers.embedding({
26
+ }), this.config.useRope === !1 ? this.wpe = this.tf.layers.embedding({
25
27
  inputDim: this.config.blockSize,
26
28
  outputDim: this.config.nEmbed,
27
29
  name: "positional_embedding",
28
30
  embeddingsInitializer: this.tf.initializers.randomNormal({ mean: 0, stddev: 0.02 })
29
- }), this.drop = this.tf.layers.dropout({ rate: this.config.dropout }), this.blocks = [];
30
- for (let s = 0; s < this.config.nLayer; s++)
31
- this.blocks.push(new v(this.tf, s, this.config));
32
- this.lnF = new _(t, [this.config.nEmbed], 1e-5, "final_layer_norm");
31
+ }) : this.ropeCache = new I(t, this.config), this.drop = this.tf.layers.dropout({ rate: this.config.dropout }), this.blocks = [];
32
+ for (let o = 0; o < this.config.nLayer; o++)
33
+ this.blocks.push(new $(this.tf, o, this.config, this.ropeCache));
34
+ this.lnF = new _(t, [this.config.nEmbed], 1e-8, "final_rms_norm");
33
35
  }
34
36
  get variables() {
35
37
  return [
36
- ...this.wpe.trainableWeights.map((t) => t.read()),
38
+ //...this.wpe.trainableWeights.map((v) => v.read() as TF.Variable),
37
39
  ...this.blocks.flatMap((t) => t.variables),
38
40
  ...this.lnF.trainableWeights.map((t) => t),
39
41
  ...this.wte.variables
@@ -41,21 +43,28 @@ class $ {
41
43
  }
42
44
  saveWeights() {
43
45
  const t = /* @__PURE__ */ new Map();
44
- t.set("token_embedding", this.wte.getWeights()), t.set("positional_embedding", this.wpe.getWeights());
46
+ t.set("token_embedding", this.wte.getWeights()), this.wpe && t.set("positional_embedding", this.wpe.getWeights());
45
47
  for (let e = 0; e < this.blocks.length; e++)
46
48
  this.blocks[e].saveWeights(t);
47
- return t.set("final_layer_norm", this.lnF.getWeights()), t;
49
+ return t.set("final_rms_norm", this.lnF.getWeights()), t;
48
50
  }
49
51
  loadWeights(t) {
50
- this.wte.setWeights(t.get("token_embedding") || []), this.wpe.setWeights(t.get("positional_embedding") || []);
52
+ this.wte.setWeights(t.get("token_embedding") || []), this.wpe && this.wpe.setWeights(t.get("positional_embedding") || []);
51
53
  for (let e = 0; e < this.blocks.length; e++)
52
54
  this.blocks[e].loadWeights(t);
53
- this.lnF.setWeights(t.get("final_layer_norm") || []);
55
+ this.lnF.setWeights(t.get("final_rms_norm") || []);
54
56
  }
55
- inputPhase(t, e = !1) {
57
+ inputPhase(t, e, o = !1) {
56
58
  return this.tf.tidy(() => {
57
- const [, s] = t.shape, i = this.wte.embed(t), n = this.tf.range(0, s, 1, "int32"), h = this.wpe.apply(n), o = i.add(h);
58
- return this.drop.apply(o, { training: e });
59
+ const i = this.wte.embed(t);
60
+ if (this.config.useRope === !1) {
61
+ const [, s] = t.shape, r = this.config.blockSize, l = this.tf.range(0, s, 1, "int32"), n = this.tf.mod(
62
+ this.tf.add(l, this.tf.scalar(e, "int32")),
63
+ this.tf.scalar(r, "int32")
64
+ ), h = this.wpe.apply(n), c = i.add(h);
65
+ return this.drop.apply(c, { training: o });
66
+ } else
67
+ return this.drop.apply(i, { training: o });
59
68
  });
60
69
  }
61
70
  setSkipMask(t) {
@@ -73,7 +82,7 @@ class $ {
73
82
  set trainable(t) {
74
83
  for (const e of this.blocks)
75
84
  e.trainable = t;
76
- this.wpe.trainable = t, this.lnF.trainable = t;
85
+ this.lnF.trainable = t;
77
86
  }
78
87
  validateInput(t) {
79
88
  if (t.shape.length !== 2)
@@ -86,8 +95,8 @@ class $ {
86
95
  calculateLoss(t, e) {
87
96
  try {
88
97
  return this.tf.losses.softmaxCrossEntropy(e, t, this.tf.Reduction.MEAN);
89
- } catch (s) {
90
- throw console.error("Error computing loss:", s), new Error(`Loss computation failed: ${s}`);
98
+ } catch (o) {
99
+ throw console.error("Error computing loss:", o), new Error(`Loss computation failed: ${o}`);
91
100
  }
92
101
  }
93
102
  // Attention rollout per Abnar & Zuidema (2020)
@@ -96,60 +105,88 @@ class $ {
96
105
  return this.tf.tidy(() => {
97
106
  if (t.length === 0)
98
107
  throw new Error("No attentions for rollout");
99
- const e = t[0].shape[0], s = t[0].shape[1], i = this.tf.eye(s, s).expandDims(0);
100
- let n = i.tile([e, 1, 1]);
101
- for (const h of t) {
102
- let o = h.add(i);
103
- o = o.div(o.sum(-1, !0)), n = o.matMul(n);
108
+ const [e, o, i] = t[0].shape;
109
+ for (const s of t) {
110
+ const [r, l, n] = s.shape;
111
+ if (r !== e || l !== o || n !== i)
112
+ throw new Error(
113
+ `Inconsistent attention shapes in rollout: expected [${e},${o},${i}] got [${r},${l},${n}]`
114
+ );
115
+ }
116
+ if (o === i) {
117
+ const s = this.tf.eye(i, i).expandDims(0);
118
+ let r = s.tile([e, 1, 1]);
119
+ for (const l of t) {
120
+ const n = l.add(s);
121
+ r = n.div(n.sum(-1, !0)).matMul(r);
122
+ }
123
+ return r;
124
+ }
125
+ if (o === 1) {
126
+ let s = null;
127
+ const r = this.tf.tensor1d([i - 1], "int32"), l = this.tf.oneHot(r, i).reshape([1, 1, i]).tile([e, 1, 1]);
128
+ r.dispose();
129
+ for (const n of t) {
130
+ let h = n.add(l);
131
+ h = h.div(h.sum(-1, !0)), s == null ? s = h : (s = s.mul(h), s = s.div(s.sum(-1, !0)));
132
+ }
133
+ return s;
104
134
  }
105
- return n;
135
+ throw new Error(`Unsupported attention shapes for rollout: [B=${e}, Q=${o}, K=${i}]`);
106
136
  });
107
137
  }
108
- forward(t, e, s = !1, i = !1) {
138
+ forward(t, e, o = !1, i = !1, s) {
109
139
  return this.validateInput(t), this.tf.tidy(() => {
110
- let n = this.inputPhase(t, s);
111
- const h = [];
112
- for (const c of this.blocks) {
113
- const { output: d, attention: l } = c.call(n, s, i);
114
- n = d, i && l && h.push(l);
140
+ const r = s?.[0]?.length ?? 0;
141
+ let l = this.inputPhase(t, r, o);
142
+ const n = [];
143
+ if (s && s.length !== this.blocks.length)
144
+ throw console.error("Cache", s), new Error(`Cache length ${s.length} does not match number of blocks ${this.blocks.length}`);
145
+ for (let a = 0; a < this.blocks.length; a++) {
146
+ const d = this.blocks[a], {
147
+ output: g,
148
+ attention: m,
149
+ cache: p
150
+ } = d.call(l, o, i, s ? s[a] : void 0);
151
+ l = g, i && m && n.push(m), s && p ? (s[a]?.k.dispose(), s[a]?.v.dispose(), s[a] = p) : p && (p.k.dispose(), p.v.dispose());
115
152
  }
116
- let o;
117
- i && h.length > 0 && (o = this.computeAttentionRollout(h)), n = this.lnF.apply(n);
118
- const a = this.wte.project(n);
119
- let r;
120
- return e && (r = this.calculateLoss(a, e)), { logits: a, loss: r, attention: i ? o : void 0 };
153
+ let h;
154
+ i && n.length > 0 && (h = this.computeAttentionRollout(n)), l = this.lnF.apply(l);
155
+ const c = this.wte.project(l);
156
+ let f;
157
+ return e && (f = this.calculateLoss(c, e)), { logits: c, loss: f, attention: i ? h : void 0 };
121
158
  });
122
159
  }
123
- generate(t, e) {
124
- const s = e?.temperature ?? 1, i = e?.topK, n = e?.usePadding ?? !1, h = e?.includeAttention ?? !1;
160
+ generate(t, e, o) {
161
+ const i = o?.temperature ?? 1, s = o?.topK, r = o?.usePadding ?? !1, l = o?.includeAttention ?? !1;
125
162
  return this.tf.tidy(() => {
126
- const o = t, a = o.shape[1], r = a <= this.config.blockSize ? o : o.slice(
127
- [0, a - this.config.blockSize],
128
- [o.shape[0], this.config.blockSize]
129
- ), c = n ? this.config.blockSize - r.shape[1] : 0, d = c > 0 ? this.tf.pad(r, [
163
+ const n = t, h = n.shape[1], c = h <= this.config.blockSize ? n : n.slice(
164
+ [0, h - this.config.blockSize],
165
+ [n.shape[0], this.config.blockSize]
166
+ ), f = r ? this.config.blockSize - c.shape[1] : 0, a = f > 0 ? this.tf.pad(c, [
130
167
  [0, 0],
131
- [0, c]
132
- ]) : r, { logits: l, attention: p } = this.forward(d, void 0, !1, h), b = l.shape[1] - 1 - c, u = l.slice([0, b, 0], [l.shape[0], 1, l.shape[2]]), k = p ? p.slice([0, b, 0], [p.shape[0], 1, p.shape[2]]) : void 0, g = u.div(s);
133
- let f;
134
- if (i) {
135
- const { values: w, indices: E } = this.tf.topk(g, i), y = this.tf.multinomial(w.squeeze([1]), 1);
136
- f = this.tf.gather(E.squeeze([1]), y, 1);
168
+ [0, f]
169
+ ]) : c, { logits: d, attention: g } = this.forward(a, void 0, !1, l, e), m = d.shape[1] - 1 - f, p = d.slice([0, m, 0], [d.shape[0], 1, d.shape[2]]), w = g ? g.slice([0, m, 0], [g.shape[0], 1, g.shape[2]]) : void 0, u = p.div(i);
170
+ let b;
171
+ if (s) {
172
+ const { values: E, indices: v } = this.tf.topk(u, s), y = this.tf.multinomial(E.squeeze([1]), 1);
173
+ b = this.tf.gather(v.squeeze([1]), y, 1);
137
174
  } else
138
- f = this.tf.multinomial(g.squeeze([1]), 1);
139
- let m;
140
- return e?.includeProbabilities && (m = this.tf.softmax(g.squeeze([1]))), f = f.reshape([1, 1]), { output: f, attention: k?.squeeze([1]), probabilities: m };
175
+ b = this.tf.multinomial(u.squeeze([1]), 1);
176
+ let k;
177
+ return o?.includeProbabilities && (k = this.tf.softmax(u.squeeze([1]))), b = b.reshape([1, 1]), { output: b, attention: w?.squeeze([1]), probabilities: k };
141
178
  });
142
179
  }
143
180
  getNumParams() {
144
181
  const t = this.config.vocabSize * this.config.nEmbed + this.config.blockSize * this.config.nEmbed, e = this.config.nLayer * (4 * this.config.nEmbed * this.config.nEmbed + // qkv + proj
145
- 2 * this.config.nEmbed), s = this.config.nLayer * (4 * this.config.nEmbed * this.config.nEmbed + // fc
182
+ 2 * this.config.nEmbed), o = this.config.nLayer * (4 * this.config.nEmbed * this.config.nEmbed + // fc
146
183
  this.config.nEmbed * 4 * this.config.nEmbed), i = this.config.nEmbed + this.config.vocabSize * this.config.nEmbed;
147
- return t + e + s + i;
184
+ return t + e + o + i;
148
185
  }
149
186
  dispose() {
150
- this.wte.dispose(), this.wpe.dispose(), this.drop.dispose(), this.blocks.forEach((t) => t.dispose()), this.lnF.dispose();
187
+ this.wte.dispose(), this.wpe && this.wpe.dispose(), this.drop.dispose(), this.blocks.forEach((t) => t.dispose()), this.lnF.dispose();
151
188
  }
152
189
  }
153
190
  export {
154
- $ as default
191
+ M as default
155
192
  };
@@ -1,5 +1,5 @@
1
- import d from "./NanoGPTModel.js";
2
- import { defaultConfig as u } from "./config.js";
1
+ import { defaultConfig as d } from "./config.js";
2
+ import u from "./NanoGPTModel.js";
3
3
  import { saveModel as m } from "./utilities/save.js";
4
4
  import { loadModel as l } from "./utilities/load.js";
5
5
  import f from "./Generator.js";
@@ -47,25 +47,25 @@ class a extends c {
47
47
  }
48
48
  static loadModel(t, r) {
49
49
  const e = new a(t);
50
- return l(t, r).then(({ model: i, tokeniser: o }) => {
51
- e._model = i, e._tokeniser = o, e._config = i.config, e.setStatus("warmup"), h(i).then(() => {
50
+ return l(t, r).then(({ model: s, tokeniser: o }) => {
51
+ e._model = s, e._tokeniser = o, e._config = s.config, e.setStatus("warmup"), h(s).then(() => {
52
52
  e.setStatus("ready");
53
- }).catch((s) => {
54
- e.setStatus("error"), e.emit("error", s);
53
+ }).catch((i) => {
54
+ e.setStatus("error"), e.emit("error", i);
55
55
  });
56
- }).catch((i) => {
57
- e.setStatus("error"), e.emit("error", i);
56
+ }).catch((s) => {
57
+ e.setStatus("error"), e.emit("error", s);
58
58
  }), e;
59
59
  }
60
60
  static create(t, r = {}) {
61
- const e = { ...u, ...r }, i = new g(e.vocabSize), o = new d(t, e), s = new a(t, i, o);
62
- return s.setStatus("warmup"), h(o).then(() => {
63
- s.setStatus("awaitingTokens"), s.tokeniser.once("trainStatus", (n) => {
64
- n === "trained" && s.setStatus("ready");
65
- });
61
+ const e = { ...d, ...r }, s = new g(e.vocabSize), o = new u(t, e), i = new a(t, s, o);
62
+ return i.setStatus("warmup"), h(o).then(() => {
63
+ i.tokeniser.trained ? i.setStatus("ready") : (i.setStatus("awaitingTokens"), i.tokeniser.once("trainStatus", (n) => {
64
+ n === "trained" && i.setStatus("ready");
65
+ }));
66
66
  }).catch((n) => {
67
- s.setStatus("error"), s.emit("error", n);
68
- }), s;
67
+ i.setStatus("error"), i.emit("error", n);
68
+ }), i;
69
69
  }
70
70
  getNumParams() {
71
71
  if (!this._model)
@@ -78,8 +78,8 @@ class a extends c {
78
78
  const t = new _(this._model, this._tokeniser);
79
79
  return t.on("start", () => this.setStatus("training")), t.on("stop", () => this.setStatus("ready")), t.on("log", async (r) => {
80
80
  const e = this.listeners("trainStep");
81
- for (const i of e)
82
- await i(r);
81
+ for (const s of e)
82
+ await s(r);
83
83
  }), t;
84
84
  }
85
85
  train(t, r) {
package/dist/config.d.ts CHANGED
@@ -8,5 +8,6 @@ export interface GPTConfig {
8
8
  biasInLinear: boolean;
9
9
  biasInLayerNorm: boolean;
10
10
  mlpFactor: number;
11
+ useRope: boolean;
11
12
  }
12
13
  export declare const defaultConfig: GPTConfig;
package/dist/config.js CHANGED
@@ -1,4 +1,4 @@
1
- const a = {
1
+ const e = {
2
2
  vocabSize: 50304,
3
3
  // GPT-2 vocab size
4
4
  blockSize: 1024,
@@ -13,8 +13,10 @@ const a = {
13
13
  // Dropout probability
14
14
  biasInLinear: !1,
15
15
  biasInLayerNorm: !1,
16
- mlpFactor: 4
16
+ mlpFactor: 4,
17
+ useRope: !1
18
+ // Use Rotary Position Embeddings
17
19
  };
18
20
  export {
19
- a as defaultConfig
21
+ e as defaultConfig
20
22
  };
@@ -1,6 +1,14 @@
1
1
  import { default as TF } from '@tensorflow/tfjs';
2
2
  import { GPTConfig } from '../config';
3
+ import { default as RoPECache } from './RoPECache';
4
+ export type KVCache = {
5
+ k: TF.Tensor;
6
+ v: TF.Tensor;
7
+ length: number;
8
+ cumulativeLength: number;
9
+ };
3
10
  export default class CausalSelfAttention {
11
+ private readonly ropeCache?;
4
12
  private config;
5
13
  private cAttn;
6
14
  private cProj;
@@ -12,18 +20,20 @@ export default class CausalSelfAttention {
12
20
  private divisor;
13
21
  private index;
14
22
  private _trainable;
15
- constructor(tf: typeof TF, index: number, config: GPTConfig);
23
+ constructor(tf: typeof TF, index: number, config: GPTConfig, ropeCache?: RoPECache | undefined);
16
24
  get variables(): TF.Variable[];
17
25
  get trainable(): boolean;
18
26
  set trainable(value: boolean);
19
27
  saveWeights(map: Map<string, TF.Tensor[]>): void;
20
28
  loadWeights(weights: Map<string, TF.Tensor[]>): void;
21
29
  private getAttentionScores;
30
+ private getAttentionScoresWithPast;
22
31
  private getQKV;
23
32
  private getOutputProjection;
24
- call(x: TF.Tensor, training?: boolean, includeAttention?: boolean): {
33
+ call(x: TF.Tensor, training?: boolean, includeAttention?: boolean, pastKV?: KVCache): {
25
34
  output: TF.Tensor;
26
35
  attention?: TF.Tensor;
36
+ presentKV?: KVCache;
27
37
  };
28
38
  dispose(): void;
29
39
  }
@@ -1,20 +1,9 @@
1
- class m {
2
- config;
3
- cAttn;
4
- cProj;
5
- attnDropout;
6
- residDropout;
7
- bias;
8
- maskInf;
9
- tf;
10
- divisor;
11
- index;
12
- _trainable = !0;
13
- constructor(t, e, s) {
14
- this.config = s, this.tf = t, this.index = e, this.cAttn = this.tf.layers.dense({
1
+ class S {
2
+ constructor(t, i, s, e) {
3
+ this.ropeCache = e, this.config = s, this.tf = t, this.index = i, this.cAttn = this.tf.layers.dense({
15
4
  units: 3 * s.nEmbed,
16
5
  useBias: s.biasInLinear,
17
- name: `block_${e}_attn_cAttn`,
6
+ name: `block_${i}_attn_cAttn`,
18
7
  kernelInitializer: this.tf.initializers.randomNormal({
19
8
  mean: 0,
20
9
  stddev: 0.02
@@ -23,14 +12,27 @@ class m {
23
12
  }), this.cProj = this.tf.layers.dense({
24
13
  units: s.nEmbed,
25
14
  useBias: s.biasInLinear,
26
- name: `block_${e}_attn_cProj`,
15
+ name: `block_${i}_attn_cProj`,
27
16
  kernelInitializer: this.tf.initializers.randomNormal({
28
17
  mean: 0,
29
18
  stddev: 0.02 / Math.sqrt(2 * s.nLayer)
30
19
  }),
31
20
  biasInitializer: "zeros"
32
- }), this.attnDropout = this.tf.layers.dropout({ rate: s.dropout }), this.residDropout = this.tf.layers.dropout({ rate: s.dropout }), this.bias = this.tf.linalg.bandPart(this.tf.ones([s.blockSize, s.blockSize]), -1, 0).cast("bool"), this.divisor = this.tf.scalar(1 / Math.sqrt(s.nEmbed / s.nHead)), this.maskInf = this.tf.zeros([s.blockSize, s.blockSize]).where(this.bias, -1 / 0);
21
+ }), this.attnDropout = this.tf.layers.dropout({ rate: s.dropout }), this.residDropout = this.tf.layers.dropout({ rate: s.dropout }), this.bias = this.tf.linalg.bandPart(this.tf.ones([s.blockSize, s.blockSize]), -1, 0).cast("bool"), this.divisor = this.tf.scalar(1 / Math.sqrt(s.nEmbed / s.nHead));
22
+ const a = this.tf.zeros([s.blockSize, s.blockSize]), h = this.tf.fill([s.blockSize, s.blockSize], Number.NEGATIVE_INFINITY);
23
+ this.maskInf = this.tf.where(this.bias, a, h);
33
24
  }
25
+ config;
26
+ cAttn;
27
+ cProj;
28
+ attnDropout;
29
+ residDropout;
30
+ bias;
31
+ maskInf;
32
+ tf;
33
+ divisor;
34
+ index;
35
+ _trainable = !0;
34
36
  get variables() {
35
37
  return [
36
38
  ...this.cAttn.trainableWeights.map((t) => t.read()),
@@ -49,34 +51,65 @@ class m {
49
51
  loadWeights(t) {
50
52
  this.cAttn.setWeights(t.get(`block_${this.index}_cAttn`) || []), this.cProj.setWeights(t.get(`block_${this.index}_cProj`) || []);
51
53
  }
52
- getAttentionScores(t, e, s) {
53
- const a = t.shape[2], o = this.tf.matMul(t, e, !1, !0).mul(this.divisor), i = this.maskInf.slice([0, 0], [a, a]), n = o.add(i), h = this.tf.softmax(n, -1);
54
- return this.attnDropout.apply(h, { training: s });
54
+ getAttentionScores(t, i, s) {
55
+ const e = t.shape[2], h = this.tf.matMul(t, i, !1, !0).mul(this.divisor), n = this.maskInf.slice([0, 0], [e, e]).expandDims(0).expandDims(0), r = h.add(n), o = this.tf.softmax(r, -1);
56
+ return this.attnDropout.apply(o, { training: s });
57
+ }
58
+ // Attention with optional past. If pastLen > 0 and T_cur == 1, no mask needed.
59
+ getAttentionScoresWithPast(t, i, s, e) {
60
+ const a = t.shape[2];
61
+ let n = this.tf.matMul(t, i, !1, !0).mul(this.divisor);
62
+ if (a > 1 && e > 0)
63
+ throw new Error("Cannot use past with T_cur > 1");
64
+ if (a > 1) {
65
+ const o = this.maskInf.slice([0, 0], [a, a]).expandDims(0).expandDims(0);
66
+ n = n.add(o);
67
+ }
68
+ const r = this.tf.softmax(n, -1);
69
+ return this.attnDropout.apply(r, { training: s });
55
70
  }
56
71
  getQKV(t) {
57
- const [e, s, a] = t.shape, r = this.cAttn.apply(t), [o, i, n] = this.tf.split(r, 3, -1);
58
- r.dispose();
59
- const h = a / this.config.nHead, c = this.tf.reshape(o, [e, s, this.config.nHead, h]);
60
- o.dispose();
61
- const l = c.transpose([0, 2, 1, 3]);
62
- c.dispose();
63
- const d = this.tf.reshape(i, [e, s, this.config.nHead, h]);
64
- i.dispose();
65
- const u = d.transpose([0, 2, 1, 3]);
66
- d.dispose();
67
- const p = this.tf.reshape(n, [e, s, this.config.nHead, h]);
72
+ const [i, s, e] = t.shape, a = this.cAttn.apply(t), [h, n, r] = this.tf.split(a, 3, -1);
73
+ a.dispose();
74
+ const o = e / this.config.nHead, u = this.tf.reshape(h, [i, s, this.config.nHead, o]);
75
+ h.dispose();
76
+ const f = u.transpose([0, 2, 1, 3]);
77
+ u.dispose();
78
+ const d = this.tf.reshape(n, [i, s, this.config.nHead, o]);
68
79
  n.dispose();
69
- const b = p.transpose([0, 2, 1, 3]);
70
- return p.dispose(), [l, u, b];
80
+ const c = d.transpose([0, 2, 1, 3]);
81
+ d.dispose();
82
+ const l = this.tf.reshape(r, [i, s, this.config.nHead, o]);
83
+ r.dispose();
84
+ const p = l.transpose([0, 2, 1, 3]);
85
+ return l.dispose(), [f, c, p];
71
86
  }
72
- getOutputProjection(t, e) {
73
- const s = t.shape[0], a = t.shape[2], r = this.config.nEmbed, o = t.transpose([0, 2, 1, 3]), i = this.tf.reshape(o, [s, a, r]), n = this.cProj.apply(i);
74
- return this.residDropout.apply(n, { training: e });
87
+ getOutputProjection(t, i) {
88
+ const s = t.shape[0], e = t.shape[2], a = this.config.nEmbed, h = t.transpose([0, 2, 1, 3]), n = this.tf.reshape(h, [s, e, a]), r = this.cProj.apply(n);
89
+ return this.residDropout.apply(r, { training: i });
75
90
  }
76
- call(t, e = !1, s = !1) {
91
+ // Added optional KV cache support (pastKV). Returns presentKV for chaining.
92
+ call(t, i = !1, s = !1, e) {
93
+ if (e && !this.config.useRope)
94
+ throw new Error("Cannot use pastKV without RoPE enabled");
77
95
  return this.tf.tidy(() => {
78
- const [a, r, o] = this.getQKV(t), i = this.getAttentionScores(a, r, e), n = this.tf.matMul(i, o);
79
- return { output: this.getOutputProjection(n, e), attention: s ? i.mean(1) : void 0 };
96
+ const [a, h, n] = this.getQKV(t), r = a.shape[2], o = this.config.blockSize, u = e ? e.cumulativeLength : 0, [f, d] = this.ropeCache ? this.ropeCache.applyRoPE(a, h, u) : [a, h];
97
+ let c = d, l = n, p = 0;
98
+ e && (p = e.length, c = this.tf.concat([e.k, d], 2), l = this.tf.concat([e.v, n], 2));
99
+ const b = c.shape[2];
100
+ if (b > o) {
101
+ const k = b - o, g = c.shape[0], v = c.shape[1], I = c.shape[3];
102
+ c = c.slice([0, 0, k, 0], [g, v, o, I]), l = l.slice([0, 0, k, 0], [g, v, o, I]), p = o - r;
103
+ }
104
+ let m;
105
+ p > 0 ? m = this.getAttentionScoresWithPast(f, c, i, p) : m = this.getAttentionScores(f, c, i);
106
+ const _ = this.tf.matMul(m, l), A = this.getOutputProjection(_, i), P = {
107
+ k: this.tf.keep(c),
108
+ v: this.tf.keep(l),
109
+ length: p + r,
110
+ cumulativeLength: e ? e.cumulativeLength + r : r
111
+ };
112
+ return { output: A, attention: s ? m.mean(1) : void 0, presentKV: P };
80
113
  });
81
114
  }
82
115
  dispose() {
@@ -84,5 +117,5 @@ class m {
84
117
  }
85
118
  }
86
119
  export {
87
- m as default
120
+ S as default
88
121
  };
@@ -0,0 +1,13 @@
1
+ import { default as TF } from '@tensorflow/tfjs';
2
+ export default class RMSNorm {
3
+ private gamma;
4
+ private epsilon;
5
+ private tf;
6
+ constructor(tf: typeof TF, shape: number[], epsilon?: number, name?: string);
7
+ get trainableWeights(): TF.Variable[];
8
+ set trainable(value: boolean);
9
+ getWeights(): TF.Tensor[];
10
+ setWeights(weights: TF.Tensor[]): void;
11
+ apply(x: TF.Tensor): TF.Tensor;
12
+ dispose(): void;
13
+ }
@@ -0,0 +1,32 @@
1
+ class m {
2
+ gamma;
3
+ epsilon;
4
+ tf;
5
+ constructor(a, s, t = 1e-8, e = "") {
6
+ this.tf = a, this.epsilon = t, this.gamma = a.variable(a.ones(s), !0, `${e}_gamma`, "float32");
7
+ }
8
+ get trainableWeights() {
9
+ return [this.gamma];
10
+ }
11
+ set trainable(a) {
12
+ this.gamma.trainable = a;
13
+ }
14
+ getWeights() {
15
+ return [this.gamma];
16
+ }
17
+ setWeights(a) {
18
+ this.gamma.assign(a[0]);
19
+ }
20
+ apply(a) {
21
+ return this.tf.tidy(() => {
22
+ const t = a.square().mean(-1, !0).add(this.epsilon).rsqrt();
23
+ return a.mul(t).mul(this.gamma);
24
+ });
25
+ }
26
+ dispose() {
27
+ this.gamma.dispose();
28
+ }
29
+ }
30
+ export {
31
+ m as default
32
+ };
@@ -0,0 +1,16 @@
1
+ import { default as TF } from '@tensorflow/tfjs';
2
+ import { GPTConfig } from '../config';
3
+ export default class RoPECache {
4
+ private readonly tf;
5
+ private readonly config;
6
+ private rotaryDim;
7
+ private ropeBase;
8
+ private ropeInvFreq;
9
+ private ropeCos;
10
+ private ropeSin;
11
+ private ropeCacheLen;
12
+ constructor(tf: typeof TF, config: GPTConfig);
13
+ private ensureRopeCache;
14
+ applyRoPE(q: TF.Tensor, k: TF.Tensor, pastLen: number): [TF.Tensor, TF.Tensor];
15
+ dispose(): void;
16
+ }
@@ -0,0 +1,44 @@
1
+ class b {
2
+ constructor(s, r) {
3
+ this.tf = s, this.config = r;
4
+ const o = this.config.nEmbed / this.config.nHead;
5
+ if (this.rotaryDim = o, this.rotaryDim % 2 !== 0)
6
+ throw new Error("rotaryDim must be even");
7
+ this.ropeBase = 1e4;
8
+ const i = this.tf.range(0, this.rotaryDim, 2, "float32"), t = i.div(this.tf.scalar(this.rotaryDim, "float32")), e = this.tf.pow(this.tf.scalar(this.ropeBase, "float32"), t);
9
+ this.ropeInvFreq = this.tf.reciprocal(e), t.dispose(), e.dispose(), i.dispose(), this.config.useRope === !1 ? (this.ropeCos = null, this.ropeSin = null, this.ropeCacheLen = 0) : this.tf.tidy(() => {
10
+ this.ensureRopeCache(this.config.blockSize * 4);
11
+ });
12
+ }
13
+ rotaryDim;
14
+ ropeBase;
15
+ ropeInvFreq;
16
+ ropeCos = null;
17
+ // [cacheLen, rotaryDim/2]
18
+ ropeSin = null;
19
+ // [cacheLen, rotaryDim/2]
20
+ ropeCacheLen = 0;
21
+ ensureRopeCache(s) {
22
+ if (s <= this.ropeCacheLen) return;
23
+ this.ropeCos && this.ropeCos.dispose(), this.ropeSin && this.ropeSin.dispose();
24
+ const o = this.tf.range(0, s, 1, "float32").expandDims(1).mul(this.ropeInvFreq.expandDims(0));
25
+ this.ropeCos = this.tf.keep(this.tf.cos(o).expandDims(-1)), this.ropeSin = this.tf.keep(this.tf.sin(o).expandDims(-1)), this.ropeCacheLen = s;
26
+ }
27
+ applyRoPE(s, r, o) {
28
+ const i = s.shape[3], t = this.rotaryDim;
29
+ if (t > i) return [s, r];
30
+ const e = s.shape[2], v = o + e;
31
+ this.ensureRopeCache(v);
32
+ const n = t / 2, p = this.ropeCos.slice([o, 0, 0], [e, n, 1]).reshape([1, 1, e, n]), a = this.ropeSin.slice([o, 0, 0], [e, n, 1]).reshape([1, 1, e, n]), h = s.shape[0], c = s.shape[1], f = this.tf.range(0, t, 2, "int32"), l = this.tf.range(1, t, 2, "int32"), d = (u) => {
33
+ const m = u.slice([0, 0, 0, 0], [h, c, e, t]), C = t < i ? u.slice([0, 0, 0, t], [h, c, e, i - t]) : null, D = this.tf.gather(m, f, 3), g = this.tf.gather(m, l, 3), x = D.mul(p).sub(g.mul(a)), k = g.mul(p).add(D.mul(a)), R = this.tf.stack([x, k], -1).reshape([h, c, e, t]);
34
+ return C ? this.tf.concat([R, C], 3) : R;
35
+ }, y = d(s), S = d(r);
36
+ return f.dispose(), l.dispose(), [y, S];
37
+ }
38
+ dispose() {
39
+ this.ropeCos && this.ropeCos.dispose(), this.ropeSin && this.ropeSin.dispose(), this.ropeInvFreq.dispose();
40
+ }
41
+ }
42
+ export {
43
+ b as default
44
+ };
@@ -1,5 +1,7 @@
1
1
  import { default as TF } from '@tensorflow/tfjs';
2
2
  import { GPTConfig } from '../config';
3
+ import { KVCache } from './CausalSelfAttention';
4
+ import { default as RoPECache } from './RoPECache';
3
5
  export default class Block {
4
6
  private ln1;
5
7
  private attn;
@@ -9,16 +11,17 @@ export default class Block {
9
11
  private index;
10
12
  private _trainable;
11
13
  skipped: boolean;
12
- constructor(tf: typeof TF, index: number, config: GPTConfig);
14
+ constructor(tf: typeof TF, index: number, config: GPTConfig, ropeCache?: RoPECache);
13
15
  get variables(): TF.Variable[];
14
16
  get trainable(): boolean;
15
17
  set trainable(value: boolean);
16
18
  saveWeights(map: Map<string, TF.Tensor[]>): void;
17
19
  loadWeights(weights: Map<string, TF.Tensor[]>): void;
18
20
  private getMLPOutput;
19
- call(x: TF.Tensor, training?: boolean, includeAttention?: boolean): {
21
+ call(x: TF.Tensor, training?: boolean, includeAttention?: boolean, cache?: KVCache): {
20
22
  output: TF.Tensor;
21
23
  attention?: TF.Tensor;
24
+ cache?: KVCache;
22
25
  };
23
26
  dispose(): void;
24
27
  }
@@ -1,6 +1,6 @@
1
- import h from "./CausalSelfAttention.js";
2
- import r from "./MLP.js";
3
- import l from "./LayerNorm.js";
1
+ import r from "./CausalSelfAttention.js";
2
+ import o from "./MLP.js";
3
+ import a from "./RMSNorm.js";
4
4
  class u {
5
5
  ln1;
6
6
  attn;
@@ -10,8 +10,8 @@ class u {
10
10
  index;
11
11
  _trainable = !0;
12
12
  skipped = !1;
13
- constructor(t, i, s) {
14
- this.tf = t, this.index = i, this.ln1 = new l(t, [s.nEmbed], 1e-5, `block_${this.index}_ln1`), this.attn = new h(this.tf, this.index, s), this.ln2 = new l(t, [s.nEmbed], 1e-5, `block_${this.index}_ln2`), this.mlp = new r(this.tf, this.index, s);
13
+ constructor(t, i, s, e) {
14
+ this.tf = t, this.index = i, this.ln1 = new a(t, [s.nEmbed], 1e-8, `block_${this.index}_rms1`), this.attn = new r(this.tf, this.index, s, e), this.ln2 = new a(t, [s.nEmbed], 1e-8, `block_${this.index}_rms2`), this.mlp = new o(this.tf, this.index, s);
15
15
  }
16
16
  get variables() {
17
17
  return [
@@ -28,21 +28,25 @@ class u {
28
28
  this._trainable = t, this.ln1.trainable = t, this.ln2.trainable = t, this.attn.trainable = t, this.mlp.trainable = t;
29
29
  }
30
30
  saveWeights(t) {
31
- this.attn.saveWeights(t), this.mlp.saveWeights(t), t.set(`block_${this.index}_ln1`, this.ln1.getWeights()), t.set(`block_${this.index}_ln2`, this.ln2.getWeights());
31
+ this.attn.saveWeights(t), this.mlp.saveWeights(t), t.set(`block_${this.index}_rms1`, this.ln1.getWeights()), t.set(`block_${this.index}_rms2`, this.ln2.getWeights());
32
32
  }
33
33
  loadWeights(t) {
34
- this.attn.loadWeights(t), this.mlp.loadWeights(t), this.ln1.setWeights(t.get(`block_${this.index}_ln1`) || []), this.ln2.setWeights(t.get(`block_${this.index}_ln2`) || []);
34
+ this.attn.loadWeights(t), this.mlp.loadWeights(t), this.ln1.setWeights(t.get(`block_${this.index}_rms1`) || []), this.ln2.setWeights(t.get(`block_${this.index}_rms2`) || []);
35
35
  }
36
36
  getMLPOutput(t, i) {
37
37
  const s = this.ln2.apply(t), e = this.mlp.call(s, i);
38
38
  return t.add(e);
39
39
  }
40
- call(t, i = !1, s = !1) {
40
+ call(t, i = !1, s = !1, e) {
41
41
  return this.tf.tidy(() => {
42
42
  if (this.skipped)
43
43
  return { output: t };
44
- const e = this.ln1.apply(t), n = this.attn.call(e, i, s), a = t.add(n.output);
45
- return { output: this.getMLPOutput(a, i), attention: n.attention };
44
+ const l = this.ln1.apply(t), n = this.attn.call(l, i, s, e), h = t.add(n.output);
45
+ return {
46
+ output: this.getMLPOutput(h, i),
47
+ attention: n.attention,
48
+ cache: n.presentKV
49
+ };
46
50
  });
47
51
  }
48
52
  dispose() {
package/dist/main.d.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  export { default as NanoGPT } from './NanoGPTModel';
2
2
  export { default as TeachableLLM } from './TeachableLLM';
3
3
  export { default as CharTokeniser } from './tokeniser/CharTokeniser';
4
+ export { default as waitForModel } from './utilities/waitForModel';
4
5
  export type { ITrainerOptions } from './Trainer';
5
6
  export type { IGenerateOptions } from './Generator';
6
7
  export type { TrainingLogEntry } from './NanoGPTModel';
package/dist/main.js CHANGED
@@ -1,8 +1,10 @@
1
1
  import { default as o } from "./NanoGPTModel.js";
2
- import { default as f } from "./TeachableLLM.js";
2
+ import { default as t } from "./TeachableLLM.js";
3
3
  import { default as l } from "./tokeniser/CharTokeniser.js";
4
+ import { default as s } from "./utilities/waitForModel.js";
4
5
  export {
5
6
  l as CharTokeniser,
6
7
  o as NanoGPT,
7
- f as TeachableLLM
8
+ t as TeachableLLM,
9
+ s as waitForModel
8
10
  };
@@ -1,20 +1,20 @@
1
- async function w(n, t, r, s, g) {
2
- if (s <= 0)
1
+ async function h(r, t, a, c, g) {
2
+ if (c <= 0)
3
3
  throw new Error("Length must be a positive integer");
4
- if (r.length === 0)
4
+ if (a.length === 0)
5
5
  throw new Error("Prompt cannot be an empty string");
6
- const i = await n.tokenise([r], !0), a = t.tf.tidy(() => {
7
- let e = t.tf.tensor2d(i, [1, i[0].length], "int32");
8
- for (let d = 0; d < s; d++) {
9
- const { output: p } = t.generate(e, g), f = e;
10
- e = t.tf.concat([e, p], 1), f.dispose(), p.dispose();
6
+ const p = await r.tokenise([a], !0), s = t.config.useRope ? new Array(t.config.nLayer).fill(void 0) : void 0, u = t.tf.tidy(() => {
7
+ let e = t.tf.tensor2d(p, [1, p[0].length], "int32"), n = e;
8
+ for (let f = 0; f < c; f++) {
9
+ const { output: o } = t.generate(e, s, g), w = e, y = n;
10
+ n = t.tf.concat([n, o], 1), e = s ? o : t.tf.concat([e, o], 1), w.dispose(), y.dispose(), s || o.dispose();
11
11
  }
12
- return e;
13
- }), u = await a.array();
14
- a.dispose();
15
- const o = u[0], c = o.indexOf(n.eosToken);
16
- return c !== -1 && o.splice(c), await n.decode(o);
12
+ return n;
13
+ }), T = await u.array();
14
+ u.dispose();
15
+ const i = T[0], d = i.indexOf(r.eosToken);
16
+ return d !== -1 && i.splice(d), await r.decode(i);
17
17
  }
18
18
  export {
19
- w as generateText
19
+ h as generateText
20
20
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@genai-fi/nanogpt",
3
- "version": "0.1.9",
3
+ "version": "0.2.1",
4
4
  "type": "module",
5
5
  "main": "dist/main.js",
6
6
  "types": "dist/main.d.ts",