@genai-fi/nanogpt 0.8.2 → 0.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -47,6 +47,9 @@ export default class Generator extends EE<'start' | 'stop' | 'tokens'> {
47
47
  getText(): string;
48
48
  getAttentionData(): number[][][][][];
49
49
  getProbabilitiesData(): number[][][];
50
- getEmbeddingsData(): number[][][][];
50
+ getEmbeddingsData(): {
51
+ name: string;
52
+ tensor: number[][];
53
+ }[][];
51
54
  getTokens(): number[];
52
55
  }
package/dist/Generator.js CHANGED
@@ -63,18 +63,18 @@ import { c as G } from "./concat-pHiVqR3L.js";
63
63
  * limitations under the License.
64
64
  * =============================================================================
65
65
  */
66
- function N(m, t, e, i = !1) {
67
- const o = L(m, "logits", "multinomial"), s = o.size, n = o.rank;
66
+ function N(h, t, e, i = !1) {
67
+ const o = L(h, "logits", "multinomial"), s = o.size, n = o.rank;
68
68
  if (s < 2)
69
69
  throw new Error(`Error in multinomial: you need at least 2 outcomes, but got ${s}.`);
70
70
  if (n > 2)
71
71
  throw new Error(`Rank of probabilities must be 1 or 2, but is ${n}`);
72
72
  e = e || Math.random();
73
- const a = { logits: n === 1 ? x(o, [1, -1]) : o }, p = { numSamples: t, seed: e, normalized: i }, l = C.runKernel(I, a, p);
74
- return n === 1 ? x(l, [l.size]) : l;
73
+ const a = { logits: n === 1 ? x(o, [1, -1]) : o }, l = { numSamples: t, seed: e, normalized: i }, m = C.runKernel(I, a, l);
74
+ return n === 1 ? x(m, [m.size]) : m;
75
75
  }
76
76
  const D = /* @__PURE__ */ A({ multinomial_: N }), H = [
77
- ...Array.from({ length: 95 }, (m, t) => String.fromCharCode(t + 32)),
77
+ ...Array.from({ length: 95 }, (h, t) => String.fromCharCode(t + 32)),
78
78
  // ASCII
79
79
  // Spanish accented letters and punctuation
80
80
  ..."áéíóúüñ¿¡",
@@ -85,8 +85,8 @@ const D = /* @__PURE__ */ A({ multinomial_: N }), H = [
85
85
  // Cyrillic letters
86
86
  ..."абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ"
87
87
  ];
88
- function U(m, t) {
89
- return m.length === t ? m : m.length > t ? m.slice(0, t) : m.concat(Array(t - m.length).fill(""));
88
+ function U(h, t) {
89
+ return h.length === t ? h : h.length > t ? h.slice(0, t) : h.concat(Array(t - h.length).fill(""));
90
90
  }
91
91
  class qt extends z {
92
92
  constructor(t, e) {
@@ -113,7 +113,7 @@ class qt extends z {
113
113
  const n = await t.decode([s]);
114
114
  if (i) {
115
115
  const d = await Promise.all(
116
- i.map((a) => a.array().then((p) => p))
116
+ i.map((a) => a.array().then((l) => l))
117
117
  );
118
118
  i.forEach((a) => a.dispose()), this.attentionData.push(d);
119
119
  }
@@ -132,48 +132,47 @@ class qt extends z {
132
132
  } : void 0,
133
133
  cache: e,
134
134
  outputEmbeddings: i?.embeddings ?? !1
135
- }, p = O(() => {
136
- const r = t, h = r.shape[1], u = h <= this.model.config.blockSize ? r : r.slice(
137
- [0, h - this.model.config.blockSize],
138
- [r.shape[0], this.model.config.blockSize]
139
- ), g = d ? this.model.config.blockSize - u.shape[1] : 0, b = g > 0 ? _(u, [
135
+ }, l = O(() => {
136
+ const p = t, u = p.shape[1], r = u <= this.model.config.blockSize ? p : p.slice(
137
+ [0, u - this.model.config.blockSize],
138
+ [p.shape[0], this.model.config.blockSize]
139
+ ), f = d ? this.model.config.blockSize - r.shape[1] : 0, b = f > 0 ? _(r, [
140
140
  [0, 0],
141
- [0, g]
142
- ]) : u, [f] = this.model.forward(a, b), y = f.shape[1] - 1 - g, c = f.slice([0, y, 0], [f.shape[0], 1, f.shape[2]]);
141
+ [0, f]
142
+ ]) : r, [g] = this.model.forward(a, b), y = g.shape[1] - 1 - f, c = g.slice([0, y, 0], [g.shape[0], 1, g.shape[2]]);
143
143
  return a.attentionScores?.attentionOut && a.attentionScores.attentionOut.forEach((T, E) => {
144
144
  T.shape[1] !== 1 && (a.attentionScores.attentionOut[E] = R(
145
145
  T.slice([0, y, 0], [T.shape[0], 1, T.shape[2]])
146
146
  ), T.dispose());
147
- }), f.dispose(), c.div(o).squeeze([1]);
147
+ }), g.dispose(), c.div(o).squeeze([1]);
148
148
  });
149
- let l;
149
+ let m;
150
150
  if (n) {
151
- const r = v(p), h = await r.array();
152
- r.dispose();
153
- const u = h[0].map((c, k) => ({ prob: c, index: k })).sort((c, k) => k.prob - c.prob);
154
- let g = 0;
155
- const b = new Array(u.length).fill(0);
156
- for (const c of u)
157
- if (g += c.prob, b[c.index] = c.prob, g >= n)
151
+ const p = v(l), u = await p.array();
152
+ p.dispose();
153
+ const r = u[0].map((c, k) => ({ prob: c, index: k })).sort((c, k) => k.prob - c.prob);
154
+ let f = 0;
155
+ const b = new Array(r.length).fill(0);
156
+ for (const c of r)
157
+ if (f += c.prob, b[c.index] = c.prob, f >= n)
158
158
  break;
159
- const f = b.reduce((c, k) => c + k, 0), y = b.map((c) => c / f);
160
- l = $(y);
159
+ const g = b.reduce((c, k) => c + k, 0), y = b.map((c) => c / g);
160
+ m = $(y);
161
161
  } else if (s) {
162
- const { values: r, indices: h } = K(p, s), u = D(r, 1);
163
- l = q(h, u, 1), r.dispose(), h.dispose(), u.dispose();
162
+ const { values: p, indices: u } = K(l, s), r = D(p, 1);
163
+ m = q(u, r, 1), p.dispose(), u.dispose(), r.dispose();
164
164
  } else
165
- l = D(p, 1);
165
+ m = D(l, 1);
166
166
  let w;
167
- i?.includeProbabilities && (w = v(p)), a.embeddings && this.embeddingsData.push(
168
- await Promise.all(
169
- a.embeddings.map(async (r) => {
170
- const h = await r.array();
171
- return r.dispose(), h;
172
- })
173
- )
174
- );
175
- const S = l.reshape([1, 1]);
176
- return l.dispose(), l = S, p.dispose(), { output: l, probabilities: w, attention: a.attentionScores?.attentionOut };
167
+ if (i?.includeProbabilities && (w = v(l)), a.embeddings) {
168
+ const p = a.embeddings.map(async (r) => {
169
+ const f = await r.tensor.array();
170
+ return r.tensor.dispose(), { name: r.name, tensor: f };
171
+ }), u = await Promise.all(p);
172
+ this.embeddingsData.push(u);
173
+ }
174
+ const S = m.reshape([1, 1]);
175
+ return m.dispose(), m = S, l.dispose(), { output: m, probabilities: w, attention: a.attentionScores?.attentionOut };
177
176
  }
178
177
  /** Generate multiple tokens in a loop and produce text */
179
178
  async _generate(t) {
@@ -191,8 +190,8 @@ class qt extends z {
191
190
  if (this.cache)
192
191
  e.dispose(), e = s;
193
192
  else {
194
- const p = e;
195
- e = G([e, s], 1), p.dispose();
193
+ const l = e;
194
+ e = G([e, s], 1), l.dispose();
196
195
  }
197
196
  const a = await this.processResponse(this.actualTokeniser, s, d, n);
198
197
  if (this.cache || s.dispose(), a === null)
@@ -4,12 +4,12 @@ async function m(t) {
4
4
  await e(t);
5
5
  const r = s(
6
6
  [
7
- [0.1, 0.2, 0, 0],
8
- [0.1, 0.2, 0, 0],
9
- [0, 0, 0, 0],
10
- [0, 0, 0, 0]
7
+ [0.1, 0.2, 0, 0, 1230, 1232331234, -12234234],
8
+ [0.1, 0.2, 0, 0, -1230, -1232331234, 12234234],
9
+ [0, 0, 0, 0, -1, 0, 0],
10
+ [0, 0, 0, 0, -0.1, 1e-3, 0]
11
11
  ],
12
- [4, 4]
12
+ [4, 7]
13
13
  );
14
14
  return await o().runKernel("Gelu", { x: r }).array();
15
15
  }
@@ -5,6 +5,7 @@ import { execute as gelu } from './gelu';
5
5
  import { execute as normRMSGrad } from './normRMSGrad';
6
6
  import { execute as appendCache } from './appendCache';
7
7
  import { execute as attentionMask } from './attentionMask';
8
+ import { execute as matMulGelu } from './matMulGelu';
8
9
  import { default as runCheck } from './check';
9
10
  import { createWeightStatistics, createTensorStatistics } from './weights';
10
11
  declare const checks: {
@@ -15,6 +16,7 @@ declare const checks: {
15
16
  normRMSGrad: typeof normRMSGrad;
16
17
  appendCache: typeof appendCache;
17
18
  attentionMask: typeof attentionMask;
19
+ matMulGelu: typeof matMulGelu;
18
20
  runCheck: typeof runCheck;
19
21
  createLayerWeightStatistics: typeof createWeightStatistics;
20
22
  createWeightStatistics: typeof createTensorStatistics;
@@ -4,9 +4,10 @@ import { execute as r } from "./qkv.js";
4
4
  import { execute as c } from "./gelu.js";
5
5
  import { execute as o } from "./normRMSGrad.js";
6
6
  import { execute as a } from "./appendCache.js";
7
- import { execute as i } from "./attentionMask.js";
8
- import m from "./check.js";
9
- import { createTensorStatistics as s, createWeightStatistics as u } from "./weights.js";
7
+ import { execute as m } from "./attentionMask.js";
8
+ import { execute as i } from "./matMulGelu.js";
9
+ import s from "./check.js";
10
+ import { createTensorStatistics as u, createWeightStatistics as x } from "./weights.js";
10
11
  const d = {
11
12
  rope: e,
12
13
  qkv: r,
@@ -14,10 +15,11 @@ const d = {
14
15
  normRMS: t,
15
16
  normRMSGrad: o,
16
17
  appendCache: a,
17
- attentionMask: i,
18
- runCheck: m,
19
- createLayerWeightStatistics: u,
20
- createWeightStatistics: s
18
+ attentionMask: m,
19
+ matMulGelu: i,
20
+ runCheck: s,
21
+ createLayerWeightStatistics: x,
22
+ createWeightStatistics: u
21
23
  };
22
24
  export {
23
25
  d as default
@@ -0,0 +1 @@
1
+ export declare function execute(backend: string): Promise<number | number[] | number[][] | number[][][] | number[][][][] | number[][][][][] | number[][][][][][]>;
@@ -0,0 +1,32 @@
1
+ import { s as n, e as s } from "../index-DdmHGZjq.js";
2
+ import "../random_width-DKGeiFuR.js";
3
+ import "../register_all_kernels-Do9VvZmo.js";
4
+ import "../index-Tf7vU29b.js";
5
+ import "../dataset-DPPl-iLT.js";
6
+ import { t as e } from "../tensor2d-CObBWBkW.js";
7
+ async function f(t) {
8
+ await n(t);
9
+ const r = e(
10
+ [
11
+ [0.1, 0.2, 9, 10, 11],
12
+ [0.3, 0.4, -9, -10, -11],
13
+ [0.3, 0.4, -9, -10, -11],
14
+ [0.3, 0.4, -9, -10, -11],
15
+ [0.3, 0.4, -9, -10, -11]
16
+ ],
17
+ [5, 5]
18
+ ), o = e(
19
+ [
20
+ [0.5, 0.6, 7e4, -8e3, 0],
21
+ [0.7, 0.8, -7e4, 8e4, 0],
22
+ [0.7, 0.8, -7e4, 8e4, 0],
23
+ [0.7, 0.8, -7e4, 8e4, 0],
24
+ [0.7, 0.8, -7e4, 8e4, 0]
25
+ ],
26
+ [5, 5]
27
+ );
28
+ return await s().runKernel("MatMulGelu", { x: o, kernel: r }).array();
29
+ }
30
+ export {
31
+ f as execute
32
+ };
@@ -6,6 +6,11 @@ export interface ForwardAttributes {
6
6
  training: boolean;
7
7
  checkpointing?: boolean;
8
8
  ropeCache?: RoPECache;
9
+ outputEmbeddings?: boolean;
10
+ embeddings?: {
11
+ name: string;
12
+ tensor: Tensor;
13
+ }[];
9
14
  }
10
15
  export default abstract class BaseLayer<ATTR extends ForwardAttributes = ForwardAttributes> {
11
16
  readonly parent?: BaseLayer;
@@ -1,32 +1,32 @@
1
1
  import l from "./CausalSelfAttention.js";
2
- import r from "./MLP.js";
2
+ import p from "./MLP.js";
3
3
  import o from "./RMSNorm.js";
4
- import d from "./BaseLayer.js";
5
- import { t as p } from "../index-DdmHGZjq.js";
6
- class k extends d {
4
+ import m from "./BaseLayer.js";
5
+ import { k as n, t as h } from "../index-DdmHGZjq.js";
6
+ class k extends m {
7
7
  ln1;
8
8
  attn;
9
9
  ln2;
10
10
  mlp;
11
11
  index;
12
12
  skipped = !1;
13
- constructor(t, s, i) {
14
- super(s, i), this.index = t, this.ln1 = new o(s, `block_${this.index}_rms1`, this), this.attn = new l(this.index, s, this), this.ln2 = new o(s, `block_${this.index}_rms2`, this), this.mlp = new r(this.index, s, this);
13
+ constructor(i, s, e) {
14
+ super(s, e), this.index = i, this.ln1 = new o(s, `block_${this.index}_rms1`, this), this.attn = new l(this.index, s, this), this.ln2 = new o(s, `block_${this.index}_rms2`, this), this.mlp = new p(this.index, s, this);
15
15
  }
16
- getMLPOutput(t, s) {
17
- const i = this.ln2.call({ training: s }, t), e = this.mlp.call({ training: s }, i);
18
- i.dispose();
19
- const n = t.add(e);
20
- return t.dispose(), e.dispose(), n;
16
+ getMLPOutput(i, s) {
17
+ const e = this.ln2.call({ training: s.training }, i), t = this.mlp.call({ training: s.training }, e);
18
+ s.outputEmbeddings ? (n(e), s.embeddings.push({ name: `block_ln2_${this.index}`, tensor: e })) : e.dispose();
19
+ const d = i.add(t);
20
+ return i.dispose(), s.outputEmbeddings ? (n(t), s.embeddings.push({ name: `block_mlp_out_${this.index}`, tensor: t })) : t.dispose(), d;
21
21
  }
22
- forward(t, s) {
23
- return p(() => {
22
+ forward(i, s) {
23
+ return h(() => {
24
24
  if (this.skipped)
25
25
  return s;
26
- const i = this.ln1.call(t, s), e = this.attn.call(t, i);
27
- i.dispose();
28
- const n = s.add(e);
29
- return e.dispose(), this.getMLPOutput(n, t.training);
26
+ const e = this.ln1.call(i, s), t = this.attn.call(i, e);
27
+ i.outputEmbeddings ? (n(e), i.embeddings.push({ name: `block_ln1_${this.index}`, tensor: e })) : e.dispose();
28
+ const d = s.add(t);
29
+ return i.outputEmbeddings ? (n(t), i.embeddings.push({ name: `block_attn_out_${this.index}`, tensor: t })) : t.dispose(), this.getMLPOutput(d, i);
30
30
  });
31
31
  }
32
32
  dispose() {
@@ -1,6 +1,6 @@
1
- import { defaultConfig as m } from "./config.js";
2
- import f from "../layers/TransformerBlock.js";
3
- import u from "../layers/TiedEmbedding.js";
1
+ import { defaultConfig as a } from "./config.js";
2
+ import u from "../layers/TransformerBlock.js";
3
+ import f from "../layers/TiedEmbedding.js";
4
4
  import g from "../layers/RoPECache.js";
5
5
  import b from "../layers/RMSNorm.js";
6
6
  import { t as l, k as p } from "../index-DdmHGZjq.js";
@@ -17,9 +17,9 @@ class R extends w {
17
17
  // Final layer norm
18
18
  ropeCache;
19
19
  constructor(e = {}) {
20
- super({ ...m, ...e }), this.wte = new u(this.config, "token_embedding", this), this.config.useRope === !1 ? this.wpe = new k(this.config, "positional_embedding", this) : this.ropeCache = new g(this.config), this.blocks = [];
20
+ super({ ...a, ...e }), this.wte = new f(this.config, "token_embedding", this), this.config.useRope === !1 ? this.wpe = new k(this.config, "positional_embedding", this) : this.ropeCache = new g(this.config), this.blocks = [];
21
21
  for (let i = 0; i < this.config.nLayer; i++)
22
- this.blocks.push(new f(i, this.config, this));
22
+ this.blocks.push(new u(i, this.config, this));
23
23
  this.lnF = new b(this.config, "final_rms_norm", this);
24
24
  }
25
25
  getClassName() {
@@ -47,17 +47,15 @@ class R extends w {
47
47
  );
48
48
  for (let t = 0; t < this.blocks.length; t++) {
49
49
  const c = this.blocks[t], d = Math.random() * 1e9, r = {
50
- ropeCache: e.ropeCache,
51
- training: e.training,
50
+ ...e,
52
51
  seed: d,
53
- attentionScores: e.attentionScores,
54
52
  pastKV: e.cache ? e.cache[t] : void 0
55
- }, a = e.checkpointing && e.training ? c.callCheckpoint(r, o) : c.call(r, o);
56
- e.outputEmbeddings ? (p(o), e.embeddings.push(o)) : o.dispose(), o = a;
53
+ }, m = e.checkpointing && e.training ? c.callCheckpoint(r, o) : c.call(r, o);
54
+ e.outputEmbeddings ? (p(o), e.embeddings.push({ name: `block_output_${t}`, tensor: o })) : o.dispose(), o = m;
57
55
  }
58
56
  o = this.lnF.call(e, o);
59
57
  const n = this.wte.project(o);
60
- e.outputEmbeddings ? (p(o), e.embeddings.push(o)) : o.dispose();
58
+ e.outputEmbeddings ? (p(o), e.embeddings.push({ name: "final_norm_output", tensor: o })) : o.dispose();
61
59
  let h;
62
60
  return s && (h = this.calculateLoss(n, s)), this.endMemory("Forward"), h ? [n, h] : [n];
63
61
  });
@@ -5,8 +5,6 @@ export interface ModelForwardAttributes extends ForwardAttributes {
5
5
  cache?: KVCache[];
6
6
  attentionScores?: AttentionScores;
7
7
  seed?: number;
8
- outputEmbeddings?: boolean;
9
- embeddings?: Tensor[];
10
8
  }
11
9
  interface TrainingState {
12
10
  steps: number;
@@ -1,19 +1,19 @@
1
1
  import { f as a } from "../../index-DdmHGZjq.js";
2
- import { u as s, C as x } from "../../kernel_funcs_utils-CDfFpUab.js";
3
- const t = 0.7978845608028654, r = 0.044715, c = x + `
2
+ import { u as s, C as c } from "../../kernel_funcs_utils-CDfFpUab.js";
3
+ const t = 0.7978845608028654, r = 0.044715, d = c + `
4
4
  float x3 = x * x * x;
5
5
  float inner = x + ${r} * x3;
6
6
  inner = ${t} * inner;
7
7
  inner = tanh(inner);
8
8
  inner = 0.5 * (1.0 + inner);
9
- x = x * inner;
10
- return x;
11
- `, d = s({ opSnippet: c }), i = {
9
+ inner = x * inner;
10
+ return inner;
11
+ `, i = s({ opSnippet: d }), x = {
12
12
  kernelName: "Gelu",
13
13
  backendName: "webgl",
14
- kernelFunc: d
14
+ kernelFunc: i
15
15
  };
16
- a(i);
16
+ a(x);
17
17
  class f {
18
18
  // Inputs: dy, x
19
19
  variableNames = ["dy", "x"];
@@ -46,5 +46,5 @@ const p = {
46
46
  };
47
47
  a(p);
48
48
  export {
49
- d as gelu
49
+ i as gelu
50
50
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@genai-fi/nanogpt",
3
- "version": "0.8.2",
3
+ "version": "0.8.4",
4
4
  "type": "module",
5
5
  "main": "dist/main.js",
6
6
  "types": "dist/main.d.ts",