@genai-fi/nanogpt 0.2.4 → 0.2.6

This diff shows the content of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
@@ -1,9 +1,11 @@
- import { defaultConfig as z } from "./config.js";
- import $ from "./layers/TransformerBlock.js";
+ import { defaultConfig as $ } from "./config.js";
+ import z from "./layers/TransformerBlock.js";
  import S from "./layers/TiedEmbedding.js";
  import I from "./layers/RoPECache.js";
  import _ from "./layers/RMSNorm.js";
- class x {
+ import { estimateParameterCount as W } from "./utilities/parameters.js";
+ import { createSoftmaxCrossEntropyWithGrad as C } from "./training/sparseCrossEntropy.js";
+ class K {
  config;
  wte;
  // Token embeddings
@@ -19,7 +21,7 @@ class x {
  log = [];
  // Training log
  constructor(t, e = {}) {
- this.tf = t, this.config = { ...z, ...e }, this.wte = new S(t, {
+ this.tf = t, this.config = { ...$, ...e }, this.wte = new S(t, {
  vocabSize: this.config.vocabSize,
  embedDim: this.config.nEmbed,
  name: "token_embedding"
@@ -30,7 +32,7 @@ class x {
  embeddingsInitializer: this.tf.initializers.randomNormal({ mean: 0, stddev: 0.02 })
  }) : this.ropeCache = new I(t, this.config), this.drop = this.tf.layers.dropout({ rate: this.config.dropout }), this.blocks = [];
  for (let o = 0; o < this.config.nLayer; o++)
- this.blocks.push(new $(this.tf, o, this.config, this.ropeCache));
+ this.blocks.push(new z(this.tf, o, this.config, this.ropeCache));
  this.lnF = new _(t, [this.config.nEmbed], 1e-8, "final_rms_norm");
  }
  get variables() {
@@ -58,9 +60,9 @@ class x {
  return this.tf.tidy(() => {
  const i = this.wte.embed(t);
  if (this.config.useRope === !1) {
- const [, s] = t.shape, r = this.config.blockSize, l = this.tf.range(0, s, 1, "int32"), n = this.tf.mod(
- this.tf.add(l, this.tf.scalar(e, "int32")),
- this.tf.scalar(r, "int32")
+ const [, s] = t.shape, l = this.config.blockSize, r = this.tf.range(0, s, 1, "int32"), n = this.tf.mod(
+ this.tf.add(r, this.tf.scalar(e, "int32")),
+ this.tf.scalar(l, "int32")
  ), h = this.wpe.apply(n), c = i.add(h);
  return this.drop.apply(c, { training: o });
  } else
@@ -94,7 +96,7 @@ class x {
  }
  calculateLoss(t, e) {
  try {
- return this.tf.losses.softmaxCrossEntropy(e, t, this.tf.Reduction.MEAN);
+ return C()(t, e).mean();
  } catch (o) {
  throw console.error("Error computing loss:", o), new Error(`Loss computation failed: ${o}`);
  }
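
Annotation: the loss path switches from `tf.losses.softmaxCrossEntropy` (dense, one-hot targets) to a factory imported from the new `training/sparseCrossEntropy.js` module, invoked as `C()(logits, targets).mean()`. That module's source is not part of this diff; the following is only a minimal sketch of what such a factory could look like, assuming sparse integer labels and a hand-rolled gradient via `tf.customGrad` (all names below are illustrative, not the package's code):

// Sketch only: factory shape inferred from the call site C()(logits, labels).mean().
import * as tf from "@tensorflow/tfjs";

export function createSoftmaxCrossEntropyWithGrad() {
  return (logits, labels) => {
    const vocab = logits.shape[logits.shape.length - 1];
    const flatLogits = logits.reshape([-1, vocab]);       // [N, V]
    const oneHot = tf.oneHot(labels.flatten().cast("int32"), vocab); // [N, V]
    const op = tf.customGrad((l, save) => {
      const probs = tf.softmax(l, -1);
      save([probs]);
      // Per-token loss: -log softmax(logits)[label].
      const value = tf.neg(tf.sum(tf.mul(oneHot, tf.logSoftmax(l, -1)), -1));
      // Analytic gradient: softmax(logits) - oneHot(labels), scaled by dy.
      const gradFunc = (dy, [p]) => tf.mul(tf.sub(p, oneHot), dy.expandDims(-1));
      return { value, gradFunc };
    });
    return op(flatLogits);
  };
}

Calling `.mean()` on the returned per-token losses then reduces to the scalar the trainer expects, matching the new call site.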
@@ -107,27 +109,27 @@ class x {
  throw new Error("No attentions for rollout");
  const [e, o, i] = t[0].shape;
  for (const s of t) {
- const [r, l, n] = s.shape;
- if (r !== e || l !== o || n !== i)
+ const [l, r, n] = s.shape;
+ if (l !== e || r !== o || n !== i)
  throw new Error(
- `Inconsistent attention shapes in rollout: expected [${e},${o},${i}] got [${r},${l},${n}]`
+ `Inconsistent attention shapes in rollout: expected [${e},${o},${i}] got [${l},${r},${n}]`
  );
  }
  if (o === i) {
  const s = this.tf.eye(i, i).expandDims(0);
- let r = s.tile([e, 1, 1]);
- for (const l of t) {
- const n = l.add(s);
- r = n.div(n.sum(-1, !0)).matMul(r);
+ let l = s.tile([e, 1, 1]);
+ for (const r of t) {
+ const n = r.add(s);
+ l = n.div(n.sum(-1, !0)).matMul(l);
  }
- return r;
+ return l;
  }
  if (o === 1) {
  let s = null;
- const r = this.tf.tensor1d([i - 1], "int32"), l = this.tf.oneHot(r, i).reshape([1, 1, i]).tile([e, 1, 1]);
- r.dispose();
+ const l = this.tf.tensor1d([i - 1], "int32"), r = this.tf.oneHot(l, i).reshape([1, 1, i]).tile([e, 1, 1]);
+ l.dispose();
  for (const n of t) {
- let h = n.add(l);
+ let h = n.add(r);
  h = h.div(h.sum(-1, !0)), s == null ? s = h : (s = s.mul(h), s = s.div(s.sum(-1, !0)));
  }
  return s;
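
Annotation: both sides of this hunk implement attention rollout; only the minified variable names changed. Each layer's attention map is augmented with the identity (modelling the residual path), row-normalised, and multiplied into the running product. A deminified restatement of the square-map branch, with illustrative names:

// Restatement of the square-case rollout shown above (names are illustrative).
function attentionRollout(tf, attentions, batchSize, seqLen) {
  const eye = tf.eye(seqLen, seqLen).expandDims(0); // identity = residual connection
  let rollout = eye.tile([batchSize, 1, 1]);        // start from R = I
  for (const attn of attentions) {
    const mixed = attn.add(eye);                    // A + I
    rollout = mixed.div(mixed.sum(-1, true)).matMul(rollout); // row-normalise, chain
  }
  return rollout;
}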
@@ -137,56 +139,53 @@ class x {
  }
  forward(t, e, o = !1, i = !1, s) {
  return this.validateInput(t), this.tf.tidy(() => {
- const r = s?.[0]?.length ?? 0;
- let l = this.inputPhase(t, r, o);
+ const l = s?.[0]?.length ?? 0;
+ let r = this.inputPhase(t, l, o);
  const n = [];
  if (s && s.length !== this.blocks.length)
  throw console.error("Cache", s), new Error(`Cache length ${s.length} does not match number of blocks ${this.blocks.length}`);
  for (let a = 0; a < this.blocks.length; a++) {
  const d = this.blocks[a], {
  output: g,
- attention: m,
- cache: p
- } = d.call(l, o, i, s ? s[a] : void 0);
- l = g, i && m && n.push(m), s && p ? (s[a]?.k.dispose(), s[a]?.v.dispose(), s[a] = p) : p && (p.k.dispose(), p.v.dispose());
+ attention: u,
+ cache: f
+ } = d.call(r, o, i, s ? s[a] : void 0);
+ r = g, i && u && n.push(u), s && f ? (s[a]?.k.dispose(), s[a]?.v.dispose(), s[a] = f) : f && (f.k.dispose(), f.v.dispose());
  }
  let h;
- i && n.length > 0 && (h = this.computeAttentionRollout(n)), l = this.lnF.apply(l);
- const c = this.wte.project(l);
- let f;
- return e && (f = this.calculateLoss(c, e)), { logits: c, loss: f, attention: i ? h : void 0 };
+ i && n.length > 0 && (h = this.computeAttentionRollout(n)), r = this.lnF.apply(r);
+ const c = this.wte.project(r);
+ let p;
+ return e && (p = this.calculateLoss(c, e)), { logits: c, loss: p, attention: i ? h : void 0 };
  });
  }
  generate(t, e, o) {
- const i = o?.temperature ?? 1, s = o?.topK, r = o?.usePadding ?? !1, l = o?.includeAttention ?? !1;
+ const i = o?.temperature ?? 1, s = o?.topK, l = o?.usePadding ?? !1, r = o?.includeAttention ?? !1;
  return this.tf.tidy(() => {
  const n = t, h = n.shape[1], c = h <= this.config.blockSize ? n : n.slice(
  [0, h - this.config.blockSize],
  [n.shape[0], this.config.blockSize]
- ), f = r ? this.config.blockSize - c.shape[1] : 0, a = f > 0 ? this.tf.pad(c, [
+ ), p = l ? this.config.blockSize - c.shape[1] : 0, a = p > 0 ? this.tf.pad(c, [
  [0, 0],
- [0, f]
- ]) : c, { logits: d, attention: g } = this.forward(a, void 0, !1, l, e), m = d.shape[1] - 1 - f, p = d.slice([0, m, 0], [d.shape[0], 1, d.shape[2]]), w = g ? g.slice([0, m, 0], [g.shape[0], 1, g.shape[2]]) : void 0, b = p.div(i);
- let u;
+ [0, p]
+ ]) : c, { logits: d, attention: g } = this.forward(a, void 0, !1, r, e), u = d.shape[1] - 1 - p, f = d.slice([0, u, 0], [d.shape[0], 1, d.shape[2]]), w = g ? g.slice([0, u, 0], [g.shape[0], 1, g.shape[2]]) : void 0, b = f.div(i);
+ let m;
  if (s) {
- const { values: E, indices: v } = this.tf.topk(b, s), y = this.tf.multinomial(E.squeeze([1]), 1);
- u = this.tf.gather(v.squeeze([1]), y, 1);
+ const { values: v, indices: y } = this.tf.topk(b, s), E = this.tf.multinomial(v.squeeze([1]), 1);
+ m = this.tf.gather(y.squeeze([1]), E, 1);
  } else
- u = this.tf.multinomial(b.squeeze([1]), 1);
+ m = this.tf.multinomial(b.squeeze([1]), 1);
  let k;
- return o?.includeProbabilities && (k = this.tf.softmax(b.squeeze([1]))), u = u.reshape([1, 1]), { output: u, attention: w?.squeeze([1]), probabilities: k };
+ return o?.includeProbabilities && (k = this.tf.softmax(b.squeeze([1]))), m = m.reshape([1, 1]), { output: m, attention: w?.squeeze([1]), probabilities: k };
  });
  }
  getNumParams() {
- const t = this.config.vocabSize * this.config.nEmbed, e = this.config.nLayer * (4 * this.config.nEmbed * this.config.nEmbed + // qkv + proj
- 2 * this.config.nEmbed), o = this.config.nLayer * (this.config.mlpFactor * this.config.nEmbed * this.config.nEmbed + // fc
- this.config.nEmbed * this.config.mlpFactor * this.config.nEmbed), i = this.config.nEmbed;
- return t + e + o + i;
+ return W(this.config);
  }
  dispose() {
  this.wte.dispose(), this.wpe && this.wpe.dispose(), this.drop.dispose(), this.blocks.forEach((t) => t.dispose()), this.lnF.dispose();
  }
  }
  export {
- x as default
+ K as default
  };
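
Annotation: the inline estimate in `getNumParams` is replaced by `estimateParameterCount` from the new `utilities/parameters.js` module, whose source is not included in this diff. Assuming it preserves the arithmetic the removed lines computed, a sketch (the real module may differ):

// Sketch reproducing the removed inline formula; names are illustrative.
function estimateParameterCount(config) {
  const { vocabSize, nEmbed, nLayer, mlpFactor } = config;
  const embedding = vocabSize * nEmbed;                          // tied token embedding
  const attention = nLayer * (4 * nEmbed * nEmbed + 2 * nEmbed); // qkv + proj, plus 2*nEmbed (likely the per-block norm gains)
  const mlp = nLayer * 2 * mlpFactor * nEmbed * nEmbed;          // fc + proj
  const finalNorm = nEmbed;                                      // final RMSNorm gain
  return embedding + attention + mlp + finalNorm;
}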
@@ -9,6 +9,9 @@ import { dummyPassAsync as h } from "./utilities/dummy.js";
  import g from "./tokeniser/CharTokeniser.js";
  import "./papaparse.min-C8l2Kvo1.js";
  import "./index-Tf7vU29b.js";
+ import "./jszip.min-CjP2V1VV.js";
+ import "./ops/scatterSub.js";
+ import "./ops/gatherSub.js";
  class a extends c {
  _config;
  _model;

@@ -0,0 +1,27 @@
+ import { o as t, c as s, b as n, E as m, C as r } from "./index-D1SlunD-.js";
+ /**
+ * @license
+ * Copyright 2020 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+ function l(o, c) {
+ const a = s(o, "real", "complex"), e = s(c, "imag", "complex");
+ n(a.shape, e.shape, `real and imag shapes, ${a.shape} and ${e.shape}, must match in call to tf.complex().`);
+ const p = { real: a, imag: e };
+ return m.runKernel(r, p);
+ }
+ const i = /* @__PURE__ */ t({ complex_: l });
+ export {
+ i as c
+ };
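
Annotation: this newly vendored chunk is TensorFlow.js's own `tf.complex` op with minified helper names (op wrapper, input conversion, shape assertion, kernel dispatch). For reference, the public API it backs:

// tf.complex pairs matching-shape real and imaginary tensors into a complex64 tensor.
const real = tf.tensor1d([1, 2, 3]);
const imag = tf.tensor1d([4, 5, 6]);
const z = tf.complex(real, imag); // [1+4i, 2+5i, 3+6i]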
@@ -0,0 +1 @@
+ export declare function loadDOCX(file: Blob | Uint8Array): Promise<string[]>;

@@ -0,0 +1,15 @@
+ import { j as a } from "../jszip.min-CjP2V1VV.js";
+ async function c(n) {
+ const t = await (await a.loadAsync(n)).file("word/document.xml")?.async("string");
+ if (!t) throw new Error("Failed to load document.xml");
+ return s(t).split(`
+ `).filter((r) => r.trim().length > 10);
+ }
+ function s(n) {
+ const t = new DOMParser().parseFromString(n, "application/xml");
+ return Array.from(t.getElementsByTagName("w:t")).map((r) => r.textContent).join(`
+ `);
+ }
+ export {
+ c as loadDOCX
+ };
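
Annotation: 0.2.6 adds a DOCX loader that unzips the file with JSZip, parses `word/document.xml`, and returns the text of the `w:t` runs, keeping only lines longer than 10 characters. A usage sketch, assuming the package re-exports `loadDOCX` from its root (if not, import it from the built module directly); the fetched URL is illustrative:

// loadDOCX accepts a Blob (e.g. a File from an <input>) or a Uint8Array.
import { loadDOCX } from "@genai-fi/nanogpt"; // assumed re-export path

const response = await fetch("sample.docx");  // hypothetical source file
const bytes = new Uint8Array(await response.arrayBuffer());
const paragraphs = await loadDOCX(bytes);     // string[] of extracted text lines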

@@ -1 +1 @@
- export declare function loadPDF(file: File, maxSize?: number): Promise<string[]>;
+ export declare function loadPDF(file: Blob | Uint8Array, maxSize?: number): Promise<string[]>;
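
Annotation: `loadPDF`'s declaration widens from `File` to `Blob | Uint8Array`, matching the new `loadDOCX` signature, so both loaders accept raw bytes as well as browser `File` objects. Under the same re-export assumption as above:

// Any Blob or raw byte array is now accepted, not just a DOM File.
const pdfBytes = new Uint8Array(await (await fetch("sample.pdf")).arrayBuffer());
const pages = await loadPDF(pdfBytes); // string[]; maxSize remains optional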