@genai-fi/nanogpt 0.13.0 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,10 +36,10 @@ import c from "./tokeniser/CharTokeniser.js";
36
36
  import g from "./tokeniser/bpe.js";
37
37
  import "./papaparse.min-C0cScC2i.js";
38
38
  import "./jszip.min-Bz5-11Bk.js";
39
- import "./ops/cpu/scatterSub.js";
40
- import "./ops/webgl/scatterSub.js";
41
39
  import "./ops/cpu/gatherSub.js";
42
40
  import "./ops/webgl/gatherSub.js";
41
+ import "./ops/cpu/scatterSub.js";
42
+ import "./ops/webgl/scatterSub.js";
43
43
  import "./ops/cpu/matMulGelu.js";
44
44
  import "./matMulGelu-CoUYwB2k.js";
45
45
  import "./ops/grads/matMulGelu.js";
package/dist/main.d.ts CHANGED
@@ -14,7 +14,9 @@ export { default as BPETokeniser } from './tokeniser/bpe';
14
14
  export { default as waitForModel } from './utilities/waitForModel';
15
15
  export { default as loadTextData } from './data/textLoader';
16
16
  export { default as Generator } from './Generator';
17
+ export { default as Evaluator } from './training/Evaluator';
17
18
  export type { ITrainerOptions } from './Trainer';
19
+ export { default as Trainer } from './Trainer';
18
20
  export type { IGenerateOptions } from './Generator';
19
21
  export { type ModelForwardAttributes, default as Model } from './models/model';
20
22
  export type { ITokeniser, Conversation, Roles } from './tokeniser/type';
package/dist/main.js CHANGED
@@ -6,14 +6,16 @@ import "./index-Cp39cXWe.js";
6
6
  import "./dataset-BMe3pbsL.js";
7
7
  import { default as fo } from "./models/NanoGPTV1.js";
8
8
  import { default as lo } from "./TeachableLLM.js";
9
- import { default as co } from "./tokeniser/CharTokeniser.js";
9
+ import { default as uo } from "./tokeniser/CharTokeniser.js";
10
10
  import { default as ko } from "./tokeniser/bpe.js";
11
11
  import { default as go } from "./utilities/waitForModel.js";
12
12
  import { default as Co } from "./data/textLoader.js";
13
- import { default as Eo } from "./models/model.js";
14
- import { estimateMemoryUsage as Bo, estimateParameterCount as bo, estimateResources as yo, estimateTrainingMemoryUsage as Lo, validateConfig as So } from "./utilities/parameters.js";
15
- import { default as Ao } from "./utilities/topP.js";
16
- import { Task as Go, tokensFromTasks as Ro } from "./training/tasks/Task.js";
13
+ import { default as Mo } from "./training/Evaluator.js";
14
+ import { default as Bo } from "./Trainer.js";
15
+ import { default as vo } from "./models/model.js";
16
+ import { estimateMemoryUsage as Lo, estimateParameterCount as So, estimateResources as Ao, estimateTrainingMemoryUsage as Fo, validateConfig as Go } from "./utilities/parameters.js";
17
+ import { default as Uo } from "./utilities/topP.js";
18
+ import { Task as Do, tokensFromTasks as No } from "./training/tasks/Task.js";
17
19
  import o from "./training/tasks/PretrainingTask.js";
18
20
  import r from "./training/tasks/StartSentenceTask.js";
19
21
  import t from "./training/tasks/ConversationTask.js";
@@ -52,15 +54,15 @@ import "./matMul16-CH8D42Kx.js";
52
54
  import "./ops/webgl/matMul16.js";
53
55
  import "./ops/cpu/matMul16.js";
54
56
  import "./ops/transpose16.js";
55
- import { selectBackend as wo } from "./backend.js";
56
- import { default as No } from "./utilities/performance.js";
57
- import p from "./layers/CausalSelfAttention.js";
58
- import a from "./layers/MLP.js";
57
+ import { selectBackend as qo } from "./backend.js";
58
+ import { default as Ho } from "./utilities/performance.js";
59
+ import a from "./layers/CausalSelfAttention.js";
60
+ import p from "./layers/MLP.js";
59
61
  import i from "./layers/TransformerBlock.js";
60
62
  import s from "./layers/RoPECache.js";
61
- import { default as qo } from "./training/AdamExt.js";
62
- import { default as Ho } from "./checks/index.js";
63
- import { sentenceEmbeddings as Jo, sentenceEmbeddingsTensor as Ko } from "./utilities/sentences.js";
63
+ import { default as Jo } from "./training/AdamExt.js";
64
+ import { default as Oo } from "./checks/index.js";
65
+ import { sentenceEmbeddings as Vo, sentenceEmbeddingsTensor as Wo } from "./utilities/sentences.js";
64
66
  const to = {
65
67
  PretrainingTask: o,
66
68
  StartSentenceTask: r,
@@ -69,35 +71,37 @@ const to = {
69
71
  pack16: m,
70
72
  unpack16: e
71
73
  }, mo = {
72
- CausalSelfAttention: p,
73
- MLP: a,
74
+ CausalSelfAttention: a,
75
+ MLP: p,
74
76
  TransformerBlock: i,
75
77
  RoPECache: s
76
78
  };
77
79
  export {
78
- qo as AdamExt,
80
+ Jo as AdamExt,
79
81
  ko as BPETokeniser,
80
- co as CharTokeniser,
82
+ uo as CharTokeniser,
83
+ Mo as Evaluator,
81
84
  io as Generator,
82
- Eo as Model,
85
+ vo as Model,
83
86
  fo as NanoGPT,
84
- Go as Task,
87
+ Do as Task,
85
88
  lo as TeachableLLM,
86
- Ho as checks,
87
- Bo as estimateMemoryUsage,
88
- bo as estimateParameterCount,
89
- yo as estimateResources,
90
- Lo as estimateTrainingMemoryUsage,
89
+ Bo as Trainer,
90
+ Oo as checks,
91
+ Lo as estimateMemoryUsage,
92
+ So as estimateParameterCount,
93
+ Ao as estimateResources,
94
+ Fo as estimateTrainingMemoryUsage,
91
95
  mo as layers,
92
96
  Co as loadTextData,
93
97
  eo as ops,
94
- No as performanceTest,
95
- wo as selectBackend,
96
- Jo as sentenceEmbeddings,
97
- Ko as sentenceEmbeddingsTensor,
98
+ Ho as performanceTest,
99
+ qo as selectBackend,
100
+ Vo as sentenceEmbeddings,
101
+ Wo as sentenceEmbeddingsTensor,
98
102
  to as tasks,
99
- Ro as tokensFromTasks,
100
- Ao as topP,
101
- So as validateConfig,
103
+ No as tokensFromTasks,
104
+ Uo as topP,
105
+ Go as validateConfig,
102
106
  go as waitForModel
103
107
  };
@@ -52,19 +52,19 @@ class R {
52
52
  mixedPrecision: this._mixedPrecision
53
53
  },
54
54
  d
55
- ), s = T(e, l, this.maskedLoss);
55
+ ), t = T(e, l, this.maskedLoss);
56
56
  e.dispose();
57
- const g = s.mul(S(this.lossScaling));
58
- return s.dispose(), g;
59
- }, { value: t, grads: n } = L(o);
57
+ const g = t.mul(S(this.lossScaling));
58
+ return t.dispose(), g;
59
+ }, { value: s, grads: n } = L(o);
60
60
  if (a)
61
61
  this.model.getProfiler()?.endMemory("Training");
62
62
  else {
63
63
  this.optimizer.applyGradients(n);
64
64
  const e = Object.keys(n);
65
- this.model.weightStore.touchVariables(e), this.model.getProfiler()?.endMemory("Training"), c ? (i.gradients = n, Object.values(n).forEach((s) => P(s))) : m(n);
65
+ this.model.weightStore.touchVariables(e), this.model.getProfiler()?.endMemory("Training"), c ? (i.gradients = n, Object.values(n).forEach((t) => P(t))) : m(n);
66
66
  }
67
- return t.mul(S(1 / this.lossScaling));
67
+ return s.mul(S(1 / this.lossScaling));
68
68
  });
69
69
  }
70
70
  async dummyPass() {
@@ -99,13 +99,13 @@ class R {
99
99
  ...r
100
100
  }, d = Date.now(), l = this.createEmptyState();
101
101
  this.lastState = l, await this.dummyPass(), r?.advancedMetrics && (this.model.getProfiler() || this.model.setProfiler(new y())), this.running = !0, l.logStartTime = d;
102
- const o = a ? new u(this.model, a) : void 0, t = await i.iterator();
102
+ const o = a ? new u(this.model, a) : void 0, s = await i.iterator();
103
103
  try {
104
104
  for (; this.running; ) {
105
- const n = await t.next();
105
+ const n = await s.next();
106
106
  if (n.done) break;
107
- const e = n.value, s = this.trainStep(l, e, !1);
108
- e.xs.dispose(), e.ys.dispose(), l.step++, l.totalSteps++, l.step % c === 0 && await this.performLogging(s, e.xs.shape[0], r, o), s.dispose();
107
+ const e = n.value, t = this.trainStep(l, e, !1);
108
+ e.xs.dispose(), e.ys.dispose(), l.step++, l.totalSteps++, l.step % c === 0 && await this.performLogging(t, e.xs.shape[0], r, o), t.dispose();
109
109
  }
110
110
  } catch (n) {
111
111
  throw console.error("Training error:", n), m(), n;
@@ -116,45 +116,45 @@ class R {
116
116
  const { onStep: d } = {
117
117
  ...p,
118
118
  ...a
119
- }, l = a?.gradientMetrics || !1, o = (await i.data())[0], t = this.lastState;
120
- t.lastLoss = o;
119
+ }, l = a?.gradientMetrics || !1, o = (await i.data())[0], s = this.lastState;
120
+ s.lastLoss = o;
121
121
  const n = Date.now();
122
- t.trainingDuration += n - t.logStartTime;
122
+ s.trainingDuration += n - s.logStartTime;
123
123
  const e = {
124
- loss: t.lastLoss,
125
- step: t.step,
126
- time: Date.now() - t.logStartTime,
124
+ loss: s.lastLoss,
125
+ step: s.step,
126
+ time: Date.now() - s.logStartTime,
127
127
  batchSize: r,
128
128
  learningRate: a?.advancedMetrics ? this.optimizer.lr : void 0
129
129
  };
130
130
  if (this.model.trainingState = {
131
- steps: t.totalSteps,
131
+ steps: s.totalSteps,
132
132
  learningRate: this.optimizer.lr,
133
133
  batchSize: r,
134
- loss: t.lastLoss
135
- }, a?.gradientMetrics && l && t.gradients) {
136
- const s = /* @__PURE__ */ new Map();
137
- for (const [g, h] of Object.entries(t.gradients))
138
- s.set(g, await k(h)), h.dispose();
139
- e.gradientMetrics = s;
134
+ loss: s.lastLoss
135
+ }, a?.gradientMetrics && l && s.gradients) {
136
+ const t = /* @__PURE__ */ new Map();
137
+ for (const [g, h] of Object.entries(s.gradients))
138
+ t.set(g, await k(h)), h.dispose();
139
+ e.gradientMetrics = t;
140
140
  }
141
141
  if (c)
142
142
  try {
143
- const s = await c.evaluate(5);
144
- t.validationLosses.push(s), e.valLoss = s;
145
- } catch (s) {
146
- console.error("Validation error:", s);
143
+ const t = await c.evaluate(5);
144
+ Array.isArray(t) ? e.valLoss = t[0] : (s.validationLosses.push(t), e.valLoss = t);
145
+ } catch (t) {
146
+ console.error("Validation error:", t);
147
147
  }
148
148
  if (d) {
149
- const s = {
150
- duration: t.trainingDuration,
151
- totalSamples: t.totalSteps * e.batchSize,
152
- samplesPerSecond: t.totalSteps * e.batchSize / (t.trainingDuration / 1e3),
149
+ const t = {
150
+ duration: s.trainingDuration,
151
+ totalSamples: s.totalSteps * e.batchSize,
152
+ samplesPerSecond: s.totalSteps * e.batchSize / (s.trainingDuration / 1e3),
153
153
  memory: a?.advancedMetrics ? this.model.getProfiler()?.getPeakMemory() || 0 : void 0
154
154
  };
155
- await d(e, s);
155
+ await d(e, t);
156
156
  }
157
- t.logStartTime = Date.now();
157
+ s.logStartTime = Date.now();
158
158
  }
159
159
  async trainOnDataset(i, r, a) {
160
160
  const { logInterval: c, maxSteps: d } = {
@@ -162,13 +162,13 @@ class R {
162
162
  ...r
163
163
  }, l = Date.now(), o = this.createEmptyState();
164
164
  this.lastState = o, await this.dummyPass(), r?.advancedMetrics && (this.model.getProfiler() || this.model.setProfiler(new y())), this.running = !0, o.logStartTime = l;
165
- const t = a ? new u(this.model, a) : void 0, n = await i.iterator();
165
+ const s = a ? new u(this.model, a) : void 0, n = await i.iterator();
166
166
  try {
167
167
  for (; this.running; ) {
168
168
  const e = await n.next();
169
169
  if (e.done) break;
170
- const s = e.value, g = o.step % c === 0, h = (r?.gradientMetrics || !1) && g, f = this.trainStep(o, s, !1, h);
171
- s.xs.dispose(), s.ys.dispose(), o.step++, o.totalSteps++, g && await this.performLogging(f, s.xs.shape[0], r, t), f.dispose(), o.step >= d && this.stop();
170
+ const t = e.value, g = o.step % c === 0, h = (r?.gradientMetrics || !1) && g, f = this.trainStep(o, t, !1, h);
171
+ t.xs.dispose(), t.ys.dispose(), o.step++, o.totalSteps++, g && await this.performLogging(f, t.xs.shape[0], r, s), f.dispose(), o.step >= d && this.stop();
172
172
  }
173
173
  } catch (e) {
174
174
  throw console.error("Training error:", e), m(), e;
@@ -1,9 +1,14 @@
1
1
  import { Dataset } from '@tensorflow/tfjs-data';
2
2
  import { TensorContainer } from '@tensorflow/tfjs-core';
3
3
  import { default as Model, ModelForwardAttributes } from '../models/model';
4
+ import { Conversation, ITokeniser } from '../main';
4
5
  export default class Evaluator {
5
6
  private model;
6
- private iterator;
7
- constructor(model: Model<ModelForwardAttributes>, dataset: Dataset<TensorContainer>);
8
- evaluate(maxBatches?: number): Promise<number>;
7
+ private iterator?;
8
+ private xs?;
9
+ private ys?;
10
+ constructor(model: Model<ModelForwardAttributes>, dataset: Dataset<TensorContainer> | Conversation[][], tokeniser?: ITokeniser);
11
+ dispose(): void;
12
+ private calculateBatchLoss;
13
+ evaluate(maxBatches?: number): Promise<number | number[]>;
9
14
  }
@@ -1,23 +1,47 @@
1
- import { calculateLoss as h } from "./loss.js";
2
- class m {
3
- constructor(s, t) {
4
- this.model = s, this.iterator = t.iterator();
1
+ import "../index-twYeuV3_.js";
2
+ import { calculateLoss as u } from "./loss.js";
3
+ import { buildSFTExample as p } from "./SFTDatasetBuilder.js";
4
+ import { t as c } from "../tensor-CO6h2H2F.js";
5
+ class b {
6
+ constructor(i, t, a) {
7
+ if (this.model = i, Array.isArray(t)) {
8
+ if (!a)
9
+ throw new Error("Tokeniser is required when dataset is an array of conversations");
10
+ const o = t.map((s) => p(s, -100, a, i.config.blockSize)).filter((s) => s !== null);
11
+ if (o.length === 0)
12
+ return;
13
+ this.xs = c(o.map((s) => s.xs)), this.ys = c(o.map((s) => s.ys));
14
+ } else
15
+ this.iterator = t.iterator();
5
16
  }
6
17
  iterator;
7
- async evaluate(s = 100) {
8
- let t = 0, o = 0;
9
- const n = await this.iterator;
10
- for (let a = 0; a < s; a++) {
11
- const e = await n.next();
12
- if (e.done) break;
13
- const u = e.value, { xs: r, ys: i } = u, l = this.model.forward({ training: !1 }, r), c = h(l, i);
14
- l.dispose(), r.dispose(), i.dispose();
15
- const d = await c.array();
16
- c.dispose(), t += d, o++;
17
- }
18
- return t / o;
18
+ xs;
19
+ ys;
20
+ dispose() {
21
+ this.xs && this.xs.dispose(), this.ys && this.ys.dispose();
22
+ }
23
+ async calculateBatchLoss(i, t, a, o) {
24
+ const s = this.model.forward({ training: !1 }, i), r = u(s, t, o, a);
25
+ s.dispose();
26
+ const e = await r.array();
27
+ return r.dispose(), e;
28
+ }
29
+ async evaluate(i = 100) {
30
+ let t = 0, a = 0;
31
+ if (this.iterator) {
32
+ const o = await this.iterator;
33
+ for (let s = 0; s < i; s++) {
34
+ const r = await o.next();
35
+ if (r.done) break;
36
+ const l = r.value, { xs: e, ys: n } = l, h = this.model.forward({ training: !1 }, e), f = await this.calculateBatchLoss(h, n, !1, !1);
37
+ e.dispose(), n.dispose(), t += f, a++;
38
+ }
39
+ return t / a;
40
+ } else if (this.xs && this.ys)
41
+ return this.calculateBatchLoss(this.xs, this.ys, !0, !0);
42
+ throw new Error("No data available for evaluation");
19
43
  }
20
44
  }
21
45
  export {
22
- m as default
46
+ b as default
23
47
  };
@@ -1,7 +1,11 @@
1
1
  import { Tensor } from '@tensorflow/tfjs-core';
2
- import { ITokeniser } from '../tokeniser/type';
2
+ import { Conversation, ITokeniser } from '../tokeniser/type';
3
3
  import { Dataset } from '@tensorflow/tfjs-data';
4
4
  import { Task } from './tasks/Task';
5
+ export declare function buildSFTExample(conversation: Conversation[], ignoreIndex: number, tokenizer: ITokeniser, blockSize: number): {
6
+ xs: Int32Array;
7
+ ys: Int32Array;
8
+ } | null;
5
9
  export declare class SFTDatasetBuilder {
6
10
  tokenizer: ITokeniser;
7
11
  blockSize: number;
@@ -1,41 +1,44 @@
1
1
  import { t as x } from "../index-twYeuV3_.js";
2
2
  import "../dataset-BMe3pbsL.js";
3
- import { g as y } from "../readers-C_41Nuv3.js";
3
+ import { g as I } from "../readers-C_41Nuv3.js";
4
4
  import "../index-Cp39cXWe.js";
5
- function I(u, a, t, c) {
6
- const s = [t.bosToken], n = [!1], d = {
5
+ function w(u, a, t, l) {
6
+ const s = [t.bosToken], n = [!1], f = {
7
7
  user: t.getSpecialTokenIndex("<|user_start|>"),
8
8
  assistant: t.getSpecialTokenIndex("<|assistant_start|>"),
9
9
  system: t.getSpecialTokenIndex("<|system_start|>")
10
- }, r = {
10
+ }, i = {
11
11
  user: t.getSpecialTokenIndex("<|user_end|>"),
12
12
  assistant: t.getSpecialTokenIndex("<|assistant_end|>"),
13
13
  system: t.getSpecialTokenIndex("<|system_end|>")
14
14
  };
15
15
  for (const e of u) {
16
- const p = d[e.role], l = r[e.role];
17
- if (p == null || l == null)
16
+ const c = f[e.role], h = i[e.role];
17
+ if (c == null || h == null)
18
18
  throw new Error(`Missing special tokens for role: ${e.role}`);
19
- s.push(p), n.push(!1);
20
- const T = t.encode(e.content);
21
- for (const g of T) {
22
- s.push(g);
23
- const S = t.isSpecialToken(g), k = e.role === "assistant";
24
- n.push(k && !S);
19
+ s.push(c), n.push(!1);
20
+ const k = t.encode(e.content);
21
+ for (const m of k) {
22
+ s.push(m);
23
+ const S = t.isSpecialToken(m), y = e.role === "assistant";
24
+ n.push(y && !S);
25
25
  }
26
- s.push(l), n.push(!1);
26
+ s.push(h), n.push(!1);
27
27
  }
28
28
  s.push(t.eosToken), n.push(!1);
29
- const o = c + 1;
29
+ const o = l + 1;
30
30
  if (s.length < o) {
31
- const e = o - s.length, p = t.getSpecialTokenIndex("<pad>");
32
- for (let l = 0; l < e; l++)
33
- s.push(p), n.push(!1);
31
+ const e = o - s.length, c = t.getSpecialTokenIndex("<pad>");
32
+ for (let h = 0; h < e; h++)
33
+ s.push(c), n.push(!1);
34
34
  } else s.length > o && (s.length = o, n.length = o);
35
- const h = new Int32Array(s.slice(0, c)), i = s.slice(1, c + 1), m = n.slice(1, c + 1), f = new Int32Array(i.length);
36
- for (let e = 0; e < i.length; e++)
37
- f[e] = m[e] ? i[e] : a;
38
- return { xs: h, ys: f };
35
+ const p = new Int32Array(s.slice(0, l)), r = s.slice(1, l + 1), T = n.slice(1, l + 1), d = new Int32Array(r.length);
36
+ let g = !1;
37
+ for (let e = 0; e < r.length; e++) {
38
+ const c = T[e] ? r[e] : a;
39
+ d[e] = c, c !== a && (g = !0);
40
+ }
41
+ return g ? { xs: p, ys: d } : null;
39
42
  }
40
43
  class A {
41
44
  tokenizer;
@@ -49,17 +52,17 @@ class A {
49
52
  * - Pads with eosToken and masks padding.
50
53
  * - Masks non-assistant tokens in labels with ignoreIndex (default -100).
51
54
  */
52
- async createSFTDataset(a, t = 32, c = -100) {
55
+ async createSFTDataset(a, t = 32, l = -100) {
53
56
  if (!a.length)
54
57
  throw new Error("No conversations provided.");
55
58
  const s = this.tokenizer, n = this.blockSize;
56
- return y(function* () {
59
+ return I(function* () {
57
60
  for (; ; ) {
58
- const r = Math.floor(Math.random() * a.length), h = a[r].getRandomConversation();
59
- yield I(h, c, s, n);
61
+ const i = Math.floor(Math.random() * a.length), p = a[i].getRandomConversation(), r = w(p, l, s, n);
62
+ r && (yield r);
60
63
  }
61
- }).batch(t).map((r) => {
62
- const o = r;
64
+ }).batch(t).map((i) => {
65
+ const o = i;
63
66
  return x(() => ({
64
67
  xs: o.xs.cast("int32"),
65
68
  ys: o.ys.cast("int32")
@@ -68,5 +71,6 @@ class A {
68
71
  }
69
72
  }
70
73
  export {
71
- A as SFTDatasetBuilder
74
+ A as SFTDatasetBuilder,
75
+ w as buildSFTExample
72
76
  };
@@ -1,2 +1,2 @@
1
1
  import { Tensor } from '@tensorflow/tfjs-core';
2
- export declare function calculateLoss(logits: Tensor, targets: Tensor, masked?: boolean): Tensor;
2
+ export declare function calculateLoss(logits: Tensor, targets: Tensor, masked?: boolean, keepBatch?: boolean): Tensor;
@@ -1,11 +1,11 @@
1
- import { createSoftmaxCrossEntropyWithGrad as n } from "./sparseCrossEntropy.js";
2
- function l(r, s, t) {
1
+ import { createSoftmaxCrossEntropyWithGrad as c } from "./sparseCrossEntropy.js";
2
+ function a(r, s, t, n) {
3
3
  try {
4
- return n(t)(r, s);
4
+ return c(t, n)(r, s);
5
5
  } catch (o) {
6
6
  throw console.error("Error computing loss:", o), new Error(`Loss computation failed: ${o}`);
7
7
  }
8
8
  }
9
9
  export {
10
- l as calculateLoss
10
+ a as calculateLoss
11
11
  };
@@ -3,5 +3,5 @@ import * as tf from '@tensorflow/tfjs-core';
3
3
  * Numerically stable sparse cross-entropy with gradient support
4
4
  * This version handles potential numerical issues better
5
5
  */
6
- export declare function sparseSoftmaxCrossEntropy(logits: tf.Tensor, labels: tf.Tensor, validMask?: tf.Tensor): tf.Tensor;
7
- export declare function createSoftmaxCrossEntropyWithGrad(masked?: boolean): (...args: tf.Tensor[]) => tf.Tensor<tf.Rank>;
6
+ export declare function sparseSoftmaxCrossEntropy(logits: tf.Tensor, labels: tf.Tensor, validMask?: tf.Tensor, keepBatch?: boolean, originalBatchShape?: number[]): tf.Tensor;
7
+ export declare function createSoftmaxCrossEntropyWithGrad(masked?: boolean, keepBatch?: boolean): (...args: tf.Tensor[]) => tf.Tensor<tf.Rank>;
@@ -1,42 +1,46 @@
1
- import { gatherSub as T } from "../ops/gatherSub.js";
2
- import { scatterSub as j } from "../ops/scatterSub.js";
3
- import { t as L, c as k, m as C, j as q, a1 as B, a as y, a2 as v } from "../index-twYeuV3_.js";
4
- import { m as F, l as P, a as V, n as W, w as $ } from "../not_equal-DXJHGhGS.js";
5
- import { s as b } from "../sum-CgGUPVhu.js";
6
- import { s as A } from "../softmax-DpG1TdjZ.js";
7
- function D(a, p, d) {
8
- return L(() => {
9
- const o = a.shape[a.shape.length - 1], h = a.shape.slice(0, -1).reduce((t, S) => t * S, 1), i = a.shape.length > 2 ? a.reshape([h, o]) : a, g = p.shape.length > 1 ? p.reshape([h]).cast("int32") : p.cast("int32"), l = F(i, -1, !0), r = k(i, l), n = P(r, -1);
10
- let s = T(n, g, r);
11
- if (d) {
12
- s = C(s, d);
13
- const t = b(d);
14
- s = q(b(s), t);
15
- } else
16
- s = V(s);
1
+ import { gatherSub as P } from "../ops/gatherSub.js";
2
+ import { scatterSub as V } from "../ops/scatterSub.js";
3
+ import { t as z, c as W, m as w, j as C, a1 as $, a as L, a2 as E } from "../index-twYeuV3_.js";
4
+ import { m as A, l as D, a as G, n as H, w as J } from "../not_equal-DXJHGhGS.js";
5
+ import { s as m } from "../sum-CgGUPVhu.js";
6
+ import { s as K } from "../softmax-DpG1TdjZ.js";
7
+ function N(e, d, h, x, a) {
8
+ return z(() => {
9
+ const u = e.shape[e.shape.length - 1], c = a || e.shape.slice(0, -1), f = c.reduce((n, y) => n * y, 1), i = e.shape.length > 2 ? e.reshape([f, u]) : e, S = d.shape.length > 1 ? d.reshape([f]).cast("int32") : d.cast("int32"), p = A(i, -1, !0), t = W(i, p), r = D(t, -1);
10
+ let s = P(r, S, t);
11
+ if (h)
12
+ if (s = w(s, h), x) {
13
+ const n = m(h.reshape(c), -1);
14
+ s = C(m(s.reshape(c), -1), n);
15
+ } else {
16
+ const n = m(h);
17
+ s = C(m(s), n);
18
+ }
19
+ else
20
+ x ? s = G(s.reshape(c), -1) : s = G(s);
17
21
  return s;
18
22
  });
19
23
  }
20
- function Y(a) {
21
- return B(
24
+ function k(e, d) {
25
+ return $(
22
26
  // @ts-expect-error Invalid params
23
- (o, u, h) => {
24
- const i = o.shape[o.shape.length - 1], l = o.shape.slice(0, -1).reduce((c, e) => c * e, 1), r = o.reshape([l, i]), n = u.reshape([l]).cast("int32");
25
- let s, t = null;
26
- if (a) {
27
- const c = y(-100, "int32"), e = W(n, c);
28
- t = e.cast("float32"), s = $(e, n, v(n));
27
+ (a, u, c) => {
28
+ const f = a.shape[a.shape.length - 1], i = a.shape.slice(0, -1), S = i.reduce((l, o) => l * o, 1), p = a.reshape([S, f]), t = u.reshape([S]).cast("int32");
29
+ let r, s = null;
30
+ if (e) {
31
+ const l = L(-100, "int32"), o = H(t, l);
32
+ s = o.cast("float32"), r = J(o, t, E(t));
29
33
  } else
30
- s = n;
31
- const S = D(r, s, t || void 0);
32
- return h(t ? [r, s, t] : [r, s]), r.dispose(), n.dispose(), { value: S, gradFunc: (c, e) => L(() => {
33
- const f = e[0], E = e[1], m = a ? e[2] : void 0, G = A(f), z = m ? b(m) : y(f.shape[0], "float32"), x = c.div(z).broadcastTo([f.shape[0]]), w = m && a ? C(x, m) : x, I = j(G, E, w), M = v(u);
34
- return [I.reshape(o.shape), M];
34
+ r = t;
35
+ const n = N(p, r, s || void 0, d, i);
36
+ return c(s ? [p, r, s] : [p, r]), p.dispose(), t.dispose(), { value: n, gradFunc: (l, o) => z(() => {
37
+ const b = o[0], I = o[1], g = e ? o[2] : void 0, T = K(b), j = g ? m(g) : L(b.shape[0], "float32"), v = l.div(j).broadcastTo([b.shape[0]]), q = g && e ? w(v, g) : v, F = V(T, I, q), M = E(u);
38
+ return [F.reshape(a.shape), M];
35
39
  }) };
36
40
  }
37
41
  );
38
42
  }
39
43
  export {
40
- Y as createSoftmaxCrossEntropyWithGrad,
41
- D as sparseSoftmaxCrossEntropy
44
+ k as createSoftmaxCrossEntropyWithGrad,
45
+ N as sparseSoftmaxCrossEntropy
42
46
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@genai-fi/nanogpt",
3
- "version": "0.13.0",
3
+ "version": "0.13.1",
4
4
  "type": "module",
5
5
  "main": "dist/main.js",
6
6
  "types": "dist/main.d.ts",