npm - @genai-fi/nanogpt - Versions diffs - 0.2.5 → 0.2.7 - Mend

@genai-fi/nanogpt 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/dist/NanoGPTModel.js +43 -44
package/dist/TeachableLLM.js +2 -0
package/dist/complex-D6Bq1XDf.js +27 -0
package/dist/{index-DcaSvB38.js → index-D1SlunD-.js} +553 -522
package/dist/layers/TiedEmbedding.js +113 -178
package/dist/main.d.ts +2 -0
package/dist/main.js +18 -10
package/dist/ops/gatherSub.d.ts +2 -0
package/dist/ops/gatherSub.js +66 -0
package/dist/ops/node/sparseCrossEntropy.d.ts +1 -0
package/dist/ops/node/sparseCrossEntropy.js +11 -0
package/dist/ops/scatterSub.d.ts +2 -0
package/dist/ops/scatterSub.js +150 -0
package/dist/stack-DB2YLlAs.js +50 -0
package/dist/sum-02UQ5Eaq.js +49 -0
package/dist/tokeniser/CharTokeniser.d.ts +1 -0
package/dist/tokeniser/CharTokeniser.js +48 -39
package/dist/training/AdamExt.js +1 -1
package/dist/training/DatasetBuilder.js +3 -2
package/dist/training/Trainer.js +3 -3
package/dist/training/sparseCrossEntropy.d.ts +11 -0
package/dist/training/sparseCrossEntropy.js +177 -0
package/dist/utilities/parameters.d.ts +10 -0
package/dist/utilities/parameters.js +52 -0
package/package.json +3 -2

package/dist/layers/TiedEmbedding.js CHANGED Viewed

@@ -1,4 +1,6 @@
-import { o as p, c as a, b as V, E as u, C as X, R as Y, d as A, B as Z, S as ee, f as te, g as se, h as ne, I as re, L as ae, N as ue, P as ie, i as oe, j as le, k as ce, l as pe, n as B, t as he, T as L, m as fe, p as me, q as de, r as q, u as P, v as U, _ as H } from "../index-DcaSvB38.js";
+import { o as h, c as u, x as B, E as c, B as V, y as X, D as Y, I as Z, F as ee, N as te, H as se, J as ne, K as re, O as ae, Q as ue, f as L, w as ie, T as A, m as oe, U as le, t as ce, k as C, V as P, v as U, _ as H } from "../index-D1SlunD-.js";
+import { s as pe, r as f } from "../sum-02UQ5Eaq.js";
+import { c as he } from "../complex-D6Bq1XDf.js";
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -15,57 +17,13 @@ import { o as p, c as a, b as V, E as u, C as X, R as Y, d as A, B as Z, S as ee
  * limitations under the License.
  * =============================================================================
  */
-function ge(t, e) {
-  const s = a(t, "real", "complex"), n = a(e, "imag", "complex");
-  V(s.shape, n.shape, `real and imag shapes, ${s.shape} and ${n.shape}, must match in call to tf.complex().`);
-  const r = { real: s, imag: n };
-  return u.runKernel(X, r);
+function fe(t, e, s = !1, n = !1) {
+  let r = u(t, "a", "matMul"), i = u(e, "b", "matMul");
+  [r, i] = B(r, i);
+  const o = { a: r, b: i }, p = { transposeA: s, transposeB: n };
+  return c.runKernel(V, o, p);
 }
-const $e = /* @__PURE__ */ p({ complex_: ge });
-/**
- * @license
- * Copyright 2020 Google LLC. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
-function xe(t, e) {
-  const n = { x: a(t, "x", "reshape", "string_or_numeric") }, r = { shape: e };
-  return u.runKernel(Y, n, r);
-}
-const f = /* @__PURE__ */ p({ reshape_: xe });
-/**
- * @license
- * Copyright 2020 Google LLC. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
-function ke(t, e, s = !1, n = !1) {
-  let r = a(t, "a", "matMul"), i = a(e, "b", "matMul");
-  [r, i] = A(r, i);
-  const l = { a: r, b: i }, h = { transposeA: s, transposeB: n };
-  return u.runKernel(Z, l, h);
-}
-const d = /* @__PURE__ */ p({ matMul_: ke });
+const m = /* @__PURE__ */ h({ matMul_: fe });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -82,11 +40,11 @@ const d = /* @__PURE__ */ p({ matMul_: ke });
  * limitations under the License.
  * =============================================================================
  */
-function be(t) {
-  const s = { x: a(t, "x", "sigmoid", "float32") };
-  return u.runKernel(ee, s);
+function de(t) {
+  const s = { x: u(t, "x", "sigmoid", "float32") };
+  return c.runKernel(X, s);
 }
-const De = /* @__PURE__ */ p({ sigmoid_: be });
+const me = /* @__PURE__ */ h({ sigmoid_: de });
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -103,34 +61,11 @@ const De = /* @__PURE__ */ p({ sigmoid_: be });
  * limitations under the License.
  * =============================================================================
  */
-function Se(t) {
-  const s = { x: a(t, "x", "elu", "float32") };
-  return u.runKernel(te, s);
-}
-const ye = /* @__PURE__ */ p({ elu_: Se });
-/**
- * @license
- * Copyright 2018 Google LLC. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================================
- */
-function _e(t, e = null, s = !1) {
-  let n = a(t, "x", "sum");
-  n.dtype === "bool" && (n = se(n, "int32"));
-  const r = { x: n }, i = { axis: e, keepDims: s };
-  return u.runKernel(ne, r, i);
+function ge(t) {
+  const s = { x: u(t, "x", "elu", "float32") };
+  return c.runKernel(Y, s);
 }
-const Me = /* @__PURE__ */ p({ sum_: _e });
+const $e = /* @__PURE__ */ h({ elu_: ge });
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -147,11 +82,11 @@ const Me = /* @__PURE__ */ p({ sum_: _e });
  * limitations under the License.
  * =============================================================================
  */
-function Ke(t) {
-  const s = { input: a(t, "input", "imag") };
-  return u.runKernel(re, s);
+function xe(t) {
+  const s = { input: u(t, "input", "imag") };
+  return c.runKernel(Z, s);
 }
-const we = /* @__PURE__ */ p({ imag_: Ke });
+const ke = /* @__PURE__ */ h({ imag_: xe });
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -168,11 +103,11 @@ const we = /* @__PURE__ */ p({ imag_: Ke });
  * limitations under the License.
  * =============================================================================
  */
-function We(t, e = 0.2) {
-  const n = { x: a(t, "x", "leakyRelu") }, r = { alpha: e };
-  return u.runKernel(ae, n, r);
+function De(t, e = 0.2) {
+  const n = { x: u(t, "x", "leakyRelu") }, r = { alpha: e };
+  return c.runKernel(ee, n, r);
 }
-const ze = /* @__PURE__ */ p({ leakyRelu_: We });
+const be = /* @__PURE__ */ h({ leakyRelu_: De });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -189,11 +124,11 @@ const ze = /* @__PURE__ */ p({ leakyRelu_: We });
  * limitations under the License.
  * =============================================================================
  */
-function Ee(t) {
-  const s = { x: a(t, "x", "neg") };
-  return u.runKernel(ue, s);
+function ye(t) {
+  const s = { x: u(t, "x", "neg") };
+  return c.runKernel(te, s);
 }
-const Oe = /* @__PURE__ */ p({ neg_: Ee });
+const Se = /* @__PURE__ */ h({ neg_: ye });
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -210,11 +145,11 @@ const Oe = /* @__PURE__ */ p({ neg_: Ee });
  * limitations under the License.
  * =============================================================================
  */
-function Re(t, e) {
-  const s = a(t, "x", "prelu"), n = a(e, "alpha", "prelu"), r = { x: s, alpha: n };
-  return u.runKernel(ie, r);
+function Me(t, e) {
+  const s = u(t, "x", "prelu"), n = u(e, "alpha", "prelu"), r = { x: s, alpha: n };
+  return c.runKernel(se, r);
 }
-const Fe = /* @__PURE__ */ p({ prelu_: Re });
+const Ke = /* @__PURE__ */ h({ prelu_: Me });
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -231,11 +166,11 @@ const Fe = /* @__PURE__ */ p({ prelu_: Re });
  * limitations under the License.
  * =============================================================================
  */
-function Le(t) {
-  const s = { input: a(t, "input", "real") };
-  return u.runKernel(oe, s);
+function _e(t) {
+  const s = { input: u(t, "input", "real") };
+  return c.runKernel(ne, s);
 }
-const Ae = /* @__PURE__ */ p({ real_: Le });
+const we = /* @__PURE__ */ h({ real_: _e });
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -252,11 +187,11 @@ const Ae = /* @__PURE__ */ p({ real_: Le });
  * limitations under the License.
  * =============================================================================
  */
-function Be(t) {
-  const s = { x: a(t, "x", "relu") };
-  return u.runKernel(le, s);
+function We(t) {
+  const s = { x: u(t, "x", "relu") };
+  return c.runKernel(re, s);
 }
-const Te = /* @__PURE__ */ p({ relu_: Be });
+const ze = /* @__PURE__ */ h({ relu_: We });
 /**
  * @license
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -273,11 +208,11 @@ const Te = /* @__PURE__ */ p({ relu_: Be });
  * limitations under the License.
  * =============================================================================
  */
-function Ne(t) {
-  const s = { x: a(t, "x", "relu6") };
-  return u.runKernel(ce, s);
+function Ee(t) {
+  const s = { x: u(t, "x", "relu6") };
+  return c.runKernel(ae, s);
 }
-const ve = /* @__PURE__ */ p({ relu6_: Ne });
+const Oe = /* @__PURE__ */ h({ relu6_: Ee });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -294,11 +229,11 @@ const ve = /* @__PURE__ */ p({ relu6_: Ne });
  * limitations under the License.
  * =============================================================================
  */
-function Ce(t, e = 0) {
-  const n = { x: a(t, "x", "step") }, r = { alpha: e };
-  return u.runKernel(pe, n, r);
+function Fe(t, e = 0) {
+  const n = { x: u(t, "x", "step") }, r = { alpha: e };
+  return c.runKernel(ue, n, r);
 }
-const Ge = /* @__PURE__ */ p({ step_: Ce });
+const Re = /* @__PURE__ */ h({ step_: Fe });
 /**
  * @license
  * Copyright 2018 Google LLC. All Rights Reserved.
@@ -315,19 +250,19 @@ const Ge = /* @__PURE__ */ p({ step_: Ce });
  * limitations under the License.
  * =============================================================================
  */
-function Ie(t, e, s) {
-  const n = a(t, "x", "transpose");
-  if (e == null && (e = n.shape.map((l, h) => h).reverse()), B(n.rank === e.length, () => `Error in transpose: rank of input ${n.rank} must match length of perm ${e}.`), e.forEach((l) => {
-    B(l >= 0 && l < n.rank, () => `All entries in 'perm' must be between 0 and ${n.rank - 1} but got ${e}`);
+function Ae(t, e, s) {
+  const n = u(t, "x", "transpose");
+  if (e == null && (e = n.shape.map((o, p) => p).reverse()), L(n.rank === e.length, () => `Error in transpose: rank of input ${n.rank} must match length of perm ${e}.`), e.forEach((o) => {
+    L(o >= 0 && o < n.rank, () => `All entries in 'perm' must be between 0 and ${n.rank - 1} but got ${e}`);
   }), n.rank <= 1)
     return n.clone();
   const r = { x: n }, i = { perm: e };
-  return n.dtype === "complex64" ? he(() => {
-    let l = Ae(n), h = we(n);
-    return l = u.runKernel(L, { x: l }, i), h = u.runKernel(L, { x: h }, i), s && (h = Oe(h)), $e(l, h);
-  }) : u.runKernel(L, r, i);
+  return n.dtype === "complex64" ? ie(() => {
+    let o = we(n), p = ke(n);
+    return o = c.runKernel(A, { x: o }, i), p = c.runKernel(A, { x: p }, i), s && (p = Se(p)), he(o, p);
+  }) : c.runKernel(A, r, i);
 }
-const je = /* @__PURE__ */ p({ transpose_: Ie });
+const Be = /* @__PURE__ */ h({ transpose_: Ae });
 /**
  * @license
  * Copyright 2019 Google LLC. All Rights Reserved.
@@ -344,36 +279,36 @@ const je = /* @__PURE__ */ p({ transpose_: Ie });
  * limitations under the License.
  * =============================================================================
  */
-function qe(t, e, s) {
+function Le(t, e, s) {
   if (s == null || s === "linear")
     return t;
   if (s === "relu")
-    return fe(t, Ge(e));
+    return oe(t, Re(e));
   throw new Error(`Cannot compute gradient for fused activation ${s}.`);
 }
-function Pe(t, e) {
+function Te(t, e) {
   let s = e;
-  const n = me(t.shape, e.shape);
-  return n.length > 0 && (s = Me(s, n)), f(s, t.shape);
+  const n = le(t.shape, e.shape);
+  return n.length > 0 && (s = pe(s, n)), f(s, t.shape);
 }
-function Ue(t, e, s, n) {
+function Ne(t, e, s, n) {
   if (e === "linear")
     return t;
   if (e === "relu")
-    return Te(t);
+    return ze(t);
   if (e === "elu")
-    return ye(t);
+    return $e(t);
   if (e === "relu6")
-    return ve(t);
+    return Oe(t);
   if (e === "prelu")
-    return Fe(t, s);
+    return Ke(t, s);
   if (e === "leakyrelu")
-    return ze(t, n);
+    return be(t, n);
   if (e === "sigmoid")
-    return De(t);
+    return me(t);
   throw new Error(`Unknown fused activation ${e}.`);
 }
-const He = (t, e) => !(t > 0) || e === "linear";
+const ve = (t, e) => !(t > 0) || e === "linear";
 /**
  * @license
  * Copyright 2019 Google LLC. All Rights Reserved.
@@ -390,49 +325,49 @@ const He = (t, e) => !(t > 0) || e === "linear";
  * limitations under the License.
  * =============================================================================
  */
-function Je({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activation: i = "linear", preluActivationWeights: l, leakyreluAlpha: h = 0.2 }) {
-  if (He(u.state.gradientDepth, i) === !1) {
-    let x = d(t, e, s, n);
-    return r != null && (x = de(x, r)), Ue(x, i, l, h);
+function Ge({ a: t, b: e, transposeA: s = !1, transposeB: n = !1, bias: r, activation: i = "linear", preluActivationWeights: o, leakyreluAlpha: p = 0.2 }) {
+  if (ve(c.state.gradientDepth, i) === !1) {
+    let x = m(t, e, s, n);
+    return r != null && (x = ce(x, r)), Ne(x, i, o, p);
   }
-  let o = a(t, "a", "fused matMul"), c = a(e, "b", "fused matMul");
-  [o, c] = A(o, c);
-  const b = s ? o.shape[o.rank - 2] : o.shape[o.rank - 1], D = n ? c.shape[c.rank - 1] : c.shape[c.rank - 2], w = s ? o.shape[o.rank - 1] : o.shape[o.rank - 2], W = n ? c.shape[c.rank - 2] : c.shape[c.rank - 1], T = o.shape.slice(0, -2), S = c.shape.slice(0, -2), N = q(T), v = q(S);
-  B(b === D, () => `Error in fused matMul: inner shapes (${b}) and (${D}) of Tensors with shapes ${o.shape} and ${c.shape} and transposeA=${s} and transposeB=${n} must match.`);
-  const O = P(o.shape.slice(0, -2), c.shape.slice(0, -2)).concat([w, W]), R = s ? f(o, [N, b, w]) : f(o, [N, w, b]), F = n ? f(c, [v, W, D]) : f(c, [v, D, W]);
-  let y;
-  r != null && (y = a(r, "bias", "fused matMul"), [y] = A(y, o), P(O, y.shape));
-  let C;
-  l != null && (C = a(l, "prelu weights", "fused matMul"));
-  const G = (x, K) => {
-    const [g, $, k, z] = K, m = qe(f(x, k.shape), k, i);
-    let _, M;
-    if (!s && !n ? (_ = d(m, $, !1, !0), M = d(g, m, !0, !1)) : !s && n ? (_ = d(m, $, !1, !1), M = d(m, g, !0, !1)) : s && !n ? (_ = d($, m, !1, !0), M = d(g, m, !1, !1)) : (_ = d($, m, !0, !0), M = d(m, g, !0, !0)), r != null) {
-      const Q = Pe(z, m);
-      return [_, M, Q];
+  let a = u(t, "a", "fused matMul"), l = u(e, "b", "fused matMul");
+  [a, l] = B(a, l);
+  const D = s ? a.shape[a.rank - 2] : a.shape[a.rank - 1], b = n ? l.shape[l.rank - 1] : l.shape[l.rank - 2], w = s ? a.shape[a.rank - 1] : a.shape[a.rank - 2], W = n ? l.shape[l.rank - 2] : l.shape[l.rank - 1], T = a.shape.slice(0, -2), y = l.shape.slice(0, -2), N = C(T), v = C(y);
+  L(D === b, () => `Error in fused matMul: inner shapes (${D}) and (${b}) of Tensors with shapes ${a.shape} and ${l.shape} and transposeA=${s} and transposeB=${n} must match.`);
+  const O = P(a.shape.slice(0, -2), l.shape.slice(0, -2)).concat([w, W]), F = s ? f(a, [N, D, w]) : f(a, [N, w, D]), R = n ? f(l, [v, W, b]) : f(l, [v, b, W]);
+  let S;
+  r != null && (S = u(r, "bias", "fused matMul"), [S] = B(S, a), P(O, S.shape));
+  let G;
+  o != null && (G = u(o, "prelu weights", "fused matMul"));
+  const I = (x, _) => {
+    const [g, $, k, z] = _, d = Le(f(x, k.shape), k, i);
+    let M, K;
+    if (!s && !n ? (M = m(d, $, !1, !0), K = m(g, d, !0, !1)) : !s && n ? (M = m(d, $, !1, !1), K = m(d, g, !0, !1)) : s && !n ? (M = m($, d, !1, !0), K = m(g, d, !1, !1)) : (M = m($, d, !0, !0), K = m(d, g, !0, !0)), r != null) {
+      const Q = Te(z, d);
+      return [M, K, Q];
     } else
-      return [_, M];
-  }, I = {
-    a: R,
-    b: F,
-    bias: y,
-    preluActivationWeights: C
-  }, j = { transposeA: s, transposeB: n, activation: i, leakyreluAlpha: h };
-  return r == null ? U((K, g, $) => {
+      return [M, K];
+  }, j = {
+    a: F,
+    b: R,
+    bias: S,
+    preluActivationWeights: G
+  }, q = { transposeA: s, transposeB: n, activation: i, leakyreluAlpha: p };
+  return r == null ? U((_, g, $) => {
     const k = (
       // tslint:disable-next-line: no-unnecessary-type-assertion
-      u.runKernel(H, I, j)
+      c.runKernel(H, j, q)
     );
-    return $([K, g, k]), { value: f(k, O), gradFunc: G };
-  })(R, F) : U((K, g, $, k) => {
+    return $([_, g, k]), { value: f(k, O), gradFunc: I };
+  })(F, R) : U((_, g, $, k) => {
     const z = (
       // tslint:disable-next-line: no-unnecessary-type-assertion
-      u.runKernel(H, I, j)
+      c.runKernel(H, j, q)
     );
-    return k([K, g, z, $]), { value: f(z, O), gradFunc: G };
-  })(R, F, y);
+    return k([_, g, z, $]), { value: f(z, O), gradFunc: I };
+  })(F, R, S);
 }
-const J = /* @__PURE__ */ p({ fusedMatMul_: Je });
+const J = /* @__PURE__ */ h({ fusedMatMul_: Ge });
 /**
  * @license
  * Copyright 2018 Google LLC
@@ -456,7 +391,7 @@ class E extends Error {
  * https://opensource.org/licenses/MIT.
  * =============================================================================
  */
-function Qe(t, e, s, n) {
+function Ie(t, e, s, n) {
   if (t.rank < 2 || e.rank < 2)
     throw new E(`dot requires both inputs to be rank >= 2 but got x shape = ${t.shape} and y shape = ${e.shape}`);
   if (e.rank >= 3) {
@@ -476,9 +411,9 @@ function Qe(t, e, s, n) {
   {
     const r = t.shape.slice(), i = r.pop();
     t = f(t, [-1, i]);
-    const l = e.shape.slice(), h = l.pop(), o = l.pop(), c = [...l, h], b = Array.from({ length: e.rank }, (T, S) => S === 0 ? e.rank - 2 : S <= e.rank - 2 ? S - 1 : S);
-    e = f(je(e, b), [o, -1]);
-    const D = [...r, ...c];
+    const o = e.shape.slice(), p = o.pop(), a = o.pop(), l = [...o, p], D = Array.from({ length: e.rank }, (T, y) => y === 0 ? e.rank - 2 : y <= e.rank - 2 ? y - 1 : y);
+    e = f(Be(e, D), [a, -1]);
+    const b = [...r, ...l];
     return f(J({
       a: t,
       b: e,
@@ -486,10 +421,10 @@ function Qe(t, e, s, n) {
       transposeB: !1,
       bias: null,
       activation: s
-    }), D);
+    }), b);
   }
 }
-class Ye {
+class Ue {
   vocabSize;
   embedDim;
   tf;
@@ -512,7 +447,7 @@ class Ye {
     return this.tf.gather(this.tiedWeights, e, 0);
   }
   project(e) {
-    return Qe(e, this.tiedWeights.transpose());
+    return Ie(e, this.tiedWeights.transpose());
   }
   getWeights() {
     return [this.tiedWeights];
@@ -531,5 +466,5 @@ class Ye {
   }
 }
 export {
-  Ye as default
+  Ue as default
 };

package/dist/main.d.ts CHANGED Viewed

@@ -6,3 +6,5 @@ export { default as loadTextData } from './data/textLoader';
 export type { ITrainerOptions } from './Trainer';
 export type { IGenerateOptions } from './Generator';
 export type { TrainingLogEntry } from './NanoGPTModel';
+export type { GPTConfig } from './config';
+export { estimateParameterCount, estimateMemoryUsage, estimateTrainingMemoryUsage, estimateResources, validateConfig, } from './utilities/parameters';

package/dist/main.js CHANGED Viewed

@@ -1,12 +1,20 @@
-import { default as o } from "./NanoGPTModel.js";
-import { default as t } from "./TeachableLLM.js";
-import { default as l } from "./tokeniser/CharTokeniser.js";
-import { default as s } from "./utilities/waitForModel.js";
-import { default as m } from "./data/textLoader.js";
+import { default as r } from "./NanoGPTModel.js";
+import { default as s } from "./TeachableLLM.js";
+import { default as i } from "./tokeniser/CharTokeniser.js";
+import { default as d } from "./utilities/waitForModel.js";
+import { default as u } from "./data/textLoader.js";
+import { estimateMemoryUsage as n, estimateParameterCount as T, estimateResources as g, estimateTrainingMemoryUsage as M, validateConfig as C } from "./utilities/parameters.js";
+import "./ops/scatterSub.js";
+import "./ops/gatherSub.js";
 export {
-  l as CharTokeniser,
-  o as NanoGPT,
-  t as TeachableLLM,
-  m as loadTextData,
-  s as waitForModel
+  i as CharTokeniser,
+  r as NanoGPT,
+  s as TeachableLLM,
+  n as estimateMemoryUsage,
+  T as estimateParameterCount,
+  g as estimateResources,
+  M as estimateTrainingMemoryUsage,
+  u as loadTextData,
+  C as validateConfig,
+  d as waitForModel
 };

package/dist/ops/gatherSub.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ import { Tensor } from '@tensorflow/tfjs';
2	+ export declare function gatherSub(values: Tensor, labels: Tensor, logits: Tensor): Tensor;

package/dist/ops/gatherSub.js ADDED Viewed

@@ -0,0 +1,66 @@
+import { engine as l } from "@tensorflow/tfjs";
+import { o as g, c as i, E as b, G as d, r as c, a as h } from "../index-D1SlunD-.js";
+import { r as p, s as f } from "../stack-DB2YLlAs.js";
+/**
+ * @license
+ * Copyright 2018 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+function m(e, t) {
+  const n = i(t, "indices", "gatherND", "int32"), r = { params: i(e, "x", "gatherND", "string_or_numeric"), indices: n };
+  return b.runKernel(d, r);
+}
+const N = /* @__PURE__ */ g({ gatherND_: m });
+class S {
+  variableNames = ["labels", "logits", "values"];
+  outputShape;
+  userCode;
+  constructor(t) {
+    this.outputShape = [t], this.userCode = `
+      void main() {
+        int coords = getOutputCoords();
+        int index = int(getLabelsAtOutCoords());
+        float val = getValuesAtOutCoords();
+        float logit = getLogits(coords, index);
+        setOutput(val - logit);
+      }
+    `;
+  }
+}
+function k(e) {
+  const { logits: t, labels: n, values: s } = e.inputs, r = e.backend, o = n.shape[0], a = new S(o);
+  return r.runWebGLProgram(a, [n, t, s], "float32");
+}
+const G = {
+  kernelName: "EfficientGatherSub",
+  backendName: "webgl",
+  kernelFunc: k
+};
+c(G);
+function v(e) {
+  const { values: t, labels: n, logits: s } = e.inputs, r = n.shape[0], o = p(0, r, 1, "int32"), a = f([o, n], 1), u = N(s, a);
+  return h(t, u);
+}
+const C = {
+  kernelName: "EfficientGatherSub",
+  backendName: "cpu",
+  kernelFunc: v
+};
+c(C);
+function K(e, t, n) {
+  return l().runKernel("EfficientGatherSub", { logits: n, labels: t, values: e }, {});
+}
+export {
+  K as gatherSub
+};

package/dist/ops/node/sparseCrossEntropy.d.ts ADDED Viewed

	@@ -0,0 +1 @@
1	+ export {};

package/dist/ops/node/sparseCrossEntropy.js ADDED Viewed

@@ -0,0 +1,11 @@
+import { r as o } from "../../index-D1SlunD-.js";
+function r(e) {
+  const { logits: t, labels: n } = e.inputs;
+  return e.backend.executeMultipleOutputs("SparseSoftmaxCrossEntropyWithLogits", [], [t, n], 2);
+}
+const s = {
+  kernelName: "NativeSparseSoftmaxCrossEntropy",
+  backendName: "tensorflow",
+  kernelFunc: r
+};
+o(s);

package/dist/ops/scatterSub.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ import { Tensor } from '@tensorflow/tfjs';
2	+ export declare function scatterSub(probabilities: Tensor, labels: Tensor, scale: Tensor): Tensor;