@genai-fi/nanogpt 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/LICENSE +7 -0
  2. package/README.md +20 -0
  3. package/dist/Generator.d.ts +14 -0
  4. package/dist/Generator.js +39 -0
  5. package/dist/NanoGPTModel.d.ts +35 -0
  6. package/dist/NanoGPTModel.js +129 -0
  7. package/dist/TeachableLLM.d.ts +21 -0
  8. package/dist/TeachableLLM.js +47 -0
  9. package/dist/Trainer.d.ts +19 -0
  10. package/dist/Trainer.js +34 -0
  11. package/dist/_commonjsHelpers-DaMA6jEr.js +8 -0
  12. package/dist/assets/worker-BYeSPNkq.js +1 -0
  13. package/dist/config.d.ts +11 -0
  14. package/dist/config.js +19 -0
  15. package/dist/index-B8nyc6IR.js +3899 -0
  16. package/dist/index-SOhdqzHq.js +113 -0
  17. package/dist/jszip.min-BLbRbbKt.js +2324 -0
  18. package/dist/layers/CausalSelfAttention.d.ts +22 -0
  19. package/dist/layers/CausalSelfAttention.js +75 -0
  20. package/dist/layers/LayerNorm.d.ts +12 -0
  21. package/dist/layers/LayerNorm.js +30 -0
  22. package/dist/layers/MLP.d.ts +17 -0
  23. package/dist/layers/MLP.js +57 -0
  24. package/dist/layers/TiedEmbedding.d.ts +22 -0
  25. package/dist/layers/TiedEmbedding.js +532 -0
  26. package/dist/layers/TransformerBlock.d.ts +19 -0
  27. package/dist/layers/TransformerBlock.js +47 -0
  28. package/dist/main.d.ts +6 -0
  29. package/dist/main.js +8 -0
  30. package/dist/tokeniser/CharTokeniser.d.ts +20 -0
  31. package/dist/tokeniser/CharTokeniser.js +52 -0
  32. package/dist/tokeniser/NodeTokeniser.d.ts +19 -0
  33. package/dist/tokeniser/NodeTokeniser.js +46 -0
  34. package/dist/tokeniser/WebTokeniser.d.ts +18 -0
  35. package/dist/tokeniser/WebTokeniser.js +96 -0
  36. package/dist/tokeniser/bpe.d.ts +14 -0
  37. package/dist/tokeniser/bpe.js +102 -0
  38. package/dist/tokeniser/messages.d.ts +61 -0
  39. package/dist/tokeniser/messages.js +1 -0
  40. package/dist/tokeniser/type.d.ts +14 -0
  41. package/dist/tokeniser/type.js +1 -0
  42. package/dist/tokeniser/worker.d.ts +1 -0
  43. package/dist/tokeniser/worker.js +53 -0
  44. package/dist/training/AdamExt.d.ts +23 -0
  45. package/dist/training/AdamExt.js +43 -0
  46. package/dist/training/DatasetBuilder.d.ts +12 -0
  47. package/dist/training/DatasetBuilder.js +27 -0
  48. package/dist/training/FullTrainer.d.ts +17 -0
  49. package/dist/training/FullTrainer.js +75 -0
  50. package/dist/training/LayerTrainer.d.ts +28 -0
  51. package/dist/training/LayerTrainer.js +108 -0
  52. package/dist/training/Trainer.d.ts +73 -0
  53. package/dist/training/Trainer.js +87 -0
  54. package/dist/training/lwSchedule.d.ts +7 -0
  55. package/dist/training/lwSchedule.js +162 -0
  56. package/dist/utilities/generate.d.ts +3 -0
  57. package/dist/utilities/generate.js +22 -0
  58. package/dist/utilities/load.d.ts +7 -0
  59. package/dist/utilities/load.js +47 -0
  60. package/dist/utilities/save.d.ts +3 -0
  61. package/dist/utilities/save.js +21 -0
  62. package/dist/utilities/textLoader.d.ts +1 -0
  63. package/dist/utilities/textLoader.js +438 -0
  64. package/dist/utilities/tokenParse.d.ts +1 -0
  65. package/dist/utilities/tokenParse.js +66 -0
  66. package/dist/utilities/weights.d.ts +12 -0
  67. package/dist/utilities/weights.js +43 -0
  68. package/package.json +59 -0
@@ -0,0 +1,438 @@
1
+ import { g as ke } from "../_commonjsHelpers-DaMA6jEr.js";
2
+ var ne = { exports: {} };
3
+ /* @license
4
+ Papa Parse
5
+ v5.5.3
6
+ https://github.com/mholt/PapaParse
7
+ License: MIT
8
+ */
9
+ var ve = ne.exports, ge;
10
+ function Ee() {
11
+ return ge || (ge = 1, function(se, fe) {
12
+ ((J, y) => {
13
+ se.exports = y();
14
+ })(ve, function J() {
15
+ var y = typeof self < "u" ? self : typeof window < "u" ? window : y !== void 0 ? y : {}, H, Z = !y.document && !!y.postMessage, ae = y.IS_PAPA_WORKER || !1, ee = {}, me = 0, u = {};
16
+ function N(e) {
17
+ this._handle = null, this._finished = !1, this._completed = !1, this._halted = !1, this._input = null, this._baseIndex = 0, this._partialLine = "", this._rowCount = 0, this._start = 0, this._nextChunk = null, this.isFirstChunk = !0, this._completeResults = { data: [], errors: [], meta: {} }, (function(t) {
18
+ var r = ue(t);
19
+ r.chunkSize = parseInt(r.chunkSize), t.step || t.chunk || (r.chunkSize = null), this._handle = new le(r), (this._handle.streamer = this)._config = r;
20
+ }).call(this, e), this.parseChunk = function(t, r) {
21
+ var n = parseInt(this._config.skipFirstNLines) || 0;
22
+ if (this.isFirstChunk && 0 < n) {
23
+ let l = this._config.newline;
24
+ l || (i = this._config.quoteChar || '"', l = this._handle.guessLineEndings(t, i)), t = [...t.split(l).slice(n)].join(l);
25
+ }
26
+ this.isFirstChunk && k(this._config.beforeFirstChunk) && (i = this._config.beforeFirstChunk(t)) !== void 0 && (t = i), this.isFirstChunk = !1, this._halted = !1;
27
+ var n = this._partialLine + t, i = (this._partialLine = "", this._handle.parse(n, this._baseIndex, !this._finished));
28
+ if (!this._handle.paused() && !this._handle.aborted()) {
29
+ if (t = i.meta.cursor, n = (this._finished || (this._partialLine = n.substring(t - this._baseIndex), this._baseIndex = t), i && i.data && (this._rowCount += i.data.length), this._finished || this._config.preview && this._rowCount >= this._config.preview), ae) y.postMessage({ results: i, workerId: u.WORKER_ID, finished: n });
30
+ else if (k(this._config.chunk) && !r) {
31
+ if (this._config.chunk(i, this._handle), this._handle.paused() || this._handle.aborted()) return void (this._halted = !0);
32
+ this._completeResults = i = void 0;
33
+ }
34
+ return this._config.step || this._config.chunk || (this._completeResults.data = this._completeResults.data.concat(i.data), this._completeResults.errors = this._completeResults.errors.concat(i.errors), this._completeResults.meta = i.meta), this._completed || !n || !k(this._config.complete) || i && i.meta.aborted || (this._config.complete(this._completeResults, this._input), this._completed = !0), n || i && i.meta.paused || this._nextChunk(), i;
35
+ }
36
+ this._halted = !0;
37
+ }, this._sendError = function(t) {
38
+ k(this._config.error) ? this._config.error(t) : ae && this._config.error && y.postMessage({ workerId: u.WORKER_ID, error: t, finished: !1 });
39
+ };
40
+ }
41
+ function te(e) {
42
+ var t;
43
+ (e = e || {}).chunkSize || (e.chunkSize = u.RemoteChunkSize), N.call(this, e), this._nextChunk = Z ? function() {
44
+ this._readChunk(), this._chunkLoaded();
45
+ } : function() {
46
+ this._readChunk();
47
+ }, this.stream = function(r) {
48
+ this._input = r, this._nextChunk();
49
+ }, this._readChunk = function() {
50
+ if (this._finished) this._chunkLoaded();
51
+ else {
52
+ if (t = new XMLHttpRequest(), this._config.withCredentials && (t.withCredentials = this._config.withCredentials), Z || (t.onload = K(this._chunkLoaded, this), t.onerror = K(this._chunkError, this)), t.open(this._config.downloadRequestBody ? "POST" : "GET", this._input, !Z), this._config.downloadRequestHeaders) {
53
+ var r, n = this._config.downloadRequestHeaders;
54
+ for (r in n) t.setRequestHeader(r, n[r]);
55
+ }
56
+ var i;
57
+ this._config.chunkSize && (i = this._start + this._config.chunkSize - 1, t.setRequestHeader("Range", "bytes=" + this._start + "-" + i));
58
+ try {
59
+ t.send(this._config.downloadRequestBody);
60
+ } catch (l) {
61
+ this._chunkError(l.message);
62
+ }
63
+ Z && t.status === 0 && this._chunkError();
64
+ }
65
+ }, this._chunkLoaded = function() {
66
+ t.readyState === 4 && (t.status < 200 || 400 <= t.status ? this._chunkError() : (this._start += this._config.chunkSize || t.responseText.length, this._finished = !this._config.chunkSize || this._start >= ((r) => (r = r.getResponseHeader("Content-Range")) !== null ? parseInt(r.substring(r.lastIndexOf("/") + 1)) : -1)(t), this.parseChunk(t.responseText)));
67
+ }, this._chunkError = function(r) {
68
+ r = t.statusText || r, this._sendError(new Error(r));
69
+ };
70
+ }
71
+ function re(e) {
72
+ (e = e || {}).chunkSize || (e.chunkSize = u.LocalChunkSize), N.call(this, e);
73
+ var t, r, n = typeof FileReader < "u";
74
+ this.stream = function(i) {
75
+ this._input = i, r = i.slice || i.webkitSlice || i.mozSlice, n ? ((t = new FileReader()).onload = K(this._chunkLoaded, this), t.onerror = K(this._chunkError, this)) : t = new FileReaderSync(), this._nextChunk();
76
+ }, this._nextChunk = function() {
77
+ this._finished || this._config.preview && !(this._rowCount < this._config.preview) || this._readChunk();
78
+ }, this._readChunk = function() {
79
+ var i = this._input, l = (this._config.chunkSize && (l = Math.min(this._start + this._config.chunkSize, this._input.size), i = r.call(i, this._start, l)), t.readAsText(i, this._config.encoding));
80
+ n || this._chunkLoaded({ target: { result: l } });
81
+ }, this._chunkLoaded = function(i) {
82
+ this._start += this._config.chunkSize, this._finished = !this._config.chunkSize || this._start >= this._input.size, this.parseChunk(i.target.result);
83
+ }, this._chunkError = function() {
84
+ this._sendError(t.error);
85
+ };
86
+ }
87
/**
 * Streamer for an in-memory string (exposed as `u.StringStreamer`).
 *
 * Initialises the shared streamer state through `N`, then hands the input to
 * `parseChunk` either whole or in `chunkSize`-character pieces.
 *
 * @param {object} [e] Parser configuration; defaults to an empty object.
 */
function G(e) {
  // Text that has not been handed to parseChunk yet.
  let remaining;
  e = e || {};
  N.call(this, e);

  this.stream = function (input) {
    remaining = input;
    return this._nextChunk();
  };

  this._nextChunk = function () {
    if (this._finished) {
      return undefined;
    }
    const size = this._config.chunkSize;
    let chunk;
    if (size) {
      chunk = remaining.substring(0, size);
      remaining = remaining.substring(size);
    } else {
      // No chunk size configured: consume everything in one go.
      chunk = remaining;
      remaining = "";
    }
    this._finished = !remaining;
    return this.parseChunk(chunk);
  };
}
96
+ function ie(e) {
97
+ N.call(this, e = e || {});
98
+ var t = [], r = !0, n = !1;
99
+ this.pause = function() {
100
+ N.prototype.pause.apply(this, arguments), this._input.pause();
101
+ }, this.resume = function() {
102
+ N.prototype.resume.apply(this, arguments), this._input.resume();
103
+ }, this.stream = function(i) {
104
+ this._input = i, this._input.on("data", this._streamData), this._input.on("end", this._streamEnd), this._input.on("error", this._streamError);
105
+ }, this._checkIsFinished = function() {
106
+ n && t.length === 1 && (this._finished = !0);
107
+ }, this._nextChunk = function() {
108
+ this._checkIsFinished(), t.length ? this.parseChunk(t.shift()) : r = !0;
109
+ }, this._streamData = K(function(i) {
110
+ try {
111
+ t.push(typeof i == "string" ? i : i.toString(this._config.encoding)), r && (r = !1, this._checkIsFinished(), this.parseChunk(t.shift()));
112
+ } catch (l) {
113
+ this._streamError(l);
114
+ }
115
+ }, this), this._streamError = K(function(i) {
116
+ this._streamCleanUp(), this._sendError(i);
117
+ }, this), this._streamEnd = K(function() {
118
+ this._streamCleanUp(), n = !0, this._streamData("");
119
+ }, this), this._streamCleanUp = K(function() {
120
+ this._input.removeListener("data", this._streamData), this._input.removeListener("end", this._streamEnd), this._input.removeListener("error", this._streamError);
121
+ }, this);
122
+ }
123
+ function le(e) {
124
+ var t, r, n, i, l = Math.pow(2, 53), x = -l, F = /^\s*-?(\d+\.?|\.\d+|\d+\.\d+)([eE][-+]?\d+)?\s*$/, j = /^((\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z)))$/, d = this, w = 0, a = 0, T = !1, h = !1, c = [], s = { data: [], errors: [], meta: {} };
125
+ function C(p) {
126
+ return e.skipEmptyLines === "greedy" ? p.join("").trim() === "" : p.length === 1 && p[0].length === 0;
127
+ }
128
+ function S() {
129
+ if (s && n && (z("Delimiter", "UndetectableDelimiter", "Unable to auto-detect delimiting character; defaulted to '" + u.DefaultDelimiter + "'"), n = !1), e.skipEmptyLines && (s.data = s.data.filter(function(o) {
130
+ return !C(o);
131
+ })), A()) {
132
+ let o = function(R, O) {
133
+ k(e.transformHeader) && (R = e.transformHeader(R, O)), c.push(R);
134
+ };
135
+ if (s) if (Array.isArray(s.data[0])) {
136
+ for (var p = 0; A() && p < s.data.length; p++) s.data[p].forEach(o);
137
+ s.data.splice(0, 1);
138
+ } else s.data.forEach(o);
139
+ }
140
+ function g(o, R) {
141
+ for (var O = e.header ? {} : [], m = 0; m < o.length; m++) {
142
+ var v = m, _ = o[m], _ = ((M, f) => ((E) => (e.dynamicTypingFunction && e.dynamicTyping[E] === void 0 && (e.dynamicTyping[E] = e.dynamicTypingFunction(E)), (e.dynamicTyping[E] || e.dynamicTyping) === !0))(M) ? f === "true" || f === "TRUE" || f !== "false" && f !== "FALSE" && (((E) => {
143
+ if (F.test(E) && (E = parseFloat(E), x < E && E < l))
144
+ return 1;
145
+ })(f) ? parseFloat(f) : j.test(f) ? new Date(f) : f === "" ? null : f) : f)(v = e.header ? m >= c.length ? "__parsed_extra" : c[m] : v, _ = e.transform ? e.transform(_, v) : _);
146
+ v === "__parsed_extra" ? (O[v] = O[v] || [], O[v].push(_)) : O[v] = _;
147
+ }
148
+ return e.header && (m > c.length ? z("FieldMismatch", "TooManyFields", "Too many fields: expected " + c.length + " fields but parsed " + m, a + R) : m < c.length && z("FieldMismatch", "TooFewFields", "Too few fields: expected " + c.length + " fields but parsed " + m, a + R)), O;
149
+ }
150
+ var b;
151
+ s && (e.header || e.dynamicTyping || e.transform) && (b = 1, !s.data.length || Array.isArray(s.data[0]) ? (s.data = s.data.map(g), b = s.data.length) : s.data = g(s.data, 0), e.header && s.meta && (s.meta.fields = c), a += b);
152
+ }
153
+ function A() {
154
+ return e.header && c.length === 0;
155
+ }
156
+ function z(p, g, b, o) {
157
+ p = { type: p, code: g, message: b }, o !== void 0 && (p.row = o), s.errors.push(p);
158
+ }
159
+ k(e.step) && (i = e.step, e.step = function(p) {
160
+ s = p, A() ? S() : (S(), s.data.length !== 0 && (w += p.data.length, e.preview && w > e.preview ? r.abort() : (s.data = s.data[0], i(s, d))));
161
+ }), this.parse = function(p, g, b) {
162
+ var o = e.quoteChar || '"', o = (e.newline || (e.newline = this.guessLineEndings(p, o)), n = !1, e.delimiter ? k(e.delimiter) && (e.delimiter = e.delimiter(p), s.meta.delimiter = e.delimiter) : ((o = ((R, O, m, v, _) => {
163
+ var M, f, E, W;
164
+ _ = _ || [",", " ", "|", ";", u.RECORD_SEP, u.UNIT_SEP];
165
+ for (var Q = 0; Q < _.length; Q++) {
166
+ for (var P, X = _[Q], D = 0, U = 0, I = 0, L = (E = void 0, new oe({ comments: v, delimiter: X, newline: O, preview: 10 }).parse(R)), B = 0; B < L.data.length; B++) m && C(L.data[B]) ? I++ : (P = L.data[B].length, U += P, E === void 0 ? E = P : 0 < P && (D += Math.abs(P - E), E = P));
167
+ 0 < L.data.length && (U /= L.data.length - I), (f === void 0 || D <= f) && (W === void 0 || W < U) && 1.99 < U && (f = D, M = X, W = U);
168
+ }
169
+ return { successful: !!(e.delimiter = M), bestDelimiter: M };
170
+ })(p, e.newline, e.skipEmptyLines, e.comments, e.delimitersToGuess)).successful ? e.delimiter = o.bestDelimiter : (n = !0, e.delimiter = u.DefaultDelimiter), s.meta.delimiter = e.delimiter), ue(e));
171
+ return e.preview && e.header && o.preview++, t = p, r = new oe(o), s = r.parse(t, g, b), S(), T ? { meta: { paused: !0 } } : s || { meta: { paused: !1 } };
172
+ }, this.paused = function() {
173
+ return T;
174
+ }, this.pause = function() {
175
+ T = !0, r.abort(), t = k(e.chunk) ? "" : t.substring(r.getCharIndex());
176
+ }, this.resume = function() {
177
+ d.streamer._halted ? (T = !1, d.streamer.parseChunk(t, !0)) : setTimeout(d.resume, 3);
178
+ }, this.aborted = function() {
179
+ return h;
180
+ }, this.abort = function() {
181
+ h = !0, r.abort(), s.meta.aborted = !0, k(e.complete) && e.complete(s), t = "";
182
+ }, this.guessLineEndings = function(R, o) {
183
+ R = R.substring(0, 1048576);
184
+ var o = new RegExp(Y(o) + "([^]*?)" + Y(o), "gm"), b = (R = R.replace(o, "")).split("\r"), o = R.split(`
185
+ `), R = 1 < o.length && o[0].length < b[0].length;
186
+ if (b.length === 1 || R) return `
187
+ `;
188
+ for (var O = 0, m = 0; m < b.length; m++) b[m][0] === `
189
+ ` && O++;
190
+ return O >= b.length / 2 ? `\r
191
+ ` : "\r";
192
+ };
193
+ }
194
/**
 * Escape every regular-expression metacharacter in a string so it can be
 * embedded literally inside a `RegExp` source.
 *
 * @param {string} e Text to escape.
 * @returns {string} The text with each metacharacter prefixed by a backslash.
 */
function Y(e) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  const escaped = e.replace(metaCharacters, "\\$&");
  return escaped;
}
197
+ function oe(e) {
198
+ var t = (e = e || {}).delimiter, r = e.newline, n = e.comments, i = e.step, l = e.preview, x = e.fastMode, F = null, j = !1, d = e.quoteChar == null ? '"' : e.quoteChar, w = d;
199
+ if (e.escapeChar !== void 0 && (w = e.escapeChar), (typeof t != "string" || -1 < u.BAD_DELIMITERS.indexOf(t)) && (t = ","), n === t) throw new Error("Comment character same as delimiter");
200
+ n === !0 ? n = "#" : (typeof n != "string" || -1 < u.BAD_DELIMITERS.indexOf(n)) && (n = !1), r !== `
201
+ ` && r !== "\r" && r !== `\r
202
+ ` && (r = `
203
+ `);
204
+ var a = 0, T = !1;
205
+ this.parse = function(h, c, s) {
206
+ if (typeof h != "string") throw new Error("Input must be a string");
207
+ var C = h.length, S = t.length, A = r.length, z = n.length, p = k(i), g = [], b = [], o = [], R = a = 0;
208
+ if (!h) return D();
209
+ if (x || x !== !1 && h.indexOf(d) === -1) {
210
+ for (var O = h.split(r), m = 0; m < O.length; m++) {
211
+ if (o = O[m], a += o.length, m !== O.length - 1) a += r.length;
212
+ else if (s) return D();
213
+ if (!n || o.substring(0, z) !== n) {
214
+ if (p) {
215
+ if (g = [], W(o.split(t)), U(), T) return D();
216
+ } else W(o.split(t));
217
+ if (l && l <= m) return g = g.slice(0, l), D(!0);
218
+ }
219
+ }
220
+ return D();
221
+ }
222
+ for (var v = h.indexOf(t, a), _ = h.indexOf(r, a), M = new RegExp(Y(w) + Y(d), "g"), f = h.indexOf(d, a); ; ) if (h[a] === d) for (f = a, a++; ; ) {
223
+ if ((f = h.indexOf(d, f + 1)) === -1) return s || b.push({ type: "Quotes", code: "MissingQuotes", message: "Quoted field unterminated", row: g.length, index: a }), P();
224
+ if (f === C - 1) return P(h.substring(a, f).replace(M, d));
225
+ if (d === w && h[f + 1] === w) f++;
226
+ else if (d === w || f === 0 || h[f - 1] !== w) {
227
+ v !== -1 && v < f + 1 && (v = h.indexOf(t, f + 1));
228
+ var E = Q((_ = _ !== -1 && _ < f + 1 ? h.indexOf(r, f + 1) : _) === -1 ? v : Math.min(v, _));
229
+ if (h.substr(f + 1 + E, S) === t) {
230
+ o.push(h.substring(a, f).replace(M, d)), h[a = f + 1 + E + S] !== d && (f = h.indexOf(d, a)), v = h.indexOf(t, a), _ = h.indexOf(r, a);
231
+ break;
232
+ }
233
+ if (E = Q(_), h.substring(f + 1 + E, f + 1 + E + A) === r) {
234
+ if (o.push(h.substring(a, f).replace(M, d)), X(f + 1 + E + A), v = h.indexOf(t, a), f = h.indexOf(d, a), p && (U(), T)) return D();
235
+ if (l && g.length >= l) return D(!0);
236
+ break;
237
+ }
238
+ b.push({ type: "Quotes", code: "InvalidQuotes", message: "Trailing quote on quoted field is malformed", row: g.length, index: a }), f++;
239
+ }
240
+ }
241
+ else if (n && o.length === 0 && h.substring(a, a + z) === n) {
242
+ if (_ === -1) return D();
243
+ a = _ + A, _ = h.indexOf(r, a), v = h.indexOf(t, a);
244
+ } else if (v !== -1 && (v < _ || _ === -1)) o.push(h.substring(a, v)), a = v + S, v = h.indexOf(t, a);
245
+ else {
246
+ if (_ === -1) break;
247
+ if (o.push(h.substring(a, _)), X(_ + A), p && (U(), T)) return D();
248
+ if (l && g.length >= l) return D(!0);
249
+ }
250
+ return P();
251
+ function W(I) {
252
+ g.push(I), R = a;
253
+ }
254
+ function Q(I) {
255
+ var L = 0;
256
+ return L = I !== -1 && (I = h.substring(f + 1, I)) && I.trim() === "" ? I.length : L;
257
+ }
258
+ function P(I) {
259
+ return s || (I === void 0 && (I = h.substring(a)), o.push(I), a = C, W(o), p && U()), D();
260
+ }
261
+ function X(I) {
262
+ a = I, W(o), o = [], _ = h.indexOf(r, a);
263
+ }
264
+ function D(I) {
265
+ if (e.header && !c && g.length && !j) {
266
+ var L = g[0], B = /* @__PURE__ */ Object.create(null), he = new Set(L);
267
+ let pe = !1;
268
+ for (let $ = 0; $ < L.length; $++) {
269
+ let q = L[$];
270
+ if (B[q = k(e.transformHeader) ? e.transformHeader(q, $) : q]) {
271
+ let V, _e = B[q];
272
+ for (; V = q + "_" + _e, _e++, he.has(V); ) ;
273
+ he.add(V), L[$] = V, B[q]++, pe = !0, (F = F === null ? {} : F)[V] = q;
274
+ } else B[q] = 1, L[$] = q;
275
+ he.add(q);
276
+ }
277
+ pe && console.warn("Duplicate headers found and renamed."), j = !0;
278
+ }
279
+ return { data: g, errors: b, meta: { delimiter: t, linebreak: r, aborted: T, truncated: !!I, cursor: R + (c || 0), renamedHeaders: F } };
280
+ }
281
+ function U() {
282
+ i(D()), g = [], b = [];
283
+ }
284
+ }, this.abort = function() {
285
+ T = !0;
286
+ }, this.getCharIndex = function() {
287
+ return a;
288
+ };
289
+ }
290
+ function ye(e) {
291
+ var t = e.data, r = ee[t.workerId], n = !1;
292
+ if (t.error) r.userError(t.error, t.file);
293
+ else if (t.results && t.results.data) {
294
+ var i = { abort: function() {
295
+ n = !0, de(t.workerId, { data: [], errors: [], meta: { aborted: !0 } });
296
+ }, pause: ce, resume: ce };
297
+ if (k(r.userStep)) {
298
+ for (var l = 0; l < t.results.data.length && (r.userStep({ data: t.results.data[l], errors: t.results.errors, meta: t.results.meta }, i), !n); l++) ;
299
+ delete t.results;
300
+ } else k(r.userChunk) && (r.userChunk(t.results, i, t.file), delete t.results);
301
+ }
302
+ t.finished && !n && de(t.workerId, t.results);
303
+ }
304
/**
 * Finish a worker-backed parse: call the worker's `userComplete` callback
 * (when it is a function), terminate the worker and remove it from the `ee`
 * registry.
 *
 * @param {number} e Id under which the worker is stored in `ee`.
 * @param {*} t Results passed through to `userComplete`.
 */
function de(e, t) {
  const worker = ee[e];
  if (k(worker.userComplete)) {
    worker.userComplete(t);
  }
  worker.terminate();
  delete ee[e];
}
308
/**
 * Placeholder used for the `pause` and `resume` members of the handle given
 * to worker callbacks; it always throws.
 *
 * @throws {Error} Always, with the message "Not implemented.".
 */
function ce() {
  const notImplemented = new Error("Not implemented.");
  throw notImplemented;
}
311
/**
 * Recursively copy a value.
 *
 * Non-objects and `null` are returned unchanged. Arrays are copied into a new
 * array and every other object into a new plain object, by walking every
 * enumerable key (`for...in`) and copying each value the same way.
 *
 * @param {*} e Value to copy.
 * @returns {*} The copy, or `e` itself when it is not an object.
 */
function ue(e) {
  const isContainer = e !== null && typeof e === "object";
  if (!isContainer) {
    return e;
  }
  const clone = Array.isArray(e) ? [] : {};
  for (const key in e) {
    clone[key] = ue(e[key]);
  }
  return clone;
}
317
/**
 * Build a wrapper that calls `e` with `this` fixed to `t`, forwarding all
 * arguments. The wrapper discards the return value of `e`.
 *
 * @param {Function} e Function to invoke.
 * @param {*} t Value to use as `this`.
 * @returns {Function} The wrapper; it always returns `undefined`.
 */
function K(e, t) {
  return function (...args) {
    e.apply(t, args);
  };
}
322
/**
 * Report whether a value is callable.
 *
 * @param {*} e Value to inspect.
 * @returns {boolean} `true` when `typeof e` is "function".
 */
function k(e) {
  const kind = typeof e;
  return kind === "function";
}
325
+ return u.parse = function(e, t) {
326
+ var r = (t = t || {}).dynamicTyping || !1;
327
+ if (k(r) && (t.dynamicTypingFunction = r, r = {}), t.dynamicTyping = r, t.transform = !!k(t.transform) && t.transform, !t.worker || !u.WORKERS_SUPPORTED) return r = null, u.NODE_STREAM_INPUT, typeof e == "string" ? (e = ((n) => n.charCodeAt(0) !== 65279 ? n : n.slice(1))(e), r = new (t.download ? te : G)(t)) : e.readable === !0 && k(e.read) && k(e.on) ? r = new ie(t) : (y.File && e instanceof File || e instanceof Object) && (r = new re(t)), r.stream(e);
328
+ (r = (() => {
329
+ var n;
330
+ return !!u.WORKERS_SUPPORTED && (n = (() => {
331
+ var i = y.URL || y.webkitURL || null, l = J.toString();
332
+ return u.BLOB_URL || (u.BLOB_URL = i.createObjectURL(new Blob(["var global = (function() { if (typeof self !== 'undefined') { return self; } if (typeof window !== 'undefined') { return window; } if (typeof global !== 'undefined') { return global; } return {}; })(); global.IS_PAPA_WORKER=true; ", "(", l, ")();"], { type: "text/javascript" })));
333
+ })(), (n = new y.Worker(n)).onmessage = ye, n.id = me++, ee[n.id] = n);
334
+ })()).userStep = t.step, r.userChunk = t.chunk, r.userComplete = t.complete, r.userError = t.error, t.step = k(t.step), t.chunk = k(t.chunk), t.complete = k(t.complete), t.error = k(t.error), delete t.worker, r.postMessage({ input: e, config: t, workerId: r.id });
335
+ }, u.unparse = function(e, t) {
336
+ var r = !1, n = !0, i = ",", l = `\r
337
+ `, x = '"', F = x + x, j = !1, d = null, w = !1, a = ((() => {
338
+ if (typeof t == "object") {
339
+ if (typeof t.delimiter != "string" || u.BAD_DELIMITERS.filter(function(c) {
340
+ return t.delimiter.indexOf(c) !== -1;
341
+ }).length || (i = t.delimiter), typeof t.quotes != "boolean" && typeof t.quotes != "function" && !Array.isArray(t.quotes) || (r = t.quotes), typeof t.skipEmptyLines != "boolean" && typeof t.skipEmptyLines != "string" || (j = t.skipEmptyLines), typeof t.newline == "string" && (l = t.newline), typeof t.quoteChar == "string" && (x = t.quoteChar), typeof t.header == "boolean" && (n = t.header), Array.isArray(t.columns)) {
342
+ if (t.columns.length === 0) throw new Error("Option columns is empty");
343
+ d = t.columns;
344
+ }
345
+ t.escapeChar !== void 0 && (F = t.escapeChar + x), t.escapeFormulae instanceof RegExp ? w = t.escapeFormulae : typeof t.escapeFormulae == "boolean" && t.escapeFormulae && (w = /^[=+\-@\t\r].*$/);
346
+ }
347
+ })(), new RegExp(Y(x), "g"));
348
+ if (typeof e == "string" && (e = JSON.parse(e)), Array.isArray(e)) {
349
+ if (!e.length || Array.isArray(e[0])) return T(null, e, j);
350
+ if (typeof e[0] == "object") return T(d || Object.keys(e[0]), e, j);
351
+ } else if (typeof e == "object") return typeof e.data == "string" && (e.data = JSON.parse(e.data)), Array.isArray(e.data) && (e.fields || (e.fields = e.meta && e.meta.fields || d), e.fields || (e.fields = Array.isArray(e.data[0]) ? e.fields : typeof e.data[0] == "object" ? Object.keys(e.data[0]) : []), Array.isArray(e.data[0]) || typeof e.data[0] == "object" || (e.data = [e.data])), T(e.fields || [], e.data || [], j);
352
+ throw new Error("Unable to serialize unrecognized input");
353
+ function T(c, s, C) {
354
+ var S = "", A = (typeof c == "string" && (c = JSON.parse(c)), typeof s == "string" && (s = JSON.parse(s)), Array.isArray(c) && 0 < c.length), z = !Array.isArray(s[0]);
355
+ if (A && n) {
356
+ for (var p = 0; p < c.length; p++) 0 < p && (S += i), S += h(c[p], p);
357
+ 0 < s.length && (S += l);
358
+ }
359
+ for (var g = 0; g < s.length; g++) {
360
+ var b = (A ? c : s[g]).length, o = !1, R = A ? Object.keys(s[g]).length === 0 : s[g].length === 0;
361
+ if (C && !A && (o = C === "greedy" ? s[g].join("").trim() === "" : s[g].length === 1 && s[g][0].length === 0), C === "greedy" && A) {
362
+ for (var O = [], m = 0; m < b; m++) {
363
+ var v = z ? c[m] : m;
364
+ O.push(s[g][v]);
365
+ }
366
+ o = O.join("").trim() === "";
367
+ }
368
+ if (!o) {
369
+ for (var _ = 0; _ < b; _++) {
370
+ 0 < _ && !R && (S += i);
371
+ var M = A && z ? c[_] : _;
372
+ S += h(s[g][M], _);
373
+ }
374
+ g < s.length - 1 && (!C || 0 < b && !R) && (S += l);
375
+ }
376
+ }
377
+ return S;
378
+ }
379
+ function h(c, s) {
380
+ var C, S;
381
+ return c == null ? "" : c.constructor === Date ? JSON.stringify(c).slice(1, 25) : (S = !1, w && typeof c == "string" && w.test(c) && (c = "'" + c, S = !0), C = c.toString().replace(a, F), (S = S || r === !0 || typeof r == "function" && r(c, s) || Array.isArray(r) && r[s] || ((A, z) => {
382
+ for (var p = 0; p < z.length; p++) if (-1 < A.indexOf(z[p])) return !0;
383
+ return !1;
384
+ })(C, u.BAD_DELIMITERS) || -1 < C.indexOf(i) || C.charAt(0) === " " || C.charAt(C.length - 1) === " ") ? x + C + x : C);
385
+ }
386
+ }, u.RECORD_SEP = "", u.UNIT_SEP = "", u.BYTE_ORDER_MARK = "\uFEFF", u.BAD_DELIMITERS = ["\r", `
387
+ `, '"', u.BYTE_ORDER_MARK], u.WORKERS_SUPPORTED = !Z && !!y.Worker, u.NODE_STREAM_INPUT = 1, u.LocalChunkSize = 10485760, u.RemoteChunkSize = 5242880, u.DefaultDelimiter = ",", u.Parser = oe, u.ParserHandle = le, u.NetworkStreamer = te, u.FileStreamer = re, u.StringStreamer = G, u.ReadableStreamStreamer = ie, y.jQuery && ((H = y.jQuery).fn.parse = function(e) {
388
+ var t = e.config || {}, r = [];
389
+ return this.each(function(l) {
390
+ if (!(H(this).prop("tagName").toUpperCase() === "INPUT" && H(this).attr("type").toLowerCase() === "file" && y.FileReader) || !this.files || this.files.length === 0) return !0;
391
+ for (var x = 0; x < this.files.length; x++) r.push({ file: this.files[x], inputElem: this, instanceConfig: H.extend({}, t) });
392
+ }), n(), this;
393
+ function n() {
394
+ if (r.length === 0) k(e.complete) && e.complete();
395
+ else {
396
+ var l, x, F, j, d = r[0];
397
+ if (k(e.before)) {
398
+ var w = e.before(d.file, d.inputElem);
399
+ if (typeof w == "object") {
400
+ if (w.action === "abort") return l = "AbortError", x = d.file, F = d.inputElem, j = w.reason, void (k(e.error) && e.error({ name: l }, x, F, j));
401
+ if (w.action === "skip") return void i();
402
+ typeof w.config == "object" && (d.instanceConfig = H.extend(d.instanceConfig, w.config));
403
+ } else if (w === "skip") return void i();
404
+ }
405
+ var a = d.instanceConfig.complete;
406
+ d.instanceConfig.complete = function(T) {
407
+ k(a) && a(T, d.file, d.inputElem), i();
408
+ }, u.parse(d.file, d.instanceConfig);
409
+ }
410
+ }
411
+ function i() {
412
+ r.splice(0, 1), n();
413
+ }
414
+ }), ae && (y.onmessage = function(e) {
415
+ e = e.data, u.WORKER_ID === void 0 && e && (u.WORKER_ID = e.workerId), typeof e.input == "string" ? y.postMessage({ workerId: u.WORKER_ID, results: u.parse(e.input, e.config), finished: !0 }) : (y.File && e.input instanceof File || e.input instanceof Object) && (e = u.parse(e.input, e.config)) && y.postMessage({ workerId: u.WORKER_ID, results: e, finished: !0 });
416
+ }), (te.prototype = Object.create(N.prototype)).constructor = te, (re.prototype = Object.create(N.prototype)).constructor = re, (G.prototype = Object.create(G.prototype)).constructor = G, (ie.prototype = Object.create(N.prototype)).constructor = ie, u;
417
+ });
418
+ }(ne)), ne.exports;
419
+ }
420
+ var we = Ee();
421
+ const be = /* @__PURE__ */ ke(we);
422
/**
 * Parse delimited text with Papa Parse and return the first field of every
 * row after the first one.
 *
 * The input is parsed without header mapping and with empty lines skipped;
 * the first parsed row is dropped (`slice(1)`) and only column 0 of the
 * remaining rows is kept.
 *
 * @param {*} se Anything `Papa.parse` accepts as input.
 * @returns {Promise<Array>} The first field of each row after the first.
 * @throws {Error} Rejects with "Error parsing file" when the parser reports
 *   errors other than a failed delimiter guess (the reported errors are
 *   attached as `cause`), or with whatever the parser passes to its `error`
 *   callback.
 */
async function Se(se) {
  return new Promise((resolve, reject) => {
    be.parse(se, {
      header: false,
      skipEmptyLines: true,
      complete: (results) => {
        // Papa Parse records "UndetectableDelimiter" whenever auto-detection
        // finds no delimiter, then parses with the default one anyway. That
        // always happens for single-column input, so treating it as fatal
        // would reject exactly the files this loader reads column 0 from.
        const fatalErrors = results.errors.filter(
          (parseError) => parseError.code !== "UndetectableDelimiter",
        );
        if (fatalErrors.length > 0) {
          reject(new Error("Error parsing file", { cause: fatalErrors }));
          return;
        }
        resolve(results.data.slice(1).map((row) => row[0]));
      },
      error: (parseError) => {
        reject(parseError);
      },
    });
  });
}
436
+ export {
437
+ Se as default
438
+ };
@@ -0,0 +1 @@
1
/**
 * Split text into tokens.
 *
 * Based on the accompanying tokenParse.js implementation: digits and a fixed
 * set of ASCII punctuation characters each become a single-character token,
 * and a space starts a new word. When `raw` is falsy the text is lower-cased
 * first and each word is emitted trimmed with a single leading space; when
 * `raw` is truthy words are emitted exactly as accumulated.
 *
 * @param text Text to tokenise.
 * @param raw Keep the original casing and spacing of words when true.
 * @returns The tokens in order of appearance.
 */
+ export default function parseTokens(text: string, raw?: boolean): string[];
@@ -0,0 +1,66 @@
1
/**
 * Append a word to the token list in normalised form: surrounding whitespace
 * removed and a single space put in front. Empty strings are ignored.
 *
 * @param {string} a Word accumulated so far.
 * @param {string[]} c Token list to append to (mutated in place).
 */
function o(a, c) {
  if (a.length === 0) {
    return;
  }
  c.push(` ${a.trim()}`);
}
4
// Characters that always form a token of their own: the ten digits plus the
// ASCII punctuation the tokeniser recognises.
const SINGLE_CHAR_TOKENS = new Set("0123456789:;,.?!\"'`()[]{}-_/\\%<>=+*&^|~@#$");

/**
 * Split text into word and punctuation tokens.
 *
 * Every digit or punctuation character in `SINGLE_CHAR_TOKENS` ends the
 * current word and is emitted as its own token. A space ends the current word
 * and becomes the first character of the next one. Anything else extends the
 * current word.
 *
 * In normal mode the text is lower-cased first and words go through `o`,
 * which trims them and prefixes a single space. In raw mode words are emitted
 * exactly as accumulated, so joining the tokens reproduces the input.
 *
 * Fix: raw mode used to push the current word even when it was empty, which
 * produced "" tokens for a leading delimiter and between consecutive
 * delimiters. Empty words are now skipped in both modes.
 *
 * @param {string} a Text to tokenise.
 * @param {boolean} [c] Raw mode: keep casing and spacing untouched.
 * @returns {string[]} Tokens in order of appearance.
 */
function l(a, c) {
  // NOTE(review): toLocaleLowerCase depends on the host locale (for example
  // Turkish dotless i); confirm whether locale-independent casing is wanted.
  const text = c ? a : a.toLocaleLowerCase();
  const tokens = [];

  const flushWord = (word) => {
    if (!c) {
      o(word, tokens);
    } else if (word.length > 0) {
      tokens.push(word);
    }
  };

  let word = "";
  // All delimiters are single UTF-16 units, so iterating by code point splits
  // at the same positions as indexing by code unit.
  for (const ch of text) {
    if (SINGLE_CHAR_TOKENS.has(ch)) {
      flushWord(word);
      tokens.push(ch);
      word = "";
    } else if (ch === " ") {
      flushWord(word);
      word = ch;
    } else {
      word += ch;
    }
  }
  flushWord(word);
  return tokens;
}
64
+ export {
65
+ l as default
66
+ };
@@ -0,0 +1,12 @@
1
+ import { default as TF } from '@tensorflow/tfjs';
2
/**
 * Description of one tensor stored in an IWeightManifest.
 *
 * - shape: tensor dimensions; importWeights multiplies them to find how many
 *   values of the manifest data belong to this tensor.
 * - min: smallest value of the tensor, as recorded by exportWeights.
 * - scale: largest value minus smallest value, as recorded by exportWeights.
 *
 * importWeights in weights.js reads only `shape`; `min` and `scale` are
 * written on export but not consumed there.
 */
+ export interface ITensorSpec {
3
+ shape: number[];
4
+ min?: number;
5
+ scale?: number;
6
+ }
7
/**
 * Serialised form of a list of tensors.
 *
 * - spec: one ITensorSpec per exported tensor, in export order.
 * - data: the values of all tensors concatenated in the same order as `spec`.
 */
+ export interface IWeightManifest {
8
+ spec: ITensorSpec[];
9
+ data: Float32Array;
10
+ }
11
/**
 * Build a manifest from a list of tensors: one spec entry (shape, min, scale)
 * per tensor plus all values concatenated into a single Float32Array.
 *
 * Per weights.js, entries that are falsy, have no array `shape`, or have an
 * empty `shape` are skipped with a console warning.
 *
 * @param weights Tensors to export.
 * @returns The manifest describing and containing the exported values.
 */
+ export declare function exportWeights(weights: TF.Tensor[]): Promise<IWeightManifest>;
12
/**
 * Rebuild tensors from a manifest. For each spec entry, the next
 * product-of-shape values are taken from `manifest.data` and passed to
 * `tf.tensor` with dtype "float32".
 *
 * @param manifest Manifest to read; a shape-only spec is sufficient.
 * @param tf TensorFlow.js module used to create the tensors.
 * @returns One tensor per spec entry, in spec order.
 */
+ export declare function importWeights(manifest: IWeightManifest, tf: typeof TF): Promise<TF.Tensor[]>;
@@ -0,0 +1,43 @@
1
/**
 * Concatenate numeric arrays into one newly allocated Float32Array.
 *
 * @param {Array<ArrayLike<number>>} e Parts to join, in order.
 * @returns {Float32Array} All values of all parts, back to back.
 */
function h(e) {
  let totalLength = 0;
  for (const part of e) {
    totalLength += part.length;
  }
  const merged = new Float32Array(totalLength);
  // Thread the write offset through the fold instead of a mutable counter.
  e.reduce((offset, part) => {
    merged.set(part, offset);
    return offset + part.length;
  }, 0);
  return merged;
}
8
/**
 * Export tensors into a weight manifest.
 *
 * For every usable tensor a spec entry `{ shape, min, scale }` is recorded,
 * where `min` is the tensor's minimum and `scale` is maximum minus minimum.
 * The values of all tensors are concatenated (via `h`) into `data`.
 * Entries that are falsy, lack an array `shape`, or have an empty `shape` are
 * skipped with a console warning.
 *
 * Fix: the temporary min/max tensors are now disposed in a `finally` block,
 * so they are released even when reading tensor data rejects. The three
 * independent reads also run concurrently.
 *
 * @param {Array} e Tensors to export.
 * @returns {Promise<{spec: Array, data: Float32Array}>} The manifest.
 */
async function f(e) {
  const manifest = {
    spec: [],
    data: new Float32Array(),
  };
  const buffers = [];
  for (const tensor of e) {
    if (!tensor || !Array.isArray(tensor.shape) || tensor.shape.length === 0) {
      console.warn("Skipping weight with invalid shape:", tensor);
      continue;
    }
    let minTensor;
    let maxTensor;
    try {
      minTensor = tensor.min();
      maxTensor = tensor.max();
      const [minValues, maxValues, values] = await Promise.all([
        minTensor.data(),
        maxTensor.data(),
        tensor.data(),
      ]);
      const min = minValues[0];
      manifest.spec.push({
        shape: tensor.shape,
        min,
        scale: maxValues[0] - min,
      });
      buffers.push(values);
    } finally {
      // Release the reduction results whether or not the reads succeeded.
      minTensor?.dispose();
      maxTensor?.dispose();
    }
  }
  manifest.data = h(buffers);
  return manifest;
}
29
/**
 * Recreate tensors from a weight manifest.
 *
 * Walks the spec entries in order, takes the next product-of-shape values
 * from `e.data` and turns them into a "float32" tensor through `n.tensor`.
 *
 * @param {{spec: Array<{shape: number[]}>, data: Float32Array}} e Manifest.
 * @param {{tensor: Function}} n Module providing `tensor(values, shape, dtype)`.
 * @returns {Promise<Array>} One tensor per spec entry, in spec order.
 */
async function l(e, n) {
  let offset = 0;
  return e.spec.map(({ shape }) => {
    const elementCount = shape.reduce((product, dim) => product * dim, 1);
    const values = e.data.slice(offset, offset + elementCount);
    offset += elementCount;
    return n.tensor(values, shape, "float32");
  });
}
40
+ export {
41
+ f as exportWeights,
42
+ l as importWeights
43
+ };
package/package.json ADDED
@@ -0,0 +1,59 @@
1
+ {
2
+ "name": "@genai-fi/nanogpt",
3
+ "version": "0.0.1",
4
+ "type": "module",
5
+ "main": "dist/main.js",
6
+ "types": "dist/main.d.ts",
7
+ "files": [
8
+ "dist"
9
+ ],
10
+ "exports": {
11
+ ".": "./dist/main.js"
12
+ },
13
+ "repository": {
14
+ "url": "https://github.com/knicos/genai-nanogpt.git",
15
+ "type": "git"
16
+ },
17
+ "scripts": {
18
+ "dev": "vite",
19
+ "build": "tsc && vite build",
20
+ "build:script": "tsc -p scripts/tsconfig.json",
21
+ "preview": "vite preview",
22
+ "lint": "eslint",
23
+ "test": "vitest",
24
+ "ci:test": "vitest --coverage --reporter=junit --outputFile=junit.xml",
25
+ "coverage": "vitest run --coverage",
26
+ "train": "tsx scripts/train.ts --epochs 2 --batch 64",
27
+ "generate": "tsx scripts/generate.ts",
28
+ "evaluate": "tsx scripts/evaluate.ts",
29
+ "debug": "tsx scripts/debug.ts"
30
+ },
31
+ "peerDependencies": {
32
+ "@tensorflow/tfjs": "^4.22.0"
33
+ },
34
+ "devDependencies": {
35
+ "@eslint/js": "^9.32.0",
36
+ "@tensorflow/tfjs-node-gpu": "^4.22.0",
37
+ "@types/node": "^22.13.14",
38
+ "@types/papaparse": "^5.3.16",
39
+ "@types/yargs": "^17.0.33",
40
+ "@vitest/coverage-v8": "^3.2.4",
41
+ "chalk": "^5.4.1",
42
+ "dayjs": "^1.11.13",
43
+ "eslint": "^9.32.0",
44
+ "glob": "^11.0.3",
45
+ "jsdom": "^26.1.0",
46
+ "tsx": "^4.20.3",
47
+ "typescript": "^5.8.3",
48
+ "typescript-eslint": "^8.38.0",
49
+ "vite": "^7.0.6",
50
+ "vite-plugin-dts": "^4.5.4",
51
+ "vitest": "^3.2.4",
52
+ "yargs": "^18.0.0"
53
+ },
54
+ "dependencies": {
55
+ "eventemitter3": "^5.0.1",
56
+ "jszip": "^3.10.1",
57
+ "papaparse": "^5.5.3"
58
+ }
59
+ }