@genai-fi/nanogpt 0.7.3 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Generator.d.ts +25 -2
- package/dist/Generator.js +150 -49
- package/dist/{RealDiv-Dy0p8Bvo.js → RealDiv-N8TpOMYv.js} +14 -14
- package/dist/{Reshape-DvudQDvJ.js → Reshape-B-lWQRnF.js} +1 -1
- package/dist/{Reshape-DH5srBP0.js → Reshape-Bo8HzP8V.js} +5 -5
- package/dist/TeachableLLM.d.ts +6 -6
- package/dist/TeachableLLM.js +31 -31
- package/dist/Trainer.d.ts +13 -2
- package/dist/Trainer.js +21 -12
- package/dist/{axis_util-BzbKo31C.js → axis_util-DubwyOhW.js} +3 -3
- package/dist/backend.js +2 -2
- package/dist/{backend_util-TE7aTPhZ.js → backend_util-BJ-_jSeK.js} +46 -46
- package/dist/{broadcast_to-CdbwV-Dj.js → broadcast_to-BYfCp5iL.js} +2 -2
- package/dist/{concat-CsxrgovM.js → concat-BmDqqFsa.js} +1 -1
- package/dist/{dataset-CtdBYwjo.js → dataset-CJmEGu6D.js} +5 -5
- package/dist/{dropout-DYs5QFGQ.js → dropout-sx0sjVAT.js} +8 -8
- package/dist/exports_initializers-DAKM8UO9.js +16 -0
- package/dist/{gather-CMMy2KEG.js → gather-C1siEkdp.js} +1 -1
- package/dist/{gelu-C-dPj6Ku.js → gelu-Bd3UBBxg.js} +1 -1
- package/dist/{gpgpu_math-DGNLNL4I.js → gpgpu_math-TFLxaLkw.js} +26 -26
- package/dist/{index-CLthM0TO.js → index-BaPo_0H8.js} +185 -185
- package/dist/{index-BoWRt-10.js → index-CUQrfsw_.js} +266 -265
- package/dist/{kernel_funcs_utils-BYKWV8Aa.js → kernel_funcs_utils-P9aFa232.js} +9 -9
- package/dist/layers/BaseLayer.d.ts +8 -13
- package/dist/layers/BaseLayer.js +25 -13
- package/dist/layers/CausalSelfAttention.d.ts +3 -2
- package/dist/layers/CausalSelfAttention.js +28 -28
- package/dist/layers/MLP.d.ts +3 -2
- package/dist/layers/MLP.js +16 -20
- package/dist/layers/PositionEmbedding.d.ts +9 -0
- package/dist/layers/PositionEmbedding.js +45 -0
- package/dist/layers/RMSNorm.d.ts +3 -2
- package/dist/layers/RMSNorm.js +6 -6
- package/dist/layers/RoPECache.d.ts +1 -1
- package/dist/layers/RoPECache.js +4 -4
- package/dist/layers/TiedEmbedding.d.ts +3 -2
- package/dist/layers/TiedEmbedding.js +29 -7
- package/dist/layers/TransformerBlock.d.ts +3 -2
- package/dist/layers/TransformerBlock.js +1 -1
- package/dist/loader/load.d.ts +2 -2
- package/dist/loader/loadHF.d.ts +2 -2
- package/dist/loader/loadTransformers.d.ts +4 -2
- package/dist/loader/loadTransformers.js +10 -9
- package/dist/loader/newZipLoad.d.ts +2 -2
- package/dist/loader/oldZipLoad.d.ts +2 -2
- package/dist/loader/oldZipLoad.js +42 -51
- package/dist/loader/save.d.ts +8 -0
- package/dist/loader/save.js +62 -0
- package/dist/{log_sum_exp-DbjkV734.js → log_sum_exp-C142qZqY.js} +14 -14
- package/dist/main.d.ts +5 -4
- package/dist/main.js +22 -18
- package/dist/{mat_mul-8m8pfdcx.js → mat_mul-DMkduNJu.js} +1 -1
- package/dist/{max-Ddnnb5xe.js → max-B3JOcNGb.js} +1 -1
- package/dist/mod-uUuj4gSb.js +27 -0
- package/dist/models/NanoGPTV1.d.ts +15 -0
- package/dist/models/NanoGPTV1.js +71 -0
- package/dist/{config.d.ts → models/config.d.ts} +1 -0
- package/dist/{config.js → models/config.js} +1 -0
- package/dist/models/factory.d.ts +3 -0
- package/dist/models/factory.js +14 -0
- package/dist/models/model.d.ts +26 -0
- package/dist/models/model.js +68 -0
- package/dist/{mulmat_packed_gpu-VSekgsNv.js → mulmat_packed_gpu-Cm2gw-c8.js} +1 -1
- package/dist/{ones-Dj0SDhHf.js → ones-ZdgQGBCP.js} +2 -2
- package/dist/ops/adamAdjust.js +1 -1
- package/dist/ops/adamMoments.js +1 -1
- package/dist/ops/appendCache.js +3 -3
- package/dist/ops/attentionMask.js +1 -1
- package/dist/ops/cpu/adamAdjust.js +9 -9
- package/dist/ops/cpu/adamMoments.js +2 -2
- package/dist/ops/cpu/appendCache.js +2 -2
- package/dist/ops/cpu/attentionMask.js +5 -5
- package/dist/ops/cpu/fusedSoftmax.js +2 -2
- package/dist/ops/cpu/gatherSub.js +3 -3
- package/dist/ops/cpu/gelu.js +1 -1
- package/dist/ops/cpu/matMulGelu.js +2 -2
- package/dist/ops/cpu/matMulMul.js +1 -1
- package/dist/ops/cpu/mulDropout.js +1 -1
- package/dist/ops/cpu/normRMS.js +1 -1
- package/dist/ops/cpu/qkv.js +3 -3
- package/dist/ops/cpu/rope.js +5 -5
- package/dist/ops/cpu/scatterSub.js +11 -11
- package/dist/ops/fusedSoftmax.js +1 -1
- package/dist/ops/gatherSub.js +1 -1
- package/dist/ops/gelu.js +2 -2
- package/dist/ops/grads/attentionMask.js +1 -1
- package/dist/ops/grads/fusedSoftmax.js +2 -2
- package/dist/ops/grads/gelu.js +2 -2
- package/dist/ops/grads/matMulGelu.js +1 -1
- package/dist/ops/grads/normRMS.js +1 -1
- package/dist/ops/grads/qkv.js +1 -1
- package/dist/ops/grads/rope.js +1 -1
- package/dist/ops/matMulGelu.js +1 -1
- package/dist/ops/matMulMul.js +1 -1
- package/dist/ops/mulDrop.js +1 -1
- package/dist/ops/normRMS.js +1 -1
- package/dist/ops/qkv.js +1 -1
- package/dist/ops/rope.js +4 -4
- package/dist/ops/scatterSub.js +1 -1
- package/dist/ops/webgl/adamAdjust.js +2 -2
- package/dist/ops/webgl/adamMoments.js +1 -1
- package/dist/ops/webgl/appendCache.js +1 -1
- package/dist/ops/webgl/attentionMask.js +1 -1
- package/dist/ops/webgl/fusedSoftmax.js +4 -4
- package/dist/ops/webgl/gatherSub.js +1 -1
- package/dist/ops/webgl/gelu.js +2 -2
- package/dist/ops/webgl/log.js +3 -3
- package/dist/ops/webgl/matMulGelu.js +10 -10
- package/dist/ops/webgl/matMulMul.js +1 -1
- package/dist/ops/webgl/mulDropout.js +1 -1
- package/dist/ops/webgl/normRMS.js +2 -2
- package/dist/ops/webgl/qkv.js +1 -1
- package/dist/ops/webgl/rope.js +1 -1
- package/dist/ops/webgl/scatterSub.js +1 -1
- package/dist/ops/webgpu/adamAdjust.js +3 -3
- package/dist/ops/webgpu/adamMoments.js +3 -3
- package/dist/ops/webgpu/appendCache.js +3 -3
- package/dist/ops/webgpu/attentionMask.js +3 -3
- package/dist/ops/webgpu/gatherSub.js +3 -3
- package/dist/ops/webgpu/gelu.js +3 -3
- package/dist/ops/webgpu/normRMS.js +2 -2
- package/dist/ops/webgpu/normRMSGrad.js +5 -5
- package/dist/ops/webgpu/qkv.js +3 -3
- package/dist/ops/webgpu/rope.js +3 -3
- package/dist/ops/webgpu/scatterSub.js +3 -3
- package/dist/ops/webgpu/utils/reductions.js +4 -4
- package/dist/{ops-BFGCx8Ri.js → ops-C_1K_-35.js} +103 -103
- package/dist/{random_width-sZORGo5k.js → random_width-D8Pwy_na.js} +136 -136
- package/dist/{range-CRuAh-gd.js → range-LVHrSLdi.js} +1 -1
- package/dist/{reciprocal-BvGAyKyu.js → reciprocal-CaR9e67G.js} +1 -1
- package/dist/{register_all_kernels-BwDSRN-f.js → register_all_kernels-DUshvVWP.js} +2026 -2049
- package/dist/{reshape-CdBq1WJ6.js → reshape-DEfQGSin.js} +1 -1
- package/dist/{scatter_nd_util-DUstGbU1.js → scatter_nd_util-CUPPNLaA.js} +1 -1
- package/dist/{selu_util-BJEXVvjX.js → selu_util-8vv5JxQV.js} +3 -3
- package/dist/{shared-B8ztnyEk.js → shared-CkNorDcU.js} +83 -83
- package/dist/{shared-wS99K7_n.js → shared-D1elLckx.js} +1 -1
- package/dist/{sin-BeA3tsEd.js → sin-D2CKKmyR.js} +1 -1
- package/dist/{slice-BiOsknYS.js → slice-BnyE-M_7.js} +1 -1
- package/dist/{softmax-Bv_6lyMX.js → softmax-DLoZWYBx.js} +1 -1
- package/dist/{split-B-dikLRw.js → split-By_n4TKP.js} +1 -1
- package/dist/{stack-B17UN2nn.js → stack-DkdFLq37.js} +1 -1
- package/dist/{sum-66ew2byf.js → sum-l_0SqM4h.js} +3 -3
- package/dist/{tensor-JwS7ZYY6.js → tensor-BAQdLqoU.js} +1 -1
- package/dist/{tensor2d-wxPAnDQy.js → tensor2d-BHy261cI.js} +1 -1
- package/dist/training/Adam.js +2 -2
- package/dist/training/AdamExt.js +1 -1
- package/dist/training/DatasetBuilder.js +2 -2
- package/dist/training/Evaluator.d.ts +2 -2
- package/dist/training/FullTrainer.d.ts +3 -3
- package/dist/training/FullTrainer.js +61 -69
- package/dist/training/Trainer.d.ts +15 -3
- package/dist/training/Trainer.js +39 -47
- package/dist/training/sparseCrossEntropy.js +9 -9
- package/dist/utilities/dummy.d.ts +4 -4
- package/dist/utilities/dummy.js +13 -13
- package/dist/utilities/multinomialCPU.js +2 -2
- package/dist/utilities/parameters.d.ts +1 -1
- package/dist/utilities/performance.js +1 -1
- package/dist/utilities/profile.js +1 -1
- package/dist/utilities/safetensors.js +2 -2
- package/dist/utilities/weights.js +2 -2
- package/dist/{variable-BuddVFLa.js → variable-C9hihzDB.js} +1 -1
- package/dist/{webgpu_program-PFzf1hAQ.js → webgpu_program-dFEVbDPL.js} +1 -1
- package/dist/{webgpu_util-D____QpY.js → webgpu_util-DLImlSc6.js} +27 -27
- package/dist/{zeros--BdLQ3oG.js → zeros-VZ72lWXM.js} +1 -1
- package/package.json +2 -3
- package/dist/NanoGPTModel.d.ts +0 -52
- package/dist/NanoGPTModel.js +0 -203
- package/dist/TiedEmbedding-BxOerUmB.js +0 -43
- package/dist/utilities/generate.d.ts +0 -3
- package/dist/utilities/generate.js +0 -22
- package/dist/utilities/save.d.ts +0 -9
- package/dist/utilities/save.js +0 -61
package/dist/NanoGPTModel.d.ts
DELETED
@@ -1,52 +0,0 @@
- import { GPTConfig } from './config';
- import { AttentionScores, KVCache } from './layers/CausalSelfAttention';
- import { default as BaseLayer, ForwardAttributes } from './layers/BaseLayer';
- import { Tensor } from '@tensorflow/tfjs-core';
- export interface TrainingLogEntry {
-     loss: number;
-     valLoss?: number;
-     step: number;
-     time: number;
-     example?: string;
-     batchSize: number;
-     gradientNorm?: number;
-     learningRate?: number;
- }
- export interface GenerateOptions {
-     temperature?: number;
-     topK?: number;
-     topP?: number;
-     usePadding?: boolean;
-     attentionScores?: boolean;
-     includeProbabilities?: boolean;
- }
- export interface ModelForwardAttributes extends ForwardAttributes {
-     cache?: KVCache[];
-     attentionScores?: AttentionScores;
-     seed?: number;
- }
- export default class NanoGPT extends BaseLayer<ModelForwardAttributes> {
-     private wte;
-     private wpe?;
-     private drop;
-     private blocks;
-     private lnF;
-     private ropeCache?;
-     log: TrainingLogEntry[];
-     constructor(config?: Partial<GPTConfig>);
-     get checkpointing(): boolean;
-     set checkpointing(value: boolean);
-     private inputPhase;
-     setSkipMask(mask: boolean[]): void;
-     setTrainableMask(mask: boolean[]): void;
-     private validateInput;
-     private calculateLoss;
-     forward(attrs: ModelForwardAttributes, idx: Tensor, targets?: Tensor): Tensor[];
-     generate(idx: Tensor, cache?: KVCache[], options?: GenerateOptions): Promise<{
-         output: Tensor;
-         probabilities?: Tensor;
-         attention?: Tensor[];
-     }>;
-     getNumParams(): number;
-     dispose(): void;
- }
package/dist/NanoGPTModel.js
DELETED
@@ -1,203 +0,0 @@
- import { defaultConfig as M } from "./config.js";
- import v from "./layers/TransformerBlock.js";
- import { T as x, r as T } from "./TiedEmbedding-BxOerUmB.js";
- import F from "./layers/RoPECache.js";
- import O from "./layers/RMSNorm.js";
- import { estimateParameterCount as _ } from "./utilities/parameters.js";
- import { createSoftmaxCrossEntropyWithGrad as D } from "./training/sparseCrossEntropy.js";
- import K from "./layers/BaseLayer.js";
- import { E as N, D as R, p as q } from "./random_width-sZORGo5k.js";
- import { B as A, C as B, E as G, ad as V, t as C, o as j, b as z, w as U } from "./index-BoWRt-10.js";
- import W from "./utilities/multinomialCPU.js";
- import { m as H, t as J } from "./register_all_kernels-BwDSRN-f.js";
- import { r as P } from "./reshape-CdBq1WJ6.js";
- import { r as Q } from "./range-CRuAh-gd.js";
- import { s as $ } from "./softmax-Bv_6lyMX.js";
- import { g as X } from "./gather-CMMy2KEG.js";
- /**
-  * @license
-  * Copyright 2020 Google LLC. All Rights Reserved.
-  * Licensed under the Apache License, Version 2.0 (the "License");
-  * you may not use this file except in compliance with the License.
-  * You may obtain a copy of the License at
-  *
-  * http://www.apache.org/licenses/LICENSE-2.0
-  *
-  * Unless required by applicable law or agreed to in writing, software
-  * distributed under the License is distributed on an "AS IS" BASIS,
-  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  * See the License for the specific language governing permissions and
-  * limitations under the License.
-  * =============================================================================
-  */
- function Y(u, t, o, e = !1) {
-   const l = B(u, "logits", "multinomial"), s = l.size, r = l.rank;
-   if (s < 2)
-     throw new Error(`Error in multinomial: you need at least 2 outcomes, but got ${s}.`);
-   if (r > 2)
-     throw new Error(`Rank of probabilities must be 1 or 2, but is ${r}`);
-   o = o || Math.random();
-   const n = { logits: r === 1 ? P(l, [1, -1]) : l }, a = { numSamples: t, seed: o, normalized: e }, i = G.runKernel(V, n, a);
-   return r === 1 ? P(i, [i.size]) : i;
- }
- const I = /* @__PURE__ */ A({ multinomial_: Y });
- /**
-  * @license
-  * Copyright 2018 Google LLC
-  *
-  * Use of this source code is governed by an MIT-style
-  * license that can be found in the LICENSE file or at
-  * https://opensource.org/licenses/MIT.
-  * =============================================================================
-  */
- function Z(u) {
-   return new R(u);
- }
- function tt(u) {
-   return new N(u);
- }
- class bt extends K {
-   wte;
-   // Token embeddings
-   wpe;
-   // Position embeddings
-   drop;
-   // Dropout
-   blocks;
-   lnF;
-   // Final layer norm
-   ropeCache;
-   log = [];
-   // Training log
-   constructor(t = {}) {
-     super({ gpt: { ...M, ...t }, layerConfig: {} }), this.wte = new x(this.config, "token_embedding", this), this.config.gpt.useRope === !1 ? this.wpe = tt({
-       inputDim: this.config.gpt.blockSize,
-       outputDim: this.config.gpt.nEmbed,
-       name: "positional_embedding",
-       embeddingsInitializer: T({ mean: 0, stddev: 0.02 })
-     }) : (this.ropeCache = new F(this.config.gpt), this.config.layerConfig.ropeCache = this.ropeCache), this.drop = Z({ rate: this.config.gpt.dropout }), this.blocks = [];
-     for (let o = 0; o < this.config.gpt.nLayer; o++)
-       this.blocks.push(new v(o, this.config, this));
-     this.lnF = new O(this.config, "final_rms_norm", this);
-   }
-   get checkpointing() {
-     return this.config.layerConfig.checkpointing === !0;
-   }
-   set checkpointing(t) {
-     this.config.layerConfig.checkpointing = t;
-   }
-   inputPhase(t, o, e = !1) {
-     return C(() => {
-       const l = this.wte.embed(t);
-       if (this.config.gpt.useRope === !1) {
-         const [, s] = t.shape, r = this.config.gpt.blockSize, g = Q(0, s, 1, "int32"), n = H(j(g, z(o, "int32")), z(r, "int32")), a = this.wpe.apply(n), i = l.add(a);
-         return this.drop.apply(i, { training: e });
-       } else
-         return this.drop.apply(l, { training: e });
-     });
-   }
-   setSkipMask(t) {
-     if (t.length !== this.blocks.length)
-       throw new Error(`Mask length ${t.length} does not match number of blocks ${this.blocks.length}`);
-     for (let o = 0; o < this.blocks.length; o++)
-       this.blocks[o].skipped = t[o];
-   }
-   setTrainableMask(t) {
-     if (t.length !== this.blocks.length)
-       throw new Error(`Mask length ${t.length} does not match number of blocks ${this.blocks.length}`);
-     for (let o = 0; o < this.blocks.length; o++)
-       this.blocks[o].trainable = t[o];
-   }
-   validateInput(t) {
-     if (t.shape.length !== 2)
-       throw new Error(`Invalid input shape: expected [batch_size, sequence_length], got ${t.shape}`);
-     if (t.shape[1] > this.config.gpt.blockSize)
-       throw new Error(`Input sequence length ${t.shape[1]} isn't block size ${this.config.gpt.blockSize}`);
-     if (t.dtype !== "int32")
-       throw new Error(`Input tensor must be of type int32, got ${t.dtype}`);
-   }
-   calculateLoss(t, o) {
-     try {
-       return D()(t, o).mean();
-     } catch (e) {
-       throw console.error("Error computing loss:", e), new Error(`Loss computation failed: ${e}`);
-     }
-   }
-   forward(t, o, e) {
-     return this.validateInput(o), C(() => {
-       this.startMemory();
-       const l = t.cache?.[0]?.length ?? 0;
-       let s = this.inputPhase(o, l, t.training);
-       if (t.cache && t.cache.length !== this.blocks.length)
-         throw console.error("Cache", t.cache), new Error(
-           `Cache length ${t.cache.length} does not match number of blocks ${this.blocks.length}`
-         );
-       for (let n = 0; n < this.blocks.length; n++) {
-         const a = this.blocks[n], i = Math.random() * 1e9, d = {
-           training: t.training,
-           seed: i,
-           attentionScores: t.attentionScores,
-           pastKV: t.cache ? t.cache[n] : void 0
-         }, S = this.config.layerConfig.checkpointing && t.training ? a.callCheckpoint(d, s) : a.call(d, s);
-         s.dispose(), s = S;
-       }
-       s = this.lnF.call(t, s);
-       const r = this.wte.project(s);
-       s.dispose();
-       let g;
-       return e && (g = this.calculateLoss(r, e)), this.endMemory("Forward"), g ? [r, g] : [r];
-     });
-   }
-   async generate(t, o, e) {
-     const l = e?.temperature ?? 1, s = e?.topK, r = e?.topP, g = e?.usePadding ?? !1, n = {
-       training: !1,
-       attentionScores: e?.attentionScores ? {
-         attentionOut: []
-       } : void 0,
-       cache: o
-     }, a = C(() => {
-       const p = t, m = p.shape[1], h = m <= this.config.gpt.blockSize ? p : p.slice(
-         [0, m - this.config.gpt.blockSize],
-         [p.shape[0], this.config.gpt.blockSize]
-       ), b = g ? this.config.gpt.blockSize - h.shape[1] : 0, w = b > 0 ? q(h, [
-         [0, 0],
-         [0, b]
-       ]) : h, [f] = this.forward(n, w), E = f.shape[1] - 1 - b, c = f.slice([0, E, 0], [f.shape[0], 1, f.shape[2]]);
-       return n.attentionScores?.attentionOut && n.attentionScores.attentionOut.forEach((y, L) => {
-         y.shape[1] !== 1 && (n.attentionScores.attentionOut[L] = U(
-           y.slice([0, E, 0], [y.shape[0], 1, y.shape[2]])
-         ), y.dispose());
-       }), f.dispose(), c.div(l).squeeze([1]);
-     });
-     let i;
-     if (r) {
-       const p = $(a), m = await p.array();
-       p.dispose();
-       const h = m[0].map((c, k) => ({ prob: c, index: k })).sort((c, k) => k.prob - c.prob);
-       let b = 0;
-       const w = new Array(h.length).fill(0);
-       for (const c of h)
-         if (b += c.prob, w[c.index] = c.prob, b >= r)
-           break;
-       const f = w.reduce((c, k) => c + k, 0), E = w.map((c) => c / f);
-       i = W(E);
-     } else if (s) {
-       const { values: p, indices: m } = J(a, s), h = I(p, 1);
-       i = X(m, h, 1), p.dispose(), m.dispose(), h.dispose();
-     } else
-       i = I(a, 1);
-     let d;
-     e?.includeProbabilities && (d = $(a));
-     const S = i.reshape([1, 1]);
-     return i.dispose(), i = S, a.dispose(), { output: i, probabilities: d, attention: n.attentionScores?.attentionOut };
-   }
-   getNumParams() {
-     return _(this.config.gpt);
-   }
-   dispose() {
-     this.wte.dispose(), this.wpe && this.wpe.dispose(), this.drop.dispose(), this.blocks.forEach((t) => t.dispose()), this.lnF.dispose();
-   }
- }
- export {
-   bt as default
- };
package/dist/TiedEmbedding-BxOerUmB.js
DELETED
@@ -1,43 +0,0 @@
- import { R as a, d as s } from "./random_width-sZORGo5k.js";
- import "./index-BoWRt-10.js";
- import o from "./layers/BaseLayer.js";
- import { v as m } from "./variable-BuddVFLa.js";
- import { g as d } from "./gather-CMMy2KEG.js";
- /**
-  * @license
-  * Copyright 2018 Google LLC
-  *
-  * Use of this source code is governed by an MIT-style
-  * license that can be found in the LICENSE file or at
-  * https://opensource.org/licenses/MIT.
-  * =============================================================================
-  */
- function n(i) {
-   return new a(i);
- }
- class S extends o {
-   vocabSize;
-   embedDim;
-   initializer;
-   WEIGHTS;
-   constructor(t, e, r) {
-     super(t, r), this.WEIGHTS = e, this.vocabSize = t.gpt.vocabSize, this.embedDim = t.gpt.nEmbed, this.initializer = n({
-       mean: 0,
-       stddev: 0.02
-     }), this.addVariable(this.WEIGHTS, m(this.initializer.apply([this.vocabSize, this.embedDim]), !0));
-   }
-   embed(t) {
-     return d(this.getVariable(this.WEIGHTS), t, 0);
-   }
-   project(t) {
-     return s(t, this.getVariable(this.WEIGHTS).transpose());
-   }
-   // Dummy, should not be used.
-   forward(t, e) {
-     return this.project(e);
-   }
- }
- export {
-   S as T,
-   n as r
- };
package/dist/utilities/generate.js
DELETED
@@ -1,22 +0,0 @@
- import "../index-BoWRt-10.js";
- import { t as m } from "../tensor2d-wxPAnDQy.js";
- import { c as u } from "../concat-CsxrgovM.js";
- async function v(o, r, a, c, f) {
-   if (c <= 0)
-     throw new Error("Length must be a positive integer");
-   if (a.length === 0)
-     throw new Error("Prompt cannot be an empty string");
-   const p = await o.tokenise([a], !0), s = r.config.gpt.useRope ? new Array(r.config.gpt.nLayer).fill(void 0) : void 0;
-   let e = m(p, [1, p[0].length], "int32"), t = e;
-   for (let g = 0; g < c; g++) {
-     const { output: n } = await r.generate(e, s, f), T = e, l = t;
-     t = u([t, n], 1), e = s ? n : u([e, n], 1), T.dispose(), l.dispose(), s || n.dispose();
-   }
-   const w = await t.array();
-   t.dispose();
-   const i = w[0], d = i.indexOf(o.eosToken);
-   return d !== -1 && i.splice(d), await o.decode(i);
- }
- export {
-   v as generateText
- };
package/dist/utilities/save.d.ts
DELETED
@@ -1,9 +0,0 @@
- import { default as NanoGPT } from '../NanoGPTModel';
- import { ITokeniser } from '../tokeniser/type';
- export interface SaveOptions {
-     includeLog?: boolean;
-     name?: string;
-     metadata?: Record<string, unknown>;
-     files?: Record<string, unknown>;
- }
- export declare function saveModel(model: NanoGPT, tokeniser: ITokeniser, options?: SaveOptions): Promise<Blob>;
package/dist/utilities/save.js
DELETED
@@ -1,61 +0,0 @@
- import { j as p } from "../jszip.min-CjP2V1VV.js";
- import b from "../tokeniser/CharTokeniser.js";
- import { save_safetensors as l } from "./safetensors.js";
- import { VERSION as y } from "../loader/load.js";
- async function N(e, a, n) {
-   const f = n?.includeLog ?? !0, s = /* @__PURE__ */ new Map();
-   e.saveWeights(s);
-   const i = new p(), r = {};
-   s.forEach((t, o) => {
-     t.length === 1 && (r[o] = t[0]);
-   });
-   const g = await l(r);
-   i.file("model.safetensors", g, { binary: !0 });
-   const c = {
-     model_type: "GenAI_NanoGPT_1",
-     vocab_size: a.getVocab().length,
-     hidden_size: e.config.gpt.nEmbed,
-     num_hidden_layers: e.config.gpt.nLayer,
-     num_attention_heads: e.config.gpt.nHead,
-     block_size: e.config.gpt.blockSize,
-     dropout: e.config.gpt.dropout,
-     biasInLinear: e.config.gpt.biasInLinear,
-     biasInLayerNorm: e.config.gpt.biasInLayerNorm,
-     mlpFactor: e.config.gpt.mlpFactor,
-     useRope: e.config.gpt.useRope
-   };
-   if (i.file("config.json", JSON.stringify(c, void 0, 4), {
-     binary: !1
-   }), i.file(
-     "meta.json",
-     JSON.stringify(
-       {
-         version: y,
-         application: "@genai-fi/nanogpt",
-         meta: n?.metadata,
-         name: n?.name
-       },
-       void 0,
-       4
-     ),
-     {
-       binary: !1
-     }
-   ), i.file(
-     "tokeniser.json",
-     JSON.stringify({
-       type: a instanceof b ? "char" : "bpe",
-       vocab: a.getVocab(),
-       merges: await a.getMerges()
-     }),
-     {
-       binary: !1
-     }
-   ), f && i.file("log.json", JSON.stringify(e.log), { binary: !1 }), n?.files)
-     for (const [t, o] of Object.entries(n.files))
-       i.file(t, JSON.stringify(o), { binary: !1 });
-   return i.generateAsync({ type: "blob" });
- }
- export {
-   N as saveModel
- };