@openfluke/welvet 0.2.0 → 0.74.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +200 -200
- package/README.md +56 -346
- package/dist/benchmark_tiling.html +244 -0
- package/dist/benchmark_training.html +249 -0
- package/dist/benchmark_training_comparison.html +230 -0
- package/dist/cabi_verify.html +360 -0
- package/dist/dna_evo_benchmark.html +420 -0
- package/dist/index.d.ts +105 -33
- package/dist/index.js +142 -56
- package/dist/loader.browser.d.ts +3 -3
- package/dist/loader.browser.js +8 -9
- package/dist/loader.d.ts +2 -2
- package/dist/loader.js +17 -29
- package/dist/main.wasm +0 -0
- package/dist/src/index.d.ts +135 -0
- package/dist/src/index.js +181 -0
- package/dist/src/loader.browser.d.ts +5 -0
- package/dist/src/loader.browser.js +24 -0
- package/dist/src/loader.d.ts +5 -0
- package/dist/src/loader.js +26 -0
- package/dist/src/types.d.ts +299 -0
- package/dist/src/types.js +65 -0
- package/dist/tests/benchmark.d.ts +5 -0
- package/dist/tests/benchmark.js +139 -0
- package/dist/tests/benchmark.ts +148 -0
- package/dist/tests/cabi_verify.d.ts +5 -0
- package/dist/tests/cabi_verify.js +181 -0
- package/dist/tests/cabi_verify.ts +192 -0
- package/dist/types.d.ts +269 -170
- package/dist/types.js +63 -2
- package/dist/wasm_exec.js +575 -575
- package/package.json +58 -89
- package/dist/index.browser.d.ts +0 -32
- package/dist/index.browser.js +0 -39
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * welvet WASM Browser Loader
 *
 * Loads the Go runtime shim (wasm_exec.js) when `globalThis.Go` is missing,
 * downloads the WASM binary, instantiates it and starts the Go program.
 *
 * @param wasmUrl Optional URL of the WASM binary. Defaults to "/dist/main.wasm".
 * @returns Promise that resolves roughly 100 ms after `go.run()` was started.
 * @throws Error when wasm_exec.js fails to load or when the WASM download
 *         answers with a non-success HTTP status.
 */
export async function loadLoomWASMBrowser(wasmUrl) {
    // Inject wasm_exec.js if the Go runtime is not yet available
    if (typeof globalThis["Go"] === "undefined") {
        const script = document.createElement("script");
        script.src = "/dist/wasm_exec.js";
        await new Promise((resolve, reject) => {
            script.onload = () => resolve();
            script.onerror = () => reject(new Error("Failed to load wasm_exec.js"));
            document.head.appendChild(script);
        });
    }
    const resolvedUrl = wasmUrl ?? "/dist/main.wasm";
    const response = await fetch(resolvedUrl);
    // fetch() only rejects on network failure. An HTTP error (404, 500, ...)
    // still resolves, and its body would otherwise be handed to
    // WebAssembly.instantiate and fail there with a misleading compile error.
    if (!response.ok) {
        throw new Error(`Failed to fetch ${resolvedUrl}: HTTP ${response.status} ${response.statusText}`);
    }
    const wasmBuffer = await response.arrayBuffer();
    // @ts-ignore — Go is injected by wasm_exec.js
    const go = new Go();
    const { instance } = await WebAssembly.instantiate(wasmBuffer, go.importObject);
    // Not awaited: the promise returned by go.run() is ignored here, so the
    // Go program keeps running after this loader resolves.
    go.run(instance);
    // Allow Go goroutines to settle
    await new Promise((resolve) => setTimeout(resolve, 100));
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
 * welvet WASM Loader — Node.js
 *
 * Locates the directory holding wasm_exec.js and main.wasm, evaluates the Go
 * runtime shim, then instantiates and starts the welvet WebAssembly module.
 *
 * @returns Promise that resolves roughly 100 ms after `go.run()` was started.
 * @throws Whatever `fs.readFileSync` / `WebAssembly.instantiate` throw when
 *         the assets are missing or invalid.
 */
export async function loadLoomWASM() {
    const fs = await import("fs");
    const url = await import("url");
    const path = await import("path");
    const __filename = url.fileURLToPath(import.meta.url);
    const __dirname = path.dirname(__filename);
    // Resolve root: dist/ in production, or one level up from src/
    const preferredRoot = __dirname.endsWith("dist")
        ? __dirname
        : path.join(__dirname, "..", "dist");
    // When this file is emitted to dist/src/, "../dist" points at dist/dist,
    // which does not hold the assets. Keep the original location first so
    // every layout that already worked resolves identically, then fall back
    // to the parent directory and finally to this module's own directory.
    const candidateRoots = [preferredRoot, path.join(__dirname, ".."), __dirname];
    const root = candidateRoots.find((dir) => fs.existsSync(path.join(dir, "wasm_exec.js")))
        ?? preferredRoot; // nothing found: let readFileSync report the original path
    // Bootstrap Go runtime
    const wasmExecCode = fs.readFileSync(path.join(root, "wasm_exec.js"), "utf-8");
    // NOTE(review): eval of a file read from disk. It is expected to be the
    // wasm_exec.js shipped next to main.wasm; never point `root` at
    // user-controlled locations.
    eval(wasmExecCode);
    // Load and instantiate the WASM module
    const wasmBuffer = fs.readFileSync(path.join(root, "main.wasm"));
    // @ts-ignore — Go is injected by wasm_exec.js
    const go = new Go();
    const { instance } = await WebAssembly.instantiate(wasmBuffer, go.importObject);
    // Not awaited: the promise returned by go.run() is ignored here.
    go.run(instance);
    // Allow Go goroutines to settle
    await new Promise((resolve) => setTimeout(resolve, 100));
}
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* welvet — Type Definitions for the M-POLY-VTD AI Engine
|
|
3
|
+
*
|
|
4
|
+
* Wraps the Loom v0.73.0 WASM module which supports 21 numerical types,
|
|
5
|
+
* systolic grid propagation, target propagation, and WebGPU acceleration.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Numeric codes identifying the numerical types understood by the engine.
 *
 * The codes are contiguous, from 0 (FLOAT64) to 20 (BINARY): 21 entries.
 */
export declare const DType: Readonly<{
    FLOAT64: 0;
    FLOAT32: 1;
    FLOAT16: 2;
    BFLOAT16: 3;
    FP8_E4M3: 4;
    FP8_E5M2: 5;
    INT64: 6;
    INT32: 7;
    INT16: 8;
    INT8: 9;
    UINT64: 10;
    UINT32: 11;
    UINT16: 12;
    UINT8: 13;
    INT4: 14;
    UINT4: 15;
    FP4: 16;
    INT2: 17;
    UINT2: 18;
    TERNARY: 19;
    BINARY: 20;
}>;
/** Union of every numeric code declared in `DType`. */
export type DTypeValue = (typeof DType)[keyof typeof DType];
|
|
31
|
+
/**
 * Numeric codes identifying the kinds of layer a network can contain.
 *
 * The codes are contiguous, from 0 (DENSE) to 16 (CONV_TRANSPOSED_3D).
 */
export declare const LayerType: Readonly<{
    DENSE: 0;
    RMS_NORM: 1;
    LAYER_NORM: 2;
    MHA: 3;
    SOFTMAX: 4;
    SWIGLU: 5;
    EMBEDDING: 6;
    RESIDUAL: 7;
    KMEANS: 8;
    RNN: 9;
    LSTM: 10;
    CNN1: 11;
    CNN2: 12;
    CNN3: 13;
    CONV_TRANSPOSED_1D: 14;
    CONV_TRANSPOSED_2D: 15;
    CONV_TRANSPOSED_3D: 16;
}>;
/** Union of every numeric code declared in `LayerType`. */
export type LayerTypeValue = (typeof LayerType)[keyof typeof LayerType];
|
|
51
|
+
/**
 * Numeric codes identifying the available activation functions.
 *
 * The codes are contiguous, from 0 (RELU) to 5 (LINEAR).
 */
export declare const Activation: Readonly<{
    RELU: 0;
    SILU: 1;
    GELU: 2;
    TANH: 3;
    SIGMOID: 4;
    LINEAR: 5;
}>;
/** Union of every numeric code declared in `Activation`. */
export type ActivationValue = (typeof Activation)[keyof typeof Activation];
|
|
60
|
+
/**
 * Description of a single layer inside a network configuration.
 *
 * Only `type` is required; every other field is optional.
 * NOTE(review): which optional fields apply to which layer type is not
 * visible from this declaration — confirm against the engine documentation.
 */
export interface LayerSpec {
    /** Layer type (use LayerType constants) */
    type: number | string;
    /** Numerical precision (use DType constants, default: FLOAT32) */
    dtype?: number;
    /** Activation function (use Activation constants) */
    activation?: number | string;

    // Grid coordinates of the layer.
    z?: number;
    y?: number;
    x?: number;
    l?: number;

    // Input / output dimensions.
    input_height?: number;
    input_width?: number;
    input_depth?: number;
    output_height?: number;
    output_width?: number;
    output_depth?: number;

    // Remaining layer-specific settings.
    input_channels?: number;
    filters?: number;
    kernel_size?: number;
    stride?: number;
    padding?: number;
    num_heads?: number;
    num_kv_heads?: number;
    d_model?: number;
    seq_length?: number;
    vocab_size?: number;
    embedding_dim?: number;
    tile_size?: number;
}
|
|
90
|
+
/**
 * Top-level description of a network: the grid dimensions plus the flat list
 * of layers that populate it. Only `layers` is required.
 */
export interface NetworkConfig {
    /** Layer definitions (flat list, laid out z→y→x→l) */
    layers: LayerSpec[];
    /** Grid depth (number of z-planes). Default: 1 */
    depth?: number;
    /** Grid rows (y-dimension). Default: 1 */
    rows?: number;
    /** Grid columns (x-dimension). Default: 1 */
    cols?: number;
    /** Layers per cell. Default: number of layers */
    layers_per_cell?: number;
}
|
|
102
|
+
/** One batch of supervised training data: inputs and their targets. */
export interface TrainingBatch {
    /** Flattened input values for the batch. */
    input: Float32Array | number[];
    /** Flattened target values for the batch. */
    target: Float32Array | number[];
    /** Optional batch shape for input, e.g. [batchSize, features]. Defaults to [1, length]. */
    inputShape?: number[];
    /** Optional batch shape for target, e.g. [batchSize, outputs]. Defaults to [1, length]. */
    targetShape?: number[];
}
|
|
110
|
+
/** Summary of a completed training run. */
export interface TrainingResult {
    /** Number of epochs that were completed. */
    epochs_completed: number;
    /** Duration of the run in milliseconds. */
    duration_ms: number;
    /** Loss value at the end of the run. */
    final_loss: number;
    /** Recorded loss values, when the run provides them. */
    loss_history?: number[];
}
|
|
116
|
+
/**
 * Handle for driving a network through the clocked, step-by-step
 * (systolic) API.
 */
export interface SystolicState {
    /**
     * Feed input values into the first layer of the grid.
     * @param data Input values, as a Float32Array or a plain number array
     */
    setInput(data: number[] | Float32Array): void;
    /**
     * Run a single clock cycle of the systolic grid.
     * @param captureHistory Whether to keep history for a later backward pass
     * @returns How long the step took, in milliseconds
     */
    step(captureHistory?: boolean): number;
    /**
     * Fetch a layer's output; without an index, the last active layer is read.
     * @param layerIdx Optional 0-based layer index
     */
    getOutput(layerIdx?: number): Float32Array;
    /**
     * Propagate gradients backwards through the captured history.
     * @param gradients Gradient at the output (Float32Array or number[])
     * @returns Gradient with respect to the input
     */
    backward(gradients: number[] | Float32Array): Float32Array;
    /**
     * Run target propagation, the gradient-free alternative to `backward`.
     * @param target Global target tensor
     * @param lr Learning rate
     */
    applyTargetProp(target: number[] | Float32Array, lr: number): void;
    /** Number of systolic steps executed so far. */
    stepCount(): number;
    /** Release resources (no-op in WASM, included for API parity). */
    free(): void;
}
|
|
150
|
+
/** Handle for training a network with target propagation. */
export interface TargetPropState {
    /**
     * Run a forward pass through every layer and record local targets.
     * @param input Input data
     * @returns The network output
     */
    forward(input: number[] | Float32Array): Float32Array;
    /**
     * Gap-based target-propagation backward pass (does not use the chain rule).
     * @param target Desired output
     */
    backward(target: number[] | Float32Array): void;
    /**
     * Chain-rule backward pass (standard backprop driven through the TP state).
     * @param target Desired output
     */
    backwardChainRule(target: number[] | Float32Array): void;
    /**
     * Update all layer weights from the accumulated gap gradients.
     * @param lr Learning rate
     */
    applyGaps(lr?: number): void;
    /** Release resources (no-op in WASM). */
    free(): void;
}
|
|
175
|
+
/** A population of networks evolved generation by generation. */
export interface NEATPopulation {
    /** Internal handle ID. */
    _id: number;
    /** How many networks the population holds. */
    size: number;
    /** Look up one member of the population and return its network wrapper. */
    getNetwork(index: number): Network;
    /**
     * Advance one generation using fitness scores computed by the caller.
     * @param fitnesses One score per network; the length must equal `size`
     */
    evolveWithFitnesses(fitnesses: number[]): void;
    /** Network wrapper of the best performer. */
    best(): Network;
    /** Fitness score of the best performer. */
    bestFitness(): number;
    /** Human-readable summary for the given generation. */
    summary(generation: number): string;
    /** Release resources. */
    free(): void;
}
|
|
193
|
+
/**
 * Wrapper around a network living inside the WASM module.
 *
 * Several methods exchange data as JSON strings rather than objects; callers
 * are expected to `JSON.parse` / `JSON.stringify` at the boundary.
 */
export interface Network {
    /** Internal handle ID — required for spliceDNA and population operations. */
    _id: number;

    /**
     * Run a complete sequential forward pass.
     * @param input Input values (Float32Array or number[])
     * @returns The network output
     */
    sequentialForward(input: number[] | Float32Array): Float32Array;
    /**
     * Describe the network shape as a JSON string:
     * {depth, rows, cols, layers_per_cell, total_layers, use_gpu, default_dtype}
     */
    getInfo(): string;
    /** Number of layers in the network. */
    getLayerCount(): number;
    /**
     * Look up the specification of one layer.
     * @param layerIdx 0-based layer index
     * @returns JSON string with the layer spec
     */
    getLayerSpec(layerIdx: number): string;
    /**
     * Change a layer's numerical type at runtime (zero-cost when cached).
     * @param layerIdx 0-based layer index
     * @param dtype DType constant
     * @returns JSON status/error
     */
    morphLayer(layerIdx: number, dtype: DTypeValue): string;

    /**
     * Produce the network's DNA fingerprint as a JSON string.
     * Two fingerprints can be compared with compareLoomDNA().
     */
    extractDNA(): string;
    /**
     * Produce the network's full blueprint as a JSON string.
     * @param modelID Optional model identifier
     */
    extractBlueprint(modelID?: string): string;

    /**
     * Set up WebGPU for this network.
     * @returns Promise resolving with a status JSON string
     */
    initGPU(): Promise<string>;
    /**
     * Copy every layer's weights into GPU buffers.
     * @returns Promise resolving with a status JSON string
     */
    syncToGPU(): Promise<string>;
    /** Copy weights back to the CPU and turn GPU mode off. */
    syncToCPU(): void;

    /**
     * High-level supervised training loop.
     * @param batchesJSON JSON string of TrainingBatch[]
     * @param epochs Number of epochs
     * @param lr Learning rate
     * @returns JSON string with TrainingResult
     */
    train(batchesJSON: string, epochs: number, lr: number): string;
    /** Build a SystolicState for the stepping API. */
    createSystolicState(): SystolicState;
    /**
     * Build a TargetPropState for gradient-free learning.
     * @param useChainRule If true, uses chain-rule backprop instead of gap-based TP
     */
    createTargetPropState(useChainRule?: boolean): TargetPropState;

    /**
     * Genetic crossover between this network and another one.
     * @param otherID The `_id` of the other parent network
     * @param cfgJSON JSON string from defaultSpliceConfig()
     */
    spliceDNA(otherID: number, cfgJSON: string): Network;
    /**
     * NEAT-style structural mutation.
     * @param cfgJSON JSON string from defaultNEATConfig()
     */
    neatMutate(cfgJSON: string): Network;

    /** Release resources (no-op in WASM, included for API parity). */
    free(): void;
}
|
|
275
|
+
/**
 * Shape of a DNA comparison result.
 *
 * The four named fields are always typed; the index signature lets any
 * additional keys through as `unknown`.
 */
export interface DNACompareResult {
    /** Similarity score. NOTE(review): value range is not visible here. */
    similarity: number;
    /** Whether the overall architecture matches. */
    architecture_match: boolean;
    /** Whether the depth matches. */
    depth_match: boolean;
    /** Whether the layer counts match. */
    layer_count_match: boolean;
    /** Any further keys present in the result. */
    [key: string]: unknown;
}
|
|
282
|
+
/**
 * Functions declared on the global scope. The loaders in this package start
 * the WASM module that these declarations describe.
 */
declare global {
    /**
     * Build a VolumetricNetwork from a JSON config string.
     * @param jsonConfig Network configuration serialized as JSON
     */
    function createLoomNetwork(jsonConfig: string): Network;
    /**
     * Load a network from a SafeTensors file path.
     * @param path Path of the SafeTensors file
     */
    function loadLoomNetwork(path: string): Network;
    /** Initialize WebGPU; the returned Promise resolves with a string. */
    function setupWebGPU(): Promise<string>;
    /**
     * Compare two DNA JSON strings for architectural similarity.
     * @returns The comparison result as a string
     */
    function compareLoomDNA(dnaA: string, dnaB: string): string;
    /** Get the default TargetPropConfig. */
    function getDefaultTargetPropConfig(): string;
    /** Get the default SpliceConfig as a JSON string. */
    function defaultSpliceConfig(): string;
    /**
     * Get the default NEATConfig as a JSON string.
     * @param dModel Model dimension the config is generated for
     */
    function defaultNEATConfig(dModel: number): string;
    /**
     * Create a NEAT population from a seed network.
     * @param seedID Handle ID of the seed network
     * @param size Number of networks in the population
     * @param cfgJSON NEAT configuration as a JSON string
     */
    function createLoomNEATPopulation(seedID: number, size: number, cfgJSON: string): NEATPopulation;
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* welvet — Type Definitions for the M-POLY-VTD AI Engine
|
|
3
|
+
*
|
|
4
|
+
* Wraps the Loom v0.73.0 WASM module which supports 21 numerical types,
|
|
5
|
+
* systolic grid propagation, target propagation, and WebGPU acceleration.
|
|
6
|
+
*/
|
|
7
|
+
// ──────────────────────────────────────────────────────────────────────────────
|
|
8
|
+
// Numerical Type Constants (matches poly.DType)
|
|
9
|
+
// ──────────────────────────────────────────────────────────────────────────────
|
|
10
|
+
// Names are listed in code order: each name's numeric code is its position
// in this list (FLOAT64 = 0 ... BINARY = 20). Append new names at the end
// only; inserting one shifts every following code.
export const DType = Object.fromEntries([
    "FLOAT64", "FLOAT32", "FLOAT16", "BFLOAT16",
    "FP8_E4M3", "FP8_E5M2",
    "INT64", "INT32", "INT16", "INT8",
    "UINT64", "UINT32", "UINT16", "UINT8",
    "INT4", "UINT4", "FP4",
    "INT2", "UINT2",
    "TERNARY", "BINARY",
].map((name, code) => [name, code]));
|
|
33
|
+
// ──────────────────────────────────────────────────────────────────────────────
|
|
34
|
+
// Layer Type Constants (matches poly.LayerType)
|
|
35
|
+
// ──────────────────────────────────────────────────────────────────────────────
|
|
36
|
+
// Names are listed in code order: each name's numeric code is its position
// in this list (DENSE = 0 ... CONV_TRANSPOSED_3D = 16). Append new names at
// the end only; inserting one shifts every following code.
export const LayerType = Object.fromEntries([
    "DENSE", "RMS_NORM", "LAYER_NORM", "MHA", "SOFTMAX", "SWIGLU",
    "EMBEDDING", "RESIDUAL", "KMEANS",
    "RNN", "LSTM",
    "CNN1", "CNN2", "CNN3",
    "CONV_TRANSPOSED_1D", "CONV_TRANSPOSED_2D", "CONV_TRANSPOSED_3D",
].map((name, code) => [name, code]));
|
|
55
|
+
// ──────────────────────────────────────────────────────────────────────────────
|
|
56
|
+
// Activation Type Constants (matches poly.ActivationType)
|
|
57
|
+
// ──────────────────────────────────────────────────────────────────────────────
|
|
58
|
+
// Names are listed in code order: each name's numeric code is its position
// in this list (RELU = 0 ... LINEAR = 5). Append new names at the end only.
export const Activation = Object.fromEntries(
    ["RELU", "SILU", "GELU", "TANH", "SIGMOID", "LINEAR"].map((name, code) => [name, code])
);
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* benchmark.ts
|
|
3
|
+
* TypeScript port of benchmark_training.html
|
|
4
|
+
*/
|
|
5
|
+
// Every case benchmarks a network made of one layer in a 1x1x1 grid.
// `layer` holds only the layer-specific fields; the grid envelope and the
// z/y/x/l coordinates (all 0) are added when the JSON config string is built
// in the map() below, which also drops `layer` from the final case object.
const TRAINING_CASES = [
    {
        name: 'Dense (Linear)', iters: 5, inDim: 512, outDim: 512,
        layer: { type: "Dense", input_height: 512, output_height: 512, activation: "Linear", dtype: "F32" }
    },
    {
        name: 'RMSNorm', iters: 5, inDim: 512, outDim: 512,
        layer: { type: "RMSNorm", input_height: 512, output_height: 512, dtype: "F32" }
    },
    {
        name: 'SwiGLU (MLP)', iters: 5, inDim: 512, outDim: 1024,
        layer: { type: "SwiGLU", input_height: 512, output_height: 1024, dtype: "F32" }
    },
    {
        name: 'Embedding', iters: 5, inDim: 16, outDim: 2048, isEmbedding: true,
        layer: { type: "Embedding", vocab_size: 1024, embedding_dim: 128, dtype: "F32" }
    },
    {
        name: 'Residual Add', iters: 5, inDim: 512, outDim: 512,
        layer: { type: "Residual", input_height: 512, output_height: 512, dtype: "F32" }
    },
    {
        name: 'MHA (Fused)', iters: 5, inDim: 128, outDim: 128,
        layer: { type: "MHA", input_height: 128, output_height: 128, num_heads: 4, d_model: 128, dtype: "F32" }
    }
].map(({ layer, ...benchCase }) => ({
    ...benchCase,
    cfg: JSON.stringify({
        depth: 1, rows: 1, cols: 1, layers_per_cell: 1,
        layers: [{ z: 0, y: 0, x: 0, l: 0, ...layer }]
    })
}));
|
|
43
|
+
/**
 * Build synthetic training batches.
 *
 * @param inDim Number of input values per sample
 * @param outDim Number of target values per sample
 * @param nBatches How many batches to generate
 * @param batchSize Samples per batch
 * @param isEmbedding When truthy, inputs are the running index modulo 1024
 *        instead of random values
 * @returns Array of `{ input: { shape, data }, target: { shape, data } }`
 *          where `data` is a plain number array of float32-rounded values
 */
function makeTrainBatches(inDim, outDim, nBatches, batchSize, isEmbedding) {
    // Inputs: deterministic indices for embeddings, otherwise uniform in [-0.5, 0.5).
    const nextInput = isEmbedding
        ? (_, i) => i % 1024
        : () => (Math.random() * 2 - 1) * 0.5;
    // Targets: uniform in [0, 0.1).
    const nextTarget = () => Math.random() * 0.1;
    const batches = [];
    while (batches.length < nBatches) {
        // Values go through Float32Array so they are rounded to float32
        // before being copied into plain arrays. Inputs are generated before
        // targets, which fixes the order of the Math.random() draws.
        const inputs = new Float32Array(batchSize * inDim).map(nextInput);
        const targets = new Float32Array(batchSize * outDim).map(nextTarget);
        batches.push({
            input: { shape: [batchSize, inDim], data: [...inputs] },
            target: { shape: [batchSize, outDim], data: [...targets] }
        });
    }
    return batches;
}
|
|
65
|
+
/**
 * Benchmark one training case: time the forward pass, then time a short
 * training run and pull the first/last loss out of its result.
 *
 * @param tc One entry of TRAINING_CASES (`cfg`, `inDim`, `outDim`, `iters`,
 *        optional `isEmbedding`)
 * @returns `{ fwdMs, trainMs, sample, sanity, initialLoss, finalLoss }`.
 *          `trainMs` is -1 when training threw; the loss fields stay null
 *          when no loss history could be read.
 */
async function runCase(tc) {
    const BATCH_SIZE = 4;
    const BATCH_COUNT = 4;
    const EPOCHS = 3;
    // @ts-ignore
    const network = globalThis.createLoomNetwork(tc.cfg);
    const trainingPayload = JSON.stringify(makeTrainBatches(tc.inDim, tc.outDim, BATCH_COUNT, BATCH_SIZE, tc.isEmbedding));
    // Probe input: running index modulo 1024 for embeddings, constant 0.5 otherwise.
    const probe = new Float32Array(tc.inDim).map((_, i) => (tc.isEmbedding ? i % 1024 : 0.5));

    // warm-up
    network.sequentialForward(probe);
    const forwardStart = performance.now();
    let lastOut;
    for (let pass = 0; pass < tc.iters; pass += 1) {
        lastOut = network.sequentialForward(probe);
    }
    const fwdMs = (performance.now() - forwardStart) / tc.iters;

    let trainMs = -1;
    let initialLoss = null;
    let finalLoss = null;
    try {
        const trainStart = performance.now();
        const trainResult = await network.train(trainingPayload, EPOCHS, 0.001);
        trainMs = performance.now() - trainStart;
        if (typeof trainResult === 'string') {
            try {
                const history = JSON.parse(trainResult).loss_history;
                if (history && history.length > 0) {
                    initialLoss = history[0];
                    finalLoss = history[history.length - 1];
                }
            }
            catch (e) {
                // Unparseable result: keep the timing, leave the losses null.
            }
        }
    }
    catch (e) {
        // Training itself failed: report it as "no timing available".
        trainMs = -1;
    }

    // First three output values (missing/zero entries become 0); the run
    // counts as sane when at least one of them is not effectively zero.
    const sample = lastOut ? [0, 1, 2].map((k) => lastOut[k] || 0) : null;
    const sanity = sample && sample.some((v) => Math.abs(v) > 1e-9);
    network.free();
    return { fwdMs, trainMs, sample, sanity, initialLoss, finalLoss };
}
|
|
111
|
+
/**
 * Load the WASM module with the loader matching the current runtime, run
 * every entry of TRAINING_CASES and print one table row per case.
 *
 * @returns Promise that resolves once all rows have been printed.
 */
export async function runBenchmark() {
    console.log("=== M-POLY-VTD Training Showdown Benchmark ===");
    // Decide which loader to use
    const runningInNode = typeof process !== "undefined" && process.versions && process.versions.node;
    if (runningInNode) {
        const { loadLoomWASM } = await import("../src/loader.js");
        await loadLoomWASM();
    }
    else {
        // @ts-ignore
        const { loadLoomWASMBrowser } = await import("../src/loader.browser.js");
        await loadLoomWASMBrowser();
    }

    const headerCells = [
        "Layer".padEnd(15),
        "Fwd ms/it".padEnd(11),
        "Train ms".padEnd(10),
        "Init Loss".padEnd(11),
        "Final Loss".padEnd(11),
        "Sanity"
    ];
    console.log(headerCells.join(" | "));
    console.log("-".repeat(85));

    // Fixed-width cell: the number with `digits` decimals, or 'N/A' when the
    // measurement is unusable.
    const cell = (usable, value, digits, width) => (usable ? value.toFixed(digits) : 'N/A').padEnd(width);
    for (const tc of TRAINING_CASES) {
        const res = await runCase(tc);
        const rowCells = [
            tc.name.padEnd(15),
            cell(res.fwdMs >= 0, res.fwdMs, 3, 10),
            cell(res.trainMs >= 0, res.trainMs, 1, 9),
            cell(res.initialLoss != null, res.initialLoss, 4, 10),
            cell(res.finalLoss != null, res.finalLoss, 4, 10),
            res.sanity ? 'REAL' : 'ZERO'
        ];
        console.log(rowCells.join(" | "));
    }
}
|
|
136
|
+
// Auto-run if executed directly via Node.js/tsx
// process.argv[1] is missing when there is no script path (e.g. `node -e`,
// the REPL) or when `process` is only a partial browser shim, so check it
// before calling .replace(); otherwise merely importing this module throws.
if (typeof process !== "undefined"
    && Array.isArray(process.argv)
    && typeof process.argv[1] === "string"
    && import.meta.url.includes(process.argv[1].replace(/\\/g, '/'))) {
    // Report a failed run explicitly instead of leaving the promise unhandled.
    runBenchmark().catch((err) => {
        console.error(err);
        process.exitCode = 1;
    });
}
|