@openfluke/welvet 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -9,8 +9,10 @@
 
  ## ✨ Features
 
- - 🚀 **5.4MB WASM Binary** - Complete neural network framework compiled to WebAssembly
- - 🧠 **All 5 Layer Types** - Dense, Conv2D, Multi-Head Attention, RNN, LSTM fully supported
+ - 🤖 **Transformer Inference (NEW!)** - Run LLMs like SmolLM2-135M with streaming generation
+ - 🚀 **6.0MB WASM Binary** - Complete neural network framework + transformer inference
+ - 🧠 **7 Layer Types (All CPU)** - Dense, Conv2D, Multi-Head Attention, LayerNorm, RNN, LSTM, Softmax (10 variants)
+ - ✅ **Full CPU Implementation** - Every layer works with complete forward/backward passes
  - 🎯 **Registry-based Initialization** - Dynamic layer creation via `CallLayerInit()` with zero manual exports
  - 🔍 **Runtime Introspection** - Discover methods, signatures, and parameters dynamically
  - 💾 **Model Serialization** - Save/load models as JSON (no filesystem required)
@@ -42,6 +44,48 @@ bun add @openfluke/welvet
 
  ## 🚀 Quick Start
 
+ ### 🤖 Transformer Inference (NEW!)
+
+ Run Large Language Models with streaming generation:
+
+ ```typescript
+ import { initLoom, createTransformerAPI } from "@openfluke/welvet";
+
+ // Initialize WASM
+ await initLoom();
+
+ // Create transformer API
+ const transformer = await createTransformerAPI();
+
+ // Load tokenizer
+ const tokenizerData = await fetch("models/SmolLM2-135M-Instruct/tokenizer.json")
+   .then((r) => r.arrayBuffer())
+   .then((buf) => new Uint8Array(buf));
+ await transformer.loadTokenizer(tokenizerData);
+
+ // Load model
+ const configData = await fetch("models/SmolLM2-135M-Instruct/config.json")
+   .then((r) => r.arrayBuffer())
+   .then((buf) => new Uint8Array(buf));
+ const weightsData = await fetch(
+   "models/SmolLM2-135M-Instruct/model.safetensors"
+ )
+   .then((r) => r.arrayBuffer())
+   .then((buf) => new Uint8Array(buf));
+ await transformer.loadModel(configData, weightsData);
+
+ // Stream generation token-by-token
+ for await (const token of transformer.generateStream(
+   "The capital of France is",
+   50,
+   0.7
+ )) {
+   process.stdout.write(token); // Paris...
+ }
+ ```
+
+ **Live Demo:** See `wasm/inference.html` for a beautiful web UI with real-time token streaming!
+
  ### The Easy Way: Load Complete Models
 
  Instead of manually configuring layers, **load a complete model with ONE line**:
@@ -345,6 +389,120 @@ network, _ := nn.LoadModelFromString(modelJSON, "model_id")
 
  See `examples/all_layers_validation.go` for a complete demo that generates test.json (26.4KB with 16 layers) and verifies all three platforms load it identically!
 
+ ## 🤖 Transformer API
+
+ ### Loading Models
+
+ ```typescript
+ import { initLoom, createTransformerAPI } from "@openfluke/welvet";
+
+ // Initialize WASM
+ await initLoom();
+
+ // Create transformer API
+ const transformer = await createTransformerAPI();
+
+ // Load tokenizer from bytes
+ const tokenizerData = await fetch("models/SmolLM2-135M/tokenizer.json")
+   .then((r) => r.arrayBuffer())
+   .then((buf) => new Uint8Array(buf));
+
+ const tokResult = await transformer.loadTokenizer(tokenizerData);
+ console.log(`Tokenizer loaded: ${tokResult.vocab_size} tokens`);
+
+ // Load model from config and weights
+ const configData = await fetch("models/SmolLM2-135M/config.json")
+   .then((r) => r.arrayBuffer())
+   .then((buf) => new Uint8Array(buf));
+
+ const weightsData = await fetch("models/SmolLM2-135M/model.safetensors")
+   .then((r) => r.arrayBuffer())
+   .then((buf) => new Uint8Array(buf));
+
+ const modelResult = await transformer.loadModel(configData, weightsData);
+ console.log(
+   `Model loaded: ${modelResult.num_layers} layers, ${modelResult.hidden_size} hidden size`
+ );
+ ```
+
+ ### Text Encoding/Decoding
+
+ ```typescript
+ // Encode text to token IDs
+ const encodeResult = await transformer.encode("Hello world", true);
+ console.log(encodeResult.ids); // [1, 9906, 2088]
+
+ // Decode token IDs to text
+ const decodeResult = await transformer.decode([1, 9906, 2088], true);
+ console.log(decodeResult.text); // "Hello world"
+ ```
+
+ ### Text Generation
+
+ #### Blocking Generation
+
+ ```typescript
+ const result = await transformer.generate(
+   "The capital of France is",
+   50, // maxTokens
+   0.7 // temperature
+ );
+ console.log(result.generated_text);
+ ```
+
+ #### Streaming Generation
+
+ ```typescript
+ // Stream tokens one at a time
+ process.stdout.write("Generated: ");
+ for await (const token of transformer.generateStream(
+   "Once upon a time",
+   50, // maxTokens
+   0.7 // temperature
+ )) {
+   process.stdout.write(token); // Print each token as it's generated
+ }
+ console.log();
+ ```
+
+ ### Transformer API Reference
+
+ ```typescript
+ interface TransformerAPI {
+   // Load tokenizer from JSON bytes
+   loadTokenizer(tokenizerData: Uint8Array): Promise<TokenizerLoadResult>;
+
+   // Load model from config + weights bytes
+   loadModel(
+     configData: Uint8Array,
+     weightsData: Uint8Array
+   ): Promise<TransformerLoadResult>;
+
+   // Encode text to token IDs
+   encode(text: string, addSpecialTokens?: boolean): Promise<EncodeResult>;
+
+   // Decode token IDs to text
+   decode(
+     tokenIds: number[],
+     skipSpecialTokens?: boolean
+   ): Promise<DecodeResult>;
+
+   // Generate text (blocking)
+   generate(
+     prompt: string,
+     maxTokens?: number,
+     temperature?: number
+   ): Promise<GenerateResult>;
+
+   // Generate text (streaming)
+   generateStream(
+     prompt: string,
+     maxTokens?: number,
+     temperature?: number
+   ): AsyncGenerator<string, void, unknown>;
+ }
+ ```
+
  #### Load Model (Legacy API)
 
  ````
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
- import type { InitOptions, LoomAPI } from "./types";
+ import type { InitOptions, LoomAPI } from "./types.js";
  /**
   * Initialize the LOOM WASM module and return the API
   *
@@ -27,5 +27,7 @@ import type { InitOptions, LoomAPI } from "./types";
   * ```
   */
  export declare function initLoom(opts?: InitOptions): Promise<LoomAPI>;
- export type { LoomAPI, LoomNetwork, InitOptions } from "./types";
- export { ActivationType } from "./types";
+ export type { LoomAPI, LoomNetwork, InitOptions, TransformerAPI, } from "./types.js";
+ export { ActivationType } from "./types.js";
+ export { createTransformerAPI } from "./transformer.js";
+ export type { TokenizerLoadResult, TransformerLoadResult, EncodeResult, DecodeResult, GenerateResult, NextTokenResult, } from "./types.js";
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
- import { ensureGoRuntime, resolvePackagedWasmURL, instantiateGoWasm, } from "./loader";
+ import { ensureGoRuntime, resolvePackagedWasmURL, instantiateGoWasm, } from "./loader.js";
  // tiny helper that waits until WASM has placed symbols on globalThis
  async function waitForExports(keys, timeoutMs = 5000) {
      const t0 = performance.now();
@@ -83,4 +83,5 @@ export async function initLoom(opts = {}) {
      }
      return api;
  }
- export { ActivationType } from "./types";
+ export { ActivationType } from "./types.js";
+ export { createTransformerAPI } from "./transformer.js";
package/dist/loader.js CHANGED
@@ -1,4 +1,4 @@
- import { isBrowser } from "./env";
+ import { isBrowser } from "./env.js";
  let goRuntimeInjected = false;
  let wasmExecTextBundled;
  let wasmUrlBundled;
package/dist/loom.wasm CHANGED
Binary file
package/dist/transformer.d.ts ADDED
@@ -0,0 +1,5 @@
+ import type { TransformerAPI } from "./types.js";
+ /**
+  * Create transformer API wrapper around WASM functions
+  */
+ export declare function createTransformerAPI(): Promise<TransformerAPI>;
package/dist/transformer.js ADDED
@@ -0,0 +1,127 @@
+ /**
+  * Helper to wait for a global function to be available
+  */
+ async function waitForGlobal(name, timeoutMs = 5000) {
+     const t0 = performance.now();
+     for (;;) {
+         if (globalThis[name])
+             return;
+         if (performance.now() - t0 > timeoutMs) {
+             throw new Error(`Timeout waiting for ${name}`);
+         }
+         await new Promise((r) => setTimeout(r, 10));
+     }
+ }
+ /**
+  * Create transformer API wrapper around WASM functions
+  */
+ export async function createTransformerAPI() {
+     // Wait for transformer functions to be available
+     await Promise.all([
+         waitForGlobal("LoadTokenizerFromBytes"),
+         waitForGlobal("LoadTransformerFromBytes"),
+         waitForGlobal("EncodeText"),
+         waitForGlobal("DecodeTokens"),
+         waitForGlobal("GenerateNextToken"),
+         waitForGlobal("GenerateText"),
+     ]);
+     const g = globalThis;
+     return {
+         async loadTokenizer(tokenizerData) {
+             return new Promise((resolve, reject) => {
+                 try {
+                     const resultStr = g.LoadTokenizerFromBytes(tokenizerData);
+                     // If it's already an object, return it directly
+                     if (typeof resultStr === "object") {
+                         resolve(resultStr);
+                         return;
+                     }
+                     const result = JSON.parse(resultStr);
+                     resolve(result);
+                 }
+                 catch (error) {
+                     reject(error);
+                 }
+             });
+         },
+         async loadModel(configData, weightsData) {
+             return new Promise((resolve, reject) => {
+                 try {
+                     const resultStr = g.LoadTransformerFromBytes(configData, weightsData);
+                     const result = JSON.parse(resultStr);
+                     resolve(result);
+                 }
+                 catch (error) {
+                     reject(error);
+                 }
+             });
+         },
+         async encode(text, addSpecialTokens = true) {
+             return new Promise((resolve, reject) => {
+                 try {
+                     const resultStr = g.EncodeText(text, addSpecialTokens);
+                     const result = JSON.parse(resultStr);
+                     resolve(result);
+                 }
+                 catch (error) {
+                     reject(error);
+                 }
+             });
+         },
+         async decode(tokenIds, skipSpecialTokens = true) {
+             return new Promise((resolve, reject) => {
+                 try {
+                     const resultStr = g.DecodeTokens(tokenIds, skipSpecialTokens);
+                     const result = JSON.parse(resultStr);
+                     resolve(result);
+                 }
+                 catch (error) {
+                     reject(error);
+                 }
+             });
+         },
+         async generate(prompt, maxTokens = 50, temperature = 0.7) {
+             return new Promise((resolve, reject) => {
+                 try {
+                     const resultStr = g.GenerateText(prompt, maxTokens, temperature);
+                     const result = JSON.parse(resultStr);
+                     resolve(result);
+                 }
+                 catch (error) {
+                     reject(error);
+                 }
+             });
+         },
+         async *generateStream(prompt, maxTokens = 50, temperature = 0.7) {
+             // Encode the prompt
+             const encodeResultStr = g.EncodeText(prompt, true);
+             const encodeResult = JSON.parse(encodeResultStr);
+             if (!encodeResult.success || !encodeResult.ids) {
+                 throw new Error(encodeResult.error || "Failed to encode prompt");
+             }
+             const tokens = [...encodeResult.ids];
+             // Generate tokens one at a time
+             for (let i = 0; i < maxTokens; i++) {
+                 const resultStr = g.GenerateNextToken(tokens, temperature);
+                 const result = JSON.parse(resultStr);
+                 if (!result.success) {
+                     throw new Error(result.error || "Failed to generate token");
+                 }
+                 if (result.token === undefined) {
+                     break;
+                 }
+                 tokens.push(result.token);
+                 // Decode just this token
+                 const decodeResultStr = g.DecodeTokens([result.token], true);
+                 const decodeResult = JSON.parse(decodeResultStr);
+                 if (decodeResult.success && decodeResult.text) {
+                     yield decodeResult.text;
+                 }
+                 // Check for end of sequence
+                 if (result.is_eos) {
+                     break;
+                 }
+             }
+         },
+     };
+ }
package/dist/types.d.ts CHANGED
@@ -113,3 +113,99 @@ export declare enum ActivationType {
      LeakyReLU = 4,
      Linear = 5
  }
+ /**
+  * Result from tokenizer loading
+  */
+ export interface TokenizerLoadResult {
+     success: boolean;
+     vocab_size?: number;
+     message?: string;
+     error?: string;
+ }
+ /**
+  * Result from transformer model loading
+  */
+ export interface TransformerLoadResult {
+     success: boolean;
+     num_layers?: number;
+     hidden_size?: number;
+     vocab_size?: number;
+     message?: string;
+     error?: string;
+ }
+ /**
+  * Result from text encoding
+  */
+ export interface EncodeResult {
+     success: boolean;
+     ids?: number[];
+     error?: string;
+ }
+ /**
+  * Result from token decoding
+  */
+ export interface DecodeResult {
+     success: boolean;
+     text?: string;
+     error?: string;
+ }
+ /**
+  * Result from text generation
+  */
+ export interface GenerateResult {
+     success: boolean;
+     generated_text?: string;
+     error?: string;
+ }
+ /**
+  * Result from next token generation
+  */
+ export interface NextTokenResult {
+     success: boolean;
+     token?: number;
+     is_eos?: boolean;
+     error?: string;
+ }
+ /**
+  * Transformer API for LLM inference
+  */
+ export interface TransformerAPI {
+     /**
+      * Load tokenizer from JSON bytes
+      * @param tokenizerData - Uint8Array of tokenizer.json file
+      */
+     loadTokenizer(tokenizerData: Uint8Array): Promise<TokenizerLoadResult>;
+     /**
+      * Load transformer model from config and weights bytes
+      * @param configData - Uint8Array of config.json file
+      * @param weightsData - Uint8Array of model.safetensors file
+      */
+     loadModel(configData: Uint8Array, weightsData: Uint8Array): Promise<TransformerLoadResult>;
+     /**
+      * Encode text to token IDs
+      * @param text - Input text to encode
+      * @param addSpecialTokens - Whether to add special tokens (default: true)
+      */
+     encode(text: string, addSpecialTokens?: boolean): Promise<EncodeResult>;
+     /**
+      * Decode token IDs to text
+      * @param tokenIds - Array of token IDs
+      * @param skipSpecialTokens - Whether to skip special tokens (default: true)
+      */
+     decode(tokenIds: number[], skipSpecialTokens?: boolean): Promise<DecodeResult>;
+     /**
+      * Generate text from prompt (blocking, all tokens at once)
+      * @param prompt - Input prompt
+      * @param maxTokens - Maximum tokens to generate (default: 50)
+      * @param temperature - Sampling temperature (default: 0.7)
+      */
+     generate(prompt: string, maxTokens?: number, temperature?: number): Promise<GenerateResult>;
+     /**
+      * Generate text token-by-token (streaming)
+      * @param prompt - Input prompt
+      * @param maxTokens - Maximum tokens to generate (default: 50)
+      * @param temperature - Sampling temperature (default: 0.7)
+      * @yields Token text strings
+      */
+     generateStream(prompt: string, maxTokens?: number, temperature?: number): AsyncGenerator<string, void, unknown>;
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@openfluke/welvet",
-   "version": "0.1.2",
+   "version": "0.1.3",
    "description": "TypeScript/JavaScript bindings for LOOM neural network framework with WebAssembly support - GPU-accelerated machine learning in the browser",
    "type": "module",
    "main": "./dist/index.js",