transformers-llguidance 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,7 @@
1
+ Copyright 2026 Delton Ding
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,244 @@
1
+ # llguidance-js
2
+
3
+ Structured output generation for [transformers.js](https://github.com/huggingface/transformers.js) using [llguidance](https://github.com/guidance-ai/llguidance).
4
+
5
+ This library enables constrained text generation in the browser and Node.js by integrating the high-performance llguidance Rust library with transformers.js via WebAssembly.
6
+
7
+ ## Features
8
+
9
+ - **JSON Schema constraints** - Generate valid JSON matching any JSON Schema
10
+ - **Regex patterns** - Constrain output to match regular expressions
11
+ - **Lark grammars** - Full CFG support for complex structured output
12
+ - **Speculative decoding** - Optimized performance with fast-path token validation
13
+ - **Zero server dependencies** - Runs entirely in browser/Node.js
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ npm install transformers-llguidance
19
+ ```
20
+
21
+ ## Quick Start
22
+
23
+ ```typescript
24
+ import { pipeline } from '@huggingface/transformers';
25
+ import {
26
+ GuidanceParser,
27
+ GuidanceLogitsProcessor,
28
+ extractTokenizerData,
29
+ } from 'transformers-llguidance';
30
+
31
+ // Load a model
32
+ const generator = await pipeline('text-generation', 'Xenova/gpt2');
33
+
34
+ // Extract tokenizer data
35
+ const tokenizerData = extractTokenizerData(generator.tokenizer);
36
+
37
+ // Create a parser with JSON schema constraint
38
+ const parser = await GuidanceParser.create({
39
+ type: 'json_schema',
40
+ schema: {
41
+ type: 'object',
42
+ properties: {
43
+ name: { type: 'string' },
44
+ age: { type: 'number' }
45
+ },
46
+ required: ['name', 'age']
47
+ }
48
+ }, tokenizerData);
49
+
50
+ // Create logits processor
51
+ const processor = new GuidanceLogitsProcessor(parser);
52
+
53
+ // Generate constrained output
54
+ const output = await generator('Generate a person:', {
55
+ max_new_tokens: 50,
56
+ logits_processor: [processor],
57
+ });
58
+
59
+ console.log(output[0].generated_text);
60
+ // Output will always be valid JSON matching the schema
61
+ ```
62
+
63
+ ## Grammar Types
64
+
65
+ ### JSON Schema
66
+
67
+ ```typescript
68
+ const grammar = {
69
+ type: 'json_schema',
70
+ schema: {
71
+ type: 'object',
72
+ properties: {
73
+ name: { type: 'string' },
74
+ age: { type: 'integer', minimum: 0 }
75
+ },
76
+ required: ['name', 'age']
77
+ }
78
+ };
79
+ ```
80
+
81
+ ### Regex Pattern
82
+
83
+ ```typescript
84
+ const grammar = {
85
+ type: 'regex',
86
+ pattern: '[a-zA-Z]+@[a-zA-Z]+\\.[a-zA-Z]{2,}'
87
+ };
88
+ ```
89
+
90
+ ### Lark Grammar (CFG)
91
+
92
+ ```typescript
93
+ const grammar = {
94
+ type: 'lark',
95
+ grammar: `
96
+ start: expr
97
+ expr: term (("+"|"-") term)*
98
+ term: NUMBER
99
+ NUMBER: /[0-9]+/
100
+ `,
101
+ startSymbol: 'start'
102
+ };
103
+ ```
104
+
105
+ ## API Reference
106
+
107
+ ### `GuidanceParser`
108
+
109
+ The core parser that wraps the llguidance WASM module.
110
+
111
+ ```typescript
112
+ class GuidanceParser {
113
+ // Create a new parser instance
114
+ static async create(grammar: Grammar, tokenizer: TokenizerData): Promise<GuidanceParser>;
115
+
116
+ // Fast O(1) check if a token is allowed
117
+ isTokenAllowed(tokenId: number): boolean;
118
+
119
+ // Get full token mask (slower, use for fallback)
120
+ getTokenMask(): Uint8Array;
121
+
122
+ // Advance parser state after token selection
123
+ advance(tokenId: number): void;
124
+
125
+ // Check if generation can terminate
126
+ isComplete(): boolean;
127
+
128
+ // Reset parser for reuse
129
+ reset(): void;
130
+
131
+ // Get vocabulary size
132
+ get vocabSize(): number;
133
+ }
134
+ ```
135
+
136
+ ### `GuidanceLogitsProcessor`
137
+
138
+ Logits processor compatible with transformers.js.
139
+
140
+ ```typescript
141
+ class GuidanceLogitsProcessor {
142
+ constructor(parser: GuidanceParser, options?: ProcessorOptions);
143
+
144
+ // Process logits (called by transformer.js)
145
+ process(inputIds: number[], logits: Float32Array): Float32Array;
146
+
147
+ // Advance state after sampling (call after each token)
148
+ onToken(tokenId: number): void;
149
+
150
+ // Check if generation can stop
151
+ canStop(): boolean;
152
+
153
+ // Reset for new generation
154
+ reset(): void;
155
+ }
156
+
157
+ interface ProcessorOptions {
158
+ // Number of top tokens to try before full mask (default: 5)
159
+ speculationDepth?: number;
160
+
161
+ // Enable debug logging (default: false)
162
+ debug?: boolean;
163
+ }
164
+ ```
165
+
166
+ ### Tokenizer Utilities
167
+
168
+ ```typescript
169
+ // Extract tokenizer data from transformer.js tokenizer
170
+ function extractTokenizerData(tokenizer: TransformersTokenizer): TokenizerData;
171
+
172
+ // Load tokenizer data directly from HuggingFace Hub
173
+ async function loadTokenizerData(modelId: string, options?: {
174
+ token?: string;
175
+ baseUrl?: string;
176
+ }): Promise<TokenizerData>;
177
+ ```
178
+
179
+ ## How It Works
180
+
181
+ 1. **Grammar compilation**: llguidance compiles your grammar (JSON schema, regex, or Lark) into an efficient state machine
182
+ 2. **Speculative checking**: During generation, we first check if the model's top-k predicted tokens are valid (fast path)
183
+ 3. **Fallback masking**: If no top-k tokens are valid, we compute the full token mask (slower path)
184
+ 4. **Logit modification**: Invalid tokens have their logits set to -∞, ensuring they're never sampled
185
+
186
+ ### Generation Loop
187
+
188
+ 1. Model produces logits
189
+ 2. GuidanceLogitsProcessor.process() called
190
+ 1. Try top-5 tokens with is_token_allowed()
191
+ 2. If hit: mask all except winner
192
+ 3. If miss: compute full mask with get_token_mask()
193
+ 3. Sample from modified logits
194
+ 4. Call processor.onToken() with sampled token
195
+ 5. Repeat until processor.canStop() or max tokens
196
+
197
+ ## Building from Source
198
+
199
+ ### Prerequisites
200
+
201
+ - Node.js 18+
202
+ - Rust toolchain with `wasm32-unknown-unknown` target
203
+ - wasm-pack
204
+
205
+ ### Build
206
+
207
+ ```bash
208
+ # Install dependencies
209
+ npm install
210
+
211
+ # Build WASM module
212
+ npm run build:wasm
213
+
214
+ # Build TypeScript
215
+ npm run build
216
+
217
+ # Run tests
218
+ npm test
219
+ ```
220
+
221
+ ## Performance Tips
222
+
223
+ 1. **Use speculative decoding**: The default `speculationDepth: 5` works well for most cases. Increase for models with more uncertain predictions.
224
+
225
+ 2. **Reuse parsers**: Create the parser once and call `reset()` between generations instead of creating new instances.
226
+
227
+ 3. **Batch processing**: When generating multiple outputs with the same grammar, reuse the same parser instance.
228
+
229
+ ## Limitations
230
+
231
+ - Currently requires the WASM module to be built from source
232
+ - Some llguidance features may require adjustment for WASM compatibility
233
+ - Large grammars may increase WASM binary size
234
+
235
+ ## License
236
+
237
+ MIT
238
+
239
+ ## Acknowledgments
240
+
241
+ - [llguidance](https://github.com/guidance-ai/llguidance) - The Rust library powering the structured output
242
+ - [transformers.js](https://github.com/huggingface/transformers.js) - Machine learning in the browser
243
+ - [wasm-bindgen](https://github.com/rustwasm/wasm-bindgen) - Rust/WebAssembly interop
244
+
@@ -0,0 +1,5 @@
1
+ export { GuidanceParser } from './parser';
2
+ export { GuidanceLogitsProcessor } from './processor';
3
+ export { extractTokenizerData, loadTokenizerData, type TransformersTokenizer, } from './tokenizer-bridge';
4
+ export type { Grammar, JsonSchemaGrammar, RegexGrammar, LarkGrammar, ProcessorOptions, TokenizerData, } from './types';
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AAC1C,OAAO,EAAE,uBAAuB,EAAE,MAAM,aAAa,CAAC;AACtD,OAAO,EACL,oBAAoB,EACpB,iBAAiB,EACjB,KAAK,qBAAqB,GAC3B,MAAM,oBAAoB,CAAC;AAC5B,YAAY,EACV,OAAO,EACP,iBAAiB,EACjB,YAAY,EACZ,WAAW,EACX,gBAAgB,EAChB,aAAa,GACd,MAAM,SAAS,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,292 @@
1
// Bundler (esbuild-style) helpers for class field initialisation: public
// fields are installed as own, enumerable, writable, configurable properties
// via Object.defineProperty ([[Define]] semantics).
var __defProp = Object.defineProperty;
var __defNormalProp = (target, name, val) =>
  name in target
    ? __defProp(target, name, { enumerable: true, configurable: true, writable: true, value: val })
    : (target[name] = val);
// Non-symbol keys are coerced to strings before being defined.
var __publicField = (target, name, val) =>
  __defNormalProp(target, typeof name !== "symbol" ? `${name}` : name, val);
4
+ let w, m, b, v;
5
+ let __tla = (async () => {
6
// GuidanceParser: thin JS wrapper around the llguidance WASM parser.
// Instances must be built with the async factory `create()`; a bare
// constructor leaves the object uninitialised, and every public method
// guards on that via ensureInitialized().
m = class {
  constructor() {
    this.wasmParser = undefined; // set by initialize()
    this._isInitialized = false;
  }

  // Async factory: constructs an instance and loads/initialises the WASM
  // module with the given grammar and tokenizer data.
  static async create(grammar, tokenizerData) {
    const parser = new m();
    await parser.initialize(grammar, tokenizerData);
    return parser;
  }

  async initialize(grammar, tokenizerData) {
    // Lazily load the WASM glue module. `__tla` is the bundler's
    // top-level-await handshake and must settle before the module is used.
    const wasm = await import("./llguidance_wasm-BGZUyUcI.js").then(async (mod) => {
      await mod.__tla;
      return mod;
    });
    const grammarJson = JSON.stringify(this.convertGrammar(grammar));
    const tokenizerJson = JSON.stringify(tokenizerData);
    this.wasmParser = new wasm.LLGuidanceParser(grammarJson, tokenizerJson);
    this._isInitialized = true;
  }

  // Translate the public Grammar union into llguidance's top-level grammar
  // JSON shape ({ grammars: [...] }).
  convertGrammar(grammar) {
    switch (grammar.type) {
      case "json_schema":
        return { grammars: [{ json_schema: grammar.schema }] };
      case "regex":
        return { grammars: [{ rx: grammar.pattern }] };
      case "lark":
        return {
          grammars: [{ lark: grammar.grammar, start: grammar.startSymbol ?? "start" }],
        };
      default:
        // Previously an unrecognised type fell through the switch and
        // produced `undefined`, which only surfaced later as a cryptic
        // WASM-side failure. Fail fast with a clear message instead.
        throw new Error(`Unknown grammar type: ${grammar.type}`);
    }
  }

  // O(1) check whether a single token is currently allowed.
  isTokenAllowed(tokenId) {
    this.ensureInitialized();
    return this.wasmParser.is_token_allowed(tokenId);
  }

  // Full per-token mask; slower, intended as the fallback path.
  getTokenMask() {
    this.ensureInitialized();
    return this.wasmParser.get_token_mask();
  }

  // Advance parser state after a token has been selected.
  advance(tokenId) {
    this.ensureInitialized();
    this.wasmParser.advance(tokenId);
  }

  // Whether generation may terminate at the current state.
  isComplete() {
    this.ensureInitialized();
    return this.wasmParser.is_complete();
  }

  // Reset for reuse; optionally install a new grammar.
  reset(grammar) {
    this.ensureInitialized();
    if (grammar) {
      this.wasmParser.reset(JSON.stringify(this.convertGrammar(grammar)));
    } else {
      // NOTE(review): empty string appears to mean "keep current grammar" —
      // confirm against the WASM reset() contract.
      this.wasmParser.reset("");
    }
  }

  get vocabSize() {
    this.ensureInitialized();
    return this.wasmParser.vocab_size();
  }

  ensureInitialized() {
    if (!this._isInitialized) {
      throw new Error("GuidanceParser not initialized. Use GuidanceParser.create() to create an instance.");
    }
  }
};
76
// Logits processor compatible with the transformers.js generation loop.
// Strategy: cheaply probe the model's top-k candidates against the parser
// and only compute the full token mask when none of them is allowed.
w = class {
  constructor(parser, options = {}) {
    this.parser = parser;
    // How many top-logit tokens to probe before falling back to a full mask.
    this.speculationDepth = options.speculationDepth ?? 5;
    this.debug = options.debug ?? false;
    // Tokens recorded via onToken(), in generation order.
    this.generatedTokens = [];
  }

  // Called with raw logits; returns logits with disallowed tokens forced to
  // -Infinity. `inputIds` is accepted for interface compatibility but unused.
  process(inputIds, logits) {
    const candidates = this.getTopK(logits, this.speculationDepth);
    if (this.debug) {
      console.log(`[GuidanceProcessor] Top-${this.speculationDepth} tokens:`, candidates);
    }
    // Speculative fast path: the first allowed candidate wins outright.
    for (const { tokenId } of candidates) {
      if (this.parser.isTokenAllowed(tokenId)) {
        if (this.debug) {
          console.log(`[GuidanceProcessor] Speculation hit: token ${tokenId}`);
        }
        return this.maskAllExcept(logits, tokenId);
      }
    }
    if (this.debug) {
      console.log("[GuidanceProcessor] Speculation miss, computing full mask");
    }
    return this.applyBitmask(logits, this.parser.getTokenMask());
  }

  // Record the sampled token and advance the parser state.
  onToken(tokenId) {
    this.generatedTokens.push(tokenId);
    this.parser.advance(tokenId);
    if (this.debug) {
      console.log(`[GuidanceProcessor] Advanced with token ${tokenId}`);
    }
  }

  // True when the grammar allows generation to stop here.
  canStop() {
    return this.parser.isComplete();
  }

  // Clear recorded tokens and reset the underlying parser.
  reset() {
    this.generatedTokens = [];
    this.parser.reset();
  }

  // Defensive copy of the tokens seen so far.
  getGeneratedTokens() {
    return [...this.generatedTokens];
  }

  // Top-k (tokenId, logit) pairs by descending logit. Sorts the whole
  // vocabulary (O(n log n)), which is fine for typical vocab sizes.
  getTopK(logits, k) {
    const scored = Array.from(logits, (logit, tokenId) => ({ tokenId, logit }));
    scored.sort((a, b) => b.logit - a.logit);
    return scored.slice(0, k);
  }

  // Fresh logits array with every entry except `keep` set to -Infinity.
  maskAllExcept(logits, keep) {
    const masked = new Float32Array(logits.length).fill(-Infinity);
    masked[keep] = logits[keep];
    return masked;
  }

  // Mutates `logits` in place: entries whose mask entry is 0 are disallowed.
  // NOTE(review): assumes one mask element per token (not bit-packed) —
  // confirm against the WASM get_token_mask() contract.
  applyBitmask(logits, mask) {
    for (let i = 0; i < logits.length; i++) {
      if (mask[i] === 0) logits[i] = -Infinity;
    }
    return logits;
  }
};
123
// Extract TokenizerData from a live transformers.js tokenizer instance.
// The vocabulary is looked for in several known locations, newest API first.
b = function (tokenizer) {
  let vocab;
  if (tokenizer.getVocab) {
    vocab = tokenizer.getVocab();
  } else if (tokenizer.model?.tokens_to_ids) {
    vocab = u(tokenizer.model.tokens_to_ids);
  } else if (tokenizer.model?.vocab) {
    vocab = u(tokenizer.model.vocab);
  } else if (tokenizer.vocab) {
    vocab = u(tokenizer.vocab);
  } else {
    throw new Error("Unable to extract vocabulary from tokenizer. Ensure you are passing a valid transformer.js tokenizer instance.");
  }

  const merges = tokenizer.model?.merges ?? [];

  // Normalise added-token records, defaulting each optional flag.
  const addedTokens = (tokenizer.added_tokens ?? []).map((tok) => ({
    id: tok.id,
    content: tok.content,
    single_word: tok.single_word ?? false,
    lstrip: tok.lstrip ?? false,
    rstrip: tok.rstrip ?? false,
    normalized: tok.normalized ?? true,
    special: tok.special ?? false,
  }));

  return {
    vocab,
    merges,
    added_tokens: addedTokens,
    model_type: k(tokenizer),
    eos_token_id: f(tokenizer, vocab, "eos"),
    bos_token_id: f(tokenizer, vocab, "bos"),
    pad_token_id: f(tokenizer, vocab, "pad"),
    unk_token_id: f(tokenizer, vocab, "unk"),
  };
};
151
// Resolve a special-token id (role is "eos" | "bos" | "pad" | "unk") from a
// transformers.js tokenizer, in priority order:
//   1. explicit numeric `<role>_token_id` field on the tokenizer,
//   2. `<role>_token` string field looked up in the vocab,
//   3. a list of conventional token spellings looked up in the vocab.
// Returns undefined when nothing matches. (The original had four copy-pasted
// per-role branches; they are consolidated into one table here, with the
// candidate lists preserved verbatim.)
function f(tokenizer, vocab, role) {
  const idField = `${role}_token_id`;
  if (typeof tokenizer[idField] === "number") return tokenizer[idField];

  const token = tokenizer[`${role}_token`];
  if (token && vocab[token] !== undefined) return vocab[token];

  // Conventional spellings per role; bracketed forms cover BERT-style
  // tokenizers ([SEP]/[CLS]/[PAD]/[UNK]).
  const fallbacks = {
    eos: ["</s>", "<|endoftext|>", "<eos>", "<|eos|>", "[SEP]"],
    bos: ["<s>", "<|startoftext|>", "<bos>", "<|bos|>", "[CLS]"],
    pad: ["<pad>", "<|pad|>", "[PAD]"],
    unk: ["<unk>", "<|unk|>", "[UNK]"],
  };
  for (const candidate of fallbacks[role] ?? []) {
    if (vocab[candidate] !== undefined) return vocab[candidate];
  }
  return undefined;
}
193
// Accept a vocabulary as either a Map or a plain object; always return a
// plain object (Maps are converted, objects are passed through untouched).
function u(vocab) {
  return vocab instanceof Map ? Object.fromEntries(vocab) : vocab;
}
201
// Heuristic model-type detection: a non-empty merges list implies BPE;
// anything else is reported as "unknown".
function k(tokenizer) {
  const merges = tokenizer.model?.merges;
  return merges && merges.length > 0 ? "bpe" : "unknown";
}
205
// Fetch tokenizer.json for a model from the HuggingFace Hub (or a mirror via
// options.baseUrl, authenticated via options.token) and normalise it into
// TokenizerData. Throws on any non-OK HTTP response.
v = async function (modelId, options = {}) {
  const url = `${options.baseUrl ?? "https://huggingface.co"}/${modelId}/resolve/main/tokenizer.json`;
  const headers = {};
  if (options.token) {
    headers.Authorization = `Bearer ${options.token}`;
  }
  const response = await fetch(url, { headers });
  if (!response.ok) {
    throw new Error(`Failed to fetch tokenizer from ${url}: ${response.status} ${response.statusText}`);
  }
  const payload = await response.json();
  return _(payload);
};
215
// Normalise a raw tokenizer.json payload into TokenizerData.
// Special-token ids are resolved from added_tokens first (matching content
// case-insensitively; the LAST matching added token wins), then from
// conventional vocab entries. Throws when model.vocab is missing.
// (The original had the candidate lists and four copy-pasted fallback loops
// duplicated inline; they are consolidated here with behaviour preserved.)
function _(raw) {
  if (!raw.model?.vocab) throw new Error("Invalid tokenizer.json: missing model.vocab");
  const vocab = raw.model.vocab;

  // Preserved quirk: added_tokens stays undefined (not []) when absent.
  const addedTokens = raw.added_tokens?.map((tok) => ({
    id: tok.id,
    content: tok.content,
    single_word: tok.single_word ?? false,
    lstrip: tok.lstrip ?? false,
    rstrip: tok.rstrip ?? false,
    normalized: tok.normalized ?? true,
    special: tok.special ?? false,
  }));

  // Spellings matched against lowercased added-token content.
  // NOTE(review): these lists are narrower than the vocab lists below
  // ("<|eos|>"/"<|bos|>" are absent) — kept as-is to preserve behaviour;
  // confirm whether that asymmetry is intentional.
  const addedNames = {
    eos: ["</s>", "<|endoftext|>", "<eos>"],
    bos: ["<s>", "<|startoftext|>", "<bos>"],
    pad: ["<pad>", "<|pad|>"],
    unk: ["<unk>", "<|unk|>"],
  };
  // Spellings looked up directly in the vocab when added_tokens had no match.
  const vocabNames = {
    eos: ["</s>", "<|endoftext|>", "<eos>", "<|eos|>"],
    bos: ["<s>", "<|startoftext|>", "<bos>", "<|bos|>"],
    pad: ["<pad>", "<|pad|>"],
    unk: ["<unk>", "<|unk|>"],
  };

  let eosId, bosId, padId, unkId;
  if (addedTokens) {
    for (const tok of addedTokens) {
      const content = tok.content.toLowerCase();
      if (addedNames.eos.includes(content)) eosId = tok.id;
      else if (addedNames.bos.includes(content)) bosId = tok.id;
      else if (addedNames.pad.includes(content)) padId = tok.id;
      else if (addedNames.unk.includes(content)) unkId = tok.id;
    }
  }

  // First vocab entry found wins, in list order.
  const fromVocab = (names) => {
    for (const name of names) {
      if (vocab[name] !== undefined) return vocab[name];
    }
    return undefined;
  };
  eosId = eosId ?? fromVocab(vocabNames.eos);
  bosId = bosId ?? fromVocab(vocabNames.bos);
  padId = padId ?? fromVocab(vocabNames.pad);
  unkId = unkId ?? fromVocab(vocabNames.unk);

  return {
    vocab,
    merges: raw.model.merges ?? [],
    added_tokens: addedTokens,
    model_type: raw.model.type ?? "unknown",
    eos_token_id: eosId,
    bos_token_id: bosId,
    pad_token_id: padId,
    unk_token_id: unkId,
  };
}
285
+ })();
286
+ export {
287
+ w as GuidanceLogitsProcessor,
288
+ m as GuidanceParser,
289
+ __tla,
290
+ b as extractTokenizerData,
291
+ v as loadTokenizerData
292
+ };