transformers-llguidance 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,7 @@
1
+ Copyright 2026 Delton Ding
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,244 @@
1
+ # llguidance-js
2
+
3
+ Structured output generation for [transformers.js](https://github.com/huggingface/transformers.js) using [llguidance](https://github.com/guidance-ai/llguidance).
4
+
5
+ This library enables constrained text generation in the browser and Node.js by integrating the high-performance llguidance Rust library with transformers.js via WebAssembly.
6
+
7
+ ## Features
8
+
9
+ - **JSON Schema constraints** - Generate valid JSON matching any JSON Schema
10
+ - **Regex patterns** - Constrain output to match regular expressions
11
+ - **Lark grammars** - Full CFG support for complex structured output
12
+ - **Speculative decoding** - Optimized performance with fast-path token validation
13
+ - **Zero server dependencies** - Runs entirely in browser/Node.js
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ npm install transformers-llguidance
19
+ ```
20
+
21
+ ## Quick Start
22
+
23
+ ```typescript
24
+ import { pipeline } from '@huggingface/transformers';
25
+ import {
26
+ GuidanceParser,
27
+ GuidanceLogitsProcessor,
28
+ extractTokenizerData,
29
+ } from 'transformers-llguidance';
30
+
31
+ // Load a model
32
+ const generator = await pipeline('text-generation', 'Xenova/gpt2');
33
+
34
+ // Extract tokenizer data
35
+ const tokenizerData = extractTokenizerData(generator.tokenizer);
36
+
37
+ // Create a parser with JSON schema constraint
38
+ const parser = await GuidanceParser.create({
39
+ type: 'json_schema',
40
+ schema: {
41
+ type: 'object',
42
+ properties: {
43
+ name: { type: 'string' },
44
+ age: { type: 'number' }
45
+ },
46
+ required: ['name', 'age']
47
+ }
48
+ }, tokenizerData);
49
+
50
+ // Create logits processor
51
+ const processor = new GuidanceLogitsProcessor(parser);
52
+
53
+ // Generate constrained output
54
+ const output = await generator('Generate a person:', {
55
+ max_new_tokens: 50,
56
+ logits_processor: [processor],
57
+ });
58
+
59
+ console.log(output[0].generated_text);
60
+ // Output will always be valid JSON matching the schema
61
+ ```
62
+
63
+ ## Grammar Types
64
+
65
+ ### JSON Schema
66
+
67
+ ```typescript
68
+ const grammar = {
69
+ type: 'json_schema',
70
+ schema: {
71
+ type: 'object',
72
+ properties: {
73
+ name: { type: 'string' },
74
+ age: { type: 'integer', minimum: 0 }
75
+ },
76
+ required: ['name', 'age']
77
+ }
78
+ };
79
+ ```
80
+
81
+ ### Regex Pattern
82
+
83
+ ```typescript
84
+ const grammar = {
85
+ type: 'regex',
86
+ pattern: '[a-zA-Z]+@[a-zA-Z]+\\.[a-zA-Z]{2,}'
87
+ };
88
+ ```
89
+
90
+ ### Lark Grammar (CFG)
91
+
92
+ ```typescript
93
+ const grammar = {
94
+ type: 'lark',
95
+ grammar: `
96
+ start: expr
97
+ expr: term (("+"|"-") term)*
98
+ term: NUMBER
99
+ NUMBER: /[0-9]+/
100
+ `,
101
+ startSymbol: 'start'
102
+ };
103
+ ```
104
+
105
+ ## API Reference
106
+
107
+ ### `GuidanceParser`
108
+
109
+ The core parser that wraps the llguidance WASM module.
110
+
111
+ ```typescript
112
+ class GuidanceParser {
113
+ // Create a new parser instance
114
+ static async create(grammar: Grammar, tokenizer: TokenizerData): Promise<GuidanceParser>;
115
+
116
+ // Fast O(1) check if a token is allowed
117
+ isTokenAllowed(tokenId: number): boolean;
118
+
119
+ // Get full token mask (slower, use for fallback)
120
+ getTokenMask(): Uint8Array;
121
+
122
+ // Advance parser state after token selection
123
+ advance(tokenId: number): void;
124
+
125
+ // Check if generation can terminate
126
+ isComplete(): boolean;
127
+
128
+ // Reset parser for reuse
129
+ reset(): void;
130
+
131
+ // Get vocabulary size
132
+ get vocabSize(): number;
133
+ }
134
+ ```
135
+
136
+ ### `GuidanceLogitsProcessor`
137
+
138
+ Logits processor compatible with transformers.js.
139
+
140
+ ```typescript
141
+ class GuidanceLogitsProcessor {
142
+ constructor(parser: GuidanceParser, options?: ProcessorOptions);
143
+
144
+ // Process logits (called by transformer.js)
145
+ process(inputIds: number[], logits: Float32Array): Float32Array;
146
+
147
+ // Advance state after sampling (call after each token)
148
+ onToken(tokenId: number): void;
149
+
150
+ // Check if generation can stop
151
+ canStop(): boolean;
152
+
153
+ // Reset for new generation
154
+ reset(): void;
155
+ }
156
+
157
+ interface ProcessorOptions {
158
+ // Number of top tokens to try before full mask (default: 5)
159
+ speculationDepth?: number;
160
+
161
+ // Enable debug logging (default: false)
162
+ debug?: boolean;
163
+ }
164
+ ```
165
+
166
+ ### Tokenizer Utilities
167
+
168
+ ```typescript
169
+ // Extract tokenizer data from transformer.js tokenizer
170
+ function extractTokenizerData(tokenizer: TransformersTokenizer): TokenizerData;
171
+
172
+ // Load tokenizer data directly from HuggingFace Hub
173
+ async function loadTokenizerData(modelId: string, options?: {
174
+ token?: string;
175
+ baseUrl?: string;
176
+ }): Promise<TokenizerData>;
177
+ ```
178
+
179
+ ## How It Works
180
+
181
+ 1. **Grammar compilation**: llguidance compiles your grammar (JSON schema, regex, or Lark) into an efficient state machine
182
+ 2. **Speculative checking**: During generation, we first check if the model's top-k predicted tokens are valid (fast path)
183
+ 3. **Fallback masking**: If no top-k tokens are valid, we compute the full token mask (slower path)
184
+ 4. **Logit modification**: Invalid tokens have their logits set to -∞, ensuring they're never sampled
185
+
186
+ ### Generation Loop
187
+
188
+ 1. Model produces logits
189
+ 2. GuidanceLogitsProcessor.process() called
190
+ 1. Try top-5 tokens with is_token_allowed()
191
+ 2. If hit: mask all except winner
192
+ 3. If miss: compute full mask with get_token_mask()
193
+ 3. Sample from modified logits
194
+ 4. Call processor.onToken() with sampled token
195
+ 5. Repeat until processor.canStop() or max tokens
196
+
197
+ ## Building from Source
198
+
199
+ ### Prerequisites
200
+
201
+ - Node.js 18+
202
+ - Rust toolchain with `wasm32-unknown-unknown` target
203
+ - wasm-pack
204
+
205
+ ### Build
206
+
207
+ ```bash
208
+ # Install dependencies
209
+ npm install
210
+
211
+ # Build WASM module
212
+ npm run build:wasm
213
+
214
+ # Build TypeScript
215
+ npm run build
216
+
217
+ # Run tests
218
+ npm test
219
+ ```
220
+
221
+ ## Performance Tips
222
+
223
+ 1. **Use speculative decoding**: The default `speculationDepth: 5` works well for most cases. Increase for models with more uncertain predictions.
224
+
225
+ 2. **Reuse parsers**: Create the parser once and call `reset()` between generations instead of creating new instances.
226
+
227
+ 3. **Batch processing**: When generating multiple outputs with the same grammar, reuse the same parser instance.
228
+
229
+ ## Limitations
230
+
231
+ - Currently requires the WASM module to be built from source
232
+ - Some llguidance features may require adjustment for WASM compatibility
233
+ - Large grammars may increase WASM binary size
234
+
235
+ ## License
236
+
237
+ MIT
238
+
239
+ ## Acknowledgments
240
+
241
+ - [llguidance](https://github.com/guidance-ai/llguidance) - The Rust library powering the structured output
242
+ - [transformers.js](https://github.com/huggingface/transformers.js) - Machine learning in the browser
243
+ - [wasm-bindgen](https://github.com/rustwasm/wasm-bindgen) - Rust/WebAssembly interop
244
+
@@ -0,0 +1,5 @@
1
+ export { GuidanceParser } from './parser';
2
+ export { GuidanceLogitsProcessor } from './processor';
3
+ export { extractTokenizerData, loadTokenizerData, type TransformersTokenizer, } from './tokenizer-bridge';
4
+ export type { Grammar, JsonSchemaGrammar, RegexGrammar, LarkGrammar, ProcessorOptions, TokenizerData, } from './types';
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,UAAU,CAAC;AAC1C,OAAO,EAAE,uBAAuB,EAAE,MAAM,aAAa,CAAC;AACtD,OAAO,EACL,oBAAoB,EACpB,iBAAiB,EACjB,KAAK,qBAAqB,GAC3B,MAAM,oBAAoB,CAAC;AAC5B,YAAY,EACV,OAAO,EACP,iBAAiB,EACjB,YAAY,EACZ,WAAW,EACX,gBAAgB,EAChB,aAAa,GACd,MAAM,SAAS,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,292 @@
1
// Bundler (esbuild-style) helpers for class field initialisation: public
// fields are installed as own, enumerable, writable, configurable properties
// via Object.defineProperty ([[Define]] semantics).
var __defProp = Object.defineProperty;
var __defNormalProp = (target, name, val) =>
  name in target
    ? __defProp(target, name, { enumerable: true, configurable: true, writable: true, value: val })
    : (target[name] = val);
// Non-symbol keys are coerced to strings before being defined.
var __publicField = (target, name, val) =>
  __defNormalProp(target, typeof name !== "symbol" ? `${name}` : name, val);
4
+ let w, m, b, v;
5
+ let __tla = (async () => {
6
// GuidanceParser: thin JS wrapper around the llguidance WASM parser.
// Instances must be built with the async factory `create()`; a bare
// constructor leaves the object uninitialised, and every public method
// guards on that via ensureInitialized().
m = class {
  constructor() {
    this.wasmParser = undefined; // set by initialize()
    this._isInitialized = false;
  }

  // Async factory: constructs an instance and loads/initialises the WASM
  // module with the given grammar and tokenizer data.
  static async create(grammar, tokenizerData) {
    const parser = new m();
    await parser.initialize(grammar, tokenizerData);
    return parser;
  }

  async initialize(grammar, tokenizerData) {
    // Lazily load the WASM glue module. `__tla` is the bundler's
    // top-level-await handshake and must settle before the module is used.
    const wasm = await import("./llguidance_wasm-BGZUyUcI.js").then(async (mod) => {
      await mod.__tla;
      return mod;
    });
    const grammarJson = JSON.stringify(this.convertGrammar(grammar));
    const tokenizerJson = JSON.stringify(tokenizerData);
    this.wasmParser = new wasm.LLGuidanceParser(grammarJson, tokenizerJson);
    this._isInitialized = true;
  }

  // Translate the public Grammar union into llguidance's top-level grammar
  // JSON shape ({ grammars: [...] }).
  convertGrammar(grammar) {
    switch (grammar.type) {
      case "json_schema":
        return { grammars: [{ json_schema: grammar.schema }] };
      case "regex":
        return { grammars: [{ rx: grammar.pattern }] };
      case "lark":
        return {
          grammars: [{ lark: grammar.grammar, start: grammar.startSymbol ?? "start" }],
        };
      default:
        // Previously an unrecognised type fell through the switch and
        // produced `undefined`, which only surfaced later as a cryptic
        // WASM-side failure. Fail fast with a clear message instead.
        throw new Error(`Unknown grammar type: ${grammar.type}`);
    }
  }

  // O(1) check whether a single token is currently allowed.
  isTokenAllowed(tokenId) {
    this.ensureInitialized();
    return this.wasmParser.is_token_allowed(tokenId);
  }

  // Full per-token mask; slower, intended as the fallback path.
  getTokenMask() {
    this.ensureInitialized();
    return this.wasmParser.get_token_mask();
  }

  // Advance parser state after a token has been selected.
  advance(tokenId) {
    this.ensureInitialized();
    this.wasmParser.advance(tokenId);
  }

  // Whether generation may terminate at the current state.
  isComplete() {
    this.ensureInitialized();
    return this.wasmParser.is_complete();
  }

  // Reset for reuse; optionally install a new grammar.
  reset(grammar) {
    this.ensureInitialized();
    if (grammar) {
      this.wasmParser.reset(JSON.stringify(this.convertGrammar(grammar)));
    } else {
      // NOTE(review): empty string appears to mean "keep current grammar" —
      // confirm against the WASM reset() contract.
      this.wasmParser.reset("");
    }
  }

  get vocabSize() {
    this.ensureInitialized();
    return this.wasmParser.vocab_size();
  }

  ensureInitialized() {
    if (!this._isInitialized) {
      throw new Error("GuidanceParser not initialized. Use GuidanceParser.create() to create an instance.");
    }
  }
};
76
// Logits processor compatible with the transformers.js generation loop.
// Strategy: cheaply probe the model's top-k candidates against the parser
// and only compute the full token mask when none of them is allowed.
w = class {
  constructor(parser, options = {}) {
    this.parser = parser;
    // How many top-logit tokens to probe before falling back to a full mask.
    this.speculationDepth = options.speculationDepth ?? 5;
    this.debug = options.debug ?? false;
    // Tokens recorded via onToken(), in generation order.
    this.generatedTokens = [];
  }

  // Called with raw logits; returns logits with disallowed tokens forced to
  // -Infinity. `inputIds` is accepted for interface compatibility but unused.
  process(inputIds, logits) {
    const candidates = this.getTopK(logits, this.speculationDepth);
    if (this.debug) {
      console.log(`[GuidanceProcessor] Top-${this.speculationDepth} tokens:`, candidates);
    }
    // Speculative fast path: the first allowed candidate wins outright.
    for (const { tokenId } of candidates) {
      if (this.parser.isTokenAllowed(tokenId)) {
        if (this.debug) {
          console.log(`[GuidanceProcessor] Speculation hit: token ${tokenId}`);
        }
        return this.maskAllExcept(logits, tokenId);
      }
    }
    if (this.debug) {
      console.log("[GuidanceProcessor] Speculation miss, computing full mask");
    }
    return this.applyBitmask(logits, this.parser.getTokenMask());
  }

  // Record the sampled token and advance the parser state.
  onToken(tokenId) {
    this.generatedTokens.push(tokenId);
    this.parser.advance(tokenId);
    if (this.debug) {
      console.log(`[GuidanceProcessor] Advanced with token ${tokenId}`);
    }
  }

  // True when the grammar allows generation to stop here.
  canStop() {
    return this.parser.isComplete();
  }

  // Clear recorded tokens and reset the underlying parser.
  reset() {
    this.generatedTokens = [];
    this.parser.reset();
  }

  // Defensive copy of the tokens seen so far.
  getGeneratedTokens() {
    return [...this.generatedTokens];
  }

  // Top-k (tokenId, logit) pairs by descending logit. Sorts the whole
  // vocabulary (O(n log n)), which is fine for typical vocab sizes.
  getTopK(logits, k) {
    const scored = Array.from(logits, (logit, tokenId) => ({ tokenId, logit }));
    scored.sort((a, b) => b.logit - a.logit);
    return scored.slice(0, k);
  }

  // Fresh logits array with every entry except `keep` set to -Infinity.
  maskAllExcept(logits, keep) {
    const masked = new Float32Array(logits.length).fill(-Infinity);
    masked[keep] = logits[keep];
    return masked;
  }

  // Mutates `logits` in place: entries whose mask entry is 0 are disallowed.
  // NOTE(review): assumes one mask element per token (not bit-packed) —
  // confirm against the WASM get_token_mask() contract.
  applyBitmask(logits, mask) {
    for (let i = 0; i < logits.length; i++) {
      if (mask[i] === 0) logits[i] = -Infinity;
    }
    return logits;
  }
};
123
// Extract TokenizerData from a live transformers.js tokenizer instance.
// The vocabulary is looked for in several known locations, newest API first.
b = function (tokenizer) {
  let vocab;
  if (tokenizer.getVocab) {
    vocab = tokenizer.getVocab();
  } else if (tokenizer.model?.tokens_to_ids) {
    vocab = u(tokenizer.model.tokens_to_ids);
  } else if (tokenizer.model?.vocab) {
    vocab = u(tokenizer.model.vocab);
  } else if (tokenizer.vocab) {
    vocab = u(tokenizer.vocab);
  } else {
    throw new Error("Unable to extract vocabulary from tokenizer. Ensure you are passing a valid transformer.js tokenizer instance.");
  }

  const merges = tokenizer.model?.merges ?? [];

  // Normalise added-token records, defaulting each optional flag.
  const addedTokens = (tokenizer.added_tokens ?? []).map((tok) => ({
    id: tok.id,
    content: tok.content,
    single_word: tok.single_word ?? false,
    lstrip: tok.lstrip ?? false,
    rstrip: tok.rstrip ?? false,
    normalized: tok.normalized ?? true,
    special: tok.special ?? false,
  }));

  return {
    vocab,
    merges,
    added_tokens: addedTokens,
    model_type: k(tokenizer),
    eos_token_id: f(tokenizer, vocab, "eos"),
    bos_token_id: f(tokenizer, vocab, "bos"),
    pad_token_id: f(tokenizer, vocab, "pad"),
    unk_token_id: f(tokenizer, vocab, "unk"),
  };
};
151
// Resolve a special-token id (role is "eos" | "bos" | "pad" | "unk") from a
// transformers.js tokenizer, in priority order:
//   1. explicit numeric `<role>_token_id` field on the tokenizer,
//   2. `<role>_token` string field looked up in the vocab,
//   3. a list of conventional token spellings looked up in the vocab.
// Returns undefined when nothing matches. (The original had four copy-pasted
// per-role branches; they are consolidated into one table here, with the
// candidate lists preserved verbatim.)
function f(tokenizer, vocab, role) {
  const idField = `${role}_token_id`;
  if (typeof tokenizer[idField] === "number") return tokenizer[idField];

  const token = tokenizer[`${role}_token`];
  if (token && vocab[token] !== undefined) return vocab[token];

  // Conventional spellings per role; bracketed forms cover BERT-style
  // tokenizers ([SEP]/[CLS]/[PAD]/[UNK]).
  const fallbacks = {
    eos: ["</s>", "<|endoftext|>", "<eos>", "<|eos|>", "[SEP]"],
    bos: ["<s>", "<|startoftext|>", "<bos>", "<|bos|>", "[CLS]"],
    pad: ["<pad>", "<|pad|>", "[PAD]"],
    unk: ["<unk>", "<|unk|>", "[UNK]"],
  };
  for (const candidate of fallbacks[role] ?? []) {
    if (vocab[candidate] !== undefined) return vocab[candidate];
  }
  return undefined;
}
193
// Accept a vocabulary as either a Map or a plain object; always return a
// plain object (Maps are converted, objects are passed through untouched).
function u(vocab) {
  return vocab instanceof Map ? Object.fromEntries(vocab) : vocab;
}
201
// Heuristic model-type detection: a non-empty merges list implies BPE;
// anything else is reported as "unknown".
function k(tokenizer) {
  const merges = tokenizer.model?.merges;
  return merges && merges.length > 0 ? "bpe" : "unknown";
}
205
// Fetch tokenizer.json for a model from the HuggingFace Hub (or a mirror via
// options.baseUrl, authenticated via options.token) and normalise it into
// TokenizerData. Throws on any non-OK HTTP response.
v = async function (modelId, options = {}) {
  const url = `${options.baseUrl ?? "https://huggingface.co"}/${modelId}/resolve/main/tokenizer.json`;
  const headers = {};
  if (options.token) {
    headers.Authorization = `Bearer ${options.token}`;
  }
  const response = await fetch(url, { headers });
  if (!response.ok) {
    throw new Error(`Failed to fetch tokenizer from ${url}: ${response.status} ${response.statusText}`);
  }
  const payload = await response.json();
  return _(payload);
};
215
// Normalise a raw tokenizer.json payload into TokenizerData.
// Special-token ids are resolved from added_tokens first (matching content
// case-insensitively; the LAST matching added token wins), then from
// conventional vocab entries. Throws when model.vocab is missing.
// (The original had the candidate lists and four copy-pasted fallback loops
// duplicated inline; they are consolidated here with behaviour preserved.)
function _(raw) {
  if (!raw.model?.vocab) throw new Error("Invalid tokenizer.json: missing model.vocab");
  const vocab = raw.model.vocab;

  // Preserved quirk: added_tokens stays undefined (not []) when absent.
  const addedTokens = raw.added_tokens?.map((tok) => ({
    id: tok.id,
    content: tok.content,
    single_word: tok.single_word ?? false,
    lstrip: tok.lstrip ?? false,
    rstrip: tok.rstrip ?? false,
    normalized: tok.normalized ?? true,
    special: tok.special ?? false,
  }));

  // Spellings matched against lowercased added-token content.
  // NOTE(review): these lists are narrower than the vocab lists below
  // ("<|eos|>"/"<|bos|>" are absent) — kept as-is to preserve behaviour;
  // confirm whether that asymmetry is intentional.
  const addedNames = {
    eos: ["</s>", "<|endoftext|>", "<eos>"],
    bos: ["<s>", "<|startoftext|>", "<bos>"],
    pad: ["<pad>", "<|pad|>"],
    unk: ["<unk>", "<|unk|>"],
  };
  // Spellings looked up directly in the vocab when added_tokens had no match.
  const vocabNames = {
    eos: ["</s>", "<|endoftext|>", "<eos>", "<|eos|>"],
    bos: ["<s>", "<|startoftext|>", "<bos>", "<|bos|>"],
    pad: ["<pad>", "<|pad|>"],
    unk: ["<unk>", "<|unk|>"],
  };

  let eosId, bosId, padId, unkId;
  if (addedTokens) {
    for (const tok of addedTokens) {
      const content = tok.content.toLowerCase();
      if (addedNames.eos.includes(content)) eosId = tok.id;
      else if (addedNames.bos.includes(content)) bosId = tok.id;
      else if (addedNames.pad.includes(content)) padId = tok.id;
      else if (addedNames.unk.includes(content)) unkId = tok.id;
    }
  }

  // First vocab entry found wins, in list order.
  const fromVocab = (names) => {
    for (const name of names) {
      if (vocab[name] !== undefined) return vocab[name];
    }
    return undefined;
  };
  eosId = eosId ?? fromVocab(vocabNames.eos);
  bosId = bosId ?? fromVocab(vocabNames.bos);
  padId = padId ?? fromVocab(vocabNames.pad);
  unkId = unkId ?? fromVocab(vocabNames.unk);

  return {
    vocab,
    merges: raw.model.merges ?? [],
    added_tokens: addedTokens,
    model_type: raw.model.type ?? "unknown",
    eos_token_id: eosId,
    bos_token_id: bosId,
    pad_token_id: padId,
    unk_token_id: unkId,
  };
}
285
+ })();
286
+ export {
287
+ w as GuidanceLogitsProcessor,
288
+ m as GuidanceParser,
289
+ __tla,
290
+ b as extractTokenizerData,
291
+ v as loadTokenizerData
292
+ };