npm - @mlc-ai/web-xgrammar - Versions diffs - 0.1.0 - Mend

@mlc-ai/web-xgrammar 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/lib/xgrammar.d.ts ADDED Viewed

@@ -0,0 +1,295 @@
+/**
+ * Various testing methods that are not optimized for performance.
+ */
+export declare class Testings {
+    /**
+     * Convert a JSON schema string to an EBNF grammar string. For test purposes.
+     *
+     * @param {string} schema The schema string.
+     * @param {number} [indent=2] The number of spaces for indentation. If -1, the grammar will
+     * enforce the output to be in one line.
+     * @param {[string, string]} [separators] Two separators that will be enforced by the grammar:
+     * comma and colon. Examples: (",", ":"), (", ", ": "). If undefined, the default separators will
+     * be used: (",", ": ") when the indent is not undefined, and (", ", ": ") otherwise. This follows
+     * the convention in Python's json.dumps(). Custom separators are currently unsupported; the default value is always used.
+     * @param {boolean} [strictMode=true] Whether to use strict mode. In strict mode, the generated
+     * grammar will not allow properties and items that are not specified in the schema. This is
+     * equivalent to setting unevaluatedProperties and unevaluatedItems to false.
+     * @returns {Promise<string>} A promise that resolves to the EBNF grammar string.
+     */
+    static _jsonSchemaToEBNF(schema: string, indent?: number, separators?: [string, string], strictMode?: boolean): Promise<string>;
+    /**
+     * Get the token IDs that are rejected by a bitmask. For debugging purposes.
+     * @param {Int32Array} bitmask Bitmask returned by getNextTokenBitmask().
+     * @param {number} vocabSize Vocab size returned by getVocabSize().
+     * @param {number} [index=0] The batch index of the bitmask. For batch inference, bitmask[index] will
+     *  be used. Defaults to 0.
+     * @returns {Promise<Int32Array>} A promise that resolves to an array of the vocab IDs that will be rejected as a result of the bitmask.
+     */
+    static debugGetMaskedTokensFromBitmask(bitmask: Int32Array, vocabSize: number, index?: number): Promise<Int32Array>;
+}
+/**
+ * This class stores the abstract syntax tree (AST) of the Backus-Naur Form (BNF) grammar and
+ * provides utilities to parse and print the AST. Users should provide a BNF/EBNF (Extended
+ * Backus-Naur Form) grammar, and use Grammar.fromEBNF to parse and simplify the grammar into an
+ * AST of BNF grammar.
+ */
+export declare class Grammar {
+    handle: any;
+    /**
+     * @internal
+     * Not meant to be called directly. Use the static factory methods instead, since binding initialization is asynchronous.
+     * @param {any} handle handle of Grammar created by binding.
+     */
+    constructor(handle: any);
+    /**
+     * Dispose this Grammar.
+     */
+    dispose(): void;
+    /**
+     * Construct a BNF grammar from an EBNF-formatted string. The grammar will be normalized
+     * (simplified) by default.
+     * EBNF grammar: see https://www.w3.org/TR/xml/#sec-notation. Note:
+     * 1. Use # as the comment mark
+     * 2. Use C-style unicode escape sequence \u01AB, \U000001AB, \xAB
+     * 3. A-B (match A and not match B) is not supported yet
+     * 4. Lookahead assertion can be added at the end of a rule to speed up matching. E.g.
+     * ```
+     * root ::= "ab" a [a-z]
+     * a ::= "cd" (=[a-z])
+     * ```
+     * The assertion (=[a-z]) means a must be followed by [a-z].
+     * @param {string} ebnfString The grammar string
+     * @param {string} [rootRule="root"] The name of the root rule. Default: "root".
+     * @returns {Promise<Grammar>} A promise that resolves to the parsed BNF grammar.
+     */
+    static fromEBNF(ebnfString: string, rootRule?: string): Promise<Grammar>;
+    /**
+     * Get the grammar of standard JSON.
+     * @returns {Promise<Grammar>} A promise that resolves to the JSON grammar.
+     */
+    static builtinJSONGrammar(): Promise<Grammar>;
+    /**
+     * Construct a BNF grammar from the JSON schema string. The schema string should be in the
+     * format of the schema of a JSON file. We will parse the schema and generate a BNF grammar.
+     *
+     * @param {string} schema The schema string.
+     * @param {number} [indent=2] The number of spaces for indentation. If -1, the grammar will
+     * enforce the output to be in one line.
+     * @param {[string, string]} [separators] Two separators that will be enforced by the grammar:
+     * comma and colon. Examples: (",", ":"), (", ", ": "). If undefined, the default separators will
+     * be used: (",", ": ") when the indent is not undefined, and (", ", ": ") otherwise. This follows
+     * the convention in Python's json.dumps(). Custom separators are currently unsupported; the default value is always used.
+     * @param {boolean} [strictMode=true] Whether to use strict mode. In strict mode, the generated
+     * grammar will not allow properties and items that are not specified in the schema. This is
+     * equivalent to setting unevaluatedProperties and unevaluatedItems to false.
+     * @returns {Promise<Grammar>} A promise that resolves to the generated BNF grammar.
+     */
+    static fromJSONSchema(schema: string, indent?: number, separators?: [string, string], strictMode?: boolean): Promise<Grammar>;
+    /**
+     * Print the BNF grammar to a string, in standard BNF format.
+     * @returns {string} The BNF grammar string.
+     */
+    toString(): string;
+}
+/**
+ * A class that wraps a preprocessed vocab, needed to instantiate GrammarCompiler.
+ */
+export declare class TokenizerInfo {
+    handle: any;
+    /**
+     * @internal
+     * Not meant to be called directly. Use the static factory methods instead, since binding initialization is asynchronous.
+     * @param {any} handle  handle of TokenizerInfo created by binding.
+     */
+    constructor(handle: any);
+    /**
+     * Dispose this tokenizer info object.
+     */
+    dispose(): void;
+    /**
+     * Get the vocab size.
+     */
+    getVocabSize(): number;
+    /**
+     * Get the post-processed vocab. Returned as a handle of type binding.VectorString.
+     */
+    getDecodedVocabHandle(): any;
+    /**
+     * Instantiate with raw vocab and the vocab type by internally post-processing
+     * the raw vocab by decoding each token with the provided vocab type.
+     * @param {string[]} encodedVocab The vocab in the form of a string list of tokens,
+     * ordered by their token id. It should include all the special tokens.
+     * @param {string} vocabType Either "byte_fallback", "byte_level", or "raw". See `tokenizer.cc`
+     * for their semantics.
+     * @param {boolean} prependSpaceInTokenization Whether the tokenizer will prepend a space before
+     * the text in the tokenization process.
+     * @param {number} [vocabSize] The full vocab size read from `config.json`. If not provided, will
+     * use the length of `encodedVocab`. Note that some models have a larger vocab size in `config.json` due
+     * to padding. Essentially the size of the logits.
+     * @param {number[] | number} [stopTokenIds=undefined] Stop tokens to override the default ones.
+     */
+    static createTokenizerInfo(encodedVocab: string[], vocabType: string, prependSpaceInTokenization: boolean, vocabSize?: number, stopTokenIds?: number[] | number): Promise<TokenizerInfo>;
+}
+export declare class CompiledGrammar {
+    handle: any;
+    /**
+     * @internal
+     * Not meant to be called directly. Use the static factory methods instead, since binding initialization is asynchronous.
+     * @param {any} handle handle of CompiledGrammar created by binding.
+     */
+    constructor(handle: any);
+    /**
+     * Dispose this compiled grammar object.
+     */
+    dispose(): void;
+    /**
+     * @returns {Grammar} The grammar used to compile this CompiledGrammar.
+     */
+    grammar(): Grammar;
+    /**
+     * @returns {TokenizerInfo} The tokenizer info used to compile this CompiledGrammar.
+     */
+    tokenizerInfo(): TokenizerInfo;
+}
+export declare class GrammarCompiler {
+    handle: any;
+    /**
+     * @internal
+     * Private constructor. Factory methods are used since binding initialization is asynchronous.
+     * @param {any} handle handle of GrammarCompiler created by binding.
+     */
+    private constructor();
+    /**
+     * Dispose this grammar compiler object.
+     */
+    dispose(): void;
+    /**
+     * Create a GrammarCompiler from the given tokenizer info.
+     * @param {TokenizerInfo} tokenizerInfo The tokenizer info that contains preprocessed vocab.
+     * @param {boolean} [cacheEnabled=true] Whether to enable caching. Default is true.
+     */
+    static createGrammarCompiler(tokenizerInfo: TokenizerInfo, cacheEnabled?: boolean): Promise<GrammarCompiler>;
+    /**
+     * Get CompiledGrammar from the JSON schema string. The schema string should be in the
+     * format of the schema of a JSON file. We will parse the schema and generate a BNF grammar.
+     *
+     * @param {string} schema The schema string.
+     * @param {number} [indent=2] The number of spaces for indentation. If -1, the grammar will
+     * enforce the output to be in one line.
+     * @param {[string, string]} [separators] Two separators that will be enforced by the grammar:
+     * comma and colon. Examples: (",", ":"), (", ", ": "). If undefined, the default separators will
+     * be used: (",", ": ") when the indent is not undefined, and (", ", ": ") otherwise. This follows
+     * the convention in Python's json.dumps(). Custom separators are currently unsupported; the default value is always used.
+     * @param {boolean} [strictMode=true] Whether to use strict mode. In strict mode, the generated
+     * grammar will not allow properties and items that are not specified in the schema. This is
+     * equivalent to setting unevaluatedProperties and unevaluatedItems to false.
+     * @returns {Promise<CompiledGrammar>} A promise that resolves to the compiled grammar for the specified JSON schema.
+     */
+    compileJSONSchema(schema: string, indent?: number, separators?: [string, string], strictMode?: boolean): Promise<CompiledGrammar>;
+    /**
+     * @returns {Promise<CompiledGrammar>} A promise that resolves to the compiled grammar for JSON.
+     */
+    compileBuiltinJSONGrammar(): Promise<CompiledGrammar>;
+    /**
+     * Get CompiledGrammar from a Grammar object or from an EBNF-formatted string. The grammar will be normalized
+     * (simplified) by default.
+     * EBNF grammar: see https://www.w3.org/TR/xml/#sec-notation. Note:
+     * 1. Use # as the comment mark
+     * 2. Use C-style unicode escape sequence \u01AB, \U000001AB, \xAB
+     * 3. A-B (match A and not match B) is not supported yet
+     * 4. Lookahead assertion can be added at the end of a rule to speed up matching. E.g.
+     * ```
+     * root ::= "ab" a [a-z]
+     * a ::= "cd" (=[a-z])
+     * ```
+     * The assertion (=[a-z]) means a must be followed by [a-z].
+     * @param {Grammar | string} grammar A Grammar object, or the EBNF grammar string
+     * @param {string} [rootRule="root"] The name of the root rule; only accepted together with a grammar string. Default: "root".
+     * @returns {Promise<CompiledGrammar>} A promise that resolves to the compiled grammar for the specified grammar.
+     */
+    compileGrammar(grammar: Grammar): Promise<CompiledGrammar>;
+    compileGrammar(grammar: string, rootRule?: string): Promise<CompiledGrammar>;
+}
+/**
+ * A stateful matcher to match tokens to the specified BNF grammar. This class is the core logic
+ * of the grammar-guided generation.
+ *
+ * This class implements the non-deterministic pushdown automaton (NPDA) matching algorithm to
+ * match characters to a BNF grammar. It keeps track of the current state of the matching process by
+ * maintaining several stacks internally as possible paths in the NPDA. It also supports
+ * backtracking.
+ *
+ * It is particularly capable of finding the set of tokens that are acceptable for the next step
+ * and storing them in a bitmask. This aids in grammar-guided generation.
+ */
+export declare class GrammarMatcher {
+    private handle;
+    private vocab_size;
+    /**
+     * @internal
+     * Private constructor. Factory methods are used since binding initialization is asynchronous.
+     * @param {any} handle handle of GrammarMatcher created by binding.
+     */
+    private constructor();
+    /**
+     * Dispose this grammar state matcher.
+     */
+    dispose(): void;
+    /**
+     * Construct a GrammarMatcher.
+     * @param {CompiledGrammar} compiledGrammar A compiled grammar from GrammarCompiler.
+     * @param {number[] | number} [overrideStopTokens=undefined] Stop tokens to override the default ones.
+     * @param {boolean} [terminateWithoutStopToken=false] Whether to terminate without stop token.
+     * @param {number} [maxRollbackTokens=0] Max rollback tokens.
+     * @returns {Promise<GrammarMatcher>} A promise that resolves to the constructed GrammarMatcher.
+     */
+    static createGrammarMatcher(compiledGrammar: CompiledGrammar, overrideStopTokens?: number[] | number, terminateWithoutStopToken?: boolean, maxRollbackTokens?: number): Promise<GrammarMatcher>;
+    /**
+     * Get the maximum number of rollback tokens allowed.
+     */
+    getMaxRollbackTokens(): number;
+    /**
+     * Accept one token and update the state of the matcher.
+     * @param {number} tokenID The id of the token to accept.
+     * @param {boolean} [verbose=false] To print debugging info
+     * @returns {boolean} Whether the token is accepted.
+     */
+    acceptToken(tokenID: number, verbose?: boolean): boolean;
+    /**
+     * Accept an input string to the current state. For test purposes.
+     * @param {string} inputStr The input string to be accepted.
+     * @param {boolean} [verbose=false] To print debugging info
+     * @returns {boolean} Whether the input string is accepted.
+     */
+    _debugAcceptString(inputStr: string, verbose?: boolean): boolean;
+    /**
+     * Returns a bitmask in the form of an Int32Array of length ceildiv(vocab_size, 32)
+     * based on what tokens can/cannot be accepted by the current state of the grammar state matcher.
+     *
+     * @returns {Promise<Int32Array>} A promise that resolves to an array representing the bitmask that masks the rejected token IDs
+     */
+    getNextTokenBitmask(): Promise<Int32Array>;
+    /**
+     * Check if the matcher has accepted the stop token and terminated. See also
+     * GrammarMatcher.acceptToken.
+     */
+    isTerminated(): boolean;
+    /**
+     * Reset the matcher to the initial state.
+     */
+    reset(): void;
+    /**
+     * Find the jump-forward string for jump-forward decoding. This is the longest string that
+     * will be valid according to the current syntax.
+     * @returns {string} The jump-forward string.
+     */
+    findJumpForwardString(): string;
+    /**
+     * Roll back the matcher to a previous state.
+     * @param {number} numTokens The number of tokens to roll back. It cannot exceed the current
+     * number of steps, nor can it exceed the specified maximum number of rollback tokens.
+     */
+    rollBack(numTokens: number): void;
+}
+//# sourceMappingURL=xgrammar.d.ts.map

package/lib/xgrammar.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"xgrammar.d.ts","sourceRoot":"","sources":["../src/xgrammar.ts"],"names":[],"mappings":"AAUA;;GAEG;AACH,qBAAa,QAAQ;IACnB;;;;;;;;;;;;;;OAcG;WACU,iBAAiB,CAC5B,MAAM,EAAE,MAAM,EACd,MAAM,SAAI,EACV,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAC7B,UAAU,UAAO,GAChB,OAAO,CAAC,MAAM,CAAC;IAiBlB;;;;;;;OAOG;WACU,+BAA+B,CAC1C,OAAO,EAAE,UAAU,EACnB,SAAS,EAAE,MAAM,EACjB,KAAK,GAAE,MAAU,GAChB,OAAO,CAAC,UAAU,CAAC;CAavB;AAED;;;;;GAKG;AACH,qBAAa,OAAO;IAClB,MAAM,EAAE,GAAG,CAAC;IAEZ;;;;OAIG;gBACS,MAAM,EAAE,GAAG;IAIvB;;OAEG;IACH,OAAO;IAIP;;;;;;;;;;;;;;;;OAgBG;WACU,QAAQ,CAAC,UAAU,EAAE,MAAM,EAAE,QAAQ,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC;IAK9E;;;OAGG;WACU,kBAAkB,IAAI,OAAO,CAAC,OAAO,CAAC;IAKnD;;;;;;;;;;;;;;;OAeG;WACU,cAAc,CACzB,MAAM,EAAE,MAAM,EACd,MAAM,SAAI,EACV,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAC7B,UAAU,UAAO,GAChB,OAAO,CAAC,OAAO,CAAC;IAkBnB;;;OAGG;IACH,QAAQ,IAAI,MAAM;CAGnB;AAED;;GAEG;AACH,qBAAa,aAAa;IACxB,MAAM,EAAE,GAAG,CAAC;IAEZ;;;;OAIG;gBACS,MAAM,EAAE,GAAG;IAIvB;;OAEG;IACH,OAAO;IAIP;;OAEG;IACH,YAAY,IAAI,MAAM;IAItB;;OAEG;IACH,qBAAqB,IAAI,GAAG;IAI5B;;;;;;;;;;;;;OAaG;WACU,mBAAmB,CAC9B,YAAY,EAAE,MAAM,EAAE,EACtB,SAAS,EAAE,MAAM,EACjB,0BAA0B,EAAE,OAAO,EACnC,SAAS,CAAC,EAAE,MAAM,EAClB,YAAY,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,GAC/B,OAAO,CAAC,aAAa,CAAC;CAoB1B;AAED,qBAAa,eAAe;IAC1B,MAAM,EAAE,GAAG,CAAC;IAEZ;;;;OAIG;gBACS,MAAM,EAAE,GAAG;IAIvB;;OAEG;IACH,OAAO;IAIP;;OAEG;IACH,OAAO,IAAI,OAAO;IAIlB;;OAEG;IACH,aAAa,IAAI,aAAa;CAG/B;AAED,qBAAa,eAAe;IAC1B,MAAM,EAAE,GAAG,CAAC;IAEZ;;;;OAIG;IACH,OAAO;IAIP;;OAEG;IACH,OAAO;IAIP;;;;OAIG;WACU,qBAAqB,CAChC,aAAa,EAAE,aAAa,EAC5B,YAAY,GAAE,OAAc,GAC3B,OAAO,CAAC,eAAe,CAAC;IAU3B;;;;;;;;;;;;;;;OAeG;IACG,iBAAiB,CACrB,MAAM,EAAE,MAAM,EACd,MAAM,SAAI,EACV,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAC7B,UAAU,UAAO,GAChB,OAAO,CAAC,eAAe,CAAC;IAkB3B;;OAEG;IACG,yBAAyB,IAAI,OAAO,CAAC,eAAe,CAAC;IAK3D;;;;;;;;;;;;;;;;OAgBG;IACG,cAAc,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,eAAe,CAAC;IAC1D,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;CAUnF;AAED;;;;;;;;;;;GAWG
;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,MAAM,CAAM;IACpB,OAAO,CAAC,UAAU,CAAS;IAE3B;;;;OAIG;IACH,OAAO;IAKP;;OAEG;IACH,OAAO;IAIP;;;;;;;OAOG;WACU,oBAAoB,CAC/B,eAAe,EAAE,eAAe,EAChC,kBAAkB,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,EACtC,yBAAyB,GAAE,OAAe,EAC1C,iBAAiB,GAAE,MAAU,GAC5B,OAAO,CAAC,cAAc,CAAC;IAiB1B;;OAEG;IACH,oBAAoB,IAAI,MAAM;IAI9B;;;;;OAKG;IACH,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,OAAe,GAAG,OAAO;IAI/D;;;;;OAKG;IACH,kBAAkB,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,GAAE,OAAe,GAAG,OAAO;IAIvE;;;;;OAKG;IACG,mBAAmB,IAAI,OAAO,CAAC,UAAU,CAAC;IAShD;;;OAGG;IACH,YAAY,IAAI,OAAO;IAIvB;;OAEG;IACH,KAAK,IAAI,IAAI;IAIb;;;;OAIG;IACH,qBAAqB,IAAI,MAAM;IAI/B;;;;OAIG;IACH,QAAQ,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI;CAGlC"}

package/package.json ADDED Viewed

@@ -0,0 +1,44 @@
+{
+  "name": "@mlc-ai/web-xgrammar",
+  "version": "0.1.0",
+  "description": "",
+  "main": "lib/index.js",
+  "types": "lib/index.d.ts",
+  "type": "module",
+  "scripts": {
+    "build": "./build.sh; rollup -c",
+    "lint": "npx eslint .",
+    "test": "./run_test.sh"
+  },
+  "files": [
+    "lib"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/mlc-ai/xgrammar"
+  },
+  "keywords": [
+    "machine_learning",
+    "llm",
+    "nlp"
+  ],
+  "license": "Apache-2.0",
+  "homepage": "https://github.com/mlc-ai/xgrammar/tree/main/web",
+  "devDependencies": {
+    "@jest/globals": "^29.7.0",
+    "@rollup/plugin-commonjs": "^20.0.0",
+    "@rollup/plugin-node-resolve": "^13.0.4",
+    "@rollup/plugin-wasm": "^5.1.2",
+    "@types/jest": "^29.5.12",
+    "@typescript-eslint/eslint-plugin": "^5.59.6",
+    "@typescript-eslint/parser": "^5.59.6",
+    "eslint": "^8.41.0",
+    "jest": "^29.7.0",
+    "rollup": "^2.56.2",
+    "rollup-plugin-typescript2": "^0.34.1",
+    "ts-jest": "^29.2.5",
+    "tslib": "^2.3.1",
+    "typescript": "^4.9.5",
+    "@mlc-ai/web-tokenizers": "^0.1.5"
+  }
+}