PyPI - llguidance - Versions diffs - 1.1.1__cp39-abi3-win_amd64.whl → 1.2.0__cp39-abi3-win_amd64.whl - Mend

llguidance 1.1.1__cp39-abi3-win_amd64.whl → 1.2.0__cp39-abi3-win_amd64.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registry.

Files changed (8)

llguidance/_lib.pyd +0 -0
llguidance/_lib.pyi +43 -3
llguidance/numpy.py +14 -0
llguidance/torch.py +11 -0
{llguidance-1.1.1.dist-info → llguidance-1.2.0.dist-info}/METADATA +2 -2
{llguidance-1.1.1.dist-info → llguidance-1.2.0.dist-info}/RECORD +8 -8
{llguidance-1.1.1.dist-info → llguidance-1.2.0.dist-info}/WHEEL +1 -1
{llguidance-1.1.1.dist-info → llguidance-1.2.0.dist-info}/licenses/LICENSE +0 -0

llguidance/_lib.pyd CHANGED Viewed

Binary file

llguidance/_lib.pyi CHANGED Viewed

@@ -526,18 +526,49 @@ class LLExecutor:
         self,
         interpreters: List[Tuple[LLMatcher, int]],
         trg_pointer: int,
-        one_mask_byte_size: int,
+        one_mask_bytes: int,
         trg_batch_size: int,
     ) -> None:
         """
         Compute the token mask directly into memory at the specified pointer.
         For each matcher, provide the index of the target mask.
-        If index is K, the memory will be written at trg_pointer + K * one_mask_byte_size,
+        If index is K, the memory will be written at trg_pointer + K * one_mask_bytes,
         where K < trg_batch_size.
-        Memory has to have size trg_batch_size * one_mask_byte_size.
+        Memory has to have size trg_batch_size * one_mask_bytes.
         Prefer to use fill_next_token_bitmask_par(), which wraps this.
         """
+    def unsafe_compute_mask_ptr_with_draft_token(
+        self,
+        interpreters: List[Tuple[LLMatcher, int, List[int]]],
+        trg_pointer: int,
+        one_mask_bytes: int,
+        trg_batch_size: int,
+    ) -> None:
+        """
+        Compute the token mask directly into memory at the specified pointer, including draft tokens.
+        This function extends unsafe_compute_mask_ptr() to handle draft tokens in speculative decoding.
+        For each matcher in the batch, it computes masks for both the current position and all draft tokens.
+        Args:
+            interpreters: List of tuples containing:
+                - LLMatcher: The matcher object for constrained generation
+                - int: Index K indicating the target mask position (K < trg_batch_size)
+                - List[int]: Draft tokens to be processed for speculative decoding
+            trg_pointer: Memory address where mask data will be written
+            one_mask_bytes: Size in bytes of a single token mask
+            trg_batch_size: Total batch size for memory allocation validation
+        Memory Layout:
+            - Main mask written at: trg_pointer + K * one_mask_bytes
+            - Draft token i mask written at: trg_pointer + (K + i + 1) * one_mask_bytes
+            - Total memory required: trg_batch_size * one_mask_bytes
+        The function processes each matcher's draft tokens sequentially, advancing the matcher state
+        for each valid token until encountering an invalid token or termination condition.
+        State rollback is performed to maintain matcher consistency.
+        """
 class JsonCompileOptions(TypedDict, total=False):
     # defaults to ","
@@ -565,6 +596,7 @@ class LLParserLimits:
         max_lexer_states: Optional[int] = None,
         max_grammar_size: Optional[int] = None,
         precompute_large_lexemes: Optional[bool] = None,
+        verbose_errors: Optional[bool] = None,
     ) -> None:
         """
         ParserLimits configuration for controlling parser and lexer resource usage.
@@ -597,6 +629,10 @@ class LLParserLimits:
             precompute_large_lexemes (Optional[bool]):
                 Whether to run large regexes eagerly on the entire token trie during lexer build.
                 Increases lexer construction time, but speeds up mask computation. Default: True.
+            verbose_errors (Optional[bool]):
+                If true, include parser state and grammar details in error messages.
+                Useful for debugging; may leak schema/state in logs. Default: True.
         """
     @property
@@ -627,6 +663,10 @@ class LLParserLimits:
     def precompute_large_lexemes(self) -> bool:
         """Precompute large regexes during lexer construction. Default: True"""
+    @property
+    def verbose_errors(self) -> bool:
+        """Include parser state and grammar in errors. Default: True"""
 def regex_to_lark(regex: str, use_ascii: str = "d") -> str:
     r"""

llguidance/numpy.py CHANGED Viewed

@@ -66,3 +66,17 @@ def fill_next_token_bitmask_par(executor: LLExecutor,
     batch, vocab = bitmask.shape
     assert bitmask.flags["C_CONTIGUOUS"], "Mask must be contiguous"
     executor.unsafe_compute_mask_ptr(matchers, bitmask.ctypes.data, vocab * 4, batch)
+def fill_next_token_bitmask_par_with_draft_tokens(executor: LLExecutor,
+                                matchers: List[Tuple[LLMatcher, int, List[int]]],
+                                bitmask: NDArray[np.int32]) -> None:
+    """
+    Compute the token mask directly into the specified array.
+    For each matcher, provide the index of the target mask.
+    """
+    assert bitmask.dtype == np.int32, "Mask must be int32"
+    assert bitmask.ndim == 2, "Mask must be 2D"
+    batch, vocab = bitmask.shape
+    assert bitmask.flags["C_CONTIGUOUS"], "Mask must be contiguous"
+    executor.unsafe_compute_mask_ptr_with_draft_token(matchers, bitmask.ctypes.data, vocab * 4, batch)

llguidance/torch.py CHANGED Viewed

@@ -66,3 +66,14 @@ def fill_next_token_bitmask_par(executor: LLExecutor,
     assert bitmask.is_contiguous(), "Mask must be contiguous"
     executor.unsafe_compute_mask_ptr(matchers, bitmask.data_ptr(), vocab * 4,
                                      batch)
+def fill_next_token_bitmask_par_with_draft_tokens(executor: LLExecutor,
+                                matchers: List[Tuple[LLMatcher, int, List[int]]],
+                                bitmask: torch.Tensor) -> None:
+    assert bitmask.dtype == torch.int32, "Mask must be int32"
+    assert bitmask.is_cpu, "Mask must be on CPU"
+    assert bitmask.dim() == 2, "Mask must be 2D"
+    batch, vocab = bitmask.shape
+    assert bitmask.is_contiguous(), "Mask must be contiguous"
+    executor.unsafe_compute_mask_ptr_with_draft_token(matchers, bitmask.data_ptr(), vocab * 4, batch)

{llguidance-1.1.1.dist-info → llguidance-1.2.0.dist-info}/METADATA RENAMED Viewed

@@ -1,10 +1,10 @@
 Metadata-Version: 2.4
 Name: llguidance
-Version: 1.1.1
+Version: 1.2.0
 License-File: LICENSE
 Summary: Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance
 Author: Michal Moskal
-License: MIT
+License-Expression: MIT
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
 Project-URL: repository, https://github.com/microsoft/llguidance

{llguidance-1.1.1.dist-info → llguidance-1.2.0.dist-info}/RECORD RENAMED Viewed

@@ -1,10 +1,10 @@
-llguidance-1.1.1.dist-info/METADATA,sha256=sGRiU_0_1316nEaOxyPtcskGuwk7EUTWa5sTmsUg9Wg,10425
-llguidance-1.1.1.dist-info/WHEEL,sha256=IOvNb_Dp11OuRIpo2OWqTSUXKUtcHN63pkGBkO879SE,94
-llguidance-1.1.1.dist-info/licenses/LICENSE,sha256=mQaUD2Gx8LUz-n2ZuvVReLKAj74RPqUd-_rYVyzNXys,1162
+llguidance-1.2.0.dist-info/METADATA,sha256=nidxJzRXFMnv7QS874s9-I00HSTVvxKFFOunY0O75EY,10436
+llguidance-1.2.0.dist-info/WHEEL,sha256=2XatmAWXBfp_P6DUtFAtbdzzba6f_xbhEtpqsZt_zEg,94
+llguidance-1.2.0.dist-info/licenses/LICENSE,sha256=mQaUD2Gx8LUz-n2ZuvVReLKAj74RPqUd-_rYVyzNXys,1162
 llguidance/__init__.py,sha256=tlrM-GzMdClgSIEblgA4ixP10h51VDJY5ejIP5FFJJw,620
 llguidance/_grammar_from.py,sha256=kEgwhVYBq3NE_O9MGjIRpci7dgOneLC9CvVGWpKEO84,2452
-llguidance/_lib.pyd,sha256=TeIQ9qmb3j3Ov57hPHnmd1BJBVHpYDBGrhUKcUCUwkM,7544832
-llguidance/_lib.pyi,sha256=sxxOUh24-RL6voiR0npIQHj8znMOcrFM43NU1cB8-_c,22870
+llguidance/_lib.pyd,sha256=A4GN_LLSo1y68TyTRyQE-hSRgOXlJrnkRguqyehpYpE,7582208
+llguidance/_lib.pyi,sha256=tc0mb-ZKH3LjodEf_aIdLPXFLuEbqGI3flxQpxZn-Bw,24893
 llguidance/_struct_tag.py,sha256=wEJd9KGZTpS-R1ZXyyRFmr-48aYOZw_tI4PhIRy9h4g,4548
 llguidance/_tokenizer.py,sha256=9jonF41IUypGtV6lVUl9l9FGAyn9HNnDb9HKNDywZuQ,1120
 llguidance/_util.py,sha256=6RMYO61KUynAym7KcbDn9VEMGcGdv7rj2yo6fITC-u8,274
@@ -13,8 +13,8 @@ llguidance/gbnf_to_lark.py,sha256=YsFNyVXrzG3-cjqlcrcuoG1RdquVtZJz43X_lSAXL7Y,17
 llguidance/hf.py,sha256=3LNNqC4_KafPe6bKP-Ba0aGsOVg3MqvuMvY6QYth0fY,1980
 llguidance/llamacpp.py,sha256=2LdNjiNku-8xGcswGEQga57FUsRgzdf4Z4BrLVBUfAg,2352
 llguidance/mlx.py,sha256=Q2cNocnFB3xqCCg614iE0wPcumBFJBvo0pG5JTpShds,2494
-llguidance/numpy.py,sha256=gUQkqJMWWlz9T-Vohtk6pH2XXqP0-RCoaWIOBgRHBhY,2721
+llguidance/numpy.py,sha256=eMIihG5rcPWFMMWjwl-ZPSkcEo8BXKZFUHaIHkxnFhw,3409
 llguidance/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 llguidance/tiktoken.py,sha256=sE4QWj88dANfdM0Y50ZvFI7ku6zBFomUCE4bRTtewF4,1135
-llguidance/torch.py,sha256=SgSutKZzhdlwFPRwD8fxLiPJsD45YYriNbM4Cr21jK0,2863
-llguidance-1.1.1.dist-info/RECORD,,
+llguidance/torch.py,sha256=PzTI7wRiznXnH7u4_iEP6NL2wmL2cN8p-UlR0PE8JJ4,3451
+llguidance-1.2.0.dist-info/RECORD,,

{llguidance-1.1.1.dist-info → llguidance-1.2.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: maturin (1.9.1)
+Generator: maturin (1.9.3)
 Root-Is-Purelib: false
 Tag: cp39-abi3-win_amd64

{llguidance-1.1.1.dist-info → llguidance-1.2.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes