llguidance 1.1.1__cp39-abi3-macosx_10_12_x86_64.whl → 1.2.0__cp39-abi3-macosx_10_12_x86_64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
llguidance/_lib.abi3.so CHANGED
Binary file
llguidance/_lib.pyi CHANGED
@@ -526,18 +526,49 @@ class LLExecutor:
526
526
  self,
527
527
  interpreters: List[Tuple[LLMatcher, int]],
528
528
  trg_pointer: int,
529
- one_mask_byte_size: int,
529
+ one_mask_bytes: int,
530
530
  trg_batch_size: int,
531
531
  ) -> None:
532
532
  """
533
533
  Compute the token mask directly into memory at the specified pointer.
534
534
  For each matcher, provide the index of the target mask.
535
- If index is K, the memory will be written at trg_pointer + K * one_mask_byte_size,
535
+ If index is K, the memory will be written at trg_pointer + K * one_mask_bytes,
536
536
  where K < trg_batch_size.
537
- Memory has to have size trg_batch_size * one_mask_byte_size.
537
+ Memory has to have size trg_batch_size * one_mask_bytes.
538
538
  Prefer to use fill_next_token_bitmask_par(), which wraps this.
539
539
  """
540
540
 
541
+ def unsafe_compute_mask_ptr_with_draft_token(
542
+ self,
543
+ interpreters: List[Tuple[LLMatcher, int, List[int]]],
544
+ trg_pointer: int,
545
+ one_mask_bytes: int,
546
+ trg_batch_size: int,
547
+ ) -> None:
548
+ """
549
+ Compute the token mask directly into memory at the specified pointer, including draft tokens.
550
+
551
+ This function extends unsafe_compute_mask_ptr() to handle draft tokens in speculative decoding.
552
+ For each matcher in the batch, it computes masks for both the current position and all draft tokens.
553
+
554
+ Args:
555
+ interpreters: List of tuples containing:
556
+ - LLMatcher: The matcher object for constrained generation
557
+ - int: Index K indicating the target mask position (K < trg_batch_size)
558
+ - List[int]: Draft tokens to be processed for speculative decoding
559
+ trg_pointer: Memory address where mask data will be written
560
+ one_mask_bytes: Size in bytes of a single token mask
561
+ trg_batch_size: Number of mask slots in the target buffer; used to validate target mask indices
562
+
563
+ Memory Layout:
564
+ - Main mask written at: trg_pointer + K * one_mask_bytes
565
+ - Draft token i mask written at: trg_pointer + (K + i + 1) * one_mask_bytes
566
+ - Total memory required: trg_batch_size * one_mask_bytes
567
+
568
+ The function processes each matcher's draft tokens sequentially, advancing the matcher state
569
+ for each valid token until encountering an invalid token or termination condition.
570
+ State rollback is performed to maintain matcher consistency.
571
+ """
541
572
 
542
573
  class JsonCompileOptions(TypedDict, total=False):
543
574
  # defaults to ","
@@ -565,6 +596,7 @@ class LLParserLimits:
565
596
  max_lexer_states: Optional[int] = None,
566
597
  max_grammar_size: Optional[int] = None,
567
598
  precompute_large_lexemes: Optional[bool] = None,
599
+ verbose_errors: Optional[bool] = None,
568
600
  ) -> None:
569
601
  """
570
602
  ParserLimits configuration for controlling parser and lexer resource usage.
@@ -597,6 +629,10 @@ class LLParserLimits:
597
629
  precompute_large_lexemes (Optional[bool]):
598
630
  Whether to run large regexes eagerly on the entire token trie during lexer build.
599
631
  Increases lexer construction time, but speeds up mask computation. Default: True.
632
+
633
+ verbose_errors (Optional[bool]):
634
+ If true, include parser state and grammar details in error messages.
635
+ Useful for debugging; may leak schema/state in logs. Default: True.
600
636
  """
601
637
 
602
638
  @property
@@ -627,6 +663,10 @@ class LLParserLimits:
627
663
  def precompute_large_lexemes(self) -> bool:
628
664
  """Precompute large regexes during lexer construction. Default: True"""
629
665
 
666
+ @property
667
+ def verbose_errors(self) -> bool:
668
+ """Include parser state and grammar in errors. Default: True"""
669
+
630
670
 
631
671
  def regex_to_lark(regex: str, use_ascii: str = "d") -> str:
632
672
  r"""
llguidance/numpy.py CHANGED
@@ -66,3 +66,17 @@ def fill_next_token_bitmask_par(executor: LLExecutor,
66
66
  batch, vocab = bitmask.shape
67
67
  assert bitmask.flags["C_CONTIGUOUS"], "Mask must be contiguous"
68
68
  executor.unsafe_compute_mask_ptr(matchers, bitmask.ctypes.data, vocab * 4, batch)
69
+
70
+
71
+ def fill_next_token_bitmask_par_with_draft_tokens(executor: LLExecutor,
72
+ matchers: List[Tuple[LLMatcher, int, List[int]]],
73
+ bitmask: NDArray[np.int32]) -> None:
74
+ """
75
+ Compute the token masks directly into the specified array, including masks for draft tokens.
76
+ For each matcher, provide the index of the target mask and the list of draft tokens.
77
+ """
78
+ assert bitmask.dtype == np.int32, "Mask must be int32"
79
+ assert bitmask.ndim == 2, "Mask must be 2D"
80
+ batch, vocab = bitmask.shape
81
+ assert bitmask.flags["C_CONTIGUOUS"], "Mask must be contiguous"
82
+ executor.unsafe_compute_mask_ptr_with_draft_token(matchers, bitmask.ctypes.data, vocab * 4, batch)
llguidance/torch.py CHANGED
@@ -66,3 +66,14 @@ def fill_next_token_bitmask_par(executor: LLExecutor,
66
66
  assert bitmask.is_contiguous(), "Mask must be contiguous"
67
67
  executor.unsafe_compute_mask_ptr(matchers, bitmask.data_ptr(), vocab * 4,
68
68
  batch)
69
+
70
+
71
+ def fill_next_token_bitmask_par_with_draft_tokens(executor: LLExecutor,
72
+ matchers: List[Tuple[LLMatcher, int, List[int]]],
73
+ bitmask: torch.Tensor) -> None:
74
+ assert bitmask.dtype == torch.int32, "Mask must be int32"
75
+ assert bitmask.is_cpu, "Mask must be on CPU"
76
+ assert bitmask.dim() == 2, "Mask must be 2D"
77
+ batch, vocab = bitmask.shape
78
+ assert bitmask.is_contiguous(), "Mask must be contiguous"
79
+ executor.unsafe_compute_mask_ptr_with_draft_token(matchers, bitmask.data_ptr(), vocab * 4, batch)
@@ -1,10 +1,10 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llguidance
3
- Version: 1.1.1
3
+ Version: 1.2.0
4
4
  License-File: LICENSE
5
5
  Summary: Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance
6
6
  Author: Michal Moskal
7
- License: MIT
7
+ License-Expression: MIT
8
8
  Requires-Python: >=3.9
9
9
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
10
10
  Project-URL: repository, https://github.com/microsoft/llguidance
@@ -1,10 +1,10 @@
1
- llguidance-1.1.1.dist-info/METADATA,sha256=DvMvXzhwcvLyd3X4laEvyQo8P-eW2Yc_jb5zxtAn-X4,10289
2
- llguidance-1.1.1.dist-info/WHEEL,sha256=otjeGSkQeIMJC2Imx0w7hDDdv_y5KWUU19tBY9uEXoI,104
3
- llguidance-1.1.1.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
1
+ llguidance-1.2.0.dist-info/METADATA,sha256=0zDkSmKb7n-5DphoLqb3-upZlzpwWBQUHUUBW57F-Eg,10300
2
+ llguidance-1.2.0.dist-info/WHEEL,sha256=TiMJekJwYXi-5FCpHPqncJXv9UVKDzSHt4YRv5UDSSg,104
3
+ llguidance-1.2.0.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
4
4
  llguidance/__init__.py,sha256=F9svXvm6oafbuUf_eq34PHJV4c7-yN133vmbWN6nIkc,590
5
5
  llguidance/_grammar_from.py,sha256=-vHqkPqJe6t0JKKuQhlUu08kYpPIVknMh8tZlh8FYeQ,2384
6
- llguidance/_lib.abi3.so,sha256=HlHFxvjB0ml4bI0u1mi8tpsoNbse3srdhV5N27qTmVI,8583884
7
- llguidance/_lib.pyi,sha256=HFe4XkQR7Moey9T6uJ8INrzTsACJReWaBwcYExK4yj4,22230
6
+ llguidance/_lib.abi3.so,sha256=1H38cQvfwzSB7DgGPgBjVEcG24iBcjm7TvORFmVhrlA,8625700
7
+ llguidance/_lib.pyi,sha256=1m69VQ-7XaQLJKf6Vw5qf-UEh1cEigVMZM0Hj-XgSiw,24213
8
8
  llguidance/_struct_tag.py,sha256=83okmGWShxZud7S2vHjPRiInhFw0QVTHkeN8wtR8hR8,4430
9
9
  llguidance/_tokenizer.py,sha256=yC-RcgyMZN-olV-PnN4XkjlH-fOU8E9jrwO8VkXLv4M,1084
10
10
  llguidance/_util.py,sha256=6JV5SxjoH7hZPaSHhPRD_G6JzIhbKFFTqWTpp88VIiU,260
@@ -13,8 +13,8 @@ llguidance/gbnf_to_lark.py,sha256=32XJ5Dzq-iSySnkV_rLaNZ888JjHBIr_QkSYdhtMAME,16
13
13
  llguidance/hf.py,sha256=sLJKZxGpftuAY5eSCYpogzim3WZA7-vs9SOVgnzf3xI,1933
14
14
  llguidance/llamacpp.py,sha256=d_LjNbomBhj7uTo90h0muvPkOMso8NLe7H_YfPCqA8U,2284
15
15
  llguidance/mlx.py,sha256=ydMNSjnI2db8F685waOSWQLIoQTB9oehGBXnp9T3A6Q,2427
16
- llguidance/numpy.py,sha256=Z4888IfdT5oAfq4m2gn6ARfkaJ9d7nxMS79g_t8aXkg,2653
16
+ llguidance/numpy.py,sha256=ZxHoQ_OmAgUHmxa_6KA8ruuKzXZLjmanPM9BYlKI0bo,3327
17
17
  llguidance/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
18
  llguidance/tiktoken.py,sha256=-mYDPxq4LM2FI9K8kLL0Us0qN-fZpNK0qIwboDPjfSk,1101
19
- llguidance/torch.py,sha256=7LmVtRX8-fdUULcpPh4KYuDwVrHFJEvyVxzhyQflpgM,2795
20
- llguidance-1.1.1.dist-info/RECORD,,
19
+ llguidance/torch.py,sha256=qLwQjaxZbvaVss7Qy_XP-SEPGYRqJRkvd6KE4LmzZJ8,3372
20
+ llguidance-1.2.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: maturin (1.9.1)
2
+ Generator: maturin (1.9.3)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp39-abi3-macosx_10_12_x86_64