llguidance 1.1.1__cp39-abi3-win_amd64.whl → 1.2.0__cp39-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llguidance/_lib.pyd +0 -0
- llguidance/_lib.pyi +43 -3
- llguidance/numpy.py +14 -0
- llguidance/torch.py +11 -0
- {llguidance-1.1.1.dist-info → llguidance-1.2.0.dist-info}/METADATA +2 -2
- {llguidance-1.1.1.dist-info → llguidance-1.2.0.dist-info}/RECORD +8 -8
- {llguidance-1.1.1.dist-info → llguidance-1.2.0.dist-info}/WHEEL +1 -1
- {llguidance-1.1.1.dist-info → llguidance-1.2.0.dist-info}/licenses/LICENSE +0 -0
llguidance/_lib.pyd
CHANGED
|
Binary file
|
llguidance/_lib.pyi
CHANGED
|
@@ -526,18 +526,49 @@ class LLExecutor:
|
|
|
526
526
|
self,
|
|
527
527
|
interpreters: List[Tuple[LLMatcher, int]],
|
|
528
528
|
trg_pointer: int,
|
|
529
|
-
|
|
529
|
+
one_mask_bytes: int,
|
|
530
530
|
trg_batch_size: int,
|
|
531
531
|
) -> None:
|
|
532
532
|
"""
|
|
533
533
|
Compute the token mask directly into memory at the specified pointer.
|
|
534
534
|
For each matcher, provide the index of the target mask.
|
|
535
|
-
If index is K, the memory will be written at trg_pointer + K *
|
|
535
|
+
If index is K, the memory will be written at trg_pointer + K * one_mask_bytes,
|
|
536
536
|
where K < trg_batch_size.
|
|
537
|
-
Memory has to have size trg_batch_size *
|
|
537
|
+
Memory has to have size trg_batch_size * one_mask_bytes.
|
|
538
538
|
Prefer to use fill_next_token_bitmask_par(), which wraps this.
|
|
539
539
|
"""
|
|
540
540
|
|
|
541
|
+
def unsafe_compute_mask_ptr_with_draft_token(
|
|
542
|
+
self,
|
|
543
|
+
interpreters: List[Tuple[LLMatcher, int, List[int]]],
|
|
544
|
+
trg_pointer: int,
|
|
545
|
+
one_mask_bytes: int,
|
|
546
|
+
trg_batch_size: int,
|
|
547
|
+
) -> None:
|
|
548
|
+
"""
|
|
549
|
+
Compute the token mask directly into memory at the specified pointer, including draft tokens.
|
|
550
|
+
|
|
551
|
+
This function extends unsafe_compute_mask_ptr() to handle draft tokens in speculative decoding.
|
|
552
|
+
For each matcher in the batch, it computes masks for both the current position and all draft tokens.
|
|
553
|
+
|
|
554
|
+
Args:
|
|
555
|
+
interpreters: List of tuples containing:
|
|
556
|
+
- LLMatcher: The matcher object for constrained generation
|
|
557
|
+
- int: Index K indicating the target mask position (K < trg_batch_size)
|
|
558
|
+
- List[int]: Draft tokens to be processed for speculative decoding
|
|
559
|
+
trg_pointer: Memory address where mask data will be written
|
|
560
|
+
one_mask_bytes: Size in bytes of a single token mask
|
|
561
|
+
trg_batch_size: Total batch size for memory allocation validation
|
|
562
|
+
|
|
563
|
+
Memory Layout:
|
|
564
|
+
- Main mask written at: trg_pointer + K * one_mask_bytes
|
|
565
|
+
- Draft token i mask written at: trg_pointer + (K + i + 1) * one_mask_bytes
|
|
566
|
+
- Total memory required: trg_batch_size * one_mask_bytes
|
|
567
|
+
|
|
568
|
+
The function processes each matcher's draft tokens sequentially, advancing the matcher state
|
|
569
|
+
for each valid token until encountering an invalid token or termination condition.
|
|
570
|
+
State rollback is performed to maintain matcher consistency.
|
|
571
|
+
"""
|
|
541
572
|
|
|
542
573
|
class JsonCompileOptions(TypedDict, total=False):
|
|
543
574
|
# defaults to ","
|
|
@@ -565,6 +596,7 @@ class LLParserLimits:
|
|
|
565
596
|
max_lexer_states: Optional[int] = None,
|
|
566
597
|
max_grammar_size: Optional[int] = None,
|
|
567
598
|
precompute_large_lexemes: Optional[bool] = None,
|
|
599
|
+
verbose_errors: Optional[bool] = None,
|
|
568
600
|
) -> None:
|
|
569
601
|
"""
|
|
570
602
|
ParserLimits configuration for controlling parser and lexer resource usage.
|
|
@@ -597,6 +629,10 @@ class LLParserLimits:
|
|
|
597
629
|
precompute_large_lexemes (Optional[bool]):
|
|
598
630
|
Whether to run large regexes eagerly on the entire token trie during lexer build.
|
|
599
631
|
Increases lexer construction time, but speeds up mask computation. Default: True.
|
|
632
|
+
|
|
633
|
+
verbose_errors (Optional[bool]):
|
|
634
|
+
If true, include parser state and grammar details in error messages.
|
|
635
|
+
Useful for debugging; may leak schema/state in logs. Default: True.
|
|
600
636
|
"""
|
|
601
637
|
|
|
602
638
|
@property
|
|
@@ -627,6 +663,10 @@ class LLParserLimits:
|
|
|
627
663
|
def precompute_large_lexemes(self) -> bool:
|
|
628
664
|
"""Precompute large regexes during lexer construction. Default: True"""
|
|
629
665
|
|
|
666
|
+
@property
|
|
667
|
+
def verbose_errors(self) -> bool:
|
|
668
|
+
"""Include parser state and grammar in errors. Default: True"""
|
|
669
|
+
|
|
630
670
|
|
|
631
671
|
def regex_to_lark(regex: str, use_ascii: str = "d") -> str:
|
|
632
672
|
r"""
|
llguidance/numpy.py
CHANGED
|
@@ -66,3 +66,17 @@ def fill_next_token_bitmask_par(executor: LLExecutor,
|
|
|
66
66
|
batch, vocab = bitmask.shape
|
|
67
67
|
assert bitmask.flags["C_CONTIGUOUS"], "Mask must be contiguous"
|
|
68
68
|
executor.unsafe_compute_mask_ptr(matchers, bitmask.ctypes.data, vocab * 4, batch)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def fill_next_token_bitmask_par_with_draft_tokens(executor: LLExecutor,
|
|
72
|
+
matchers: List[Tuple[LLMatcher, int, List[int]]],
|
|
73
|
+
bitmask: NDArray[np.int32]) -> None:
|
|
74
|
+
"""
|
|
75
|
+
Compute the token mask directly into the specified array.
|
|
76
|
+
For each matcher, provide the index of the target mask.
|
|
77
|
+
"""
|
|
78
|
+
assert bitmask.dtype == np.int32, "Mask must be int32"
|
|
79
|
+
assert bitmask.ndim == 2, "Mask must be 2D"
|
|
80
|
+
batch, vocab = bitmask.shape
|
|
81
|
+
assert bitmask.flags["C_CONTIGUOUS"], "Mask must be contiguous"
|
|
82
|
+
executor.unsafe_compute_mask_ptr_with_draft_token(matchers, bitmask.ctypes.data, vocab * 4, batch)
|
llguidance/torch.py
CHANGED
|
@@ -66,3 +66,14 @@ def fill_next_token_bitmask_par(executor: LLExecutor,
|
|
|
66
66
|
assert bitmask.is_contiguous(), "Mask must be contiguous"
|
|
67
67
|
executor.unsafe_compute_mask_ptr(matchers, bitmask.data_ptr(), vocab * 4,
|
|
68
68
|
batch)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def fill_next_token_bitmask_par_with_draft_tokens(executor: LLExecutor,
|
|
72
|
+
matchers: List[Tuple[LLMatcher, int, List[int]]],
|
|
73
|
+
bitmask: torch.Tensor) -> None:
|
|
74
|
+
assert bitmask.dtype == torch.int32, "Mask must be int32"
|
|
75
|
+
assert bitmask.is_cpu, "Mask must be on CPU"
|
|
76
|
+
assert bitmask.dim() == 2, "Mask must be 2D"
|
|
77
|
+
batch, vocab = bitmask.shape
|
|
78
|
+
assert bitmask.is_contiguous(), "Mask must be contiguous"
|
|
79
|
+
executor.unsafe_compute_mask_ptr_with_draft_token(matchers, bitmask.data_ptr(), vocab * 4, batch)
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llguidance
|
|
3
|
-
Version: 1.1.1
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
License-File: LICENSE
|
|
5
5
|
Summary: Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance
|
|
6
6
|
Author: Michal Moskal
|
|
7
|
-
License: MIT
|
|
7
|
+
License-Expression: MIT
|
|
8
8
|
Requires-Python: >=3.9
|
|
9
9
|
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
10
10
|
Project-URL: repository, https://github.com/microsoft/llguidance
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
llguidance-1.
|
|
2
|
-
llguidance-1.
|
|
3
|
-
llguidance-1.1.1.dist-info/licenses/LICENSE,sha256=mQaUD2Gx8LUz-n2ZuvVReLKAj74RPqUd-_rYVyzNXys,1162
|
|
1
|
+
llguidance-1.2.0.dist-info/METADATA,sha256=nidxJzRXFMnv7QS874s9-I00HSTVvxKFFOunY0O75EY,10436
|
|
2
|
+
llguidance-1.2.0.dist-info/WHEEL,sha256=2XatmAWXBfp_P6DUtFAtbdzzba6f_xbhEtpqsZt_zEg,94
|
|
3
|
+
llguidance-1.2.0.dist-info/licenses/LICENSE,sha256=mQaUD2Gx8LUz-n2ZuvVReLKAj74RPqUd-_rYVyzNXys,1162
|
|
4
4
|
llguidance/__init__.py,sha256=tlrM-GzMdClgSIEblgA4ixP10h51VDJY5ejIP5FFJJw,620
|
|
5
5
|
llguidance/_grammar_from.py,sha256=kEgwhVYBq3NE_O9MGjIRpci7dgOneLC9CvVGWpKEO84,2452
|
|
6
|
-
llguidance/_lib.pyd,sha256=
|
|
7
|
-
llguidance/_lib.pyi,sha256=
|
|
6
|
+
llguidance/_lib.pyd,sha256=A4GN_LLSo1y68TyTRyQE-hSRgOXlJrnkRguqyehpYpE,7582208
|
|
7
|
+
llguidance/_lib.pyi,sha256=tc0mb-ZKH3LjodEf_aIdLPXFLuEbqGI3flxQpxZn-Bw,24893
|
|
8
8
|
llguidance/_struct_tag.py,sha256=wEJd9KGZTpS-R1ZXyyRFmr-48aYOZw_tI4PhIRy9h4g,4548
|
|
9
9
|
llguidance/_tokenizer.py,sha256=9jonF41IUypGtV6lVUl9l9FGAyn9HNnDb9HKNDywZuQ,1120
|
|
10
10
|
llguidance/_util.py,sha256=6RMYO61KUynAym7KcbDn9VEMGcGdv7rj2yo6fITC-u8,274
|
|
@@ -13,8 +13,8 @@ llguidance/gbnf_to_lark.py,sha256=YsFNyVXrzG3-cjqlcrcuoG1RdquVtZJz43X_lSAXL7Y,17
|
|
|
13
13
|
llguidance/hf.py,sha256=3LNNqC4_KafPe6bKP-Ba0aGsOVg3MqvuMvY6QYth0fY,1980
|
|
14
14
|
llguidance/llamacpp.py,sha256=2LdNjiNku-8xGcswGEQga57FUsRgzdf4Z4BrLVBUfAg,2352
|
|
15
15
|
llguidance/mlx.py,sha256=Q2cNocnFB3xqCCg614iE0wPcumBFJBvo0pG5JTpShds,2494
|
|
16
|
-
llguidance/numpy.py,sha256=
|
|
16
|
+
llguidance/numpy.py,sha256=eMIihG5rcPWFMMWjwl-ZPSkcEo8BXKZFUHaIHkxnFhw,3409
|
|
17
17
|
llguidance/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
18
|
llguidance/tiktoken.py,sha256=sE4QWj88dANfdM0Y50ZvFI7ku6zBFomUCE4bRTtewF4,1135
|
|
19
|
-
llguidance/torch.py,sha256=
|
|
20
|
-
llguidance-1.1.1.dist-info/RECORD,,
|
|
19
|
+
llguidance/torch.py,sha256=PzTI7wRiznXnH7u4_iEP6NL2wmL2cN8p-UlR0PE8JJ4,3451
|
|
20
|
+
llguidance-1.2.0.dist-info/RECORD,,
|
|
File without changes
|