llguidance 1.1.2__cp39-abi3-macosx_10_12_x86_64.whl → 1.2.0__cp39-abi3-macosx_10_12_x86_64.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- llguidance/_lib.abi3.so +0 -0
- llguidance/_lib.pyi +34 -3
- llguidance/numpy.py +14 -0
- llguidance/torch.py +11 -0
- {llguidance-1.1.2.dist-info → llguidance-1.2.0.dist-info}/METADATA +1 -1
- {llguidance-1.1.2.dist-info → llguidance-1.2.0.dist-info}/RECORD +8 -8
- {llguidance-1.1.2.dist-info → llguidance-1.2.0.dist-info}/WHEEL +0 -0
- {llguidance-1.1.2.dist-info → llguidance-1.2.0.dist-info}/licenses/LICENSE +0 -0
llguidance/_lib.abi3.so
CHANGED
|
Binary file
|
llguidance/_lib.pyi
CHANGED
|
@@ -526,18 +526,49 @@ class LLExecutor:
|
|
|
526
526
|
self,
|
|
527
527
|
interpreters: List[Tuple[LLMatcher, int]],
|
|
528
528
|
trg_pointer: int,
|
|
529
|
-
|
|
529
|
+
one_mask_bytes: int,
|
|
530
530
|
trg_batch_size: int,
|
|
531
531
|
) -> None:
|
|
532
532
|
"""
|
|
533
533
|
Compute the token mask directly into memory at the specified pointer.
|
|
534
534
|
For each matcher, provide the index of the target mask.
|
|
535
|
-
If index is K, the memory will be written at trg_pointer + K *
|
|
535
|
+
If index is K, the memory will be written at trg_pointer + K * one_mask_bytes,
|
|
536
536
|
where K < trg_batch_size.
|
|
537
|
-
Memory has to have size trg_batch_size *
|
|
537
|
+
Memory has to have size trg_batch_size * one_mask_bytes.
|
|
538
538
|
Prefer to use fill_next_token_bitmask_par(), which wraps this.
|
|
539
539
|
"""
|
|
540
540
|
|
|
541
|
+
def unsafe_compute_mask_ptr_with_draft_token(
|
|
542
|
+
self,
|
|
543
|
+
interpreters: List[Tuple[LLMatcher, int, List[int]]],
|
|
544
|
+
trg_pointer: int,
|
|
545
|
+
one_mask_bytes: int,
|
|
546
|
+
trg_batch_size: int,
|
|
547
|
+
) -> None:
|
|
548
|
+
"""
|
|
549
|
+
Compute the token mask directly into memory at the specified pointer, including draft tokens.
|
|
550
|
+
|
|
551
|
+
This function extends unsafe_compute_mask_ptr() to handle draft tokens in speculative decoding.
|
|
552
|
+
For each matcher in the batch, it computes masks for both the current position and all draft tokens.
|
|
553
|
+
|
|
554
|
+
Args:
|
|
555
|
+
interpreters: List of tuples containing:
|
|
556
|
+
- LLMatcher: The matcher object for constrained generation
|
|
557
|
+
- int: Index K indicating the target mask position (K < trg_batch_size)
|
|
558
|
+
- List[int]: Draft tokens to be processed for speculative decoding
|
|
559
|
+
trg_pointer: Memory address where mask data will be written
|
|
560
|
+
one_mask_bytes: Size in bytes of a single token mask
|
|
561
|
+
trg_batch_size: Total batch size for memory allocation validation
|
|
562
|
+
|
|
563
|
+
Memory Layout:
|
|
564
|
+
- Main mask written at: trg_pointer + K * one_mask_bytes
|
|
565
|
+
- Draft token i mask written at: trg_pointer + (K + i + 1) * one_mask_bytes
|
|
566
|
+
- Total memory required: trg_batch_size * one_mask_bytes
|
|
567
|
+
|
|
568
|
+
The function processes each matcher's draft tokens sequentially, advancing the matcher state
|
|
569
|
+
for each valid token until encountering an invalid token or termination condition.
|
|
570
|
+
State rollback is performed to maintain matcher consistency.
|
|
571
|
+
"""
|
|
541
572
|
|
|
542
573
|
class JsonCompileOptions(TypedDict, total=False):
|
|
543
574
|
# defaults to ","
|
llguidance/numpy.py
CHANGED
|
@@ -66,3 +66,17 @@ def fill_next_token_bitmask_par(executor: LLExecutor,
|
|
|
66
66
|
batch, vocab = bitmask.shape
|
|
67
67
|
assert bitmask.flags["C_CONTIGUOUS"], "Mask must be contiguous"
|
|
68
68
|
executor.unsafe_compute_mask_ptr(matchers, bitmask.ctypes.data, vocab * 4, batch)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def fill_next_token_bitmask_par_with_draft_tokens(executor: LLExecutor,
|
|
72
|
+
matchers: List[Tuple[LLMatcher, int, List[int]]],
|
|
73
|
+
bitmask: NDArray[np.int32]) -> None:
|
|
74
|
+
"""
|
|
75
|
+
Compute the token mask directly into the specified array.
|
|
76
|
+
For each matcher, provide the index of the target mask.
|
|
77
|
+
"""
|
|
78
|
+
assert bitmask.dtype == np.int32, "Mask must be int32"
|
|
79
|
+
assert bitmask.ndim == 2, "Mask must be 2D"
|
|
80
|
+
batch, vocab = bitmask.shape
|
|
81
|
+
assert bitmask.flags["C_CONTIGUOUS"], "Mask must be contiguous"
|
|
82
|
+
executor.unsafe_compute_mask_ptr_with_draft_token(matchers, bitmask.ctypes.data, vocab * 4, batch)
|
llguidance/torch.py
CHANGED
|
@@ -66,3 +66,14 @@ def fill_next_token_bitmask_par(executor: LLExecutor,
|
|
|
66
66
|
assert bitmask.is_contiguous(), "Mask must be contiguous"
|
|
67
67
|
executor.unsafe_compute_mask_ptr(matchers, bitmask.data_ptr(), vocab * 4,
|
|
68
68
|
batch)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def fill_next_token_bitmask_par_with_draft_tokens(executor: LLExecutor,
|
|
72
|
+
matchers: List[Tuple[LLMatcher, int, List[int]]],
|
|
73
|
+
bitmask: torch.Tensor) -> None:
|
|
74
|
+
assert bitmask.dtype == torch.int32, "Mask must be int32"
|
|
75
|
+
assert bitmask.is_cpu, "Mask must be on CPU"
|
|
76
|
+
assert bitmask.dim() == 2, "Mask must be 2D"
|
|
77
|
+
batch, vocab = bitmask.shape
|
|
78
|
+
assert bitmask.is_contiguous(), "Mask must be contiguous"
|
|
79
|
+
executor.unsafe_compute_mask_ptr_with_draft_token(matchers, bitmask.data_ptr(), vocab * 4, batch)
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
llguidance-1.
|
|
2
|
-
llguidance-1.
|
|
3
|
-
llguidance-1.
|
|
1
|
+
llguidance-1.2.0.dist-info/METADATA,sha256=0zDkSmKb7n-5DphoLqb3-upZlzpwWBQUHUUBW57F-Eg,10300
|
|
2
|
+
llguidance-1.2.0.dist-info/WHEEL,sha256=TiMJekJwYXi-5FCpHPqncJXv9UVKDzSHt4YRv5UDSSg,104
|
|
3
|
+
llguidance-1.2.0.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
|
|
4
4
|
llguidance/__init__.py,sha256=F9svXvm6oafbuUf_eq34PHJV4c7-yN133vmbWN6nIkc,590
|
|
5
5
|
llguidance/_grammar_from.py,sha256=-vHqkPqJe6t0JKKuQhlUu08kYpPIVknMh8tZlh8FYeQ,2384
|
|
6
|
-
llguidance/_lib.abi3.so,sha256=
|
|
7
|
-
llguidance/_lib.pyi,sha256=
|
|
6
|
+
llguidance/_lib.abi3.so,sha256=1H38cQvfwzSB7DgGPgBjVEcG24iBcjm7TvORFmVhrlA,8625700
|
|
7
|
+
llguidance/_lib.pyi,sha256=1m69VQ-7XaQLJKf6Vw5qf-UEh1cEigVMZM0Hj-XgSiw,24213
|
|
8
8
|
llguidance/_struct_tag.py,sha256=83okmGWShxZud7S2vHjPRiInhFw0QVTHkeN8wtR8hR8,4430
|
|
9
9
|
llguidance/_tokenizer.py,sha256=yC-RcgyMZN-olV-PnN4XkjlH-fOU8E9jrwO8VkXLv4M,1084
|
|
10
10
|
llguidance/_util.py,sha256=6JV5SxjoH7hZPaSHhPRD_G6JzIhbKFFTqWTpp88VIiU,260
|
|
@@ -13,8 +13,8 @@ llguidance/gbnf_to_lark.py,sha256=32XJ5Dzq-iSySnkV_rLaNZ888JjHBIr_QkSYdhtMAME,16
|
|
|
13
13
|
llguidance/hf.py,sha256=sLJKZxGpftuAY5eSCYpogzim3WZA7-vs9SOVgnzf3xI,1933
|
|
14
14
|
llguidance/llamacpp.py,sha256=d_LjNbomBhj7uTo90h0muvPkOMso8NLe7H_YfPCqA8U,2284
|
|
15
15
|
llguidance/mlx.py,sha256=ydMNSjnI2db8F685waOSWQLIoQTB9oehGBXnp9T3A6Q,2427
|
|
16
|
-
llguidance/numpy.py,sha256=
|
|
16
|
+
llguidance/numpy.py,sha256=ZxHoQ_OmAgUHmxa_6KA8ruuKzXZLjmanPM9BYlKI0bo,3327
|
|
17
17
|
llguidance/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
18
|
llguidance/tiktoken.py,sha256=-mYDPxq4LM2FI9K8kLL0Us0qN-fZpNK0qIwboDPjfSk,1101
|
|
19
|
-
llguidance/torch.py,sha256=
|
|
20
|
-
llguidance-1.
|
|
19
|
+
llguidance/torch.py,sha256=qLwQjaxZbvaVss7Qy_XP-SEPGYRqJRkvd6KE4LmzZJ8,3372
|
|
20
|
+
llguidance-1.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|