PyPI - llguidance - Versions diffs - 1.0.1__cp39-abi3-macosx_10_12_x86_64.whl → 1.1.0__cp39-abi3-macosx_10_12_x86_64.whl - Mend

llguidance 1.0.1__cp39-abi3-macosx_10_12_x86_64.whl → 1.1.0__cp39-abi3-macosx_10_12_x86_64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7)

llguidance/_lib.abi3.so CHANGED Viewed

Binary file

llguidance/hf.py CHANGED Viewed

@@ -1,8 +1,10 @@
+from copy import copy
 from typing import List, Optional
-from ._lib import LLTokenizer
 import transformers
+from ._lib import LLTokenizer
 def from_tokenizer(
     hf_tokenizer: transformers.PreTrainedTokenizerFast,
@@ -28,15 +30,18 @@ def from_tokenizer(
         # this will JSON-serialize the Rust impl of the tokenizer,
         # including added tokens from tokenizer_config.json
         # (which may be missing from tokenizer.json)
-        s = hf_tokenizer.backend_tokenizer.to_str() # type: ignore
+        backend_tokenizer = copy(
+            hf_tokenizer.backend_tokenizer  # type: ignore[attr-defined]
+        )
+        # disable padding and truncation on copy before converting to string
+        backend_tokenizer.no_padding()
+        backend_tokenizer.no_truncation()
+        s = backend_tokenizer.to_str()
         # This is probably not needed - it should figure it out by itself
         # if n_vocab is None:
         #     n_vocab = hf_tokenizer.backend_tokenizer.get_vocab_size(with_added_tokens=True)
         if eos_token is None:
-            eos_token = hf_tokenizer.eos_token_id # type: ignore
-        return LLTokenizer(s,
-                           n_vocab=n_vocab,
-                           eos_token=eos_token,
-                           slices=slices)
+            eos_token = hf_tokenizer.eos_token_id  # type: ignore
+        return LLTokenizer(s, n_vocab=n_vocab, eos_token=eos_token, slices=slices)
     else:
         raise ValueError("Only fast tokenizers are supported")

llguidance/llamacpp.py CHANGED Viewed

@@ -44,8 +44,14 @@ def lltokenizer_from_vocab(
         assert n <= buffer_len
         tok = bytes(buffer[:n]) # type: ignore
         attr = llama_cpp.llama_token_get_attr(vocab, token)
-        if attr & llama_cpp.LLAMA_TOKEN_ATTR_CONTROL:
-            tok = b"\xFF" + tok
+        # If the token is a control token or a user-defined token that looks like a control token,
+        # we prefix it with 0xff to indicate that it should be treated as a special token.
+        if attr & llama_cpp.LLAMA_TOKEN_ATTR_CONTROL or (
+            attr & llama_cpp.LLAMA_TOKEN_ATTR_USER_DEFINED
+            and tok.startswith(b"<")
+            and tok.endswith(b">")
+        ):
+            tok = b"\xff" + tok
         tokens.append(tok)
     if n_vocab is not None:

{llguidance-1.0.1.dist-info → llguidance-1.1.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llguidance
-Version: 1.0.1
+Version: 1.1.0
 License-File: LICENSE
 Summary: Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance
 Author: Michal Moskal

{llguidance-1.0.1.dist-info → llguidance-1.1.0.dist-info}/RECORD RENAMED Viewed

@@ -1,20 +1,20 @@
-llguidance-1.0.1.dist-info/METADATA,sha256=Lbt3rqejgDvkhFmrzuDPUBX8odF4Vz9wJgERjl2n9E4,10289
-llguidance-1.0.1.dist-info/WHEEL,sha256=ZYBX952iI2I2uDwQx9fvRWesensjYLC_kvGOKEfPrrM,104
-llguidance-1.0.1.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
+llguidance-1.1.0.dist-info/METADATA,sha256=VsPyf_eha9mhKwVmvOqQorRQ2oVovMnAydUqt1desuE,10289
+llguidance-1.1.0.dist-info/WHEEL,sha256=otjeGSkQeIMJC2Imx0w7hDDdv_y5KWUU19tBY9uEXoI,104
+llguidance-1.1.0.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
 llguidance/__init__.py,sha256=F9svXvm6oafbuUf_eq34PHJV4c7-yN133vmbWN6nIkc,590
 llguidance/_grammar_from.py,sha256=-vHqkPqJe6t0JKKuQhlUu08kYpPIVknMh8tZlh8FYeQ,2384
-llguidance/_lib.abi3.so,sha256=MCWDfBbBpBgZx6BEHIm-f13TVz5uH3WSViRFGsKlolU,8568828
+llguidance/_lib.abi3.so,sha256=8e9WPIo7Ivzlun85dHnAQFJenPovzQODQKQNy7ij8MY,8583460
 llguidance/_lib.pyi,sha256=HFe4XkQR7Moey9T6uJ8INrzTsACJReWaBwcYExK4yj4,22230
 llguidance/_struct_tag.py,sha256=83okmGWShxZud7S2vHjPRiInhFw0QVTHkeN8wtR8hR8,4430
 llguidance/_tokenizer.py,sha256=yC-RcgyMZN-olV-PnN4XkjlH-fOU8E9jrwO8VkXLv4M,1084
 llguidance/_util.py,sha256=6JV5SxjoH7hZPaSHhPRD_G6JzIhbKFFTqWTpp88VIiU,260
 llguidance/cli.py,sha256=jhXdWbJC5rs6J8aknHtiuJeIWHcajZ7jYUj0ydeDJ68,2384
 llguidance/gbnf_to_lark.py,sha256=32XJ5Dzq-iSySnkV_rLaNZ888JjHBIr_QkSYdhtMAME,16635
-llguidance/hf.py,sha256=v6kZxo02S3ygKf9Ktlb09ZVIBV-sjPjwx4F0-SDmIR8,1743
-llguidance/llamacpp.py,sha256=P7-iqcVMtZ6HWryf26tNB99VvGNci-KGUKXfqctZ0wU,1948
+llguidance/hf.py,sha256=sLJKZxGpftuAY5eSCYpogzim3WZA7-vs9SOVgnzf3xI,1933
+llguidance/llamacpp.py,sha256=d_LjNbomBhj7uTo90h0muvPkOMso8NLe7H_YfPCqA8U,2284
 llguidance/mlx.py,sha256=ydMNSjnI2db8F685waOSWQLIoQTB9oehGBXnp9T3A6Q,2427
 llguidance/numpy.py,sha256=Z4888IfdT5oAfq4m2gn6ARfkaJ9d7nxMS79g_t8aXkg,2653
 llguidance/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 llguidance/tiktoken.py,sha256=-mYDPxq4LM2FI9K8kLL0Us0qN-fZpNK0qIwboDPjfSk,1101
 llguidance/torch.py,sha256=7LmVtRX8-fdUULcpPh4KYuDwVrHFJEvyVxzhyQflpgM,2795
-llguidance-1.0.1.dist-info/RECORD,,
+llguidance-1.1.0.dist-info/RECORD,,

{llguidance-1.0.1.dist-info → llguidance-1.1.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: maturin (1.9.0)
+Generator: maturin (1.9.1)
 Root-Is-Purelib: false
 Tag: cp39-abi3-macosx_10_12_x86_64

{llguidance-1.0.1.dist-info → llguidance-1.1.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes