llguidance 1.0.1__cp39-abi3-macosx_10_12_x86_64.whl → 1.1.0__cp39-abi3-macosx_10_12_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llguidance/_lib.abi3.so +0 -0
- llguidance/hf.py +12 -7
- llguidance/llamacpp.py +8 -2
- {llguidance-1.0.1.dist-info → llguidance-1.1.0.dist-info}/METADATA +1 -1
- {llguidance-1.0.1.dist-info → llguidance-1.1.0.dist-info}/RECORD +7 -7
- {llguidance-1.0.1.dist-info → llguidance-1.1.0.dist-info}/WHEEL +1 -1
- {llguidance-1.0.1.dist-info → llguidance-1.1.0.dist-info}/licenses/LICENSE +0 -0
llguidance/_lib.abi3.so
CHANGED
|
Binary file
|
llguidance/hf.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
+
from copy import copy
|
|
1
2
|
from typing import List, Optional
|
|
2
|
-
from ._lib import LLTokenizer
|
|
3
3
|
|
|
4
4
|
import transformers
|
|
5
5
|
|
|
6
|
+
from ._lib import LLTokenizer
|
|
7
|
+
|
|
6
8
|
|
|
7
9
|
def from_tokenizer(
|
|
8
10
|
hf_tokenizer: transformers.PreTrainedTokenizerFast,
|
|
@@ -28,15 +30,18 @@ def from_tokenizer(
|
|
|
28
30
|
# this will JSON-serialize the Rust impl of the tokenizer,
|
|
29
31
|
# including added tokens from tokenizer_config.json
|
|
30
32
|
# (which may be missing from tokenizer.json)
|
|
31
|
-
|
|
33
|
+
backend_tokenizer = copy(
|
|
34
|
+
hf_tokenizer.backend_tokenizer # type: ignore[attr-defined]
|
|
35
|
+
)
|
|
36
|
+
# disable padding and truncation on copy before converting to string
|
|
37
|
+
backend_tokenizer.no_padding()
|
|
38
|
+
backend_tokenizer.no_truncation()
|
|
39
|
+
s = backend_tokenizer.to_str()
|
|
32
40
|
# This is probably not needed - it should figure it out by itself
|
|
33
41
|
# if n_vocab is None:
|
|
34
42
|
# n_vocab = hf_tokenizer.backend_tokenizer.get_vocab_size(with_added_tokens=True)
|
|
35
43
|
if eos_token is None:
|
|
36
|
-
eos_token = hf_tokenizer.eos_token_id
|
|
37
|
-
return LLTokenizer(s,
|
|
38
|
-
n_vocab=n_vocab,
|
|
39
|
-
eos_token=eos_token,
|
|
40
|
-
slices=slices)
|
|
44
|
+
eos_token = hf_tokenizer.eos_token_id # type: ignore
|
|
45
|
+
return LLTokenizer(s, n_vocab=n_vocab, eos_token=eos_token, slices=slices)
|
|
41
46
|
else:
|
|
42
47
|
raise ValueError("Only fast tokenizers are supported")
|
llguidance/llamacpp.py
CHANGED
|
@@ -44,8 +44,14 @@ def lltokenizer_from_vocab(
|
|
|
44
44
|
assert n <= buffer_len
|
|
45
45
|
tok = bytes(buffer[:n]) # type: ignore
|
|
46
46
|
attr = llama_cpp.llama_token_get_attr(vocab, token)
|
|
47
|
-
|
|
48
|
-
|
|
47
|
+
# If the token is a control token or a user-defined token that looks like a control token,
|
|
48
|
+
# we prefix it with 0xff to indicate that it should be treated as a special token.
|
|
49
|
+
if attr & llama_cpp.LLAMA_TOKEN_ATTR_CONTROL or (
|
|
50
|
+
attr & llama_cpp.LLAMA_TOKEN_ATTR_USER_DEFINED
|
|
51
|
+
and tok.startswith(b"<")
|
|
52
|
+
and tok.endswith(b">")
|
|
53
|
+
):
|
|
54
|
+
tok = b"\xff" + tok
|
|
49
55
|
tokens.append(tok)
|
|
50
56
|
|
|
51
57
|
if n_vocab is not None:
|
|
@@ -1,20 +1,20 @@
|
|
|
1
|
-
llguidance-1.0.
|
|
2
|
-
llguidance-1.0.
|
|
3
|
-
llguidance-1.0.1.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
|
|
1
|
+
llguidance-1.1.0.dist-info/METADATA,sha256=VsPyf_eha9mhKwVmvOqQorRQ2oVovMnAydUqt1desuE,10289
|
|
2
|
+
llguidance-1.1.0.dist-info/WHEEL,sha256=otjeGSkQeIMJC2Imx0w7hDDdv_y5KWUU19tBY9uEXoI,104
|
|
3
|
+
llguidance-1.1.0.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
|
|
4
4
|
llguidance/__init__.py,sha256=F9svXvm6oafbuUf_eq34PHJV4c7-yN133vmbWN6nIkc,590
|
|
5
5
|
llguidance/_grammar_from.py,sha256=-vHqkPqJe6t0JKKuQhlUu08kYpPIVknMh8tZlh8FYeQ,2384
|
|
6
|
-
llguidance/_lib.abi3.so,sha256=
|
|
6
|
+
llguidance/_lib.abi3.so,sha256=8e9WPIo7Ivzlun85dHnAQFJenPovzQODQKQNy7ij8MY,8583460
|
|
7
7
|
llguidance/_lib.pyi,sha256=HFe4XkQR7Moey9T6uJ8INrzTsACJReWaBwcYExK4yj4,22230
|
|
8
8
|
llguidance/_struct_tag.py,sha256=83okmGWShxZud7S2vHjPRiInhFw0QVTHkeN8wtR8hR8,4430
|
|
9
9
|
llguidance/_tokenizer.py,sha256=yC-RcgyMZN-olV-PnN4XkjlH-fOU8E9jrwO8VkXLv4M,1084
|
|
10
10
|
llguidance/_util.py,sha256=6JV5SxjoH7hZPaSHhPRD_G6JzIhbKFFTqWTpp88VIiU,260
|
|
11
11
|
llguidance/cli.py,sha256=jhXdWbJC5rs6J8aknHtiuJeIWHcajZ7jYUj0ydeDJ68,2384
|
|
12
12
|
llguidance/gbnf_to_lark.py,sha256=32XJ5Dzq-iSySnkV_rLaNZ888JjHBIr_QkSYdhtMAME,16635
|
|
13
|
-
llguidance/hf.py,sha256=
|
|
14
|
-
llguidance/llamacpp.py,sha256=
|
|
13
|
+
llguidance/hf.py,sha256=sLJKZxGpftuAY5eSCYpogzim3WZA7-vs9SOVgnzf3xI,1933
|
|
14
|
+
llguidance/llamacpp.py,sha256=d_LjNbomBhj7uTo90h0muvPkOMso8NLe7H_YfPCqA8U,2284
|
|
15
15
|
llguidance/mlx.py,sha256=ydMNSjnI2db8F685waOSWQLIoQTB9oehGBXnp9T3A6Q,2427
|
|
16
16
|
llguidance/numpy.py,sha256=Z4888IfdT5oAfq4m2gn6ARfkaJ9d7nxMS79g_t8aXkg,2653
|
|
17
17
|
llguidance/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
18
|
llguidance/tiktoken.py,sha256=-mYDPxq4LM2FI9K8kLL0Us0qN-fZpNK0qIwboDPjfSk,1101
|
|
19
19
|
llguidance/torch.py,sha256=7LmVtRX8-fdUULcpPh4KYuDwVrHFJEvyVxzhyQflpgM,2795
|
|
20
|
-
llguidance-1.0.1.dist-info/RECORD,,
|
|
20
|
+
llguidance-1.1.0.dist-info/RECORD,,
|
|
File without changes
|