llguidance 1.0.1__cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 1.1.0__cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llguidance/_lib.abi3.so CHANGED
Binary file
llguidance/hf.py CHANGED
@@ -1,8 +1,10 @@
1
+ from copy import copy
1
2
  from typing import List, Optional
2
- from ._lib import LLTokenizer
3
3
 
4
4
  import transformers
5
5
 
6
+ from ._lib import LLTokenizer
7
+
6
8
 
7
9
  def from_tokenizer(
8
10
  hf_tokenizer: transformers.PreTrainedTokenizerFast,
@@ -28,15 +30,18 @@ def from_tokenizer(
28
30
  # this will JSON-serialize the Rust impl of the tokenizer,
29
31
  # including added tokens from tokenizer_config.json
30
32
  # (which may be missing from tokenizer.json)
31
- s = hf_tokenizer.backend_tokenizer.to_str() # type: ignore
33
+ backend_tokenizer = copy(
34
+ hf_tokenizer.backend_tokenizer # type: ignore[attr-defined]
35
+ )
36
+ # disable padding and truncation on copy before converting to string
37
+ backend_tokenizer.no_padding()
38
+ backend_tokenizer.no_truncation()
39
+ s = backend_tokenizer.to_str()
32
40
  # This is probably not needed - it should figure it out by itself
33
41
  # if n_vocab is None:
34
42
  # n_vocab = hf_tokenizer.backend_tokenizer.get_vocab_size(with_added_tokens=True)
35
43
  if eos_token is None:
36
- eos_token = hf_tokenizer.eos_token_id # type: ignore
37
- return LLTokenizer(s,
38
- n_vocab=n_vocab,
39
- eos_token=eos_token,
40
- slices=slices)
44
+ eos_token = hf_tokenizer.eos_token_id # type: ignore
45
+ return LLTokenizer(s, n_vocab=n_vocab, eos_token=eos_token, slices=slices)
41
46
  else:
42
47
  raise ValueError("Only fast tokenizers are supported")
llguidance/llamacpp.py CHANGED
@@ -44,8 +44,14 @@ def lltokenizer_from_vocab(
44
44
  assert n <= buffer_len
45
45
  tok = bytes(buffer[:n]) # type: ignore
46
46
  attr = llama_cpp.llama_token_get_attr(vocab, token)
47
- if attr & llama_cpp.LLAMA_TOKEN_ATTR_CONTROL:
48
- tok = b"\xFF" + tok
47
+ # If the token is a control token or a user-defined token that looks like a control token,
48
+ # we prefix it with 0xff to indicate that it should be treated as a special token.
49
+ if attr & llama_cpp.LLAMA_TOKEN_ATTR_CONTROL or (
50
+ attr & llama_cpp.LLAMA_TOKEN_ATTR_USER_DEFINED
51
+ and tok.startswith(b"<")
52
+ and tok.endswith(b">")
53
+ ):
54
+ tok = b"\xff" + tok
49
55
  tokens.append(tok)
50
56
 
51
57
  if n_vocab is not None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llguidance
3
- Version: 1.0.1
3
+ Version: 1.1.0
4
4
  License-File: LICENSE
5
5
  Summary: Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance
6
6
  Author: Michal Moskal
@@ -1,20 +1,20 @@
1
- llguidance-1.0.1.dist-info/METADATA,sha256=Lbt3rqejgDvkhFmrzuDPUBX8odF4Vz9wJgERjl2n9E4,10289
2
- llguidance-1.0.1.dist-info/WHEEL,sha256=X953vE2wRLc-RGAslQIbVvI75R6kkMc3NOFgs6-DCds,129
3
- llguidance-1.0.1.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
1
+ llguidance-1.1.0.dist-info/METADATA,sha256=VsPyf_eha9mhKwVmvOqQorRQ2oVovMnAydUqt1desuE,10289
2
+ llguidance-1.1.0.dist-info/WHEEL,sha256=u4jT2QC4EdFctUy3CkTC0tOuTUpiALVhpeNFL7UFBAM,129
3
+ llguidance-1.1.0.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
4
4
  llguidance/__init__.py,sha256=F9svXvm6oafbuUf_eq34PHJV4c7-yN133vmbWN6nIkc,590
5
5
  llguidance/_grammar_from.py,sha256=-vHqkPqJe6t0JKKuQhlUu08kYpPIVknMh8tZlh8FYeQ,2384
6
- llguidance/_lib.abi3.so,sha256=HLJAG9nfQ3y-V8yzcPtjkCfapHG-hzZQmWQtFfsJz2E,7189368
6
+ llguidance/_lib.abi3.so,sha256=PpM9KpwkF4wGPwNsGsRrrw07u1XQ0Hsrq7asHF2G2uM,7146544
7
7
  llguidance/_lib.pyi,sha256=HFe4XkQR7Moey9T6uJ8INrzTsACJReWaBwcYExK4yj4,22230
8
8
  llguidance/_struct_tag.py,sha256=83okmGWShxZud7S2vHjPRiInhFw0QVTHkeN8wtR8hR8,4430
9
9
  llguidance/_tokenizer.py,sha256=yC-RcgyMZN-olV-PnN4XkjlH-fOU8E9jrwO8VkXLv4M,1084
10
10
  llguidance/_util.py,sha256=6JV5SxjoH7hZPaSHhPRD_G6JzIhbKFFTqWTpp88VIiU,260
11
11
  llguidance/cli.py,sha256=jhXdWbJC5rs6J8aknHtiuJeIWHcajZ7jYUj0ydeDJ68,2384
12
12
  llguidance/gbnf_to_lark.py,sha256=32XJ5Dzq-iSySnkV_rLaNZ888JjHBIr_QkSYdhtMAME,16635
13
- llguidance/hf.py,sha256=v6kZxo02S3ygKf9Ktlb09ZVIBV-sjPjwx4F0-SDmIR8,1743
14
- llguidance/llamacpp.py,sha256=P7-iqcVMtZ6HWryf26tNB99VvGNci-KGUKXfqctZ0wU,1948
13
+ llguidance/hf.py,sha256=sLJKZxGpftuAY5eSCYpogzim3WZA7-vs9SOVgnzf3xI,1933
14
+ llguidance/llamacpp.py,sha256=d_LjNbomBhj7uTo90h0muvPkOMso8NLe7H_YfPCqA8U,2284
15
15
  llguidance/mlx.py,sha256=ydMNSjnI2db8F685waOSWQLIoQTB9oehGBXnp9T3A6Q,2427
16
16
  llguidance/numpy.py,sha256=Z4888IfdT5oAfq4m2gn6ARfkaJ9d7nxMS79g_t8aXkg,2653
17
17
  llguidance/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
18
  llguidance/tiktoken.py,sha256=-mYDPxq4LM2FI9K8kLL0Us0qN-fZpNK0qIwboDPjfSk,1101
19
19
  llguidance/torch.py,sha256=7LmVtRX8-fdUULcpPh4KYuDwVrHFJEvyVxzhyQflpgM,2795
20
- llguidance-1.0.1.dist-info/RECORD,,
20
+ llguidance-1.1.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: maturin (1.9.0)
2
+ Generator: maturin (1.9.1)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64