llguidance 0.7.22.tar.gz → 0.7.24.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. {llguidance-0.7.22 → llguidance-0.7.24}/CHANGELOG.md +9 -0
  2. {llguidance-0.7.22 → llguidance-0.7.24}/Cargo.lock +5 -5
  3. {llguidance-0.7.22 → llguidance-0.7.24}/PKG-INFO +1 -1
  4. {llguidance-0.7.22 → llguidance-0.7.24}/parser/Cargo.toml +1 -1
  5. {llguidance-0.7.22 → llguidance-0.7.24}/pyproject.toml +1 -1
  6. llguidance-0.7.24/python/llguidance/llamacpp.py +62 -0
  7. {llguidance-0.7.22 → llguidance-0.7.24}/python/torch_tests/test_hf.py +27 -10
  8. llguidance-0.7.24/python/torch_tests/test_llamacpp.py +42 -0
  9. {llguidance-0.7.22 → llguidance-0.7.24}/python_ext/Cargo.toml +1 -1
  10. {llguidance-0.7.22 → llguidance-0.7.24}/python_ext/src/lib.rs +1 -0
  11. llguidance-0.7.24/python_ext/src/llamatokenizer.rs +169 -0
  12. {llguidance-0.7.22 → llguidance-0.7.24}/python_ext/src/py.rs +25 -0
  13. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/install-deps.sh +1 -1
  14. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie/Cargo.toml +1 -1
  15. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie_hf_downloader/Cargo.toml +1 -1
  16. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie_hf_tokenizers/Cargo.toml +1 -1
  17. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie_hf_tokenizers/src/lib.rs +7 -2
  18. {llguidance-0.7.22 → llguidance-0.7.24}/.github/workflows/rust.yml +0 -0
  19. {llguidance-0.7.22 → llguidance-0.7.24}/.github/workflows/wheels.yml +0 -0
  20. {llguidance-0.7.22 → llguidance-0.7.24}/.gitignore +0 -0
  21. {llguidance-0.7.22 → llguidance-0.7.24}/CODE_OF_CONDUCT.md +0 -0
  22. {llguidance-0.7.22 → llguidance-0.7.24}/Cargo.toml +0 -0
  23. {llguidance-0.7.22 → llguidance-0.7.24}/LICENSE +0 -0
  24. {llguidance-0.7.22 → llguidance-0.7.24}/README.md +0 -0
  25. {llguidance-0.7.22 → llguidance-0.7.24}/SECURITY.md +0 -0
  26. {llguidance-0.7.22 → llguidance-0.7.24}/SUPPORT.md +0 -0
  27. {llguidance-0.7.22 → llguidance-0.7.24}/c_sample/Makefile +0 -0
  28. {llguidance-0.7.22 → llguidance-0.7.24}/c_sample/README.md +0 -0
  29. {llguidance-0.7.22 → llguidance-0.7.24}/c_sample/c_sample.cpp +0 -0
  30. {llguidance-0.7.22 → llguidance-0.7.24}/docs/fast_forward.md +0 -0
  31. {llguidance-0.7.22 → llguidance-0.7.24}/docs/json_schema.md +0 -0
  32. {llguidance-0.7.22 → llguidance-0.7.24}/docs/mask_plot.png +0 -0
  33. {llguidance-0.7.22 → llguidance-0.7.24}/docs/optimizations.md +0 -0
  34. {llguidance-0.7.22 → llguidance-0.7.24}/docs/special_tokens.md +0 -0
  35. {llguidance-0.7.22 → llguidance-0.7.24}/docs/syntax.md +0 -0
  36. {llguidance-0.7.22 → llguidance-0.7.24}/docs/toktrie.md +0 -0
  37. {llguidance-0.7.22 → llguidance-0.7.24}/json_stats/Cargo.toml +0 -0
  38. {llguidance-0.7.22 → llguidance-0.7.24}/json_stats/expected_maskbench.json +0 -0
  39. {llguidance-0.7.22 → llguidance-0.7.24}/json_stats/jstats.sh +0 -0
  40. {llguidance-0.7.22 → llguidance-0.7.24}/json_stats/scripts/split-stats.sh +0 -0
  41. {llguidance-0.7.22 → llguidance-0.7.24}/json_stats/scripts/split_plot.py +0 -0
  42. {llguidance-0.7.22 → llguidance-0.7.24}/json_stats/src/json_stats.rs +0 -0
  43. {llguidance-0.7.22 → llguidance-0.7.24}/json_stats/src/lib.rs +0 -0
  44. {llguidance-0.7.22 → llguidance-0.7.24}/json_stats/src/stats.rs +0 -0
  45. {llguidance-0.7.22 → llguidance-0.7.24}/parser/LICENSE +0 -0
  46. {llguidance-0.7.22 → llguidance-0.7.24}/parser/README.md +0 -0
  47. {llguidance-0.7.22 → llguidance-0.7.24}/parser/build.rs +0 -0
  48. {llguidance-0.7.22 → llguidance-0.7.24}/parser/cbindgen.toml +0 -0
  49. {llguidance-0.7.22 → llguidance-0.7.24}/parser/grammars/character.json +0 -0
  50. {llguidance-0.7.22 → llguidance-0.7.24}/parser/grammars/json.json +0 -0
  51. {llguidance-0.7.22 → llguidance-0.7.24}/parser/llguidance.h +0 -0
  52. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/api.rs +0 -0
  53. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/constraint.rs +0 -0
  54. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/earley/from_guidance.rs +0 -0
  55. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/earley/grammar.rs +0 -0
  56. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/earley/lexer.rs +0 -0
  57. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/earley/lexerspec.rs +0 -0
  58. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/earley/mod.rs +0 -0
  59. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/earley/parser.rs +0 -0
  60. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/earley/perf.rs +0 -0
  61. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/earley/regexvec.rs +0 -0
  62. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/earley/slicer.rs +0 -0
  63. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/factory.rs +0 -0
  64. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/ffi.rs +0 -0
  65. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/ffi_par.rs +0 -0
  66. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/grammar_builder.rs +0 -0
  67. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/json/README.md +0 -0
  68. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/json/compiler.rs +0 -0
  69. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/json/context_ref.rs +0 -0
  70. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/json/context_simple/context.rs +0 -0
  71. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/json/context_simple/draft.rs +0 -0
  72. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/json/context_simple/mod.rs +0 -0
  73. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/json/formats.rs +0 -0
  74. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/json/mod.rs +0 -0
  75. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/json/numeric.rs +0 -0
  76. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/json/schema.rs +0 -0
  77. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/json/shared_context.rs +0 -0
  78. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/json_validation.rs +0 -0
  79. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/lark/README.md +0 -0
  80. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/lark/ast.rs +0 -0
  81. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/lark/common.rs +0 -0
  82. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/lark/compiler.rs +0 -0
  83. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/lark/lexer.rs +0 -0
  84. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/lark/mod.rs +0 -0
  85. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/lark/parser.rs +0 -0
  86. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/lib.rs +0 -0
  87. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/logging.rs +0 -0
  88. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/matcher.rs +0 -0
  89. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/output.rs +0 -0
  90. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/panic_utils.rs +0 -0
  91. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/regex_rewrite.rs +0 -0
  92. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/stop_controller.rs +0 -0
  93. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/substring.rs +0 -0
  94. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/tokenizer_json.rs +0 -0
  95. {llguidance-0.7.22 → llguidance-0.7.24}/parser/src/tokenparser.rs +0 -0
  96. {llguidance-0.7.22 → llguidance-0.7.24}/plan.md +0 -0
  97. {llguidance-0.7.22 → llguidance-0.7.24}/python/llguidance/__init__.py +0 -0
  98. {llguidance-0.7.22 → llguidance-0.7.24}/python/llguidance/_grammar_from.py +0 -0
  99. {llguidance-0.7.22 → llguidance-0.7.24}/python/llguidance/_lib.pyi +0 -0
  100. {llguidance-0.7.22 → llguidance-0.7.24}/python/llguidance/_struct_tag.py +0 -0
  101. {llguidance-0.7.22 → llguidance-0.7.24}/python/llguidance/_tokenizer.py +0 -0
  102. {llguidance-0.7.22 → llguidance-0.7.24}/python/llguidance/_util.py +0 -0
  103. {llguidance-0.7.22 → llguidance-0.7.24}/python/llguidance/cli.py +0 -0
  104. {llguidance-0.7.22 → llguidance-0.7.24}/python/llguidance/gbnf_to_lark.py +0 -0
  105. {llguidance-0.7.22 → llguidance-0.7.24}/python/llguidance/hf.py +0 -0
  106. {llguidance-0.7.22 → llguidance-0.7.24}/python/llguidance/mlx.py +0 -0
  107. {llguidance-0.7.22 → llguidance-0.7.24}/python/llguidance/numpy.py +0 -0
  108. {llguidance-0.7.22 → llguidance-0.7.24}/python/llguidance/py.typed +0 -0
  109. {llguidance-0.7.22 → llguidance-0.7.24}/python/llguidance/torch.py +0 -0
  110. {llguidance-0.7.22 → llguidance-0.7.24}/python/mypy.ini +0 -0
  111. {llguidance-0.7.22 → llguidance-0.7.24}/python/torch_tests/__init__.py +0 -0
  112. {llguidance-0.7.22 → llguidance-0.7.24}/python/torch_tests/test_bitmask.py +0 -0
  113. {llguidance-0.7.22 → llguidance-0.7.24}/python/torch_tests/test_matcher.py +0 -0
  114. {llguidance-0.7.22 → llguidance-0.7.24}/python_ext/src/llinterpreter.rs +0 -0
  115. {llguidance-0.7.22 → llguidance-0.7.24}/python_ext/src/llmatcher.rs +0 -0
  116. {llguidance-0.7.22 → llguidance-0.7.24}/python_ext/src/parserlimits.rs +0 -0
  117. {llguidance-0.7.22 → llguidance-0.7.24}/python_ext/src/pyjson.rs +0 -0
  118. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/Cargo.toml +0 -0
  119. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/README.md +0 -0
  120. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/cli.sh +0 -0
  121. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/blog.sample.json +0 -0
  122. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/blog.schema.json +0 -0
  123. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/blog.schema.ll.json +0 -0
  124. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/from-llama.cpp/README.md +0 -0
  125. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/from-llama.cpp/arithmetic.gbnf +0 -0
  126. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/from-llama.cpp/c.gbnf +0 -0
  127. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/from-llama.cpp/chess.gbnf +0 -0
  128. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/from-llama.cpp/english.gbnf +0 -0
  129. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/from-llama.cpp/japanese.gbnf +0 -0
  130. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/from-llama.cpp/json.gbnf +0 -0
  131. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/from-llama.cpp/json_arr.gbnf +0 -0
  132. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/from-llama.cpp/list.gbnf +0 -0
  133. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/from-llama.cpp/vllm-sql.gbnf +0 -0
  134. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/lark.lark +0 -0
  135. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/rfc.lark +0 -0
  136. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/rfc.xml +0 -0
  137. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/data/ulysses.md +0 -0
  138. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/gtest.sh +0 -0
  139. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/lark.sh +0 -0
  140. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/run.sh +0 -0
  141. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/src/lib.rs +0 -0
  142. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/src/minimal.rs +0 -0
  143. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/src/sample_parser.rs +0 -0
  144. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/tests/test_lark.rs +0 -0
  145. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/tests/test_ll.rs +0 -0
  146. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/tests/test_raw_parser.rs +0 -0
  147. {llguidance-0.7.22 → llguidance-0.7.24}/sample_parser/tests/test_stop.rs +0 -0
  148. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/annotate_asm.js +0 -0
  149. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/bump.py +0 -0
  150. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/cbindgen.sh +0 -0
  151. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/checklinks.py +0 -0
  152. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/checklinks.sh +0 -0
  153. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/ci-publish.py +0 -0
  154. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/disasm.sh +0 -0
  155. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/gbnf_to_lark.py +0 -0
  156. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/gen-testcase.py +0 -0
  157. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/git-version.sh +0 -0
  158. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/jsonschema-stats.js +0 -0
  159. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/remote-guidance-test.sh +0 -0
  160. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/rust-size.js +0 -0
  161. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/rust_size.py +0 -0
  162. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/test-guidance.sh +0 -0
  163. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/tokenizer_test.py +0 -0
  164. {llguidance-0.7.22 → llguidance-0.7.24}/scripts/update-git.py +0 -0
  165. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie/LICENSE +0 -0
  166. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie/README.md +0 -0
  167. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie/src/bytes.rs +0 -0
  168. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie/src/lib.rs +0 -0
  169. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie/src/recognizer.rs +0 -0
  170. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie/src/rng.rs +0 -0
  171. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie/src/svob.rs +0 -0
  172. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie/src/tokenv.rs +0 -0
  173. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie/src/toktree.rs +0 -0
  174. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie/tests/test_svob.rs +0 -0
  175. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie_hf_downloader/LICENSE +0 -0
  176. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie_hf_downloader/src/lib.rs +0 -0
  177. {llguidance-0.7.22 → llguidance-0.7.24}/toktrie_hf_tokenizers/LICENSE +0 -0
{llguidance-0.7.22 → llguidance-0.7.24}/CHANGELOG.md
@@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file. Dates are d

  If a release doesn't introduce any interesting changes (build fixes etc.), it's skipped.

+ #### [0.7.24](https://github.com/guidance-ai/llguidance/compare/v0.7.23...0.7.24) 2025-05-23
+
+ - add the sentinel token hack, fixes #180 [`#180`](https://github.com/guidance-ai/llguidance/issues/180)
+
+ #### [0.7.23](https://github.com/guidance-ai/llguidance/compare/v0.7.22...0.7.23) 2025-05-22
+
+ - native llama.cpp tokenizer support [`#179`](https://github.com/guidance-ai/llguidance/pull/179)
+ - improve special token detection in HF tokenizers [`6cae393`](https://github.com/guidance-ai/llguidance/commit/6cae393b9c04fe67621615ff22b46beab512d069)
+
  #### [0.7.22](https://github.com/guidance-ai/llguidance/compare/v0.7.21...0.7.22) 2025-05-21

  - Keep EOS token bytes in `TokenizerWrapper` [`#178`](https://github.com/guidance-ai/llguidance/pull/178)
{llguidance-0.7.22 → llguidance-0.7.24}/Cargo.lock
@@ -1174,7 +1174,7 @@ checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856"

  [[package]]
  name = "llguidance"
- version = "0.7.22"
+ version = "0.7.24"
  dependencies = [
   "anyhow",
   "derivre",
@@ -1193,7 +1193,7 @@ dependencies = [

  [[package]]
  name = "llguidance_py"
- version = "0.7.22"
+ version = "0.7.24"
  dependencies = [
   "anyhow",
   "bytemuck",
@@ -2336,7 +2336,7 @@ dependencies = [

  [[package]]
  name = "toktrie"
- version = "0.7.22"
+ version = "0.7.24"
  dependencies = [
   "anyhow",
   "bytemuck",
@@ -2347,7 +2347,7 @@ dependencies = [

  [[package]]
  name = "toktrie_hf_downloader"
- version = "0.7.22"
+ version = "0.7.24"
  dependencies = [
   "anyhow",
   "hf-hub",
@@ -2358,7 +2358,7 @@ dependencies = [

  [[package]]
  name = "toktrie_hf_tokenizers"
- version = "0.7.22"
+ version = "0.7.24"
  dependencies = [
   "anyhow",
   "log",
{llguidance-0.7.22 → llguidance-0.7.24}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: llguidance
- Version: 0.7.22
+ Version: 0.7.24
  License-File: LICENSE
  Summary: Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance
  Author: Michal Moskal
{llguidance-0.7.22 → llguidance-0.7.24}/parser/Cargo.toml
@@ -1,6 +1,6 @@
  [package]
  name = "llguidance"
- version = "0.7.22"
+ version = "0.7.24"
  edition = "2021"
  license = "MIT"
  description = "Super-fast Structured Outputs"
{llguidance-0.7.22 → llguidance-0.7.24}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "llguidance"
- version = "0.7.22"
+ version = "0.7.24"
  description = "Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance"
  requires-python = ">=3.9"
  license = "MIT"
llguidance-0.7.24/python/llguidance/llamacpp.py (new file)
@@ -0,0 +1,62 @@
+ from typing import List, Optional
+
+ from ._lib import LLTokenizer
+
+ import llama_cpp
+ import ctypes
+
+ def lltokenizer_from_vocab(
+     vocab: llama_cpp.llama_vocab_p,
+     n_vocab: Optional[int] = None,
+     eos_token: Optional[int] = None,
+     slices: Optional[List[str]] = None,
+ ) -> LLTokenizer:
+     """
+     Create a new tokenizer from a llama.cpp vocab object.
+     This is an expensive operation (~1s), so the result should be cached.
+
+     Args:
+         vocab: llama_cpp.llama_vocab_p - the vocab object to use
+         n_vocab: int - override the size of the vocabulary
+         eos_token: int - override the EOS token
+         slices: List[str] - configuration for slicer optimization; pass [] to disable,
+             or None to use the default configuration
+     """
+
+     ntok = llama_cpp.llama_vocab_n_tokens(vocab)
+     if eos_token is None:
+         eos_token = llama_cpp.llama_vocab_eos(vocab)
+     buffer_len = 16 * 1024
+     buffer = ctypes.create_string_buffer(buffer_len + 1)
+     tokens: List[bytes] = []
+
+     for token in range(ntok):
+         n = llama_cpp.llama_token_to_piece(
+             vocab,
+             token,
+             buffer,
+             buffer_len,
+             0,
+             True
+         )
+         if n < 0:
+             raise ValueError(f"Error writing token {token} to buffer of size {buffer_len}. Error: {n}")
+         assert n <= buffer_len
+         tok = bytes(buffer[:n])  # type: ignore
+         attr = llama_cpp.llama_token_get_attr(vocab, token)
+         if attr & llama_cpp.LLAMA_TOKEN_ATTR_CONTROL:
+             tok = b"\xFF" + tok
+         tokens.append(tok)
+
+     if n_vocab is not None:
+         while len(tokens) < n_vocab:
+             tokens.append(b"")
+
+     fptr = ctypes.cast(llama_cpp.llama_cpp._lib.llama_tokenize, ctypes.c_void_p).value
+     return LLTokenizer.from_llamacpp(  # type: ignore
+         tokens=tokens,
+         vocab_ptr=vocab,
+         tokenize_fptr=fptr,
+         eos_token=eos_token,
+         slices=slices
+     )
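A minimal usage sketch of the new module (it mirrors the `test_llamacpp.py` test added later in this diff; the `model.gguf` path is a placeholder for any GGUF file whose vocabulary should be loaded):

```python
import llama_cpp
import llguidance.llamacpp

# Load only the vocabulary from a GGUF file; no weights are needed for tokenization.
params = llama_cpp.llama_model_default_params()
params.vocab_only = True
model = llama_cpp.llama_model_load_from_file(b"model.gguf", params)
vocab = llama_cpp.llama_model_get_vocab(model)

# Building the tokenizer is expensive (~1s per the docstring above), so cache it.
llt = llguidance.llamacpp.lltokenizer_from_vocab(vocab)
assert llt.decode_str(llt.tokenize_str("Hello world!")) == "Hello world!"
```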
{llguidance-0.7.22 → llguidance-0.7.24}/python/torch_tests/test_hf.py
@@ -21,7 +21,8 @@ from transformers import AutoTokenizer


  def _build_tokenizer() -> LLTokenizer:
-     hf_tok = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct")
+     hf_tok = AutoTokenizer.from_pretrained(
+         "unsloth/Meta-Llama-3.1-8B-Instruct")
      return llguidance.hf.from_tokenizer(hf_tok)


@@ -41,6 +42,22 @@ def lark_matcher(grm: str) -> LLMatcher:
      return interp


+ def test_basic_tokenizer() -> None:
+     llt = tokenizer()
+     for s in [
+             "Hello world!", "Hello world! こんにちは世界!", "wave 👋", "heart 👋💖",
+             "1`a`b`c`d`e`f`g`h`i"
+     ]:
+         toks = llt.tokenize_str(s)
+         print(llt.dbg_tokens(toks))
+         assert llt.decode_str(toks) == s
+     toks = llt.tokenize_bytes(b"\x8b")
+     print(llt.dbg_tokens(toks))
+     print(toks)
+     assert len(toks) == 1
+     assert llt.decode_bytes(toks) == b"\x8b"
+
+
  def test_grammar() -> None:
      t = tokenizer()
      mask = allocate_token_bitmask(2, t.vocab_size)
@@ -66,7 +83,8 @@ def test_grammar() -> None:
  def test_par_grammar() -> None:
      n_gram = 50
      t = tokenizer()
-     grammars = [(lark_matcher(r"start: /[a-zA-Z ]*/"), idx) for idx in range(n_gram)]
+     grammars = [(lark_matcher(r"start: /[a-zA-Z ]*/"), idx)
+                 for idx in range(n_gram)]
      mask = allocate_token_bitmask(n_gram, t.vocab_size)
      mask2 = allocate_token_bitmask(n_gram, t.vocab_size)
      exec = LLExecutor()
@@ -88,16 +106,15 @@ def test_tokenize_partial_basic(recent_tokens: List[int]) -> None:
      """Test tokenize_partial with a simple sentence."""
      ll_tok = tokenizer()
      assert ll_tok.is_canonical
-     new_tokens, leftover = ll_tok.tokenize_partial(
-         b" How are you", recent_tokens=recent_tokens
-     )
+     new_tokens, leftover = ll_tok.tokenize_partial(b" How are you",
+                                                    recent_tokens=recent_tokens)
      assert isinstance(new_tokens, list)
      assert isinstance(leftover, bytes)
      assert len(new_tokens) >= 2
      assert ll_tok.decode_bytes(new_tokens) + leftover == b" How are you"
      for suff in ["", "r", "!", " "]:
          tok2 = ll_tok.tokenize_str(" How are you" + suff)
-         assert tok2[0 : len(new_tokens)] == new_tokens
+         assert tok2[0:len(new_tokens)] == new_tokens


  def test_tokenize_partial_docs() -> None:
@@ -107,16 +124,16 @@ def test_tokenize_partial_docs() -> None:
      assert leftover == b"order"

      recent = ll.tokenize_bytes(b'{"')
-     new_tok, leftover = ll.tokenize_partial(
-         b'name_of_the_person"', recent_tokens=recent
-     )
+     new_tok, leftover = ll.tokenize_partial(b'name_of_the_person"',
+                                             recent_tokens=recent)
      print(ll.dbg_tokens(new_tok))
      assert leftover == b'"'
      assert ll.decode_str(new_tok) == "name_of_the_person"


  def test_incomplete_tokenizer() -> None:
-     hf_tok = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM-135M-Instruct")
+     hf_tok = AutoTokenizer.from_pretrained(
+         "HuggingFaceTB/SmolLM-135M-Instruct")
      ll_tok = llguidance.hf.from_tokenizer(hf_tok)

      # unknown bytes are to be skipped
llguidance-0.7.24/python/torch_tests/test_llamacpp.py (new file)
@@ -0,0 +1,42 @@
+ import llguidance.llamacpp
+ import llama_cpp
+ import os
+ import requests  # type: ignore
+ from typing import Any
+
+ def get_llama_vocab_file(pytestconfig: Any) -> str:
+     url = "https://raw.githubusercontent.com/ggml-org/llama.cpp/f4ab2a41476600a98067a9474ea8f9e6db41bcfa/models/ggml-vocab-llama-bpe.gguf"
+     cache_dir = pytestconfig.cache.makedir("llama_vocab")
+     file_name = "vocab.gguf"
+     file_path = os.path.join(cache_dir, file_name)
+
+     if not os.path.exists(file_path):
+         r = requests.get(url)
+         r.raise_for_status()
+         with open(file_path, "wb") as f:
+             f.write(r.content)
+
+     return file_path
+
+
+ def test_llama_cpp(pytestconfig: Any) -> None:
+     filepath = get_llama_vocab_file(pytestconfig)
+     p = llama_cpp.llama_model_default_params()
+     p.vocab_only = True
+     model = llama_cpp.llama_model_load_from_file(filepath.encode(), p)
+     assert model is not None
+     vocab = llama_cpp.llama_model_get_vocab(model)
+     assert vocab is not None
+     llt = llguidance.llamacpp.lltokenizer_from_vocab(vocab)
+     for s in [
+             "Hello world!", "Hello world! こんにちは世界!", "wave 👋", "heart 👋💖",
+             "1`a`b`c`d`e`f`g`h`i"
+     ]:
+         toks = llt.tokenize_str(s)
+         print(llt.dbg_tokens(toks))
+         assert llt.decode_str(toks) == s
+     toks = llt.tokenize_bytes(b"\x8b")
+     print(llt.dbg_tokens(toks))
+     print(toks)
+     assert len(toks) == 1
+     assert llt.decode_bytes(toks) == b"\x8b"
{llguidance-0.7.22 → llguidance-0.7.24}/python_ext/Cargo.toml
@@ -1,6 +1,6 @@
  [package]
  name = "llguidance_py"
- version = "0.7.22"
+ version = "0.7.24"
  edition = "2021"
  license = "MIT"
  description = "Super-fast Structured Outputs"
{llguidance-0.7.22 → llguidance-0.7.24}/python_ext/src/lib.rs
@@ -1,5 +1,6 @@
  use pyo3::prelude::*;

+ mod llamatokenizer;
  mod llinterpreter;
  mod llmatcher;
  mod parserlimits;
llguidance-0.7.24/python_ext/src/llamatokenizer.rs (new file)
@@ -0,0 +1,169 @@
+ use std::sync::Arc;
+
+ use anyhow::{ensure, Result};
+ use llguidance::toktrie::{self, TokEnv, TokRxInfo, TokTrie, TokenId, TokenizerEnv};
+
+ type LlamaTokenizeFn = unsafe extern "C" fn(
+     vocab: *const std::os::raw::c_void,
+     text: *const std::os::raw::c_char,
+     text_len: i32,
+     tokens: *mut i32,
+     n_tokens_max: i32,
+     add_special: bool,
+     parse_special: bool,
+ ) -> i32;
+
+ struct LlamaTokenizer {
+     trie: TokTrie,
+     tokenize_fn: LlamaTokenizeFn,
+     vocab: *const std::os::raw::c_void,
+     sentinel: Option<u8>,
+     sentinel_tokens: Vec<TokenId>,
+ }
+ // SAFETY: tokenize_fn is required to be thread-safe
+ unsafe impl Send for LlamaTokenizer {}
+ unsafe impl Sync for LlamaTokenizer {}
+
+ impl LlamaTokenizer {
+     fn tokenize_with_sentinel(&self, s: &[u8]) -> Result<Vec<toktrie::TokenId>> {
+         if s.is_empty() {
+             return Ok(vec![]);
+         }
+
+         if let Some(sentinel) = self.sentinel {
+             let mut b = Vec::with_capacity(s.len() + 1);
+             b.push(sentinel);
+             b.extend_from_slice(s);
+             let mut res = self.raw_tokenize(&b);
+             ensure!(
+                 res.len() > self.sentinel_tokens.len(),
+                 "tokenize_with_sentinel: res.len() <= sentinel_tokens.len()"
+             );
+             ensure!(
+                 res[0..self.sentinel_tokens.len()] == self.sentinel_tokens,
+                 "tokenize_with_sentinel: res[0..sentinel_tokens.len()] != sentinel_tokens"
+             );
+             res.splice(0..self.sentinel_tokens.len(), []);
+             Ok(res)
+         } else {
+             Ok(self.raw_tokenize(s))
+         }
+     }
+
+     fn raw_tokenize(&self, s: &[u8]) -> Vec<toktrie::TokenId> {
+         let mut res_toks = vec![0u32; s.len() / 4 + 5];
+         let res = unsafe {
+             (self.tokenize_fn)(
+                 self.vocab,
+                 s.as_ptr() as *const std::os::raw::c_char,
+                 s.len().try_into().unwrap(),
+                 res_toks.as_mut_ptr() as *mut i32,
+                 res_toks.len().try_into().unwrap(),
+                 false,
+                 false,
+             )
+         };
+
+         let res = if res < 0 {
+             let n_toks = (-res) as usize;
+             res_toks.resize(n_toks, 0);
+             let res2 = unsafe {
+                 (self.tokenize_fn)(
+                     self.vocab,
+                     s.as_ptr() as *const std::os::raw::c_char,
+                     s.len().try_into().unwrap(),
+                     res_toks.as_mut_ptr() as *mut i32,
+                     res_toks.len().try_into().unwrap(),
+                     false,
+                     false,
+                 )
+             };
+             assert!(res2 == n_toks as i32);
+             res2
+         } else {
+             res
+         };
+
+         res_toks.truncate(res as usize);
+         res_toks
+     }
+ }
+
+ impl TokenizerEnv for LlamaTokenizer {
+     fn tok_trie(&self) -> &TokTrie {
+         &self.trie
+     }
+
+     fn tokenize_bytes(&self, s: &[u8]) -> Vec<toktrie::TokenId> {
+         // llama.cpp tokenizer encodes invalid UTF8 as Unicode replacement character U+FFFD,
+         // so we need the greedy fallback
+         self.trie.tokenize_with_greedy_fallback(s, |s| {
+             self.tokenize_with_sentinel(s.as_bytes())
+                 .expect("tokenize_with_sentinel failed")
+         })
+     }
+ }
+
+ pub fn tokenv_from_llamacpp(
+     tokens: Vec<Vec<u8>>,
+     vocab_ptr: usize,
+     tokenize_fptr: usize,
+     eos_token: u32,
+ ) -> Result<TokEnv> {
+     ensure!(vocab_ptr != 0, "vocab_ptr must be non-null");
+     ensure!(tokenize_fptr != 0, "tokenize_fptr must be non-null");
+
+     let info = TokRxInfo::new(tokens.len() as u32, eos_token);
+     let trie = TokTrie::from(&info, &tokens);
+
+     let mut llama_tok = LlamaTokenizer {
+         trie,
+         tokenize_fn: unsafe { std::mem::transmute::<usize, LlamaTokenizeFn>(tokenize_fptr) },
+         vocab: vocab_ptr as *const std::os::raw::c_void,
+         sentinel: None,
+         sentinel_tokens: vec![],
+     };
+
+     let trie = &llama_tok.trie;
+     let t0 = llama_tok.raw_tokenize(b"a");
+     if trie.decode(&t0) != b"a" {
+         // Now, this likely means that the tokenizer is adding a space in front of the token
+         // (or possibly a <BOS> token).
+         // We will "fix" this by tokenizing [sentinel] + s instead of just s,
+         // and then removing the tokens corresponding to the sentinel.
+
+         // find a good sentinel token - one that doesn't start any other token
+         let sentinel = (1u8..32)
+             .find(|&b| {
+                 trie.token_id(&[b]).is_some()
+                     && !trie.has_extensions(&[b])
+                     && !trie.has_extensions(&[b' ', b])
+             })
+             .ok_or_else(|| {
+                 anyhow::anyhow!("could not find a good sentinel token in the range 1..32")
+             })?;
+
+         llama_tok.sentinel_tokens = llama_tok.raw_tokenize(&[sentinel]);
+         llama_tok.sentinel = Some(sentinel);
+
+         // now, check if it works
+         let t1 = llama_tok.tokenize_with_sentinel(b"a")?;
+         ensure!(
+             trie.decode(&t1) == b"a",
+             "tokenizer is not working with the sentinel {} {:?}",
+             sentinel,
+             trie.decode(&t1)
+         );
+
+         // make sure we can tokenize double-sentinel
+         let t3 = llama_tok.tokenize_with_sentinel(&[sentinel])?;
+         ensure!(
+             trie.decode(&t3) == [sentinel],
+             "tokenizer is not working with the sentinel (rec) {} {:?}",
+             sentinel,
+             trie.decode(&t3)
+         );
+     }
+
+     Ok(Arc::new(llama_tok))
+ }
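The `sentinel` machinery above is the "sentinel token hack" from the 0.7.24 changelog entry: some llama.cpp tokenizers prepend a space or BOS token to their input, so `decode(tokenize(s))` would not round-trip mid-sequence byte fragments. Roughly, in Python terms (an illustrative sketch only, with `raw_tokenize` standing in for the `llama_tokenize` C call):

```python
def tokenize_with_sentinel(raw_tokenize, sentinel: bytes,
                           sentinel_tokens: list, s: bytes) -> list:
    # Prefix a known sentinel byte, then strip the tokens it produced;
    # whatever prefix the tokenizer inserts is absorbed by the sentinel.
    if not s:
        return []
    res = raw_tokenize(sentinel + s)
    # the result must start with exactly the tokens of the sentinel alone
    assert res[:len(sentinel_tokens)] == sentinel_tokens
    return res[len(sentinel_tokens):]
```

The sentinel byte is picked from the range 1..32 so that it is itself a token and no longer token starts with it (alone or after a space), which keeps its token prefix stable regardless of the payload that follows.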
{llguidance-0.7.22 → llguidance-0.7.24}/python_ext/src/py.rs
@@ -14,6 +14,8 @@ use serde::{Deserialize, Serialize};
  use serde_json::Value;
  use toktrie_hf_tokenizers::ByteTokenizer;

+ use crate::llamatokenizer::tokenv_from_llamacpp;
+
  struct PyTokenizer {
      tok_trie: Arc<toktrie::TokTrie>,
      tokenizer_fun: Py<PyAny>,
@@ -73,6 +75,29 @@ impl LLTokenizer {
          })
      }

+     #[staticmethod]
+     #[pyo3(signature = (*, tokens, vocab_ptr, tokenize_fptr, eos_token, slices=None))]
+     fn from_llamacpp(
+         tokens: Vec<Vec<u8>>,
+         vocab_ptr: usize,
+         tokenize_fptr: usize,
+         eos_token: u32,
+         slices: Option<Vec<String>>,
+     ) -> PyResult<Self> {
+         let tok_env =
+             tokenv_from_llamacpp(tokens, vocab_ptr, tokenize_fptr, eos_token).map_err(val_error)?;
+
+         let factory = ParserFactory::new(
+             &tok_env,
+             InferenceCapabilities::default(),
+             &slices.unwrap_or_else(SlicedBiasComputer::general_slices),
+         )
+         .map_err(val_error)?;
+         Ok(LLTokenizer {
+             factory: Arc::new(factory),
+         })
+     }
+
      fn with_slices(&self, slices: Vec<String>) -> PyResult<Self> {
          let factory = self.factory.with_slices(&slices)?;
          Ok(LLTokenizer {
{llguidance-0.7.22 → llguidance-0.7.24}/scripts/install-deps.sh
@@ -2,7 +2,7 @@

  # installing guidance for deps
  pip install pytest guidance huggingface_hub tokenizers jsonschema maturin[zig] \
-     torch transformers==4.52.1 bitsandbytes ipython psutil mypy
+     torch transformers==4.52.1 bitsandbytes ipython psutil mypy llama_cpp_python
  pip uninstall -y guidance

  # print out versions
{llguidance-0.7.22 → llguidance-0.7.24}/toktrie/Cargo.toml
@@ -1,6 +1,6 @@
  [package]
  name = "toktrie"
- version = "0.7.22"
+ version = "0.7.24"
  edition = "2021"
  license = "MIT"
  description = "LLM Token Trie library"

{llguidance-0.7.22 → llguidance-0.7.24}/toktrie_hf_downloader/Cargo.toml
@@ -1,6 +1,6 @@
  [package]
  name = "toktrie_hf_downloader"
- version = "0.7.22"
+ version = "0.7.24"
  edition = "2021"
  license = "MIT"
  description = "HuggingFace Hub download library support for toktrie and llguidance"

{llguidance-0.7.22 → llguidance-0.7.24}/toktrie_hf_tokenizers/Cargo.toml
@@ -1,6 +1,6 @@
  [package]
  name = "toktrie_hf_tokenizers"
- version = "0.7.22"
+ version = "0.7.24"
  edition = "2021"
  license = "MIT"
  description = "HuggingFace tokenizers library support for toktrie and llguidance"
{llguidance-0.7.22 → llguidance-0.7.24}/toktrie_hf_tokenizers/src/lib.rs
@@ -102,7 +102,11 @@ impl ByteTokenizer {
          }

          let vocab_size = hft.get_vocab_size(true) as u32;
-         let added = hft.get_added_tokens_decoder();
+         let mut added = hft
+             .get_added_tokens_decoder()
+             .into_iter()
+             .collect::<Vec<_>>();
+         added.sort_by_key(|(id, _)| *id);

          let mut res = ByteTokenizer {
              hf_model: "foobar".to_string(),
@@ -114,7 +118,8 @@ impl ByteTokenizer {
          let mut specials = HashSet::new();

          for (id, info) in added.iter() {
-             if info.special {
+             // we treat all added tokens of the form <...> as special tokens
+             if info.special || (info.content.starts_with("<") && info.content.ends_with(">")) {
                  match info.content.as_str() {
                      "</s>"
                      | "<|endoftext|>"