llguidance 0.7.22__tar.gz → 0.7.23__tar.gz

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (176)
  1. {llguidance-0.7.22 → llguidance-0.7.23}/CHANGELOG.md +5 -0
  2. {llguidance-0.7.22 → llguidance-0.7.23}/Cargo.lock +5 -5
  3. {llguidance-0.7.22 → llguidance-0.7.23}/PKG-INFO +1 -1
  4. {llguidance-0.7.22 → llguidance-0.7.23}/parser/Cargo.toml +1 -1
  5. {llguidance-0.7.22 → llguidance-0.7.23}/pyproject.toml +1 -1
  6. llguidance-0.7.23/python/llguidance/llamacpp.py +62 -0
  7. {llguidance-0.7.22 → llguidance-0.7.23}/python/torch_tests/test_hf.py +27 -10
  8. llguidance-0.7.23/python/torch_tests/test_llamacpp.py +42 -0
  9. {llguidance-0.7.22 → llguidance-0.7.23}/python_ext/Cargo.toml +1 -1
  10. {llguidance-0.7.22 → llguidance-0.7.23}/python_ext/src/py.rs +108 -0
  11. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/install-deps.sh +1 -1
  12. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/Cargo.toml +1 -1
  13. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie_hf_downloader/Cargo.toml +1 -1
  14. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie_hf_tokenizers/Cargo.toml +1 -1
  15. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie_hf_tokenizers/src/lib.rs +7 -2
  16. {llguidance-0.7.22 → llguidance-0.7.23}/.github/workflows/rust.yml +0 -0
  17. {llguidance-0.7.22 → llguidance-0.7.23}/.github/workflows/wheels.yml +0 -0
  18. {llguidance-0.7.22 → llguidance-0.7.23}/.gitignore +0 -0
  19. {llguidance-0.7.22 → llguidance-0.7.23}/CODE_OF_CONDUCT.md +0 -0
  20. {llguidance-0.7.22 → llguidance-0.7.23}/Cargo.toml +0 -0
  21. {llguidance-0.7.22 → llguidance-0.7.23}/LICENSE +0 -0
  22. {llguidance-0.7.22 → llguidance-0.7.23}/README.md +0 -0
  23. {llguidance-0.7.22 → llguidance-0.7.23}/SECURITY.md +0 -0
  24. {llguidance-0.7.22 → llguidance-0.7.23}/SUPPORT.md +0 -0
  25. {llguidance-0.7.22 → llguidance-0.7.23}/c_sample/Makefile +0 -0
  26. {llguidance-0.7.22 → llguidance-0.7.23}/c_sample/README.md +0 -0
  27. {llguidance-0.7.22 → llguidance-0.7.23}/c_sample/c_sample.cpp +0 -0
  28. {llguidance-0.7.22 → llguidance-0.7.23}/docs/fast_forward.md +0 -0
  29. {llguidance-0.7.22 → llguidance-0.7.23}/docs/json_schema.md +0 -0
  30. {llguidance-0.7.22 → llguidance-0.7.23}/docs/mask_plot.png +0 -0
  31. {llguidance-0.7.22 → llguidance-0.7.23}/docs/optimizations.md +0 -0
  32. {llguidance-0.7.22 → llguidance-0.7.23}/docs/special_tokens.md +0 -0
  33. {llguidance-0.7.22 → llguidance-0.7.23}/docs/syntax.md +0 -0
  34. {llguidance-0.7.22 → llguidance-0.7.23}/docs/toktrie.md +0 -0
  35. {llguidance-0.7.22 → llguidance-0.7.23}/json_stats/Cargo.toml +0 -0
  36. {llguidance-0.7.22 → llguidance-0.7.23}/json_stats/expected_maskbench.json +0 -0
  37. {llguidance-0.7.22 → llguidance-0.7.23}/json_stats/jstats.sh +0 -0
  38. {llguidance-0.7.22 → llguidance-0.7.23}/json_stats/scripts/split-stats.sh +0 -0
  39. {llguidance-0.7.22 → llguidance-0.7.23}/json_stats/scripts/split_plot.py +0 -0
  40. {llguidance-0.7.22 → llguidance-0.7.23}/json_stats/src/json_stats.rs +0 -0
  41. {llguidance-0.7.22 → llguidance-0.7.23}/json_stats/src/lib.rs +0 -0
  42. {llguidance-0.7.22 → llguidance-0.7.23}/json_stats/src/stats.rs +0 -0
  43. {llguidance-0.7.22 → llguidance-0.7.23}/parser/LICENSE +0 -0
  44. {llguidance-0.7.22 → llguidance-0.7.23}/parser/README.md +0 -0
  45. {llguidance-0.7.22 → llguidance-0.7.23}/parser/build.rs +0 -0
  46. {llguidance-0.7.22 → llguidance-0.7.23}/parser/cbindgen.toml +0 -0
  47. {llguidance-0.7.22 → llguidance-0.7.23}/parser/grammars/character.json +0 -0
  48. {llguidance-0.7.22 → llguidance-0.7.23}/parser/grammars/json.json +0 -0
  49. {llguidance-0.7.22 → llguidance-0.7.23}/parser/llguidance.h +0 -0
  50. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/api.rs +0 -0
  51. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/constraint.rs +0 -0
  52. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/from_guidance.rs +0 -0
  53. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/grammar.rs +0 -0
  54. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/lexer.rs +0 -0
  55. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/lexerspec.rs +0 -0
  56. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/mod.rs +0 -0
  57. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/parser.rs +0 -0
  58. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/perf.rs +0 -0
  59. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/regexvec.rs +0 -0
  60. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/slicer.rs +0 -0
  61. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/factory.rs +0 -0
  62. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/ffi.rs +0 -0
  63. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/ffi_par.rs +0 -0
  64. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/grammar_builder.rs +0 -0
  65. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/README.md +0 -0
  66. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/compiler.rs +0 -0
  67. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/context_ref.rs +0 -0
  68. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/context_simple/context.rs +0 -0
  69. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/context_simple/draft.rs +0 -0
  70. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/context_simple/mod.rs +0 -0
  71. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/formats.rs +0 -0
  72. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/mod.rs +0 -0
  73. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/numeric.rs +0 -0
  74. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/schema.rs +0 -0
  75. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/shared_context.rs +0 -0
  76. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json_validation.rs +0 -0
  77. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/lark/README.md +0 -0
  78. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/lark/ast.rs +0 -0
  79. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/lark/common.rs +0 -0
  80. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/lark/compiler.rs +0 -0
  81. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/lark/lexer.rs +0 -0
  82. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/lark/mod.rs +0 -0
  83. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/lark/parser.rs +0 -0
  84. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/lib.rs +0 -0
  85. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/logging.rs +0 -0
  86. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/matcher.rs +0 -0
  87. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/output.rs +0 -0
  88. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/panic_utils.rs +0 -0
  89. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/regex_rewrite.rs +0 -0
  90. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/stop_controller.rs +0 -0
  91. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/substring.rs +0 -0
  92. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/tokenizer_json.rs +0 -0
  93. {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/tokenparser.rs +0 -0
  94. {llguidance-0.7.22 → llguidance-0.7.23}/plan.md +0 -0
  95. {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/__init__.py +0 -0
  96. {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/_grammar_from.py +0 -0
  97. {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/_lib.pyi +0 -0
  98. {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/_struct_tag.py +0 -0
  99. {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/_tokenizer.py +0 -0
  100. {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/_util.py +0 -0
  101. {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/cli.py +0 -0
  102. {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/gbnf_to_lark.py +0 -0
  103. {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/hf.py +0 -0
  104. {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/mlx.py +0 -0
  105. {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/numpy.py +0 -0
  106. {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/py.typed +0 -0
  107. {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/torch.py +0 -0
  108. {llguidance-0.7.22 → llguidance-0.7.23}/python/mypy.ini +0 -0
  109. {llguidance-0.7.22 → llguidance-0.7.23}/python/torch_tests/__init__.py +0 -0
  110. {llguidance-0.7.22 → llguidance-0.7.23}/python/torch_tests/test_bitmask.py +0 -0
  111. {llguidance-0.7.22 → llguidance-0.7.23}/python/torch_tests/test_matcher.py +0 -0
  112. {llguidance-0.7.22 → llguidance-0.7.23}/python_ext/src/lib.rs +0 -0
  113. {llguidance-0.7.22 → llguidance-0.7.23}/python_ext/src/llinterpreter.rs +0 -0
  114. {llguidance-0.7.22 → llguidance-0.7.23}/python_ext/src/llmatcher.rs +0 -0
  115. {llguidance-0.7.22 → llguidance-0.7.23}/python_ext/src/parserlimits.rs +0 -0
  116. {llguidance-0.7.22 → llguidance-0.7.23}/python_ext/src/pyjson.rs +0 -0
  117. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/Cargo.toml +0 -0
  118. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/README.md +0 -0
  119. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/cli.sh +0 -0
  120. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/blog.sample.json +0 -0
  121. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/blog.schema.json +0 -0
  122. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/blog.schema.ll.json +0 -0
  123. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/README.md +0 -0
  124. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/arithmetic.gbnf +0 -0
  125. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/c.gbnf +0 -0
  126. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/chess.gbnf +0 -0
  127. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/english.gbnf +0 -0
  128. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/japanese.gbnf +0 -0
  129. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/json.gbnf +0 -0
  130. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/json_arr.gbnf +0 -0
  131. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/list.gbnf +0 -0
  132. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/vllm-sql.gbnf +0 -0
  133. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/lark.lark +0 -0
  134. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/rfc.lark +0 -0
  135. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/rfc.xml +0 -0
  136. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/ulysses.md +0 -0
  137. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/gtest.sh +0 -0
  138. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/lark.sh +0 -0
  139. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/run.sh +0 -0
  140. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/src/lib.rs +0 -0
  141. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/src/minimal.rs +0 -0
  142. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/src/sample_parser.rs +0 -0
  143. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/tests/test_lark.rs +0 -0
  144. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/tests/test_ll.rs +0 -0
  145. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/tests/test_raw_parser.rs +0 -0
  146. {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/tests/test_stop.rs +0 -0
  147. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/annotate_asm.js +0 -0
  148. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/bump.py +0 -0
  149. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/cbindgen.sh +0 -0
  150. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/checklinks.py +0 -0
  151. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/checklinks.sh +0 -0
  152. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/ci-publish.py +0 -0
  153. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/disasm.sh +0 -0
  154. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/gbnf_to_lark.py +0 -0
  155. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/gen-testcase.py +0 -0
  156. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/git-version.sh +0 -0
  157. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/jsonschema-stats.js +0 -0
  158. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/remote-guidance-test.sh +0 -0
  159. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/rust-size.js +0 -0
  160. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/rust_size.py +0 -0
  161. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/test-guidance.sh +0 -0
  162. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/tokenizer_test.py +0 -0
  163. {llguidance-0.7.22 → llguidance-0.7.23}/scripts/update-git.py +0 -0
  164. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/LICENSE +0 -0
  165. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/README.md +0 -0
  166. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/src/bytes.rs +0 -0
  167. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/src/lib.rs +0 -0
  168. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/src/recognizer.rs +0 -0
  169. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/src/rng.rs +0 -0
  170. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/src/svob.rs +0 -0
  171. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/src/tokenv.rs +0 -0
  172. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/src/toktree.rs +0 -0
  173. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/tests/test_svob.rs +0 -0
  174. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie_hf_downloader/LICENSE +0 -0
  175. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie_hf_downloader/src/lib.rs +0 -0
  176. {llguidance-0.7.22 → llguidance-0.7.23}/toktrie_hf_tokenizers/LICENSE +0 -0
{llguidance-0.7.22 → llguidance-0.7.23}/CHANGELOG.md
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file. Dates are d
 
 If a release doesn't introduce any interesting changes (build fixes etc.), it's skipped.
 
+#### [0.7.23](https://github.com/guidance-ai/llguidance/compare/v0.7.22...0.7.23) 2025-05-22
+
+- native llama.cpp tokenizer support [`#179`](https://github.com/guidance-ai/llguidance/pull/179)
+- improve special token detection in HF tokenizers [`6cae393`](https://github.com/guidance-ai/llguidance/commit/6cae393b9c04fe67621615ff22b46beab512d069)
+
 #### [0.7.22](https://github.com/guidance-ai/llguidance/compare/v0.7.21...0.7.22) 2025-05-21
 
 - Keep EOS token bytes in `TokenizerWrapper` [`#178`](https://github.com/guidance-ai/llguidance/pull/178)
{llguidance-0.7.22 → llguidance-0.7.23}/Cargo.lock
@@ -1174,7 +1174,7 @@ checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856"
 
 [[package]]
 name = "llguidance"
-version = "0.7.22"
+version = "0.7.23"
 dependencies = [
  "anyhow",
  "derivre",
@@ -1193,7 +1193,7 @@ dependencies = [
 
 [[package]]
 name = "llguidance_py"
-version = "0.7.22"
+version = "0.7.23"
 dependencies = [
  "anyhow",
  "bytemuck",
@@ -2336,7 +2336,7 @@ dependencies = [
 
 [[package]]
 name = "toktrie"
-version = "0.7.22"
+version = "0.7.23"
 dependencies = [
  "anyhow",
  "bytemuck",
@@ -2347,7 +2347,7 @@ dependencies = [
 
 [[package]]
 name = "toktrie_hf_downloader"
-version = "0.7.22"
+version = "0.7.23"
 dependencies = [
  "anyhow",
  "hf-hub",
@@ -2358,7 +2358,7 @@ dependencies = [
 
 [[package]]
 name = "toktrie_hf_tokenizers"
-version = "0.7.22"
+version = "0.7.23"
 dependencies = [
  "anyhow",
  "log",
{llguidance-0.7.22 → llguidance-0.7.23}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llguidance
-Version: 0.7.22
+Version: 0.7.23
 License-File: LICENSE
 Summary: Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance
 Author: Michal Moskal
{llguidance-0.7.22 → llguidance-0.7.23}/parser/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "llguidance"
-version = "0.7.22"
+version = "0.7.23"
 edition = "2021"
 license = "MIT"
 description = "Super-fast Structured Outputs"
{llguidance-0.7.22 → llguidance-0.7.23}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "llguidance"
-version = "0.7.22"
+version = "0.7.23"
 description = "Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance"
 requires-python = ">=3.9"
 license = "MIT"
llguidance-0.7.23/python/llguidance/llamacpp.py (new file)
@@ -0,0 +1,62 @@
+from typing import List, Optional
+
+from ._lib import LLTokenizer
+
+import llama_cpp
+import ctypes
+
+def lltokenizer_from_vocab(
+    vocab: llama_cpp.llama_vocab_p,
+    n_vocab: Optional[int] = None,
+    eos_token: Optional[int] = None,
+    slices: Optional[List[str]] = None,
+) -> LLTokenizer:
+    """
+    Create a new tokenizer from a llama.cpp vocab object.
+    This is an expensive operation (~1s), so the result should be cached.
+
+    Args:
+        vocab: llama_cpp.llama_vocab_p - the vocab object to use
+        n_vocab: int - override the size of the vocabulary
+        eos_token: int - override the EOS token
+        slices: List[str] - configuration for slicer optimization; pass [] to disable,
+            or None to use the default configuration
+    """
+
+    ntok = llama_cpp.llama_vocab_n_tokens(vocab)
+    if eos_token is None:
+        eos_token = llama_cpp.llama_vocab_eos(vocab)
+    buffer_len = 16 * 1024
+    buffer = ctypes.create_string_buffer(buffer_len + 1)
+    tokens: List[bytes] = []
+
+    for token in range(ntok):
+        n = llama_cpp.llama_token_to_piece(
+            vocab,
+            token,
+            buffer,
+            buffer_len,
+            0,
+            True
+        )
+        if n < 0:
+            raise ValueError(f"Error writing token {token} to buffer of size {buffer_len}. Error: {n}")
+        assert n <= buffer_len
+        tok = bytes(buffer[:n])  # type: ignore
+        attr = llama_cpp.llama_token_get_attr(vocab, token)
+        if attr & llama_cpp.LLAMA_TOKEN_ATTR_CONTROL:
+            tok = b"\xFF" + tok
+        tokens.append(tok)
+
+    if n_vocab is not None:
+        while len(tokens) < n_vocab:
+            tokens.append(b"")
+
+    fptr = ctypes.cast(llama_cpp.llama_cpp._lib.llama_tokenize, ctypes.c_void_p).value
+    return LLTokenizer.from_llamacpp(  # type: ignore
+        tokens=tokens,
+        vocab_ptr=vocab,
+        tokenize_fptr=fptr,
+        eos_token=eos_token,
+        slices=slices
+    )
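The new module builds a byte-level vocabulary with `llama_token_to_piece` (prefixing control tokens with a `0xFF` marker byte) and hands the vocab pointer plus the raw `llama_tokenize` function pointer to the Rust extension. A minimal usage sketch, mirroring the new `test_llamacpp.py` below; the `model.gguf` path is a placeholder:

```python
# Minimal sketch: build an LLTokenizer from a llama.cpp vocab.
# Assumes llama_cpp_python is installed; "model.gguf" is a placeholder path.
import llama_cpp
import llguidance.llamacpp

params = llama_cpp.llama_model_default_params()
params.vocab_only = True  # only the vocabulary is needed for tokenization
model = llama_cpp.llama_model_load_from_file(b"model.gguf", params)
vocab = llama_cpp.llama_model_get_vocab(model)

llt = llguidance.llamacpp.lltokenizer_from_vocab(vocab)  # ~1s, cache the result
toks = llt.tokenize_str("Hello world!")
print(llt.dbg_tokens(toks))
assert llt.decode_str(toks) == "Hello world!"
```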
{llguidance-0.7.22 → llguidance-0.7.23}/python/torch_tests/test_hf.py
@@ -21,7 +21,8 @@ from transformers import AutoTokenizer
 
 
 def _build_tokenizer() -> LLTokenizer:
-    hf_tok = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct")
+    hf_tok = AutoTokenizer.from_pretrained(
+        "unsloth/Meta-Llama-3.1-8B-Instruct")
     return llguidance.hf.from_tokenizer(hf_tok)
 
 
@@ -41,6 +42,22 @@ def lark_matcher(grm: str) -> LLMatcher:
     return interp
 
 
+def test_basic_tokenizer() -> None:
+    llt = tokenizer()
+    for s in [
+            "Hello world!", "Hello world! こんにちは世界!", "wave 👋", "heart 👋💖",
+            "1`a`b`c`d`e`f`g`h`i"
+    ]:
+        toks = llt.tokenize_str(s)
+        print(llt.dbg_tokens(toks))
+        assert llt.decode_str(toks) == s
+    toks = llt.tokenize_bytes(b"\x8b")
+    print(llt.dbg_tokens(toks))
+    print(toks)
+    assert len(toks) == 1
+    assert llt.decode_bytes(toks) == b"\x8b"
+
+
 def test_grammar() -> None:
     t = tokenizer()
     mask = allocate_token_bitmask(2, t.vocab_size)
@@ -66,7 +83,8 @@ def test_grammar() -> None:
 def test_par_grammar() -> None:
     n_gram = 50
     t = tokenizer()
-    grammars = [(lark_matcher(r"start: /[a-zA-Z ]*/"), idx) for idx in range(n_gram)]
+    grammars = [(lark_matcher(r"start: /[a-zA-Z ]*/"), idx)
+                for idx in range(n_gram)]
     mask = allocate_token_bitmask(n_gram, t.vocab_size)
     mask2 = allocate_token_bitmask(n_gram, t.vocab_size)
     exec = LLExecutor()
@@ -88,16 +106,15 @@ def test_tokenize_partial_basic(recent_tokens: List[int]) -> None:
     """Test tokenize_partial with a simple sentence."""
     ll_tok = tokenizer()
     assert ll_tok.is_canonical
-    new_tokens, leftover = ll_tok.tokenize_partial(
-        b" How are you", recent_tokens=recent_tokens
-    )
+    new_tokens, leftover = ll_tok.tokenize_partial(b" How are you",
+                                                   recent_tokens=recent_tokens)
     assert isinstance(new_tokens, list)
     assert isinstance(leftover, bytes)
     assert len(new_tokens) >= 2
     assert ll_tok.decode_bytes(new_tokens) + leftover == b" How are you"
     for suff in ["", "r", "!", " "]:
         tok2 = ll_tok.tokenize_str(" How are you" + suff)
-        assert tok2[0 : len(new_tokens)] == new_tokens
+        assert tok2[0:len(new_tokens)] == new_tokens
 
 
 def test_tokenize_partial_docs() -> None:
@@ -107,16 +124,16 @@ def test_tokenize_partial_docs() -> None:
     assert leftover == b"order"
 
     recent = ll.tokenize_bytes(b'{"')
-    new_tok, leftover = ll.tokenize_partial(
-        b'name_of_the_person"', recent_tokens=recent
-    )
+    new_tok, leftover = ll.tokenize_partial(b'name_of_the_person"',
+                                            recent_tokens=recent)
     print(ll.dbg_tokens(new_tok))
     assert leftover == b'"'
     assert ll.decode_str(new_tok) == "name_of_the_person"
 
 
 def test_incomplete_tokenizer() -> None:
-    hf_tok = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM-135M-Instruct")
+    hf_tok = AutoTokenizer.from_pretrained(
+        "HuggingFaceTB/SmolLM-135M-Instruct")
     ll_tok = llguidance.hf.from_tokenizer(hf_tok)
 
     # unknown bytes are to be skipped
llguidance-0.7.23/python/torch_tests/test_llamacpp.py (new file)
@@ -0,0 +1,42 @@
+import llguidance.llamacpp
+import llama_cpp
+import os
+import requests  # type: ignore
+from typing import Any
+
+def get_llama_vocab_file(pytestconfig: Any) -> str:
+    url = "https://raw.githubusercontent.com/ggml-org/llama.cpp/f4ab2a41476600a98067a9474ea8f9e6db41bcfa/models/ggml-vocab-llama-bpe.gguf"
+    cache_dir = pytestconfig.cache.makedir("llama_vocab")
+    file_name = "vocab.gguf"
+    file_path = os.path.join(cache_dir, file_name)
+
+    if not os.path.exists(file_path):
+        r = requests.get(url)
+        r.raise_for_status()
+        with open(file_path, "wb") as f:
+            f.write(r.content)
+
+    return file_path
+
+
+def test_llama_cpp(pytestconfig: Any) -> None:
+    filepath = get_llama_vocab_file(pytestconfig)
+    p = llama_cpp.llama_model_default_params()
+    p.vocab_only = True
+    model = llama_cpp.llama_model_load_from_file(filepath.encode(), p)
+    assert model is not None
+    vocab = llama_cpp.llama_model_get_vocab(model)
+    assert vocab is not None
+    llt = llguidance.llamacpp.lltokenizer_from_vocab(vocab)
+    for s in [
+            "Hello world!", "Hello world! こんにちは世界!", "wave 👋", "heart 👋💖",
+            "1`a`b`c`d`e`f`g`h`i"
+    ]:
+        toks = llt.tokenize_str(s)
+        print(llt.dbg_tokens(toks))
+        assert llt.decode_str(toks) == s
+    toks = llt.tokenize_bytes(b"\x8b")
+    print(llt.dbg_tokens(toks))
+    print(toks)
+    assert len(toks) == 1
+    assert llt.decode_bytes(toks) == b"\x8b"
{llguidance-0.7.22 → llguidance-0.7.23}/python_ext/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "llguidance_py"
-version = "0.7.22"
+version = "0.7.23"
 edition = "2021"
 license = "MIT"
 description = "Super-fast Structured Outputs"
{llguidance-0.7.22 → llguidance-0.7.23}/python_ext/src/py.rs
@@ -34,6 +34,78 @@ struct PyMidProcessResult {
     temperature: f32,
 }
 
+type LlamaTokenizeFn = unsafe extern "C" fn(
+    vocab: *const std::os::raw::c_void,
+    text: *const std::os::raw::c_char,
+    text_len: i32,
+    tokens: *mut i32,
+    n_tokens_max: i32,
+    add_special: bool,
+    parse_special: bool,
+) -> i32;
+
+struct LlamaTokenizerInner {
+    trie: TokTrie,
+    tokenize_fn: LlamaTokenizeFn,
+    vocab: *const std::os::raw::c_void,
+}
+// SAFETY: tokenize_fn is required to be thread-safe
+unsafe impl Send for LlamaTokenizerInner {}
+unsafe impl Sync for LlamaTokenizerInner {}
+
+impl LlamaTokenizerInner {
+    fn raw_tokenize(&self, s: &[u8]) -> Vec<toktrie::TokenId> {
+        let mut res_toks = vec![0u32; s.len() / 4 + 5];
+        let res = unsafe {
+            (self.tokenize_fn)(
+                self.vocab,
+                s.as_ptr() as *const std::os::raw::c_char,
+                s.len().try_into().unwrap(),
+                res_toks.as_mut_ptr() as *mut i32,
+                res_toks.len().try_into().unwrap(),
+                false,
+                false,
+            )
+        };
+
+        let res = if res < 0 {
+            let n_toks = (-res) as usize;
+            res_toks.resize(n_toks, 0);
+            let res2 = unsafe {
+                (self.tokenize_fn)(
+                    self.vocab,
+                    s.as_ptr() as *const std::os::raw::c_char,
+                    s.len().try_into().unwrap(),
+                    res_toks.as_mut_ptr() as *mut i32,
+                    res_toks.len().try_into().unwrap(),
+                    false,
+                    false,
+                )
+            };
+            assert!(res2 == n_toks as i32);
+            res2
+        } else {
+            res
+        };
+
+        res_toks.truncate(res as usize);
+        res_toks
+    }
+}
+
+impl TokenizerEnv for LlamaTokenizerInner {
+    fn tok_trie(&self) -> &TokTrie {
+        &self.trie
+    }
+
+    fn tokenize_bytes(&self, s: &[u8]) -> Vec<toktrie::TokenId> {
+        // llama.cpp tokenizer encodes invalid UTF8 as Unicode replacement character U+FFFD,
+        // so we need the greedy fallback
+        self.trie
+            .tokenize_with_greedy_fallback(s, |s| self.raw_tokenize(s.as_bytes()))
+    }
+}
+
 #[pymethods]
 impl LLTokenizer {
     #[new]
@@ -73,6 +145,42 @@ impl LLTokenizer {
         })
     }
 
+    #[staticmethod]
+    #[pyo3(signature = (*, tokens, vocab_ptr, tokenize_fptr, eos_token, slices=None))]
+    fn from_llamacpp(
+        tokens: Vec<Vec<u8>>,
+        vocab_ptr: usize,
+        tokenize_fptr: usize,
+        eos_token: u32,
+        slices: Option<Vec<String>>,
+    ) -> PyResult<Self> {
+        if vocab_ptr == 0 {
+            return Err(PyValueError::new_err("vocab_ptr must be non-null"));
+        }
+        if tokenize_fptr == 0 {
+            return Err(PyValueError::new_err("tokenize_fptr must be non-null"));
+        }
+
+        let info = TokRxInfo::new(tokens.len() as u32, eos_token);
+        let trie = TokTrie::from(&info, &tokens);
+
+        let llama_tok = LlamaTokenizerInner {
+            trie,
+            tokenize_fn: unsafe { std::mem::transmute::<usize, LlamaTokenizeFn>(tokenize_fptr) },
+            vocab: vocab_ptr as *const std::os::raw::c_void,
+        };
+        let tok_env: TokEnv = Arc::new(llama_tok);
+        let factory = ParserFactory::new(
+            &tok_env,
+            InferenceCapabilities::default(),
+            &slices.unwrap_or_else(SlicedBiasComputer::general_slices),
+        )
+        .map_err(val_error)?;
+        Ok(LLTokenizer {
+            factory: Arc::new(factory),
+        })
+    }
+
     fn with_slices(&self, slices: Vec<String>) -> PyResult<Self> {
         let factory = self.factory.with_slices(&slices)?;
         Ok(LLTokenizer {
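On the Rust side, `from_llamacpp` reinterprets the two integers as a vocab pointer and a typed `LlamaTokenizeFn`, and `raw_tokenize` follows llama.cpp's convention that a negative return value is the negated number of tokens required: it first tries a small buffer (`s.len() / 4 + 5`), then resizes and retries once. A sketch of the matching Python-side handoff, as done in the new `llamacpp.py` above:

```python
# Sketch of the Python-side pointer handoff (mirrors llamacpp.py above);
# assumes llama_cpp_python is installed. The raw address of the llama_tokenize
# symbol is extracted with ctypes and passed as a plain integer, which
# from_llamacpp transmutes back into a function pointer.
import ctypes
import llama_cpp

fptr = ctypes.cast(llama_cpp.llama_cpp._lib.llama_tokenize, ctypes.c_void_p).value
assert fptr  # a null value would trigger the "tokenize_fptr must be non-null" error
```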
{llguidance-0.7.22 → llguidance-0.7.23}/scripts/install-deps.sh
@@ -2,7 +2,7 @@
 
 # installing guidance for deps
 pip install pytest guidance huggingface_hub tokenizers jsonschema maturin[zig] \
-    torch transformers==4.52.1 bitsandbytes ipython psutil mypy
+    torch transformers==4.52.1 bitsandbytes ipython psutil mypy llama_cpp_python
 pip uninstall -y guidance
 
 # print out versions
{llguidance-0.7.22 → llguidance-0.7.23}/toktrie/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "toktrie"
-version = "0.7.22"
+version = "0.7.23"
 edition = "2021"
 license = "MIT"
 description = "LLM Token Trie library"
{llguidance-0.7.22 → llguidance-0.7.23}/toktrie_hf_downloader/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "toktrie_hf_downloader"
-version = "0.7.22"
+version = "0.7.23"
 edition = "2021"
 license = "MIT"
 description = "HuggingFace Hub download library support for toktrie and llguidance"
{llguidance-0.7.22 → llguidance-0.7.23}/toktrie_hf_tokenizers/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "toktrie_hf_tokenizers"
-version = "0.7.22"
+version = "0.7.23"
 edition = "2021"
 license = "MIT"
 description = "HuggingFace tokenizers library support for toktrie and llguidance"
{llguidance-0.7.22 → llguidance-0.7.23}/toktrie_hf_tokenizers/src/lib.rs
@@ -102,7 +102,11 @@ impl ByteTokenizer {
         }
 
         let vocab_size = hft.get_vocab_size(true) as u32;
-        let added = hft.get_added_tokens_decoder();
+        let mut added = hft
+            .get_added_tokens_decoder()
+            .into_iter()
+            .collect::<Vec<_>>();
+        added.sort_by_key(|(id, _)| *id);
 
         let mut res = ByteTokenizer {
             hf_model: "foobar".to_string(),
@@ -114,7 +118,8 @@
         let mut specials = HashSet::new();
 
         for (id, info) in added.iter() {
-            if info.special {
+            // we treat all added tokens of the form <...> as special tokens
+            if info.special || (info.content.starts_with("<") && info.content.ends_with(">")) {
                 match info.content.as_str() {
                     "</s>"
                     | "<|endoftext|>"