llguidance 1.0.1__tar.gz → 1.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. {llguidance-1.0.1 → llguidance-1.1.1}/CHANGELOG.md +12 -0
  2. {llguidance-1.0.1 → llguidance-1.1.1}/Cargo.lock +105 -6
  3. {llguidance-1.0.1 → llguidance-1.1.1}/PKG-INFO +1 -1
  4. llguidance-1.1.1/docs/de_recursing.md +51 -0
  5. {llguidance-1.0.1 → llguidance-1.1.1}/docs/syntax.md +5 -0
  6. {llguidance-1.0.1 → llguidance-1.1.1}/parser/Cargo.toml +1 -1
  7. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/grammar_builder.rs +75 -2
  8. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/lark/compiler.rs +17 -1
  9. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/matcher.rs +1 -0
  10. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/tokenparser.rs +11 -1
  11. {llguidance-1.0.1 → llguidance-1.1.1}/pyproject.toml +1 -1
  12. {llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/hf.py +12 -7
  13. {llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/llamacpp.py +8 -2
  14. {llguidance-1.0.1 → llguidance-1.1.1}/python_ext/Cargo.toml +1 -1
  15. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/Cargo.toml +3 -0
  16. llguidance-1.1.1/sample_parser/tests/common_lark_utils/mod.rs +152 -0
  17. llguidance-1.1.1/sample_parser/tests/test_json_arrays.rs +130 -0
  18. llguidance-1.1.1/sample_parser/tests/test_json_objects.rs +203 -0
  19. llguidance-1.1.1/sample_parser/tests/test_json_primitives.rs +376 -0
  20. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/tests/test_lark.rs +15 -139
  21. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/tests/test_ll.rs +24 -0
  22. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/tests/test_raw_parser.rs +45 -0
  23. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie/Cargo.toml +1 -1
  24. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie_hf_downloader/Cargo.toml +1 -1
  25. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie_hf_tokenizers/Cargo.toml +1 -1
  26. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie_tiktoken/Cargo.toml +1 -1
  27. {llguidance-1.0.1 → llguidance-1.1.1}/.github/workflows/rust.yml +0 -0
  28. {llguidance-1.0.1 → llguidance-1.1.1}/.github/workflows/wheels.yml +0 -0
  29. {llguidance-1.0.1 → llguidance-1.1.1}/.gitignore +0 -0
  30. {llguidance-1.0.1 → llguidance-1.1.1}/CODE_OF_CONDUCT.md +0 -0
  31. {llguidance-1.0.1 → llguidance-1.1.1}/Cargo.toml +0 -0
  32. {llguidance-1.0.1 → llguidance-1.1.1}/LICENSE +0 -0
  33. {llguidance-1.0.1 → llguidance-1.1.1}/README.md +0 -0
  34. {llguidance-1.0.1 → llguidance-1.1.1}/SECURITY.md +0 -0
  35. {llguidance-1.0.1 → llguidance-1.1.1}/SUPPORT.md +0 -0
  36. {llguidance-1.0.1 → llguidance-1.1.1}/c_sample/Makefile +0 -0
  37. {llguidance-1.0.1 → llguidance-1.1.1}/c_sample/README.md +0 -0
  38. {llguidance-1.0.1 → llguidance-1.1.1}/c_sample/c_sample.cpp +0 -0
  39. {llguidance-1.0.1 → llguidance-1.1.1}/docs/fast_forward.md +0 -0
  40. {llguidance-1.0.1 → llguidance-1.1.1}/docs/json_schema.md +0 -0
  41. {llguidance-1.0.1 → llguidance-1.1.1}/docs/mask_plot.png +0 -0
  42. {llguidance-1.0.1 → llguidance-1.1.1}/docs/optimizations.md +0 -0
  43. {llguidance-1.0.1 → llguidance-1.1.1}/docs/parametric.md +0 -0
  44. {llguidance-1.0.1 → llguidance-1.1.1}/docs/special_tokens.md +0 -0
  45. {llguidance-1.0.1 → llguidance-1.1.1}/docs/toktrie.md +0 -0
  46. {llguidance-1.0.1 → llguidance-1.1.1}/json_stats/Cargo.toml +0 -0
  47. {llguidance-1.0.1 → llguidance-1.1.1}/json_stats/expected_maskbench.json +0 -0
  48. {llguidance-1.0.1 → llguidance-1.1.1}/json_stats/jstats.sh +0 -0
  49. {llguidance-1.0.1 → llguidance-1.1.1}/json_stats/scripts/split-stats.sh +0 -0
  50. {llguidance-1.0.1 → llguidance-1.1.1}/json_stats/scripts/split_plot.py +0 -0
  51. {llguidance-1.0.1 → llguidance-1.1.1}/json_stats/src/json_stats.rs +0 -0
  52. {llguidance-1.0.1 → llguidance-1.1.1}/json_stats/src/lib.rs +0 -0
  53. {llguidance-1.0.1 → llguidance-1.1.1}/json_stats/src/stats.rs +0 -0
  54. {llguidance-1.0.1 → llguidance-1.1.1}/parser/LICENSE +0 -0
  55. {llguidance-1.0.1 → llguidance-1.1.1}/parser/README.md +0 -0
  56. {llguidance-1.0.1 → llguidance-1.1.1}/parser/build.rs +0 -0
  57. {llguidance-1.0.1 → llguidance-1.1.1}/parser/cbindgen.toml +0 -0
  58. {llguidance-1.0.1 → llguidance-1.1.1}/parser/grammars/character.json +0 -0
  59. {llguidance-1.0.1 → llguidance-1.1.1}/parser/grammars/json.json +0 -0
  60. {llguidance-1.0.1 → llguidance-1.1.1}/parser/llguidance.h +0 -0
  61. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/api.rs +0 -0
  62. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/constraint.rs +0 -0
  63. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/earley/from_guidance.rs +0 -0
  64. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/earley/grammar.rs +0 -0
  65. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/earley/lexer.rs +0 -0
  66. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/earley/lexerspec.rs +0 -0
  67. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/earley/mod.rs +0 -0
  68. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/earley/parser.rs +0 -0
  69. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/earley/perf.rs +0 -0
  70. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/earley/regexvec.rs +0 -0
  71. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/earley/slicer.rs +0 -0
  72. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/factory.rs +0 -0
  73. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/ffi.rs +0 -0
  74. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/ffi_par.rs +0 -0
  75. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/hashcons.rs +0 -0
  76. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/json/README.md +0 -0
  77. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/json/compiler.rs +0 -0
  78. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/json/context_ref.rs +0 -0
  79. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/json/context_simple/context.rs +0 -0
  80. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/json/context_simple/draft.rs +0 -0
  81. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/json/context_simple/mod.rs +0 -0
  82. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/json/formats.rs +0 -0
  83. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/json/mod.rs +0 -0
  84. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/json/numeric.rs +0 -0
  85. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/json/schema.rs +0 -0
  86. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/json/shared_context.rs +0 -0
  87. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/json_validation.rs +0 -0
  88. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/lark/README.md +0 -0
  89. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/lark/ast.rs +0 -0
  90. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/lark/common.rs +0 -0
  91. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/lark/lexer.rs +0 -0
  92. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/lark/mod.rs +0 -0
  93. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/lark/parser.rs +0 -0
  94. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/lib.rs +0 -0
  95. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/logging.rs +0 -0
  96. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/output.rs +0 -0
  97. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/panic_utils.rs +0 -0
  98. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/regex_rewrite.rs +0 -0
  99. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/stop_controller.rs +0 -0
  100. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/substring.rs +0 -0
  101. {llguidance-1.0.1 → llguidance-1.1.1}/parser/src/tokenizer_json.rs +0 -0
  102. {llguidance-1.0.1 → llguidance-1.1.1}/plan.md +0 -0
  103. {llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/__init__.py +0 -0
  104. {llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/_grammar_from.py +0 -0
  105. {llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/_lib.pyi +0 -0
  106. {llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/_struct_tag.py +0 -0
  107. {llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/_tokenizer.py +0 -0
  108. {llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/_util.py +0 -0
  109. {llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/cli.py +0 -0
  110. {llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/gbnf_to_lark.py +0 -0
  111. {llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/mlx.py +0 -0
  112. {llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/numpy.py +0 -0
  113. {llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/py.typed +0 -0
  114. {llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/tiktoken.py +0 -0
  115. {llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/torch.py +0 -0
  116. {llguidance-1.0.1 → llguidance-1.1.1}/python/mypy.ini +0 -0
  117. {llguidance-1.0.1 → llguidance-1.1.1}/python/torch_tests/__init__.py +0 -0
  118. {llguidance-1.0.1 → llguidance-1.1.1}/python/torch_tests/test_bitmask.py +0 -0
  119. {llguidance-1.0.1 → llguidance-1.1.1}/python/torch_tests/test_hf.py +0 -0
  120. {llguidance-1.0.1 → llguidance-1.1.1}/python/torch_tests/test_llamacpp.py +0 -0
  121. {llguidance-1.0.1 → llguidance-1.1.1}/python/torch_tests/test_matcher.py +0 -0
  122. {llguidance-1.0.1 → llguidance-1.1.1}/python/torch_tests/test_tiktoken.py +0 -0
  123. {llguidance-1.0.1 → llguidance-1.1.1}/python_ext/src/lib.rs +0 -0
  124. {llguidance-1.0.1 → llguidance-1.1.1}/python_ext/src/llamatokenizer.rs +0 -0
  125. {llguidance-1.0.1 → llguidance-1.1.1}/python_ext/src/llinterpreter.rs +0 -0
  126. {llguidance-1.0.1 → llguidance-1.1.1}/python_ext/src/llmatcher.rs +0 -0
  127. {llguidance-1.0.1 → llguidance-1.1.1}/python_ext/src/parserlimits.rs +0 -0
  128. {llguidance-1.0.1 → llguidance-1.1.1}/python_ext/src/py.rs +0 -0
  129. {llguidance-1.0.1 → llguidance-1.1.1}/python_ext/src/pyjson.rs +0 -0
  130. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/README.md +0 -0
  131. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/cli.sh +0 -0
  132. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/blog.sample.json +0 -0
  133. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/blog.schema.json +0 -0
  134. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/blog.schema.ll.json +0 -0
  135. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/from-llama.cpp/README.md +0 -0
  136. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/from-llama.cpp/arithmetic.gbnf +0 -0
  137. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/from-llama.cpp/c.gbnf +0 -0
  138. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/from-llama.cpp/chess.gbnf +0 -0
  139. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/from-llama.cpp/english.gbnf +0 -0
  140. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/from-llama.cpp/japanese.gbnf +0 -0
  141. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/from-llama.cpp/json.gbnf +0 -0
  142. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/from-llama.cpp/json_arr.gbnf +0 -0
  143. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/from-llama.cpp/list.gbnf +0 -0
  144. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/from-llama.cpp/vllm-sql.gbnf +0 -0
  145. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/lark.lark +0 -0
  146. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/rfc.lark +0 -0
  147. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/rfc.xml +0 -0
  148. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/data/ulysses.md +0 -0
  149. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/gtest.sh +0 -0
  150. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/lark.sh +0 -0
  151. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/run.sh +0 -0
  152. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/src/lib.rs +0 -0
  153. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/src/minimal.rs +0 -0
  154. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/src/sample_parser.rs +0 -0
  155. {llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/tests/test_stop.rs +0 -0
  156. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/annotate_asm.js +0 -0
  157. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/bump.py +0 -0
  158. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/cbindgen.sh +0 -0
  159. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/checklinks.py +0 -0
  160. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/checklinks.sh +0 -0
  161. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/ci-publish.py +0 -0
  162. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/disasm.sh +0 -0
  163. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/gbnf_to_lark.py +0 -0
  164. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/gen-testcase.py +0 -0
  165. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/git-version.sh +0 -0
  166. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/install-deps.sh +0 -0
  167. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/jsonschema-stats.js +0 -0
  168. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/remote-guidance-test.sh +0 -0
  169. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/rust-size.js +0 -0
  170. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/rust_size.py +0 -0
  171. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/test-guidance.sh +0 -0
  172. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/tokenizer_test.py +0 -0
  173. {llguidance-1.0.1 → llguidance-1.1.1}/scripts/update-git.py +0 -0
  174. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie/LICENSE +0 -0
  175. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie/README.md +0 -0
  176. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie/src/bytes.rs +0 -0
  177. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie/src/lib.rs +0 -0
  178. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie/src/recognizer.rs +0 -0
  179. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie/src/rng.rs +0 -0
  180. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie/src/svob.rs +0 -0
  181. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie/src/tokenv.rs +0 -0
  182. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie/src/toktree.rs +0 -0
  183. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie/tests/test_svob.rs +0 -0
  184. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie_hf_downloader/LICENSE +0 -0
  185. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie_hf_downloader/src/lib.rs +0 -0
  186. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie_hf_tokenizers/LICENSE +0 -0
  187. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie_hf_tokenizers/src/lib.rs +0 -0
  188. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie_tiktoken/LICENSE +0 -0
  189. {llguidance-1.0.1 → llguidance-1.1.1}/toktrie_tiktoken/src/lib.rs +0 -0
{llguidance-1.0.1 → llguidance-1.1.1}/CHANGELOG.md

@@ -4,6 +4,18 @@ All notable changes to this project will be documented in this file. Dates are d
 
 If a release doesn't introduce any interesting changes (build fixes etc.), it's skipped.
 
+#### [1.1.1](https://github.com/guidance-ai/llguidance/compare/v1.1.0...1.1.1) 2025-07-23
+
+- prevent error state when calling `try_consume_tokens` after parser is stopped [`#213`](https://github.com/guidance-ai/llguidance/pull/213); fixes [`#211`](https://github.com/guidance-ai/llguidance/issues/211)
+- set parser stop condition in `try_consume_tokens` even when some tokens are rejected [`#212`](https://github.com/guidance-ai/llguidance/pull/212)
+
+#### [1.1.0](https://github.com/guidance-ai/llguidance/compare/v1.0.1...1.1.0) 2025-07-18
+
+- disable hf tokenizer truncation and padding [`#205`](https://github.com/guidance-ai/llguidance/pull/205); fixes [`#1322`](https://github.com/guidance-ai/guidance/issues/1322)
+- llama_cpp tokenizers: infer added tokens starting/ending with < and > to be special tokens [`#202`](https://github.com/guidance-ai/llguidance/pull/202)
+- add lark syntax for "any token" and negation of token ranges [`#201`](https://github.com/guidance-ai/llguidance/pull/201)
+- add de-recursion cook book to docs [`#199`](https://github.com/guidance-ai/llguidance/pull/199)
+
 #### [1.0.1](https://github.com/guidance-ai/llguidance/compare/v1.0.0...1.0.1) 2025-07-03
 
 - fix: tokenizers normalizers sequence api changed [`#195`](https://github.com/guidance-ai/llguidance/pull/195)
{llguidance-1.0.1 → llguidance-1.1.1}/Cargo.lock

@@ -711,6 +711,12 @@ version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
 
+[[package]]
+name = "futures-timer"
+version = "3.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
+
 [[package]]
 name = "futures-util"
 version = "0.3.31"
@@ -760,6 +766,12 @@ version = "0.31.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
 
+[[package]]
+name = "glob"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
+
 [[package]]
 name = "h2"
 version = "0.4.11"
@@ -1229,7 +1241,7 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
 
 [[package]]
 name = "llguidance"
-version = "1.0.1"
+version = "1.1.1"
 dependencies = [
  "anyhow",
  "derivre",
@@ -1248,7 +1260,7 @@ dependencies = [
 
 [[package]]
 name = "llguidance_py"
-version = "1.0.1"
+version = "1.1.1"
 dependencies = [
  "anyhow",
  "bytemuck",
@@ -1637,6 +1649,15 @@ dependencies = [
  "zerocopy",
 ]
 
+[[package]]
+name = "proc-macro-crate"
+version = "3.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35"
+dependencies = [
+ "toml_edit",
+]
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.95"
@@ -1868,6 +1889,12 @@ version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
 
+[[package]]
+name = "relative-path"
+version = "1.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2"
+
 [[package]]
 name = "reqwest"
 version = "0.12.22"
@@ -1926,6 +1953,36 @@ dependencies = [
  "windows-sys 0.52.0",
 ]
 
+[[package]]
+name = "rstest"
+version = "0.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fc39292f8613e913f7df8fa892b8944ceb47c247b78e1b1ae2f09e019be789d"
+dependencies = [
+ "futures-timer",
+ "futures-util",
+ "rstest_macros",
+ "rustc_version",
+]
+
+[[package]]
+name = "rstest_macros"
+version = "0.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f168d99749d307be9de54d23fd226628d99768225ef08f6ffb52e0182a27746"
+dependencies = [
+ "cfg-if",
+ "glob",
+ "proc-macro-crate",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "relative-path",
+ "rustc_version",
+ "syn",
+ "unicode-ident",
+]
+
 [[package]]
 name = "rustc-demangle"
 version = "0.1.25"
@@ -1938,6 +1995,15 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
 
+[[package]]
+name = "rustc_version"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
+dependencies = [
+ "semver",
+]
+
 [[package]]
 name = "rustix"
 version = "1.0.7"
@@ -2006,6 +2072,7 @@ dependencies = [
  "clap",
  "lazy_static",
  "llguidance",
+ "rstest",
  "serde",
  "serde_json",
  "toktrie_hf_downloader",
@@ -2050,6 +2117,12 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "semver"
+version = "1.0.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
+
 [[package]]
 name = "serde"
 version = "1.0.219"
@@ -2405,7 +2478,7 @@ dependencies = [
 
 [[package]]
 name = "toktrie"
-version = "1.0.1"
+version = "1.1.1"
 dependencies = [
  "anyhow",
  "bytemuck",
@@ -2416,7 +2489,7 @@ dependencies = [
 
 [[package]]
 name = "toktrie_hf_downloader"
-version = "1.0.1"
+version = "1.1.1"
 dependencies = [
  "anyhow",
  "hf-hub",
@@ -2427,7 +2500,7 @@ dependencies = [
 
 [[package]]
 name = "toktrie_hf_tokenizers"
-version = "1.0.1"
+version = "1.1.1"
 dependencies = [
  "anyhow",
  "log",
@@ -2439,7 +2512,7 @@ dependencies = [
 
 [[package]]
 name = "toktrie_tiktoken"
-version = "1.0.1"
+version = "1.1.1"
 dependencies = [
  "anyhow",
  "log",
@@ -2449,6 +2522,23 @@ dependencies = [
  "toktrie",
 ]
 
+[[package]]
+name = "toml_datetime"
+version = "0.6.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
+
+[[package]]
+name = "toml_edit"
+version = "0.22.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
+dependencies = [
+ "indexmap",
+ "toml_datetime",
+ "winnow",
+]
+
 [[package]]
 name = "tower"
 version = "0.5.2"
@@ -3004,6 +3094,15 @@ version = "0.53.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
 
+[[package]]
+name = "winnow"
+version = "0.7.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "wit-bindgen-rt"
 version = "0.39.0"
{llguidance-1.0.1 → llguidance-1.1.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llguidance
-Version: 1.0.1
+Version: 1.1.1
 License-File: LICENSE
 Summary: Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance
 Author: Michal Moskal
llguidance-1.1.1/docs/de_recursing.md

@@ -0,0 +1,51 @@
+# De-Recursing Grammars
+
+This is a cookbook of examples to help in removing recursion where possible from grammars (see [Syntax](./syntax.md) for more details).
+The examples below will generally already be left-recursive.
+
+## Simple lists
+
+```lark
+item_list : item
+          | item_list item
+```
+can become
+```lark
+item_list : item+
+```
+
+## Lists with Delimiters
+
+```lark
+sep_list : item
+         | sep_list SEP item
+```
+becomes
+```lark
+sep_list : item (SEP item)*
+```
+
+## List with alternatives
+
+```lark
+postfix_expression: primary_expression
+                  | postfix_expression "[" expression "]"
+                  | postfix_expression "(" ")"
+                  | postfix_expression "(" argument_expression_list ")"
+                  | postfix_expression "." IDENTIFIER
+                  | postfix_expression PTR_OP IDENTIFIER
+                  | "(" type_name ")" "{" initializer_list "}"
+                  | "(" type_name ")" "{" initializer_list "," "}"
+```
+becomes (note the additional rule):
+```lark
+postfix_expression: primary_expression postfix_suffix*
+                  | "(" type_name ")" "{" initializer_list "}"
+                  | "(" type_name ")" "{" initializer_list "," "}"
+
+postfix_suffix: "[" expression "]"
+              | "(" ")"
+              | "(" argument_expression_list ")"
+              | "." IDENTIFIER
+              | PTR_OP IDENTIFIER
+```
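The same rewrite also handles right recursion; for comparison, here is an extra example in the cookbook's style (illustrative, not part of the shipped file):

```lark
expr : term
     | term "+" expr
```
can become
```lark
expr : term ("+" term)*
```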
{llguidance-1.0.1 → llguidance-1.1.1}/docs/syntax.md

@@ -70,6 +70,11 @@ You can also use numeric token ids, as in `<[128010]>` (this is `<|python_tag|>`
 You can also use ranges like `<[128000-128255]>` for all Llama special tokens, or
 even lists of ranges like `<[128000-128100,128130-128170]>`; ranges are inclusive.
 
+Individual numeric token ids and ranges can be negated with the caret operator, like `<[^128000,128130-128170]>`.
+This is equivalent to `<[0-127999,128001-128129,128171-MAX]>`.
+
+You can also use a *wildcard* token range, `<[*]>`, denoting `<[0-MAX]>`.
+
 For example, this is how to constrain JSON function calling for Meta Llama 3.1,
 according to their [source repo](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/prompt_format.md#model-response-format-5) (and yes, it's [different](https://github.com/meta-llama/llama-models/issues/266) than the website).
 
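To make the new constructs concrete, here is a small illustrative grammar in the same lark dialect; the rule names and the Llama-style token ids are invented for this sketch:

```lark
// end the turn with <|eot_id|> (id 128009 on Llama 3.1; illustrative)
start : body <[128009]>
// any number of tokens outside the special-token block
body  : <[^128000-128255]>*
// a rule that accepts any single token whatsoever
anytok: <[*]>
```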
{llguidance-1.0.1 → llguidance-1.1.1}/parser/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "llguidance"
-version = "1.0.1"
+version = "1.1.1"
 edition = "2021"
 license = "MIT"
 description = "Super-fast Structured Outputs"
{llguidance-1.0.1 → llguidance-1.1.1}/parser/src/grammar_builder.rs

@@ -282,8 +282,6 @@ impl GrammarBuilder {
     pub fn token_ranges(&mut self, token_ranges: Vec<RangeInclusive<u32>>) -> Result<NodeRef> {
         self.check_limits()?;
 
-        let name = token_ranges_to_string(&token_ranges);
-
         let trie = self.tok_env.as_ref().map(|t| t.tok_trie());
         for r in &token_ranges {
             ensure!(r.start() <= r.end(), "Invalid token range: {:?}", r);
@@ -300,10 +298,69 @@
             self.add_warning("no tokenizer - can't validate <[...]>".to_string());
         }
 
+        let name = token_ranges_to_string(&token_ranges);
         let id = self.regex.spec.add_special_token(name, token_ranges)?;
         Ok(self.lexeme_to_node(id))
     }
 
+    pub fn negated_token_ranges(
+        &mut self,
+        token_ranges: Vec<RangeInclusive<u32>>,
+    ) -> Result<NodeRef> {
+        let negated_ranges = if let Some(te) = &self.tok_env {
+            let trie = te.tok_trie();
+
+            let (min, max) = (0u32, trie.vocab_size() as u32 - 1);
+            ensure!(
+                !token_ranges.is_empty(),
+                "negation of empty token ranges is not supported"
+            );
+
+            let mut sorted = token_ranges.clone();
+            sorted.sort_by_key(|r| *r.start());
+
+            let mut negated = vec![];
+            let mut current = min;
+            for range in sorted {
+                ensure!(
+                    *range.end() < trie.vocab_size() as u32,
+                    "Token range end too large: {:?}",
+                    range.end()
+                );
+                ensure!(
+                    range.start() <= range.end(),
+                    "Invalid token range: {:?}",
+                    range
+                );
+
+                let (&start, &end) = (range.start(), range.end());
+                ensure!(start <= end, "Invalid token range: {:?}", range);
+                if end < current {
+                    // skip this range, it is already covered by the previous one
+                    continue;
+                }
+                if start > current {
+                    // add a range from the current to the start of this one
+                    negated.push(current..=start - 1);
+                }
+                // update the current to the end of this range
+                current = current.max(end + 1);
+            }
+            if current <= max {
+                // add the last range from the current to the max
+                negated.push(current..=max);
+            }
+            negated
+        } else {
+            self.add_warning("no tokenizer - can't validate <[^...]>".to_string());
+            vec![INVALID_TOKEN..=INVALID_TOKEN]
+        };
+
+        let name = token_ranges_to_string(&negated_ranges);
+        let id = self.regex.spec.add_special_token(name, negated_ranges)?;
+        Ok(self.lexeme_to_node(id))
+    }
+
     pub fn special_token(&mut self, token: &str) -> Result<NodeRef> {
         self.check_limits()?;
@@ -331,6 +388,22 @@
         Ok(self.lexeme_to_node(idx))
     }
 
+    pub fn any_token(&mut self) -> Result<NodeRef> {
+        self.check_limits()?;
+        let range = if let Some(te) = &self.tok_env {
+            let trie = te.tok_trie();
+            0..=trie.vocab_size() as u32 - 1
+        } else {
+            self.add_warning("no tokenizer - can't validate <any_token>".to_string());
+            INVALID_TOKEN..=INVALID_TOKEN
+        };
+        let idx = self
+            .regex
+            .spec
+            .add_special_token("<[*]>".to_string(), vec![range])?;
+        Ok(self.lexeme_to_node(idx))
+    }
+
     pub fn gen_grammar(&mut self, data: GenGrammarOptions, props: NodeProps) -> NodeRef {
         if props.max_tokens.is_some() {
             self.regex.spec.has_max_tokens = true;
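The complement computation in `negated_token_ranges` is plain interval arithmetic over the vocabulary `[0, vocab_size)`: sort the ranges by start, then emit the gaps. A self-contained sketch of the same sweep, outside the builder (the function name and test values are illustrative):

```rust
use std::ops::RangeInclusive;

/// Complement of a set of inclusive ranges within [0, max], mirroring the
/// sweep in `negated_token_ranges` above.
fn complement(mut ranges: Vec<RangeInclusive<u32>>, max: u32) -> Vec<RangeInclusive<u32>> {
    ranges.sort_by_key(|r| *r.start());
    let mut out = vec![];
    let mut current = 0u32; // first token id not yet accounted for
    for r in ranges {
        let (start, end) = (*r.start(), *r.end());
        if end < current {
            continue; // fully covered by an earlier range
        }
        if start > current {
            out.push(current..=start - 1); // gap before this range
        }
        current = current.max(end.saturating_add(1));
    }
    if current <= max {
        out.push(current..=max); // tail after the last range
    }
    out
}

fn main() {
    // With a Llama-3.1-sized vocabulary (128256 tokens),
    // <[^128000,128130-128170]> leaves exactly three ranges:
    let neg = complement(vec![128000..=128000, 128130..=128170], 128255);
    assert_eq!(neg, vec![0..=127999, 128001..=128129, 128171..=128255]);
}
```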
{llguidance-1.0.1 → llguidance-1.1.1}/parser/src/lark/compiler.rs

@@ -313,6 +313,18 @@
             Value::SpecialToken(s) => {
                 if s.starts_with("<[") && s.ends_with("]>") {
                     let s = &s[2..s.len() - 2];
+                    let negate = s.starts_with("^");
+                    let s = if negate { &s[1..] } else { s };
+                    if s == "*" {
+                        if negate {
+                            bail!("negated wildcard token <[^*]> is not supported");
+                        }
+                        return self.builder.any_token();
+                    } else if s.contains('*') {
+                        bail!(
+                            "wildcard token range '*' must not contain additional tokens"
+                        );
+                    }
                     let mut ranges = vec![];
                     for range in s.split(",") {
                         let ends: Vec<&str> = range.split('-').map(|s| s.trim()).collect();
@@ -334,7 +346,11 @@
                         ranges.push(start..=end);
                     }
                     ensure!(!ranges.is_empty(), "empty token range");
-                    return self.builder.token_ranges(ranges);
+                    return if negate {
+                        self.builder.negated_token_ranges(ranges)
+                    } else {
+                        self.builder.token_ranges(ranges)
+                    };
                 }
                 return self.builder.special_token(s);
             }
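In summary, the compiler now maps the bracket forms as follows (a sketch; the rule names are invented and the error texts paraphrase the `bail!` calls above):

```lark
any     : <[*]>           // wildcard -> any_token()
negated : <[^10-20,99]>   // caret -> negated_token_ranges()
plain   : <[10-20,99]>    // no caret -> token_ranges()
// <[^*]>  -> error: negated wildcard token <[^*]> is not supported
// <[*,5]> -> error: '*' must not be combined with other tokens
```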
{llguidance-1.0.1 → llguidance-1.1.1}/parser/src/matcher.rs

@@ -166,6 +166,7 @@
         self.with_inner(|inner| {
             for (idx, &t) in tokens.iter().enumerate() {
                 if !inner.parser.validate_token(t)? {
+                    let _ = inner.parser.check_stop()?;
                     return Ok(idx);
                 }
                 let bt = inner.parser.consume_token(t)?;
{llguidance-1.0.1 → llguidance-1.1.1}/parser/src/tokenparser.rs

@@ -139,6 +139,10 @@
         self.stop_reason
     }
 
+    pub fn stopped(&self) -> bool {
+        self.stop_reason != StopReason::NotStopped
+    }
+
     pub fn is_fresh(&self) -> bool {
         self.is_fresh
     }
@@ -334,7 +338,7 @@
     fn check_initialized(&self, lbl: &str) -> Result<()> {
         ensure!(!self.is_fresh, "process_prompt() not called in {}", lbl);
         ensure!(
-            self.stop_reason == StopReason::NotStopped,
+            !self.stopped(),
             "parser stopped in {}; {}",
             lbl,
             self.error_message()
@@ -344,6 +348,9 @@
     }
 
     pub fn validate_token(&mut self, token: TokenId) -> Result<bool> {
+        if self.stopped() {
+            return Ok(false);
+        }
         self.check_initialized("validate_token")?;
         self.validate_tokens_raw(&[token]).map(|n| n > 0)
     }
@@ -407,6 +414,9 @@
     /// It does not tokenize forced bytes, so will accept non-canonical tokenizations.
     /// If called with more than one token, it may ignore max_tokens constraints.
     pub fn validate_tokens_raw(&mut self, tokens: &[TokenId]) -> Result<usize> {
+        if self.stopped() {
+            return Ok(0);
+        }
         self.check_initialized("validate_tokens_raw")?;
 
         if tokens.is_empty() {
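Both guards implement one idea: once `stop_reason` is set, the validation entry points report "no tokens accepted" instead of tripping the `check_initialized` error. A minimal self-contained model of that behavior (the types and the validation stub are invented for illustration; the real `TokenParser` does much more):

```rust
use anyhow::{ensure, Result};

#[derive(PartialEq)]
enum StopReason {
    NotStopped,
    EndOfSentence, // stand-in for the real stop reasons
}

struct Parser {
    stop_reason: StopReason,
}

impl Parser {
    fn stopped(&self) -> bool {
        self.stop_reason != StopReason::NotStopped
    }

    fn check_initialized(&self, lbl: &str) -> Result<()> {
        ensure!(!self.stopped(), "parser stopped in {}", lbl);
        Ok(())
    }

    // Before the fix, this path returned Err once the parser had stopped;
    // now it reports zero accepted tokens, so callers such as
    // try_consume_tokens keep working after a stop.
    fn validate_tokens_raw(&mut self, tokens: &[u32]) -> Result<usize> {
        if self.stopped() {
            return Ok(0);
        }
        self.check_initialized("validate_tokens_raw")?;
        Ok(tokens.len()) // stand-in for real validation
    }
}

fn main() -> Result<()> {
    let mut p = Parser { stop_reason: StopReason::EndOfSentence };
    assert_eq!(p.validate_tokens_raw(&[1, 2, 3])?, 0); // Ok(0), not Err
    Ok(())
}
```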
{llguidance-1.0.1 → llguidance-1.1.1}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "llguidance"
-version = "1.0.1"
+version = "1.1.1"
 description = "Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance"
 requires-python = ">=3.9"
 license = "MIT"
{llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/hf.py

@@ -1,8 +1,10 @@
+from copy import copy
 from typing import List, Optional
-from ._lib import LLTokenizer
 
 import transformers
 
+from ._lib import LLTokenizer
+
 
 def from_tokenizer(
     hf_tokenizer: transformers.PreTrainedTokenizerFast,
@@ -28,15 +30,18 @@ def from_tokenizer(
         # this will JSON-serialize the Rust impl of the tokenizer,
         # including added tokens from tokenizer_config.json
         # (which may be missing from tokenizer.json)
-        s = hf_tokenizer.backend_tokenizer.to_str()  # type: ignore
+        backend_tokenizer = copy(
+            hf_tokenizer.backend_tokenizer  # type: ignore[attr-defined]
+        )
+        # disable padding and truncation on copy before converting to string
+        backend_tokenizer.no_padding()
+        backend_tokenizer.no_truncation()
+        s = backend_tokenizer.to_str()
         # This is probably not needed - it should figure it out by itself
         # if n_vocab is None:
         #     n_vocab = hf_tokenizer.backend_tokenizer.get_vocab_size(with_added_tokens=True)
         if eos_token is None:
-            eos_token = hf_tokenizer.eos_token_id  # type: ignore
-        return LLTokenizer(s,
-                           n_vocab=n_vocab,
-                           eos_token=eos_token,
-                           slices=slices)
+            eos_token = hf_tokenizer.eos_token_id  # type: ignore
+        return LLTokenizer(s, n_vocab=n_vocab, eos_token=eos_token, slices=slices)
     else:
         raise ValueError("Only fast tokenizers are supported")
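Typical `from_tokenizer` usage is unchanged by the fix; a sketch (the model name is arbitrary — any fast tokenizer works):

```python
from transformers import AutoTokenizer
from llguidance.hf import from_tokenizer

hf_tok = AutoTokenizer.from_pretrained("gpt2")  # any fast tokenizer
ll_tok = from_tokenizer(hf_tok)
# The conversion now serializes a *copy* of the backend tokenizer with
# padding and truncation disabled; hf_tok keeps its own settings.
```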
{llguidance-1.0.1 → llguidance-1.1.1}/python/llguidance/llamacpp.py

@@ -44,8 +44,14 @@ def lltokenizer_from_vocab(
         assert n <= buffer_len
         tok = bytes(buffer[:n])  # type: ignore
         attr = llama_cpp.llama_token_get_attr(vocab, token)
-        if attr & llama_cpp.LLAMA_TOKEN_ATTR_CONTROL:
-            tok = b"\xFF" + tok
+        # If the token is a control token or a user-defined token that looks like a control token,
+        # we prefix it with 0xff to indicate that it should be treated as a special token.
+        if attr & llama_cpp.LLAMA_TOKEN_ATTR_CONTROL or (
+            attr & llama_cpp.LLAMA_TOKEN_ATTR_USER_DEFINED
+            and tok.startswith(b"<")
+            and tok.endswith(b">")
+        ):
+            tok = b"\xff" + tok
         tokens.append(tok)
 
     if n_vocab is not None:
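The heuristic is easy to state in isolation; a sketch of the rule above, with the llama_cpp attribute flags reduced to booleans:

```python
def is_special(tok: bytes, is_control: bool, is_user_defined: bool) -> bool:
    # Control tokens are always special; user-defined tokens count as
    # special only when they look like markers, e.g. b"<|im_start|>".
    return is_control or (
        is_user_defined and tok.startswith(b"<") and tok.endswith(b">")
    )

assert is_special(b"<|im_start|>", False, True)
assert not is_special(b"hello", False, True)
```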
{llguidance-1.0.1 → llguidance-1.1.1}/python_ext/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "llguidance_py"
-version = "1.0.1"
+version = "1.1.1"
 edition = "2021"
 license = "MIT"
 description = "Super-fast Structured Outputs"
{llguidance-1.0.1 → llguidance-1.1.1}/sample_parser/Cargo.toml

@@ -14,6 +14,9 @@ serde_json = { version = "1.0.138", features = ["preserve_order"] }
 serde = { version = "1.0.217", features = ["derive"] }
 clap = { version = "4.5.31", features = ["derive"] }
 
+[dev-dependencies]
+rstest = "0.25.0"
+
 [[bin]]
 name = "sample_parser"
 path = "src/sample_parser.rs"
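The new `rstest` dev-dependency powers the parameterized style of the `test_json_*` suites added in this release; a minimal sketch of that style (these cases are invented, not taken from the actual tests):

```rust
use rstest::rstest;

// Each #[case] expands into its own named test.
#[rstest]
#[case::null("null")]
#[case::number("42")]
#[case::array("[1, 2, 3]")]
fn parses_as_json(#[case] input: &str) {
    assert!(serde_json::from_str::<serde_json::Value>(input).is_ok());
}
```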