llguidance 1.1.2__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. {llguidance-1.1.2 → llguidance-1.2.0}/CHANGELOG.md +7 -0
  2. {llguidance-1.1.2 → llguidance-1.2.0}/Cargo.lock +6 -6
  3. {llguidance-1.1.2 → llguidance-1.2.0}/PKG-INFO +1 -1
  4. {llguidance-1.1.2 → llguidance-1.2.0}/parser/Cargo.toml +1 -1
  5. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/lark/lexer.rs +1 -1
  6. {llguidance-1.1.2 → llguidance-1.2.0}/pyproject.toml +1 -1
  7. {llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/_lib.pyi +34 -3
  8. {llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/numpy.py +14 -0
  9. {llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/torch.py +11 -0
  10. {llguidance-1.1.2 → llguidance-1.2.0}/python/torch_tests/test_matcher.py +149 -2
  11. {llguidance-1.1.2 → llguidance-1.2.0}/python_ext/Cargo.toml +1 -1
  12. {llguidance-1.1.2 → llguidance-1.2.0}/python_ext/src/llmatcher.rs +109 -0
  13. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/tests/common_lark_utils/mod.rs +7 -0
  14. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/tests/test_json_primitives.rs +13 -7
  15. llguidance-1.2.0/sample_parser/tests/test_json_schema_combinations.rs +372 -0
  16. llguidance-1.2.0/sample_parser/tests/test_json_string_format.rs +141 -0
  17. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/tests/test_lark.rs +16 -0
  18. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie/Cargo.toml +1 -1
  19. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie_hf_downloader/Cargo.toml +1 -1
  20. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie_hf_tokenizers/Cargo.toml +1 -1
  21. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie_tiktoken/Cargo.toml +1 -1
  22. llguidance-1.1.2/sample_parser/tests/test_json_schema_combinations.rs +0 -154
  23. {llguidance-1.1.2 → llguidance-1.2.0}/.github/workflows/code-coverage.yml +0 -0
  24. {llguidance-1.1.2 → llguidance-1.2.0}/.github/workflows/rust.yml +0 -0
  25. {llguidance-1.1.2 → llguidance-1.2.0}/.github/workflows/wheels.yml +0 -0
  26. {llguidance-1.1.2 → llguidance-1.2.0}/.gitignore +0 -0
  27. {llguidance-1.1.2 → llguidance-1.2.0}/CODE_OF_CONDUCT.md +0 -0
  28. {llguidance-1.1.2 → llguidance-1.2.0}/Cargo.toml +0 -0
  29. {llguidance-1.1.2 → llguidance-1.2.0}/LICENSE +0 -0
  30. {llguidance-1.1.2 → llguidance-1.2.0}/README.md +0 -0
  31. {llguidance-1.1.2 → llguidance-1.2.0}/SECURITY.md +0 -0
  32. {llguidance-1.1.2 → llguidance-1.2.0}/SUPPORT.md +0 -0
  33. {llguidance-1.1.2 → llguidance-1.2.0}/c_sample/Makefile +0 -0
  34. {llguidance-1.1.2 → llguidance-1.2.0}/c_sample/README.md +0 -0
  35. {llguidance-1.1.2 → llguidance-1.2.0}/c_sample/c_sample.cpp +0 -0
  36. {llguidance-1.1.2 → llguidance-1.2.0}/docs/de_recursing.md +0 -0
  37. {llguidance-1.1.2 → llguidance-1.2.0}/docs/fast_forward.md +0 -0
  38. {llguidance-1.1.2 → llguidance-1.2.0}/docs/json_schema.md +0 -0
  39. {llguidance-1.1.2 → llguidance-1.2.0}/docs/mask_plot.png +0 -0
  40. {llguidance-1.1.2 → llguidance-1.2.0}/docs/optimizations.md +0 -0
  41. {llguidance-1.1.2 → llguidance-1.2.0}/docs/parametric.md +0 -0
  42. {llguidance-1.1.2 → llguidance-1.2.0}/docs/special_tokens.md +0 -0
  43. {llguidance-1.1.2 → llguidance-1.2.0}/docs/syntax.md +0 -0
  44. {llguidance-1.1.2 → llguidance-1.2.0}/docs/toktrie.md +0 -0
  45. {llguidance-1.1.2 → llguidance-1.2.0}/json_stats/Cargo.toml +0 -0
  46. {llguidance-1.1.2 → llguidance-1.2.0}/json_stats/expected_maskbench.json +0 -0
  47. {llguidance-1.1.2 → llguidance-1.2.0}/json_stats/jstats.sh +0 -0
  48. {llguidance-1.1.2 → llguidance-1.2.0}/json_stats/scripts/split-stats.sh +0 -0
  49. {llguidance-1.1.2 → llguidance-1.2.0}/json_stats/scripts/split_plot.py +0 -0
  50. {llguidance-1.1.2 → llguidance-1.2.0}/json_stats/src/json_stats.rs +0 -0
  51. {llguidance-1.1.2 → llguidance-1.2.0}/json_stats/src/lib.rs +0 -0
  52. {llguidance-1.1.2 → llguidance-1.2.0}/json_stats/src/stats.rs +0 -0
  53. {llguidance-1.1.2 → llguidance-1.2.0}/parser/LICENSE +0 -0
  54. {llguidance-1.1.2 → llguidance-1.2.0}/parser/README.md +0 -0
  55. {llguidance-1.1.2 → llguidance-1.2.0}/parser/build.rs +0 -0
  56. {llguidance-1.1.2 → llguidance-1.2.0}/parser/cbindgen.toml +0 -0
  57. {llguidance-1.1.2 → llguidance-1.2.0}/parser/grammars/character.json +0 -0
  58. {llguidance-1.1.2 → llguidance-1.2.0}/parser/grammars/json.json +0 -0
  59. {llguidance-1.1.2 → llguidance-1.2.0}/parser/llguidance.h +0 -0
  60. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/api.rs +0 -0
  61. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/constraint.rs +0 -0
  62. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/earley/from_guidance.rs +0 -0
  63. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/earley/grammar.rs +0 -0
  64. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/earley/lexer.rs +0 -0
  65. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/earley/lexerspec.rs +0 -0
  66. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/earley/mod.rs +0 -0
  67. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/earley/parser.rs +0 -0
  68. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/earley/perf.rs +0 -0
  69. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/earley/regexvec.rs +0 -0
  70. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/earley/slicer.rs +0 -0
  71. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/factory.rs +0 -0
  72. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/ffi.rs +0 -0
  73. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/ffi_par.rs +0 -0
  74. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/grammar_builder.rs +0 -0
  75. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/hashcons.rs +0 -0
  76. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/json/README.md +0 -0
  77. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/json/compiler.rs +0 -0
  78. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/json/context_ref.rs +0 -0
  79. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/json/context_simple/context.rs +0 -0
  80. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/json/context_simple/draft.rs +0 -0
  81. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/json/context_simple/mod.rs +0 -0
  82. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/json/formats.rs +0 -0
  83. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/json/mod.rs +0 -0
  84. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/json/numeric.rs +0 -0
  85. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/json/schema.rs +0 -0
  86. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/json/shared_context.rs +0 -0
  87. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/json_validation.rs +0 -0
  88. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/lark/README.md +0 -0
  89. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/lark/ast.rs +0 -0
  90. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/lark/common.rs +0 -0
  91. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/lark/compiler.rs +0 -0
  92. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/lark/mod.rs +0 -0
  93. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/lark/parser.rs +0 -0
  94. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/lib.rs +0 -0
  95. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/logging.rs +0 -0
  96. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/matcher.rs +0 -0
  97. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/output.rs +0 -0
  98. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/panic_utils.rs +0 -0
  99. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/regex_rewrite.rs +0 -0
  100. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/stop_controller.rs +0 -0
  101. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/substring.rs +0 -0
  102. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/tokenizer_json.rs +0 -0
  103. {llguidance-1.1.2 → llguidance-1.2.0}/parser/src/tokenparser.rs +0 -0
  104. {llguidance-1.1.2 → llguidance-1.2.0}/plan.md +0 -0
  105. {llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/__init__.py +0 -0
  106. {llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/_grammar_from.py +0 -0
  107. {llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/_struct_tag.py +0 -0
  108. {llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/_tokenizer.py +0 -0
  109. {llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/_util.py +0 -0
  110. {llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/cli.py +0 -0
  111. {llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/gbnf_to_lark.py +0 -0
  112. {llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/hf.py +0 -0
  113. {llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/llamacpp.py +0 -0
  114. {llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/mlx.py +0 -0
  115. {llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/py.typed +0 -0
  116. {llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/tiktoken.py +0 -0
  117. {llguidance-1.1.2 → llguidance-1.2.0}/python/mypy.ini +0 -0
  118. {llguidance-1.1.2 → llguidance-1.2.0}/python/torch_tests/__init__.py +0 -0
  119. {llguidance-1.1.2 → llguidance-1.2.0}/python/torch_tests/test_bitmask.py +0 -0
  120. {llguidance-1.1.2 → llguidance-1.2.0}/python/torch_tests/test_hf.py +0 -0
  121. {llguidance-1.1.2 → llguidance-1.2.0}/python/torch_tests/test_llamacpp.py +0 -0
  122. {llguidance-1.1.2 → llguidance-1.2.0}/python/torch_tests/test_tiktoken.py +0 -0
  123. {llguidance-1.1.2 → llguidance-1.2.0}/python_ext/src/lib.rs +0 -0
  124. {llguidance-1.1.2 → llguidance-1.2.0}/python_ext/src/llamatokenizer.rs +0 -0
  125. {llguidance-1.1.2 → llguidance-1.2.0}/python_ext/src/llinterpreter.rs +0 -0
  126. {llguidance-1.1.2 → llguidance-1.2.0}/python_ext/src/parserlimits.rs +0 -0
  127. {llguidance-1.1.2 → llguidance-1.2.0}/python_ext/src/py.rs +0 -0
  128. {llguidance-1.1.2 → llguidance-1.2.0}/python_ext/src/pyjson.rs +0 -0
  129. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/Cargo.toml +0 -0
  130. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/README.md +0 -0
  131. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/cli.sh +0 -0
  132. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/blog.sample.json +0 -0
  133. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/blog.schema.json +0 -0
  134. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/blog.schema.ll.json +0 -0
  135. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/from-llama.cpp/README.md +0 -0
  136. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/from-llama.cpp/arithmetic.gbnf +0 -0
  137. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/from-llama.cpp/c.gbnf +0 -0
  138. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/from-llama.cpp/chess.gbnf +0 -0
  139. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/from-llama.cpp/english.gbnf +0 -0
  140. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/from-llama.cpp/japanese.gbnf +0 -0
  141. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/from-llama.cpp/json.gbnf +0 -0
  142. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/from-llama.cpp/json_arr.gbnf +0 -0
  143. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/from-llama.cpp/list.gbnf +0 -0
  144. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/from-llama.cpp/vllm-sql.gbnf +0 -0
  145. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/lark.lark +0 -0
  146. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/rfc.lark +0 -0
  147. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/rfc.xml +0 -0
  148. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/data/ulysses.md +0 -0
  149. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/gtest.sh +0 -0
  150. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/lark.sh +0 -0
  151. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/run.sh +0 -0
  152. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/src/lib.rs +0 -0
  153. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/src/minimal.rs +0 -0
  154. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/src/sample_parser.rs +0 -0
  155. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/tests/test_json_arrays.rs +0 -0
  156. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/tests/test_json_enum_const.rs +0 -0
  157. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/tests/test_json_objects.rs +0 -0
  158. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/tests/test_ll.rs +0 -0
  159. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/tests/test_raw_parser.rs +0 -0
  160. {llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/tests/test_stop.rs +0 -0
  161. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/annotate_asm.js +0 -0
  162. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/bump.py +0 -0
  163. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/cbindgen.sh +0 -0
  164. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/checklinks.py +0 -0
  165. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/checklinks.sh +0 -0
  166. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/ci-publish.py +0 -0
  167. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/disasm.sh +0 -0
  168. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/gbnf_to_lark.py +0 -0
  169. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/gen-testcase.py +0 -0
  170. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/git-version.sh +0 -0
  171. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/install-deps.sh +0 -0
  172. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/jsonschema-stats.js +0 -0
  173. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/remote-guidance-test.sh +0 -0
  174. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/rust-size.js +0 -0
  175. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/rust_size.py +0 -0
  176. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/test-guidance.sh +0 -0
  177. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/tokenizer_test.py +0 -0
  178. {llguidance-1.1.2 → llguidance-1.2.0}/scripts/update-git.py +0 -0
  179. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie/LICENSE +0 -0
  180. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie/README.md +0 -0
  181. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie/src/bytes.rs +0 -0
  182. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie/src/lib.rs +0 -0
  183. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie/src/recognizer.rs +0 -0
  184. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie/src/rng.rs +0 -0
  185. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie/src/svob.rs +0 -0
  186. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie/src/tokenv.rs +0 -0
  187. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie/src/toktree.rs +0 -0
  188. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie/tests/test_svob.rs +0 -0
  189. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie_hf_downloader/LICENSE +0 -0
  190. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie_hf_downloader/src/lib.rs +0 -0
  191. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie_hf_tokenizers/LICENSE +0 -0
  192. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie_hf_tokenizers/src/lib.rs +0 -0
  193. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie_tiktoken/LICENSE +0 -0
  194. {llguidance-1.1.2 → llguidance-1.2.0}/toktrie_tiktoken/src/lib.rs +0 -0
{llguidance-1.1.2 → llguidance-1.2.0}/CHANGELOG.md
@@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. Dates are d
 
 If a release doesn't introduce any interesting changes (build fixes etc.), it's skipped.
 
+#### [v1.1.3](https://github.com/guidance-ai/llguidance/compare/v1.1.2...v1.1.3) 2025-08-12
+
+- support multithreaded compute bitmask for speculative decoding [`#225`](https://github.com/guidance-ai/llguidance/pull/225)
+- thank you [@ZonePG](https://github.com/ZonePG)!
+- `force_lexeme_end` -> `try_lexeme_end` in lark lexer when out of input [`#229`](https://github.com/guidance-ai/llguidance/pull/229); fixes [`#228`](https://github.com/guidance-ai/llguidance/issues/228)
+- more JSON test coverage
+
 #### [v1.1.2](https://github.com/guidance-ai/llguidance/compare/v1.1.1...v1.1.2) 2025-08-08
 
 - add flag in ParserLimits to disable verbose errors [`#227`](https://github.com/guidance-ai/llguidance/pull/227)
{llguidance-1.1.2 → llguidance-1.2.0}/Cargo.lock
@@ -1241,7 +1241,7 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
 
 [[package]]
 name = "llguidance"
-version = "1.1.2"
+version = "1.2.0"
 dependencies = [
  "anyhow",
  "derivre",
@@ -1260,7 +1260,7 @@ dependencies = [
 
 [[package]]
 name = "llguidance_py"
-version = "1.1.2"
+version = "1.2.0"
 dependencies = [
  "anyhow",
  "bytemuck",
@@ -2478,7 +2478,7 @@ dependencies = [
 
 [[package]]
 name = "toktrie"
-version = "1.1.2"
+version = "1.2.0"
 dependencies = [
  "anyhow",
  "bytemuck",
@@ -2489,7 +2489,7 @@ dependencies = [
 
 [[package]]
 name = "toktrie_hf_downloader"
-version = "1.1.2"
+version = "1.2.0"
 dependencies = [
  "anyhow",
  "hf-hub",
@@ -2500,7 +2500,7 @@ dependencies = [
 
 [[package]]
 name = "toktrie_hf_tokenizers"
-version = "1.1.2"
+version = "1.2.0"
 dependencies = [
  "anyhow",
  "log",
@@ -2512,7 +2512,7 @@ dependencies = [
 
 [[package]]
 name = "toktrie_tiktoken"
-version = "1.1.2"
+version = "1.2.0"
 dependencies = [
  "anyhow",
  "log",
{llguidance-1.1.2 → llguidance-1.2.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llguidance
-Version: 1.1.2
+Version: 1.2.0
 License-File: LICENSE
 Summary: Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance
 Author: Michal Moskal
{llguidance-1.1.2 → llguidance-1.2.0}/parser/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "llguidance"
-version = "1.1.2"
+version = "1.2.0"
 edition = "2021"
 license = "MIT"
 description = "Super-fast Structured Outputs"
{llguidance-1.1.2 → llguidance-1.2.0}/parser/src/lark/lexer.rs
@@ -279,7 +279,7 @@ pub fn lex_lark(input: &str) -> Result<Vec<Lexeme>> {
     while idx <= input_bytes.len() {
         let mut b = b'\n';
         let res = if idx == input_bytes.len() {
-            lexer.force_lexeme_end(state)
+            lexer.try_lexeme_end(state)
         } else {
            b = input_bytes[idx];
            lexer.advance(state, b, false)
{llguidance-1.1.2 → llguidance-1.2.0}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "llguidance"
-version = "1.1.2"
+version = "1.2.0"
 description = "Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance"
 requires-python = ">=3.9"
 license = "MIT"
{llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/_lib.pyi
@@ -526,18 +526,49 @@ class LLExecutor:
         self,
         interpreters: List[Tuple[LLMatcher, int]],
         trg_pointer: int,
-        one_mask_byte_size: int,
+        one_mask_bytes: int,
         trg_batch_size: int,
     ) -> None:
         """
         Compute the token mask directly into memory at the specified pointer.
         For each matcher, provide the index of the target mask.
-        If index is K, the memory will be written at trg_pointer + K * one_mask_byte_size,
+        If index is K, the memory will be written at trg_pointer + K * one_mask_bytes,
         where K < trg_batch_size.
-        Memory has to have size trg_batch_size * one_mask_byte_size.
+        Memory has to have size trg_batch_size * one_mask_bytes.
         Prefer to use fill_next_token_bitmask_par(), which wraps this.
         """
 
+    def unsafe_compute_mask_ptr_with_draft_token(
+        self,
+        interpreters: List[Tuple[LLMatcher, int, List[int]]],
+        trg_pointer: int,
+        one_mask_bytes: int,
+        trg_batch_size: int,
+    ) -> None:
+        """
+        Compute the token mask directly into memory at the specified pointer, including draft tokens.
+
+        This function extends unsafe_compute_mask_ptr() to handle draft tokens in speculative decoding.
+        For each matcher in the batch, it computes masks for both the current position and all draft tokens.
+
+        Args:
+            interpreters: List of tuples containing:
+                - LLMatcher: The matcher object for constrained generation
+                - int: Index K indicating the target mask position (K < trg_batch_size)
+                - List[int]: Draft tokens to be processed for speculative decoding
+            trg_pointer: Memory address where mask data will be written
+            one_mask_bytes: Size in bytes of a single token mask
+            trg_batch_size: Total batch size for memory allocation validation
+
+        Memory Layout:
+            - Main mask written at: trg_pointer + K * one_mask_bytes
+            - Draft token i mask written at: trg_pointer + (K + i + 1) * one_mask_bytes
+            - Total memory required: trg_batch_size * one_mask_bytes
+
+        The function processes each matcher's draft tokens sequentially, advancing the matcher state
+        for each valid token until encountering an invalid token or termination condition.
+        State rollback is performed to maintain matcher consistency.
+        """
 
 class JsonCompileOptions(TypedDict, total=False):
     # defaults to ","
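The indexing rule in that docstring is easy to get wrong, so here is a tiny self-contained sketch of the layout (plain Python, not from the package; base, one_mask_bytes, K, and the draft tokens are made-up values):

# Hypothetical values illustrating the draft-token mask layout.
base = 0x1000            # trg_pointer: start of the mask buffer
one_mask_bytes = 128     # bytes per single token mask
K = 3                    # this matcher's target index
draft_tokens = [17, 42]  # two speculative draft tokens

# Main mask for the current position:
main_addr = base + K * one_mask_bytes
# One extra mask per draft token, written right after the main one:
draft_addrs = [base + (K + i + 1) * one_mask_bytes
               for i in range(len(draft_tokens))]

assert main_addr == base + 3 * one_mask_bytes
assert draft_addrs == [base + 4 * one_mask_bytes, base + 5 * one_mask_bytes]
# The buffer must hold trg_batch_size * one_mask_bytes bytes, so
# trg_batch_size has to cover K + len(draft_tokens) + 1 slots.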
{llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/numpy.py
@@ -66,3 +66,17 @@ def fill_next_token_bitmask_par(executor: LLExecutor,
     batch, vocab = bitmask.shape
     assert bitmask.flags["C_CONTIGUOUS"], "Mask must be contiguous"
     executor.unsafe_compute_mask_ptr(matchers, bitmask.ctypes.data, vocab * 4, batch)
+
+
+def fill_next_token_bitmask_par_with_draft_tokens(executor: LLExecutor,
+                                                  matchers: List[Tuple[LLMatcher, int, List[int]]],
+                                                  bitmask: NDArray[np.int32]) -> None:
+    """
+    Compute the token mask directly into the specified array.
+    For each matcher, provide the index of the target mask.
+    """
+    assert bitmask.dtype == np.int32, "Mask must be int32"
+    assert bitmask.ndim == 2, "Mask must be 2D"
+    batch, vocab = bitmask.shape
+    assert bitmask.flags["C_CONTIGUOUS"], "Mask must be contiguous"
+    executor.unsafe_compute_mask_ptr_with_draft_token(matchers, bitmask.ctypes.data, vocab * 4, batch)
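For orientation, a minimal usage sketch of the new numpy helper, modeled on the test added further below; matcher() and tokenizer() stand in for helpers that python/torch_tests/test_matcher.py already defines (not shown in this diff), so treat the setup as an assumption:

import llguidance
from llguidance.numpy import (
    allocate_token_bitmask,
    fill_next_token_bitmask_par_with_draft_tokens,
)

t = tokenizer()                              # assumed helper: returns an LLTokenizer
m = matcher(r"start: /[a-zA-Z]/ /[0-9]*/")   # assumed helper: returns an LLMatcher
draft = t.tokenize_str("a1")                 # speculative draft tokens

ex = llguidance.LLExecutor()
# one mask slot for the current position plus one per draft token
mask = allocate_token_bitmask(len(draft) + 1, t.vocab_size)
fill_next_token_bitmask_par_with_draft_tokens(ex, [(m, 0, draft)], mask)
# mask[0] constrains the next token; mask[1 + i] constrains the token that
# would follow if draft[: i + 1] were accepted.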
{llguidance-1.1.2 → llguidance-1.2.0}/python/llguidance/torch.py
@@ -66,3 +66,14 @@ def fill_next_token_bitmask_par(executor: LLExecutor,
     assert bitmask.is_contiguous(), "Mask must be contiguous"
     executor.unsafe_compute_mask_ptr(matchers, bitmask.data_ptr(), vocab * 4,
                                      batch)
+
+
+def fill_next_token_bitmask_par_with_draft_tokens(executor: LLExecutor,
+                                                  matchers: List[Tuple[LLMatcher, int, List[int]]],
+                                                  bitmask: torch.Tensor) -> None:
+    assert bitmask.dtype == torch.int32, "Mask must be int32"
+    assert bitmask.is_cpu, "Mask must be on CPU"
+    assert bitmask.dim() == 2, "Mask must be 2D"
+    batch, vocab = bitmask.shape
+    assert bitmask.is_contiguous(), "Mask must be contiguous"
+    executor.unsafe_compute_mask_ptr_with_draft_token(matchers, bitmask.data_ptr(), vocab * 4, batch)
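The torch variant enforces the same contract through its asserts. A minimal sketch of a conforming buffer (the allocation itself is an assumption; only the asserted properties, int32 / 2-D / CPU / contiguous, come from the code above):

import torch

batch, mask_words = 4, (50_000 + 31) // 32   # ceil(vocab_size / 32) int32 words per row
bitmask = torch.zeros((batch, mask_words), dtype=torch.int32)
assert bitmask.is_cpu and bitmask.is_contiguous() and bitmask.dim() == 2
# fill_next_token_bitmask_par_with_draft_tokens(executor, matchers, bitmask)
# then passes mask_words * 4 as one_mask_bytes, i.e. the row stride in bytes.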
{llguidance-1.1.2 → llguidance-1.2.0}/python/torch_tests/test_matcher.py
@@ -1,6 +1,11 @@
 from typing import Any, Dict, List, Tuple, Union
 import llguidance
-from llguidance.numpy import fill_next_token_bitmask_par, allocate_token_bitmask
+from llguidance.numpy import (
+    fill_next_token_bitmask_par,
+    fill_next_token_bitmask_par_with_draft_tokens,
+    allocate_token_bitmask,
+)
+
 from llguidance import LLMatcher, LLTokenizer, StructTag, LLParserLimits
 import pytest
 from numpy.typing import NDArray
@@ -156,7 +161,8 @@ def test_slices() -> None:
 
 def mask_has(mask: NDArray[np.int32], t: int) -> bool:
     v: int = mask[t // 32]
-    return v & (1 << (t % 32)) != 0
+    # use np.int32 to avoid int32 overflow errors
+    return bool(v & (np.int32(1) << (t % 32)) != 0)
 
 
 def test_par_errors() -> None:
@@ -207,6 +213,147 @@ def test_par_errors() -> None:
     assert mask_has(mask[2, :], t_1)
 
 
+def retrieve_tokens_from_bitmask(
+    bitmask: NDArray[np.int32], vocab_size: int
+) -> Tuple[List[List[int]], List[List[int]]]:
+    batch_accepted_tokens: List[List[int]] = []
+    batch_rejected_tokens: List[List[int]] = []
+    for batch_idx in range(bitmask.shape[0]):
+        batch_accepted_tokens.append([])
+        batch_rejected_tokens.append([])
+        for token_id in range(vocab_size):
+            print(bitmask.shape)
+            if mask_has(bitmask[batch_idx], token_id):
+                batch_accepted_tokens[-1].append(token_id)
+            else:
+                batch_rejected_tokens[-1].append(token_id)
+    return batch_accepted_tokens, batch_rejected_tokens
+
+
+def test_par_draft_tokens() -> None:
+    t = tokenizer()
+    exec = llguidance.LLExecutor()
+    g0 = matcher(r"start: /[a-zA-Z]/ /[0-9]*/")
+    g1 = matcher(r"start: /[0-9]/ /[a-zA-Z]*/")
+    g2 = matcher(r"start: <[*]>*")
+    g3 = matcher(r"start: /[a-zA-Z]/ /[0-9]*/")
+
+    # should be OK
+    g0_draft_tokens = t.tokenize_str("a1")
+    g1_draft_tokens = t.tokenize_str("2b")
+    g2_draft_tokens = t.tokenize_str("cc")
+    # g3's draft at index 1 is rejected
+    g3_draft_tokens = t.tokenize_str("aa")
+    mask = allocate_token_bitmask(
+        len(g0_draft_tokens)
+        + 1
+        + len(g1_draft_tokens)
+        + 1
+        + len(g2_draft_tokens)
+        + 1
+        + len(g3_draft_tokens)
+        + 1,
+        t.vocab_size,
+    )
+    fill_next_token_bitmask_par_with_draft_tokens(
+        exec,
+        [
+            (g0, 0, g0_draft_tokens),
+            (g1, 3, g1_draft_tokens),
+            (g2, 6, g2_draft_tokens),
+            (g3, 9, g3_draft_tokens),
+        ],
+        mask,
+    )
+
+    batch_accepted_tokens, batch_rejected_tokens = retrieve_tokens_from_bitmask(
+        mask, t.vocab_size
+    )
+    for batch_idx in range(len(batch_accepted_tokens)):
+        assert (
+            len(batch_accepted_tokens[batch_idx])
+            + len(batch_rejected_tokens[batch_idx])
+            == t.vocab_size
+        )
+
+    # for g0, the first token should be a letter,
+    # the remaining tokens should be digits
+    mask_start_idx = 0
+    for idx, mask_idx in enumerate(
+        range(mask_start_idx, mask_start_idx + len(g0_draft_tokens) + 1)
+    ):
+        g0_accepted_tokens = batch_accepted_tokens[mask_idx]
+        for token_id in range(t.vocab_size):
+            if token_id in g0_accepted_tokens:
+                if idx == 0:
+                    assert t.decode_str([token_id]).isalpha()
+                else:
+                    assert token_id == t.eos_token or t.decode_str([token_id]).isdigit()
+            else:
+                assert not g0.try_consume_tokens([token_id])
+        if idx < len(g0_draft_tokens):
+            assert g0.consume_token(g0_draft_tokens[idx])
+
+    # for g1, the first token should be a digit,
+    # the remaining tokens should be letters
+    mask_start_idx += len(g0_draft_tokens) + 1
+    for idx, mask_idx in enumerate(
+        range(mask_start_idx, mask_start_idx + len(g1_draft_tokens) + 1)
+    ):
+        g1_accepted_tokens = batch_accepted_tokens[mask_idx]
+        for token_id in range(t.vocab_size):
+            if token_id in g1_accepted_tokens:
+                if idx == 0:
+                    assert t.decode_str([token_id]).isdigit()
+                else:
+                    assert token_id == t.eos_token or t.decode_str([token_id]).isalpha()
+            else:
+                assert not g1.try_consume_tokens([token_id])
+        if idx < len(g1_draft_tokens):
+            assert g1.consume_token(g1_draft_tokens[idx])
+
+    # for g2, all tokens should be accepted
+    mask_start_idx += len(g1_draft_tokens) + 1
+    for idx, mask_idx in enumerate(
+        range(mask_start_idx, mask_start_idx + len(g2_draft_tokens) + 1)
+    ):
+        g2_rejected_tokens = batch_rejected_tokens[mask_idx]
+        g2_accepted_tokens = batch_accepted_tokens[mask_idx]
+        assert len(g2_rejected_tokens) == 0
+        assert len(g2_accepted_tokens) == t.vocab_size
+        for token_id in range(t.vocab_size):
+            if token_id in g2_accepted_tokens:
+                assert mask_has(mask[mask_idx, :], token_id)
+            else:
+                assert not mask_has(mask[mask_idx, :], token_id)
+        if idx < len(g2_draft_tokens):
+            assert g2.consume_token(g2_draft_tokens[idx])
+
+    # for g3:
+    # g3_draft_tokens[0] is accepted,
+    # g3_draft_tokens[1] is rejected
+    mask_start_idx += len(g2_draft_tokens) + 1
+    for idx, mask_idx in enumerate(
+        range(mask_start_idx, mask_start_idx + len(g3_draft_tokens) + 1)
+    ):
+        g3_rejected_tokens = batch_rejected_tokens[mask_idx]
+        g3_accepted_tokens = batch_accepted_tokens[mask_idx]
+        if idx <= 1:
+            for token_id in range(t.vocab_size):
+                if token_id in g3_accepted_tokens:
+                    assert mask_has(mask[mask_idx, :], token_id)
+                else:
+                    assert not mask_has(mask[mask_idx, :], token_id)
+            if idx == 0:
+                assert g3.consume_token(g3_draft_tokens[idx])
+            else:
+                assert not g3.consume_token(g3_draft_tokens[idx])
+        else:
+            # every token's bit is set in this mask
+            assert len(g3_rejected_tokens) == 0
+            assert len(g3_accepted_tokens) == t.vocab_size
+
+
 def consume_tokens(m: LLMatcher, tokens: List[int]) -> None:
     print("Consume", tokenizer().dbg_tokens(tokens))
     assert m.stop_reason() == "NotStopped"
{llguidance-1.1.2 → llguidance-1.2.0}/python_ext/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "llguidance_py"
-version = "1.1.2"
+version = "1.2.0"
 edition = "2021"
 license = "MIT"
 description = "Super-fast Structured Outputs"
{llguidance-1.1.2 → llguidance-1.2.0}/python_ext/src/llmatcher.rs
@@ -100,6 +100,73 @@ impl LLExecutor {
 
         Ok(())
     }
+
+    fn unsafe_compute_mask_ptr_with_draft_token(
+        &self,
+        interpreters: Bound<'_, PyList>,
+        trg_ptr: usize,
+        one_mask_bytes: usize,
+        trg_batch_size: usize,
+        py: Python<'_>,
+    ) -> PyResult<()> {
+        if interpreters.len() == 0 {
+            return Err(PyValueError::new_err("No interpreters"));
+        }
+
+        let mut mut_refs = vec![];
+        for ent in interpreters.iter() {
+            let tupl = ent.downcast::<PyTuple>()?;
+            if tupl.len() != 3 {
+                return Err(PyValueError::new_err(
+                    "Expecting (LLMatcher, int, List[int]) tuple",
+                ));
+            }
+            let interp = tupl.get_item(0)?.extract::<PyRefMut<LLMatcher>>()?;
+            let idx = tupl.get_item(1)?.extract::<usize>()?;
+            if idx >= trg_batch_size {
+                return Err(PyValueError::new_err("Target index out of bounds"));
+            }
+            let draft_tokens = tupl.get_item(2)?.extract::<Vec<TokenId>>()?;
+            if draft_tokens.is_empty() {
+                return Err(PyValueError::new_err("Draft tokens must not be empty"));
+            }
+            interp.validate_mask_ptr(trg_ptr, one_mask_bytes)?;
+            mut_refs.push((interp, idx, draft_tokens));
+        }
+
+        if mut_refs.len() == 1 {
+            let (mut interp, idx, draft_tokens) = mut_refs.pop().unwrap();
+            return interp.unsafe_compute_mask_ptr_with_draft_token(
+                trg_ptr + idx * one_mask_bytes,
+                one_mask_bytes,
+                draft_tokens,
+                py,
+            );
+        }
+
+        let mut_refs2: Vec<_> = mut_refs
+            .iter_mut()
+            .map(|(x, idx, draft_tokens)| (x.deref_mut(), *idx, draft_tokens.clone()))
+            .collect();
+
+        use rayon::prelude::*;
+
+        py.allow_threads(|| {
+            self.pool.install(|| {
+                mut_refs2
+                    .into_par_iter()
+                    .for_each(|(interp, idx, draft_tokens)| {
+                        interp.unsafe_compute_mask_ptr_inner_with_draft_tokens(
+                            trg_ptr + idx * one_mask_bytes,
+                            one_mask_bytes,
+                            draft_tokens,
+                        );
+                    })
+            })
+        });
+
+        Ok(())
+    }
 }
 
 impl LLMatcher {
@@ -125,6 +192,34 @@ impl LLMatcher {
         trg_slice.copy_from_slice(&src[0..trg_slice.len()]);
     }
 
+    fn unsafe_compute_mask_ptr_inner_with_draft_tokens(
+        &mut self,
+        trg_ptr: usize,
+        trg_bytes: usize,
+        draft_tokens: Vec<TokenId>,
+    ) {
+        let mut state_advancements = 0;
+        let spec_k = draft_tokens.len();
+        #[allow(clippy::needless_range_loop)]
+        for token_idx in 0..=spec_k {
+            self.unsafe_compute_mask_ptr_inner(trg_ptr + token_idx * trg_bytes, trg_bytes);
+
+            if token_idx == spec_k || self.inner.is_stopped() {
+                break;
+            }
+
+            let token = draft_tokens[token_idx];
+
+            match self.inner.try_consume_tokens(&[token]) {
+                Ok(cosumed) if cosumed > 0 => state_advancements += 1,
+                _ => break,
+            }
+        }
+        if state_advancements > 0 {
+            self.rollback(state_advancements);
+        }
+    }
+
     fn eos_token_set(&self) -> SimpleVob {
         let trie = self.tok_env.tok_trie();
         trie.singleton_token_set(trie.eos_token())
@@ -337,6 +432,20 @@ impl LLMatcher {
         Ok(())
     }
 
+    fn unsafe_compute_mask_ptr_with_draft_token(
+        &mut self,
+        trg_ptr: usize,
+        trg_bytes: usize,
+        draft_tokens: Vec<TokenId>,
+        py: Python<'_>,
+    ) -> PyResult<()> {
+        self.validate_mask_ptr(trg_ptr, trg_bytes)?;
+        py.allow_threads(|| {
+            self.unsafe_compute_mask_ptr_inner_with_draft_tokens(trg_ptr, trg_bytes, draft_tokens)
+        });
+        Ok(())
+    }
+
     fn compute_logit_bias(&mut self, py: Python<'_>) -> Cow<[u8]> {
         py.allow_threads(|| {
             let m = self.compute_mask_or_eos();
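In prose: for each draft token the Rust loop above writes a mask, tentatively consumes the token, and stops early on rejection or termination; afterwards it rolls the matcher back so callers see an unchanged state. A rough Python rendering of that control flow (a sketch only, assuming LLMatcher exposes is_stopped(), try_consume_tokens(), and rollback() to Python as the pyclass methods suggest; write_mask_at is a hypothetical stand-in for the raw pointer write):

def compute_masks_with_drafts(m, draft_tokens, write_mask_at):
    # write_mask_at(slot) stands in for the write at trg_ptr + slot * one_mask_bytes
    advanced = 0
    for i in range(len(draft_tokens) + 1):
        write_mask_at(i)                       # mask for the current matcher state
        if i == len(draft_tokens) or m.is_stopped():
            break
        if m.try_consume_tokens([draft_tokens[i]]) > 0:
            advanced += 1                      # draft accepted, state advanced
        else:
            break                              # draft rejected: remaining slots are not written
    if advanced:
        m.rollback(advanced)                   # restore the pre-call state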
{llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/tests/common_lark_utils/mod.rs
@@ -13,6 +13,13 @@ use llguidance::{
 use sample_parser::*;
 use serde_json::Value;
 
+#[allow(dead_code)]
+#[derive(Debug)]
+pub enum NumericBounds {
+    Inclusive,
+    Exclusive,
+}
+
 pub fn make_parser(lark: &str, quiet: bool) -> Result<TokenParser> {
     let grm = TopLevelGrammar::from_lark(lark.to_string());
     let mut parser = get_parser_factory().create_parser_from_init(
{llguidance-1.1.2 → llguidance-1.2.0}/sample_parser/tests/test_json_primitives.rs
@@ -2,13 +2,7 @@ use rstest::*;
 use serde_json::{json, Value};
 
 mod common_lark_utils;
-use common_lark_utils::{json_err_test, json_schema_check};
-
-#[derive(Debug)]
-enum NumericBounds {
-    Inclusive,
-    Exclusive,
-}
+use common_lark_utils::{json_err_test, json_schema_check, NumericBounds};
 
 #[test]
 fn null_schema() {
@@ -219,6 +213,18 @@ fn integer_multipleof_zero(#[values(0, 3, 12, 1818)] test_value: i64) {
 }
 */
 
+#[rstest]
+fn integer_both_minima(#[values(2, 3, 4)] test_value: i64) {
+    let schema = &json!({"type":"integer", "minimum": 2, "exclusiveMinimum": 1});
+    json_schema_check(schema, &json!(test_value), true);
+}
+
+#[rstest]
+fn integer_both_maxima(#[values(-1, 0, 1)] test_value: i64) {
+    let schema = &json!({"type":"integer", "maximum": 4, "exclusiveMaximum": 2});
+    json_schema_check(schema, &json!(test_value), true);
+}
+
 // ============================================================================
 
 #[rstest]
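When a schema carries both an inclusive and an exclusive bound, the admitted range is their intersection; the new tests pin that behavior down. A quick cross-check in plain Python (not from the package):

def ok_minima(n: int) -> bool:   # {"type": "integer", "minimum": 2, "exclusiveMinimum": 1}
    return n >= 2 and n > 1      # effective bound: n >= 2

def ok_maxima(n: int) -> bool:   # {"type": "integer", "maximum": 4, "exclusiveMaximum": 2}
    return n <= 4 and n < 2      # effective bound: n <= 1

assert all(ok_minima(v) for v in (2, 3, 4))
assert all(ok_maxima(v) for v in (-1, 0, 1))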