llguidance 0.7.11__tar.gz → 0.7.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. {llguidance-0.7.11 → llguidance-0.7.12}/CHANGELOG.md +8 -3
  2. {llguidance-0.7.11 → llguidance-0.7.12}/Cargo.lock +7 -7
  3. {llguidance-0.7.11 → llguidance-0.7.12}/PKG-INFO +1 -1
  4. {llguidance-0.7.11 → llguidance-0.7.12}/docs/syntax.md +40 -3
  5. {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/expected_maskbench.json +3 -7
  6. {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/jstats.sh +5 -0
  7. {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/src/json_stats.rs +31 -6
  8. {llguidance-0.7.11 → llguidance-0.7.12}/parser/Cargo.toml +2 -2
  9. {llguidance-0.7.11 → llguidance-0.7.12}/parser/llguidance.h +6 -0
  10. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/api.rs +1 -0
  11. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/from_guidance.rs +60 -61
  12. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/grammar.rs +86 -36
  13. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/lexerspec.rs +37 -10
  14. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/mod.rs +1 -2
  15. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/parser.rs +13 -34
  16. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/regexvec.rs +14 -2
  17. llguidance-0.7.12/parser/src/earley/slicer.rs +394 -0
  18. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/factory.rs +29 -38
  19. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/ffi.rs +124 -63
  20. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/grammar_builder.rs +21 -24
  21. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/compiler.rs +19 -1
  22. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/lexer.rs +1 -1
  23. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lib.rs +1 -0
  24. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/matcher.rs +20 -1
  25. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/tokenparser.rs +13 -37
  26. {llguidance-0.7.11 → llguidance-0.7.12}/pyproject.toml +1 -1
  27. {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/Cargo.toml +1 -1
  28. {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/src/llinterpreter.rs +10 -12
  29. {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/src/llmatcher.rs +10 -17
  30. {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/src/py.rs +1 -2
  31. llguidance-0.7.12/sample_parser/src/minimal.rs +83 -0
  32. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/src/sample_parser.rs +52 -110
  33. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/tests/test_lark.rs +6 -3
  34. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/tests/test_ll.rs +1 -2
  35. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/Cargo.toml +1 -1
  36. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/tokenv.rs +4 -0
  37. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/toktree.rs +13 -0
  38. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_downloader/Cargo.toml +1 -1
  39. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_tokenizers/Cargo.toml +1 -1
  40. llguidance-0.7.11/parser/src/earley/slicer.rs +0 -249
  41. llguidance-0.7.11/sample_parser/src/minimal.rs +0 -179
  42. {llguidance-0.7.11 → llguidance-0.7.12}/.github/workflows/rust.yml +0 -0
  43. {llguidance-0.7.11 → llguidance-0.7.12}/.github/workflows/wheels.yml +0 -0
  44. {llguidance-0.7.11 → llguidance-0.7.12}/.gitignore +0 -0
  45. {llguidance-0.7.11 → llguidance-0.7.12}/CODE_OF_CONDUCT.md +0 -0
  46. {llguidance-0.7.11 → llguidance-0.7.12}/Cargo.toml +0 -0
  47. {llguidance-0.7.11 → llguidance-0.7.12}/LICENSE +0 -0
  48. {llguidance-0.7.11 → llguidance-0.7.12}/README.md +0 -0
  49. {llguidance-0.7.11 → llguidance-0.7.12}/SECURITY.md +0 -0
  50. {llguidance-0.7.11 → llguidance-0.7.12}/SUPPORT.md +0 -0
  51. {llguidance-0.7.11 → llguidance-0.7.12}/c_sample/Makefile +0 -0
  52. {llguidance-0.7.11 → llguidance-0.7.12}/c_sample/README.md +0 -0
  53. {llguidance-0.7.11 → llguidance-0.7.12}/c_sample/c_sample.cpp +0 -0
  54. {llguidance-0.7.11 → llguidance-0.7.12}/docs/fast_forward.md +0 -0
  55. {llguidance-0.7.11 → llguidance-0.7.12}/docs/json_schema.md +0 -0
  56. {llguidance-0.7.11 → llguidance-0.7.12}/docs/mask_plot.png +0 -0
  57. {llguidance-0.7.11 → llguidance-0.7.12}/docs/optimizations.md +0 -0
  58. {llguidance-0.7.11 → llguidance-0.7.12}/docs/special_tokens.md +0 -0
  59. {llguidance-0.7.11 → llguidance-0.7.12}/docs/toktrie.md +0 -0
  60. {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/Cargo.toml +0 -0
  61. {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/scripts/split-stats.sh +0 -0
  62. {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/scripts/split_plot.py +0 -0
  63. {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/src/lib.rs +0 -0
  64. {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/src/stats.rs +0 -0
  65. {llguidance-0.7.11 → llguidance-0.7.12}/parser/LICENSE +0 -0
  66. {llguidance-0.7.11 → llguidance-0.7.12}/parser/README.md +0 -0
  67. {llguidance-0.7.11 → llguidance-0.7.12}/parser/build.rs +0 -0
  68. {llguidance-0.7.11 → llguidance-0.7.12}/parser/cbindgen.toml +0 -0
  69. {llguidance-0.7.11 → llguidance-0.7.12}/parser/grammars/character.json +0 -0
  70. {llguidance-0.7.11 → llguidance-0.7.12}/parser/grammars/json.json +0 -0
  71. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/constraint.rs +0 -0
  72. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/lexer.rs +0 -0
  73. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/perf.rs +0 -0
  74. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/ffi_par.rs +0 -0
  75. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/README.md +0 -0
  76. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/context_ref.rs +0 -0
  77. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/context_simple/context.rs +0 -0
  78. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/context_simple/draft.rs +0 -0
  79. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/context_simple/mod.rs +0 -0
  80. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/formats.rs +0 -0
  81. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/mod.rs +0 -0
  82. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/numeric.rs +0 -0
  83. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/schema.rs +0 -0
  84. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/shared_context.rs +0 -0
  85. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json_validation.rs +0 -0
  86. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/README.md +0 -0
  87. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/ast.rs +0 -0
  88. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/common.rs +0 -0
  89. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/compiler.rs +0 -0
  90. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/mod.rs +0 -0
  91. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/parser.rs +0 -0
  92. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/logging.rs +0 -0
  93. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/output.rs +0 -0
  94. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/panic_utils.rs +0 -0
  95. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/stop_controller.rs +0 -0
  96. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/substring.rs +0 -0
  97. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/tokenizer_json.rs +0 -0
  98. {llguidance-0.7.11 → llguidance-0.7.12}/plan.md +0 -0
  99. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/__init__.py +0 -0
  100. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/_grammar_from.py +0 -0
  101. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/_lib.pyi +0 -0
  102. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/_struct_tag.py +0 -0
  103. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/_tokenizer.py +0 -0
  104. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/_util.py +0 -0
  105. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/cli.py +0 -0
  106. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/gbnf_to_lark.py +0 -0
  107. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/hf.py +0 -0
  108. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/mlx.py +0 -0
  109. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/numpy.py +0 -0
  110. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/py.typed +0 -0
  111. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/torch.py +0 -0
  112. {llguidance-0.7.11 → llguidance-0.7.12}/python/mypy.ini +0 -0
  113. {llguidance-0.7.11 → llguidance-0.7.12}/python/torch_tests/__init__.py +0 -0
  114. {llguidance-0.7.11 → llguidance-0.7.12}/python/torch_tests/test_bitmask.py +0 -0
  115. {llguidance-0.7.11 → llguidance-0.7.12}/python/torch_tests/test_hf.py +0 -0
  116. {llguidance-0.7.11 → llguidance-0.7.12}/python/torch_tests/test_matcher.py +0 -0
  117. {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/src/lib.rs +0 -0
  118. {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/src/pyjson.rs +0 -0
  119. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/Cargo.toml +0 -0
  120. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/README.md +0 -0
  121. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/cli.sh +0 -0
  122. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/blog.sample.json +0 -0
  123. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/blog.schema.json +0 -0
  124. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/blog.schema.ll.json +0 -0
  125. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/README.md +0 -0
  126. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/arithmetic.gbnf +0 -0
  127. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/c.gbnf +0 -0
  128. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/chess.gbnf +0 -0
  129. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/english.gbnf +0 -0
  130. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/japanese.gbnf +0 -0
  131. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/json.gbnf +0 -0
  132. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/json_arr.gbnf +0 -0
  133. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/list.gbnf +0 -0
  134. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/vllm-sql.gbnf +0 -0
  135. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/lark.lark +0 -0
  136. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/rfc.lark +0 -0
  137. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/rfc.xml +0 -0
  138. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/gtest.sh +0 -0
  139. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/lark.sh +0 -0
  140. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/run.sh +0 -0
  141. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/src/lib.rs +0 -0
  142. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/tests/test_raw_parser.rs +0 -0
  143. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/tests/test_stop.rs +0 -0
  144. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/annotate_asm.js +0 -0
  145. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/bump.py +0 -0
  146. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/cbindgen.sh +0 -0
  147. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/checklinks.py +0 -0
  148. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/checklinks.sh +0 -0
  149. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/ci-publish.py +0 -0
  150. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/disasm.sh +0 -0
  151. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/gbnf_to_lark.py +0 -0
  152. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/gen-testcase.py +0 -0
  153. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/git-version.sh +0 -0
  154. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/install-deps.sh +0 -0
  155. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/jsonschema-stats.js +0 -0
  156. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/remote-guidance-test.sh +0 -0
  157. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/rust-size.js +0 -0
  158. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/rust_size.py +0 -0
  159. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/test-guidance.sh +0 -0
  160. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/tokenizer_test.py +0 -0
  161. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/update-git.py +0 -0
  162. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/LICENSE +0 -0
  163. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/README.md +0 -0
  164. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/bytes.rs +0 -0
  165. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/lib.rs +0 -0
  166. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/recognizer.rs +0 -0
  167. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/rng.rs +0 -0
  168. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/svob.rs +0 -0
  169. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/tests/test_svob.rs +0 -0
  170. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_downloader/LICENSE +0 -0
  171. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_downloader/src/lib.rs +0 -0
  172. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_tokenizers/LICENSE +0 -0
  173. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_tokenizers/src/lib.rs +0 -0
@@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. Dates are d
4
4
 
5
5
  If a release doesn't introduce any interesting changes (build fixes etc.), it's skipped.
6
6
 
7
+ #### [0.7.12](https://github.com/guidance-ai/llguidance/compare/v0.7.11...0.7.12) 2025-04-04
8
+
9
+ - performance optimizations
10
+ - use factory in C FFI (otherwise slicer was not used)
11
+ - add some null checks and safety comments in C FFI
12
+ - implement subgrammar lexeme class merging; fixes [`#113`](https://github.com/guidance-ai/llguidance/issues/113)
13
+
7
14
  #### [0.7.11](https://github.com/guidance-ai/llguidance/compare/v0.7.10...0.7.11) 2025-03-27
8
15
 
9
16
  - add StructTag python API; fixes [`#146`](https://github.com/guidance-ai/llguidance/issues/146)
@@ -46,7 +53,7 @@ If a release doesn't introduce any interesting changes (build fixes etc.), it's
46
53
  #### [v0.7.1](https://github.com/guidance-ai/llguidance/compare/v0.7.0...v0.7.1) 2025-03-18
47
54
 
48
55
  - add `LLMatcher` interface in python
49
- - add whitespace_pattern to JsonCompileOptions [`04a5491`](https://github.com/guidance-ai/llguidance/commit/04a54912cf6d082669674340833f06385f7b66f8)
56
+ - add whitespace_pattern to JsonCompileOptions [`04a5491`](https://github.com/guidance-ai/llguidance/commit/04a54912cf6d082669674340833f06385f7b66f8)
50
57
  - enable mypy in CI [`#140`](https://github.com/guidance-ai/llguidance/pull/140)
51
58
  - add py.typed for annotations information [`#139`](https://github.com/guidance-ai/llguidance/pull/139)
52
59
  - fix clippy warnings
@@ -60,7 +67,6 @@ If a release doesn't introduce any interesting changes (build fixes etc.), it's
60
67
  - fix https://github.com/guidance-ai/guidance/issues/1131 - backtracking+prompt healing [`#1131`](https://github.com/guidance-ai/guidance/issues/1131)
61
68
  - optimize substring [`9950600`](https://github.com/guidance-ai/llguidance/commit/9950600f46e433b4c42506f8816f61cee331774f)
62
69
 
63
-
64
70
  #### [v0.6.29](https://github.com/guidance-ai/llguidance/compare/v0.6.28...v0.6.29) 2025-02-25
65
71
 
66
72
  - [JSON] "x-guidance" JsonCompileOptions [`#130`](https://github.com/guidance-ai/llguidance/pull/130)
@@ -110,4 +116,3 @@ Plus a few releases messing with, deps, unsafe code cleanup.
110
116
 
111
117
  - fixes for numeric tokens [`b7c9970`](https://github.com/guidance-ai/llguidance/commit/b7c99709a9cb7f7a8a3c4716092e4d94fae2ff2c)
112
118
  - make capture explicit in lark syntax [`2a57678`](https://github.com/guidance-ai/llguidance/commit/2a57678d9397e8be54cb0c9f14c4270604f8e1a5)
113
-
@@ -401,9 +401,9 @@ dependencies = [
401
401
 
402
402
  [[package]]
403
403
  name = "derivre"
404
- version = "0.3.1"
404
+ version = "0.3.4"
405
405
  source = "registry+https://github.com/rust-lang/crates.io-index"
406
- checksum = "3a3c2606b3ffc46f91fd62d954d55659ba9fb391bb673311b70f50daf9c15e49"
406
+ checksum = "310c9990c5a531352e274c8c929ca667a84b6bbaceb1e095c177e6a979807f57"
407
407
  dependencies = [
408
408
  "ahash",
409
409
  "anyhow",
@@ -1177,7 +1177,7 @@ checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
1177
1177
 
1178
1178
  [[package]]
1179
1179
  name = "llguidance"
1180
- version = "0.7.11"
1180
+ version = "0.7.12"
1181
1181
  dependencies = [
1182
1182
  "anyhow",
1183
1183
  "derivre",
@@ -1196,7 +1196,7 @@ dependencies = [
1196
1196
 
1197
1197
  [[package]]
1198
1198
  name = "llguidance_py"
1199
- version = "0.7.11"
1199
+ version = "0.7.12"
1200
1200
  dependencies = [
1201
1201
  "anyhow",
1202
1202
  "bytemuck",
@@ -2356,7 +2356,7 @@ dependencies = [
2356
2356
 
2357
2357
  [[package]]
2358
2358
  name = "toktrie"
2359
- version = "0.7.11"
2359
+ version = "0.7.12"
2360
2360
  dependencies = [
2361
2361
  "anyhow",
2362
2362
  "bytemuck",
@@ -2367,7 +2367,7 @@ dependencies = [
2367
2367
 
2368
2368
  [[package]]
2369
2369
  name = "toktrie_hf_downloader"
2370
- version = "0.7.11"
2370
+ version = "0.7.12"
2371
2371
  dependencies = [
2372
2372
  "anyhow",
2373
2373
  "hf-hub",
@@ -2378,7 +2378,7 @@ dependencies = [
2378
2378
 
2379
2379
  [[package]]
2380
2380
  name = "toktrie_hf_tokenizers"
2381
- version = "0.7.11"
2381
+ version = "0.7.12"
2382
2382
  dependencies = [
2383
2383
  "anyhow",
2384
2384
  "log",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llguidance
3
- Version: 0.7.11
3
+ Version: 0.7.12
4
4
  License-File: LICENSE
5
5
  Summary: Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance
6
6
  Author: Michal Moskal
@@ -293,12 +293,13 @@ that llguidance should be used to process the grammar.
293
293
  ### Multiple grammars
294
294
 
295
295
  The input to LLGuidance consists of a list of grammars. This can be accessed via
296
- [LLGuidance API](../parser/src/api.rs). Each of these can be a Lark grammar, a JSON schema,
297
- or a grammar in the API format. With the introduction of `%json` in Lark syntax
296
+ [LLGuidance API](../parser/src/api.rs). Each of these can be a Lark grammar or a JSON schema.
297
+ With the introduction of `%json` in Lark syntax
298
298
  there is less need now for using multiple grammars, but it is still supported.
299
+ We may add nested lark grammars in the future.
299
300
 
300
301
  Inside of Lark grammar, you can reference other grammars using syntax like `@my_grammar`,
301
- refering to grammar with `"name": "my_grammar"` (numeric reference like `@17` are no longer supported).
302
+ referring to the grammar with `"name": "my_grammar"` (numeric references like `@17` are **no longer supported**).
302
303
  The top-level grammar is at index 0.
303
304
 
304
305
  You can specify temperature for subgrammar by referencing it via
@@ -317,6 +318,42 @@ Example:
317
318
  }
318
319
  ```
319
320
 
321
+ #### Subgrammar details
322
+
323
+ Generally, subgrammars share the same context-free grammar but have a separate
324
+ set of lexemes (lexeme class).
325
+ The parser keeps track of a stack of lexeme classes, and considers the top one
326
+ to be the current lexeme class.
327
+ The `%ignore` is applied based on the top lexeme class.
328
+
329
+ Temperature and `max_tokens` can be applied to the current lexeme class as well.
330
+
331
+ There may be issues with subgrammars spanning LLM token boundaries.
332
+
333
+ If two lexeme classes share the `%ignore` regex, and `max_tokens=` and `temperature=`
334
+ are not used, the lexeme classes are merged, which generally allows for parsing
335
+ of more grammars.
336
+ For example, consider:
337
+
338
+ ```lark
339
+ start: a | b
340
+ a: %json { A }
341
+ b: %json { B }
342
+ ```
343
+
344
+ Normally, the parser would have to pick between lexeme class for either A or B
345
+ at the first `{` (it would always pick A since it comes first in the grammar).
346
+ However, if the classes for A and B are merged, the grammar will be equivalent to
347
+ `start: %json { "anyOf": [A, B] }` which is generally what the
348
+ [users expect](https://github.com/guidance-ai/llguidance/issues/113).
349
+
350
+
351
+ ### Features to avoid
352
+
353
+ - `stop=...` - use `suffix=...` or just `lazy`
354
+ - `max_tokens=...` - any use of `max_tokens` will disable rollback, which is needed for spec-decoding; it also makes the parser slower and prevents subgrammar merging
355
+ - `temperature=...` - this is not supported in most server side integrations and prevents subgrammar merging
356
+
320
357
  ### Unsupported Lark features
321
358
 
322
359
  Following features of Lark syntax are currently not supported:
@@ -1291,9 +1291,7 @@
1291
1291
  "Github_easy---o21209.json": {},
1292
1292
  "Github_easy---o21393.json": {},
1293
1293
  "Github_easy---o21455.json": {},
1294
- "Github_easy---o21456.json": {
1295
- "json_error": "Unable to determine if regex is empty: (And (Regex \"([^@^\\\\s]+@[^@^\\\\.^\\\\s]+(\\\\.[^@^\\\\.^\\\\s]*)*.gov.uk).*\") (Regex \"(?s:.{5,254})\"))"
1296
- },
1294
+ "Github_easy---o21456.json": {},
1297
1295
  "Github_easy---o21458.json": {},
1298
1296
  "Github_easy---o21459.json": {},
1299
1297
  "Github_easy---o21460.json": {},
@@ -5037,7 +5035,7 @@
5037
5035
  "Github_hard---o69969.json": {},
5038
5036
  "Github_hard---o69970.json": {},
5039
5037
  "Github_hard---o69972.json": {
5040
- "json_error": "Unable to determine if regex is empty: (And (And (Regex \"(\\\\w+([\\\\.-]?\\\\w+)*@\\\\w+([\\\\.-]?\\\\w+)*(\\\\.\\\\w{2,})+)\") (Regex \"((?P<local_part>(?P<dot_string>[^\\\\s@\\\\.]+(\\\\.[^\\\\s@\\\\.]+)*))@((?P<domain>(?P<sub_domain>[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?)(\\\\.(?P<sub_domain2>[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?))*)|\\\\[(?P<ipv4>((([0-9])|(([1-9])[0-9]|(25[0-5]|(2[0-4]|(1)[0-9])[0-9])))\\\\.){3}(([0-9])|(([1-9])[0-9]|(25[0-5]|(2[0-4]|(1)[0-9])[0-9]))))\\\\]))\")) (Regex \"(?s:.{6,})\"))"
5038
+ "validation_error": "test #0: token not accepted at ⟦loc‧-‧1‧\",\"‧code‧\":\"‧LOC‧-‧1‧\",\"‧name‧\":\"‧Main‧ Library‧\",\"‧library‧\":{\"‧$‧ref‧\":\"‧https‧://‧ils‧.r‧ero‧.ch‧/api‧/lib‧raries‧/lib‧-‧1‧\"},\"‧allow‧_request‧\":‧true‧,\"‧send‧_notification‧\":‧true‧,\"‧notification‧_email‧\":\"‧library‧@example‧.com‧\",\"⟧ * ⟦is⟧ * ⟦_online‧\":‧false‧,\"⟧ forced tokens \"⟦restrict‧_pick‧up‧_to⟧\" != \"⟦is‧_online‧\\\":‧false⟧\""
5041
5039
  },
5042
5040
  "Github_hard---o69976.json": {},
5043
5041
  "Github_hard---o70037.json": {},
@@ -7356,9 +7354,7 @@
7356
7354
  "Github_medium---o6378.json": {},
7357
7355
  "Github_medium---o63935.json": {},
7358
7356
  "Github_medium---o63937.json": {},
7359
- "Github_medium---o63939.json": {
7360
- "json_error": "Unable to determine if regex is empty: (And (Regex \"([\\\\w\\\\-\\\\./]+\\\\.php+)\") (Regex \"(?s:.{16,1024})\"))"
7361
- },
7357
+ "Github_medium---o63939.json": {},
7362
7358
  "Github_medium---o63941.json": {},
7363
7359
  "Github_medium---o63945.json": {},
7364
7360
  "Github_medium---o63998.json": {},
@@ -18,6 +18,11 @@ if [ "$1" == "--bench" ] ; then
18
18
  done
19
19
  fi
20
20
 
21
+ if [ "$1" == "--exp" ] ; then
22
+ shift
23
+ DEFAULT_ARGS="--expected expected_maskbench.json"
24
+ fi
25
+
21
26
  if [ -z "$PERF" ]; then
22
27
  cargo build --release
23
28
  ../target/release/json_stats $DEFAULT_ARGS "$@"
@@ -60,6 +60,10 @@ pub struct CliOptions {
60
60
  #[arg(long)]
61
61
  llg_no_forcing: bool,
62
62
 
63
+ /// Set stderr log level; implies --num-threads 1
64
+ #[arg(long, default_value = "0")]
65
+ llg_log_level: u32,
66
+
63
67
  /// Test the slicer optimization against un-sliced parser
64
68
  #[arg(long)]
65
69
  llg_test_slicer: bool,
@@ -80,6 +84,10 @@ pub struct CliOptions {
80
84
  #[arg(long)]
81
85
  csv: bool,
82
86
 
87
+ /// Don't print JSON output and perf counters
88
+ #[arg(long)]
89
+ quiet: bool,
90
+
83
91
  /// Test rollback mechanism for speculative decoding
84
92
  #[arg(long)]
85
93
  rollback: bool,
@@ -702,8 +710,10 @@ impl TestEnv {
702
710
  Ok(schema) => schema,
703
711
  Err(e) => {
704
712
  res.json_error = Some(format!("{e}"));
713
+ if self.cli.llg_log_level > 0 {
714
+ eprintln!("{} Error JSON: {}", self.file_name, e);
715
+ }
705
716
  limit_string(&mut res.json_error);
706
- // eprintln!("{} Error Compile: {}", file, e);
707
717
  return res;
708
718
  }
709
719
  };
@@ -743,6 +753,9 @@ impl TestEnv {
743
753
  Err(e) => {
744
754
  // eprintln!("{} Error Parser: {}", self.file_name, e);
745
755
  res.parser_error = Some(format!("{e}"));
756
+ if self.cli.llg_log_level > 0 {
757
+ eprintln!("{} Error JSON: {}", self.file_name, e);
758
+ }
746
759
  limit_string(&mut res.parser_error);
747
760
  return res;
748
761
  }
@@ -757,6 +770,9 @@ impl TestEnv {
757
770
  if let Err(e) = self.run_llg_test(&mut res, &parser, ref_parser.as_ref(), t) {
758
771
  if res.validation_error.is_none() {
759
772
  res.validation_error = Some(format!("test #{idx}: {e}"));
773
+ if self.cli.llg_log_level > 0 {
774
+ eprintln!("{} Error Validating: {}", self.file_name, e);
775
+ }
760
776
  limit_string(&mut res.validation_error);
761
777
  }
762
778
  } else if t.valid {
@@ -905,6 +921,9 @@ fn main() {
905
921
  if options.llg_validate_tokens {
906
922
  options.llg_compile = true;
907
923
  }
924
+ if options.llg_log_level > 0 {
925
+ options.num_threads = Some(1);
926
+ }
908
927
 
909
928
  // set max thread numbers
910
929
  let num_cores = std::thread::available_parallelism().unwrap().get();
@@ -961,8 +980,9 @@ fn main() {
961
980
  };
962
981
 
963
982
  let mut factory = ParserFactory::new(&tok_env, caps.clone(), &slices).unwrap();
964
- factory.quiet();
965
- // factory.set_stderr_log_level(2);
983
+ factory.set_buffer_log_level(0);
984
+ factory.set_stderr_log_level(options.llg_log_level);
985
+
966
986
  // factory.limits_mut().step_lexer_fuel = 10_000_000;
967
987
 
968
988
  let mut ref_factory = ParserFactory::new(&tok_env, caps.clone(), &[]).unwrap();
@@ -1131,10 +1151,15 @@ fn main() {
1131
1151
  total.llg.mask_ms_total_a /= 1000;
1132
1152
 
1133
1153
  total.llg_json = llg_totals.clone();
1134
- eprintln!("{}", serde_json::to_string_pretty(&total).unwrap());
1154
+ if !options.quiet {
1155
+ eprintln!(
1156
+ "{}\n{}",
1157
+ serde_json::to_string_pretty(&total).unwrap(),
1158
+ perf_counters
1159
+ );
1160
+ }
1135
1161
  eprintln!(
1136
- "{}Total time: {}ms TTFM {}μs, mask {}μs, ff {}μs, mask+ff {}ms + compile {}ms",
1137
- perf_counters,
1162
+ "Total time: {}ms TTFM {}μs, mask {}μs, ff {}μs, mask+ff {}ms + compile {}ms",
1138
1163
  t0.elapsed().as_millis(),
1139
1164
  total.llg.ttfm_us,
1140
1165
  total.llg.mask_us,
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "llguidance"
3
- version = "0.7.11"
3
+ version = "0.7.12"
4
4
  edition = "2021"
5
5
  license = "MIT"
6
6
  description = "Super-fast Structured Outputs"
@@ -8,7 +8,7 @@ repository = "https://github.com/guidance-ai/llguidance"
8
8
 
9
9
  [dependencies]
10
10
  toktrie = { workspace = true }
11
- derivre = { version = "=0.3.1", default-features = false, features = ["compress"] }
11
+ derivre = { version = "=0.3.4", default-features = false, features = ["compress"] }
12
12
  serde = { version = "1.0.217", features = ["derive"] }
13
13
  serde_json = { version = "1.0.138", features = ["preserve_order"] }
14
14
  anyhow = "1.0.95"
@@ -219,6 +219,12 @@ typedef struct LlgTokenizerInit {
219
219
  * User data to pass to the tokenize_fn
220
220
  */
221
221
  const void *tokenize_user_data;
222
+ /**
223
+ * Tokenizer partitions for the slicer optimization.
224
+ * This is array of pointers to strings, terminated with NULL (argv style).
225
+ * Pass NULL to use defaults. Pass empty array to disable.
226
+ */
227
+ const char *const *slices;
222
228
  } LlgTokenizerInit;
223
229
 
224
230
 
@@ -42,6 +42,7 @@ pub struct LLGuidanceOptions {
42
42
 
43
43
  /// If set, the grammar will allow invalid utf8 byte sequences.
44
44
  /// Any Unicode regex will cause an error.
45
+ /// This is very unlikely what you need.
45
46
  #[serde(default)]
46
47
  pub allow_invalid_utf8: bool,
47
48
  }
@@ -13,78 +13,77 @@ use crate::{GrammarBuilder, HashMap};
13
13
  use anyhow::{bail, ensure, Result};
14
14
  use toktrie::TokEnv;
15
15
 
16
- fn process_grammar(ctx: &mut CompileCtx, input: GrammarWithLexer) -> Result<(SymIdx, LexemeClass)> {
17
- let builder = std::mem::take(&mut ctx.builder).unwrap();
18
-
19
- let res = if let Some(lark) = input.lark_grammar {
20
- ensure!(
21
- input.json_schema.is_none(),
22
- "cannot have both lark_grammar and json_schema"
23
- );
24
- lark_to_llguidance(builder, &lark)?
25
- } else if let Some(mut json_schema) = input.json_schema {
26
- let mut opts = JsonCompileOptions::default();
27
- if let Some(x_guidance) = json_schema.get("x-guidance") {
28
- opts = serde_json::from_value(x_guidance.clone())?;
29
- // TODO not removing it causes oneOf to be handled as anyOf in Github_medium---o61004.json
30
- json_schema.as_object_mut().unwrap().remove("x-guidance");
31
- }
32
- opts.json_to_llg(builder, json_schema)?
33
- } else {
34
- bail!("grammar must have either lark_grammar or json_schema");
35
- };
16
+ struct CompileCtx {
17
+ builder: Option<GrammarBuilder>,
18
+ grammar_by_idx: HashMap<GrammarId, usize>,
19
+ grammar_roots: Vec<(SymIdx, LexemeClass)>,
20
+ }
36
21
 
37
- res.builder.check_limits()?;
22
+ impl CompileCtx {
23
+ fn run_one(&mut self, input: GrammarWithLexer) -> Result<(SymIdx, LexemeClass)> {
24
+ let builder = std::mem::take(&mut self.builder).unwrap();
25
+
26
+ let res = if let Some(lark) = input.lark_grammar {
27
+ ensure!(
28
+ input.json_schema.is_none(),
29
+ "cannot have both lark_grammar and json_schema"
30
+ );
31
+ lark_to_llguidance(builder, &lark)?
32
+ } else if let Some(mut json_schema) = input.json_schema {
33
+ let mut opts = JsonCompileOptions::default();
34
+ if let Some(x_guidance) = json_schema.get("x-guidance") {
35
+ opts = serde_json::from_value(x_guidance.clone())?;
36
+ // TODO not removing it causes oneOf to be handled as anyOf in Github_medium---o61004.json
37
+ json_schema.as_object_mut().unwrap().remove("x-guidance");
38
+ }
39
+ opts.json_to_llg(builder, json_schema)?
40
+ } else {
41
+ bail!("grammar must have either lark_grammar or json_schema");
42
+ };
38
43
 
39
- let grammar_id = res.builder.grammar.sym_props(res.start_node).grammar_id;
44
+ res.builder.check_limits()?;
40
45
 
41
- // restore builder
42
- ctx.builder = Some(res.builder);
46
+ let grammar_id = res.builder.grammar.sym_props(res.start_node).grammar_id;
43
47
 
44
- Ok((res.start_node, grammar_id))
45
- }
48
+ // restore builder
49
+ self.builder = Some(res.builder);
46
50
 
47
- fn process_all_grammars(
48
- mut ctx: CompileCtx,
49
- input: TopLevelGrammar,
50
- ) -> Result<(Grammar, LexerSpec)> {
51
- for (idx, grm) in input.grammars.iter().enumerate() {
52
- if grm.lark_grammar.is_none() && grm.json_schema.is_none() {
53
- bail!("grammar must have either lark_grammar or json_schema");
54
- }
55
- if let Some(n) = &grm.name {
56
- let n = GrammarId::Name(n.to_string());
57
- if ctx.grammar_by_idx.contains_key(&n) {
58
- bail!("duplicate grammar name: {}", n);
59
- }
60
- ctx.grammar_by_idx.insert(n, idx);
61
- }
51
+ Ok((res.start_node, grammar_id))
62
52
  }
63
53
 
64
- for (idx, grm) in input.grammars.into_iter().enumerate() {
65
- let v = process_grammar(&mut ctx, grm)?;
66
- ctx.grammar_roots[idx] = v;
67
- }
54
+ fn run(mut self, input: TopLevelGrammar) -> Result<(Grammar, LexerSpec)> {
55
+ for (idx, grm) in input.grammars.iter().enumerate() {
56
+ if grm.lark_grammar.is_none() && grm.json_schema.is_none() {
57
+ bail!("grammar must have either lark_grammar or json_schema");
58
+ }
59
+ if let Some(n) = &grm.name {
60
+ let n = GrammarId::Name(n.to_string());
61
+ if self.grammar_by_idx.contains_key(&n) {
62
+ bail!("duplicate grammar name: {}", n);
63
+ }
64
+ self.grammar_by_idx.insert(n, idx);
65
+ }
66
+ }
68
67
 
69
- let grammar_by_idx: HashMap<GrammarId, (SymIdx, LexemeClass)> = ctx
70
- .grammar_by_idx
71
- .into_iter()
72
- .map(|(k, v)| (k, ctx.grammar_roots[v]))
73
- .collect();
68
+ for (idx, grm) in input.grammars.into_iter().enumerate() {
69
+ let v = self.run_one(grm)?;
70
+ self.grammar_roots[idx] = v;
71
+ }
74
72
 
75
- let builder = ctx.builder.unwrap();
76
- let mut grammar = builder.grammar;
77
- let mut lexer_spec = builder.regex.spec;
73
+ let grammar_by_idx: HashMap<GrammarId, (SymIdx, LexemeClass)> = self
74
+ .grammar_by_idx
75
+ .into_iter()
76
+ .map(|(k, v)| (k, self.grammar_roots[v]))
77
+ .collect();
78
78
 
79
- grammar.resolve_grammar_refs(&mut lexer_spec, &grammar_by_idx)?;
79
+ let builder = self.builder.unwrap();
80
+ let mut grammar = builder.grammar;
81
+ let mut lexer_spec = builder.regex.spec;
80
82
 
81
- Ok((grammar, lexer_spec))
82
- }
83
+ grammar.resolve_grammar_refs(&mut lexer_spec, &grammar_by_idx)?;
83
84
 
84
- struct CompileCtx {
85
- builder: Option<GrammarBuilder>,
86
- grammar_by_idx: HashMap<GrammarId, usize>,
87
- grammar_roots: Vec<(SymIdx, LexemeClass)>,
85
+ Ok((grammar, lexer_spec))
86
+ }
88
87
  }
89
88
 
90
89
  impl GrammarInit {
@@ -107,7 +106,7 @@ impl GrammarInit {
107
106
  grammar_roots: vec![(SymIdx::BOGUS, LexemeClass::ROOT); input.grammars.len()],
108
107
  };
109
108
 
110
- process_all_grammars(ctx, input)
109
+ ctx.run(input)
111
110
  }
112
111
  }
113
112
  }
@@ -1,6 +1,6 @@
1
1
  use super::lexerspec::{LexemeClass, LexemeIdx, LexerSpec};
2
2
  use crate::api::{GenGrammarOptions, GrammarId, NodeProps};
3
- use crate::HashMap;
3
+ use crate::{HashMap, HashSet};
4
4
  use anyhow::{bail, ensure, Result};
5
5
  use std::fmt::Display;
6
6
  use std::{fmt::Debug, hash::Hash};
@@ -312,7 +312,17 @@ impl Grammar {
312
312
 
313
313
  uf_compress_all(&mut definition);
314
314
 
315
- let mut use_count = vec![0; self.symbols.len()];
315
+ // println!(
316
+ // "symbols: {:?}",
317
+ // self.symbols
318
+ // .iter()
319
+ // .map(|s| (s.idx, &s.name))
320
+ // .collect::<Vec<_>>()
321
+ // );
322
+
323
+ // println!("definition: {:?}", definition);
324
+
325
+ let mut the_user_of = vec![None; self.symbols.len()];
316
326
  for sym in &self.symbols {
317
327
  if definition[sym.idx.as_usize()].is_some() {
318
328
  continue;
@@ -320,19 +330,38 @@ impl Grammar {
320
330
  for r in sym.rules.iter() {
321
331
  for s in &r.rhs {
322
332
  let s = definition[s.as_usize()].unwrap_or(*s);
323
- use_count[s.0 as usize] += 1;
333
+ let idx = s.as_usize();
334
+ if the_user_of[idx].is_none() {
335
+ the_user_of[idx] = Some(r.lhs);
336
+ } else {
337
+ // use self-loop to indicate there are multiple users
338
+ the_user_of[idx] = Some(s);
339
+ }
324
340
  }
325
341
  }
326
342
  }
327
343
 
344
+ // println!("the_user_of: {:?}", the_user_of);
345
+
346
+ // clean up self loops to None
347
+ for idx in 0..the_user_of.len() {
348
+ if let Some(sym) = the_user_of[idx] {
349
+ if sym.as_usize() == idx {
350
+ the_user_of[idx] = None;
351
+ }
352
+ }
353
+ }
354
+
355
+ // println!("the_user_of: {:?}", the_user_of);
356
+
328
357
  let mut repl = crate::HashMap::default();
329
358
 
330
359
  for sym in &self.symbols {
331
360
  if self.is_special_symbol(sym) {
332
361
  continue;
333
362
  }
334
- if sym.rules.len() == 1 && use_count[sym.idx.0 as usize] == 1 {
335
- // eliminate sym.idx
363
+ if sym.rules.len() == 1 && the_user_of[sym.idx.as_usize()].is_some() {
364
+ // we will eliminate sym.idx
336
365
  repl.insert(
337
366
  sym.idx,
338
367
  sym.rules[0]
@@ -344,38 +373,56 @@ impl Grammar {
344
373
  }
345
374
  }
346
375
 
376
+ // println!("repl: {:?}", repl);
377
+
378
+ // these are keys of repl that may need to be used outside of repl itself
379
+ let repl_roots = repl
380
+ .keys()
381
+ .filter(|s| !repl.contains_key(the_user_of[s.as_usize()].as_ref().unwrap()))
382
+ .cloned()
383
+ .collect::<Vec<_>>();
384
+
385
+ // println!("repl_roots: {:?}", repl_roots);
386
+
387
+ let mut to_eliminate = HashSet::from_iter(repl.keys().copied());
347
388
  for (idx, m) in definition.iter().enumerate() {
348
- if let Some(r) = m {
349
- repl.insert(SymIdx(idx as u32), vec![*r]);
389
+ if m.is_some() {
390
+ let src = SymIdx(idx as u32);
391
+ to_eliminate.insert(src);
350
392
  }
351
393
  }
352
394
 
353
- let mut simple_repl = HashMap::default();
354
- while !repl.is_empty() {
355
- let mut new_repl = HashMap::default();
356
- for (k, v) in repl.iter() {
357
- let v2 = v
358
- .iter()
359
- .flat_map(|s| {
360
- simple_repl
361
- .get(s)
362
- .cloned()
363
- .unwrap_or_else(|| repl.get(s).cloned().unwrap_or_else(|| vec![*s]))
364
- })
365
- .collect::<Vec<_>>();
366
- if *v == v2 {
367
- simple_repl.insert(*k, v2);
368
- } else {
369
- new_repl.insert(*k, v2);
395
+ let mut new_repl = HashMap::default();
396
+
397
+ let mut stack = vec![];
398
+ for sym in repl_roots {
399
+ stack.push(vec![sym]);
400
+ let mut res = vec![];
401
+ while let Some(mut lst) = stack.pop() {
402
+ while let Some(e) = lst.pop() {
403
+ if let Some(mut lst2) = repl.remove(&e) {
404
+ lst2.reverse();
405
+ if !lst.is_empty() {
406
+ stack.push(lst);
407
+ }
408
+ stack.push(lst2);
409
+ break;
410
+ }
411
+ assert!(!to_eliminate.contains(&e));
412
+ res.push(e);
370
413
  }
371
414
  }
372
- repl = new_repl;
415
+ // println!("res: {:?} -> {:?}", sym, res);
416
+ new_repl.insert(sym, res);
373
417
  }
374
- repl = simple_repl;
375
418
 
376
- for (k, v) in repl.iter() {
377
- if let Some(p) = v.iter().find(|e| repl.contains_key(*e)) {
378
- panic!("loop at {:?} ({:?})", k, p);
419
+ repl = new_repl;
420
+
421
+ for (idx, m) in definition.iter().enumerate() {
422
+ if let Some(trg) = m {
423
+ if !to_eliminate.contains(trg) {
424
+ repl.insert(SymIdx(idx as u32), vec![*trg]);
425
+ }
379
426
  }
380
427
  }
381
428
 
@@ -403,12 +450,14 @@ impl Grammar {
403
450
  }
404
451
  let lhs = outp.copy_from(self, sym.idx);
405
452
  for rule in &sym.rules {
406
- let rhs = rule
407
- .rhs
408
- .iter()
409
- .flat_map(|s| repl.get(s).cloned().unwrap_or_else(|| vec![*s]))
410
- .map(|s| outp.copy_from(self, s))
411
- .collect();
453
+ let mut rhs = Vec::with_capacity(rule.rhs.len());
454
+ for s in &rule.rhs {
455
+ if let Some(repl) = repl.get(s) {
456
+ rhs.extend(repl.iter().map(|s| outp.copy_from(self, *s)));
457
+ } else {
458
+ rhs.push(outp.copy_from(self, *s));
459
+ }
460
+ }
412
461
  outp.add_rule(lhs, rhs).unwrap();
413
462
  }
414
463
  }
@@ -489,7 +538,8 @@ impl Grammar {
489
538
  pub fn fresh_symbol_ext(&mut self, name0: &str, symprops: SymbolProps) -> SymIdx {
490
539
  let mut name = name0.to_string();
491
540
  let mut idx = self.symbol_count_cache.get(&name).cloned().unwrap_or(2);
492
- while self.symbol_by_name.contains_key(&name) {
541
+ // don't allow empty names
542
+ while name.is_empty() || self.symbol_by_name.contains_key(&name) {
493
543
  name = format!("{}#{}", name0, idx);
494
544
  idx += 1;
495
545
  }