llguidance 0.7.11__tar.gz → 0.7.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llguidance-0.7.11 → llguidance-0.7.12}/CHANGELOG.md +8 -3
- {llguidance-0.7.11 → llguidance-0.7.12}/Cargo.lock +7 -7
- {llguidance-0.7.11 → llguidance-0.7.12}/PKG-INFO +1 -1
- {llguidance-0.7.11 → llguidance-0.7.12}/docs/syntax.md +40 -3
- {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/expected_maskbench.json +3 -7
- {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/jstats.sh +5 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/src/json_stats.rs +31 -6
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/Cargo.toml +2 -2
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/llguidance.h +6 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/api.rs +1 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/from_guidance.rs +60 -61
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/grammar.rs +86 -36
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/lexerspec.rs +37 -10
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/mod.rs +1 -2
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/parser.rs +13 -34
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/regexvec.rs +14 -2
- llguidance-0.7.12/parser/src/earley/slicer.rs +394 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/factory.rs +29 -38
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/ffi.rs +124 -63
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/grammar_builder.rs +21 -24
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/compiler.rs +19 -1
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/lexer.rs +1 -1
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lib.rs +1 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/matcher.rs +20 -1
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/tokenparser.rs +13 -37
- {llguidance-0.7.11 → llguidance-0.7.12}/pyproject.toml +1 -1
- {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/Cargo.toml +1 -1
- {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/src/llinterpreter.rs +10 -12
- {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/src/llmatcher.rs +10 -17
- {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/src/py.rs +1 -2
- llguidance-0.7.12/sample_parser/src/minimal.rs +83 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/src/sample_parser.rs +52 -110
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/tests/test_lark.rs +6 -3
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/tests/test_ll.rs +1 -2
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/Cargo.toml +1 -1
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/tokenv.rs +4 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/toktree.rs +13 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_downloader/Cargo.toml +1 -1
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_tokenizers/Cargo.toml +1 -1
- llguidance-0.7.11/parser/src/earley/slicer.rs +0 -249
- llguidance-0.7.11/sample_parser/src/minimal.rs +0 -179
- {llguidance-0.7.11 → llguidance-0.7.12}/.github/workflows/rust.yml +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/.github/workflows/wheels.yml +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/.gitignore +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/CODE_OF_CONDUCT.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/Cargo.toml +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/LICENSE +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/README.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/SECURITY.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/SUPPORT.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/c_sample/Makefile +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/c_sample/README.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/c_sample/c_sample.cpp +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/docs/fast_forward.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/docs/json_schema.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/docs/mask_plot.png +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/docs/optimizations.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/docs/special_tokens.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/docs/toktrie.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/Cargo.toml +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/scripts/split-stats.sh +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/scripts/split_plot.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/src/lib.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/src/stats.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/LICENSE +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/README.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/build.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/cbindgen.toml +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/grammars/character.json +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/grammars/json.json +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/constraint.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/lexer.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/perf.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/ffi_par.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/README.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/context_ref.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/context_simple/context.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/context_simple/draft.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/context_simple/mod.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/formats.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/mod.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/numeric.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/schema.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/shared_context.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json_validation.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/README.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/ast.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/common.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/compiler.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/mod.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/parser.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/logging.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/output.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/panic_utils.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/stop_controller.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/substring.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/tokenizer_json.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/plan.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/__init__.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/_grammar_from.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/_lib.pyi +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/_struct_tag.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/_tokenizer.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/_util.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/cli.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/gbnf_to_lark.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/hf.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/mlx.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/numpy.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/py.typed +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/torch.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/mypy.ini +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/torch_tests/__init__.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/torch_tests/test_bitmask.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/torch_tests/test_hf.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python/torch_tests/test_matcher.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/src/lib.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/src/pyjson.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/Cargo.toml +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/README.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/cli.sh +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/blog.sample.json +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/blog.schema.json +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/blog.schema.ll.json +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/README.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/arithmetic.gbnf +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/c.gbnf +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/chess.gbnf +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/english.gbnf +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/japanese.gbnf +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/json.gbnf +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/json_arr.gbnf +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/list.gbnf +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/vllm-sql.gbnf +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/lark.lark +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/rfc.lark +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/rfc.xml +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/gtest.sh +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/lark.sh +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/run.sh +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/src/lib.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/tests/test_raw_parser.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/tests/test_stop.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/annotate_asm.js +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/bump.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/cbindgen.sh +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/checklinks.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/checklinks.sh +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/ci-publish.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/disasm.sh +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/gbnf_to_lark.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/gen-testcase.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/git-version.sh +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/install-deps.sh +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/jsonschema-stats.js +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/remote-guidance-test.sh +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/rust-size.js +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/rust_size.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/test-guidance.sh +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/tokenizer_test.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/scripts/update-git.py +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/LICENSE +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/README.md +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/bytes.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/lib.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/recognizer.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/rng.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/svob.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/tests/test_svob.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_downloader/LICENSE +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_downloader/src/lib.rs +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_tokenizers/LICENSE +0 -0
- {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_tokenizers/src/lib.rs +0 -0
|
@@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. Dates are d
|
|
|
4
4
|
|
|
5
5
|
If a release doesn't introduce any interesting changes (build fixes etc.), it's skipped.
|
|
6
6
|
|
|
7
|
+
#### [0.7.12](https://github.com/guidance-ai/llguidance/compare/v0.7.11...0.7.12) 2025-04-04
|
|
8
|
+
|
|
9
|
+
- performance optimizations
|
|
10
|
+
- use factory in C FFI (otherwise slicer was not used)
|
|
11
|
+
- add some null checks and safety comments in C FFI
|
|
12
|
+
- implement subgrammar lexeme class merging; fixes [`#113`](https://github.com/guidance-ai/llguidance/issues/113)
|
|
13
|
+
|
|
7
14
|
#### [0.7.11](https://github.com/guidance-ai/llguidance/compare/v0.7.10...0.7.11) 2025-03-27
|
|
8
15
|
|
|
9
16
|
- add StructTag python API; fixes [`#146`](https://github.com/guidance-ai/llguidance/issues/146)
|
|
@@ -46,7 +53,7 @@ If a release doesn't introduce any interesting changes (build fixes etc.), it's
|
|
|
46
53
|
#### [v0.7.1](https://github.com/guidance-ai/llguidance/compare/v0.7.0...v0.7.1) 2025-03-18
|
|
47
54
|
|
|
48
55
|
- add `LLMatcher` interface in python
|
|
49
|
-
- add
|
|
56
|
+
- add whitespace_pattern to JsonCompileOptions [`04a5491`](https://github.com/guidance-ai/llguidance/commit/04a54912cf6d082669674340833f06385f7b66f8)
|
|
50
57
|
- enable mypy in CI [`#140`](https://github.com/guidance-ai/llguidance/pull/140)
|
|
51
58
|
- add py.typed for annotations information [`#139`](https://github.com/guidance-ai/llguidance/pull/139)
|
|
52
59
|
- fix clippy warnings
|
|
@@ -60,7 +67,6 @@ If a release doesn't introduce any interesting changes (build fixes etc.), it's
|
|
|
60
67
|
- fix https://github.com/guidance-ai/guidance/issues/1131 - backtracking+prompt healing [`#1131`](https://github.com/guidance-ai/guidance/issues/1131)
|
|
61
68
|
- optimize substring [`9950600`](https://github.com/guidance-ai/llguidance/commit/9950600f46e433b4c42506f8816f61cee331774f)
|
|
62
69
|
|
|
63
|
-
|
|
64
70
|
#### [v0.6.29](https://github.com/guidance-ai/llguidance/compare/v0.6.28...v0.6.29) 2025-02-25
|
|
65
71
|
|
|
66
72
|
- [JSON] "x-guidance" JsonCompileOptions [`#130`](https://github.com/guidance-ai/llguidance/pull/130)
|
|
@@ -110,4 +116,3 @@ Plus a few releases messing with, deps, unsafe code cleanup.
|
|
|
110
116
|
|
|
111
117
|
- fixes for numeric tokens [`b7c9970`](https://github.com/guidance-ai/llguidance/commit/b7c99709a9cb7f7a8a3c4716092e4d94fae2ff2c)
|
|
112
118
|
- make capture explicit in lark syntax [`2a57678`](https://github.com/guidance-ai/llguidance/commit/2a57678d9397e8be54cb0c9f14c4270604f8e1a5)
|
|
113
|
-
|
|
@@ -401,9 +401,9 @@ dependencies = [
|
|
|
401
401
|
|
|
402
402
|
[[package]]
|
|
403
403
|
name = "derivre"
|
|
404
|
-
version = "0.3.
|
|
404
|
+
version = "0.3.4"
|
|
405
405
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
406
|
-
checksum = "
|
|
406
|
+
checksum = "310c9990c5a531352e274c8c929ca667a84b6bbaceb1e095c177e6a979807f57"
|
|
407
407
|
dependencies = [
|
|
408
408
|
"ahash",
|
|
409
409
|
"anyhow",
|
|
@@ -1177,7 +1177,7 @@ checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
|
|
|
1177
1177
|
|
|
1178
1178
|
[[package]]
|
|
1179
1179
|
name = "llguidance"
|
|
1180
|
-
version = "0.7.11"
|
|
1180
|
+
version = "0.7.12"
|
|
1181
1181
|
dependencies = [
|
|
1182
1182
|
"anyhow",
|
|
1183
1183
|
"derivre",
|
|
@@ -1196,7 +1196,7 @@ dependencies = [
|
|
|
1196
1196
|
|
|
1197
1197
|
[[package]]
|
|
1198
1198
|
name = "llguidance_py"
|
|
1199
|
-
version = "0.7.
|
|
1199
|
+
version = "0.7.12"
|
|
1200
1200
|
dependencies = [
|
|
1201
1201
|
"anyhow",
|
|
1202
1202
|
"bytemuck",
|
|
@@ -2356,7 +2356,7 @@ dependencies = [
|
|
|
2356
2356
|
|
|
2357
2357
|
[[package]]
|
|
2358
2358
|
name = "toktrie"
|
|
2359
|
-
version = "0.7.
|
|
2359
|
+
version = "0.7.12"
|
|
2360
2360
|
dependencies = [
|
|
2361
2361
|
"anyhow",
|
|
2362
2362
|
"bytemuck",
|
|
@@ -2367,7 +2367,7 @@ dependencies = [
|
|
|
2367
2367
|
|
|
2368
2368
|
[[package]]
|
|
2369
2369
|
name = "toktrie_hf_downloader"
|
|
2370
|
-
version = "0.7.
|
|
2370
|
+
version = "0.7.12"
|
|
2371
2371
|
dependencies = [
|
|
2372
2372
|
"anyhow",
|
|
2373
2373
|
"hf-hub",
|
|
@@ -2378,7 +2378,7 @@ dependencies = [
|
|
|
2378
2378
|
|
|
2379
2379
|
[[package]]
|
|
2380
2380
|
name = "toktrie_hf_tokenizers"
|
|
2381
|
-
version = "0.7.
|
|
2381
|
+
version = "0.7.12"
|
|
2382
2382
|
dependencies = [
|
|
2383
2383
|
"anyhow",
|
|
2384
2384
|
"log",
|
|
@@ -293,12 +293,13 @@ that llguidance should be used to process the grammar.
|
|
|
293
293
|
### Multiple grammars
|
|
294
294
|
|
|
295
295
|
The input to LLGuidance consists of a list of grammars. This can be accessed via
|
|
296
|
-
[LLGuidance API](../parser/src/api.rs). Each of these can be a Lark grammar
|
|
297
|
-
|
|
296
|
+
[LLGuidance API](../parser/src/api.rs). Each of these can be a Lark grammar or a JSON schema.
|
|
297
|
+
With the introduction of `%json` in Lark syntax
|
|
298
298
|
there is less need now for using multiple grammars, but it is still supported.
|
|
299
|
+
We may add nested Lark grammars in the future.
|
|
299
300
|
|
|
300
301
|
Inside of Lark grammar, you can reference other grammars using syntax like `@my_grammar`,
|
|
301
|
-
referring to the grammar with `"name": "my_grammar"` (numeric references like `@17` are no longer supported).
|
|
302
|
+
referring to the grammar with `"name": "my_grammar"` (numeric references like `@17` are **no longer supported**).
|
|
302
303
|
The top-level grammar is at index 0.
|
|
303
304
|
|
|
304
305
|
You can specify temperature for subgrammar by referencing it via
|
|
@@ -317,6 +318,42 @@ Example:
|
|
|
317
318
|
}
|
|
318
319
|
```
|
|
319
320
|
|
|
321
|
+
#### Subgrammar details
|
|
322
|
+
|
|
323
|
+
Generally, subgrammars share the same context-free grammar but have a separate
|
|
324
|
+
set of lexemes (lexeme class).
|
|
325
|
+
The parser keeps track of a stack of lexeme classes, and considers the top one
|
|
326
|
+
to be the current lexeme class.
|
|
327
|
+
The `%ignore` is applied based on the top lexeme class.
|
|
328
|
+
|
|
329
|
+
Temperature and `max_tokens` can be applied to the current lexeme class as well.
|
|
330
|
+
|
|
331
|
+
There may be issues with subgrammars spanning LLM token boundaries.
|
|
332
|
+
|
|
333
|
+
If two lexeme classes share the `%ignore` regex, and `max_tokens=` and `temperature=`
|
|
334
|
+
are not used, the lexeme classes are merged, which generally allows for parsing
|
|
335
|
+
of more grammars.
|
|
336
|
+
For example, consider:
|
|
337
|
+
|
|
338
|
+
```lark
|
|
339
|
+
start: a | b
|
|
340
|
+
a: %json { A }
|
|
341
|
+
b: %json { B }
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
Normally, the parser would have to pick between the lexeme classes for A and B
|
|
345
|
+
at the first `{` (it would always pick A since it comes first in the grammar).
|
|
346
|
+
However, if the classes for A and B are merged, the grammar will be equivalent to
|
|
347
|
+
`start: %json { "anyOf": [A, B] }` which is generally what the
|
|
348
|
+
[users expect](https://github.com/guidance-ai/llguidance/issues/113).
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
### Features to avoid
|
|
352
|
+
|
|
353
|
+
- `stop=...` - use `suffix=...` or just `lazy`
|
|
354
|
+
- `max_tokens=...` - any use of `max_tokens` will disable rollback, which is needed for spec-decoding; it also makes the parser slower and prevents subgrammar merging
|
|
355
|
+
- `temperature=...` - this is not supported in most server side integrations and prevents subgrammar merging
|
|
356
|
+
|
|
320
357
|
### Unsupported Lark features
|
|
321
358
|
|
|
322
359
|
The following features of Lark syntax are currently not supported:
|
|
@@ -1291,9 +1291,7 @@
|
|
|
1291
1291
|
"Github_easy---o21209.json": {},
|
|
1292
1292
|
"Github_easy---o21393.json": {},
|
|
1293
1293
|
"Github_easy---o21455.json": {},
|
|
1294
|
-
"Github_easy---o21456.json": {
|
|
1295
|
-
"json_error": "Unable to determine if regex is empty: (And (Regex \"([^@^\\\\s]+@[^@^\\\\.^\\\\s]+(\\\\.[^@^\\\\.^\\\\s]*)*.gov.uk).*\") (Regex \"(?s:.{5,254})\"))"
|
|
1296
|
-
},
|
|
1294
|
+
"Github_easy---o21456.json": {},
|
|
1297
1295
|
"Github_easy---o21458.json": {},
|
|
1298
1296
|
"Github_easy---o21459.json": {},
|
|
1299
1297
|
"Github_easy---o21460.json": {},
|
|
@@ -5037,7 +5035,7 @@
|
|
|
5037
5035
|
"Github_hard---o69969.json": {},
|
|
5038
5036
|
"Github_hard---o69970.json": {},
|
|
5039
5037
|
"Github_hard---o69972.json": {
|
|
5040
|
-
"
|
|
5038
|
+
"validation_error": "test #0: token not accepted at ⟦loc‧-‧1‧\",\"‧code‧\":\"‧LOC‧-‧1‧\",\"‧name‧\":\"‧Main‧ Library‧\",\"‧library‧\":{\"‧$‧ref‧\":\"‧https‧://‧ils‧.r‧ero‧.ch‧/api‧/lib‧raries‧/lib‧-‧1‧\"},\"‧allow‧_request‧\":‧true‧,\"‧send‧_notification‧\":‧true‧,\"‧notification‧_email‧\":\"‧library‧@example‧.com‧\",\"⟧ * ⟦is⟧ * ⟦_online‧\":‧false‧,\"⟧ forced tokens \"⟦restrict‧_pick‧up‧_to⟧\" != \"⟦is‧_online‧\\\":‧false⟧\""
|
|
5041
5039
|
},
|
|
5042
5040
|
"Github_hard---o69976.json": {},
|
|
5043
5041
|
"Github_hard---o70037.json": {},
|
|
@@ -7356,9 +7354,7 @@
|
|
|
7356
7354
|
"Github_medium---o6378.json": {},
|
|
7357
7355
|
"Github_medium---o63935.json": {},
|
|
7358
7356
|
"Github_medium---o63937.json": {},
|
|
7359
|
-
"Github_medium---o63939.json": {
|
|
7360
|
-
"json_error": "Unable to determine if regex is empty: (And (Regex \"([\\\\w\\\\-\\\\./]+\\\\.php+)\") (Regex \"(?s:.{16,1024})\"))"
|
|
7361
|
-
},
|
|
7357
|
+
"Github_medium---o63939.json": {},
|
|
7362
7358
|
"Github_medium---o63941.json": {},
|
|
7363
7359
|
"Github_medium---o63945.json": {},
|
|
7364
7360
|
"Github_medium---o63998.json": {},
|
|
@@ -18,6 +18,11 @@ if [ "$1" == "--bench" ] ; then
|
|
|
18
18
|
done
|
|
19
19
|
fi
|
|
20
20
|
|
|
21
|
+
if [ "$1" == "--exp" ] ; then
|
|
22
|
+
shift
|
|
23
|
+
DEFAULT_ARGS="--expected expected_maskbench.json"
|
|
24
|
+
fi
|
|
25
|
+
|
|
21
26
|
if [ -z "$PERF" ]; then
|
|
22
27
|
cargo build --release
|
|
23
28
|
../target/release/json_stats $DEFAULT_ARGS "$@"
|
|
@@ -60,6 +60,10 @@ pub struct CliOptions {
|
|
|
60
60
|
#[arg(long)]
|
|
61
61
|
llg_no_forcing: bool,
|
|
62
62
|
|
|
63
|
+
/// Set stderr log level; implies --num-threads 1
|
|
64
|
+
#[arg(long, default_value = "0")]
|
|
65
|
+
llg_log_level: u32,
|
|
66
|
+
|
|
63
67
|
/// Test the slicer optimization against un-sliced parser
|
|
64
68
|
#[arg(long)]
|
|
65
69
|
llg_test_slicer: bool,
|
|
@@ -80,6 +84,10 @@ pub struct CliOptions {
|
|
|
80
84
|
#[arg(long)]
|
|
81
85
|
csv: bool,
|
|
82
86
|
|
|
87
|
+
/// Don't print JSON output and perf counters
|
|
88
|
+
#[arg(long)]
|
|
89
|
+
quiet: bool,
|
|
90
|
+
|
|
83
91
|
/// Test rollback mechanism for speculative decoding
|
|
84
92
|
#[arg(long)]
|
|
85
93
|
rollback: bool,
|
|
@@ -702,8 +710,10 @@ impl TestEnv {
|
|
|
702
710
|
Ok(schema) => schema,
|
|
703
711
|
Err(e) => {
|
|
704
712
|
res.json_error = Some(format!("{e}"));
|
|
713
|
+
if self.cli.llg_log_level > 0 {
|
|
714
|
+
eprintln!("{} Error JSON: {}", self.file_name, e);
|
|
715
|
+
}
|
|
705
716
|
limit_string(&mut res.json_error);
|
|
706
|
-
// eprintln!("{} Error Compile: {}", file, e);
|
|
707
717
|
return res;
|
|
708
718
|
}
|
|
709
719
|
};
|
|
@@ -743,6 +753,9 @@ impl TestEnv {
|
|
|
743
753
|
Err(e) => {
|
|
744
754
|
// eprintln!("{} Error Parser: {}", self.file_name, e);
|
|
745
755
|
res.parser_error = Some(format!("{e}"));
|
|
756
|
+
if self.cli.llg_log_level > 0 {
|
|
757
|
+
eprintln!("{} Error JSON: {}", self.file_name, e);
|
|
758
|
+
}
|
|
746
759
|
limit_string(&mut res.parser_error);
|
|
747
760
|
return res;
|
|
748
761
|
}
|
|
@@ -757,6 +770,9 @@ impl TestEnv {
|
|
|
757
770
|
if let Err(e) = self.run_llg_test(&mut res, &parser, ref_parser.as_ref(), t) {
|
|
758
771
|
if res.validation_error.is_none() {
|
|
759
772
|
res.validation_error = Some(format!("test #{idx}: {e}"));
|
|
773
|
+
if self.cli.llg_log_level > 0 {
|
|
774
|
+
eprintln!("{} Error Validating: {}", self.file_name, e);
|
|
775
|
+
}
|
|
760
776
|
limit_string(&mut res.validation_error);
|
|
761
777
|
}
|
|
762
778
|
} else if t.valid {
|
|
@@ -905,6 +921,9 @@ fn main() {
|
|
|
905
921
|
if options.llg_validate_tokens {
|
|
906
922
|
options.llg_compile = true;
|
|
907
923
|
}
|
|
924
|
+
if options.llg_log_level > 0 {
|
|
925
|
+
options.num_threads = Some(1);
|
|
926
|
+
}
|
|
908
927
|
|
|
909
928
|
// set max thread numbers
|
|
910
929
|
let num_cores = std::thread::available_parallelism().unwrap().get();
|
|
@@ -961,8 +980,9 @@ fn main() {
|
|
|
961
980
|
};
|
|
962
981
|
|
|
963
982
|
let mut factory = ParserFactory::new(&tok_env, caps.clone(), &slices).unwrap();
|
|
964
|
-
factory.
|
|
965
|
-
|
|
983
|
+
factory.set_buffer_log_level(0);
|
|
984
|
+
factory.set_stderr_log_level(options.llg_log_level);
|
|
985
|
+
|
|
966
986
|
// factory.limits_mut().step_lexer_fuel = 10_000_000;
|
|
967
987
|
|
|
968
988
|
let mut ref_factory = ParserFactory::new(&tok_env, caps.clone(), &[]).unwrap();
|
|
@@ -1131,10 +1151,15 @@ fn main() {
|
|
|
1131
1151
|
total.llg.mask_ms_total_a /= 1000;
|
|
1132
1152
|
|
|
1133
1153
|
total.llg_json = llg_totals.clone();
|
|
1134
|
-
|
|
1154
|
+
if !options.quiet {
|
|
1155
|
+
eprintln!(
|
|
1156
|
+
"{}\n{}",
|
|
1157
|
+
serde_json::to_string_pretty(&total).unwrap(),
|
|
1158
|
+
perf_counters
|
|
1159
|
+
);
|
|
1160
|
+
}
|
|
1135
1161
|
eprintln!(
|
|
1136
|
-
"
|
|
1137
|
-
perf_counters,
|
|
1162
|
+
"Total time: {}ms TTFM {}μs, mask {}μs, ff {}μs, mask+ff {}ms + compile {}ms",
|
|
1138
1163
|
t0.elapsed().as_millis(),
|
|
1139
1164
|
total.llg.ttfm_us,
|
|
1140
1165
|
total.llg.mask_us,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "llguidance"
|
|
3
|
-
version = "0.7.11"
|
|
3
|
+
version = "0.7.12"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
license = "MIT"
|
|
6
6
|
description = "Super-fast Structured Outputs"
|
|
@@ -8,7 +8,7 @@ repository = "https://github.com/guidance-ai/llguidance"
|
|
|
8
8
|
|
|
9
9
|
[dependencies]
|
|
10
10
|
toktrie = { workspace = true }
|
|
11
|
-
derivre = { version = "=0.3.
|
|
11
|
+
derivre = { version = "=0.3.4", default-features = false, features = ["compress"] }
|
|
12
12
|
serde = { version = "1.0.217", features = ["derive"] }
|
|
13
13
|
serde_json = { version = "1.0.138", features = ["preserve_order"] }
|
|
14
14
|
anyhow = "1.0.95"
|
|
@@ -219,6 +219,12 @@ typedef struct LlgTokenizerInit {
|
|
|
219
219
|
* User data to pass to the tokenize_fn
|
|
220
220
|
*/
|
|
221
221
|
const void *tokenize_user_data;
|
|
222
|
+
/**
|
|
223
|
+
* Tokenizer partitions for the slicer optimization.
|
|
224
|
+
* This is an array of pointers to strings, terminated with NULL (argv style).
|
|
225
|
+
* Pass NULL to use defaults. Pass an empty array to disable.
|
|
226
|
+
*/
|
|
227
|
+
const char *const *slices;
|
|
222
228
|
} LlgTokenizerInit;
|
|
223
229
|
|
|
224
230
|
|
|
@@ -13,78 +13,77 @@ use crate::{GrammarBuilder, HashMap};
|
|
|
13
13
|
use anyhow::{bail, ensure, Result};
|
|
14
14
|
use toktrie::TokEnv;
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
input.json_schema.is_none(),
|
|
22
|
-
"cannot have both lark_grammar and json_schema"
|
|
23
|
-
);
|
|
24
|
-
lark_to_llguidance(builder, &lark)?
|
|
25
|
-
} else if let Some(mut json_schema) = input.json_schema {
|
|
26
|
-
let mut opts = JsonCompileOptions::default();
|
|
27
|
-
if let Some(x_guidance) = json_schema.get("x-guidance") {
|
|
28
|
-
opts = serde_json::from_value(x_guidance.clone())?;
|
|
29
|
-
// TODO not removing it causes oneOf to be handled as anyOf in Github_medium---o61004.json
|
|
30
|
-
json_schema.as_object_mut().unwrap().remove("x-guidance");
|
|
31
|
-
}
|
|
32
|
-
opts.json_to_llg(builder, json_schema)?
|
|
33
|
-
} else {
|
|
34
|
-
bail!("grammar must have either lark_grammar or json_schema");
|
|
35
|
-
};
|
|
16
|
+
struct CompileCtx {
|
|
17
|
+
builder: Option<GrammarBuilder>,
|
|
18
|
+
grammar_by_idx: HashMap<GrammarId, usize>,
|
|
19
|
+
grammar_roots: Vec<(SymIdx, LexemeClass)>,
|
|
20
|
+
}
|
|
36
21
|
|
|
37
|
-
|
|
22
|
+
impl CompileCtx {
|
|
23
|
+
fn run_one(&mut self, input: GrammarWithLexer) -> Result<(SymIdx, LexemeClass)> {
|
|
24
|
+
let builder = std::mem::take(&mut self.builder).unwrap();
|
|
25
|
+
|
|
26
|
+
let res = if let Some(lark) = input.lark_grammar {
|
|
27
|
+
ensure!(
|
|
28
|
+
input.json_schema.is_none(),
|
|
29
|
+
"cannot have both lark_grammar and json_schema"
|
|
30
|
+
);
|
|
31
|
+
lark_to_llguidance(builder, &lark)?
|
|
32
|
+
} else if let Some(mut json_schema) = input.json_schema {
|
|
33
|
+
let mut opts = JsonCompileOptions::default();
|
|
34
|
+
if let Some(x_guidance) = json_schema.get("x-guidance") {
|
|
35
|
+
opts = serde_json::from_value(x_guidance.clone())?;
|
|
36
|
+
// TODO not removing it causes oneOf to be handled as anyOf in Github_medium---o61004.json
|
|
37
|
+
json_schema.as_object_mut().unwrap().remove("x-guidance");
|
|
38
|
+
}
|
|
39
|
+
opts.json_to_llg(builder, json_schema)?
|
|
40
|
+
} else {
|
|
41
|
+
bail!("grammar must have either lark_grammar or json_schema");
|
|
42
|
+
};
|
|
38
43
|
|
|
39
|
-
|
|
44
|
+
res.builder.check_limits()?;
|
|
40
45
|
|
|
41
|
-
|
|
42
|
-
ctx.builder = Some(res.builder);
|
|
46
|
+
let grammar_id = res.builder.grammar.sym_props(res.start_node).grammar_id;
|
|
43
47
|
|
|
44
|
-
|
|
45
|
-
|
|
48
|
+
// restore builder
|
|
49
|
+
self.builder = Some(res.builder);
|
|
46
50
|
|
|
47
|
-
|
|
48
|
-
mut ctx: CompileCtx,
|
|
49
|
-
input: TopLevelGrammar,
|
|
50
|
-
) -> Result<(Grammar, LexerSpec)> {
|
|
51
|
-
for (idx, grm) in input.grammars.iter().enumerate() {
|
|
52
|
-
if grm.lark_grammar.is_none() && grm.json_schema.is_none() {
|
|
53
|
-
bail!("grammar must have either lark_grammar or json_schema");
|
|
54
|
-
}
|
|
55
|
-
if let Some(n) = &grm.name {
|
|
56
|
-
let n = GrammarId::Name(n.to_string());
|
|
57
|
-
if ctx.grammar_by_idx.contains_key(&n) {
|
|
58
|
-
bail!("duplicate grammar name: {}", n);
|
|
59
|
-
}
|
|
60
|
-
ctx.grammar_by_idx.insert(n, idx);
|
|
61
|
-
}
|
|
51
|
+
Ok((res.start_node, grammar_id))
|
|
62
52
|
}
|
|
63
53
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
54
|
+
fn run(mut self, input: TopLevelGrammar) -> Result<(Grammar, LexerSpec)> {
|
|
55
|
+
for (idx, grm) in input.grammars.iter().enumerate() {
|
|
56
|
+
if grm.lark_grammar.is_none() && grm.json_schema.is_none() {
|
|
57
|
+
bail!("grammar must have either lark_grammar or json_schema");
|
|
58
|
+
}
|
|
59
|
+
if let Some(n) = &grm.name {
|
|
60
|
+
let n = GrammarId::Name(n.to_string());
|
|
61
|
+
if self.grammar_by_idx.contains_key(&n) {
|
|
62
|
+
bail!("duplicate grammar name: {}", n);
|
|
63
|
+
}
|
|
64
|
+
self.grammar_by_idx.insert(n, idx);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
68
67
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
.collect();
|
|
68
|
+
for (idx, grm) in input.grammars.into_iter().enumerate() {
|
|
69
|
+
let v = self.run_one(grm)?;
|
|
70
|
+
self.grammar_roots[idx] = v;
|
|
71
|
+
}
|
|
74
72
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
73
|
+
let grammar_by_idx: HashMap<GrammarId, (SymIdx, LexemeClass)> = self
|
|
74
|
+
.grammar_by_idx
|
|
75
|
+
.into_iter()
|
|
76
|
+
.map(|(k, v)| (k, self.grammar_roots[v]))
|
|
77
|
+
.collect();
|
|
78
78
|
|
|
79
|
-
|
|
79
|
+
let builder = self.builder.unwrap();
|
|
80
|
+
let mut grammar = builder.grammar;
|
|
81
|
+
let mut lexer_spec = builder.regex.spec;
|
|
80
82
|
|
|
81
|
-
|
|
82
|
-
}
|
|
83
|
+
grammar.resolve_grammar_refs(&mut lexer_spec, &grammar_by_idx)?;
|
|
83
84
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
grammar_by_idx: HashMap<GrammarId, usize>,
|
|
87
|
-
grammar_roots: Vec<(SymIdx, LexemeClass)>,
|
|
85
|
+
Ok((grammar, lexer_spec))
|
|
86
|
+
}
|
|
88
87
|
}
|
|
89
88
|
|
|
90
89
|
impl GrammarInit {
|
|
@@ -107,7 +106,7 @@ impl GrammarInit {
|
|
|
107
106
|
grammar_roots: vec![(SymIdx::BOGUS, LexemeClass::ROOT); input.grammars.len()],
|
|
108
107
|
};
|
|
109
108
|
|
|
110
|
-
|
|
109
|
+
ctx.run(input)
|
|
111
110
|
}
|
|
112
111
|
}
|
|
113
112
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
use super::lexerspec::{LexemeClass, LexemeIdx, LexerSpec};
|
|
2
2
|
use crate::api::{GenGrammarOptions, GrammarId, NodeProps};
|
|
3
|
-
use crate::HashMap;
|
|
3
|
+
use crate::{HashMap, HashSet};
|
|
4
4
|
use anyhow::{bail, ensure, Result};
|
|
5
5
|
use std::fmt::Display;
|
|
6
6
|
use std::{fmt::Debug, hash::Hash};
|
|
@@ -312,7 +312,17 @@ impl Grammar {
|
|
|
312
312
|
|
|
313
313
|
uf_compress_all(&mut definition);
|
|
314
314
|
|
|
315
|
-
|
|
315
|
+
// println!(
|
|
316
|
+
// "symbols: {:?}",
|
|
317
|
+
// self.symbols
|
|
318
|
+
// .iter()
|
|
319
|
+
// .map(|s| (s.idx, &s.name))
|
|
320
|
+
// .collect::<Vec<_>>()
|
|
321
|
+
// );
|
|
322
|
+
|
|
323
|
+
// println!("definition: {:?}", definition);
|
|
324
|
+
|
|
325
|
+
let mut the_user_of = vec![None; self.symbols.len()];
|
|
316
326
|
for sym in &self.symbols {
|
|
317
327
|
if definition[sym.idx.as_usize()].is_some() {
|
|
318
328
|
continue;
|
|
@@ -320,19 +330,38 @@ impl Grammar {
|
|
|
320
330
|
for r in sym.rules.iter() {
|
|
321
331
|
for s in &r.rhs {
|
|
322
332
|
let s = definition[s.as_usize()].unwrap_or(*s);
|
|
323
|
-
|
|
333
|
+
let idx = s.as_usize();
|
|
334
|
+
if the_user_of[idx].is_none() {
|
|
335
|
+
the_user_of[idx] = Some(r.lhs);
|
|
336
|
+
} else {
|
|
337
|
+
// use self-loop to indicate there are multiple users
|
|
338
|
+
the_user_of[idx] = Some(s);
|
|
339
|
+
}
|
|
324
340
|
}
|
|
325
341
|
}
|
|
326
342
|
}
|
|
327
343
|
|
|
344
|
+
// println!("the_user_of: {:?}", the_user_of);
|
|
345
|
+
|
|
346
|
+
// clean up self loops to None
|
|
347
|
+
for idx in 0..the_user_of.len() {
|
|
348
|
+
if let Some(sym) = the_user_of[idx] {
|
|
349
|
+
if sym.as_usize() == idx {
|
|
350
|
+
the_user_of[idx] = None;
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
// println!("the_user_of: {:?}", the_user_of);
|
|
356
|
+
|
|
328
357
|
let mut repl = crate::HashMap::default();
|
|
329
358
|
|
|
330
359
|
for sym in &self.symbols {
|
|
331
360
|
if self.is_special_symbol(sym) {
|
|
332
361
|
continue;
|
|
333
362
|
}
|
|
334
|
-
if sym.rules.len() == 1 &&
|
|
335
|
-
// eliminate sym.idx
|
|
363
|
+
if sym.rules.len() == 1 && the_user_of[sym.idx.as_usize()].is_some() {
|
|
364
|
+
// we will eliminate sym.idx
|
|
336
365
|
repl.insert(
|
|
337
366
|
sym.idx,
|
|
338
367
|
sym.rules[0]
|
|
@@ -344,38 +373,56 @@ impl Grammar {
|
|
|
344
373
|
}
|
|
345
374
|
}
|
|
346
375
|
|
|
376
|
+
// println!("repl: {:?}", repl);
|
|
377
|
+
|
|
378
|
+
// these are keys of repl that may need to be used outside of repl itself
|
|
379
|
+
let repl_roots = repl
|
|
380
|
+
.keys()
|
|
381
|
+
.filter(|s| !repl.contains_key(the_user_of[s.as_usize()].as_ref().unwrap()))
|
|
382
|
+
.cloned()
|
|
383
|
+
.collect::<Vec<_>>();
|
|
384
|
+
|
|
385
|
+
// println!("repl_roots: {:?}", repl_roots);
|
|
386
|
+
|
|
387
|
+
let mut to_eliminate = HashSet::from_iter(repl.keys().copied());
|
|
347
388
|
for (idx, m) in definition.iter().enumerate() {
|
|
348
|
-
if
|
|
349
|
-
|
|
389
|
+
if m.is_some() {
|
|
390
|
+
let src = SymIdx(idx as u32);
|
|
391
|
+
to_eliminate.insert(src);
|
|
350
392
|
}
|
|
351
393
|
}
|
|
352
394
|
|
|
353
|
-
let mut
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
395
|
+
let mut new_repl = HashMap::default();
|
|
396
|
+
|
|
397
|
+
let mut stack = vec![];
|
|
398
|
+
for sym in repl_roots {
|
|
399
|
+
stack.push(vec![sym]);
|
|
400
|
+
let mut res = vec![];
|
|
401
|
+
while let Some(mut lst) = stack.pop() {
|
|
402
|
+
while let Some(e) = lst.pop() {
|
|
403
|
+
if let Some(mut lst2) = repl.remove(&e) {
|
|
404
|
+
lst2.reverse();
|
|
405
|
+
if !lst.is_empty() {
|
|
406
|
+
stack.push(lst);
|
|
407
|
+
}
|
|
408
|
+
stack.push(lst2);
|
|
409
|
+
break;
|
|
410
|
+
}
|
|
411
|
+
assert!(!to_eliminate.contains(&e));
|
|
412
|
+
res.push(e);
|
|
370
413
|
}
|
|
371
414
|
}
|
|
372
|
-
|
|
415
|
+
// println!("res: {:?} -> {:?}", sym, res);
|
|
416
|
+
new_repl.insert(sym, res);
|
|
373
417
|
}
|
|
374
|
-
repl = simple_repl;
|
|
375
418
|
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
419
|
+
repl = new_repl;
|
|
420
|
+
|
|
421
|
+
for (idx, m) in definition.iter().enumerate() {
|
|
422
|
+
if let Some(trg) = m {
|
|
423
|
+
if !to_eliminate.contains(trg) {
|
|
424
|
+
repl.insert(SymIdx(idx as u32), vec![*trg]);
|
|
425
|
+
}
|
|
379
426
|
}
|
|
380
427
|
}
|
|
381
428
|
|
|
@@ -403,12 +450,14 @@ impl Grammar {
|
|
|
403
450
|
}
|
|
404
451
|
let lhs = outp.copy_from(self, sym.idx);
|
|
405
452
|
for rule in &sym.rules {
|
|
406
|
-
let rhs = rule
|
|
407
|
-
|
|
408
|
-
.
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
453
|
+
let mut rhs = Vec::with_capacity(rule.rhs.len());
|
|
454
|
+
for s in &rule.rhs {
|
|
455
|
+
if let Some(repl) = repl.get(s) {
|
|
456
|
+
rhs.extend(repl.iter().map(|s| outp.copy_from(self, *s)));
|
|
457
|
+
} else {
|
|
458
|
+
rhs.push(outp.copy_from(self, *s));
|
|
459
|
+
}
|
|
460
|
+
}
|
|
412
461
|
outp.add_rule(lhs, rhs).unwrap();
|
|
413
462
|
}
|
|
414
463
|
}
|
|
@@ -489,7 +538,8 @@ impl Grammar {
|
|
|
489
538
|
pub fn fresh_symbol_ext(&mut self, name0: &str, symprops: SymbolProps) -> SymIdx {
|
|
490
539
|
let mut name = name0.to_string();
|
|
491
540
|
let mut idx = self.symbol_count_cache.get(&name).cloned().unwrap_or(2);
|
|
492
|
-
|
|
541
|
+
// don't allow empty names
|
|
542
|
+
while name.is_empty() || self.symbol_by_name.contains_key(&name) {
|
|
493
543
|
name = format!("{}#{}", name0, idx);
|
|
494
544
|
idx += 1;
|
|
495
545
|
}
|