llguidance 0.7.26__tar.gz → 0.7.29__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llguidance-0.7.26 → llguidance-0.7.29}/CHANGELOG.md +13 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/Cargo.lock +80 -9
- {llguidance-0.7.26 → llguidance-0.7.29}/Cargo.toml +4 -1
- {llguidance-0.7.26 → llguidance-0.7.29}/PKG-INFO +1 -1
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/Cargo.toml +1 -1
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/llguidance.h +7 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/earley/parser.rs +99 -70
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/ffi.rs +11 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/stop_controller.rs +10 -5
- {llguidance-0.7.26 → llguidance-0.7.29}/pyproject.toml +1 -1
- llguidance-0.7.29/python/llguidance/tiktoken.py +34 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/torch_tests/test_llamacpp.py +4 -0
- llguidance-0.7.29/python/torch_tests/test_tiktoken.py +30 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python_ext/Cargo.toml +2 -1
- {llguidance-0.7.26 → llguidance-0.7.29}/python_ext/src/py.rs +36 -1
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/tests/test_raw_parser.rs +16 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/bump.py +1 -1
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/ci-publish.py +18 -16
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie/Cargo.toml +1 -1
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie/src/toktree.rs +62 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie_hf_downloader/Cargo.toml +1 -1
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie_hf_tokenizers/Cargo.toml +1 -1
- llguidance-0.7.29/toktrie_tiktoken/Cargo.toml +15 -0
- llguidance-0.7.29/toktrie_tiktoken/LICENSE +21 -0
- llguidance-0.7.29/toktrie_tiktoken/src/lib.rs +103 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/.github/workflows/rust.yml +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/.github/workflows/wheels.yml +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/.gitignore +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/CODE_OF_CONDUCT.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/LICENSE +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/README.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/SECURITY.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/SUPPORT.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/c_sample/Makefile +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/c_sample/README.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/c_sample/c_sample.cpp +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/docs/fast_forward.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/docs/json_schema.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/docs/mask_plot.png +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/docs/optimizations.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/docs/special_tokens.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/docs/syntax.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/docs/toktrie.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/json_stats/Cargo.toml +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/json_stats/expected_maskbench.json +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/json_stats/jstats.sh +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/json_stats/scripts/split-stats.sh +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/json_stats/scripts/split_plot.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/json_stats/src/json_stats.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/json_stats/src/lib.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/json_stats/src/stats.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/LICENSE +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/README.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/build.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/cbindgen.toml +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/grammars/character.json +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/grammars/json.json +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/api.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/constraint.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/earley/from_guidance.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/earley/grammar.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/earley/lexer.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/earley/lexerspec.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/earley/mod.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/earley/perf.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/earley/regexvec.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/earley/slicer.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/factory.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/ffi_par.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/grammar_builder.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/json/README.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/json/compiler.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/json/context_ref.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/json/context_simple/context.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/json/context_simple/draft.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/json/context_simple/mod.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/json/formats.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/json/mod.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/json/numeric.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/json/schema.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/json/shared_context.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/json_validation.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/lark/README.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/lark/ast.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/lark/common.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/lark/compiler.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/lark/lexer.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/lark/mod.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/lark/parser.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/lib.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/logging.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/matcher.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/output.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/panic_utils.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/regex_rewrite.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/substring.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/tokenizer_json.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/parser/src/tokenparser.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/plan.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/llguidance/__init__.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/llguidance/_grammar_from.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/llguidance/_lib.pyi +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/llguidance/_struct_tag.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/llguidance/_tokenizer.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/llguidance/_util.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/llguidance/cli.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/llguidance/gbnf_to_lark.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/llguidance/hf.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/llguidance/llamacpp.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/llguidance/mlx.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/llguidance/numpy.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/llguidance/py.typed +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/llguidance/torch.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/mypy.ini +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/torch_tests/__init__.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/torch_tests/test_bitmask.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/torch_tests/test_hf.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python/torch_tests/test_matcher.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python_ext/src/lib.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python_ext/src/llamatokenizer.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python_ext/src/llinterpreter.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python_ext/src/llmatcher.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python_ext/src/parserlimits.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/python_ext/src/pyjson.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/Cargo.toml +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/README.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/cli.sh +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/blog.sample.json +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/blog.schema.json +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/blog.schema.ll.json +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/from-llama.cpp/README.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/from-llama.cpp/arithmetic.gbnf +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/from-llama.cpp/c.gbnf +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/from-llama.cpp/chess.gbnf +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/from-llama.cpp/english.gbnf +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/from-llama.cpp/japanese.gbnf +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/from-llama.cpp/json.gbnf +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/from-llama.cpp/json_arr.gbnf +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/from-llama.cpp/list.gbnf +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/from-llama.cpp/vllm-sql.gbnf +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/lark.lark +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/rfc.lark +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/rfc.xml +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/data/ulysses.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/gtest.sh +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/lark.sh +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/run.sh +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/src/lib.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/src/minimal.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/src/sample_parser.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/tests/test_lark.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/tests/test_ll.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/sample_parser/tests/test_stop.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/annotate_asm.js +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/cbindgen.sh +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/checklinks.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/checklinks.sh +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/disasm.sh +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/gbnf_to_lark.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/gen-testcase.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/git-version.sh +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/install-deps.sh +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/jsonschema-stats.js +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/remote-guidance-test.sh +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/rust-size.js +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/rust_size.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/test-guidance.sh +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/tokenizer_test.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/scripts/update-git.py +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie/LICENSE +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie/README.md +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie/src/bytes.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie/src/lib.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie/src/recognizer.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie/src/rng.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie/src/svob.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie/src/tokenv.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie/tests/test_svob.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie_hf_downloader/LICENSE +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie_hf_downloader/src/lib.rs +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie_hf_tokenizers/LICENSE +0 -0
- {llguidance-0.7.26 → llguidance-0.7.29}/toktrie_hf_tokenizers/src/lib.rs +0 -0
|
@@ -4,6 +4,19 @@ All notable changes to this project will be documented in this file. Dates are d
|
|
|
4
4
|
|
|
5
5
|
If a release doesn't introduce any interesting changes (build fixes etc.), it's skipped.
|
|
6
6
|
|
|
7
|
+
#### [0.7.29](https://github.com/guidance-ai/llguidance/compare/v0.7.28...0.7.29) 2025-06-06
|
|
8
|
+
|
|
9
|
+
- cargo fmt
|
|
10
|
+
|
|
11
|
+
#### [0.7.28](https://github.com/guidance-ai/llguidance/compare/v0.7.27...0.7.28) 2025-06-06
|
|
12
|
+
|
|
13
|
+
- fix lexer_stack=... panic with numeric tokens [`4e91b0f`](https://github.com/guidance-ai/llguidance/commit/4e91b0fa0c03572a5fc221ac0e0b05035af9dcfa)
|
|
14
|
+
|
|
15
|
+
#### [0.7.27](https://github.com/guidance-ai/llguidance/compare/v0.7.26...0.7.27) 2025-06-04
|
|
16
|
+
|
|
17
|
+
- add toktrie_tiktoken and llguidance.tiktoken.lltokenizer_from_encoding [`#154`](https://github.com/guidance-ai/llguidance/issues/154)
|
|
18
|
+
- implement clone on StopController [`#185`](https://github.com/guidance-ai/llguidance/issues/185)
|
|
19
|
+
|
|
7
20
|
#### [0.7.26](https://github.com/guidance-ai/llguidance/compare/v0.7.25...0.7.26) 2025-05-30
|
|
8
21
|
|
|
9
22
|
- add support for & and ~ in lark regexes [`96fcee3`](https://github.com/guidance-ai/llguidance/commit/96fcee373697b57bead94d1bc06c17cf1c6134e4)
|
|
@@ -135,15 +135,30 @@ version = "0.22.1"
|
|
|
135
135
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
136
136
|
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
|
|
137
137
|
|
|
138
|
+
[[package]]
|
|
139
|
+
name = "bit-set"
|
|
140
|
+
version = "0.5.3"
|
|
141
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
142
|
+
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
|
|
143
|
+
dependencies = [
|
|
144
|
+
"bit-vec 0.6.3",
|
|
145
|
+
]
|
|
146
|
+
|
|
138
147
|
[[package]]
|
|
139
148
|
name = "bit-set"
|
|
140
149
|
version = "0.8.0"
|
|
141
150
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
142
151
|
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
|
|
143
152
|
dependencies = [
|
|
144
|
-
"bit-vec",
|
|
153
|
+
"bit-vec 0.8.0",
|
|
145
154
|
]
|
|
146
155
|
|
|
156
|
+
[[package]]
|
|
157
|
+
name = "bit-vec"
|
|
158
|
+
version = "0.6.3"
|
|
159
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
160
|
+
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
|
|
161
|
+
|
|
147
162
|
[[package]]
|
|
148
163
|
name = "bit-vec"
|
|
149
164
|
version = "0.8.0"
|
|
@@ -162,6 +177,17 @@ version = "0.2.2"
|
|
|
162
177
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
163
178
|
checksum = "3eeab4423108c5d7c744f4d234de88d18d636100093ae04caf4825134b9c3a32"
|
|
164
179
|
|
|
180
|
+
[[package]]
|
|
181
|
+
name = "bstr"
|
|
182
|
+
version = "1.12.0"
|
|
183
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
184
|
+
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
|
|
185
|
+
dependencies = [
|
|
186
|
+
"memchr",
|
|
187
|
+
"regex-automata",
|
|
188
|
+
"serde",
|
|
189
|
+
]
|
|
190
|
+
|
|
165
191
|
[[package]]
|
|
166
192
|
name = "bumpalo"
|
|
167
193
|
version = "3.17.0"
|
|
@@ -492,13 +518,24 @@ version = "0.1.10"
|
|
|
492
518
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
493
519
|
checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6"
|
|
494
520
|
|
|
521
|
+
[[package]]
|
|
522
|
+
name = "fancy-regex"
|
|
523
|
+
version = "0.13.0"
|
|
524
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
525
|
+
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
|
|
526
|
+
dependencies = [
|
|
527
|
+
"bit-set 0.5.3",
|
|
528
|
+
"regex-automata",
|
|
529
|
+
"regex-syntax",
|
|
530
|
+
]
|
|
531
|
+
|
|
495
532
|
[[package]]
|
|
496
533
|
name = "fancy-regex"
|
|
497
534
|
version = "0.14.0"
|
|
498
535
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
499
536
|
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
|
|
500
537
|
dependencies = [
|
|
501
|
-
"bit-set",
|
|
538
|
+
"bit-set 0.8.0",
|
|
502
539
|
"regex-automata",
|
|
503
540
|
"regex-syntax",
|
|
504
541
|
]
|
|
@@ -1123,7 +1160,7 @@ dependencies = [
|
|
|
1123
1160
|
"base64 0.22.1",
|
|
1124
1161
|
"bytecount",
|
|
1125
1162
|
"email_address",
|
|
1126
|
-
"fancy-regex",
|
|
1163
|
+
"fancy-regex 0.14.0",
|
|
1127
1164
|
"fraction",
|
|
1128
1165
|
"idna",
|
|
1129
1166
|
"itoa",
|
|
@@ -1174,7 +1211,7 @@ checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856"
|
|
|
1174
1211
|
|
|
1175
1212
|
[[package]]
|
|
1176
1213
|
name = "llguidance"
|
|
1177
|
-
version = "0.7.26"
|
|
1214
|
+
version = "0.7.29"
|
|
1178
1215
|
dependencies = [
|
|
1179
1216
|
"anyhow",
|
|
1180
1217
|
"derivre",
|
|
@@ -1193,7 +1230,7 @@ dependencies = [
|
|
|
1193
1230
|
|
|
1194
1231
|
[[package]]
|
|
1195
1232
|
name = "llguidance_py"
|
|
1196
|
-
version = "0.7.26"
|
|
1233
|
+
version = "0.7.29"
|
|
1197
1234
|
dependencies = [
|
|
1198
1235
|
"anyhow",
|
|
1199
1236
|
"bytemuck",
|
|
@@ -1203,6 +1240,7 @@ dependencies = [
|
|
|
1203
1240
|
"serde",
|
|
1204
1241
|
"serde_json",
|
|
1205
1242
|
"toktrie_hf_tokenizers",
|
|
1243
|
+
"toktrie_tiktoken",
|
|
1206
1244
|
]
|
|
1207
1245
|
|
|
1208
1246
|
[[package]]
|
|
@@ -1865,6 +1903,12 @@ version = "0.1.24"
|
|
|
1865
1903
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1866
1904
|
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
|
|
1867
1905
|
|
|
1906
|
+
[[package]]
|
|
1907
|
+
name = "rustc-hash"
|
|
1908
|
+
version = "1.1.0"
|
|
1909
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1910
|
+
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
|
1911
|
+
|
|
1868
1912
|
[[package]]
|
|
1869
1913
|
name = "rustix"
|
|
1870
1914
|
version = "1.0.5"
|
|
@@ -2233,6 +2277,21 @@ dependencies = [
|
|
|
2233
2277
|
"syn",
|
|
2234
2278
|
]
|
|
2235
2279
|
|
|
2280
|
+
[[package]]
|
|
2281
|
+
name = "tiktoken-rs"
|
|
2282
|
+
version = "0.7.0"
|
|
2283
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2284
|
+
checksum = "25563eeba904d770acf527e8b370fe9a5547bacd20ff84a0b6c3bc41288e5625"
|
|
2285
|
+
dependencies = [
|
|
2286
|
+
"anyhow",
|
|
2287
|
+
"base64 0.22.1",
|
|
2288
|
+
"bstr",
|
|
2289
|
+
"fancy-regex 0.13.0",
|
|
2290
|
+
"lazy_static",
|
|
2291
|
+
"regex",
|
|
2292
|
+
"rustc-hash",
|
|
2293
|
+
]
|
|
2294
|
+
|
|
2236
2295
|
[[package]]
|
|
2237
2296
|
name = "tinystr"
|
|
2238
2297
|
version = "0.7.6"
|
|
@@ -2252,7 +2311,7 @@ dependencies = [
|
|
|
2252
2311
|
"aho-corasick",
|
|
2253
2312
|
"derive_builder",
|
|
2254
2313
|
"esaxx-rs",
|
|
2255
|
-
"fancy-regex",
|
|
2314
|
+
"fancy-regex 0.14.0",
|
|
2256
2315
|
"getrandom 0.2.15",
|
|
2257
2316
|
"itertools 0.13.0",
|
|
2258
2317
|
"lazy_static",
|
|
@@ -2336,7 +2395,7 @@ dependencies = [
|
|
|
2336
2395
|
|
|
2337
2396
|
[[package]]
|
|
2338
2397
|
name = "toktrie"
|
|
2339
|
-
version = "0.7.26"
|
|
2398
|
+
version = "0.7.29"
|
|
2340
2399
|
dependencies = [
|
|
2341
2400
|
"anyhow",
|
|
2342
2401
|
"bytemuck",
|
|
@@ -2347,7 +2406,7 @@ dependencies = [
|
|
|
2347
2406
|
|
|
2348
2407
|
[[package]]
|
|
2349
2408
|
name = "toktrie_hf_downloader"
|
|
2350
|
-
version = "0.7.26"
|
|
2409
|
+
version = "0.7.29"
|
|
2351
2410
|
dependencies = [
|
|
2352
2411
|
"anyhow",
|
|
2353
2412
|
"hf-hub",
|
|
@@ -2358,7 +2417,7 @@ dependencies = [
|
|
|
2358
2417
|
|
|
2359
2418
|
[[package]]
|
|
2360
2419
|
name = "toktrie_hf_tokenizers"
|
|
2361
|
-
version = "0.7.26"
|
|
2420
|
+
version = "0.7.29"
|
|
2362
2421
|
dependencies = [
|
|
2363
2422
|
"anyhow",
|
|
2364
2423
|
"log",
|
|
@@ -2368,6 +2427,18 @@ dependencies = [
|
|
|
2368
2427
|
"toktrie",
|
|
2369
2428
|
]
|
|
2370
2429
|
|
|
2430
|
+
[[package]]
|
|
2431
|
+
name = "toktrie_tiktoken"
|
|
2432
|
+
version = "0.7.29"
|
|
2433
|
+
dependencies = [
|
|
2434
|
+
"anyhow",
|
|
2435
|
+
"log",
|
|
2436
|
+
"serde",
|
|
2437
|
+
"serde_json",
|
|
2438
|
+
"tiktoken-rs",
|
|
2439
|
+
"toktrie",
|
|
2440
|
+
]
|
|
2441
|
+
|
|
2371
2442
|
[[package]]
|
|
2372
2443
|
name = "tower"
|
|
2373
2444
|
version = "0.5.2"
|
|
@@ -7,6 +7,7 @@ members = [
|
|
|
7
7
|
"toktrie",
|
|
8
8
|
"toktrie_hf_tokenizers",
|
|
9
9
|
"toktrie_hf_downloader",
|
|
10
|
+
"toktrie_tiktoken",
|
|
10
11
|
]
|
|
11
12
|
# just exclude python_ext since it doesn't build without maturin
|
|
12
13
|
default-members = [
|
|
@@ -16,6 +17,7 @@ default-members = [
|
|
|
16
17
|
"toktrie",
|
|
17
18
|
"toktrie_hf_tokenizers",
|
|
18
19
|
"toktrie_hf_downloader",
|
|
20
|
+
"toktrie_tiktoken",
|
|
19
21
|
]
|
|
20
22
|
resolver = "2"
|
|
21
23
|
|
|
@@ -36,4 +38,5 @@ opt-level = 3
|
|
|
36
38
|
toktrie = { path = "toktrie" }
|
|
37
39
|
llguidance = { path = "parser" }
|
|
38
40
|
toktrie_hf_tokenizers = { path = "toktrie_hf_tokenizers" }
|
|
39
|
-
toktrie_hf_downloader = { path = "toktrie_hf_downloader" }
|
|
41
|
+
toktrie_hf_downloader = { path = "toktrie_hf_downloader" }
|
|
42
|
+
toktrie_tiktoken = { path = "toktrie_tiktoken" }
|
|
@@ -433,6 +433,13 @@ const char *llg_stop_commit_token(struct LlgStopController *stop_ctrl,
|
|
|
433
433
|
size_t *output_len_p,
|
|
434
434
|
bool *is_stopped_p);
|
|
435
435
|
|
|
436
|
+
/**
|
|
437
|
+
* Clone the stop-sequence controller.
|
|
438
|
+
* The cloned controller shares (under mutex) regex caches if any, so that
|
|
439
|
+
* cloning is cheap.
|
|
440
|
+
*/
|
|
441
|
+
struct LlgStopController *llg_clone_stop_controller(const struct LlgStopController *stop_ctrl);
|
|
442
|
+
|
|
436
443
|
/**
|
|
437
444
|
* Free the stop-sequence controller
|
|
438
445
|
*/
|
|
@@ -53,6 +53,14 @@ macro_rules! debug {
|
|
|
53
53
|
}
|
|
54
54
|
}
|
|
55
55
|
|
|
56
|
+
macro_rules! debug_def {
|
|
57
|
+
($s:expr, $($arg:tt)*) => {
|
|
58
|
+
if cfg!(feature = "logging") && DEBUG && $s.scratch.log_enabled() {
|
|
59
|
+
eprintln!($($arg)*);
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
56
64
|
macro_rules! item_trace {
|
|
57
65
|
($($arg:tt)*) => {
|
|
58
66
|
if ITEM_TRACE {
|
|
@@ -322,6 +330,8 @@ struct Scratch {
|
|
|
322
330
|
// mode, which is used for computing the token mask on the
|
|
323
331
|
// pre-lexemes.
|
|
324
332
|
definitive: bool,
|
|
333
|
+
|
|
334
|
+
log_override: bool,
|
|
325
335
|
}
|
|
326
336
|
|
|
327
337
|
#[derive(Clone)]
|
|
@@ -425,6 +435,7 @@ struct ParserState {
|
|
|
425
435
|
// history - items are not popped in definitive mode.
|
|
426
436
|
lexer_stack: Vec<LexerState>,
|
|
427
437
|
lexer_stack_top_eos: bool,
|
|
438
|
+
lexer_stack_flush_position: usize,
|
|
428
439
|
rows: Vec<Row>,
|
|
429
440
|
rows_valid_end: usize,
|
|
430
441
|
|
|
@@ -488,9 +499,14 @@ impl Scratch {
|
|
|
488
499
|
items: vec![],
|
|
489
500
|
grammar_stack: vec![],
|
|
490
501
|
definitive: true,
|
|
502
|
+
log_override: false,
|
|
491
503
|
}
|
|
492
504
|
}
|
|
493
505
|
|
|
506
|
+
fn log_enabled(&self) -> bool {
|
|
507
|
+
self.definitive || self.log_override
|
|
508
|
+
}
|
|
509
|
+
|
|
494
510
|
// Set current working Earley to empty set
|
|
495
511
|
// The set backing data is at `pos`
|
|
496
512
|
fn new_row(&mut self, pos: usize) {
|
|
@@ -523,7 +539,7 @@ impl Scratch {
|
|
|
523
539
|
}
|
|
524
540
|
|
|
525
541
|
fn push_grammar_stack(&mut self, node: GrammarStackNode) {
|
|
526
|
-
if self.definitive {
|
|
542
|
+
if self.log_enabled() {
|
|
527
543
|
debug!("push_grammar_stack: {:?}", node);
|
|
528
544
|
}
|
|
529
545
|
let ptr = GrammarStackPtr::new(self.grammar_stack.len());
|
|
@@ -543,7 +559,7 @@ impl Scratch {
|
|
|
543
559
|
} else {
|
|
544
560
|
self.items[self.row_end] = item;
|
|
545
561
|
}
|
|
546
|
-
if self.definitive {
|
|
562
|
+
if self.log_enabled() {
|
|
547
563
|
debug!(
|
|
548
564
|
" addu: {} ({})",
|
|
549
565
|
self.item_to_string(self.row_end),
|
|
@@ -650,6 +666,7 @@ impl ParserState {
|
|
|
650
666
|
limits,
|
|
651
667
|
backtrack_byte_count: 0,
|
|
652
668
|
lexer_stack_top_eos: false,
|
|
669
|
+
lexer_stack_flush_position: 0,
|
|
653
670
|
lexer_stack: vec![LexerState {
|
|
654
671
|
row_idx: 0,
|
|
655
672
|
lexer_state,
|
|
@@ -778,12 +795,6 @@ impl ParserState {
|
|
|
778
795
|
});
|
|
779
796
|
}
|
|
780
797
|
|
|
781
|
-
if set.is_zero() {
|
|
782
|
-
// nothing allowed
|
|
783
|
-
// we're going to be stopped outside - we better flush the lexer
|
|
784
|
-
let _ = self.flush_lexer();
|
|
785
|
-
}
|
|
786
|
-
|
|
787
798
|
let eos = computer.trie().eos_token();
|
|
788
799
|
if eos != INVALID_TOKEN && start.is_empty() && self.lexer_allows_eos() {
|
|
789
800
|
set.allow_token(eos);
|
|
@@ -920,7 +931,7 @@ impl ParserState {
|
|
|
920
931
|
self.stats = ParserStats::default();
|
|
921
932
|
}
|
|
922
933
|
|
|
923
|
-
fn assert_definitive(&self) {
|
|
934
|
+
fn assert_definitive_inner(&self) {
|
|
924
935
|
assert!(self.scratch.definitive);
|
|
925
936
|
assert!(self.backtrack_byte_count == 0);
|
|
926
937
|
if self.num_rows() != self.row_infos.len() {
|
|
@@ -932,6 +943,14 @@ impl ParserState {
|
|
|
932
943
|
}
|
|
933
944
|
}
|
|
934
945
|
|
|
946
|
+
fn assert_definitive(&self) {
|
|
947
|
+
self.assert_definitive_inner();
|
|
948
|
+
|
|
949
|
+
if self.lexer_spec().can_rollback() {
|
|
950
|
+
self.check_lexer_bytes_invariant();
|
|
951
|
+
}
|
|
952
|
+
}
|
|
953
|
+
|
|
935
954
|
pub fn get_bytes(&self) -> &[u8] {
|
|
936
955
|
&self.bytes
|
|
937
956
|
}
|
|
@@ -980,7 +999,6 @@ impl ParserState {
|
|
|
980
999
|
n_bytes,
|
|
981
1000
|
self.byte_to_token_idx.len()
|
|
982
1001
|
);
|
|
983
|
-
self.check_lexer_bytes_invariant();
|
|
984
1002
|
|
|
985
1003
|
let new_len = self.byte_to_token_idx.len() - n_bytes;
|
|
986
1004
|
|
|
@@ -995,7 +1013,6 @@ impl ParserState {
|
|
|
995
1013
|
self.rows_valid_end = self.num_rows();
|
|
996
1014
|
|
|
997
1015
|
self.assert_definitive();
|
|
998
|
-
self.check_lexer_bytes_invariant();
|
|
999
1016
|
|
|
1000
1017
|
Ok(())
|
|
1001
1018
|
}
|
|
@@ -1003,6 +1020,7 @@ impl ParserState {
|
|
|
1003
1020
|
pub fn validate_tokens(&mut self, tokens: &[TokenId]) -> usize {
|
|
1004
1021
|
self.assert_definitive();
|
|
1005
1022
|
self.run_speculative("validate_tokens", |state| {
|
|
1023
|
+
state.scratch.log_override = true;
|
|
1006
1024
|
let mut applied_idx = state.byte_to_token_idx.len();
|
|
1007
1025
|
let tok_env = state.tok_env.clone();
|
|
1008
1026
|
let trie = tok_env.tok_trie();
|
|
@@ -1081,6 +1099,12 @@ impl ParserState {
|
|
|
1081
1099
|
.push(self.token_idx.try_into().unwrap());
|
|
1082
1100
|
}
|
|
1083
1101
|
}
|
|
1102
|
+
debug_def!(
|
|
1103
|
+
self,
|
|
1104
|
+
"add_numeric_token: idx={:?} bytes={:?}",
|
|
1105
|
+
idx,
|
|
1106
|
+
tok_bytes
|
|
1107
|
+
);
|
|
1084
1108
|
let ok = self.advance_parser(PreLexeme::just_idx(MatchingLexemesIdx::Single(idx)));
|
|
1085
1109
|
ensure!(
|
|
1086
1110
|
ok,
|
|
@@ -1140,9 +1164,20 @@ impl ParserState {
|
|
|
1140
1164
|
let row_idx = self.num_rows() - 1;
|
|
1141
1165
|
self.row_infos[row_idx].apply_token_idx(self.token_idx);
|
|
1142
1166
|
|
|
1167
|
+
self.lexer_stack_flush_position = 0;
|
|
1143
1168
|
let idx = self.flush_and_check_numeric(tok_id).unwrap();
|
|
1144
1169
|
self.add_numeric_token(idx, tok_bytes)?;
|
|
1145
1170
|
|
|
1171
|
+
// if flush_lexer() added a stack entry
|
|
1172
|
+
if self.lexer_stack_flush_position > 0 {
|
|
1173
|
+
// we make sure it's not on the top
|
|
1174
|
+
assert!(self.lexer_stack_flush_position + 1 < self.lexer_stack.len());
|
|
1175
|
+
// and remove it
|
|
1176
|
+
self.lexer_stack.remove(self.lexer_stack_flush_position);
|
|
1177
|
+
}
|
|
1178
|
+
|
|
1179
|
+
self.assert_definitive();
|
|
1180
|
+
|
|
1146
1181
|
return Ok(0);
|
|
1147
1182
|
}
|
|
1148
1183
|
}
|
|
@@ -1298,6 +1333,8 @@ impl ParserState {
|
|
|
1298
1333
|
self.print_row(self.num_rows() - 1);
|
|
1299
1334
|
}
|
|
1300
1335
|
|
|
1336
|
+
self.assert_definitive();
|
|
1337
|
+
|
|
1301
1338
|
Ok(0)
|
|
1302
1339
|
}
|
|
1303
1340
|
|
|
@@ -1458,10 +1495,6 @@ impl ParserState {
|
|
|
1458
1495
|
// debug!("trie_started: rows={} lexer={}", self.num_rows(), self.lexer_stack.len());
|
|
1459
1496
|
self.assert_definitive();
|
|
1460
1497
|
|
|
1461
|
-
if self.lexer_spec().can_rollback() {
|
|
1462
|
-
self.check_lexer_bytes_invariant();
|
|
1463
|
-
}
|
|
1464
|
-
|
|
1465
1498
|
self.trie_lexer_stack = self.lexer_stack.len();
|
|
1466
1499
|
self.trie_grammar_stack = self.scratch.grammar_stack.len();
|
|
1467
1500
|
self.scratch.definitive = false;
|
|
@@ -1497,6 +1530,8 @@ impl ParserState {
|
|
|
1497
1530
|
self.scratch.definitive = true;
|
|
1498
1531
|
self.assert_definitive();
|
|
1499
1532
|
self.rows_valid_end = self.num_rows();
|
|
1533
|
+
self.scratch.log_override = false; // reset
|
|
1534
|
+
self.lexer_stack_flush_position = 0;
|
|
1500
1535
|
}
|
|
1501
1536
|
|
|
1502
1537
|
fn run_speculative<T>(&mut self, lbl: &str, f: impl FnOnce(&mut Self) -> T) -> T {
|
|
@@ -1655,16 +1690,19 @@ impl ParserState {
|
|
|
1655
1690
|
}
|
|
1656
1691
|
let curr = self.lexer_state();
|
|
1657
1692
|
let lex_result = self.lexer_mut().try_lexeme_end(curr.lexer_state);
|
|
1693
|
+
let prev_len = self.lexer_stack.len();
|
|
1658
1694
|
let r = self.advance_lexer_or_parser(lex_result, curr);
|
|
1695
|
+
if self.lexer_stack.len() != prev_len {
|
|
1696
|
+
assert!(self.lexer_stack.len() == prev_len + 1);
|
|
1697
|
+
assert!(prev_len > 0);
|
|
1698
|
+
self.lexer_stack_flush_position = prev_len;
|
|
1699
|
+
}
|
|
1659
1700
|
assert!(self.backtrack_byte_count == 0);
|
|
1660
1701
|
r
|
|
1661
1702
|
}
|
|
1662
1703
|
|
|
1663
1704
|
pub fn scan_eos(&mut self) -> bool {
|
|
1664
1705
|
self.assert_definitive(); // ???
|
|
1665
|
-
if self.lexer_spec().can_rollback() {
|
|
1666
|
-
self.check_lexer_bytes_invariant();
|
|
1667
|
-
}
|
|
1668
1706
|
|
|
1669
1707
|
let lexer_eos = self.lexer_allows_eos();
|
|
1670
1708
|
|
|
@@ -1693,9 +1731,7 @@ impl ParserState {
|
|
|
1693
1731
|
self.lexer_stack_top_eos = true;
|
|
1694
1732
|
}
|
|
1695
1733
|
|
|
1696
|
-
|
|
1697
|
-
self.check_lexer_bytes_invariant();
|
|
1698
|
-
}
|
|
1734
|
+
self.assert_definitive(); // ???
|
|
1699
1735
|
|
|
1700
1736
|
false
|
|
1701
1737
|
}
|
|
@@ -1762,14 +1798,13 @@ impl ParserState {
|
|
|
1762
1798
|
self.scratch.new_row(items.end);
|
|
1763
1799
|
self.scratch.push_lexeme_idx = lexeme.idx;
|
|
1764
1800
|
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
|
|
1772
|
-
}
|
|
1801
|
+
debug_def!(
|
|
1802
|
+
self,
|
|
1803
|
+
" scan: {} at row={} token={}",
|
|
1804
|
+
self.lexer().dbg_lexeme(lexeme),
|
|
1805
|
+
row_idx,
|
|
1806
|
+
self.token_idx,
|
|
1807
|
+
);
|
|
1773
1808
|
|
|
1774
1809
|
// This loop performs the scan inference rule
|
|
1775
1810
|
// (slide 21 of Kallmeyer 2018). It is an
|
|
@@ -1890,9 +1925,7 @@ impl ParserState {
|
|
|
1890
1925
|
let item_idx = agenda_ptr;
|
|
1891
1926
|
let item = self.scratch.items[agenda_ptr];
|
|
1892
1927
|
agenda_ptr += 1;
|
|
1893
|
-
|
|
1894
|
-
debug!(" agenda: {}", self.item_to_string(item_idx));
|
|
1895
|
-
}
|
|
1928
|
+
debug_def!(self, " agenda: {}", self.item_to_string(item_idx));
|
|
1896
1929
|
|
|
1897
1930
|
let rule = item.rhs_ptr();
|
|
1898
1931
|
let after_dot = self.grammar.sym_idx_dot(rule);
|
|
@@ -1988,13 +2021,12 @@ impl ParserState {
|
|
|
1988
2021
|
.start_state(&self.scratch.push_allowed_lexemes)
|
|
1989
2022
|
};
|
|
1990
2023
|
|
|
1991
|
-
|
|
1992
|
-
|
|
1993
|
-
|
|
1994
|
-
|
|
1995
|
-
|
|
1996
|
-
|
|
1997
|
-
}
|
|
2024
|
+
debug_def!(
|
|
2025
|
+
self,
|
|
2026
|
+
" push row: {} {:?}",
|
|
2027
|
+
self.allowed_lexemes_dbg(lex_start),
|
|
2028
|
+
grammar_id
|
|
2029
|
+
);
|
|
1998
2030
|
|
|
1999
2031
|
// Add the working row to the parser state
|
|
2000
2032
|
let idx = self.num_rows();
|
|
@@ -2042,9 +2074,7 @@ impl ParserState {
|
|
|
2042
2074
|
}
|
|
2043
2075
|
|
|
2044
2076
|
fn process_max_tokens(&mut self, ptr: GrammarStackPtr, lexeme: &Lexeme) {
|
|
2045
|
-
|
|
2046
|
-
debug!(" process_max_tokens");
|
|
2047
|
-
}
|
|
2077
|
+
debug_def!(self, " process_max_tokens");
|
|
2048
2078
|
let curr_idx = self.num_rows();
|
|
2049
2079
|
let top = &self.scratch.grammar_stack[ptr.as_usize()];
|
|
2050
2080
|
self.scratch.push_grm_top = top.back_ptr;
|
|
@@ -2118,12 +2148,13 @@ impl ParserState {
|
|
|
2118
2148
|
|
|
2119
2149
|
while grm_stack_top.as_usize() > 0 {
|
|
2120
2150
|
let grm_top = &self.scratch.grammar_stack[grm_stack_top.as_usize()];
|
|
2121
|
-
|
|
2122
|
-
|
|
2123
|
-
|
|
2124
|
-
|
|
2125
|
-
|
|
2126
|
-
|
|
2151
|
+
debug_def!(
|
|
2152
|
+
self,
|
|
2153
|
+
" pop grammar_stack: top={:?}, curr={:?}, #{}",
|
|
2154
|
+
grm_top.grammar_id,
|
|
2155
|
+
grammar_ids,
|
|
2156
|
+
self.token_idx
|
|
2157
|
+
);
|
|
2127
2158
|
if grammar_ids.contains(&grm_top.grammar_id) {
|
|
2128
2159
|
// token_idx is one behind
|
|
2129
2160
|
if grm_top.token_horizon <= self.token_idx as u32 {
|
|
@@ -2132,12 +2163,12 @@ impl ParserState {
|
|
|
2132
2163
|
// We only pop one grammar off the stack.
|
|
2133
2164
|
// If more grammars have the same token horizon, they will get popped
|
|
2134
2165
|
// in the next step - we might overrun a bit.
|
|
2135
|
-
|
|
2136
|
-
|
|
2137
|
-
|
|
2138
|
-
|
|
2139
|
-
|
|
2140
|
-
|
|
2166
|
+
debug_def!(
|
|
2167
|
+
self,
|
|
2168
|
+
" hit token limit horizon={} token_idx={}",
|
|
2169
|
+
grm_top.token_horizon,
|
|
2170
|
+
self.token_idx
|
|
2171
|
+
);
|
|
2141
2172
|
max_token_ptr = Some(grm_stack_top);
|
|
2142
2173
|
}
|
|
2143
2174
|
break;
|
|
@@ -2252,13 +2283,14 @@ impl ParserState {
|
|
|
2252
2283
|
.saturating_sub(1);
|
|
2253
2284
|
self.row_infos[added_row].start_byte_idx -= new_start;
|
|
2254
2285
|
}
|
|
2255
|
-
debug!(
|
|
2256
|
-
"lex: re-start {:?} (via {:?}); allowed: {}",
|
|
2257
|
-
no_hidden.lexer_state,
|
|
2258
|
-
transition_byte.map(|b| b as char),
|
|
2259
|
-
self.allowed_lexemes_dbg(added_row_start_state)
|
|
2260
|
-
);
|
|
2261
2286
|
}
|
|
2287
|
+
debug_def!(
|
|
2288
|
+
self,
|
|
2289
|
+
"lex: re-start {:?} (via {:?}); allowed: {}",
|
|
2290
|
+
no_hidden.lexer_state,
|
|
2291
|
+
transition_byte.map(|b| b as char),
|
|
2292
|
+
self.allowed_lexemes_dbg(added_row_start_state)
|
|
2293
|
+
);
|
|
2262
2294
|
|
|
2263
2295
|
no_hidden
|
|
2264
2296
|
}
|
|
@@ -2277,7 +2309,7 @@ impl ParserState {
|
|
|
2277
2309
|
|
|
2278
2310
|
let hidden_bytes = lexeme.hidden_bytes();
|
|
2279
2311
|
|
|
2280
|
-
let trace_here = self.scratch.
|
|
2312
|
+
let trace_here = self.scratch.log_enabled();
|
|
2281
2313
|
|
|
2282
2314
|
if trace_here {
|
|
2283
2315
|
trace!(
|
|
@@ -2348,7 +2380,7 @@ impl ParserState {
|
|
|
2348
2380
|
});
|
|
2349
2381
|
}
|
|
2350
2382
|
if self.scratch.definitive {
|
|
2351
|
-
self.
|
|
2383
|
+
self.assert_definitive_inner();
|
|
2352
2384
|
}
|
|
2353
2385
|
} else {
|
|
2354
2386
|
if trace_here {
|
|
@@ -2361,7 +2393,7 @@ impl ParserState {
|
|
|
2361
2393
|
byte: None,
|
|
2362
2394
|
..no_hidden
|
|
2363
2395
|
});
|
|
2364
|
-
self.
|
|
2396
|
+
self.assert_definitive_inner();
|
|
2365
2397
|
self.backtrack_byte_count = hidden_bytes.len();
|
|
2366
2398
|
} else {
|
|
2367
2399
|
// prevent any further matches in this branch
|
|
@@ -2467,9 +2499,7 @@ impl ParserState {
|
|
|
2467
2499
|
.lexer_mut()
|
|
2468
2500
|
.check_for_single_byte_lexeme(no_hidden.lexer_state, b);
|
|
2469
2501
|
if let Some(second_lexeme) = single {
|
|
2470
|
-
|
|
2471
|
-
debug!("single byte lexeme: {:?}", second_lexeme);
|
|
2472
|
-
}
|
|
2502
|
+
debug_def!(self, "single byte lexeme: {:?}", second_lexeme);
|
|
2473
2503
|
no_hidden.byte = None;
|
|
2474
2504
|
self.lexer_stack.push(no_hidden);
|
|
2475
2505
|
|
|
@@ -2488,16 +2518,15 @@ impl ParserState {
|
|
|
2488
2518
|
}
|
|
2489
2519
|
}
|
|
2490
2520
|
}
|
|
2521
|
+
debug_def!(self, " push normal: {no_hidden:?}");
|
|
2491
2522
|
self.lexer_stack.push(no_hidden);
|
|
2492
2523
|
}
|
|
2493
2524
|
if self.scratch.definitive {
|
|
2494
|
-
self.
|
|
2525
|
+
self.assert_definitive_inner();
|
|
2495
2526
|
}
|
|
2496
2527
|
true
|
|
2497
2528
|
} else {
|
|
2498
|
-
|
|
2499
|
-
debug!(" scan failed");
|
|
2500
|
-
}
|
|
2529
|
+
debug_def!(self, " scan failed");
|
|
2501
2530
|
false
|
|
2502
2531
|
}
|
|
2503
2532
|
}
|