llguidance 0.7.24__tar.gz → 0.7.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llguidance-0.7.24 → llguidance-0.7.26}/CHANGELOG.md +11 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/Cargo.lock +5 -5
- {llguidance-0.7.24 → llguidance-0.7.26}/PKG-INFO +2 -4
- {llguidance-0.7.24 → llguidance-0.7.26}/README.md +1 -3
- {llguidance-0.7.24 → llguidance-0.7.26}/docs/optimizations.md +35 -33
- {llguidance-0.7.24 → llguidance-0.7.26}/docs/syntax.md +22 -6
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/Cargo.toml +1 -1
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/earley/parser.rs +6 -2
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/grammar_builder.rs +3 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/lark/ast.rs +18 -5
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/lark/compiler.rs +30 -10
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/lark/lexer.rs +2 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/lark/parser.rs +27 -7
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/tokenparser.rs +7 -1
- {llguidance-0.7.24 → llguidance-0.7.26}/pyproject.toml +1 -1
- {llguidance-0.7.24 → llguidance-0.7.26}/python/llguidance/_lib.pyi +8 -2
- {llguidance-0.7.24 → llguidance-0.7.26}/python/torch_tests/test_llamacpp.py +7 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python_ext/Cargo.toml +1 -1
- {llguidance-0.7.24 → llguidance-0.7.26}/python_ext/src/llamatokenizer.rs +22 -11
- {llguidance-0.7.24 → llguidance-0.7.26}/python_ext/src/llinterpreter.rs +2 -1
- {llguidance-0.7.24 → llguidance-0.7.26}/python_ext/src/llmatcher.rs +17 -8
- {llguidance-0.7.24 → llguidance-0.7.26}/python_ext/src/py.rs +14 -4
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/tests/test_lark.rs +134 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/tests/test_ll.rs +22 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie/Cargo.toml +1 -1
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie/src/tokenv.rs +6 -1
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie_hf_downloader/Cargo.toml +1 -1
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie_hf_tokenizers/Cargo.toml +1 -1
- {llguidance-0.7.24 → llguidance-0.7.26}/.github/workflows/rust.yml +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/.github/workflows/wheels.yml +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/.gitignore +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/CODE_OF_CONDUCT.md +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/Cargo.toml +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/LICENSE +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/SECURITY.md +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/SUPPORT.md +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/c_sample/Makefile +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/c_sample/README.md +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/c_sample/c_sample.cpp +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/docs/fast_forward.md +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/docs/json_schema.md +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/docs/mask_plot.png +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/docs/special_tokens.md +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/docs/toktrie.md +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/json_stats/Cargo.toml +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/json_stats/expected_maskbench.json +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/json_stats/jstats.sh +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/json_stats/scripts/split-stats.sh +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/json_stats/scripts/split_plot.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/json_stats/src/json_stats.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/json_stats/src/lib.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/json_stats/src/stats.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/LICENSE +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/README.md +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/build.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/cbindgen.toml +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/grammars/character.json +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/grammars/json.json +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/llguidance.h +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/api.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/constraint.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/earley/from_guidance.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/earley/grammar.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/earley/lexer.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/earley/lexerspec.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/earley/mod.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/earley/perf.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/earley/regexvec.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/earley/slicer.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/factory.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/ffi.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/ffi_par.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/json/README.md +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/json/compiler.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/json/context_ref.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/json/context_simple/context.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/json/context_simple/draft.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/json/context_simple/mod.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/json/formats.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/json/mod.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/json/numeric.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/json/schema.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/json/shared_context.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/json_validation.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/lark/README.md +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/lark/common.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/lark/mod.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/lib.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/logging.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/matcher.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/output.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/panic_utils.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/regex_rewrite.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/stop_controller.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/substring.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/parser/src/tokenizer_json.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/plan.md +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/llguidance/__init__.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/llguidance/_grammar_from.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/llguidance/_struct_tag.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/llguidance/_tokenizer.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/llguidance/_util.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/llguidance/cli.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/llguidance/gbnf_to_lark.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/llguidance/hf.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/llguidance/llamacpp.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/llguidance/mlx.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/llguidance/numpy.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/llguidance/py.typed +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/llguidance/torch.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/mypy.ini +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/torch_tests/__init__.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/torch_tests/test_bitmask.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/torch_tests/test_hf.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python/torch_tests/test_matcher.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python_ext/src/lib.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python_ext/src/parserlimits.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/python_ext/src/pyjson.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/Cargo.toml +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/README.md +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/cli.sh +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/blog.sample.json +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/blog.schema.json +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/blog.schema.ll.json +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/from-llama.cpp/README.md +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/from-llama.cpp/arithmetic.gbnf +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/from-llama.cpp/c.gbnf +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/from-llama.cpp/chess.gbnf +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/from-llama.cpp/english.gbnf +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/from-llama.cpp/japanese.gbnf +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/from-llama.cpp/json.gbnf +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/from-llama.cpp/json_arr.gbnf +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/from-llama.cpp/list.gbnf +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/from-llama.cpp/vllm-sql.gbnf +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/lark.lark +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/rfc.lark +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/rfc.xml +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/data/ulysses.md +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/gtest.sh +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/lark.sh +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/run.sh +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/src/lib.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/src/minimal.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/src/sample_parser.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/tests/test_raw_parser.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/sample_parser/tests/test_stop.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/annotate_asm.js +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/bump.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/cbindgen.sh +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/checklinks.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/checklinks.sh +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/ci-publish.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/disasm.sh +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/gbnf_to_lark.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/gen-testcase.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/git-version.sh +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/install-deps.sh +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/jsonschema-stats.js +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/remote-guidance-test.sh +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/rust-size.js +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/rust_size.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/test-guidance.sh +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/tokenizer_test.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/scripts/update-git.py +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie/LICENSE +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie/README.md +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie/src/bytes.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie/src/lib.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie/src/recognizer.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie/src/rng.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie/src/svob.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie/src/toktree.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie/tests/test_svob.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie_hf_downloader/LICENSE +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie_hf_downloader/src/lib.rs +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie_hf_tokenizers/LICENSE +0 -0
- {llguidance-0.7.24 → llguidance-0.7.26}/toktrie_hf_tokenizers/src/lib.rs +0 -0
|
@@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file. Dates are d
|
|
|
4
4
|
|
|
5
5
|
If a release doesn't introduce any interesting changes (build fixes etc.), it's skipped.
|
|
6
6
|
|
|
7
|
+
#### [0.7.26](https://github.com/guidance-ai/llguidance/compare/v0.7.25...0.7.26) 2025-05-30
|
|
8
|
+
|
|
9
|
+
- add support for & and ~ in lark regexes [`96fcee3`](https://github.com/guidance-ai/llguidance/commit/96fcee373697b57bead94d1bc06c17cf1c6134e4)
|
|
10
|
+
- dump grammar in errors in LLInterpreter [`#183`](https://github.com/guidance-ai/llguidance/pull/183)
|
|
11
|
+
- don't check lexer bytes invariant when we cannot rollback [`ec22083`](https://github.com/guidance-ai/llguidance/commit/ec220837051513a70177974ca389b7bf387455f1)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
#### [0.7.25](https://github.com/guidance-ai/llguidance/compare/v0.7.24...0.7.25) 2025-05-28
|
|
15
|
+
|
|
16
|
+
- add parse_special=False to tokenize_str/bytes() in python [`#181`](https://github.com/guidance-ai/llguidance/pull/181)
|
|
17
|
+
|
|
7
18
|
#### [0.7.24](https://github.com/guidance-ai/llguidance/compare/v0.7.23...0.7.24) 2025-05-23
|
|
8
19
|
|
|
9
20
|
- add the sentinel token hack, fixes #180 [`#180`](https://github.com/guidance-ai/llguidance/issues/180)
|
|
@@ -1174,7 +1174,7 @@ checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856"
|
|
|
1174
1174
|
|
|
1175
1175
|
[[package]]
|
|
1176
1176
|
name = "llguidance"
|
|
1177
|
-
version = "0.7.24"
|
|
1177
|
+
version = "0.7.26"
|
|
1178
1178
|
dependencies = [
|
|
1179
1179
|
"anyhow",
|
|
1180
1180
|
"derivre",
|
|
@@ -1193,7 +1193,7 @@ dependencies = [
|
|
|
1193
1193
|
|
|
1194
1194
|
[[package]]
|
|
1195
1195
|
name = "llguidance_py"
|
|
1196
|
-
version = "0.7.24"
|
|
1196
|
+
version = "0.7.26"
|
|
1197
1197
|
dependencies = [
|
|
1198
1198
|
"anyhow",
|
|
1199
1199
|
"bytemuck",
|
|
@@ -2336,7 +2336,7 @@ dependencies = [
|
|
|
2336
2336
|
|
|
2337
2337
|
[[package]]
|
|
2338
2338
|
name = "toktrie"
|
|
2339
|
-
version = "0.7.24"
|
|
2339
|
+
version = "0.7.26"
|
|
2340
2340
|
dependencies = [
|
|
2341
2341
|
"anyhow",
|
|
2342
2342
|
"bytemuck",
|
|
@@ -2347,7 +2347,7 @@ dependencies = [
|
|
|
2347
2347
|
|
|
2348
2348
|
[[package]]
|
|
2349
2349
|
name = "toktrie_hf_downloader"
|
|
2350
|
-
version = "0.7.24"
|
|
2350
|
+
version = "0.7.26"
|
|
2351
2351
|
dependencies = [
|
|
2352
2352
|
"anyhow",
|
|
2353
2353
|
"hf-hub",
|
|
@@ -2358,7 +2358,7 @@ dependencies = [
|
|
|
2358
2358
|
|
|
2359
2359
|
[[package]]
|
|
2360
2360
|
name = "toktrie_hf_tokenizers"
|
|
2361
|
-
version = "0.7.24"
|
|
2361
|
+
version = "0.7.26"
|
|
2362
2362
|
dependencies = [
|
|
2363
2363
|
"anyhow",
|
|
2364
2364
|
"log",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llguidance
|
|
3
|
-
Version: 0.7.24
|
|
3
|
+
Version: 0.7.26
|
|
4
4
|
License-File: LICENSE
|
|
5
5
|
Summary: Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance
|
|
6
6
|
Author: Michal Moskal
|
|
@@ -72,9 +72,7 @@ The library is currently integrated in:
|
|
|
72
72
|
- **vLLM** - [V0 PR](https://github.com/vllm-project/vllm/pull/14589) and [V1 PR](https://github.com/vllm-project/vllm/pull/14779)
|
|
73
73
|
- [LLGTRT](https://github.com/guidance-ai/llgtrt) - OpenAI-compatible REST server using NVIDIA's [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM)
|
|
74
74
|
- [mistral.rs](https://github.com/EricLBuehler/mistral.rs/pull/899)
|
|
75
|
-
|
|
76
|
-
The integration is ongoing in:
|
|
77
|
-
- **onnxruntime-genai** - [draft PR](https://github.com/microsoft/onnxruntime-genai/pull/1038)
|
|
75
|
+
- [onnxruntime-genai](https://github.com/microsoft/onnxruntime-genai/pull/1381)
|
|
78
76
|
|
|
79
77
|
## Technical details
|
|
80
78
|
|
|
@@ -60,9 +60,7 @@ The library is currently integrated in:
|
|
|
60
60
|
- **vLLM** - [V0 PR](https://github.com/vllm-project/vllm/pull/14589) and [V1 PR](https://github.com/vllm-project/vllm/pull/14779)
|
|
61
61
|
- [LLGTRT](https://github.com/guidance-ai/llgtrt) - OpenAI-compatible REST server using NVIDIA's [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM)
|
|
62
62
|
- [mistral.rs](https://github.com/EricLBuehler/mistral.rs/pull/899)
|
|
63
|
-
|
|
64
|
-
The integration is ongoing in:
|
|
65
|
-
- **onnxruntime-genai** - [draft PR](https://github.com/microsoft/onnxruntime-genai/pull/1038)
|
|
63
|
+
- [onnxruntime-genai](https://github.com/microsoft/onnxruntime-genai/pull/1381)
|
|
66
64
|
|
|
67
65
|
## Technical details
|
|
68
66
|
|
|
@@ -91,7 +91,6 @@ Walking the trie mostly involves successful lookups in that table,
|
|
|
91
91
|
and the derivative engine is only used when the table doesn't yet have the
|
|
92
92
|
given transition.
|
|
93
93
|
|
|
94
|
-
|
|
95
94
|
## Earley parser optimizations
|
|
96
95
|
|
|
97
96
|
- CFG rules are stored in a flat array
|
|
@@ -123,7 +122,7 @@ We thus define a series _slices_, under-approximation of such unconstrained cont
|
|
|
123
122
|
The slices are defined by regular expressions typically of the form `[...]{1,N}`
|
|
124
123
|
(that is a character class repeated up to `N` times).
|
|
125
124
|
|
|
126
|
-
For example, a good set of slices for JSON schemas is
|
|
125
|
+
For example, a good set of slices for JSON schemas is
|
|
127
126
|
|
|
128
127
|
- `[^"\\\x00-\x1F\x7F]{1,10}` (`turtle`, ` turtle`, `)!;`, `żółw`, `🐢`, etc.)
|
|
129
128
|
- `[^"\\\x00-\x1F\x7F]{1,30}` (`/////////////////`, ...)
|
|
@@ -169,6 +168,7 @@ Now, the JSON slice is contained in `C*"`,
|
|
|
169
168
|
and thus we can skip walking the trie for the slice.
|
|
170
169
|
|
|
171
170
|
Another example:
|
|
171
|
+
|
|
172
172
|
- assume schemas has `{ "type": "string", "maxLength": 20 }`
|
|
173
173
|
- so after initial quote, the lexer allows `C{0,20}"`
|
|
174
174
|
- the JSON slice `[^"\\\x00-\x1F\x7F]{1,10}` is contained in this lexeme,
|
|
@@ -176,17 +176,19 @@ Another example:
|
|
|
176
176
|
|
|
177
177
|
This optimization makes the mask computation about 10x faster in [MaskBench](https://github.com/guidance-ai/jsonschemabench/tree/main/maskbench).
|
|
178
178
|
|
|
179
|
+
### Mask density statistics
|
|
180
|
+
|
|
179
181
|
The reason the optimization works is that masks tend to be either small or sliceable.
|
|
180
182
|
Here are statistics of various kinds of masks, across around 2M masks in MaskBench,
|
|
181
183
|
categorized based on how "full" the mask is and whether the slicer optimization was applied.
|
|
182
184
|
|
|
183
|
-
| Category
|
|
184
|
-
|
|
185
|
-
| 0%-2% & !sliced
|
|
186
|
-
| 2%-85% & !sliced
|
|
187
|
-
| 85%+ & !sliced
|
|
188
|
-
| 85%+ & sliced
|
|
189
|
-
| **Total**
|
|
185
|
+
| Category | % Masks | % Time | Time/Mask [us] |
|
|
186
|
+
| ---------------- | ------: | -----: | -------------: |
|
|
187
|
+
| 0%-2% & !sliced | 44.6% | 20.7% | 28 |
|
|
188
|
+
| 2%-85% & !sliced | 1.1% | 11.0% | 576 |
|
|
189
|
+
| 85%+ & !sliced | 0.5% | 13.0% | 1577 |
|
|
190
|
+
| 85%+ & sliced | 53.8% | 55.0% | 61 |
|
|
191
|
+
| **Total** | 100.0% | 100.0% | 60 |
|
|
190
192
|
|
|
191
193
|

|
|
192
194
|
|
|
@@ -195,43 +197,43 @@ and in a little over half the slicer optimization can be applied
|
|
|
195
197
|
(there are no masks under 85% full where the slicer can be applied).
|
|
196
198
|
The remaining sliver of masks are either intermediate in size or large, but the slicer optimization can't be applied to them; they take a disproportionately long time to compute.
|
|
197
199
|
|
|
200
|
+
|
|
198
201
|
### Checking regex containment
|
|
199
202
|
|
|
200
|
-
This is an under-approximation of the containment problem,
|
|
201
|
-
|
|
202
|
-
|
|
203
|
+
This is an under-approximation of the containment problem, that is, it may return
|
|
204
|
+
false when the containment is actually true. If any of the "checks" fail, we
|
|
205
|
+
return false.
|
|
203
206
|
|
|
204
207
|
Prefixes of a language `R` are defined as `P(R) = { w | ∃q. wq ∈ R }`.
|
|
205
208
|
|
|
206
|
-
We need to check if regex `S` (slice) is contained in prefix of regex `L`
|
|
209
|
+
We need to check if regex `S` (slice) is contained in prefix of regex `L`
|
|
210
|
+
(lexeme): `S ⊆ P(L)`.
|
|
207
211
|
|
|
208
|
-
We check if `L` is of the form `(X{m,n} & ~E) T`, where
|
|
209
|
-
`
|
|
210
|
-
|
|
212
|
+
We check if `L` is of the form `(X{m,n} & ~E) T`, where `E` is of the form
|
|
213
|
+
`E0 | E1 | ... | Ek`. Note that: `E` can be `∅` (empty-set/no match) and `T` can
|
|
214
|
+
be `ε` (empty string).
|
|
211
215
|
|
|
212
|
-
Observe that `P(R) ⊆ P(RT)`, ie. making regex longer doesn't remove any prefixes
|
|
213
|
-
Thus, we'll be checking containment in `P(X{m,n} & ~E)`.
|
|
216
|
+
Observe that `P(R) ⊆ P(RT)`, i.e., making the regex longer doesn't remove any prefixes
|
|
217
|
+
(provided `T ≠ ∅`). Thus, we'll be checking containment in `P(X{m,n} & ~E)`.
|
|
214
218
|
|
|
215
|
-
We (over)estimate maximum length of `E`, let `o >= max { |w| | w ∈ E }`.
|
|
216
|
-
|
|
217
|
-
|
|
219
|
+
We (over)estimate maximum length of `E`, let `o >= max { |w| | w ∈ E }`. We
|
|
220
|
+
check that `n > o`, and that `∃v ≠ ε. v ∈ X`. In other words, we check that for
|
|
221
|
+
anything matching `Ei` and `X{m,n}` there is a proper extension of that string
|
|
222
|
+
in `X{m,n}`.
|
|
218
223
|
|
|
219
224
|
Now, we prove that `P(X{m,n} & ~E) = P(X{m,n})`.
|
|
220
225
|
|
|
221
|
-
Consider `w ∈ P(X{m,n})`. We have `wq ∈ X{m,n}` for some `q`.
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
and thus `wqv...v ∈ X{n}
|
|
225
|
-
|
|
226
|
-
and thus `wqv...v ∈ X{m,n} & ~E`,
|
|
227
|
-
and thus `w ∈ P(X{m,n} & ~E)`.
|
|
228
|
-
The other direction is trivial.
|
|
226
|
+
Consider `w ∈ P(X{m,n})`. We have `wq ∈ X{m,n}` for some `q`. If `|wq| > o`,
|
|
227
|
+
then `wq ∉ E`, and thus `wq ∈ X{m,n} & ~E`. Otherwise, `wq ∈ X{p}` for some
|
|
228
|
+
`p <= o < n`, and thus `wqv...v ∈ X{n}` for `n-p` repetitions of `v`. We also
|
|
229
|
+
have `|wqv...v| > o`, and thus `wqv...v ∉ E`, and thus `wqv...v ∈ X{m,n} & ~E`,
|
|
230
|
+
and thus `w ∈ P(X{m,n} & ~E)`. The other direction is trivial.
|
|
229
231
|
|
|
230
232
|
Now, we just need to check if `S ⊆ P(X{m,n})`.
|
|
231
233
|
|
|
232
|
-
First, we check if `S` is of the form `Y{m',n'}`.
|
|
233
|
-
|
|
234
|
-
Finally, we check if `n' <= n`.
|
|
235
|
-
|
|
234
|
+
First, we check if `S` is of the form `Y{m',n'}`. Then, we check if `Y` is
|
|
235
|
+
contained in `X` (this is a cached check using symbolic derivatives; it's
|
|
236
|
+
typically simple). Finally, we check if `n' <= n`. Note that we don't care about
|
|
237
|
+
`m` and `m'`, as we're checking for prefixes.
|
|
236
238
|
|
|
237
239
|
Also note that the upper-bound in the above calculations can be infinity.
|
|
@@ -217,6 +217,28 @@ like `<|python_tag|>`, not a string like `<function`.
|
|
|
217
217
|
|
|
218
218
|
The `llguidance.StructTag` API, [inspired](https://github.com/mlc-ai/xgrammar/blob/fd9ee31/python/xgrammar/grammar.py#L211) by XGrammar, just compiles down to the above.
|
|
219
219
|
|
|
220
|
+
### And/Not operators in regexes
|
|
221
|
+
|
|
222
|
+
The regular expressions in LLGuidance can use additional operators: `&` (and) and `~` (not).
|
|
223
|
+
They can only be used outside of the `/.../` syntax, i.e., in the Lark terminal (token) definitions.
|
|
224
|
+
The `&` operator binds tighter than `|` (alternation), so `A & B | C` means `(A & B) | C`.
|
|
225
|
+
The `~` operator binds tighter than even `+` or `*`, so `~A+` means `(~A)+`.
|
|
226
|
+
|
|
227
|
+
The negation operator `~` is particularly tricky to use right.
|
|
228
|
+
For example, this is a terminal definition that matches any list of ASCII lines,
|
|
229
|
+
but they cannot have two newlines in a row:
|
|
230
|
+
|
|
231
|
+
```lark
|
|
232
|
+
ASCII_LINES: /[a-zA-Z \n]*/ & ~/(?s:.*)\n\n(?s:.*)/
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
Note that `/[a-zA-Z \n]*/ & ~/\n\n/` would mean any list of lines, also with two newlines in a row,
|
|
236
|
+
except for the exact string `\n\n`.
|
|
237
|
+
Also, `/[a-zA-Z \n]*/ & ~/(.*)\n\n(.*)/` would allow double newlines, but only if there are at least two of them
|
|
238
|
+
(`/./` doesn't match newline).
|
|
239
|
+
|
|
240
|
+
These operators are sometimes expensive to use, so you should generally avoid them if alternatives exist.
|
|
241
|
+
|
|
220
242
|
### Structured %regex
|
|
221
243
|
|
|
222
244
|
LLGuidance supports [extended regex syntax](https://docs.rs/regex/latest/regex/#syntax) in `/.../`.
|
|
@@ -273,12 +295,6 @@ MULT_NUM: %regex {
|
|
|
273
295
|
}
|
|
274
296
|
```
|
|
275
297
|
|
|
276
|
-
We also plan to add `&` and `~` operators:
|
|
277
|
-
|
|
278
|
-
```lark
|
|
279
|
-
ASCII_LINES: /[a-zA-Z \n]*/ & ~/.*\n\n.*/
|
|
280
|
-
```
|
|
281
|
-
|
|
282
298
|
### Grammar options
|
|
283
299
|
|
|
284
300
|
Certain grammar options can be set by using `%llguidance { ... }`,
|
|
@@ -1662,7 +1662,9 @@ impl ParserState {
|
|
|
1662
1662
|
|
|
1663
1663
|
pub fn scan_eos(&mut self) -> bool {
|
|
1664
1664
|
self.assert_definitive(); // ???
|
|
1665
|
-
self.
|
|
1665
|
+
if self.lexer_spec().can_rollback() {
|
|
1666
|
+
self.check_lexer_bytes_invariant();
|
|
1667
|
+
}
|
|
1666
1668
|
|
|
1667
1669
|
let lexer_eos = self.lexer_allows_eos();
|
|
1668
1670
|
|
|
@@ -1691,7 +1693,9 @@ impl ParserState {
|
|
|
1691
1693
|
self.lexer_stack_top_eos = true;
|
|
1692
1694
|
}
|
|
1693
1695
|
|
|
1694
|
-
self.
|
|
1696
|
+
if self.lexer_spec().can_rollback() {
|
|
1697
|
+
self.check_lexer_bytes_invariant();
|
|
1698
|
+
}
|
|
1695
1699
|
|
|
1696
1700
|
false
|
|
1697
1701
|
}
|
|
@@ -79,29 +79,41 @@ pub struct RuleParams(pub Vec<String>);
|
|
|
79
79
|
#[derive(Debug, Clone)]
|
|
80
80
|
pub struct TokenParams(pub Vec<String>);
|
|
81
81
|
|
|
82
|
-
/// Represents
|
|
82
|
+
/// Represents an alternative (OR) of productions in a grammar.
|
|
83
83
|
#[derive(Debug)]
|
|
84
84
|
pub struct Expansions(pub Location, pub Vec<Alias>);
|
|
85
85
|
|
|
86
86
|
impl Expansions {
|
|
87
87
|
pub fn single_atom(&self) -> Option<&Atom> {
|
|
88
|
-
if self.1.len() == 1
|
|
89
|
-
|
|
88
|
+
if self.1.len() == 1
|
|
89
|
+
&& self.1[0].conjuncts.len() == 1
|
|
90
|
+
&& self.1[0].conjuncts[0].0.len() == 1
|
|
91
|
+
{
|
|
92
|
+
Some(&self.1[0].conjuncts[0].0[0].atom)
|
|
90
93
|
} else {
|
|
91
94
|
None
|
|
92
95
|
}
|
|
93
96
|
}
|
|
97
|
+
|
|
98
|
+
pub fn take_single_atom(&mut self) -> Option<Atom> {
|
|
99
|
+
if self.single_atom().is_none() {
|
|
100
|
+
None
|
|
101
|
+
} else {
|
|
102
|
+
Some(self.1[0].conjuncts.pop().unwrap().0.pop().unwrap().atom)
|
|
103
|
+
}
|
|
104
|
+
}
|
|
94
105
|
}
|
|
95
106
|
|
|
96
107
|
/// Represents an alias in the grammar.
|
|
108
|
+
/// Each alias consists of possibly multiple conjuncts (AND).
|
|
97
109
|
#[derive(Debug)]
|
|
98
110
|
pub struct Alias {
|
|
99
|
-
pub
|
|
111
|
+
pub conjuncts: Vec<Expansion>,
|
|
100
112
|
#[allow(dead_code)]
|
|
101
113
|
pub alias: Option<String>,
|
|
102
114
|
}
|
|
103
115
|
|
|
104
|
-
/// Represents
|
|
116
|
+
/// Represents a concatenation of expressions in the grammar.
|
|
105
117
|
#[derive(Debug)]
|
|
106
118
|
pub struct Expansion(pub Vec<Expr>);
|
|
107
119
|
|
|
@@ -119,6 +131,7 @@ pub enum Atom {
|
|
|
119
131
|
Group(Expansions),
|
|
120
132
|
Maybe(Expansions),
|
|
121
133
|
Value(Value),
|
|
134
|
+
Not(Box<Atom>),
|
|
122
135
|
}
|
|
123
136
|
|
|
124
137
|
/// Represents different values in the grammar.
|
|
@@ -111,6 +111,10 @@ impl Compiler {
|
|
|
111
111
|
let id = self.do_token_expansions(expansions)?;
|
|
112
112
|
Ok(self.builder.regex.optional(id))
|
|
113
113
|
}
|
|
114
|
+
Atom::Not(inner) => {
|
|
115
|
+
let id = self.do_token_atom(*inner)?;
|
|
116
|
+
Ok(self.builder.regex.not(id))
|
|
117
|
+
}
|
|
114
118
|
Atom::Value(value) => match value {
|
|
115
119
|
Value::LiteralRange(a, b) => {
|
|
116
120
|
ensure!(
|
|
@@ -218,12 +222,18 @@ impl Compiler {
|
|
|
218
222
|
.into_iter()
|
|
219
223
|
.map(|alias| {
|
|
220
224
|
let args = alias
|
|
221
|
-
.
|
|
222
|
-
.0
|
|
225
|
+
.conjuncts
|
|
223
226
|
.into_iter()
|
|
224
|
-
.map(|
|
|
227
|
+
.map(|exp| {
|
|
228
|
+
let args = exp
|
|
229
|
+
.0
|
|
230
|
+
.into_iter()
|
|
231
|
+
.map(|e| self.do_token_expr(e))
|
|
232
|
+
.collect::<Result<Vec<_>>>()?;
|
|
233
|
+
Ok(self.builder.regex.concat(args))
|
|
234
|
+
})
|
|
225
235
|
.collect::<Result<Vec<_>>>()?;
|
|
226
|
-
Ok(self.builder.regex.
|
|
236
|
+
Ok(self.builder.regex.and(args))
|
|
227
237
|
})
|
|
228
238
|
.collect::<Result<Vec<_>>>()
|
|
229
239
|
.map_err(|e| expansions.0.augment(e))?;
|
|
@@ -265,6 +275,11 @@ impl Compiler {
|
|
|
265
275
|
let id = self.do_expansions(expansions)?;
|
|
266
276
|
Ok(self.builder.optional(id))
|
|
267
277
|
}
|
|
278
|
+
Atom::Not(_) => {
|
|
279
|
+
// treat as token
|
|
280
|
+
let rx = self.do_token_atom(expr)?;
|
|
281
|
+
Ok(self.lift_regex(rx)?)
|
|
282
|
+
}
|
|
268
283
|
Atom::Value(value) => {
|
|
269
284
|
match &value {
|
|
270
285
|
Value::Name(n) => {
|
|
@@ -363,9 +378,15 @@ impl Compiler {
|
|
|
363
378
|
let options = expansions
|
|
364
379
|
.1
|
|
365
380
|
.into_iter()
|
|
366
|
-
.map(|alias| {
|
|
381
|
+
.map(|mut alias| {
|
|
382
|
+
ensure!(
|
|
383
|
+
alias.conjuncts.len() == 1,
|
|
384
|
+
"& is only supported for tokens, not rules; try renaming the rule to UPPERCASE"
|
|
385
|
+
);
|
|
367
386
|
let args = alias
|
|
368
|
-
.
|
|
387
|
+
.conjuncts
|
|
388
|
+
.pop()
|
|
389
|
+
.unwrap()
|
|
369
390
|
.0
|
|
370
391
|
.into_iter()
|
|
371
392
|
.map(|e| self.do_expr(&loc, e))
|
|
@@ -478,8 +499,7 @@ impl Compiler {
|
|
|
478
499
|
return self.gen_grammar(g, rule.temperature, props);
|
|
479
500
|
}
|
|
480
501
|
Some(Atom::Value(Value::Json(_) | Value::NestedLark(_))) => {
|
|
481
|
-
if let Atom::Value(x) = rule.expansions.
|
|
482
|
-
{
|
|
502
|
+
if let Some(Atom::Value(x)) = rule.expansions.take_single_atom() {
|
|
483
503
|
return self.do_nested(&rule.expansions.0, x, rule.temperature, props);
|
|
484
504
|
} else {
|
|
485
505
|
unreachable!();
|
|
@@ -580,11 +600,11 @@ impl Grammar {
|
|
|
580
600
|
expansions: Expansions(
|
|
581
601
|
loc.clone(),
|
|
582
602
|
vec![Alias {
|
|
583
|
-
|
|
603
|
+
conjuncts: vec![Expansion(vec![Expr {
|
|
584
604
|
atom: Atom::Value(Value::LiteralRegex(regex.to_string(), "".to_string())),
|
|
585
605
|
op: None,
|
|
586
606
|
range: None,
|
|
587
|
-
}]),
|
|
607
|
+
}])],
|
|
588
608
|
alias: None,
|
|
589
609
|
}],
|
|
590
610
|
),
|
|
@@ -49,6 +49,7 @@ pub enum Token {
|
|
|
49
49
|
Number,
|
|
50
50
|
Newline,
|
|
51
51
|
VBar,
|
|
52
|
+
And, // &
|
|
52
53
|
SpecialToken, // <something>
|
|
53
54
|
GrammarRef, // @grammar_id or @7
|
|
54
55
|
// special
|
|
@@ -144,6 +145,7 @@ impl Token {
|
|
|
144
145
|
(Token::RBracket, "]"),
|
|
145
146
|
(Token::Tilde, "~"),
|
|
146
147
|
(Token::VBar, "|"),
|
|
148
|
+
(Token::And, "&"),
|
|
147
149
|
(Token::Equals, "="),
|
|
148
150
|
];
|
|
149
151
|
|
|
@@ -357,13 +357,20 @@ impl Parser {
|
|
|
357
357
|
|
|
358
358
|
/// Parses an alias.
|
|
359
359
|
fn parse_alias(&mut self) -> Result<Alias> {
|
|
360
|
-
let
|
|
360
|
+
let mut conjuncts = Vec::with_capacity(1);
|
|
361
|
+
loop {
|
|
362
|
+
let expansion = self.parse_expansion()?;
|
|
363
|
+
conjuncts.push(expansion);
|
|
364
|
+
if !self.match_token(Token::And) {
|
|
365
|
+
break;
|
|
366
|
+
}
|
|
367
|
+
}
|
|
361
368
|
let alias = if self.match_token(Token::Arrow) {
|
|
362
369
|
Some(self.expect_token_val(Token::Rule)?)
|
|
363
370
|
} else {
|
|
364
371
|
None
|
|
365
372
|
};
|
|
366
|
-
Ok(Alias {
|
|
373
|
+
Ok(Alias { conjuncts, alias })
|
|
367
374
|
}
|
|
368
375
|
|
|
369
376
|
/// Parses an expansion.
|
|
@@ -376,6 +383,7 @@ impl Parser {
|
|
|
376
383
|
|| self.has_token(Token::RBrace)
|
|
377
384
|
|| self.has_token(Token::RParen)
|
|
378
385
|
|| self.has_token(Token::RBracket)
|
|
386
|
+
|| self.has_token(Token::And)
|
|
379
387
|
{
|
|
380
388
|
break;
|
|
381
389
|
}
|
|
@@ -391,7 +399,8 @@ impl Parser {
|
|
|
391
399
|
let mut range = None;
|
|
392
400
|
if let Some(op_token) = self.match_token_with_value(Token::Op) {
|
|
393
401
|
op = Some(Op(op_token.clone()));
|
|
394
|
-
} else if self.
|
|
402
|
+
} else if self.has_tokens(&[Token::Tilde, Token::Number]) {
|
|
403
|
+
self.expect_token(Token::Tilde)?;
|
|
395
404
|
let start_num = self.expect_token_val(Token::Number)?.parse::<i32>()?;
|
|
396
405
|
let end_num = if self.match_token(Token::DotDot) {
|
|
397
406
|
Some(self.expect_token_val(Token::Number)?.parse::<i32>()?)
|
|
@@ -426,16 +435,27 @@ impl Parser {
|
|
|
426
435
|
|
|
427
436
|
/// Parses an atom.
|
|
428
437
|
fn parse_atom(&mut self) -> Result<Atom> {
|
|
429
|
-
|
|
438
|
+
let mut negated = false;
|
|
439
|
+
if self.match_token(Token::Tilde) {
|
|
440
|
+
negated = true;
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
let res = if self.match_token(Token::LParen) {
|
|
430
444
|
let expansions = self.parse_expansions()?;
|
|
431
445
|
self.expect_token(Token::RParen)?;
|
|
432
|
-
|
|
446
|
+
Atom::Group(expansions)
|
|
433
447
|
} else if self.match_token(Token::LBracket) {
|
|
434
448
|
let expansions = self.parse_expansions()?;
|
|
435
449
|
self.expect_token(Token::RBracket)?;
|
|
436
|
-
|
|
450
|
+
Atom::Maybe(expansions)
|
|
451
|
+
} else {
|
|
452
|
+
Atom::Value(self.parse_value()?)
|
|
453
|
+
};
|
|
454
|
+
|
|
455
|
+
if negated {
|
|
456
|
+
Ok(Atom::Not(Box::new(res)))
|
|
437
457
|
} else {
|
|
438
|
-
Ok(
|
|
458
|
+
Ok(res)
|
|
439
459
|
}
|
|
440
460
|
}
|
|
441
461
|
|
|
@@ -18,6 +18,7 @@ pub struct TokenParser {
|
|
|
18
18
|
pub logger: Logger,
|
|
19
19
|
pub limits: ParserLimits,
|
|
20
20
|
pub bias_computer: Arc<dyn BiasComputer>,
|
|
21
|
+
pub dbg_grammar: String,
|
|
21
22
|
last_step_stats: ParserStats,
|
|
22
23
|
max_step_stats: ParserStats,
|
|
23
24
|
eos_token: TokenId,
|
|
@@ -106,6 +107,7 @@ impl TokenParser {
|
|
|
106
107
|
stop_reason: StopReason::NotStopped,
|
|
107
108
|
error_message: None,
|
|
108
109
|
parser,
|
|
110
|
+
dbg_grammar: String::new(),
|
|
109
111
|
eos_token,
|
|
110
112
|
llm_tokens: Vec::new(),
|
|
111
113
|
llm_bytes: Vec::new(),
|
|
@@ -274,7 +276,11 @@ impl TokenParser {
|
|
|
274
276
|
}
|
|
275
277
|
|
|
276
278
|
pub fn augment_err(&self, e: impl Display) -> String {
|
|
277
|
-
format!(
|
|
279
|
+
format!(
|
|
280
|
+
"{e}\n<state>\n{}\n</state><grammar>\n{}\n</grammar>",
|
|
281
|
+
self.dump_state(),
|
|
282
|
+
self.dbg_grammar
|
|
283
|
+
)
|
|
278
284
|
}
|
|
279
285
|
|
|
280
286
|
pub fn dump_state(&self) -> String {
|
|
@@ -51,7 +51,10 @@ class LLTokenizer:
|
|
|
51
51
|
This will not necessarily match BPE.
|
|
52
52
|
"""
|
|
53
53
|
|
|
54
|
-
def tokenize_bytes(self,
|
|
54
|
+
def tokenize_bytes(self,
|
|
55
|
+
utf8bytes: bytes,
|
|
56
|
+
*,
|
|
57
|
+
parse_special: bool = False) -> List[int]:
|
|
55
58
|
"""
|
|
56
59
|
Tokenize the text as bytes.
|
|
57
60
|
This will use the underlying Python tokenizer to tokenize valid UTF8
|
|
@@ -59,7 +62,10 @@ class LLTokenizer:
|
|
|
59
62
|
few bytes.
|
|
60
63
|
"""
|
|
61
64
|
|
|
62
|
-
def tokenize_str(self,
|
|
65
|
+
def tokenize_str(self,
|
|
66
|
+
text: str,
|
|
67
|
+
*,
|
|
68
|
+
parse_special: bool = False) -> List[int]:
|
|
63
69
|
"""
|
|
64
70
|
Same as tokenize_bytes, but for strings.
|
|
65
71
|
"""
|
|
@@ -40,3 +40,10 @@ def test_llama_cpp(pytestconfig: Any) -> None:
|
|
|
40
40
|
print(toks)
|
|
41
41
|
assert len(toks) == 1
|
|
42
42
|
assert llt.decode_bytes(toks) == b"\x8b"
|
|
43
|
+
|
|
44
|
+
toks1 = llt.tokenize_str("<|eot_id|>")
|
|
45
|
+
toks0 = llt.tokenize_str("<|eot_id|>", parse_special=False)
|
|
46
|
+
assert toks1 == toks0
|
|
47
|
+
assert len(toks0) > 1
|
|
48
|
+
toks2 = llt.tokenize_str("<|eot_id|>", parse_special=True)
|
|
49
|
+
assert len(toks2) == 1
|