llguidance 0.7.21__tar.gz → 0.7.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llguidance-0.7.21 → llguidance-0.7.22}/CHANGELOG.md +5 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/Cargo.lock +5 -5
- {llguidance-0.7.21 → llguidance-0.7.22}/PKG-INFO +1 -1
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/Cargo.toml +1 -1
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/earley/parser.rs +6 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/matcher.rs +4 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/tokenparser.rs +4 -2
- {llguidance-0.7.21 → llguidance-0.7.22}/pyproject.toml +1 -1
- {llguidance-0.7.21 → llguidance-0.7.22}/python/llguidance/_tokenizer.py +1 -8
- {llguidance-0.7.21 → llguidance-0.7.22}/python_ext/Cargo.toml +1 -1
- {llguidance-0.7.21 → llguidance-0.7.22}/python_ext/src/py.rs +0 -7
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/tests/test_raw_parser.rs +33 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie/Cargo.toml +1 -1
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie_hf_downloader/Cargo.toml +1 -1
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie_hf_tokenizers/Cargo.toml +1 -1
- {llguidance-0.7.21 → llguidance-0.7.22}/.github/workflows/rust.yml +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/.github/workflows/wheels.yml +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/.gitignore +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/CODE_OF_CONDUCT.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/Cargo.toml +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/LICENSE +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/README.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/SECURITY.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/SUPPORT.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/c_sample/Makefile +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/c_sample/README.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/c_sample/c_sample.cpp +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/docs/fast_forward.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/docs/json_schema.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/docs/mask_plot.png +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/docs/optimizations.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/docs/special_tokens.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/docs/syntax.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/docs/toktrie.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/json_stats/Cargo.toml +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/json_stats/expected_maskbench.json +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/json_stats/jstats.sh +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/json_stats/scripts/split-stats.sh +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/json_stats/scripts/split_plot.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/json_stats/src/json_stats.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/json_stats/src/lib.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/json_stats/src/stats.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/LICENSE +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/README.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/build.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/cbindgen.toml +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/grammars/character.json +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/grammars/json.json +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/llguidance.h +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/api.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/constraint.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/earley/from_guidance.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/earley/grammar.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/earley/lexer.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/earley/lexerspec.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/earley/mod.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/earley/perf.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/earley/regexvec.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/earley/slicer.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/factory.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/ffi.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/ffi_par.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/grammar_builder.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/json/README.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/json/compiler.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/json/context_ref.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/json/context_simple/context.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/json/context_simple/draft.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/json/context_simple/mod.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/json/formats.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/json/mod.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/json/numeric.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/json/schema.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/json/shared_context.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/json_validation.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/lark/README.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/lark/ast.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/lark/common.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/lark/compiler.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/lark/lexer.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/lark/mod.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/lark/parser.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/lib.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/logging.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/output.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/panic_utils.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/regex_rewrite.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/stop_controller.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/substring.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/parser/src/tokenizer_json.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/plan.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/llguidance/__init__.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/llguidance/_grammar_from.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/llguidance/_lib.pyi +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/llguidance/_struct_tag.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/llguidance/_util.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/llguidance/cli.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/llguidance/gbnf_to_lark.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/llguidance/hf.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/llguidance/mlx.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/llguidance/numpy.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/llguidance/py.typed +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/llguidance/torch.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/mypy.ini +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/torch_tests/__init__.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/torch_tests/test_bitmask.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/torch_tests/test_hf.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python/torch_tests/test_matcher.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python_ext/src/lib.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python_ext/src/llinterpreter.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python_ext/src/llmatcher.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python_ext/src/parserlimits.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/python_ext/src/pyjson.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/Cargo.toml +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/README.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/cli.sh +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/blog.sample.json +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/blog.schema.json +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/blog.schema.ll.json +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/from-llama.cpp/README.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/from-llama.cpp/arithmetic.gbnf +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/from-llama.cpp/c.gbnf +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/from-llama.cpp/chess.gbnf +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/from-llama.cpp/english.gbnf +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/from-llama.cpp/japanese.gbnf +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/from-llama.cpp/json.gbnf +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/from-llama.cpp/json_arr.gbnf +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/from-llama.cpp/list.gbnf +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/from-llama.cpp/vllm-sql.gbnf +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/lark.lark +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/rfc.lark +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/rfc.xml +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/data/ulysses.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/gtest.sh +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/lark.sh +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/run.sh +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/src/lib.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/src/minimal.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/src/sample_parser.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/tests/test_lark.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/tests/test_ll.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/sample_parser/tests/test_stop.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/annotate_asm.js +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/bump.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/cbindgen.sh +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/checklinks.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/checklinks.sh +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/ci-publish.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/disasm.sh +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/gbnf_to_lark.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/gen-testcase.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/git-version.sh +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/install-deps.sh +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/jsonschema-stats.js +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/remote-guidance-test.sh +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/rust-size.js +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/rust_size.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/test-guidance.sh +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/tokenizer_test.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/scripts/update-git.py +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie/LICENSE +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie/README.md +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie/src/bytes.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie/src/lib.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie/src/recognizer.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie/src/rng.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie/src/svob.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie/src/tokenv.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie/src/toktree.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie/tests/test_svob.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie_hf_downloader/LICENSE +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie_hf_downloader/src/lib.rs +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie_hf_tokenizers/LICENSE +0 -0
- {llguidance-0.7.21 → llguidance-0.7.22}/toktrie_hf_tokenizers/src/lib.rs +0 -0
|
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file. Dates are d
|
|
|
4
4
|
|
|
5
5
|
If a release doesn't introduce any interesting changes (build fixes etc.), it's skipped.
|
|
6
6
|
|
|
7
|
+
#### [0.7.22](https://github.com/guidance-ai/llguidance/compare/v0.7.21...0.7.22) 2025-05-21
|
|
8
|
+
|
|
9
|
+
- Keep EOS token bytes in `TokenizerWrapper` [`#178`](https://github.com/guidance-ai/llguidance/pull/178)
|
|
10
|
+
- Stop using prefix/sentinel strings for `TokenizerWrapper` [`#175`](https://github.com/guidance-ai/llguidance/pull/175)
|
|
11
|
+
- avoid taking poisoned locks, see [`#174`](https://github.com/guidance-ai/llguidance/issues/174) [`d41aa9a`](https://github.com/guidance-ai/llguidance/commit/d41aa9a4427967708a951506b2bc0e395871b6c8); thanks [@g-eoj](https://github.com/g-eoj)
|
|
7
12
|
|
|
8
13
|
#### [0.7.21](https://github.com/guidance-ai/llguidance/compare/v0.7.20...0.7.21) 2025-05-20
|
|
9
14
|
|
|
@@ -1174,7 +1174,7 @@ checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856"
|
|
|
1174
1174
|
|
|
1175
1175
|
[[package]]
|
|
1176
1176
|
name = "llguidance"
|
|
1177
|
-
version = "0.7.21"
|
|
1177
|
+
version = "0.7.22"
|
|
1178
1178
|
dependencies = [
|
|
1179
1179
|
"anyhow",
|
|
1180
1180
|
"derivre",
|
|
@@ -1193,7 +1193,7 @@ dependencies = [
|
|
|
1193
1193
|
|
|
1194
1194
|
[[package]]
|
|
1195
1195
|
name = "llguidance_py"
|
|
1196
|
-
version = "0.7.21"
|
|
1196
|
+
version = "0.7.22"
|
|
1197
1197
|
dependencies = [
|
|
1198
1198
|
"anyhow",
|
|
1199
1199
|
"bytemuck",
|
|
@@ -2336,7 +2336,7 @@ dependencies = [
|
|
|
2336
2336
|
|
|
2337
2337
|
[[package]]
|
|
2338
2338
|
name = "toktrie"
|
|
2339
|
-
version = "0.7.21"
|
|
2339
|
+
version = "0.7.22"
|
|
2340
2340
|
dependencies = [
|
|
2341
2341
|
"anyhow",
|
|
2342
2342
|
"bytemuck",
|
|
@@ -2347,7 +2347,7 @@ dependencies = [
|
|
|
2347
2347
|
|
|
2348
2348
|
[[package]]
|
|
2349
2349
|
name = "toktrie_hf_downloader"
|
|
2350
|
-
version = "0.7.21"
|
|
2350
|
+
version = "0.7.22"
|
|
2351
2351
|
dependencies = [
|
|
2352
2352
|
"anyhow",
|
|
2353
2353
|
"hf-hub",
|
|
@@ -2358,7 +2358,7 @@ dependencies = [
|
|
|
2358
2358
|
|
|
2359
2359
|
[[package]]
|
|
2360
2360
|
name = "toktrie_hf_tokenizers"
|
|
2361
|
-
version = "0.7.21"
|
|
2361
|
+
version = "0.7.22"
|
|
2362
2362
|
dependencies = [
|
|
2363
2363
|
"anyhow",
|
|
2364
2364
|
"log",
|
|
@@ -86,6 +86,10 @@ impl Matcher {
|
|
|
86
86
|
self.consume_tokens(&[token])
|
|
87
87
|
}
|
|
88
88
|
|
|
89
|
+
pub fn test_trigger_lexer_error(&mut self) -> Result<()> {
|
|
90
|
+
self.with_inner(|inner| inner.parser.parser.test_trigger_lexer_error())
|
|
91
|
+
}
|
|
92
|
+
|
|
89
93
|
pub fn rollback(&mut self, num_tokens: usize) -> Result<()> {
|
|
90
94
|
self.with_inner(|inner| inner.parser.rollback(num_tokens))
|
|
91
95
|
}
|
|
@@ -278,8 +278,11 @@ impl TokenParser {
|
|
|
278
278
|
}
|
|
279
279
|
|
|
280
280
|
pub fn dump_state(&self) -> String {
|
|
281
|
+
// make sure not take self.parser.shared lock
|
|
282
|
+
// for example, self.parser.lexer_stats() takes it
|
|
283
|
+
// if we take it after panic, it will be poisoned
|
|
281
284
|
format!(
|
|
282
|
-
"Tokens: {}\n{} tokens, {} bytes; grm_prefix: {:?}\nFlags:{}{}\nLexer: {}\nParser: {}\nStop: {}\nError: {}",
|
|
285
|
+
"Tokens: {}\n{} tokens, {} bytes; grm_prefix: {:?}\nFlags:{}{}\nParser: {}\nStop: {}\nError: {}",
|
|
283
286
|
self.tok_trie().tokens_dbg(&self.llm_tokens),
|
|
284
287
|
self.llm_tokens.len(),
|
|
285
288
|
self.llm_bytes.len(),
|
|
@@ -294,7 +297,6 @@ impl TokenParser {
|
|
|
294
297
|
} else {
|
|
295
298
|
""
|
|
296
299
|
},
|
|
297
|
-
self.parser.lexer_stats(),
|
|
298
300
|
self.parser.stats(),
|
|
299
301
|
self.stop_reason,
|
|
300
302
|
self.error_message.as_deref().unwrap_or("None"),
|
|
@@ -23,10 +23,6 @@ class TokenizerWrapper:
|
|
|
23
23
|
gtokenizer(b"test")
|
|
24
24
|
except:
|
|
25
25
|
self._accepts_bytes = False
|
|
26
|
-
# If the tokenizer used bytes, then b"\xff" would be better (since it's invalid UTF-8)
|
|
27
|
-
# For now, we'll settle for "\x02" as assume it doesn't start any other token
|
|
28
|
-
self._prefix_string = "\x02"
|
|
29
|
-
self._prefix_tokens = self._encode_string(self._prefix_string)
|
|
30
26
|
|
|
31
27
|
def _encode_string(self, s: str) -> List[TokenId]:
|
|
32
28
|
r: List[TokenId]
|
|
@@ -37,7 +33,4 @@ class TokenizerWrapper:
|
|
|
37
33
|
return r
|
|
38
34
|
|
|
39
35
|
# required by LLTokenizer
|
|
40
|
-
|
|
41
|
-
tokens = self._encode_string(self._prefix_string + s)
|
|
42
|
-
assert tokens[: len(self._prefix_tokens)] == self._prefix_tokens
|
|
43
|
-
return tokens[len(self._prefix_tokens) :]
|
|
36
|
+
__call__ = _encode_string
|
|
@@ -241,14 +241,7 @@ impl PyTokenizer {
|
|
|
241
241
|
}
|
|
242
242
|
}
|
|
243
243
|
|
|
244
|
-
// we want decode_bytes([EOS]) etc to be empty
|
|
245
|
-
tokens[tok_eos as usize] = vec![];
|
|
246
|
-
// if let Some(t) = tok_bos {
|
|
247
|
-
// tokens[t as usize] = vec![];
|
|
248
|
-
// }
|
|
249
|
-
|
|
250
244
|
let info = TokRxInfo::new(tokens.len() as u32, tok_eos);
|
|
251
|
-
|
|
252
245
|
let tok_trie = TokTrie::from(&info, &tokens);
|
|
253
246
|
Ok(PyTokenizer {
|
|
254
247
|
tok_trie: Arc::new(tok_trie),
|
|
@@ -235,3 +235,36 @@ fn test_err_state() {
|
|
|
235
235
|
}
|
|
236
236
|
unreachable!();
|
|
237
237
|
}
|
|
238
|
+
|
|
239
|
+
#[test]
|
|
240
|
+
fn test_trigger_lexer_error() {
|
|
241
|
+
let lark = r#"
|
|
242
|
+
start: /[a-z]*/
|
|
243
|
+
"#;
|
|
244
|
+
|
|
245
|
+
let tokens = get_tok_env().tokenize("fobarbazqu");
|
|
246
|
+
let mut matcher = Matcher::new(Ok(make_parser(lark)));
|
|
247
|
+
|
|
248
|
+
for tok in tokens.iter() {
|
|
249
|
+
matcher.consume_token(*tok).unwrap();
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
if let Err(e) = matcher.test_trigger_lexer_error() {
|
|
253
|
+
let e = e.to_string();
|
|
254
|
+
println!("Error: {}", e);
|
|
255
|
+
assert!(e.contains("<state>"));
|
|
256
|
+
assert!(e.contains("synthetic error"));
|
|
257
|
+
} else {
|
|
258
|
+
unreachable!();
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
// now all calls should return the same error
|
|
262
|
+
if let Err(e) = matcher.consume_token(123) {
|
|
263
|
+
let e = e.to_string();
|
|
264
|
+
println!("Error: {}", e);
|
|
265
|
+
assert!(e.contains("<state>"));
|
|
266
|
+
assert!(e.contains("synthetic error"));
|
|
267
|
+
} else {
|
|
268
|
+
unreachable!();
|
|
269
|
+
}
|
|
270
|
+
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|