llguidance 0.7.18__tar.gz → 0.7.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llguidance-0.7.18 → llguidance-0.7.19}/CHANGELOG.md +4 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/Cargo.lock +5 -5
- {llguidance-0.7.18 → llguidance-0.7.19}/PKG-INFO +1 -1
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/Cargo.toml +1 -1
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/earley/lexer.rs +2 -2
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/earley/regexvec.rs +7 -5
- {llguidance-0.7.18 → llguidance-0.7.19}/pyproject.toml +1 -1
- {llguidance-0.7.18 → llguidance-0.7.19}/python_ext/Cargo.toml +1 -1
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/tests/test_ll.rs +49 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie/Cargo.toml +1 -1
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie_hf_downloader/Cargo.toml +1 -1
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie_hf_tokenizers/Cargo.toml +1 -1
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie_hf_tokenizers/src/lib.rs +3 -1
- {llguidance-0.7.18 → llguidance-0.7.19}/.github/workflows/rust.yml +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/.github/workflows/wheels.yml +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/.gitignore +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/CODE_OF_CONDUCT.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/Cargo.toml +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/LICENSE +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/README.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/SECURITY.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/SUPPORT.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/c_sample/Makefile +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/c_sample/README.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/c_sample/c_sample.cpp +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/docs/fast_forward.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/docs/json_schema.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/docs/mask_plot.png +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/docs/optimizations.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/docs/special_tokens.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/docs/syntax.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/docs/toktrie.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/json_stats/Cargo.toml +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/json_stats/expected_maskbench.json +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/json_stats/jstats.sh +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/json_stats/scripts/split-stats.sh +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/json_stats/scripts/split_plot.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/json_stats/src/json_stats.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/json_stats/src/lib.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/json_stats/src/stats.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/LICENSE +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/README.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/build.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/cbindgen.toml +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/grammars/character.json +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/grammars/json.json +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/llguidance.h +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/api.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/constraint.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/earley/from_guidance.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/earley/grammar.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/earley/lexerspec.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/earley/mod.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/earley/parser.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/earley/perf.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/earley/slicer.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/factory.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/ffi.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/ffi_par.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/grammar_builder.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/json/README.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/json/compiler.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/json/context_ref.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/json/context_simple/context.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/json/context_simple/draft.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/json/context_simple/mod.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/json/formats.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/json/mod.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/json/numeric.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/json/schema.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/json/shared_context.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/json_validation.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/lark/README.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/lark/ast.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/lark/common.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/lark/compiler.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/lark/lexer.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/lark/mod.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/lark/parser.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/lib.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/logging.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/matcher.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/output.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/panic_utils.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/regex_rewrite.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/stop_controller.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/substring.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/tokenizer_json.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/parser/src/tokenparser.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/plan.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/llguidance/__init__.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/llguidance/_grammar_from.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/llguidance/_lib.pyi +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/llguidance/_struct_tag.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/llguidance/_tokenizer.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/llguidance/_util.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/llguidance/cli.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/llguidance/gbnf_to_lark.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/llguidance/hf.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/llguidance/mlx.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/llguidance/numpy.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/llguidance/py.typed +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/llguidance/torch.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/mypy.ini +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/torch_tests/__init__.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/torch_tests/test_bitmask.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/torch_tests/test_hf.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python/torch_tests/test_matcher.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python_ext/src/lib.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python_ext/src/llinterpreter.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python_ext/src/llmatcher.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python_ext/src/parserlimits.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python_ext/src/py.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/python_ext/src/pyjson.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/Cargo.toml +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/README.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/cli.sh +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/blog.sample.json +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/blog.schema.json +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/blog.schema.ll.json +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/from-llama.cpp/README.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/from-llama.cpp/arithmetic.gbnf +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/from-llama.cpp/c.gbnf +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/from-llama.cpp/chess.gbnf +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/from-llama.cpp/english.gbnf +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/from-llama.cpp/japanese.gbnf +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/from-llama.cpp/json.gbnf +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/from-llama.cpp/json_arr.gbnf +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/from-llama.cpp/list.gbnf +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/from-llama.cpp/vllm-sql.gbnf +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/lark.lark +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/rfc.lark +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/rfc.xml +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/data/ulysses.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/gtest.sh +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/lark.sh +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/run.sh +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/src/lib.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/src/minimal.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/src/sample_parser.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/tests/test_lark.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/tests/test_raw_parser.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/sample_parser/tests/test_stop.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/annotate_asm.js +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/bump.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/cbindgen.sh +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/checklinks.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/checklinks.sh +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/ci-publish.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/disasm.sh +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/gbnf_to_lark.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/gen-testcase.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/git-version.sh +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/install-deps.sh +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/jsonschema-stats.js +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/remote-guidance-test.sh +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/rust-size.js +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/rust_size.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/test-guidance.sh +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/tokenizer_test.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/scripts/update-git.py +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie/LICENSE +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie/README.md +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie/src/bytes.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie/src/lib.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie/src/recognizer.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie/src/rng.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie/src/svob.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie/src/tokenv.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie/src/toktree.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie/tests/test_svob.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie_hf_downloader/LICENSE +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie_hf_downloader/src/lib.rs +0 -0
- {llguidance-0.7.18 → llguidance-0.7.19}/toktrie_hf_tokenizers/LICENSE +0 -0
|
@@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. Dates are d
|
|
|
4
4
|
|
|
5
5
|
If a release doesn't introduce any interesting changes (build fixes etc.), it's skipped.
|
|
6
6
|
|
|
7
|
+
#### [0.7.19](https://github.com/guidance-ai/llguidance/compare/v0.7.18...0.7.19) 2025-04-24
|
|
8
|
+
|
|
9
|
+
- fix a numeric token bug [`1f59edf`](https://github.com/guidance-ai/llguidance/commit/1f59edfc49b44cfba74b2380f34874a0778d9441)
|
|
10
|
+
|
|
7
11
|
#### [0.7.18](https://github.com/guidance-ai/llguidance/compare/v0.7.17...0.7.18) 2025-04-22
|
|
8
12
|
|
|
9
13
|
- apply x-guidance also in %json{} [`2627891`](https://github.com/guidance-ai/llguidance/commit/2627891c72c7e38062cd3e052f1de146d2e21635)
|
|
@@ -1178,7 +1178,7 @@ checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856"
|
|
|
1178
1178
|
|
|
1179
1179
|
[[package]]
|
|
1180
1180
|
name = "llguidance"
|
|
1181
|
-
version = "0.7.18"
|
|
1181
|
+
version = "0.7.19"
|
|
1182
1182
|
dependencies = [
|
|
1183
1183
|
"anyhow",
|
|
1184
1184
|
"derivre",
|
|
@@ -1197,7 +1197,7 @@ dependencies = [
|
|
|
1197
1197
|
|
|
1198
1198
|
[[package]]
|
|
1199
1199
|
name = "llguidance_py"
|
|
1200
|
-
version = "0.7.18"
|
|
1200
|
+
version = "0.7.19"
|
|
1201
1201
|
dependencies = [
|
|
1202
1202
|
"anyhow",
|
|
1203
1203
|
"bytemuck",
|
|
@@ -2362,7 +2362,7 @@ dependencies = [
|
|
|
2362
2362
|
|
|
2363
2363
|
[[package]]
|
|
2364
2364
|
name = "toktrie"
|
|
2365
|
-
version = "0.7.18"
|
|
2365
|
+
version = "0.7.19"
|
|
2366
2366
|
dependencies = [
|
|
2367
2367
|
"anyhow",
|
|
2368
2368
|
"bytemuck",
|
|
@@ -2373,7 +2373,7 @@ dependencies = [
|
|
|
2373
2373
|
|
|
2374
2374
|
[[package]]
|
|
2375
2375
|
name = "toktrie_hf_downloader"
|
|
2376
|
-
version = "0.7.18"
|
|
2376
|
+
version = "0.7.19"
|
|
2377
2377
|
dependencies = [
|
|
2378
2378
|
"anyhow",
|
|
2379
2379
|
"hf-hub",
|
|
@@ -2384,7 +2384,7 @@ dependencies = [
|
|
|
2384
2384
|
|
|
2385
2385
|
[[package]]
|
|
2386
2386
|
name = "toktrie_hf_tokenizers"
|
|
2387
|
-
version = "0.7.18"
|
|
2387
|
+
version = "0.7.19"
|
|
2388
2388
|
dependencies = [
|
|
2389
2389
|
"anyhow",
|
|
2390
2390
|
"log",
|
|
@@ -221,8 +221,8 @@ impl Lexer {
|
|
|
221
221
|
if enable_logging {
|
|
222
222
|
let info = self.state_info(state);
|
|
223
223
|
debug!(
|
|
224
|
-
"lex: {:?} -{:?}-> {:?}, acpt={:?}",
|
|
225
|
-
prev, byte as char, state, info.greedy_accepting
|
|
224
|
+
"lex: {:?} -{:?}-> {:?}, acpt={:?}/{:?}",
|
|
225
|
+
prev, byte as char, state, info.greedy_accepting, info.lazy_accepting
|
|
226
226
|
);
|
|
227
227
|
}
|
|
228
228
|
|
|
@@ -407,14 +407,18 @@ impl RegexVec {
|
|
|
407
407
|
for (idx, e) in iter_state(&self.rx_sets, desc.state) {
|
|
408
408
|
// If this lexeme is not a match. (If the derivative at this point is nullable,
|
|
409
409
|
// there is a match, so if it is not nullable, there is no match.)
|
|
410
|
+
// println!("idx: {:?} e: {:?} {:?}", idx, e,self.special_token_rx);
|
|
410
411
|
if !self.exprs.is_nullable(e) {
|
|
411
412
|
// No match, so not at end of lexeme
|
|
412
413
|
all_eoi = false;
|
|
413
414
|
continue;
|
|
414
|
-
} else if Some(
|
|
415
|
+
} else if Some(self.get_rx(idx)) == self.special_token_rx {
|
|
415
416
|
// the regex is /\xFF\[[0-9]+\]/ so it's guaranteed not to conflict with anything
|
|
416
417
|
// else (starts with non-unicode byte); thus we ignore the rest of processing
|
|
418
|
+
// when has_special_token is set, we just need to make sure lazy_accepting is non-empty,
|
|
419
|
+
// the actual value is not important
|
|
417
420
|
desc.lazy_accepting = MatchingLexemes::One(idx);
|
|
421
|
+
desc.has_special_token = true;
|
|
418
422
|
return;
|
|
419
423
|
}
|
|
420
424
|
|
|
@@ -424,9 +428,6 @@ impl RegexVec {
|
|
|
424
428
|
if lazies.is_none() {
|
|
425
429
|
all_eoi = false;
|
|
426
430
|
hidden_len = self.exprs.possible_lookahead_len(e) as u32;
|
|
427
|
-
if Some(self.get_rx(idx)) == self.special_token_rx {
|
|
428
|
-
desc.has_special_token = true;
|
|
429
|
-
}
|
|
430
431
|
}
|
|
431
432
|
lazies.add(idx);
|
|
432
433
|
continue;
|
|
@@ -711,7 +712,8 @@ impl RegexVec {
|
|
|
711
712
|
|
|
712
713
|
self.lowest_match_inner(&mut res);
|
|
713
714
|
|
|
714
|
-
//
|
|
715
|
+
// println!("state {:?} desc: {:?}", state, res);
|
|
716
|
+
|
|
715
717
|
res
|
|
716
718
|
}
|
|
717
719
|
|
|
@@ -1262,3 +1262,52 @@ fn test_ll_tool_str_spec() {
|
|
|
1262
1262
|
|
|
1263
1263
|
check_lark_grammar(TOOL_STR_SPEC_GRAMMAR, tool_chk);
|
|
1264
1264
|
}
|
|
1265
|
+
|
|
1266
|
+
#[test]
|
|
1267
|
+
fn test_ll_numeric_bug() {
|
|
1268
|
+
check_lark_grammar(
|
|
1269
|
+
r#"
|
|
1270
|
+
start: text
|
|
1271
|
+
text: (text_tokens)* <[33000]> ap
|
|
1272
|
+
ap: <[33001]> (atok*)
|
|
1273
|
+
atok: <[400-410]>
|
|
1274
|
+
text_tokens: <[300-310]>
|
|
1275
|
+
"#,
|
|
1276
|
+
&[
|
|
1277
|
+
"",
|
|
1278
|
+
"<[300]>‧<[33000]>",
|
|
1279
|
+
"<[33001]>",
|
|
1280
|
+
"<[401]>‧<[402]>‧≺EOS≻",
|
|
1281
|
+
],
|
|
1282
|
+
);
|
|
1283
|
+
|
|
1284
|
+
check_lark_grammar(
|
|
1285
|
+
r#"
|
|
1286
|
+
start: text
|
|
1287
|
+
text: (text_tokens)* ( (<[33000]> ap) | (<[33002]> (atok*)) )
|
|
1288
|
+
ap: <[33001]> (atok*)
|
|
1289
|
+
atok: <[400-410]>
|
|
1290
|
+
text_tokens: <[300-310]>
|
|
1291
|
+
"#,
|
|
1292
|
+
&[
|
|
1293
|
+
"",
|
|
1294
|
+
"<[300]>‧<[33000]>",
|
|
1295
|
+
"<[33001]>",
|
|
1296
|
+
"<[401]>‧<[402]>‧≺EOS≻",
|
|
1297
|
+
],
|
|
1298
|
+
);
|
|
1299
|
+
|
|
1300
|
+
check_lark_grammar(
|
|
1301
|
+
r#"
|
|
1302
|
+
start: text
|
|
1303
|
+
text: (text_tokens)* ( (<[33000]> ap) | (<[33002]> (atok*)) )
|
|
1304
|
+
ap: <[33001,33003]> (atok*)
|
|
1305
|
+
atok: <[400-410]>
|
|
1306
|
+
text_tokens: <[300-310]>
|
|
1307
|
+
"#,
|
|
1308
|
+
&[
|
|
1309
|
+
"",
|
|
1310
|
+
"✖<[33001]>‧<[300]>‧<[33000]>✖<[33002]>✖<[300]>‧<[33001]>✖<[33002]>✖<[300]>‧<[401]>‧<[402]>‧≺EOS≻",
|
|
1311
|
+
],
|
|
1312
|
+
);
|
|
1313
|
+
}
|
|
@@ -213,7 +213,9 @@ impl ByteTokenizerEnv {
|
|
|
213
213
|
bail!("vocab size too small; {} vs {}", n_vocab, token_bytes.len());
|
|
214
214
|
}
|
|
215
215
|
while n_vocab > token_bytes.len() {
|
|
216
|
-
token_bytes.
|
|
216
|
+
let mut name = format!(".<[{}]>", token_bytes.len()).into_bytes();
|
|
217
|
+
name[0] = TokTrie::SPECIAL_TOKEN_MARKER;
|
|
218
|
+
token_bytes.push(name);
|
|
217
219
|
}
|
|
218
220
|
info.vocab_size = n_vocab as u32;
|
|
219
221
|
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|