llguidance 0.7.19__tar.gz → 0.7.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llguidance-0.7.19 → llguidance-0.7.21}/.github/workflows/rust.yml +17 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/CHANGELOG.md +12 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/Cargo.lock +17 -43
- {llguidance-0.7.19 → llguidance-0.7.21}/PKG-INFO +3 -1
- {llguidance-0.7.19 → llguidance-0.7.21}/README.md +2 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/Cargo.toml +2 -2
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/build.rs +1 -1
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/llguidance.h +1 -1
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/constraint.rs +10 -4
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/ffi.rs +7 -4
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/json/formats.rs +9 -3
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/matcher.rs +4 -3
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/tokenparser.rs +37 -1
- {llguidance-0.7.19 → llguidance-0.7.21}/pyproject.toml +1 -1
- {llguidance-0.7.19 → llguidance-0.7.21}/python/llguidance/cli.py +1 -1
- {llguidance-0.7.19 → llguidance-0.7.21}/python/llguidance/hf.py +3 -3
- {llguidance-0.7.19 → llguidance-0.7.21}/python/torch_tests/test_hf.py +1 -1
- {llguidance-0.7.19 → llguidance-0.7.21}/python_ext/Cargo.toml +1 -1
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/tests/test_lark.rs +34 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/tests/test_raw_parser.rs +29 -1
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/install-deps.sh +1 -1
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie/Cargo.toml +1 -1
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie/src/toktree.rs +12 -10
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie_hf_downloader/Cargo.toml +1 -1
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie_hf_tokenizers/Cargo.toml +5 -2
- {llguidance-0.7.19 → llguidance-0.7.21}/.github/workflows/wheels.yml +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/.gitignore +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/CODE_OF_CONDUCT.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/Cargo.toml +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/LICENSE +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/SECURITY.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/SUPPORT.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/c_sample/Makefile +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/c_sample/README.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/c_sample/c_sample.cpp +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/docs/fast_forward.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/docs/json_schema.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/docs/mask_plot.png +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/docs/optimizations.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/docs/special_tokens.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/docs/syntax.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/docs/toktrie.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/json_stats/Cargo.toml +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/json_stats/expected_maskbench.json +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/json_stats/jstats.sh +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/json_stats/scripts/split-stats.sh +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/json_stats/scripts/split_plot.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/json_stats/src/json_stats.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/json_stats/src/lib.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/json_stats/src/stats.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/LICENSE +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/README.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/cbindgen.toml +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/grammars/character.json +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/grammars/json.json +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/api.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/earley/from_guidance.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/earley/grammar.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/earley/lexer.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/earley/lexerspec.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/earley/mod.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/earley/parser.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/earley/perf.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/earley/regexvec.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/earley/slicer.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/factory.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/ffi_par.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/grammar_builder.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/json/README.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/json/compiler.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/json/context_ref.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/json/context_simple/context.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/json/context_simple/draft.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/json/context_simple/mod.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/json/mod.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/json/numeric.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/json/schema.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/json/shared_context.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/json_validation.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/lark/README.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/lark/ast.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/lark/common.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/lark/compiler.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/lark/lexer.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/lark/mod.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/lark/parser.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/lib.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/logging.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/output.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/panic_utils.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/regex_rewrite.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/stop_controller.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/substring.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/parser/src/tokenizer_json.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/plan.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python/llguidance/__init__.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python/llguidance/_grammar_from.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python/llguidance/_lib.pyi +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python/llguidance/_struct_tag.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python/llguidance/_tokenizer.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python/llguidance/_util.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python/llguidance/gbnf_to_lark.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python/llguidance/mlx.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python/llguidance/numpy.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python/llguidance/py.typed +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python/llguidance/torch.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python/mypy.ini +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python/torch_tests/__init__.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python/torch_tests/test_bitmask.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python/torch_tests/test_matcher.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python_ext/src/lib.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python_ext/src/llinterpreter.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python_ext/src/llmatcher.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python_ext/src/parserlimits.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python_ext/src/py.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/python_ext/src/pyjson.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/Cargo.toml +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/README.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/cli.sh +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/blog.sample.json +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/blog.schema.json +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/blog.schema.ll.json +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/from-llama.cpp/README.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/from-llama.cpp/arithmetic.gbnf +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/from-llama.cpp/c.gbnf +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/from-llama.cpp/chess.gbnf +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/from-llama.cpp/english.gbnf +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/from-llama.cpp/japanese.gbnf +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/from-llama.cpp/json.gbnf +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/from-llama.cpp/json_arr.gbnf +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/from-llama.cpp/list.gbnf +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/from-llama.cpp/vllm-sql.gbnf +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/lark.lark +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/rfc.lark +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/rfc.xml +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/data/ulysses.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/gtest.sh +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/lark.sh +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/run.sh +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/src/lib.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/src/minimal.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/src/sample_parser.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/tests/test_ll.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/tests/test_stop.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/annotate_asm.js +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/bump.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/cbindgen.sh +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/checklinks.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/checklinks.sh +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/ci-publish.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/disasm.sh +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/gbnf_to_lark.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/gen-testcase.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/git-version.sh +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/jsonschema-stats.js +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/remote-guidance-test.sh +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/rust-size.js +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/rust_size.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/test-guidance.sh +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/tokenizer_test.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/scripts/update-git.py +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie/LICENSE +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie/README.md +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie/src/bytes.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie/src/lib.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie/src/recognizer.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie/src/rng.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie/src/svob.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie/src/tokenv.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie/tests/test_svob.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie_hf_downloader/LICENSE +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie_hf_downloader/src/lib.rs +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie_hf_tokenizers/LICENSE +0 -0
- {llguidance-0.7.19 → llguidance-0.7.21}/toktrie_hf_tokenizers/src/lib.rs +0 -0
|
@@ -52,3 +52,20 @@ jobs:
|
|
|
52
52
|
with:
|
|
53
53
|
name: wheels
|
|
54
54
|
path: target/wheels/*
|
|
55
|
+
|
|
56
|
+
msrv:
|
|
57
|
+
name: MSRV Check
|
|
58
|
+
|
|
59
|
+
runs-on: ubuntu-latest
|
|
60
|
+
|
|
61
|
+
steps:
|
|
62
|
+
- uses: actions/checkout@v4
|
|
63
|
+
|
|
64
|
+
- name: Set up Rust
|
|
65
|
+
uses: dtolnay/rust-toolchain@1.80.0
|
|
66
|
+
with:
|
|
67
|
+
components: clippy
|
|
68
|
+
|
|
69
|
+
- name: Build parser
|
|
70
|
+
run: cargo build --verbose --locked
|
|
71
|
+
working-directory: parser
|
|
@@ -4,6 +4,18 @@ All notable changes to this project will be documented in this file. Dates are d
|
|
|
4
4
|
|
|
5
5
|
If a release doesn't introduce any interesting changes (build fixes etc.), it's skipped.
|
|
6
6
|
|
|
7
|
+
|
|
8
|
+
#### [0.7.21](https://github.com/guidance-ai/llguidance/compare/v0.7.20...0.7.21) 2025-05-20
|
|
9
|
+
|
|
10
|
+
- include parser state in errors [`82e34da`](https://github.com/guidance-ai/llguidance/commit/82e34da704d22f04979d8cbc54a0ac00885a277d)
|
|
11
|
+
- tighten email format in JSON schema [`7454ea9`](https://github.com/guidance-ai/llguidance/commit/7454ea9df958f8bcc42e6bb986d6de397de65b3e)
|
|
12
|
+
|
|
13
|
+
#### [0.7.20](https://github.com/guidance-ai/llguidance/compare/v0.7.19...0.7.20) 2025-05-15
|
|
14
|
+
|
|
15
|
+
- use fancy-regex instead of onig as tokenizers regex library [`#172`](https://github.com/guidance-ai/llguidance/pull/172)
|
|
16
|
+
- fixes compilation on GCC 15, thanks [@Slowki](https://github.com/Slowki)
|
|
17
|
+
- msrv 1.80 support (incl. derivre bump) [`c89e386`](https://github.com/guidance-ai/llguidance/commit/c89e386685cd911a89fd47df225de88f88c10883), thank you [@nteodosio](https://github.com/nteodosio) for initial [PR](https://github.com/guidance-ai/llguidance/pull/170)!
|
|
18
|
+
|
|
7
19
|
#### [0.7.19](https://github.com/guidance-ai/llguidance/compare/v0.7.18...0.7.19) 2025-04-24
|
|
8
20
|
|
|
9
21
|
- fix a numeric token bug [`1f59edf`](https://github.com/guidance-ai/llguidance/commit/1f59edfc49b44cfba74b2380f34874a0778d9441)
|
|
@@ -150,12 +150,6 @@ version = "0.8.0"
|
|
|
150
150
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
151
151
|
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
|
|
152
152
|
|
|
153
|
-
[[package]]
|
|
154
|
-
name = "bitflags"
|
|
155
|
-
version = "1.3.2"
|
|
156
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
157
|
-
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
|
158
|
-
|
|
159
153
|
[[package]]
|
|
160
154
|
name = "bitflags"
|
|
161
155
|
version = "2.9.0"
|
|
@@ -401,9 +395,9 @@ dependencies = [
|
|
|
401
395
|
|
|
402
396
|
[[package]]
|
|
403
397
|
name = "derivre"
|
|
404
|
-
version = "0.3.
|
|
398
|
+
version = "0.3.8"
|
|
405
399
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
406
|
-
checksum = "
|
|
400
|
+
checksum = "786c7c65c4ef0c7deb05de3005e01991612a8f09fe0844fc0969c68b90468ba8"
|
|
407
401
|
dependencies = [
|
|
408
402
|
"ahash",
|
|
409
403
|
"anyhow",
|
|
@@ -672,8 +666,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
|
672
666
|
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
|
|
673
667
|
dependencies = [
|
|
674
668
|
"cfg-if",
|
|
669
|
+
"js-sys",
|
|
675
670
|
"libc",
|
|
676
671
|
"wasi 0.11.0+wasi-snapshot-preview1",
|
|
672
|
+
"wasm-bindgen",
|
|
677
673
|
]
|
|
678
674
|
|
|
679
675
|
[[package]]
|
|
@@ -1160,7 +1156,7 @@ version = "0.1.3"
|
|
|
1160
1156
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1161
1157
|
checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d"
|
|
1162
1158
|
dependencies = [
|
|
1163
|
-
"bitflags 2.9.0",
|
|
1159
|
+
"bitflags",
|
|
1164
1160
|
"libc",
|
|
1165
1161
|
]
|
|
1166
1162
|
|
|
@@ -1178,7 +1174,7 @@ checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856"
|
|
|
1178
1174
|
|
|
1179
1175
|
[[package]]
|
|
1180
1176
|
name = "llguidance"
|
|
1181
|
-
version = "0.7.19"
|
|
1177
|
+
version = "0.7.21"
|
|
1182
1178
|
dependencies = [
|
|
1183
1179
|
"anyhow",
|
|
1184
1180
|
"derivre",
|
|
@@ -1197,7 +1193,7 @@ dependencies = [
|
|
|
1197
1193
|
|
|
1198
1194
|
[[package]]
|
|
1199
1195
|
name = "llguidance_py"
|
|
1200
|
-
version = "0.7.19"
|
|
1196
|
+
version = "0.7.21"
|
|
1201
1197
|
dependencies = [
|
|
1202
1198
|
"anyhow",
|
|
1203
1199
|
"bytemuck",
|
|
@@ -1446,35 +1442,13 @@ version = "1.21.3"
|
|
|
1446
1442
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1447
1443
|
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
|
1448
1444
|
|
|
1449
|
-
[[package]]
|
|
1450
|
-
name = "onig"
|
|
1451
|
-
version = "6.4.0"
|
|
1452
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1453
|
-
checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f"
|
|
1454
|
-
dependencies = [
|
|
1455
|
-
"bitflags 1.3.2",
|
|
1456
|
-
"libc",
|
|
1457
|
-
"once_cell",
|
|
1458
|
-
"onig_sys",
|
|
1459
|
-
]
|
|
1460
|
-
|
|
1461
|
-
[[package]]
|
|
1462
|
-
name = "onig_sys"
|
|
1463
|
-
version = "69.8.1"
|
|
1464
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1465
|
-
checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7"
|
|
1466
|
-
dependencies = [
|
|
1467
|
-
"cc",
|
|
1468
|
-
"pkg-config",
|
|
1469
|
-
]
|
|
1470
|
-
|
|
1471
1445
|
[[package]]
|
|
1472
1446
|
name = "openssl"
|
|
1473
1447
|
version = "0.10.72"
|
|
1474
1448
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1475
1449
|
checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da"
|
|
1476
1450
|
dependencies = [
|
|
1477
|
-
"bitflags 2.9.0",
|
|
1451
|
+
"bitflags",
|
|
1478
1452
|
"cfg-if",
|
|
1479
1453
|
"foreign-types",
|
|
1480
1454
|
"libc",
|
|
@@ -1747,7 +1721,7 @@ version = "0.5.11"
|
|
|
1747
1721
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1748
1722
|
checksum = "d2f103c6d277498fbceb16e84d317e2a400f160f46904d5f5410848c829511a3"
|
|
1749
1723
|
dependencies = [
|
|
1750
|
-
"bitflags 2.9.0",
|
|
1724
|
+
"bitflags",
|
|
1751
1725
|
]
|
|
1752
1726
|
|
|
1753
1727
|
[[package]]
|
|
@@ -1897,7 +1871,7 @@ version = "1.0.5"
|
|
|
1897
1871
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1898
1872
|
checksum = "d97817398dd4bb2e6da002002db259209759911da105da92bec29ccb12cf58bf"
|
|
1899
1873
|
dependencies = [
|
|
1900
|
-
"bitflags 2.9.0",
|
|
1874
|
+
"bitflags",
|
|
1901
1875
|
"errno",
|
|
1902
1876
|
"libc",
|
|
1903
1877
|
"linux-raw-sys",
|
|
@@ -1992,7 +1966,7 @@ version = "2.11.1"
|
|
|
1992
1966
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1993
1967
|
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
|
|
1994
1968
|
dependencies = [
|
|
1995
|
-
"bitflags 2.9.0",
|
|
1969
|
+
"bitflags",
|
|
1996
1970
|
"core-foundation",
|
|
1997
1971
|
"core-foundation-sys",
|
|
1998
1972
|
"libc",
|
|
@@ -2185,7 +2159,7 @@ version = "0.6.1"
|
|
|
2185
2159
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2186
2160
|
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
|
|
2187
2161
|
dependencies = [
|
|
2188
|
-
"bitflags 2.9.0",
|
|
2162
|
+
"bitflags",
|
|
2189
2163
|
"core-foundation",
|
|
2190
2164
|
"system-configuration-sys",
|
|
2191
2165
|
]
|
|
@@ -2278,13 +2252,13 @@ dependencies = [
|
|
|
2278
2252
|
"aho-corasick",
|
|
2279
2253
|
"derive_builder",
|
|
2280
2254
|
"esaxx-rs",
|
|
2255
|
+
"fancy-regex",
|
|
2281
2256
|
"getrandom 0.2.15",
|
|
2282
2257
|
"itertools 0.13.0",
|
|
2283
2258
|
"lazy_static",
|
|
2284
2259
|
"log",
|
|
2285
2260
|
"macro_rules_attribute",
|
|
2286
2261
|
"monostate",
|
|
2287
|
-
"onig",
|
|
2288
2262
|
"paste",
|
|
2289
2263
|
"rand",
|
|
2290
2264
|
"rayon",
|
|
@@ -2362,7 +2336,7 @@ dependencies = [
|
|
|
2362
2336
|
|
|
2363
2337
|
[[package]]
|
|
2364
2338
|
name = "toktrie"
|
|
2365
|
-
version = "0.7.19"
|
|
2339
|
+
version = "0.7.21"
|
|
2366
2340
|
dependencies = [
|
|
2367
2341
|
"anyhow",
|
|
2368
2342
|
"bytemuck",
|
|
@@ -2373,7 +2347,7 @@ dependencies = [
|
|
|
2373
2347
|
|
|
2374
2348
|
[[package]]
|
|
2375
2349
|
name = "toktrie_hf_downloader"
|
|
2376
|
-
version = "0.7.19"
|
|
2350
|
+
version = "0.7.21"
|
|
2377
2351
|
dependencies = [
|
|
2378
2352
|
"anyhow",
|
|
2379
2353
|
"hf-hub",
|
|
@@ -2384,7 +2358,7 @@ dependencies = [
|
|
|
2384
2358
|
|
|
2385
2359
|
[[package]]
|
|
2386
2360
|
name = "toktrie_hf_tokenizers"
|
|
2387
|
-
version = "0.7.19"
|
|
2361
|
+
version = "0.7.21"
|
|
2388
2362
|
dependencies = [
|
|
2389
2363
|
"anyhow",
|
|
2390
2364
|
"log",
|
|
@@ -2987,7 +2961,7 @@ version = "0.39.0"
|
|
|
2987
2961
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2988
2962
|
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
|
|
2989
2963
|
dependencies = [
|
|
2990
|
-
"bitflags 2.9.0",
|
|
2964
|
+
"bitflags",
|
|
2991
2965
|
]
|
|
2992
2966
|
|
|
2993
2967
|
[[package]]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llguidance
|
|
3
|
-
Version: 0.7.19
|
|
3
|
+
Version: 0.7.21
|
|
4
4
|
License-File: LICENSE
|
|
5
5
|
Summary: Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance
|
|
6
6
|
Author: Michal Moskal
|
|
@@ -20,6 +20,7 @@ Project-URL: issue_tracker, https://github.com/microsoft/llguidance/issues
|
|
|
20
20
|
|
|
21
21
|
---
|
|
22
22
|
|
|
23
|
+
* 2025-05-20 LLGuidance [shipped](https://x.com/OpenAIDevs/status/1924915341052019166) in [OpenAI](https://x.com/OpenAIDevs/status/1924915343677653014) for JSON Schema
|
|
23
24
|
* 2025-04-11 integration [merged](https://github.com/chromium/chromium/commit/07ca6337c2f714ba0477202414bd2b1692e70594) into Chromium
|
|
24
25
|
* 2025-03-25 integration [merged](https://github.com/vllm-project/vllm/pull/14779) into vLLM (v0.8.2)
|
|
25
26
|
* 2025-02-26 integration [merged](https://github.com/sgl-project/sglang/pull/3298) into SGLang (v0.4.4)
|
|
@@ -59,6 +60,7 @@ The library can be used from:
|
|
|
59
60
|
|
|
60
61
|
The library is currently integrated in:
|
|
61
62
|
- [Guidance](https://github.com/guidance-ai/guidance) - library for interacting with LLMs
|
|
63
|
+
- [OpenAI models](https://x.com/OpenAIDevs/status/1924915343677653014) - LLGuidance powers [Structured Output](https://platform.openai.com/docs/guides/structured-outputs) (JSON Schema only)
|
|
62
64
|
- [llama.cpp](https://github.com/ggerganov/llama.cpp/pull/10224) -
|
|
63
65
|
available via `-DLLAMA_LLGUIDANCE=ON` option for `cmake`;
|
|
64
66
|
llama.cpp can be also used Guidance Python package
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
---
|
|
10
10
|
|
|
11
|
+
* 2025-05-20 LLGuidance [shipped](https://x.com/OpenAIDevs/status/1924915341052019166) in [OpenAI](https://x.com/OpenAIDevs/status/1924915343677653014) for JSON Schema
|
|
11
12
|
* 2025-04-11 integration [merged](https://github.com/chromium/chromium/commit/07ca6337c2f714ba0477202414bd2b1692e70594) into Chromium
|
|
12
13
|
* 2025-03-25 integration [merged](https://github.com/vllm-project/vllm/pull/14779) into vLLM (v0.8.2)
|
|
13
14
|
* 2025-02-26 integration [merged](https://github.com/sgl-project/sglang/pull/3298) into SGLang (v0.4.4)
|
|
@@ -47,6 +48,7 @@ The library can be used from:
|
|
|
47
48
|
|
|
48
49
|
The library is currently integrated in:
|
|
49
50
|
- [Guidance](https://github.com/guidance-ai/guidance) - library for interacting with LLMs
|
|
51
|
+
- [OpenAI models](https://x.com/OpenAIDevs/status/1924915343677653014) - LLGuidance powers [Structured Output](https://platform.openai.com/docs/guides/structured-outputs) (JSON Schema only)
|
|
50
52
|
- [llama.cpp](https://github.com/ggerganov/llama.cpp/pull/10224) -
|
|
51
53
|
available via `-DLLAMA_LLGUIDANCE=ON` option for `cmake`;
|
|
52
54
|
llama.cpp can be also used Guidance Python package
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "llguidance"
|
|
3
|
-
version = "0.7.19"
|
|
3
|
+
version = "0.7.21"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
license = "MIT"
|
|
6
6
|
description = "Super-fast Structured Outputs"
|
|
@@ -8,7 +8,7 @@ repository = "https://github.com/guidance-ai/llguidance"
|
|
|
8
8
|
|
|
9
9
|
[dependencies]
|
|
10
10
|
toktrie = { workspace = true }
|
|
11
|
-
derivre = { version = "=0.3.
|
|
11
|
+
derivre = { version = "=0.3.8", default-features = false, features = ["compress"] }
|
|
12
12
|
serde = { version = "1.0.217", features = ["derive"] }
|
|
13
13
|
serde_json = { version = "1.0.138", features = ["preserve_order"] }
|
|
14
14
|
anyhow = "1.0.95"
|
|
@@ -494,7 +494,7 @@ int32_t llg_matcher_compute_mask(struct LlgMatcher *matcher);
|
|
|
494
494
|
const uint32_t *llg_matcher_get_mask(struct LlgMatcher *matcher);
|
|
495
495
|
|
|
496
496
|
/**
|
|
497
|
-
* Return
|
|
497
|
+
* Return the size of the mask in bytes.
|
|
498
498
|
*/
|
|
499
499
|
size_t llg_matcher_get_mask_byte_size(struct LlgMatcher *matcher);
|
|
500
500
|
|
|
@@ -137,7 +137,7 @@ impl Constraint {
|
|
|
137
137
|
/// The splice is never returned when ff_tokens are disabled in InferenceCapabilities.
|
|
138
138
|
/// After this returns, commit_token() must be called with the sampled token if any.
|
|
139
139
|
pub fn compute_mask(&mut self) -> Result<&StepResult> {
|
|
140
|
-
|
|
140
|
+
self.catch_unwind(|s| s.compute_mask_inner())
|
|
141
141
|
.map(|_| &self.last_res)
|
|
142
142
|
}
|
|
143
143
|
|
|
@@ -185,6 +185,14 @@ impl Constraint {
|
|
|
185
185
|
self.parser.validate_tokens_raw(tokens)
|
|
186
186
|
}
|
|
187
187
|
|
|
188
|
+
fn catch_unwind<F, R>(&mut self, f: F) -> Result<R>
|
|
189
|
+
where
|
|
190
|
+
F: FnOnce(&mut Self) -> Result<R>,
|
|
191
|
+
{
|
|
192
|
+
panic_utils::catch_unwind(std::panic::AssertUnwindSafe(|| f(self)))
|
|
193
|
+
.map_err(|e| anyhow::anyhow!(self.parser.augment_err(e)))
|
|
194
|
+
}
|
|
195
|
+
|
|
188
196
|
/// commit_token() is a top-level method in this file and is called by
|
|
189
197
|
/// the LLInterpreter::commit_token().
|
|
190
198
|
///
|
|
@@ -194,9 +202,7 @@ impl Constraint {
|
|
|
194
202
|
/// It only returns 'STOP' if previous compute_mask() already returned 'STOP'
|
|
195
203
|
/// (in which case there's little point calling commit_token()).
|
|
196
204
|
pub fn commit_token(&mut self, sampled_token: Option<TokenId>) -> Result<CommitResult> {
|
|
197
|
-
|
|
198
|
-
self.commit_token_inner(sampled_token)
|
|
199
|
-
}))
|
|
205
|
+
self.catch_unwind(|s| s.commit_token_inner(sampled_token))
|
|
200
206
|
}
|
|
201
207
|
|
|
202
208
|
fn commit_token_inner(&mut self, sampled_token: Option<TokenId>) -> Result<CommitResult> {
|
|
@@ -554,9 +554,12 @@ pub extern "C" fn llg_get_temperature(cc: &LlgConstraint) -> f32 {
|
|
|
554
554
|
/// Check if constraint is stopped (cannot be extended further).
|
|
555
555
|
#[no_mangle]
|
|
556
556
|
pub extern "C" fn llg_is_stopped(cc: &LlgConstraint) -> bool {
|
|
557
|
-
cc.constraint
|
|
558
|
-
.
|
|
559
|
-
|
|
557
|
+
if let Some(c) = &cc.constraint {
|
|
558
|
+
c.step_result().is_stop()
|
|
559
|
+
} else {
|
|
560
|
+
// if there is no constraint, we consider it stopped
|
|
561
|
+
true
|
|
562
|
+
}
|
|
560
563
|
}
|
|
561
564
|
|
|
562
565
|
/// Compute mask for the next token sampling
|
|
@@ -1102,7 +1105,7 @@ pub extern "C" fn llg_matcher_get_mask(matcher: &mut LlgMatcher) -> *const u32 {
|
|
|
1102
1105
|
.map_or(std::ptr::null(), |m| m.as_ptr())
|
|
1103
1106
|
}
|
|
1104
1107
|
|
|
1105
|
-
/// Return
|
|
1108
|
+
/// Return the size of the mask in bytes.
|
|
1106
1109
|
#[no_mangle]
|
|
1107
1110
|
pub extern "C" fn llg_matcher_get_mask_byte_size(matcher: &mut LlgMatcher) -> usize {
|
|
1108
1111
|
matcher.mask_elts() * 4
|
|
@@ -27,9 +27,15 @@ pub fn lookup_format(name: &str) -> Option<&str> {
|
|
|
27
27
|
"duration" => {
|
|
28
28
|
r"P(?:(?P<dur_date>(?:(?P<dur_year>[0-9]+Y(?:[0-9]+M(?:[0-9]+D)?)?)|(?P<dur_month>[0-9]+M(?:[0-9]+D)?)|(?P<dur_day>[0-9]+D))(?:T(?:(?P<dur_hour>[0-9]+H(?:[0-9]+M(?:[0-9]+S)?)?)|(?P<dur_minute>[0-9]+M(?:[0-9]+S)?)|(?P<dur_second>[0-9]+S)))?)|(?P<dur_time>T(?:(?P<dur_hour2>[0-9]+H(?:[0-9]+M(?:[0-9]+S)?)?)|(?P<dur_minute2>[0-9]+M(?:[0-9]+S)?)|(?P<dur_second2>[0-9]+S)))|(?P<dur_week>[0-9]+W))"
|
|
29
29
|
}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
30
|
+
// https://www.rfc-editor.org/rfc/inline-errata/rfc5321.html 4.1.2 -> Mailbox
|
|
31
|
+
"email" => concat!(
|
|
32
|
+
r"(?P<local_part>(?P<dot_string>[a-zA-Z0-9!#$%&'*+\-/=?\^_`{|}~]+(\.[a-zA-Z0-9!#$%&'*+\-/=?\^_`{|}~]+)*))",
|
|
33
|
+
r"@(",
|
|
34
|
+
r"(?P<domain>(?P<sub_domain>[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?)(\.(?P<sub_domain2>[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?))*)",
|
|
35
|
+
r"|",
|
|
36
|
+
r"\[(?P<ipv4>((([0-9])|(([1-9])[0-9]|(25[0-5]|(2[0-4]|(1)[0-9])[0-9])))\.){3}(([0-9])|(([1-9])[0-9]|(25[0-5]|(2[0-4]|(1)[0-9])[0-9]))))\]",
|
|
37
|
+
r")"
|
|
38
|
+
),
|
|
33
39
|
"hostname" => {
|
|
34
40
|
r"[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*"
|
|
35
41
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
use anyhow::{anyhow, ensure, Result};
|
|
1
|
+
use anyhow::{anyhow, bail, ensure, Result};
|
|
2
2
|
use toktrie::{SimpleVob, TokEnv, TokenId};
|
|
3
3
|
|
|
4
4
|
use crate::{api::StopReason, earley::ParserStats, panic_utils, TokenParser};
|
|
@@ -48,8 +48,9 @@ impl Matcher {
|
|
|
48
48
|
match r {
|
|
49
49
|
Ok(r) => Ok(r),
|
|
50
50
|
Err(e) => {
|
|
51
|
-
|
|
52
|
-
|
|
51
|
+
let msg = inner.parser.augment_err(e);
|
|
52
|
+
self.0 = MatcherState::Error(msg.clone());
|
|
53
|
+
bail!(msg);
|
|
53
54
|
}
|
|
54
55
|
}
|
|
55
56
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
use std::{hint::black_box, panic::AssertUnwindSafe, sync::Arc, time::Duration};
|
|
1
|
+
use std::{fmt::Display, hint::black_box, panic::AssertUnwindSafe, sync::Arc, time::Duration};
|
|
2
2
|
|
|
3
3
|
use crate::{
|
|
4
4
|
api::{GrammarInit, ParserLimits, StopReason},
|
|
@@ -22,6 +22,9 @@ pub struct TokenParser {
|
|
|
22
22
|
max_step_stats: ParserStats,
|
|
23
23
|
eos_token: TokenId,
|
|
24
24
|
|
|
25
|
+
had_rollback: bool,
|
|
26
|
+
had_backtrack: bool,
|
|
27
|
+
|
|
25
28
|
is_accepting_cache: Option<bool>,
|
|
26
29
|
ff_tokens_cache: Option<(Vec<TokenId>, Vec<u8>)>,
|
|
27
30
|
stop_reason: StopReason,
|
|
@@ -110,6 +113,8 @@ impl TokenParser {
|
|
|
110
113
|
max_tokens_total: max_tokens,
|
|
111
114
|
last_bias_time: Duration::from_secs(0),
|
|
112
115
|
is_fresh: true,
|
|
116
|
+
had_backtrack: false,
|
|
117
|
+
had_rollback: false,
|
|
113
118
|
})
|
|
114
119
|
}
|
|
115
120
|
|
|
@@ -268,6 +273,34 @@ impl TokenParser {
|
|
|
268
273
|
res_prompt
|
|
269
274
|
}
|
|
270
275
|
|
|
276
|
+
pub fn augment_err(&self, e: impl Display) -> String {
|
|
277
|
+
format!("{e}\n<state>\n{}\n</state>", self.dump_state())
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
pub fn dump_state(&self) -> String {
|
|
281
|
+
format!(
|
|
282
|
+
"Tokens: {}\n{} tokens, {} bytes; grm_prefix: {:?}\nFlags:{}{}\nLexer: {}\nParser: {}\nStop: {}\nError: {}",
|
|
283
|
+
self.tok_trie().tokens_dbg(&self.llm_tokens),
|
|
284
|
+
self.llm_tokens.len(),
|
|
285
|
+
self.llm_bytes.len(),
|
|
286
|
+
String::from_utf8_lossy(&self.grm_prefix),
|
|
287
|
+
if self.had_backtrack {
|
|
288
|
+
" had_backtrack"
|
|
289
|
+
} else {
|
|
290
|
+
""
|
|
291
|
+
},
|
|
292
|
+
if self.had_rollback {
|
|
293
|
+
" had_rollback"
|
|
294
|
+
} else {
|
|
295
|
+
""
|
|
296
|
+
},
|
|
297
|
+
self.parser.lexer_stats(),
|
|
298
|
+
self.parser.stats(),
|
|
299
|
+
self.stop_reason,
|
|
300
|
+
self.error_message.as_deref().unwrap_or("None"),
|
|
301
|
+
)
|
|
302
|
+
}
|
|
303
|
+
|
|
271
304
|
fn clear_caches(&mut self) {
|
|
272
305
|
self.is_accepting_cache = None;
|
|
273
306
|
self.ff_tokens_cache = None;
|
|
@@ -332,6 +365,8 @@ impl TokenParser {
|
|
|
332
365
|
// this will fail in case we're in error state or not initialized
|
|
333
366
|
self.check_initialized("rollback")?;
|
|
334
367
|
|
|
368
|
+
self.had_rollback = true;
|
|
369
|
+
|
|
335
370
|
let new_len = self.llm_tokens.len() - n_tokens;
|
|
336
371
|
let mut bytes_to_drop = 0;
|
|
337
372
|
for tok in &self.llm_tokens[new_len..] {
|
|
@@ -522,6 +557,7 @@ impl TokenParser {
|
|
|
522
557
|
self.llm_bytes.extend_from_slice(tok_bytes);
|
|
523
558
|
|
|
524
559
|
if backtrack_bytes0 != 0 {
|
|
560
|
+
self.had_backtrack = true;
|
|
525
561
|
let mut backtrack_bytes: isize = backtrack_bytes0.try_into().unwrap();
|
|
526
562
|
let mut backtrack_tokens = 0;
|
|
527
563
|
while backtrack_bytes > 0 {
|
|
@@ -13,11 +13,11 @@ def from_tokenizer(
|
|
|
13
13
|
"""
|
|
14
14
|
Create a new tokenizer from a fast Hugging Face tokenizer.
|
|
15
15
|
This is an expensive operation (~1s), so the result should be cached.
|
|
16
|
-
It
|
|
17
|
-
|
|
16
|
+
It currently only supports fast tokenizers, which are then handled
|
|
17
|
+
by the Rust tokenizers library.
|
|
18
18
|
|
|
19
19
|
Args:
|
|
20
|
-
hf_tokenizer: transformers.
|
|
20
|
+
hf_tokenizer: transformers.PreTrainedTokenizerFast - the tokenizer to wrap
|
|
21
21
|
n_vocab: int - override the size of the vocabulary
|
|
22
22
|
eos_token: int - override the EOS token
|
|
23
23
|
slices: List[str] - configuration for slicer optimization; pass [] to disable,
|
|
@@ -1306,3 +1306,37 @@ fn test_json_min_max_properties() {
|
|
|
1306
1306
|
],
|
|
1307
1307
|
);
|
|
1308
1308
|
}
|
|
1309
|
+
|
|
1310
|
+
/// Exercises the JSON-schema `"format": "email"` string constraint with two
/// sets of sample strings handed to `json_test_many`.
#[test]
fn test_json_format_email() {
    let schema = json!({
        "type": "string",
        "format": "email",
    });

    // Well-formed addresses, including special characters in the local part
    // and a bracketed IPv4 literal as the domain.
    // NOTE(review): presumably `json_test_many` requires these to be accepted - confirm.
    let well_formed = [
        json!("test@example.com"),
        json!("foo.bar@example.com"),
        json!("foo.bar@example-123.com"),
        json!("foo+bar@example-123.com"),
        json!("f$o#o`b-a!r@example-123.com"),
        json!("fo%o#bar@example-123.com"),
        json!("test@[192.168.1.1]"),
    ];

    // Malformed addresses: empty or missing parts, misplaced dots, and
    // characters such as ':' or '[' in the local part.
    // NOTE(review): presumably `json_test_many` requires these to be rejected - confirm.
    let malformed = [
        json!(""),
        json!(" @example.com"),
        json!("test@"),
        json!("@example.com"),
        json!("test@.com"),
        json!("test@com"),
        json!("test@com."),
        json!("test@example..com"),
        json!("test@example.c"),
        json!("test@example.c."),
        json!("test@.example.com"),
        json!("test:2@example.com"),
        json!("test[2]@example.com"),
    ];

    json_test_many(&schema, &well_formed, &malformed);
}
|
|
@@ -3,7 +3,7 @@ use llguidance::{
|
|
|
3
3
|
api::TopLevelGrammar,
|
|
4
4
|
earley::SlicedBiasComputer,
|
|
5
5
|
toktrie::{InferenceCapabilities, TokEnv},
|
|
6
|
-
ParserFactory, TokenParser,
|
|
6
|
+
Matcher, ParserFactory, TokenParser,
|
|
7
7
|
};
|
|
8
8
|
use serde_json::{json, Value};
|
|
9
9
|
|
|
@@ -207,3 +207,31 @@ fn test_ff_early() {
|
|
|
207
207
|
parser.consume_token(*tok).unwrap();
|
|
208
208
|
}
|
|
209
209
|
}
|
|
210
|
+
|
|
211
|
+
/// Verifies that an error returned by `Matcher::consume_token` carries the
/// parser state dump.
///
/// The grammar only allows lowercase letters, while the token stream ends with
/// the tokenization of a string that contains digits, so consuming the stream
/// is expected to fail. The error text must then contain the `<state>` marker
/// and the `Tokens:` line of the state dump.
#[test]
fn test_err_state() {
    let lark = r#"
start: /[a-z]*/
"#;

    let tokens = get_tok_env().tokenize("fobarbazqu123");
    // Fail with a clear message instead of a bare index panic if the
    // tokenizer yields fewer tokens than the prefix below needs.
    assert!(
        tokens.len() >= 3,
        "expected at least 3 tokens, got {}",
        tokens.len()
    );

    // 100 repetitions of the first three tokens, followed by the full
    // token sequence.
    let mut t2: Vec<_> = tokens[..3].iter().copied().cycle().take(3 * 100).collect();
    t2.extend_from_slice(&tokens);

    let mut matcher = Matcher::new(Ok(make_parser(lark)));

    for &tok in &t2 {
        if let Err(e) = matcher.consume_token(tok) {
            let e = e.to_string();
            println!("Error: {}", e);
            assert!(e.contains("<state>"));
            assert!(e.contains("Tokens:"));
            return;
        }
    }

    // Reaching this point means no token was rejected; report that explicitly
    // rather than through `unreachable!()`, whose message would not say what
    // the test expected.
    panic!("consume_token accepted every token; expected an error containing the state dump");
}
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
# installing guidance for deps
|
|
4
4
|
pip install pytest guidance huggingface_hub tokenizers jsonschema maturin[zig] \
|
|
5
|
-
torch transformers bitsandbytes ipython psutil mypy
|
|
5
|
+
torch transformers==4.52.1 bitsandbytes ipython psutil mypy
|
|
6
6
|
pip uninstall -y guidance
|
|
7
7
|
|
|
8
8
|
# print out versions
|