llguidance 0.7.9__tar.gz → 0.7.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llguidance-0.7.9 → llguidance-0.7.10}/Cargo.lock +5 -5
- {llguidance-0.7.9 → llguidance-0.7.10}/PKG-INFO +14 -2
- {llguidance-0.7.9 → llguidance-0.7.10}/README.md +13 -1
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/Cargo.toml +1 -1
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/llguidance.h +156 -18
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/api.rs +21 -1
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/ffi.rs +339 -41
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/grammar_builder.rs +30 -28
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/matcher.rs +24 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/pyproject.toml +1 -1
- {llguidance-0.7.9 → llguidance-0.7.10}/python_ext/Cargo.toml +1 -1
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/cbindgen.sh +6 -1
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie/Cargo.toml +1 -1
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie/src/toktree.rs +10 -2
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie_hf_downloader/Cargo.toml +1 -1
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie_hf_tokenizers/Cargo.toml +1 -1
- {llguidance-0.7.9 → llguidance-0.7.10}/.github/workflows/rust.yml +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/.github/workflows/wheels.yml +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/.gitignore +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/CODE_OF_CONDUCT.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/Cargo.toml +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/LICENSE +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/SECURITY.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/SUPPORT.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/c_sample/Makefile +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/c_sample/README.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/c_sample/c_sample.cpp +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/docs/fast_forward.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/docs/json_schema.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/docs/mask_plot.png +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/docs/optimizations.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/docs/special_tokens.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/docs/syntax.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/docs/toktrie.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/json_stats/Cargo.toml +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/json_stats/expected_maskbench.json +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/json_stats/jstats.sh +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/json_stats/scripts/split-stats.sh +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/json_stats/scripts/split_plot.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/json_stats/src/json_stats.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/json_stats/src/lib.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/json_stats/src/stats.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/LICENSE +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/README.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/build.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/cbindgen.toml +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/grammars/character.json +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/grammars/json.json +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/constraint.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/earley/from_guidance.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/earley/grammar.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/earley/lexer.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/earley/lexerspec.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/earley/mod.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/earley/parser.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/earley/perf.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/earley/regexvec.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/earley/slicer.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/factory.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/ffi_par.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/json/README.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/json/compiler.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/json/context_ref.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/json/context_simple/context.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/json/context_simple/draft.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/json/context_simple/mod.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/json/formats.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/json/mod.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/json/numeric.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/json/schema.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/json/shared_context.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/json_validation.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/lark/README.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/lark/ast.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/lark/common.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/lark/compiler.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/lark/lexer.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/lark/mod.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/lark/parser.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/lib.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/logging.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/output.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/panic_utils.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/stop_controller.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/substring.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/tokenizer_json.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/parser/src/tokenparser.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/plan.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/llguidance/__init__.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/llguidance/_grammar_from.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/llguidance/_lib.pyi +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/llguidance/_tokenizer.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/llguidance/_util.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/llguidance/cli.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/llguidance/gbnf_to_lark.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/llguidance/hf.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/llguidance/mlx.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/llguidance/numpy.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/llguidance/py.typed +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/llguidance/torch.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/mypy.ini +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/torch_tests/__init__.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/torch_tests/test_bitmask.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/torch_tests/test_hf.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python/torch_tests/test_matcher.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python_ext/src/lib.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python_ext/src/llinterpreter.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python_ext/src/llmatcher.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python_ext/src/py.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/python_ext/src/pyjson.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/Cargo.toml +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/README.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/cli.sh +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/data/blog.sample.json +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/data/blog.schema.json +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/data/blog.schema.ll.json +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/data/from-llama.cpp/README.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/data/from-llama.cpp/arithmetic.gbnf +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/data/from-llama.cpp/c.gbnf +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/data/from-llama.cpp/chess.gbnf +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/data/from-llama.cpp/english.gbnf +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/data/from-llama.cpp/japanese.gbnf +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/data/from-llama.cpp/json.gbnf +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/data/from-llama.cpp/json_arr.gbnf +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/data/from-llama.cpp/list.gbnf +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/data/from-llama.cpp/vllm-sql.gbnf +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/data/lark.lark +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/data/rfc.lark +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/data/rfc.xml +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/gtest.sh +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/lark.sh +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/run.sh +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/src/lib.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/src/minimal.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/src/sample_parser.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/tests/test_lark.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/tests/test_ll.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/tests/test_raw_parser.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/sample_parser/tests/test_stop.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/annotate_asm.js +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/bump.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/checklinks.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/checklinks.sh +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/ci-publish.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/disasm.sh +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/gbnf_to_lark.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/gen-testcase.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/git-version.sh +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/install-deps.sh +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/jsonschema-stats.js +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/remote-guidance-test.sh +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/rust-size.js +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/rust_size.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/test-guidance.sh +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/tokenizer_test.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/scripts/update-git.py +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie/LICENSE +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie/README.md +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie/src/bytes.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie/src/lib.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie/src/recognizer.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie/src/rng.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie/src/svob.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie/src/tokenv.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie/tests/test_svob.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie_hf_downloader/LICENSE +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie_hf_downloader/src/lib.rs +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie_hf_tokenizers/LICENSE +0 -0
- {llguidance-0.7.9 → llguidance-0.7.10}/toktrie_hf_tokenizers/src/lib.rs +0 -0
|
@@ -1177,7 +1177,7 @@ checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
|
|
|
1177
1177
|
|
|
1178
1178
|
[[package]]
|
|
1179
1179
|
name = "llguidance"
|
|
1180
|
-
version = "0.7.9"
|
|
1180
|
+
version = "0.7.10"
|
|
1181
1181
|
dependencies = [
|
|
1182
1182
|
"anyhow",
|
|
1183
1183
|
"derivre",
|
|
@@ -1196,7 +1196,7 @@ dependencies = [
|
|
|
1196
1196
|
|
|
1197
1197
|
[[package]]
|
|
1198
1198
|
name = "llguidance_py"
|
|
1199
|
-
version = "0.7.9"
|
|
1199
|
+
version = "0.7.10"
|
|
1200
1200
|
dependencies = [
|
|
1201
1201
|
"anyhow",
|
|
1202
1202
|
"bytemuck",
|
|
@@ -2356,7 +2356,7 @@ dependencies = [
|
|
|
2356
2356
|
|
|
2357
2357
|
[[package]]
|
|
2358
2358
|
name = "toktrie"
|
|
2359
|
-
version = "0.7.9"
|
|
2359
|
+
version = "0.7.10"
|
|
2360
2360
|
dependencies = [
|
|
2361
2361
|
"anyhow",
|
|
2362
2362
|
"bytemuck",
|
|
@@ -2367,7 +2367,7 @@ dependencies = [
|
|
|
2367
2367
|
|
|
2368
2368
|
[[package]]
|
|
2369
2369
|
name = "toktrie_hf_downloader"
|
|
2370
|
-
version = "0.7.9"
|
|
2370
|
+
version = "0.7.10"
|
|
2371
2371
|
dependencies = [
|
|
2372
2372
|
"anyhow",
|
|
2373
2373
|
"hf-hub",
|
|
@@ -2378,7 +2378,7 @@ dependencies = [
|
|
|
2378
2378
|
|
|
2379
2379
|
[[package]]
|
|
2380
2380
|
name = "toktrie_hf_tokenizers"
|
|
2381
|
-
version = "0.7.9"
|
|
2381
|
+
version = "0.7.10"
|
|
2382
2382
|
dependencies = [
|
|
2383
2383
|
"anyhow",
|
|
2384
2384
|
"log",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llguidance
|
|
3
|
-
Version: 0.7.9
|
|
3
|
+
Version: 0.7.10
|
|
4
4
|
License-File: LICENSE
|
|
5
5
|
Summary: Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance
|
|
6
6
|
Author: Michal Moskal
|
|
@@ -18,6 +18,18 @@ Project-URL: issue_tracker, https://github.com/microsoft/llguidance/issues
|
|
|
18
18
|
<em>Performance results from <a href ="https://github.com/guidance-ai/jsonschemabench/tree/main/maskbench">MaskBench</a></em>
|
|
19
19
|
</p>
|
|
20
20
|
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
* 2025-03-25 integration [merged](https://github.com/vllm-project/vllm/pull/14779) into vLLM (v0.8.2)
|
|
24
|
+
* 2025-02-26 integration [merged](https://github.com/sgl-project/sglang/pull/3298) into SGLang (v0.4.4)
|
|
25
|
+
* 2025-02-01 integration [merged](https://github.com/ggml-org/llama.cpp/pull/10224) into llama.cpp (b4613)
|
|
26
|
+
* 2025-01-21 [JSONSchemaBench](https://github.com/guidance-ai/jsonschemabench) released, including [paper](https://arxiv.org/abs/2501.10868) and [MaskBench](https://github.com/guidance-ai/jsonschemabench/tree/main/maskbench)
|
|
27
|
+
* 2025-01-07 Guidance [v0.2.0](https://github.com/guidance-ai/guidance/releases/tag/0.2.0) released, using llguidance as the grammar engine
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## About
|
|
32
|
+
|
|
21
33
|
This library implements constrained decoding (also called constrained sampling or
|
|
22
34
|
structured outputs) for Large Language Models (LLMs).
|
|
23
35
|
It can enforce arbitrary context-free grammar on the output of LLM
|
|
@@ -52,7 +64,7 @@ The library is currently integrated in:
|
|
|
52
64
|
- [SGLang](https://github.com/sgl-project/sglang/pull/3298) -
|
|
53
65
|
use `--grammar-backend llguidance`; when passing Lark grammar make
|
|
54
66
|
sure to prefix them with `%llguidance {}`, just as in llama.cpp
|
|
55
|
-
- vLLM - [
|
|
67
|
+
- vLLM - [V0 PR](https://github.com/vllm-project/vllm/pull/14589) and [V1 PR](https://github.com/vllm-project/vllm/pull/14779)
|
|
56
68
|
- [LLGTRT](https://github.com/guidance-ai/llgtrt) - OpenAI-compatible REST server using NVIDIA's [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM)
|
|
57
69
|
- [mistral.rs](https://github.com/EricLBuehler/mistral.rs/pull/899)
|
|
58
70
|
|
|
@@ -6,6 +6,18 @@
|
|
|
6
6
|
<em>Performance results from <a href ="https://github.com/guidance-ai/jsonschemabench/tree/main/maskbench">MaskBench</a></em>
|
|
7
7
|
</p>
|
|
8
8
|
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
* 2025-03-25 integration [merged](https://github.com/vllm-project/vllm/pull/14779) into vLLM (v0.8.2)
|
|
12
|
+
* 2025-02-26 integration [merged](https://github.com/sgl-project/sglang/pull/3298) into SGLang (v0.4.4)
|
|
13
|
+
* 2025-02-01 integration [merged](https://github.com/ggml-org/llama.cpp/pull/10224) into llama.cpp (b4613)
|
|
14
|
+
* 2025-01-21 [JSONSchemaBench](https://github.com/guidance-ai/jsonschemabench) released, including [paper](https://arxiv.org/abs/2501.10868) and [MaskBench](https://github.com/guidance-ai/jsonschemabench/tree/main/maskbench)
|
|
15
|
+
* 2025-01-07 Guidance [v0.2.0](https://github.com/guidance-ai/guidance/releases/tag/0.2.0) released, using llguidance as the grammar engine
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## About
|
|
20
|
+
|
|
9
21
|
This library implements constrained decoding (also called constrained sampling or
|
|
10
22
|
structured outputs) for Large Language Models (LLMs).
|
|
11
23
|
It can enforce arbitrary context-free grammar on the output of LLM
|
|
@@ -40,7 +52,7 @@ The library is currently integrated in:
|
|
|
40
52
|
- [SGLang](https://github.com/sgl-project/sglang/pull/3298) -
|
|
41
53
|
use `--grammar-backend llguidance`; when passing Lark grammar make
|
|
42
54
|
sure to prefix them with `%llguidance {}`, just as in llama.cpp
|
|
43
|
-
- vLLM - [
|
|
55
|
+
- vLLM - [V0 PR](https://github.com/vllm-project/vllm/pull/14589) and [V1 PR](https://github.com/vllm-project/vllm/pull/14779)
|
|
44
56
|
- [LLGTRT](https://github.com/guidance-ai/llgtrt) - OpenAI-compatible REST server using NVIDIA's [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM)
|
|
45
57
|
- [mistral.rs](https://github.com/EricLBuehler/mistral.rs/pull/899)
|
|
46
58
|
|
|
@@ -7,8 +7,26 @@
|
|
|
7
7
|
#include <stdint.h>
|
|
8
8
|
#include <stdlib.h>
|
|
9
9
|
|
|
10
|
+
/**
|
|
11
|
+
* Do not include special tokens, and keep invalid UTF-8 as is.
|
|
12
|
+
*/
|
|
13
|
+
#define LLG_DECODE_NONE 0
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Include special tokens in the output.
|
|
17
|
+
* They may look like <|something|>, <something_else>, or <[12345]> if they don't have a name.
|
|
18
|
+
*/
|
|
19
|
+
#define LLG_DECODE_INCLUDE_SPECIAL 1
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* Replace invalid UTF-8 with the replacement character.
|
|
23
|
+
*/
|
|
24
|
+
#define LLG_DECODE_VALID_UTF8 2
|
|
25
|
+
|
|
10
26
|
typedef struct LlgConstraint LlgConstraint;
|
|
11
27
|
|
|
28
|
+
typedef struct LlgMatcher LlgMatcher;
|
|
29
|
+
|
|
12
30
|
typedef struct LlgStopController LlgStopController;
|
|
13
31
|
|
|
14
32
|
typedef struct LlgTokenizer LlgTokenizer;
|
|
@@ -223,7 +241,7 @@ void llg_constraint_init_set_defaults(struct LlgConstraintInit *init,
|
|
|
223
241
|
* Always returns a non-null value. Call llg_get_error() on the result to check for errors.
|
|
224
242
|
*/
|
|
225
243
|
struct LlgConstraint *llg_new_constraint(const struct LlgConstraintInit *init,
|
|
226
|
-
const char *
|
|
244
|
+
const char *llguidance);
|
|
227
245
|
|
|
228
246
|
/**
|
|
229
247
|
* Create a new constraint from a given regular expression
|
|
@@ -291,8 +309,6 @@ int32_t llg_commit_token(struct LlgConstraint *cc, LlgToken token, struct LlgCom
|
|
|
291
309
|
|
|
292
310
|
/**
|
|
293
311
|
* Compute mask for several constraints in parallel.
|
|
294
|
-
* # Safety
|
|
295
|
-
* This function should only be called from C code.
|
|
296
312
|
*/
|
|
297
313
|
void llg_par_compute_mask(const struct LlgConstraintStep *steps,
|
|
298
314
|
size_t n_steps,
|
|
@@ -321,8 +337,6 @@ struct LlgTokenizer *llg_clone_tokenizer(const struct LlgTokenizer *tok);
|
|
|
321
337
|
* Tokenize the given bytes and return the tokens.
|
|
322
338
|
* Always returns the number of tokens that would be written to output_tokens
|
|
323
339
|
* if output_tokens_len was large enough.
|
|
324
|
-
* # Safety
|
|
325
|
-
* This function should only be called from C code.
|
|
326
340
|
*/
|
|
327
341
|
size_t llg_tokenize_bytes(const struct LlgTokenizer *tok,
|
|
328
342
|
const uint8_t *bytes,
|
|
@@ -335,8 +349,6 @@ size_t llg_tokenize_bytes(const struct LlgTokenizer *tok,
|
|
|
335
349
|
* Special tokens will be tokenized, if they follow 0xFF byte prefix.
|
|
336
350
|
* Always returns the number of tokens that would be written to output_tokens
|
|
337
351
|
* if output_tokens_len was large enough.
|
|
338
|
-
* # Safety
|
|
339
|
-
* This function should only be called from C code.
|
|
340
352
|
*/
|
|
341
353
|
size_t llg_tokenize_bytes_marker(const struct LlgTokenizer *tok,
|
|
342
354
|
const uint8_t *bytes,
|
|
@@ -346,10 +358,8 @@ size_t llg_tokenize_bytes_marker(const struct LlgTokenizer *tok,
|
|
|
346
358
|
|
|
347
359
|
/**
|
|
348
360
|
* Return a string representation of the tokens, useful for debugging.
|
|
349
|
-
* The output is
|
|
361
|
+
* The output is NUL-terminated.
|
|
350
362
|
* Returns the number of bytes that would be written to output if output_len was large enough.
|
|
351
|
-
* # Safety
|
|
352
|
-
* This function should only be called from C code.
|
|
353
363
|
*/
|
|
354
364
|
size_t llg_stringify_tokens(const struct LlgTokenizer *tok,
|
|
355
365
|
const uint32_t *tokens,
|
|
@@ -357,17 +367,26 @@ size_t llg_stringify_tokens(const struct LlgTokenizer *tok,
|
|
|
357
367
|
char *output,
|
|
358
368
|
size_t output_len);
|
|
359
369
|
|
|
370
|
+
/**
|
|
371
|
+
* Return a string representation of the tokens, useful for debugging.
|
|
372
|
+
* The output is NUL-terminated.
|
|
373
|
+
* Returns the number of bytes that would be written to output if output_len was large enough.
|
|
374
|
+
* flags is one of LLG_DECODE_*
|
|
375
|
+
*/
|
|
376
|
+
size_t llg_decode_tokens(const struct LlgTokenizer *tok,
|
|
377
|
+
const uint32_t *tokens,
|
|
378
|
+
size_t n_tokens,
|
|
379
|
+
char *output,
|
|
380
|
+
size_t output_len,
|
|
381
|
+
uint32_t flags);
|
|
382
|
+
|
|
360
383
|
/**
|
|
361
384
|
* Free the tokenizer. Should *NOT* be called while there are still constraints using it.
|
|
362
|
-
* # Safety
|
|
363
|
-
* This function should only be called from C code.
|
|
364
385
|
*/
|
|
365
386
|
void llg_free_tokenizer(struct LlgTokenizer *tok);
|
|
366
387
|
|
|
367
388
|
/**
|
|
368
389
|
* Free the constraint
|
|
369
|
-
* # Safety
|
|
370
|
-
* This function should only be called from C code.
|
|
371
390
|
*/
|
|
372
391
|
void llg_free_constraint(struct LlgConstraint *cc);
|
|
373
392
|
|
|
@@ -381,8 +400,6 @@ const char *llg_flush_logs(struct LlgConstraint *cc);
|
|
|
381
400
|
|
|
382
401
|
/**
|
|
383
402
|
* Create a new stop-sequence controller
|
|
384
|
-
* # Safety
|
|
385
|
-
* This function should only be called from C code.
|
|
386
403
|
*/
|
|
387
404
|
struct LlgStopController *llg_new_stop_controller(const struct LlgTokenizer *tokenizer,
|
|
388
405
|
const uint32_t *stop_tokens,
|
|
@@ -404,11 +421,132 @@ const char *llg_stop_commit_token(struct LlgStopController *stop_ctrl,
|
|
|
404
421
|
|
|
405
422
|
/**
|
|
406
423
|
* Free the stop-sequence controller
|
|
407
|
-
* # Safety
|
|
408
|
-
* This function should only be called from C code.
|
|
409
424
|
*/
|
|
410
425
|
void llg_free_stop_controller(struct LlgStopController *stop_ctrl);
|
|
411
426
|
|
|
427
|
+
/**
|
|
428
|
+
* Create a new matcher from the given ConstraintInit
|
|
429
|
+
* Always returns a non-null value. Call llg_matcher_get_error() on the result to check for errors.
|
|
430
|
+
* init.ff_tokens_ok and init.backtrack_ok are ignored
|
|
431
|
+
* (backtracking is always disabled, and ff_tokens can be retrieved using llg_matcher_compute_ff_tokens()).
|
|
432
|
+
* The data is of different format, depending on constraint_type:
|
|
433
|
+
* - "regex" - data is regular expression in rust regex format
|
|
434
|
+
* see https://docs.rs/regex/latest/regex/#syntax
|
|
435
|
+
* - "json" or "json_schema" - data is (stringified) JSON schema
|
|
436
|
+
* see https://github.com/guidance-ai/llguidance/blob/main/docs/json_schema.md
|
|
437
|
+
* - "json_object" - equivalent to JSON schema: {"type":"object"}
|
|
438
|
+
* - "lark" - data is grammar in a variant of Lark syntax
|
|
439
|
+
* see https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md
|
|
440
|
+
* - "llguidance" or "guidance" - data is a list of Lark or JSON schemas in JSON format
|
|
441
|
+
*/
|
|
442
|
+
struct LlgMatcher *llg_new_matcher(const struct LlgConstraintInit *init,
|
|
443
|
+
const char *constraint_type,
|
|
444
|
+
const char *data);
|
|
445
|
+
|
|
446
|
+
/**
|
|
447
|
+
* Compute the set of allowed tokens for the current state.
|
|
448
|
+
* The result is written to mask_dest.
|
|
449
|
+
* mask_byte_len must be equal to llg_matcher_get_mask_byte_size().
|
|
450
|
+
* Returns 0 on success and -1 on error.
|
|
451
|
+
*/
|
|
452
|
+
int32_t llg_matcher_compute_mask_into(struct LlgMatcher *matcher,
|
|
453
|
+
uint32_t *mask_dest,
|
|
454
|
+
size_t mask_byte_len);
|
|
455
|
+
|
|
456
|
+
/**
|
|
457
|
+
* Compute the set of allowed tokens for the current state.
|
|
458
|
+
* A pointer to the result can be obtained with llg_matcher_get_mask().
|
|
459
|
+
* Returns 0 on success and -1 on error.
|
|
460
|
+
*/
|
|
461
|
+
int32_t llg_matcher_compute_mask(struct LlgMatcher *matcher);
|
|
462
|
+
|
|
463
|
+
/**
|
|
464
|
+
* Return pointer to the mask computed by llg_matcher_compute_mask(), if any.
|
|
465
|
+
*/
|
|
466
|
+
const uint32_t *llg_matcher_get_mask(struct LlgMatcher *matcher);
|
|
467
|
+
|
|
468
|
+
/**
|
|
469
|
+
* Return the size in bytes of the mask, as expected by llg_matcher_compute_mask_into().
|
|
470
|
+
*/
|
|
471
|
+
size_t llg_matcher_get_mask_byte_size(struct LlgMatcher *matcher);
|
|
472
|
+
|
|
473
|
+
/**
|
|
474
|
+
* Advance the matcher by one token.
|
|
475
|
+
* Returns 0 on success and -1 on error.
|
|
476
|
+
*/
|
|
477
|
+
int32_t llg_matcher_consume_token(struct LlgMatcher *matcher, uint32_t token);
|
|
478
|
+
|
|
479
|
+
/**
|
|
480
|
+
* Advance the matcher by several tokens.
|
|
481
|
+
* Returns 0 on success and -1 on error.
|
|
482
|
+
*/
|
|
483
|
+
int32_t llg_matcher_consume_tokens(struct LlgMatcher *matcher,
|
|
484
|
+
const uint32_t *tokens,
|
|
485
|
+
size_t n_tokens);
|
|
486
|
+
|
|
487
|
+
/**
|
|
488
|
+
* Get the error message from the matcher or null if there is no error.
|
|
489
|
+
* After it returns a non-null value, it will always return it until the matcher is freed
|
|
490
|
+
* using llg_free_matcher() (at which point the pointer will be invalid).
|
|
491
|
+
*/
|
|
492
|
+
const char *llg_matcher_get_error(struct LlgMatcher *matcher);
|
|
493
|
+
|
|
494
|
+
/**
|
|
495
|
+
* Check if the matcher is in an error state.
|
|
496
|
+
*/
|
|
497
|
+
bool llg_matcher_is_error(struct LlgMatcher *matcher);
|
|
498
|
+
|
|
499
|
+
/**
|
|
500
|
+
* Free the matcher.
|
|
501
|
+
*/
|
|
502
|
+
void llg_free_matcher(struct LlgMatcher *matcher);
|
|
503
|
+
|
|
504
|
+
/**
|
|
505
|
+
* Backtracks the matcher states by num_tokens.
|
|
506
|
+
* Returns 0 on success and -1 on error.
|
|
507
|
+
*/
|
|
508
|
+
int32_t llg_matcher_rollback(struct LlgMatcher *matcher, size_t num_tokens);
|
|
509
|
+
|
|
510
|
+
/**
|
|
511
|
+
* Resets the matcher to the initial state.
|
|
512
|
+
* A matcher in error state cannot be reset.
|
|
513
|
+
* Returns 0 on success and -1 on error.
|
|
514
|
+
*/
|
|
515
|
+
int32_t llg_matcher_reset(struct LlgMatcher *matcher);
|
|
516
|
+
|
|
517
|
+
/**
|
|
518
|
+
* Check if the grammar can fully accept the input.
|
|
519
|
+
*/
|
|
520
|
+
bool llg_matcher_is_accepting(struct LlgMatcher *matcher);
|
|
521
|
+
|
|
522
|
+
/**
|
|
523
|
+
* Check if the matcher will force EOS token.
|
|
524
|
+
* This returns true also in error state, as that is a forced stop.
|
|
525
|
+
*/
|
|
526
|
+
bool llg_matcher_is_stopped(const struct LlgMatcher *matcher);
|
|
527
|
+
|
|
528
|
+
/**
|
|
529
|
+
* Check how many tokens can be consumed from the given tokens.
|
|
530
|
+
* Returns the number of tokens that can be consumed, or -1 on error.
|
|
531
|
+
*/
|
|
532
|
+
int32_t llg_matcher_validate_tokens(struct LlgMatcher *matcher,
|
|
533
|
+
const uint32_t *tokens,
|
|
534
|
+
size_t n_tokens);
|
|
535
|
+
|
|
536
|
+
/**
|
|
537
|
+
* Compute the fast-forward (forced) tokens for the current state.
|
|
538
|
+
* The result is written to output.
|
|
539
|
+
* Returns the number of tokens written to output (which can be 0) or -1 on error.
|
|
540
|
+
*/
|
|
541
|
+
int32_t llg_matcher_compute_ff_tokens(struct LlgMatcher *matcher,
|
|
542
|
+
uint32_t *output,
|
|
543
|
+
size_t output_len);
|
|
544
|
+
|
|
545
|
+
/**
|
|
546
|
+
* Clone the matcher.
|
|
547
|
+
*/
|
|
548
|
+
struct LlgMatcher *llg_clone_matcher(const struct LlgMatcher *matcher);
|
|
549
|
+
|
|
412
550
|
#ifdef __cplusplus
|
|
413
551
|
} // extern "C"
|
|
414
552
|
#endif // __cplusplus
|
|
@@ -3,7 +3,7 @@ use std::fmt::{Debug, Display};
|
|
|
3
3
|
use anyhow::{bail, Result};
|
|
4
4
|
use derivre::RegexAst;
|
|
5
5
|
use serde::{Deserialize, Serialize};
|
|
6
|
-
use serde_json::Value;
|
|
6
|
+
use serde_json::{json, Value};
|
|
7
7
|
|
|
8
8
|
use crate::{
|
|
9
9
|
earley::{lexerspec::LexerSpec, Grammar},
|
|
@@ -294,6 +294,26 @@ impl TopLevelGrammar {
|
|
|
294
294
|
max_tokens: None,
|
|
295
295
|
}
|
|
296
296
|
}
|
|
297
|
+
|
|
298
|
+
/// The data is of different format, depending on tag:
|
|
299
|
+
/// - "regex" - data is regular expression in rust regex format
|
|
300
|
+
/// see https://docs.rs/regex/latest/regex/#syntax
|
|
301
|
+
/// - "json" or "json_schema" - data is (stringified) JSON schema
|
|
302
|
+
/// see https://github.com/guidance-ai/llguidance/blob/main/docs/json_schema.md
|
|
303
|
+
/// - "json_object" - equivalent to JSON schema: {"type":"object"}
|
|
304
|
+
/// - "lark" - data is grammar in a variant of Lark syntax
|
|
305
|
+
/// see https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md
|
|
306
|
+
/// - "llguidance" or "guidance" - data is a list of Lark or JSON schemas in JSON format
|
|
307
|
+
pub fn from_tagged_str(tag: &str, data: &str) -> Result<Self> {
|
|
308
|
+
match tag {
|
|
309
|
+
"regex" => Ok(Self::from_regex(data)),
|
|
310
|
+
"json" | "json_schema" => Ok(Self::from_json_schema(serde_json::from_str(data)?)),
|
|
311
|
+
"json_object" => Ok(Self::from_json_schema(json!({"type": "object"}))),
|
|
312
|
+
"lark" => Ok(Self::from_lark(data.to_string())),
|
|
313
|
+
"llguidance" | "guidance" => Self::from_lark_or_grammar_list(data),
|
|
314
|
+
_ => bail!("unknown constraint type: {tag}"),
|
|
315
|
+
}
|
|
316
|
+
}
|
|
297
317
|
}
|
|
298
318
|
|
|
299
319
|
impl GrammarWithLexer {
|