llguidance 0.7.8__tar.gz → 0.7.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llguidance-0.7.8 → llguidance-0.7.9}/Cargo.lock +5 -5
- {llguidance-0.7.8 → llguidance-0.7.9}/PKG-INFO +1 -1
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/Cargo.toml +1 -1
- {llguidance-0.7.8 → llguidance-0.7.9}/pyproject.toml +1 -1
- {llguidance-0.7.8 → llguidance-0.7.9}/python/llguidance/_grammar_from.py +5 -1
- {llguidance-0.7.8 → llguidance-0.7.9}/python/llguidance/_lib.pyi +2 -4
- {llguidance-0.7.8 → llguidance-0.7.9}/python/llguidance/gbnf_to_lark.py +17 -18
- {llguidance-0.7.8 → llguidance-0.7.9}/python/torch_tests/test_matcher.py +7 -2
- {llguidance-0.7.8 → llguidance-0.7.9}/python_ext/Cargo.toml +1 -1
- {llguidance-0.7.8 → llguidance-0.7.9}/python_ext/src/llmatcher.rs +16 -4
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie/Cargo.toml +1 -1
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie_hf_downloader/Cargo.toml +1 -1
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie_hf_tokenizers/Cargo.toml +1 -1
- {llguidance-0.7.8 → llguidance-0.7.9}/.github/workflows/rust.yml +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/.github/workflows/wheels.yml +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/.gitignore +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/CODE_OF_CONDUCT.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/Cargo.toml +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/LICENSE +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/README.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/SECURITY.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/SUPPORT.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/c_sample/Makefile +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/c_sample/README.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/c_sample/c_sample.cpp +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/docs/fast_forward.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/docs/json_schema.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/docs/mask_plot.png +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/docs/optimizations.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/docs/special_tokens.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/docs/syntax.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/docs/toktrie.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/json_stats/Cargo.toml +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/json_stats/expected_maskbench.json +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/json_stats/jstats.sh +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/json_stats/scripts/split-stats.sh +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/json_stats/scripts/split_plot.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/json_stats/src/json_stats.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/json_stats/src/lib.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/json_stats/src/stats.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/LICENSE +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/README.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/build.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/cbindgen.toml +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/grammars/character.json +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/grammars/json.json +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/llguidance.h +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/api.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/constraint.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/earley/from_guidance.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/earley/grammar.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/earley/lexer.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/earley/lexerspec.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/earley/mod.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/earley/parser.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/earley/perf.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/earley/regexvec.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/earley/slicer.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/factory.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/ffi.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/ffi_par.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/grammar_builder.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/json/README.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/json/compiler.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/json/context_ref.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/json/context_simple/context.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/json/context_simple/draft.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/json/context_simple/mod.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/json/formats.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/json/mod.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/json/numeric.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/json/schema.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/json/shared_context.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/json_validation.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/lark/README.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/lark/ast.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/lark/common.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/lark/compiler.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/lark/lexer.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/lark/mod.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/lark/parser.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/lib.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/logging.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/matcher.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/output.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/panic_utils.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/stop_controller.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/substring.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/tokenizer_json.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/parser/src/tokenparser.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/plan.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python/llguidance/__init__.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python/llguidance/_tokenizer.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python/llguidance/_util.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python/llguidance/cli.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python/llguidance/hf.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python/llguidance/mlx.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python/llguidance/numpy.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python/llguidance/py.typed +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python/llguidance/torch.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python/mypy.ini +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python/torch_tests/__init__.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python/torch_tests/test_bitmask.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python/torch_tests/test_hf.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python_ext/src/lib.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python_ext/src/llinterpreter.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python_ext/src/py.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/python_ext/src/pyjson.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/Cargo.toml +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/README.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/cli.sh +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/data/blog.sample.json +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/data/blog.schema.json +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/data/blog.schema.ll.json +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/data/from-llama.cpp/README.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/data/from-llama.cpp/arithmetic.gbnf +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/data/from-llama.cpp/c.gbnf +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/data/from-llama.cpp/chess.gbnf +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/data/from-llama.cpp/english.gbnf +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/data/from-llama.cpp/japanese.gbnf +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/data/from-llama.cpp/json.gbnf +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/data/from-llama.cpp/json_arr.gbnf +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/data/from-llama.cpp/list.gbnf +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/data/from-llama.cpp/vllm-sql.gbnf +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/data/lark.lark +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/data/rfc.lark +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/data/rfc.xml +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/gtest.sh +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/lark.sh +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/run.sh +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/src/lib.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/src/minimal.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/src/sample_parser.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/tests/test_lark.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/tests/test_ll.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/tests/test_raw_parser.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/sample_parser/tests/test_stop.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/annotate_asm.js +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/bump.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/cbindgen.sh +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/checklinks.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/checklinks.sh +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/ci-publish.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/disasm.sh +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/gbnf_to_lark.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/gen-testcase.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/git-version.sh +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/install-deps.sh +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/jsonschema-stats.js +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/remote-guidance-test.sh +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/rust-size.js +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/rust_size.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/test-guidance.sh +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/tokenizer_test.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/scripts/update-git.py +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie/LICENSE +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie/README.md +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie/src/bytes.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie/src/lib.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie/src/recognizer.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie/src/rng.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie/src/svob.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie/src/tokenv.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie/src/toktree.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie/tests/test_svob.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie_hf_downloader/LICENSE +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie_hf_downloader/src/lib.rs +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie_hf_tokenizers/LICENSE +0 -0
- {llguidance-0.7.8 → llguidance-0.7.9}/toktrie_hf_tokenizers/src/lib.rs +0 -0
|
@@ -1177,7 +1177,7 @@ checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
|
|
|
1177
1177
|
|
|
1178
1178
|
[[package]]
|
|
1179
1179
|
name = "llguidance"
|
|
1180
|
-
version = "0.7.8"
|
|
1180
|
+
version = "0.7.9"
|
|
1181
1181
|
dependencies = [
|
|
1182
1182
|
"anyhow",
|
|
1183
1183
|
"derivre",
|
|
@@ -1196,7 +1196,7 @@ dependencies = [
|
|
|
1196
1196
|
|
|
1197
1197
|
[[package]]
|
|
1198
1198
|
name = "llguidance_py"
|
|
1199
|
-
version = "0.7.8"
|
|
1199
|
+
version = "0.7.9"
|
|
1200
1200
|
dependencies = [
|
|
1201
1201
|
"anyhow",
|
|
1202
1202
|
"bytemuck",
|
|
@@ -2356,7 +2356,7 @@ dependencies = [
|
|
|
2356
2356
|
|
|
2357
2357
|
[[package]]
|
|
2358
2358
|
name = "toktrie"
|
|
2359
|
-
version = "0.7.8"
|
|
2359
|
+
version = "0.7.9"
|
|
2360
2360
|
dependencies = [
|
|
2361
2361
|
"anyhow",
|
|
2362
2362
|
"bytemuck",
|
|
@@ -2367,7 +2367,7 @@ dependencies = [
|
|
|
2367
2367
|
|
|
2368
2368
|
[[package]]
|
|
2369
2369
|
name = "toktrie_hf_downloader"
|
|
2370
|
-
version = "0.7.8"
|
|
2370
|
+
version = "0.7.9"
|
|
2371
2371
|
dependencies = [
|
|
2372
2372
|
"anyhow",
|
|
2373
2373
|
"hf-hub",
|
|
@@ -2378,7 +2378,7 @@ dependencies = [
|
|
|
2378
2378
|
|
|
2379
2379
|
[[package]]
|
|
2380
2380
|
name = "toktrie_hf_tokenizers"
|
|
2381
|
-
version = "0.7.8"
|
|
2381
|
+
version = "0.7.9"
|
|
2382
2382
|
dependencies = [
|
|
2383
2383
|
"anyhow",
|
|
2384
2384
|
"log",
|
|
@@ -47,7 +47,11 @@ def grammar_from(format: GrammarFormat, text: str) -> str:
|
|
|
47
47
|
if format == "lark":
|
|
48
48
|
return LLMatcher.grammar_from_lark(text)
|
|
49
49
|
if format in ("gbnf", "ebnf", "cfg", "grammar"):
|
|
50
|
-
|
|
50
|
+
try:
|
|
51
|
+
text = any_to_lark(text)
|
|
52
|
+
except Exception as e:
|
|
53
|
+
raise ValueError(f"Failed to convert the grammar from GBNF to Lark: {e}")
|
|
54
|
+
return LLMatcher.grammar_from_lark(text)
|
|
51
55
|
if format in ("json_schema", "json"):
|
|
52
56
|
return LLMatcher.grammar_from_json_schema(text)
|
|
53
57
|
if format == "regex":
|
|
@@ -246,12 +246,10 @@ class LLMatcher:
|
|
|
246
246
|
"""
|
|
247
247
|
|
|
248
248
|
@staticmethod
|
|
249
|
-
def validate_grammar(
|
|
249
|
+
def validate_grammar(grammar: str, tokenizer: Optional[LLTokenizer] = None) -> str:
|
|
250
250
|
"""
|
|
251
251
|
Validate the grammar, for example one returned by LLMatcher.grammar_from_*().
|
|
252
|
-
Returns
|
|
253
|
-
|
|
254
|
-
Currently, this is equivalent to LLMatcher(tokenizer, grammar).get_error().
|
|
252
|
+
Returns empty string if the grammar is valid, otherwise an error message.
|
|
255
253
|
"""
|
|
256
254
|
|
|
257
255
|
@staticmethod
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
|
|
3
3
|
from dataclasses import dataclass
|
|
4
|
-
from
|
|
5
|
-
from typing import Optional, List, Dict, Tuple, Iterator
|
|
4
|
+
from typing import Optional, List, Tuple, Iterator
|
|
6
5
|
from abc import ABC, abstractmethod
|
|
7
6
|
import re
|
|
8
7
|
|
|
@@ -30,7 +29,7 @@ class Position:
|
|
|
30
29
|
return f"line {line_no}, {repr(pref)} ^ {repr(suff)}"
|
|
31
30
|
|
|
32
31
|
|
|
33
|
-
class
|
|
32
|
+
class GbnfToLarkError(Exception):
|
|
34
33
|
def __init__(self, pos: Position, message: str):
|
|
35
34
|
self.pos = pos
|
|
36
35
|
super().__init__(f"{message} at {pos}")
|
|
@@ -202,7 +201,7 @@ class GrammarParser:
|
|
|
202
201
|
|
|
203
202
|
if pos.current() == "\\":
|
|
204
203
|
if not pos.peek(2)[1]:
|
|
205
|
-
raise
|
|
204
|
+
raise GbnfToLarkError(pos, "Incomplete escape sequence")
|
|
206
205
|
pos = pos.advance()
|
|
207
206
|
c = pos.current()
|
|
208
207
|
if c in '"\\[]nrt':
|
|
@@ -210,7 +209,7 @@ class GrammarParser:
|
|
|
210
209
|
elif c == "x":
|
|
211
210
|
hex_value = pos.peek(3)[1:3]
|
|
212
211
|
if len(hex_value) != 2 or not is_all_hex(hex_value):
|
|
213
|
-
raise
|
|
212
|
+
raise GbnfToLarkError(
|
|
214
213
|
pos, f"Invalid \\x escape sequence: \\x{hex_value}"
|
|
215
214
|
)
|
|
216
215
|
pos = pos.advance(3)
|
|
@@ -218,7 +217,7 @@ class GrammarParser:
|
|
|
218
217
|
elif c == "u":
|
|
219
218
|
hex_value = pos.peek(5)[1:5]
|
|
220
219
|
if len(hex_value) != 4 or not is_all_hex(hex_value):
|
|
221
|
-
raise
|
|
220
|
+
raise GbnfToLarkError(
|
|
222
221
|
pos, f"Invalid \\u escape sequence: \\u{hex_value}"
|
|
223
222
|
)
|
|
224
223
|
pos = pos.advance(5)
|
|
@@ -226,21 +225,21 @@ class GrammarParser:
|
|
|
226
225
|
elif c == "U":
|
|
227
226
|
hex_value = pos.peek(9)[1:9]
|
|
228
227
|
if len(hex_value) != 8 or not is_all_hex(hex_value):
|
|
229
|
-
raise
|
|
228
|
+
raise GbnfToLarkError(
|
|
230
229
|
pos, f"Invalid \\U escape sequence: \\U{hex_value}"
|
|
231
230
|
)
|
|
232
231
|
pos = pos.advance(9)
|
|
233
232
|
return f"\\U{hex_value.lstrip('0')}", pos
|
|
234
233
|
else:
|
|
235
|
-
raise
|
|
234
|
+
raise GbnfToLarkError(pos, f"Invalid escape sequence \\{c}")
|
|
236
235
|
elif pos.current() == "":
|
|
237
|
-
raise
|
|
236
|
+
raise GbnfToLarkError(pos, "Unexpected end of input")
|
|
238
237
|
|
|
239
238
|
return pos.current(), pos.advance()
|
|
240
239
|
|
|
241
240
|
def _parse_char_class(self, pos: Position) -> Tuple[ASTNode, Position]:
|
|
242
241
|
if pos.current() != "[":
|
|
243
|
-
raise
|
|
242
|
+
raise GbnfToLarkError(pos, "Expected '['")
|
|
244
243
|
r = "["
|
|
245
244
|
pos = pos.advance()
|
|
246
245
|
|
|
@@ -257,7 +256,7 @@ class GrammarParser:
|
|
|
257
256
|
|
|
258
257
|
def _parse_literal(self, pos: Position) -> Tuple[ASTNode, Position]:
|
|
259
258
|
if pos.current() != '"':
|
|
260
|
-
raise
|
|
259
|
+
raise GbnfToLarkError(pos, "Expected '\"'")
|
|
261
260
|
pos = pos.advance()
|
|
262
261
|
r = ""
|
|
263
262
|
|
|
@@ -275,7 +274,7 @@ class GrammarParser:
|
|
|
275
274
|
while GrammarParser._is_word_char(pos.current()):
|
|
276
275
|
pos = pos.advance()
|
|
277
276
|
if pos.pos == start:
|
|
278
|
-
raise
|
|
277
|
+
raise GbnfToLarkError(pos, "Expected name")
|
|
279
278
|
return pos.text[start : pos.pos], pos
|
|
280
279
|
|
|
281
280
|
@staticmethod
|
|
@@ -284,7 +283,7 @@ class GrammarParser:
|
|
|
284
283
|
while pos.current().isdigit():
|
|
285
284
|
pos = pos.advance()
|
|
286
285
|
if pos.pos == start:
|
|
287
|
-
raise
|
|
286
|
+
raise GbnfToLarkError(pos, "Expected integer")
|
|
288
287
|
return int(pos.text[start : pos.pos]), pos
|
|
289
288
|
|
|
290
289
|
def _skip_space(self, pos: Position, allow_newlines: bool) -> Position:
|
|
@@ -323,7 +322,7 @@ class GrammarParser:
|
|
|
323
322
|
pos = self._skip_space(pos, allow_newlines=False)
|
|
324
323
|
|
|
325
324
|
if pos.peek(3) != "::=":
|
|
326
|
-
raise
|
|
325
|
+
raise GbnfToLarkError(pos, "Expected ::=")
|
|
327
326
|
pos = pos.advance(3)
|
|
328
327
|
|
|
329
328
|
pos = self._skip_space(pos, allow_newlines=True)
|
|
@@ -388,14 +387,14 @@ class GrammarParser:
|
|
|
388
387
|
|
|
389
388
|
def _parse_group(self, pos: Position, is_nested: bool) -> Tuple[ASTNode, Position]:
|
|
390
389
|
if pos.current() != "(":
|
|
391
|
-
raise
|
|
390
|
+
raise GbnfToLarkError(pos, "Expected '('")
|
|
392
391
|
pos = pos.advance()
|
|
393
392
|
pos = self._skip_space(pos, True)
|
|
394
393
|
|
|
395
394
|
alternatives, pos = self._parse_alternatives(pos, is_nested=True)
|
|
396
395
|
|
|
397
396
|
if pos.current() != ")":
|
|
398
|
-
raise
|
|
397
|
+
raise GbnfToLarkError(pos, "Expected ')'")
|
|
399
398
|
pos = pos.advance()
|
|
400
399
|
|
|
401
400
|
return alternatives, self._skip_space(pos, is_nested)
|
|
@@ -429,11 +428,11 @@ class GrammarParser:
|
|
|
429
428
|
max_times, pos = self._parse_int(pos)
|
|
430
429
|
pos = self._skip_space(pos, True)
|
|
431
430
|
if pos.current() != "}":
|
|
432
|
-
raise
|
|
431
|
+
raise GbnfToLarkError(pos, "Expected '}'")
|
|
433
432
|
nodes[-1] = RepetitionNode(nodes[-1], min_times, max_times)
|
|
434
433
|
return pos.advance()
|
|
435
434
|
else:
|
|
436
|
-
raise
|
|
435
|
+
raise GbnfToLarkError(pos, "Expected ',' or '}'")
|
|
437
436
|
|
|
438
437
|
return pos
|
|
439
438
|
|
|
@@ -111,13 +111,18 @@ def test_lark_syntax() -> None:
|
|
|
111
111
|
assert m.is_error()
|
|
112
112
|
assert "no_such_rule" in m.get_error()
|
|
113
113
|
|
|
114
|
-
e = LLMatcher.validate_grammar(
|
|
114
|
+
e = LLMatcher.validate_grammar('start: /.../ no_such_rule')
|
|
115
|
+
assert "no_such_rule" in e
|
|
116
|
+
|
|
117
|
+
e = LLMatcher.validate_grammar('start: /.../ no_such_rule', tokenizer())
|
|
115
118
|
assert "no_such_rule" in e
|
|
116
119
|
|
|
117
120
|
|
|
118
121
|
def test_regex_syntax() -> None:
|
|
119
122
|
g = LLMatcher.grammar_from_regex(r"missing close paren (")
|
|
120
|
-
e = LLMatcher.validate_grammar(
|
|
123
|
+
e = LLMatcher.validate_grammar(g)
|
|
124
|
+
assert "invalid regex" in e
|
|
125
|
+
e = LLMatcher.validate_grammar(g, tokenizer())
|
|
121
126
|
assert "invalid regex" in e
|
|
122
127
|
|
|
123
128
|
|
|
@@ -3,7 +3,7 @@ use std::fmt::Display;
|
|
|
3
3
|
use std::ops::DerefMut;
|
|
4
4
|
|
|
5
5
|
use anyhow::Result;
|
|
6
|
-
use llguidance::api::ParserLimits;
|
|
6
|
+
use llguidance::api::{GrammarInit, ParserLimits};
|
|
7
7
|
use llguidance::toktrie::{InferenceCapabilities, SimpleVob, TokEnv, TokenId};
|
|
8
8
|
use llguidance::{api::TopLevelGrammar, TokenParser};
|
|
9
9
|
use llguidance::{json_merge, Logger, Matcher, ParserFactory};
|
|
@@ -198,13 +198,25 @@ impl LLMatcher {
|
|
|
198
198
|
}
|
|
199
199
|
|
|
200
200
|
#[staticmethod]
|
|
201
|
+
#[pyo3(signature = (grammar, tokenizer=None))]
|
|
201
202
|
fn validate_grammar(
|
|
202
|
-
tokenizer: &LLTokenizer,
|
|
203
203
|
grammar: Bound<'_, PyAny>,
|
|
204
|
+
tokenizer: Option<&LLTokenizer>,
|
|
204
205
|
py: Python<'_>,
|
|
205
206
|
) -> String {
|
|
206
|
-
|
|
207
|
-
|
|
207
|
+
match extract_grammar(grammar) {
|
|
208
|
+
Ok(grammar) => py.allow_threads(|| {
|
|
209
|
+
GrammarInit::Serialized(grammar)
|
|
210
|
+
.to_internal(
|
|
211
|
+
tokenizer.map(|t| t.factory().tok_env().clone()),
|
|
212
|
+
ParserLimits::default(),
|
|
213
|
+
)
|
|
214
|
+
.err()
|
|
215
|
+
.map(|e| e.to_string())
|
|
216
|
+
.unwrap_or_default()
|
|
217
|
+
}),
|
|
218
|
+
Err(e) => e.to_string(),
|
|
219
|
+
}
|
|
208
220
|
}
|
|
209
221
|
|
|
210
222
|
#[staticmethod]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|