llguidance 0.7.22__tar.gz → 0.7.23__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llguidance-0.7.22 → llguidance-0.7.23}/CHANGELOG.md +5 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/Cargo.lock +5 -5
- {llguidance-0.7.22 → llguidance-0.7.23}/PKG-INFO +1 -1
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/Cargo.toml +1 -1
- {llguidance-0.7.22 → llguidance-0.7.23}/pyproject.toml +1 -1
- llguidance-0.7.23/python/llguidance/llamacpp.py +62 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/torch_tests/test_hf.py +27 -10
- llguidance-0.7.23/python/torch_tests/test_llamacpp.py +42 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python_ext/Cargo.toml +1 -1
- {llguidance-0.7.22 → llguidance-0.7.23}/python_ext/src/py.rs +108 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/install-deps.sh +1 -1
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/Cargo.toml +1 -1
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie_hf_downloader/Cargo.toml +1 -1
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie_hf_tokenizers/Cargo.toml +1 -1
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie_hf_tokenizers/src/lib.rs +7 -2
- {llguidance-0.7.22 → llguidance-0.7.23}/.github/workflows/rust.yml +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/.github/workflows/wheels.yml +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/.gitignore +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/CODE_OF_CONDUCT.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/Cargo.toml +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/LICENSE +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/README.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/SECURITY.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/SUPPORT.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/c_sample/Makefile +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/c_sample/README.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/c_sample/c_sample.cpp +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/docs/fast_forward.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/docs/json_schema.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/docs/mask_plot.png +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/docs/optimizations.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/docs/special_tokens.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/docs/syntax.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/docs/toktrie.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/json_stats/Cargo.toml +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/json_stats/expected_maskbench.json +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/json_stats/jstats.sh +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/json_stats/scripts/split-stats.sh +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/json_stats/scripts/split_plot.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/json_stats/src/json_stats.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/json_stats/src/lib.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/json_stats/src/stats.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/LICENSE +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/README.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/build.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/cbindgen.toml +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/grammars/character.json +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/grammars/json.json +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/llguidance.h +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/api.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/constraint.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/from_guidance.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/grammar.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/lexer.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/lexerspec.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/mod.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/parser.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/perf.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/regexvec.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/earley/slicer.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/factory.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/ffi.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/ffi_par.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/grammar_builder.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/README.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/compiler.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/context_ref.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/context_simple/context.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/context_simple/draft.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/context_simple/mod.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/formats.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/mod.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/numeric.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/schema.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json/shared_context.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/json_validation.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/lark/README.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/lark/ast.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/lark/common.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/lark/compiler.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/lark/lexer.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/lark/mod.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/lark/parser.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/lib.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/logging.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/matcher.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/output.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/panic_utils.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/regex_rewrite.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/stop_controller.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/substring.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/tokenizer_json.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/parser/src/tokenparser.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/plan.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/__init__.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/_grammar_from.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/_lib.pyi +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/_struct_tag.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/_tokenizer.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/_util.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/cli.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/gbnf_to_lark.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/hf.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/mlx.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/numpy.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/py.typed +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/llguidance/torch.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/mypy.ini +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/torch_tests/__init__.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/torch_tests/test_bitmask.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python/torch_tests/test_matcher.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python_ext/src/lib.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python_ext/src/llinterpreter.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python_ext/src/llmatcher.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python_ext/src/parserlimits.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/python_ext/src/pyjson.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/Cargo.toml +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/README.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/cli.sh +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/blog.sample.json +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/blog.schema.json +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/blog.schema.ll.json +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/README.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/arithmetic.gbnf +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/c.gbnf +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/chess.gbnf +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/english.gbnf +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/japanese.gbnf +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/json.gbnf +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/json_arr.gbnf +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/list.gbnf +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/from-llama.cpp/vllm-sql.gbnf +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/lark.lark +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/rfc.lark +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/rfc.xml +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/data/ulysses.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/gtest.sh +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/lark.sh +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/run.sh +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/src/lib.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/src/minimal.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/src/sample_parser.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/tests/test_lark.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/tests/test_ll.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/tests/test_raw_parser.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/sample_parser/tests/test_stop.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/annotate_asm.js +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/bump.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/cbindgen.sh +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/checklinks.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/checklinks.sh +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/ci-publish.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/disasm.sh +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/gbnf_to_lark.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/gen-testcase.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/git-version.sh +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/jsonschema-stats.js +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/remote-guidance-test.sh +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/rust-size.js +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/rust_size.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/test-guidance.sh +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/tokenizer_test.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/scripts/update-git.py +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/LICENSE +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/README.md +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/src/bytes.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/src/lib.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/src/recognizer.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/src/rng.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/src/svob.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/src/tokenv.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/src/toktree.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie/tests/test_svob.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie_hf_downloader/LICENSE +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie_hf_downloader/src/lib.rs +0 -0
- {llguidance-0.7.22 → llguidance-0.7.23}/toktrie_hf_tokenizers/LICENSE +0 -0
|
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file. Dates are d
|
|
|
4
4
|
|
|
5
5
|
If a release doesn't introduce any interesting changes (build fixes etc.), it's skipped.
|
|
6
6
|
|
|
7
|
+
#### [0.7.23](https://github.com/guidance-ai/llguidance/compare/v0.7.22...0.7.23) 2025-05-22
|
|
8
|
+
|
|
9
|
+
- native llama.cpp tokenizer support [`#179`](https://github.com/guidance-ai/llguidance/pull/179)
|
|
10
|
+
- improve special token detection in HF tokenizers [`6cae393`](https://github.com/guidance-ai/llguidance/commit/6cae393b9c04fe67621615ff22b46beab512d069)
|
|
11
|
+
|
|
7
12
|
#### [0.7.22](https://github.com/guidance-ai/llguidance/compare/v0.7.21...0.7.22) 2025-05-21
|
|
8
13
|
|
|
9
14
|
- Keep EOS token bytes in `TokenizerWrapper` [`#178`](https://github.com/guidance-ai/llguidance/pull/178)
|
|
@@ -1174,7 +1174,7 @@ checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856"
|
|
|
1174
1174
|
|
|
1175
1175
|
[[package]]
|
|
1176
1176
|
name = "llguidance"
|
|
1177
|
-
version = "0.7.22"
|
|
1177
|
+
version = "0.7.23"
|
|
1178
1178
|
dependencies = [
|
|
1179
1179
|
"anyhow",
|
|
1180
1180
|
"derivre",
|
|
@@ -1193,7 +1193,7 @@ dependencies = [
|
|
|
1193
1193
|
|
|
1194
1194
|
[[package]]
|
|
1195
1195
|
name = "llguidance_py"
|
|
1196
|
-
version = "0.7.22"
|
|
1196
|
+
version = "0.7.23"
|
|
1197
1197
|
dependencies = [
|
|
1198
1198
|
"anyhow",
|
|
1199
1199
|
"bytemuck",
|
|
@@ -2336,7 +2336,7 @@ dependencies = [
|
|
|
2336
2336
|
|
|
2337
2337
|
[[package]]
|
|
2338
2338
|
name = "toktrie"
|
|
2339
|
-
version = "0.7.22"
|
|
2339
|
+
version = "0.7.23"
|
|
2340
2340
|
dependencies = [
|
|
2341
2341
|
"anyhow",
|
|
2342
2342
|
"bytemuck",
|
|
@@ -2347,7 +2347,7 @@ dependencies = [
|
|
|
2347
2347
|
|
|
2348
2348
|
[[package]]
|
|
2349
2349
|
name = "toktrie_hf_downloader"
|
|
2350
|
-
version = "0.7.22"
|
|
2350
|
+
version = "0.7.23"
|
|
2351
2351
|
dependencies = [
|
|
2352
2352
|
"anyhow",
|
|
2353
2353
|
"hf-hub",
|
|
@@ -2358,7 +2358,7 @@ dependencies = [
|
|
|
2358
2358
|
|
|
2359
2359
|
[[package]]
|
|
2360
2360
|
name = "toktrie_hf_tokenizers"
|
|
2361
|
-
version = "0.7.22"
|
|
2361
|
+
version = "0.7.23"
|
|
2362
2362
|
dependencies = [
|
|
2363
2363
|
"anyhow",
|
|
2364
2364
|
"log",
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from typing import List, Optional
|
|
2
|
+
|
|
3
|
+
from ._lib import LLTokenizer
|
|
4
|
+
|
|
5
|
+
import llama_cpp
|
|
6
|
+
import ctypes
|
|
7
|
+
|
|
8
|
+
def lltokenizer_from_vocab(
|
|
9
|
+
vocab: llama_cpp.llama_vocab_p,
|
|
10
|
+
n_vocab: Optional[int] = None,
|
|
11
|
+
eos_token: Optional[int] = None,
|
|
12
|
+
slices: Optional[List[str]] = None,
|
|
13
|
+
) -> LLTokenizer:
|
|
14
|
+
"""
|
|
15
|
+
Create a new tokenizer from a llama.cpp vocab object.
|
|
16
|
+
This is an expensive operation (~1s), so the result should be cached.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
vocab: llama_cpp.llama_vocab_p - the vocab object to use
|
|
20
|
+
n_vocab: int - override the size of the vocabulary
|
|
21
|
+
eos_token: int - override the EOS token
|
|
22
|
+
slices: List[str] - configuration for slicer optimization; pass [] to disable,
|
|
23
|
+
or None to use the default configuration
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
ntok = llama_cpp.llama_vocab_n_tokens(vocab)
|
|
27
|
+
if eos_token is None:
|
|
28
|
+
eos_token = llama_cpp.llama_vocab_eos(vocab)
|
|
29
|
+
buffer_len = 16 * 1024
|
|
30
|
+
buffer = ctypes.create_string_buffer(buffer_len + 1)
|
|
31
|
+
tokens: List[bytes] = []
|
|
32
|
+
|
|
33
|
+
for token in range(ntok):
|
|
34
|
+
n = llama_cpp.llama_token_to_piece(
|
|
35
|
+
vocab,
|
|
36
|
+
token,
|
|
37
|
+
buffer,
|
|
38
|
+
buffer_len,
|
|
39
|
+
0,
|
|
40
|
+
True
|
|
41
|
+
)
|
|
42
|
+
if n < 0:
|
|
43
|
+
raise ValueError(f"Error writing token {token} to buffer of size {buffer_len}. Error: {n}")
|
|
44
|
+
assert n <= buffer_len
|
|
45
|
+
tok = bytes(buffer[:n]) # type: ignore
|
|
46
|
+
attr = llama_cpp.llama_token_get_attr(vocab, token)
|
|
47
|
+
if attr & llama_cpp.LLAMA_TOKEN_ATTR_CONTROL:
|
|
48
|
+
tok = b"\xFF" + tok
|
|
49
|
+
tokens.append(tok)
|
|
50
|
+
|
|
51
|
+
if n_vocab is not None:
|
|
52
|
+
while len(tokens) < n_vocab:
|
|
53
|
+
tokens.append(b"")
|
|
54
|
+
|
|
55
|
+
fptr = ctypes.cast(llama_cpp.llama_cpp._lib.llama_tokenize, ctypes.c_void_p).value
|
|
56
|
+
return LLTokenizer.from_llamacpp( # type: ignore
|
|
57
|
+
tokens=tokens,
|
|
58
|
+
vocab_ptr=vocab,
|
|
59
|
+
tokenize_fptr=fptr,
|
|
60
|
+
eos_token=eos_token,
|
|
61
|
+
slices=slices
|
|
62
|
+
)
|
|
@@ -21,7 +21,8 @@ from transformers import AutoTokenizer
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
def _build_tokenizer() -> LLTokenizer:
|
|
24
|
-
hf_tok = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct")
|
|
24
|
+
hf_tok = AutoTokenizer.from_pretrained(
|
|
25
|
+
"unsloth/Meta-Llama-3.1-8B-Instruct")
|
|
25
26
|
return llguidance.hf.from_tokenizer(hf_tok)
|
|
26
27
|
|
|
27
28
|
|
|
@@ -41,6 +42,22 @@ def lark_matcher(grm: str) -> LLMatcher:
|
|
|
41
42
|
return interp
|
|
42
43
|
|
|
43
44
|
|
|
45
|
+
def test_basic_tokenizer() -> None:
|
|
46
|
+
llt = tokenizer()
|
|
47
|
+
for s in [
|
|
48
|
+
"Hello world!", "Hello world! こんにちは世界!", "wave 👋", "heart 👋💖",
|
|
49
|
+
"1`a`b`c`d`e`f`g`h`i"
|
|
50
|
+
]:
|
|
51
|
+
toks = llt.tokenize_str(s)
|
|
52
|
+
print(llt.dbg_tokens(toks))
|
|
53
|
+
assert llt.decode_str(toks) == s
|
|
54
|
+
toks = llt.tokenize_bytes(b"\x8b")
|
|
55
|
+
print(llt.dbg_tokens(toks))
|
|
56
|
+
print(toks)
|
|
57
|
+
assert len(toks) == 1
|
|
58
|
+
assert llt.decode_bytes(toks) == b"\x8b"
|
|
59
|
+
|
|
60
|
+
|
|
44
61
|
def test_grammar() -> None:
|
|
45
62
|
t = tokenizer()
|
|
46
63
|
mask = allocate_token_bitmask(2, t.vocab_size)
|
|
@@ -66,7 +83,8 @@ def test_grammar() -> None:
|
|
|
66
83
|
def test_par_grammar() -> None:
|
|
67
84
|
n_gram = 50
|
|
68
85
|
t = tokenizer()
|
|
69
|
-
grammars = [(lark_matcher(r"start: /[a-zA-Z ]*/"), idx) for idx in range(n_gram)]
|
|
86
|
+
grammars = [(lark_matcher(r"start: /[a-zA-Z ]*/"), idx)
|
|
87
|
+
for idx in range(n_gram)]
|
|
70
88
|
mask = allocate_token_bitmask(n_gram, t.vocab_size)
|
|
71
89
|
mask2 = allocate_token_bitmask(n_gram, t.vocab_size)
|
|
72
90
|
exec = LLExecutor()
|
|
@@ -88,16 +106,15 @@ def test_tokenize_partial_basic(recent_tokens: List[int]) -> None:
|
|
|
88
106
|
"""Test tokenize_partial with a simple sentence."""
|
|
89
107
|
ll_tok = tokenizer()
|
|
90
108
|
assert ll_tok.is_canonical
|
|
91
|
-
new_tokens, leftover = ll_tok.tokenize_partial(
|
|
92
|
-
|
|
93
|
-
)
|
|
109
|
+
new_tokens, leftover = ll_tok.tokenize_partial(b" How are you",
|
|
110
|
+
recent_tokens=recent_tokens)
|
|
94
111
|
assert isinstance(new_tokens, list)
|
|
95
112
|
assert isinstance(leftover, bytes)
|
|
96
113
|
assert len(new_tokens) >= 2
|
|
97
114
|
assert ll_tok.decode_bytes(new_tokens) + leftover == b" How are you"
|
|
98
115
|
for suff in ["", "r", "!", " "]:
|
|
99
116
|
tok2 = ll_tok.tokenize_str(" How are you" + suff)
|
|
100
|
-
assert tok2[0 : len(new_tokens)] == new_tokens
|
|
117
|
+
assert tok2[0:len(new_tokens)] == new_tokens
|
|
101
118
|
|
|
102
119
|
|
|
103
120
|
def test_tokenize_partial_docs() -> None:
|
|
@@ -107,16 +124,16 @@ def test_tokenize_partial_docs() -> None:
|
|
|
107
124
|
assert leftover == b"order"
|
|
108
125
|
|
|
109
126
|
recent = ll.tokenize_bytes(b'{"')
|
|
110
|
-
new_tok, leftover = ll.tokenize_partial(
|
|
111
|
-
|
|
112
|
-
)
|
|
127
|
+
new_tok, leftover = ll.tokenize_partial(b'name_of_the_person"',
|
|
128
|
+
recent_tokens=recent)
|
|
113
129
|
print(ll.dbg_tokens(new_tok))
|
|
114
130
|
assert leftover == b'"'
|
|
115
131
|
assert ll.decode_str(new_tok) == "name_of_the_person"
|
|
116
132
|
|
|
117
133
|
|
|
118
134
|
def test_incomplete_tokenizer() -> None:
|
|
119
|
-
hf_tok = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM-135M-Instruct")
|
|
135
|
+
hf_tok = AutoTokenizer.from_pretrained(
|
|
136
|
+
"HuggingFaceTB/SmolLM-135M-Instruct")
|
|
120
137
|
ll_tok = llguidance.hf.from_tokenizer(hf_tok)
|
|
121
138
|
|
|
122
139
|
# unknown bytes are to be skipped
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import llguidance.llamacpp
|
|
2
|
+
import llama_cpp
|
|
3
|
+
import os
|
|
4
|
+
import requests # type: ignore
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
def get_llama_vocab_file(pytestconfig: Any) -> str:
|
|
8
|
+
url = "https://raw.githubusercontent.com/ggml-org/llama.cpp/f4ab2a41476600a98067a9474ea8f9e6db41bcfa/models/ggml-vocab-llama-bpe.gguf"
|
|
9
|
+
cache_dir = pytestconfig.cache.makedir("llama_vocab")
|
|
10
|
+
file_name = "vocab.gguf"
|
|
11
|
+
file_path = os.path.join(cache_dir, file_name)
|
|
12
|
+
|
|
13
|
+
if not os.path.exists(file_path):
|
|
14
|
+
r = requests.get(url)
|
|
15
|
+
r.raise_for_status()
|
|
16
|
+
with open(file_path, "wb") as f:
|
|
17
|
+
f.write(r.content)
|
|
18
|
+
|
|
19
|
+
return file_path
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_llama_cpp(pytestconfig: Any) -> None:
|
|
23
|
+
filepath = get_llama_vocab_file(pytestconfig)
|
|
24
|
+
p = llama_cpp.llama_model_default_params()
|
|
25
|
+
p.vocab_only = True
|
|
26
|
+
model = llama_cpp.llama_model_load_from_file(filepath.encode(), p)
|
|
27
|
+
assert model is not None
|
|
28
|
+
vocab = llama_cpp.llama_model_get_vocab(model)
|
|
29
|
+
assert vocab is not None
|
|
30
|
+
llt = llguidance.llamacpp.lltokenizer_from_vocab(vocab)
|
|
31
|
+
for s in [
|
|
32
|
+
"Hello world!", "Hello world! こんにちは世界!", "wave 👋", "heart 👋💖",
|
|
33
|
+
"1`a`b`c`d`e`f`g`h`i"
|
|
34
|
+
]:
|
|
35
|
+
toks = llt.tokenize_str(s)
|
|
36
|
+
print(llt.dbg_tokens(toks))
|
|
37
|
+
assert llt.decode_str(toks) == s
|
|
38
|
+
toks = llt.tokenize_bytes(b"\x8b")
|
|
39
|
+
print(llt.dbg_tokens(toks))
|
|
40
|
+
print(toks)
|
|
41
|
+
assert len(toks) == 1
|
|
42
|
+
assert llt.decode_bytes(toks) == b"\x8b"
|
|
@@ -34,6 +34,78 @@ struct PyMidProcessResult {
|
|
|
34
34
|
temperature: f32,
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
+
type LlamaTokenizeFn = unsafe extern "C" fn(
|
|
38
|
+
vocab: *const std::os::raw::c_void,
|
|
39
|
+
text: *const std::os::raw::c_char,
|
|
40
|
+
text_len: i32,
|
|
41
|
+
tokens: *mut i32,
|
|
42
|
+
n_tokens_max: i32,
|
|
43
|
+
add_special: bool,
|
|
44
|
+
parse_special: bool,
|
|
45
|
+
) -> i32;
|
|
46
|
+
|
|
47
|
+
struct LlamaTokenizerInner {
|
|
48
|
+
trie: TokTrie,
|
|
49
|
+
tokenize_fn: LlamaTokenizeFn,
|
|
50
|
+
vocab: *const std::os::raw::c_void,
|
|
51
|
+
}
|
|
52
|
+
// SAFETY: tokenize_fn is required to be thread-safe
|
|
53
|
+
unsafe impl Send for LlamaTokenizerInner {}
|
|
54
|
+
unsafe impl Sync for LlamaTokenizerInner {}
|
|
55
|
+
|
|
56
|
+
impl LlamaTokenizerInner {
|
|
57
|
+
fn raw_tokenize(&self, s: &[u8]) -> Vec<toktrie::TokenId> {
|
|
58
|
+
let mut res_toks = vec![0u32; s.len() / 4 + 5];
|
|
59
|
+
let res = unsafe {
|
|
60
|
+
(self.tokenize_fn)(
|
|
61
|
+
self.vocab,
|
|
62
|
+
s.as_ptr() as *const std::os::raw::c_char,
|
|
63
|
+
s.len().try_into().unwrap(),
|
|
64
|
+
res_toks.as_mut_ptr() as *mut i32,
|
|
65
|
+
res_toks.len().try_into().unwrap(),
|
|
66
|
+
false,
|
|
67
|
+
false,
|
|
68
|
+
)
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
let res = if res < 0 {
|
|
72
|
+
let n_toks = (-res) as usize;
|
|
73
|
+
res_toks.resize(n_toks, 0);
|
|
74
|
+
let res2 = unsafe {
|
|
75
|
+
(self.tokenize_fn)(
|
|
76
|
+
self.vocab,
|
|
77
|
+
s.as_ptr() as *const std::os::raw::c_char,
|
|
78
|
+
s.len().try_into().unwrap(),
|
|
79
|
+
res_toks.as_mut_ptr() as *mut i32,
|
|
80
|
+
res_toks.len().try_into().unwrap(),
|
|
81
|
+
false,
|
|
82
|
+
false,
|
|
83
|
+
)
|
|
84
|
+
};
|
|
85
|
+
assert!(res2 == n_toks as i32);
|
|
86
|
+
res2
|
|
87
|
+
} else {
|
|
88
|
+
res
|
|
89
|
+
};
|
|
90
|
+
|
|
91
|
+
res_toks.truncate(res as usize);
|
|
92
|
+
res_toks
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
impl TokenizerEnv for LlamaTokenizerInner {
|
|
97
|
+
fn tok_trie(&self) -> &TokTrie {
|
|
98
|
+
&self.trie
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
fn tokenize_bytes(&self, s: &[u8]) -> Vec<toktrie::TokenId> {
|
|
102
|
+
// llama.cpp tokenizer encodes invalid UTF8 as Unicode replacement character U+FFFD,
|
|
103
|
+
// so we need the greedy fallback
|
|
104
|
+
self.trie
|
|
105
|
+
.tokenize_with_greedy_fallback(s, |s| self.raw_tokenize(s.as_bytes()))
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
37
109
|
#[pymethods]
|
|
38
110
|
impl LLTokenizer {
|
|
39
111
|
#[new]
|
|
@@ -73,6 +145,42 @@ impl LLTokenizer {
|
|
|
73
145
|
})
|
|
74
146
|
}
|
|
75
147
|
|
|
148
|
+
#[staticmethod]
|
|
149
|
+
#[pyo3(signature = (*, tokens, vocab_ptr, tokenize_fptr, eos_token, slices=None))]
|
|
150
|
+
fn from_llamacpp(
|
|
151
|
+
tokens: Vec<Vec<u8>>,
|
|
152
|
+
vocab_ptr: usize,
|
|
153
|
+
tokenize_fptr: usize,
|
|
154
|
+
eos_token: u32,
|
|
155
|
+
slices: Option<Vec<String>>,
|
|
156
|
+
) -> PyResult<Self> {
|
|
157
|
+
if vocab_ptr == 0 {
|
|
158
|
+
return Err(PyValueError::new_err("vocab_ptr must be non-null"));
|
|
159
|
+
}
|
|
160
|
+
if tokenize_fptr == 0 {
|
|
161
|
+
return Err(PyValueError::new_err("tokenize_fptr must be non-null"));
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
let info = TokRxInfo::new(tokens.len() as u32, eos_token);
|
|
165
|
+
let trie = TokTrie::from(&info, &tokens);
|
|
166
|
+
|
|
167
|
+
let llama_tok = LlamaTokenizerInner {
|
|
168
|
+
trie,
|
|
169
|
+
tokenize_fn: unsafe { std::mem::transmute::<usize, LlamaTokenizeFn>(tokenize_fptr) },
|
|
170
|
+
vocab: vocab_ptr as *const std::os::raw::c_void,
|
|
171
|
+
};
|
|
172
|
+
let tok_env: TokEnv = Arc::new(llama_tok);
|
|
173
|
+
let factory = ParserFactory::new(
|
|
174
|
+
&tok_env,
|
|
175
|
+
InferenceCapabilities::default(),
|
|
176
|
+
&slices.unwrap_or_else(SlicedBiasComputer::general_slices),
|
|
177
|
+
)
|
|
178
|
+
.map_err(val_error)?;
|
|
179
|
+
Ok(LLTokenizer {
|
|
180
|
+
factory: Arc::new(factory),
|
|
181
|
+
})
|
|
182
|
+
}
|
|
183
|
+
|
|
76
184
|
fn with_slices(&self, slices: Vec<String>) -> PyResult<Self> {
|
|
77
185
|
let factory = self.factory.with_slices(&slices)?;
|
|
78
186
|
Ok(LLTokenizer {
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
# installing guidance for deps
|
|
4
4
|
pip install pytest guidance huggingface_hub tokenizers jsonschema maturin[zig] \
|
|
5
|
-
torch transformers==4.52.1 bitsandbytes ipython psutil mypy
|
|
5
|
+
torch transformers==4.52.1 bitsandbytes ipython psutil mypy llama_cpp_python
|
|
6
6
|
pip uninstall -y guidance
|
|
7
7
|
|
|
8
8
|
# print out versions
|
|
@@ -102,7 +102,11 @@ impl ByteTokenizer {
|
|
|
102
102
|
}
|
|
103
103
|
|
|
104
104
|
let vocab_size = hft.get_vocab_size(true) as u32;
|
|
105
|
-
let added = hft.get_added_tokens_decoder();
|
|
105
|
+
let mut added = hft
|
|
106
|
+
.get_added_tokens_decoder()
|
|
107
|
+
.into_iter()
|
|
108
|
+
.collect::<Vec<_>>();
|
|
109
|
+
added.sort_by_key(|(id, _)| *id);
|
|
106
110
|
|
|
107
111
|
let mut res = ByteTokenizer {
|
|
108
112
|
hf_model: "foobar".to_string(),
|
|
@@ -114,7 +118,8 @@ impl ByteTokenizer {
|
|
|
114
118
|
let mut specials = HashSet::new();
|
|
115
119
|
|
|
116
120
|
for (id, info) in added.iter() {
|
|
117
|
-
|
|
121
|
+
// we treat all added tokens of the form <...> as special tokens
|
|
122
|
+
if info.special || (info.content.starts_with("<") && info.content.ends_with(">")) {
|
|
118
123
|
match info.content.as_str() {
|
|
119
124
|
"</s>"
|
|
120
125
|
| "<|endoftext|>"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|