PyPI - llguidance - Versions diffs - 0.7.19__tar.gz → 0.7.21__tar.gz - Mend

llguidance 0.7.19__tar.gz → 0.7.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (174) hide show

{llguidance-0.7.19 → llguidance-0.7.21}/.github/workflows/rust.yml RENAMED Viewed

@@ -52,3 +52,20 @@ jobs:
       with:
         name: wheels
         path: target/wheels/*
+  msrv:
+    name: MSRV Check
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Rust
+      uses: dtolnay/rust-toolchain@1.80.0
+      with:
+        components: clippy
+    - name: Build parser
+      run: cargo build --verbose --locked
+      working-directory: parser

{llguidance-0.7.19 → llguidance-0.7.21}/CHANGELOG.md RENAMED Viewed

@@ -4,6 +4,18 @@ All notable changes to this project will be documented in this file. Dates are d
 If a release doesn't introduce any interesting changes (build fixes etc.), it's skipped.
+#### [0.7.21](https://github.com/guidance-ai/llguidance/compare/v0.7.20...0.7.21) 2025-05-20
+- include parser state in errors [`82e34da`](https://github.com/guidance-ai/llguidance/commit/82e34da704d22f04979d8cbc54a0ac00885a277d)
+- tighten email format in JSON schema [`7454ea9`](https://github.com/guidance-ai/llguidance/commit/7454ea9df958f8bcc42e6bb986d6de397de65b3e)
+#### [0.7.20](https://github.com/guidance-ai/llguidance/compare/v0.7.19...0.7.20) 2025-05-15
+- use fancy-regex instead of onig as tokenizers regex library [`#172`](https://github.com/guidance-ai/llguidance/pull/172)
+  - fixes compilation on GCC 15, thanks [@Slowki](https://github.com/Slowki)
+- msrv 1.80 support (incl. derivre bump) [`c89e386`](https://github.com/guidance-ai/llguidance/commit/c89e386685cd911a89fd47df225de88f88c10883), thank you [@nteodosio](https://github.com/nteodosio) for initial [PR](https://github.com/guidance-ai/llguidance/pull/170)!
 #### [0.7.19](https://github.com/guidance-ai/llguidance/compare/v0.7.18...0.7.19) 2025-04-24
 - fix a numeric token bug [`1f59edf`](https://github.com/guidance-ai/llguidance/commit/1f59edfc49b44cfba74b2380f34874a0778d9441)

{llguidance-0.7.19 → llguidance-0.7.21}/Cargo.lock RENAMED Viewed

@@ -150,12 +150,6 @@ version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
-[[package]]
-name = "bitflags"
-version = "1.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 [[package]]
 name = "bitflags"
 version = "2.9.0"
@@ -401,9 +395,9 @@ dependencies = [
 [[package]]
 name = "derivre"
-version = "0.3.7"
+version = "0.3.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a605f30e6a1460a323cc4de7bc62dea81df1d9d67eb92194d3a983a8a9601c4"
+checksum = "786c7c65c4ef0c7deb05de3005e01991612a8f09fe0844fc0969c68b90468ba8"
 dependencies = [
  "ahash",
  "anyhow",
@@ -672,8 +666,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
 dependencies = [
  "cfg-if",
+ "js-sys",
  "libc",
  "wasi 0.11.0+wasi-snapshot-preview1",
+ "wasm-bindgen",
 ]
 [[package]]
@@ -1160,7 +1156,7 @@ version = "0.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d"
 dependencies = [
- "bitflags 2.9.0",
+ "bitflags",
  "libc",
 ]
@@ -1178,7 +1174,7 @@ checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856"
 [[package]]
 name = "llguidance"
-version = "0.7.19"
+version = "0.7.21"
 dependencies = [
  "anyhow",
  "derivre",
@@ -1197,7 +1193,7 @@ dependencies = [
 [[package]]
 name = "llguidance_py"
-version = "0.7.19"
+version = "0.7.21"
 dependencies = [
  "anyhow",
  "bytemuck",
@@ -1446,35 +1442,13 @@ version = "1.21.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
-[[package]]
-name = "onig"
-version = "6.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f"
-dependencies = [
- "bitflags 1.3.2",
- "libc",
- "once_cell",
- "onig_sys",
-]
-[[package]]
-name = "onig_sys"
-version = "69.8.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7"
-dependencies = [
- "cc",
- "pkg-config",
-]
 [[package]]
 name = "openssl"
 version = "0.10.72"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da"
 dependencies = [
- "bitflags 2.9.0",
+ "bitflags",
  "cfg-if",
  "foreign-types",
  "libc",
@@ -1747,7 +1721,7 @@ version = "0.5.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d2f103c6d277498fbceb16e84d317e2a400f160f46904d5f5410848c829511a3"
 dependencies = [
- "bitflags 2.9.0",
+ "bitflags",
 ]
 [[package]]
@@ -1897,7 +1871,7 @@ version = "1.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d97817398dd4bb2e6da002002db259209759911da105da92bec29ccb12cf58bf"
 dependencies = [
- "bitflags 2.9.0",
+ "bitflags",
  "errno",
  "libc",
  "linux-raw-sys",
@@ -1992,7 +1966,7 @@ version = "2.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
 dependencies = [
- "bitflags 2.9.0",
+ "bitflags",
  "core-foundation",
  "core-foundation-sys",
  "libc",
@@ -2185,7 +2159,7 @@ version = "0.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
 dependencies = [
- "bitflags 2.9.0",
+ "bitflags",
  "core-foundation",
  "system-configuration-sys",
 ]
@@ -2278,13 +2252,13 @@ dependencies = [
  "aho-corasick",
  "derive_builder",
  "esaxx-rs",
+ "fancy-regex",
  "getrandom 0.2.15",
  "itertools 0.13.0",
  "lazy_static",
  "log",
  "macro_rules_attribute",
  "monostate",
- "onig",
  "paste",
  "rand",
  "rayon",
@@ -2362,7 +2336,7 @@ dependencies = [
 [[package]]
 name = "toktrie"
-version = "0.7.19"
+version = "0.7.21"
 dependencies = [
  "anyhow",
  "bytemuck",
@@ -2373,7 +2347,7 @@ dependencies = [
 [[package]]
 name = "toktrie_hf_downloader"
-version = "0.7.19"
+version = "0.7.21"
 dependencies = [
  "anyhow",
  "hf-hub",
@@ -2384,7 +2358,7 @@ dependencies = [
 [[package]]
 name = "toktrie_hf_tokenizers"
-version = "0.7.19"
+version = "0.7.21"
 dependencies = [
  "anyhow",
  "log",
@@ -2987,7 +2961,7 @@ version = "0.39.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
 dependencies = [
- "bitflags 2.9.0",
+ "bitflags",
 ]
 [[package]]

{llguidance-0.7.19 → llguidance-0.7.21}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llguidance
-Version: 0.7.19
+Version: 0.7.21
 License-File: LICENSE
 Summary: Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance
 Author: Michal Moskal
@@ -20,6 +20,7 @@ Project-URL: issue_tracker, https://github.com/microsoft/llguidance/issues
 ---
+* 2025-05-20 LLGuidance [shipped](https://x.com/OpenAIDevs/status/1924915341052019166) in [OpenAI](https://x.com/OpenAIDevs/status/1924915343677653014) for JSON Schema
 * 2025-04-11 integration [merged](https://github.com/chromium/chromium/commit/07ca6337c2f714ba0477202414bd2b1692e70594) into Chromium
 * 2025-03-25 integration [merged](https://github.com/vllm-project/vllm/pull/14779) into vLLM (v0.8.2)
 * 2025-02-26 integration [merged](https://github.com/sgl-project/sglang/pull/3298) into SGLang (v0.4.4)
@@ -59,6 +60,7 @@ The library can be used from:
 The library is currently integrated in:
 - [Guidance](https://github.com/guidance-ai/guidance) - library for interacting with LLMs
+- [OpenAI models](https://x.com/OpenAIDevs/status/1924915343677653014) - LLGuidance powers [Structured Output](https://platform.openai.com/docs/guides/structured-outputs) (JSON Schema only)
 - [llama.cpp](https://github.com/ggerganov/llama.cpp/pull/10224) -
   available via `-DLLAMA_LLGUIDANCE=ON` option for `cmake`;
   llama.cpp can be also used Guidance Python package

{llguidance-0.7.19 → llguidance-0.7.21}/README.md RENAMED Viewed

@@ -8,6 +8,7 @@
 ---
+* 2025-05-20 LLGuidance [shipped](https://x.com/OpenAIDevs/status/1924915341052019166) in [OpenAI](https://x.com/OpenAIDevs/status/1924915343677653014) for JSON Schema
 * 2025-04-11 integration [merged](https://github.com/chromium/chromium/commit/07ca6337c2f714ba0477202414bd2b1692e70594) into Chromium
 * 2025-03-25 integration [merged](https://github.com/vllm-project/vllm/pull/14779) into vLLM (v0.8.2)
 * 2025-02-26 integration [merged](https://github.com/sgl-project/sglang/pull/3298) into SGLang (v0.4.4)
@@ -47,6 +48,7 @@ The library can be used from:
 The library is currently integrated in:
 - [Guidance](https://github.com/guidance-ai/guidance) - library for interacting with LLMs
+- [OpenAI models](https://x.com/OpenAIDevs/status/1924915343677653014) - LLGuidance powers [Structured Output](https://platform.openai.com/docs/guides/structured-outputs) (JSON Schema only)
 - [llama.cpp](https://github.com/ggerganov/llama.cpp/pull/10224) -
   available via `-DLLAMA_LLGUIDANCE=ON` option for `cmake`;
   llama.cpp can be also used Guidance Python package

{llguidance-0.7.19 → llguidance-0.7.21}/parser/Cargo.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [package]
 name = "llguidance"
-version = "0.7.19"
+version = "0.7.21"
 edition = "2021"
 license = "MIT"
 description = "Super-fast Structured Outputs"
@@ -8,7 +8,7 @@ repository = "https://github.com/guidance-ai/llguidance"
 [dependencies]
 toktrie = { workspace = true }
-derivre = { version = "=0.3.7", default-features = false, features = ["compress"] }
+derivre = { version = "=0.3.8", default-features = false, features = ["compress"] }
 serde = { version = "1.0.217", features = ["derive"] }
 serde_json = { version = "1.0.138", features = ["preserve_order"] }
 anyhow = "1.0.95"

{llguidance-0.7.19 → llguidance-0.7.21}/parser/build.rs RENAMED Viewed

@@ -5,7 +5,7 @@ fn main() {
     let required_vars = [
         "CARGO",
-        "CARGO_MANIFEST_PATH",
+        // "CARGO_MANIFEST_PATH",
         "CARGO_PKG_NAME",
         "CARGO_PKG_VERSION",
         "OUT_DIR",

{llguidance-0.7.19 → llguidance-0.7.21}/parser/llguidance.h RENAMED Viewed

@@ -494,7 +494,7 @@ int32_t llg_matcher_compute_mask(struct LlgMatcher *matcher);
 const uint32_t *llg_matcher_get_mask(struct LlgMatcher *matcher);
 /**
- * Return pointer to the mask computed by llg_matcher_compute_mask(), if any.
+ * Return the size of the mask in bytes.
  */
 size_t llg_matcher_get_mask_byte_size(struct LlgMatcher *matcher);

{llguidance-0.7.19 → llguidance-0.7.21}/parser/src/constraint.rs RENAMED Viewed

@@ -137,7 +137,7 @@ impl Constraint {
     /// The splice is never returned when ff_tokens are disabled in InferenceCapabilities.
     /// After this returns, commit_token() must be called with the sampled token if any.
     pub fn compute_mask(&mut self) -> Result<&StepResult> {
-        panic_utils::catch_unwind(std::panic::AssertUnwindSafe(|| self.compute_mask_inner()))
+        self.catch_unwind(|s| s.compute_mask_inner())
             .map(|_| &self.last_res)
     }
@@ -185,6 +185,14 @@ impl Constraint {
         self.parser.validate_tokens_raw(tokens)
     }
+    fn catch_unwind<F, R>(&mut self, f: F) -> Result<R>
+    where
+        F: FnOnce(&mut Self) -> Result<R>,
+    {
+        panic_utils::catch_unwind(std::panic::AssertUnwindSafe(|| f(self)))
+            .map_err(|e| anyhow::anyhow!(self.parser.augment_err(e)))
+    }
     /// commit_token() is a top-level method in this file and is called by
     /// the LLInterpreter::commit_token().
     ///
@@ -194,9 +202,7 @@ impl Constraint {
     /// It only returns 'STOP' if previous compute_mask() already returned 'STOP'
     /// (in which case there's little point calling commit_token()).
     pub fn commit_token(&mut self, sampled_token: Option<TokenId>) -> Result<CommitResult> {
-        panic_utils::catch_unwind(std::panic::AssertUnwindSafe(|| {
-            self.commit_token_inner(sampled_token)
-        }))
+        self.catch_unwind(|s| s.commit_token_inner(sampled_token))
     }
     fn commit_token_inner(&mut self, sampled_token: Option<TokenId>) -> Result<CommitResult> {

{llguidance-0.7.19 → llguidance-0.7.21}/parser/src/ffi.rs RENAMED Viewed

@@ -554,9 +554,12 @@ pub extern "C" fn llg_get_temperature(cc: &LlgConstraint) -> f32 {
 /// Check if constraint is stopped (cannot be extended further).
 #[no_mangle]
 pub extern "C" fn llg_is_stopped(cc: &LlgConstraint) -> bool {
-    cc.constraint
-        .as_ref()
-        .is_none_or(|c| c.step_result().is_stop())
+    if let Some(c) = &cc.constraint {
+        c.step_result().is_stop()
+    } else {
+        // if there is no constraint, we consider it stopped
+        true
+    }
 }
 /// Compute mask for the next token sampling
@@ -1102,7 +1105,7 @@ pub extern "C" fn llg_matcher_get_mask(matcher: &mut LlgMatcher) -> *const u32 {
         .map_or(std::ptr::null(), |m| m.as_ptr())
 }
-/// Return pointer to the mask computed by llg_matcher_compute_mask(), if any.
+/// Return the size of the mask in bytes.
 #[no_mangle]
 pub extern "C" fn llg_matcher_get_mask_byte_size(matcher: &mut LlgMatcher) -> usize {
     matcher.mask_elts() * 4

{llguidance-0.7.19 → llguidance-0.7.21}/parser/src/json/formats.rs RENAMED Viewed

@@ -27,9 +27,15 @@ pub fn lookup_format(name: &str) -> Option<&str> {
         "duration" => {
             r"P(?:(?P<dur_date>(?:(?P<dur_year>[0-9]+Y(?:[0-9]+M(?:[0-9]+D)?)?)|(?P<dur_month>[0-9]+M(?:[0-9]+D)?)|(?P<dur_day>[0-9]+D))(?:T(?:(?P<dur_hour>[0-9]+H(?:[0-9]+M(?:[0-9]+S)?)?)|(?P<dur_minute>[0-9]+M(?:[0-9]+S)?)|(?P<dur_second>[0-9]+S)))?)|(?P<dur_time>T(?:(?P<dur_hour2>[0-9]+H(?:[0-9]+M(?:[0-9]+S)?)?)|(?P<dur_minute2>[0-9]+M(?:[0-9]+S)?)|(?P<dur_second2>[0-9]+S)))|(?P<dur_week>[0-9]+W))"
         }
-        "email" => {
-            r"(?P<local_part>(?P<dot_string>[^\s@\.]+(\.[^\s@\.]+)*))@((?P<domain>(?P<sub_domain>[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?)(\.(?P<sub_domain2>[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?))*)|\[(?P<ipv4>((([0-9])|(([1-9])[0-9]|(25[0-5]|(2[0-4]|(1)[0-9])[0-9])))\.){3}(([0-9])|(([1-9])[0-9]|(25[0-5]|(2[0-4]|(1)[0-9])[0-9]))))\])"
-        }
+        // https://www.rfc-editor.org/rfc/inline-errata/rfc5321.html 4.1.2 -> Mailbox
+        "email" => concat!(
+            r"(?P<local_part>(?P<dot_string>[a-zA-Z0-9!#$%&'*+\-/=?\^_`{|}~]+(\.[a-zA-Z0-9!#$%&'*+\-/=?\^_`{|}~]+)*))",
+            r"@(",
+            r"(?P<domain>(?P<sub_domain>[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?)(\.(?P<sub_domain2>[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?))*)",
+            r"|",
+            r"\[(?P<ipv4>((([0-9])|(([1-9])[0-9]|(25[0-5]|(2[0-4]|(1)[0-9])[0-9])))\.){3}(([0-9])|(([1-9])[0-9]|(25[0-5]|(2[0-4]|(1)[0-9])[0-9]))))\]",
+            r")"
+        ),
         "hostname" => {
             r"[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*"
         }

{llguidance-0.7.19 → llguidance-0.7.21}/parser/src/matcher.rs RENAMED Viewed

@@ -1,4 +1,4 @@
-use anyhow::{anyhow, ensure, Result};
+use anyhow::{anyhow, bail, ensure, Result};
 use toktrie::{SimpleVob, TokEnv, TokenId};
 use crate::{api::StopReason, earley::ParserStats, panic_utils, TokenParser};
@@ -48,8 +48,9 @@ impl Matcher {
                 match r {
                     Ok(r) => Ok(r),
                     Err(e) => {
-                        self.0 = MatcherState::Error(e.to_string());
-                        Err(e)
+                        let msg = inner.parser.augment_err(e);
+                        self.0 = MatcherState::Error(msg.clone());
+                        bail!(msg);
                     }
                 }
             }

{llguidance-0.7.19 → llguidance-0.7.21}/parser/src/tokenparser.rs RENAMED Viewed

@@ -1,4 +1,4 @@
-use std::{hint::black_box, panic::AssertUnwindSafe, sync::Arc, time::Duration};
+use std::{fmt::Display, hint::black_box, panic::AssertUnwindSafe, sync::Arc, time::Duration};
 use crate::{
     api::{GrammarInit, ParserLimits, StopReason},
@@ -22,6 +22,9 @@ pub struct TokenParser {
     max_step_stats: ParserStats,
     eos_token: TokenId,
+    had_rollback: bool,
+    had_backtrack: bool,
     is_accepting_cache: Option<bool>,
     ff_tokens_cache: Option<(Vec<TokenId>, Vec<u8>)>,
     stop_reason: StopReason,
@@ -110,6 +113,8 @@ impl TokenParser {
             max_tokens_total: max_tokens,
             last_bias_time: Duration::from_secs(0),
             is_fresh: true,
+            had_backtrack: false,
+            had_rollback: false,
         })
     }
@@ -268,6 +273,34 @@ impl TokenParser {
         res_prompt
     }
+    pub fn augment_err(&self, e: impl Display) -> String {
+        format!("{e}\n<state>\n{}\n</state>", self.dump_state())
+    }
+    pub fn dump_state(&self) -> String {
+        format!(
+            "Tokens: {}\n{} tokens, {} bytes; grm_prefix: {:?}\nFlags:{}{}\nLexer: {}\nParser: {}\nStop: {}\nError: {}",
+            self.tok_trie().tokens_dbg(&self.llm_tokens),
+            self.llm_tokens.len(),
+            self.llm_bytes.len(),
+            String::from_utf8_lossy(&self.grm_prefix),
+            if self.had_backtrack {
+                " had_backtrack"
+            } else {
+                ""
+            },
+            if self.had_rollback {
+                " had_rollback"
+            } else {
+                ""
+            },
+            self.parser.lexer_stats(),
+            self.parser.stats(),
+            self.stop_reason,
+            self.error_message.as_deref().unwrap_or("None"),
+        )
+    }
     fn clear_caches(&mut self) {
         self.is_accepting_cache = None;
         self.ff_tokens_cache = None;
@@ -332,6 +365,8 @@ impl TokenParser {
         // this will fail in case we're in error state or not initialized
         self.check_initialized("rollback")?;
+        self.had_rollback = true;
         let new_len = self.llm_tokens.len() - n_tokens;
         let mut bytes_to_drop = 0;
         for tok in &self.llm_tokens[new_len..] {
@@ -522,6 +557,7 @@ impl TokenParser {
                 self.llm_bytes.extend_from_slice(tok_bytes);
                 if backtrack_bytes0 != 0 {
+                    self.had_backtrack = true;
                     let mut backtrack_bytes: isize = backtrack_bytes0.try_into().unwrap();
                     let mut backtrack_tokens = 0;
                     while backtrack_bytes > 0 {

{llguidance-0.7.19 → llguidance-0.7.21}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "llguidance"
-version = "0.7.19"
+version = "0.7.21"
 description = "Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance"
 requires-python = ">=3.9"
 license = "MIT"

{llguidance-0.7.19 → llguidance-0.7.21}/python/llguidance/cli.py RENAMED Viewed

@@ -1,7 +1,7 @@
 import argparse
 import json
 import huggingface_hub
-from transformers import AutoTokenizer  # type: ignore[attr-defined]
+from transformers import AutoTokenizer
 import llguidance

{llguidance-0.7.19 → llguidance-0.7.21}/python/llguidance/hf.py RENAMED Viewed

@@ -13,11 +13,11 @@ def from_tokenizer(
     """
     Create a new tokenizer from a fast Hugging Face tokenizer.
     This is an expensive operation (~1s), so the result should be cached.
-    It also currently creates a non-canonical tokenizer, which means it cannot
-    produce fast-forward tokens (though it can produce fast-forward bytes).
+    It currently only supports fast tokenizers, which are then handled
+    by the Rust tokenizers library.
     Args:
-        hf_tokenizer: transformers.PreTrainedTokenizerBase - the tokenizer to wrap
+        hf_tokenizer: transformers.PreTrainedTokenizerFast - the tokenizer to wrap
         n_vocab: int - override the size of the vocabulary
         eos_token: int - override the EOS token
         slices: List[str] - configuration for slicer optimization; pass [] to disable,

{llguidance-0.7.19 → llguidance-0.7.21}/python/torch_tests/test_hf.py RENAMED Viewed

@@ -17,7 +17,7 @@ from llguidance import LLMatcher, LLTokenizer, LLExecutor
 import llguidance.hf
-from transformers import AutoTokenizer  # type: ignore[attr-defined]
+from transformers import AutoTokenizer
 def _build_tokenizer() -> LLTokenizer:

{llguidance-0.7.19 → llguidance-0.7.21}/python_ext/Cargo.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [package]
 name = "llguidance_py"
-version = "0.7.19"
+version = "0.7.21"
 edition = "2021"
 license = "MIT"
 description = "Super-fast Structured Outputs"

{llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/tests/test_lark.rs RENAMED Viewed

@@ -1306,3 +1306,37 @@ fn test_json_min_max_properties() {
         ],
     );
 }
+#[test]
+fn test_json_format_email() {
+    json_test_many(
+        &json!({
+            "type": "string",
+            "format": "email",
+        }),
+        &[
+            json!("test@example.com"),
+            json!("foo.bar@example.com"),
+            json!("foo.bar@example-123.com"),
+            json!("foo+bar@example-123.com"),
+            json!("f$o#o`b-a!r@example-123.com"),
+            json!("fo%o#bar@example-123.com"),
+            json!("test@[192.168.1.1]"),
+        ],
+        &[
+            json!(""),
+            json!(" @example.com"),
+            json!("test@"),
+            json!("@example.com"),
+            json!("test@.com"),
+            json!("test@com"),
+            json!("test@com."),
+            json!("test@example..com"),
+            json!("test@example.c"),
+            json!("test@example.c."),
+            json!("test@.example.com"),
+            json!("test:2@example.com"),
+            json!("test[2]@example.com"),
+        ],
+    );
+}

{llguidance-0.7.19 → llguidance-0.7.21}/sample_parser/tests/test_raw_parser.rs RENAMED Viewed

@@ -3,7 +3,7 @@ use llguidance::{
     api::TopLevelGrammar,
     earley::SlicedBiasComputer,
     toktrie::{InferenceCapabilities, TokEnv},
-    ParserFactory, TokenParser,
+    Matcher, ParserFactory, TokenParser,
 };
 use serde_json::{json, Value};
@@ -207,3 +207,31 @@ fn test_ff_early() {
         parser.consume_token(*tok).unwrap();
     }
 }
+#[test]
+fn test_err_state() {
+    let lark = r#"
+        start: /[a-z]*/
+    "#;
+    let tokens = get_tok_env().tokenize("fobarbazqu123");
+    let mut t2 = vec![];
+    for _ in 0..100 {
+        t2.push(tokens[0]);
+        t2.push(tokens[1]);
+        t2.push(tokens[2]);
+    }
+    t2.extend_from_slice(&tokens);
+    let mut matcher = Matcher::new(Ok(make_parser(lark)));
+    for tok in t2.iter() {
+        if let Err(e) = matcher.consume_token(*tok) {
+            let e = e.to_string();
+            println!("Error: {}", e);
+            assert!(e.contains("<state>"));
+            assert!(e.contains("Tokens:"));
+            return;
+        }
+    }
+    unreachable!();
+}

{llguidance-0.7.19 → llguidance-0.7.21}/scripts/install-deps.sh RENAMED Viewed

@@ -2,7 +2,7 @@
 # installing guidance for deps
 pip install pytest guidance huggingface_hub tokenizers jsonschema maturin[zig] \
-    torch transformers bitsandbytes ipython psutil mypy
+    torch transformers==4.52.1 bitsandbytes ipython psutil mypy
 pip uninstall -y guidance
 # print out versions

{llguidance-0.7.19 → llguidance-0.7.21}/toktrie/Cargo.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [package]
 name = "toktrie"
-version = "0.7.19"
+version = "0.7.21"
 edition = "2021"
 license = "MIT"
 description = "LLM Token Trie library"

llguidance 0.7.19__tar.gz → 0.7.21__tar.gz

llguidance 0.7.19__tar.gz → 0.7.21__tar.gz