llguidance 0.7.11__tar.gz → 0.7.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. {llguidance-0.7.11 → llguidance-0.7.12}/CHANGELOG.md +8 -3
  2. {llguidance-0.7.11 → llguidance-0.7.12}/Cargo.lock +7 -7
  3. {llguidance-0.7.11 → llguidance-0.7.12}/PKG-INFO +1 -1
  4. {llguidance-0.7.11 → llguidance-0.7.12}/docs/syntax.md +40 -3
  5. {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/expected_maskbench.json +3 -7
  6. {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/jstats.sh +5 -0
  7. {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/src/json_stats.rs +31 -6
  8. {llguidance-0.7.11 → llguidance-0.7.12}/parser/Cargo.toml +2 -2
  9. {llguidance-0.7.11 → llguidance-0.7.12}/parser/llguidance.h +6 -0
  10. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/api.rs +1 -0
  11. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/from_guidance.rs +60 -61
  12. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/grammar.rs +86 -36
  13. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/lexerspec.rs +37 -10
  14. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/mod.rs +1 -2
  15. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/parser.rs +13 -34
  16. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/regexvec.rs +14 -2
  17. llguidance-0.7.12/parser/src/earley/slicer.rs +394 -0
  18. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/factory.rs +29 -38
  19. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/ffi.rs +124 -63
  20. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/grammar_builder.rs +21 -24
  21. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/compiler.rs +19 -1
  22. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/lexer.rs +1 -1
  23. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lib.rs +1 -0
  24. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/matcher.rs +20 -1
  25. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/tokenparser.rs +13 -37
  26. {llguidance-0.7.11 → llguidance-0.7.12}/pyproject.toml +1 -1
  27. {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/Cargo.toml +1 -1
  28. {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/src/llinterpreter.rs +10 -12
  29. {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/src/llmatcher.rs +10 -17
  30. {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/src/py.rs +1 -2
  31. llguidance-0.7.12/sample_parser/src/minimal.rs +83 -0
  32. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/src/sample_parser.rs +52 -110
  33. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/tests/test_lark.rs +6 -3
  34. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/tests/test_ll.rs +1 -2
  35. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/Cargo.toml +1 -1
  36. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/tokenv.rs +4 -0
  37. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/toktree.rs +13 -0
  38. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_downloader/Cargo.toml +1 -1
  39. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_tokenizers/Cargo.toml +1 -1
  40. llguidance-0.7.11/parser/src/earley/slicer.rs +0 -249
  41. llguidance-0.7.11/sample_parser/src/minimal.rs +0 -179
  42. {llguidance-0.7.11 → llguidance-0.7.12}/.github/workflows/rust.yml +0 -0
  43. {llguidance-0.7.11 → llguidance-0.7.12}/.github/workflows/wheels.yml +0 -0
  44. {llguidance-0.7.11 → llguidance-0.7.12}/.gitignore +0 -0
  45. {llguidance-0.7.11 → llguidance-0.7.12}/CODE_OF_CONDUCT.md +0 -0
  46. {llguidance-0.7.11 → llguidance-0.7.12}/Cargo.toml +0 -0
  47. {llguidance-0.7.11 → llguidance-0.7.12}/LICENSE +0 -0
  48. {llguidance-0.7.11 → llguidance-0.7.12}/README.md +0 -0
  49. {llguidance-0.7.11 → llguidance-0.7.12}/SECURITY.md +0 -0
  50. {llguidance-0.7.11 → llguidance-0.7.12}/SUPPORT.md +0 -0
  51. {llguidance-0.7.11 → llguidance-0.7.12}/c_sample/Makefile +0 -0
  52. {llguidance-0.7.11 → llguidance-0.7.12}/c_sample/README.md +0 -0
  53. {llguidance-0.7.11 → llguidance-0.7.12}/c_sample/c_sample.cpp +0 -0
  54. {llguidance-0.7.11 → llguidance-0.7.12}/docs/fast_forward.md +0 -0
  55. {llguidance-0.7.11 → llguidance-0.7.12}/docs/json_schema.md +0 -0
  56. {llguidance-0.7.11 → llguidance-0.7.12}/docs/mask_plot.png +0 -0
  57. {llguidance-0.7.11 → llguidance-0.7.12}/docs/optimizations.md +0 -0
  58. {llguidance-0.7.11 → llguidance-0.7.12}/docs/special_tokens.md +0 -0
  59. {llguidance-0.7.11 → llguidance-0.7.12}/docs/toktrie.md +0 -0
  60. {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/Cargo.toml +0 -0
  61. {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/scripts/split-stats.sh +0 -0
  62. {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/scripts/split_plot.py +0 -0
  63. {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/src/lib.rs +0 -0
  64. {llguidance-0.7.11 → llguidance-0.7.12}/json_stats/src/stats.rs +0 -0
  65. {llguidance-0.7.11 → llguidance-0.7.12}/parser/LICENSE +0 -0
  66. {llguidance-0.7.11 → llguidance-0.7.12}/parser/README.md +0 -0
  67. {llguidance-0.7.11 → llguidance-0.7.12}/parser/build.rs +0 -0
  68. {llguidance-0.7.11 → llguidance-0.7.12}/parser/cbindgen.toml +0 -0
  69. {llguidance-0.7.11 → llguidance-0.7.12}/parser/grammars/character.json +0 -0
  70. {llguidance-0.7.11 → llguidance-0.7.12}/parser/grammars/json.json +0 -0
  71. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/constraint.rs +0 -0
  72. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/lexer.rs +0 -0
  73. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/earley/perf.rs +0 -0
  74. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/ffi_par.rs +0 -0
  75. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/README.md +0 -0
  76. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/context_ref.rs +0 -0
  77. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/context_simple/context.rs +0 -0
  78. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/context_simple/draft.rs +0 -0
  79. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/context_simple/mod.rs +0 -0
  80. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/formats.rs +0 -0
  81. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/mod.rs +0 -0
  82. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/numeric.rs +0 -0
  83. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/schema.rs +0 -0
  84. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json/shared_context.rs +0 -0
  85. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/json_validation.rs +0 -0
  86. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/README.md +0 -0
  87. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/ast.rs +0 -0
  88. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/common.rs +0 -0
  89. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/compiler.rs +0 -0
  90. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/mod.rs +0 -0
  91. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/lark/parser.rs +0 -0
  92. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/logging.rs +0 -0
  93. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/output.rs +0 -0
  94. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/panic_utils.rs +0 -0
  95. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/stop_controller.rs +0 -0
  96. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/substring.rs +0 -0
  97. {llguidance-0.7.11 → llguidance-0.7.12}/parser/src/tokenizer_json.rs +0 -0
  98. {llguidance-0.7.11 → llguidance-0.7.12}/plan.md +0 -0
  99. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/__init__.py +0 -0
  100. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/_grammar_from.py +0 -0
  101. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/_lib.pyi +0 -0
  102. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/_struct_tag.py +0 -0
  103. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/_tokenizer.py +0 -0
  104. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/_util.py +0 -0
  105. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/cli.py +0 -0
  106. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/gbnf_to_lark.py +0 -0
  107. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/hf.py +0 -0
  108. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/mlx.py +0 -0
  109. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/numpy.py +0 -0
  110. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/py.typed +0 -0
  111. {llguidance-0.7.11 → llguidance-0.7.12}/python/llguidance/torch.py +0 -0
  112. {llguidance-0.7.11 → llguidance-0.7.12}/python/mypy.ini +0 -0
  113. {llguidance-0.7.11 → llguidance-0.7.12}/python/torch_tests/__init__.py +0 -0
  114. {llguidance-0.7.11 → llguidance-0.7.12}/python/torch_tests/test_bitmask.py +0 -0
  115. {llguidance-0.7.11 → llguidance-0.7.12}/python/torch_tests/test_hf.py +0 -0
  116. {llguidance-0.7.11 → llguidance-0.7.12}/python/torch_tests/test_matcher.py +0 -0
  117. {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/src/lib.rs +0 -0
  118. {llguidance-0.7.11 → llguidance-0.7.12}/python_ext/src/pyjson.rs +0 -0
  119. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/Cargo.toml +0 -0
  120. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/README.md +0 -0
  121. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/cli.sh +0 -0
  122. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/blog.sample.json +0 -0
  123. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/blog.schema.json +0 -0
  124. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/blog.schema.ll.json +0 -0
  125. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/README.md +0 -0
  126. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/arithmetic.gbnf +0 -0
  127. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/c.gbnf +0 -0
  128. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/chess.gbnf +0 -0
  129. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/english.gbnf +0 -0
  130. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/japanese.gbnf +0 -0
  131. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/json.gbnf +0 -0
  132. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/json_arr.gbnf +0 -0
  133. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/list.gbnf +0 -0
  134. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/from-llama.cpp/vllm-sql.gbnf +0 -0
  135. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/lark.lark +0 -0
  136. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/rfc.lark +0 -0
  137. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/data/rfc.xml +0 -0
  138. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/gtest.sh +0 -0
  139. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/lark.sh +0 -0
  140. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/run.sh +0 -0
  141. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/src/lib.rs +0 -0
  142. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/tests/test_raw_parser.rs +0 -0
  143. {llguidance-0.7.11 → llguidance-0.7.12}/sample_parser/tests/test_stop.rs +0 -0
  144. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/annotate_asm.js +0 -0
  145. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/bump.py +0 -0
  146. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/cbindgen.sh +0 -0
  147. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/checklinks.py +0 -0
  148. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/checklinks.sh +0 -0
  149. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/ci-publish.py +0 -0
  150. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/disasm.sh +0 -0
  151. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/gbnf_to_lark.py +0 -0
  152. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/gen-testcase.py +0 -0
  153. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/git-version.sh +0 -0
  154. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/install-deps.sh +0 -0
  155. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/jsonschema-stats.js +0 -0
  156. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/remote-guidance-test.sh +0 -0
  157. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/rust-size.js +0 -0
  158. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/rust_size.py +0 -0
  159. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/test-guidance.sh +0 -0
  160. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/tokenizer_test.py +0 -0
  161. {llguidance-0.7.11 → llguidance-0.7.12}/scripts/update-git.py +0 -0
  162. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/LICENSE +0 -0
  163. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/README.md +0 -0
  164. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/bytes.rs +0 -0
  165. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/lib.rs +0 -0
  166. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/recognizer.rs +0 -0
  167. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/rng.rs +0 -0
  168. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/src/svob.rs +0 -0
  169. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie/tests/test_svob.rs +0 -0
  170. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_downloader/LICENSE +0 -0
  171. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_downloader/src/lib.rs +0 -0
  172. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_tokenizers/LICENSE +0 -0
  173. {llguidance-0.7.11 → llguidance-0.7.12}/toktrie_hf_tokenizers/src/lib.rs +0 -0
@@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. Dates are d
4
4
 
5
5
  If a release doesn't introduce any interesting changes (build fixes etc.), it's skipped.
6
6
 
7
+ #### [0.7.12](https://github.com/guidance-ai/llguidance/compare/v0.7.11...0.7.12) 2025-04-04
8
+
9
+ - performance optimizations
10
+ - use factory in C FFI (otherwise slicer was not used)
11
+ - add some null checks and safety comments in C FFI
12
+ - implement subgrammar lexeme class merging; fixes [`#113`](https://github.com/guidance-ai/llguidance/issues/113)
13
+
7
14
  #### [0.7.11](https://github.com/guidance-ai/llguidance/compare/v0.7.10...0.7.11) 2025-03-27
8
15
 
9
16
  - add StructTag python API; fixes [`#146`](https://github.com/guidance-ai/llguidance/issues/146)
@@ -46,7 +53,7 @@ If a release doesn't introduce any interesting changes (build fixes etc.), it's
46
53
  #### [v0.7.1](https://github.com/guidance-ai/llguidance/compare/v0.7.0...v0.7.1) 2025-03-18
47
54
 
48
55
  - add `LLMatcher` interface in python
49
- - add whitespace_pattern to JsonCompileOptions [`04a5491`](https://github.com/guidance-ai/llguidance/commit/04a54912cf6d082669674340833f06385f7b66f8)
56
+ - add whitespace_pattern to JsonCompileOptions [`04a5491`](https://github.com/guidance-ai/llguidance/commit/04a54912cf6d082669674340833f06385f7b66f8)
50
57
  - enable mypy in CI [`#140`](https://github.com/guidance-ai/llguidance/pull/140)
51
58
  - add py.typed for annotations information [`#139`](https://github.com/guidance-ai/llguidance/pull/139)
52
59
  - fix clippy warnings
@@ -60,7 +67,6 @@ If a release doesn't introduce any interesting changes (build fixes etc.), it's
60
67
  - fix https://github.com/guidance-ai/guidance/issues/1131 - backtracking+prompt healing [`#1131`](https://github.com/guidance-ai/guidance/issues/1131)
61
68
  - optimize substring [`9950600`](https://github.com/guidance-ai/llguidance/commit/9950600f46e433b4c42506f8816f61cee331774f)
62
69
 
63
-
64
70
  #### [v0.6.29](https://github.com/guidance-ai/llguidance/compare/v0.6.28...v0.6.29) 2025-02-25
65
71
 
66
72
  - [JSON] "x-guidance" JsonCompileOptions [`#130`](https://github.com/guidance-ai/llguidance/pull/130)
@@ -110,4 +116,3 @@ Plus a few releases messing with, deps, unsafe code cleanup.
110
116
 
111
117
  - fixes for numeric tokens [`b7c9970`](https://github.com/guidance-ai/llguidance/commit/b7c99709a9cb7f7a8a3c4716092e4d94fae2ff2c)
112
118
  - make capture explicit in lark syntax [`2a57678`](https://github.com/guidance-ai/llguidance/commit/2a57678d9397e8be54cb0c9f14c4270604f8e1a5)
113
-
@@ -401,9 +401,9 @@ dependencies = [
401
401
 
402
402
  [[package]]
403
403
  name = "derivre"
404
- version = "0.3.1"
404
+ version = "0.3.4"
405
405
  source = "registry+https://github.com/rust-lang/crates.io-index"
406
- checksum = "3a3c2606b3ffc46f91fd62d954d55659ba9fb391bb673311b70f50daf9c15e49"
406
+ checksum = "310c9990c5a531352e274c8c929ca667a84b6bbaceb1e095c177e6a979807f57"
407
407
  dependencies = [
408
408
  "ahash",
409
409
  "anyhow",
@@ -1177,7 +1177,7 @@ checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
1177
1177
 
1178
1178
  [[package]]
1179
1179
  name = "llguidance"
1180
- version = "0.7.11"
1180
+ version = "0.7.12"
1181
1181
  dependencies = [
1182
1182
  "anyhow",
1183
1183
  "derivre",
@@ -1196,7 +1196,7 @@ dependencies = [
1196
1196
 
1197
1197
  [[package]]
1198
1198
  name = "llguidance_py"
1199
- version = "0.7.11"
1199
+ version = "0.7.12"
1200
1200
  dependencies = [
1201
1201
  "anyhow",
1202
1202
  "bytemuck",
@@ -2356,7 +2356,7 @@ dependencies = [
2356
2356
 
2357
2357
  [[package]]
2358
2358
  name = "toktrie"
2359
- version = "0.7.11"
2359
+ version = "0.7.12"
2360
2360
  dependencies = [
2361
2361
  "anyhow",
2362
2362
  "bytemuck",
@@ -2367,7 +2367,7 @@ dependencies = [
2367
2367
 
2368
2368
  [[package]]
2369
2369
  name = "toktrie_hf_downloader"
2370
- version = "0.7.11"
2370
+ version = "0.7.12"
2371
2371
  dependencies = [
2372
2372
  "anyhow",
2373
2373
  "hf-hub",
@@ -2378,7 +2378,7 @@ dependencies = [
2378
2378
 
2379
2379
  [[package]]
2380
2380
  name = "toktrie_hf_tokenizers"
2381
- version = "0.7.11"
2381
+ version = "0.7.12"
2382
2382
  dependencies = [
2383
2383
  "anyhow",
2384
2384
  "log",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llguidance
3
- Version: 0.7.11
3
+ Version: 0.7.12
4
4
  License-File: LICENSE
5
5
  Summary: Bindings for the Low-level Guidance (llguidance) Rust library for use within Guidance
6
6
  Author: Michal Moskal
@@ -293,12 +293,13 @@ that llguidance should be used to process the grammar.
293
293
  ### Multiple grammars
294
294
 
295
295
  The input to LLGuidance consists of a list of grammars. This can be accessed via
296
- [LLGuidance API](../parser/src/api.rs). Each of these can be a Lark grammar, a JSON schema,
297
- or a grammar in the API format. With the introduction of `%json` in Lark syntax
296
+ [LLGuidance API](../parser/src/api.rs). Each of these can be a Lark grammar or a JSON schema.
297
+ With the introduction of `%json` in Lark syntax
298
298
  there is less need now for using multiple grammars, but it is still supported.
299
+ We may add nested lark grammars in the future.
299
300
 
300
301
  Inside of Lark grammar, you can reference other grammars using syntax like `@my_grammar`,
301
- refering to grammar with `"name": "my_grammar"` (numeric reference like `@17` are no longer supported).
302
+ referring to the grammar with `"name": "my_grammar"` (numeric references like `@17` are **no longer supported**).
302
303
  The top-level grammar is at index 0.
303
304
 
304
305
  You can specify temperature for subgrammar by referencing it via
@@ -317,6 +318,42 @@ Example:
317
318
  }
318
319
  ```
319
320
 
321
+ #### Subgrammar details
322
+
323
+ Generally, subgrammars share the same context-free grammar but have a separate
324
+ set of lexemes (lexeme class).
325
+ The parser keeps track of a stack of lexeme classes, and considers the top one
326
+ to be the current lexeme class.
327
+ The `%ignore` is applied based on the top lexeme class.
328
+
329
+ Temperature and `max_tokens` can be applied to the current lexeme class as well.
330
+
331
+ There may be issues with subgrammars spanning LLM token boundaries.
332
+
333
+ If two lexeme classes share the `%ignore` regex, and `max_tokens=` and `temperature=`
334
+ are not used, the lexeme classes are merged, which generally allows for parsing
335
+ of more grammars.
336
+ For example, consider:
337
+
338
+ ```lark
339
+ start: a | b
340
+ a: %json { A }
341
+ b: %json { B }
342
+ ```
343
+
344
+ Normally, the parser would have to pick between lexeme class for either A or B
345
+ at the first `{` (it would always pick A since it comes first in the grammar).
346
+ However, if the classes for A and B are merged, the grammar will be equivalent to
347
+ `start: %json { "anyOf": [A, B] }` which is generally what the
348
+ [users expect](https://github.com/guidance-ai/llguidance/issues/113).
349
+
350
+
351
+ ### Features to avoid
352
+
353
+ - `stop=...` - use `suffix=...` or just `lazy`
354
+ - `max_tokens=...` - any use of `max_tokens` will disable rollback, which is needed for spec-decoding; it also makes the parser slower and prevents subgrammar merging
355
+ - `temperature=...` - this is not supported in most server side integrations and prevents subgrammar merging
356
+
320
357
  ### Unsupported Lark features
321
358
 
322
359
  Following features of Lark syntax are currently not supported:
@@ -1291,9 +1291,7 @@
1291
1291
  "Github_easy---o21209.json": {},
1292
1292
  "Github_easy---o21393.json": {},
1293
1293
  "Github_easy---o21455.json": {},
1294
- "Github_easy---o21456.json": {
1295
- "json_error": "Unable to determine if regex is empty: (And (Regex \"([^@^\\\\s]+@[^@^\\\\.^\\\\s]+(\\\\.[^@^\\\\.^\\\\s]*)*.gov.uk).*\") (Regex \"(?s:.{5,254})\"))"
1296
- },
1294
+ "Github_easy---o21456.json": {},
1297
1295
  "Github_easy---o21458.json": {},
1298
1296
  "Github_easy---o21459.json": {},
1299
1297
  "Github_easy---o21460.json": {},
@@ -5037,7 +5035,7 @@
5037
5035
  "Github_hard---o69969.json": {},
5038
5036
  "Github_hard---o69970.json": {},
5039
5037
  "Github_hard---o69972.json": {
5040
- "json_error": "Unable to determine if regex is empty: (And (And (Regex \"(\\\\w+([\\\\.-]?\\\\w+)*@\\\\w+([\\\\.-]?\\\\w+)*(\\\\.\\\\w{2,})+)\") (Regex \"((?P<local_part>(?P<dot_string>[^\\\\s@\\\\.]+(\\\\.[^\\\\s@\\\\.]+)*))@((?P<domain>(?P<sub_domain>[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?)(\\\\.(?P<sub_domain2>[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?))*)|\\\\[(?P<ipv4>((([0-9])|(([1-9])[0-9]|(25[0-5]|(2[0-4]|(1)[0-9])[0-9])))\\\\.){3}(([0-9])|(([1-9])[0-9]|(25[0-5]|(2[0-4]|(1)[0-9])[0-9]))))\\\\]))\")) (Regex \"(?s:.{6,})\"))"
5038
+ "validation_error": "test #0: token not accepted at ⟦loc‧-‧1‧\",\"‧code‧\":\"‧LOC‧-‧1‧\",\"‧name‧\":\"‧Main‧ Library‧\",\"‧library‧\":{\"‧$‧ref‧\":\"‧https‧://‧ils‧.r‧ero‧.ch‧/api‧/lib‧raries‧/lib‧-‧1‧\"},\"‧allow‧_request‧\":‧true‧,\"‧send‧_notification‧\":‧true‧,\"‧notification‧_email‧\":\"‧library‧@example‧.com‧\",\"⟧ * ⟦is⟧ * ⟦_online‧\":‧false‧,\"⟧ forced tokens \"⟦restrict‧_pick‧up‧_to⟧\" != \"⟦is‧_online‧\\\":‧false⟧\""
5041
5039
  },
5042
5040
  "Github_hard---o69976.json": {},
5043
5041
  "Github_hard---o70037.json": {},
@@ -7356,9 +7354,7 @@
7356
7354
  "Github_medium---o6378.json": {},
7357
7355
  "Github_medium---o63935.json": {},
7358
7356
  "Github_medium---o63937.json": {},
7359
- "Github_medium---o63939.json": {
7360
- "json_error": "Unable to determine if regex is empty: (And (Regex \"([\\\\w\\\\-\\\\./]+\\\\.php+)\") (Regex \"(?s:.{16,1024})\"))"
7361
- },
7357
+ "Github_medium---o63939.json": {},
7362
7358
  "Github_medium---o63941.json": {},
7363
7359
  "Github_medium---o63945.json": {},
7364
7360
  "Github_medium---o63998.json": {},
@@ -18,6 +18,11 @@ if [ "$1" == "--bench" ] ; then
18
18
  done
19
19
  fi
20
20
 
21
+ if [ "$1" == "--exp" ] ; then
22
+ shift
23
+ DEFAULT_ARGS="--expected expected_maskbench.json"
24
+ fi
25
+
21
26
  if [ -z "$PERF" ]; then
22
27
  cargo build --release
23
28
  ../target/release/json_stats $DEFAULT_ARGS "$@"
@@ -60,6 +60,10 @@ pub struct CliOptions {
60
60
  #[arg(long)]
61
61
  llg_no_forcing: bool,
62
62
 
63
+ /// Set stderr log level; implies --num-threads 1
64
+ #[arg(long, default_value = "0")]
65
+ llg_log_level: u32,
66
+
63
67
  /// Test the slicer optimization against un-sliced parser
64
68
  #[arg(long)]
65
69
  llg_test_slicer: bool,
@@ -80,6 +84,10 @@ pub struct CliOptions {
80
84
  #[arg(long)]
81
85
  csv: bool,
82
86
 
87
+ /// Don't print JSON output and perf counters
88
+ #[arg(long)]
89
+ quiet: bool,
90
+
83
91
  /// Test rollback mechanism for speculative decoding
84
92
  #[arg(long)]
85
93
  rollback: bool,
@@ -702,8 +710,10 @@ impl TestEnv {
702
710
  Ok(schema) => schema,
703
711
  Err(e) => {
704
712
  res.json_error = Some(format!("{e}"));
713
+ if self.cli.llg_log_level > 0 {
714
+ eprintln!("{} Error JSON: {}", self.file_name, e);
715
+ }
705
716
  limit_string(&mut res.json_error);
706
- // eprintln!("{} Error Compile: {}", file, e);
707
717
  return res;
708
718
  }
709
719
  };
@@ -743,6 +753,9 @@ impl TestEnv {
743
753
  Err(e) => {
744
754
  // eprintln!("{} Error Parser: {}", self.file_name, e);
745
755
  res.parser_error = Some(format!("{e}"));
756
+ if self.cli.llg_log_level > 0 {
757
+ eprintln!("{} Error JSON: {}", self.file_name, e);
758
+ }
746
759
  limit_string(&mut res.parser_error);
747
760
  return res;
748
761
  }
@@ -757,6 +770,9 @@ impl TestEnv {
757
770
  if let Err(e) = self.run_llg_test(&mut res, &parser, ref_parser.as_ref(), t) {
758
771
  if res.validation_error.is_none() {
759
772
  res.validation_error = Some(format!("test #{idx}: {e}"));
773
+ if self.cli.llg_log_level > 0 {
774
+ eprintln!("{} Error Validating: {}", self.file_name, e);
775
+ }
760
776
  limit_string(&mut res.validation_error);
761
777
  }
762
778
  } else if t.valid {
@@ -905,6 +921,9 @@ fn main() {
905
921
  if options.llg_validate_tokens {
906
922
  options.llg_compile = true;
907
923
  }
924
+ if options.llg_log_level > 0 {
925
+ options.num_threads = Some(1);
926
+ }
908
927
 
909
928
  // set max thread numbers
910
929
  let num_cores = std::thread::available_parallelism().unwrap().get();
@@ -961,8 +980,9 @@ fn main() {
961
980
  };
962
981
 
963
982
  let mut factory = ParserFactory::new(&tok_env, caps.clone(), &slices).unwrap();
964
- factory.quiet();
965
- // factory.set_stderr_log_level(2);
983
+ factory.set_buffer_log_level(0);
984
+ factory.set_stderr_log_level(options.llg_log_level);
985
+
966
986
  // factory.limits_mut().step_lexer_fuel = 10_000_000;
967
987
 
968
988
  let mut ref_factory = ParserFactory::new(&tok_env, caps.clone(), &[]).unwrap();
@@ -1131,10 +1151,15 @@ fn main() {
1131
1151
  total.llg.mask_ms_total_a /= 1000;
1132
1152
 
1133
1153
  total.llg_json = llg_totals.clone();
1134
- eprintln!("{}", serde_json::to_string_pretty(&total).unwrap());
1154
+ if !options.quiet {
1155
+ eprintln!(
1156
+ "{}\n{}",
1157
+ serde_json::to_string_pretty(&total).unwrap(),
1158
+ perf_counters
1159
+ );
1160
+ }
1135
1161
  eprintln!(
1136
- "{}Total time: {}ms TTFM {}μs, mask {}μs, ff {}μs, mask+ff {}ms + compile {}ms",
1137
- perf_counters,
1162
+ "Total time: {}ms TTFM {}μs, mask {}μs, ff {}μs, mask+ff {}ms + compile {}ms",
1138
1163
  t0.elapsed().as_millis(),
1139
1164
  total.llg.ttfm_us,
1140
1165
  total.llg.mask_us,
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "llguidance"
3
- version = "0.7.11"
3
+ version = "0.7.12"
4
4
  edition = "2021"
5
5
  license = "MIT"
6
6
  description = "Super-fast Structured Outputs"
@@ -8,7 +8,7 @@ repository = "https://github.com/guidance-ai/llguidance"
8
8
 
9
9
  [dependencies]
10
10
  toktrie = { workspace = true }
11
- derivre = { version = "=0.3.1", default-features = false, features = ["compress"] }
11
+ derivre = { version = "=0.3.4", default-features = false, features = ["compress"] }
12
12
  serde = { version = "1.0.217", features = ["derive"] }
13
13
  serde_json = { version = "1.0.138", features = ["preserve_order"] }
14
14
  anyhow = "1.0.95"
@@ -219,6 +219,12 @@ typedef struct LlgTokenizerInit {
219
219
  * User data to pass to the tokenize_fn
220
220
  */
221
221
  const void *tokenize_user_data;
222
+ /**
223
+ * Tokenizer partitions for the slicer optimization.
224
+ * This is array of pointers to strings, terminated with NULL (argv style).
225
+ * Pass NULL to use defaults. Pass empty array to disable.
226
+ */
227
+ const char *const *slices;
222
228
  } LlgTokenizerInit;
223
229
 
224
230
 
@@ -42,6 +42,7 @@ pub struct LLGuidanceOptions {
42
42
 
43
43
  /// If set, the grammar will allow invalid utf8 byte sequences.
44
44
  /// Any Unicode regex will cause an error.
45
+ /// This is very unlikely what you need.
45
46
  #[serde(default)]
46
47
  pub allow_invalid_utf8: bool,
47
48
  }
@@ -13,78 +13,77 @@ use crate::{GrammarBuilder, HashMap};
13
13
  use anyhow::{bail, ensure, Result};
14
14
  use toktrie::TokEnv;
15
15
 
16
- fn process_grammar(ctx: &mut CompileCtx, input: GrammarWithLexer) -> Result<(SymIdx, LexemeClass)> {
17
- let builder = std::mem::take(&mut ctx.builder).unwrap();
18
-
19
- let res = if let Some(lark) = input.lark_grammar {
20
- ensure!(
21
- input.json_schema.is_none(),
22
- "cannot have both lark_grammar and json_schema"
23
- );
24
- lark_to_llguidance(builder, &lark)?
25
- } else if let Some(mut json_schema) = input.json_schema {
26
- let mut opts = JsonCompileOptions::default();
27
- if let Some(x_guidance) = json_schema.get("x-guidance") {
28
- opts = serde_json::from_value(x_guidance.clone())?;
29
- // TODO not removing it causes oneOf to be handled as anyOf in Github_medium---o61004.json
30
- json_schema.as_object_mut().unwrap().remove("x-guidance");
31
- }
32
- opts.json_to_llg(builder, json_schema)?
33
- } else {
34
- bail!("grammar must have either lark_grammar or json_schema");
35
- };
16
+ struct CompileCtx {
17
+ builder: Option<GrammarBuilder>,
18
+ grammar_by_idx: HashMap<GrammarId, usize>,
19
+ grammar_roots: Vec<(SymIdx, LexemeClass)>,
20
+ }
36
21
 
37
- res.builder.check_limits()?;
22
+ impl CompileCtx {
23
+ fn run_one(&mut self, input: GrammarWithLexer) -> Result<(SymIdx, LexemeClass)> {
24
+ let builder = std::mem::take(&mut self.builder).unwrap();
25
+
26
+ let res = if let Some(lark) = input.lark_grammar {
27
+ ensure!(
28
+ input.json_schema.is_none(),
29
+ "cannot have both lark_grammar and json_schema"
30
+ );
31
+ lark_to_llguidance(builder, &lark)?
32
+ } else if let Some(mut json_schema) = input.json_schema {
33
+ let mut opts = JsonCompileOptions::default();
34
+ if let Some(x_guidance) = json_schema.get("x-guidance") {
35
+ opts = serde_json::from_value(x_guidance.clone())?;
36
+ // TODO not removing it causes oneOf to be handled as anyOf in Github_medium---o61004.json
37
+ json_schema.as_object_mut().unwrap().remove("x-guidance");
38
+ }
39
+ opts.json_to_llg(builder, json_schema)?
40
+ } else {
41
+ bail!("grammar must have either lark_grammar or json_schema");
42
+ };
38
43
 
39
- let grammar_id = res.builder.grammar.sym_props(res.start_node).grammar_id;
44
+ res.builder.check_limits()?;
40
45
 
41
- // restore builder
42
- ctx.builder = Some(res.builder);
46
+ let grammar_id = res.builder.grammar.sym_props(res.start_node).grammar_id;
43
47
 
44
- Ok((res.start_node, grammar_id))
45
- }
48
+ // restore builder
49
+ self.builder = Some(res.builder);
46
50
 
47
- fn process_all_grammars(
48
- mut ctx: CompileCtx,
49
- input: TopLevelGrammar,
50
- ) -> Result<(Grammar, LexerSpec)> {
51
- for (idx, grm) in input.grammars.iter().enumerate() {
52
- if grm.lark_grammar.is_none() && grm.json_schema.is_none() {
53
- bail!("grammar must have either lark_grammar or json_schema");
54
- }
55
- if let Some(n) = &grm.name {
56
- let n = GrammarId::Name(n.to_string());
57
- if ctx.grammar_by_idx.contains_key(&n) {
58
- bail!("duplicate grammar name: {}", n);
59
- }
60
- ctx.grammar_by_idx.insert(n, idx);
61
- }
51
+ Ok((res.start_node, grammar_id))
62
52
  }
63
53
 
64
- for (idx, grm) in input.grammars.into_iter().enumerate() {
65
- let v = process_grammar(&mut ctx, grm)?;
66
- ctx.grammar_roots[idx] = v;
67
- }
54
+ fn run(mut self, input: TopLevelGrammar) -> Result<(Grammar, LexerSpec)> {
55
+ for (idx, grm) in input.grammars.iter().enumerate() {
56
+ if grm.lark_grammar.is_none() && grm.json_schema.is_none() {
57
+ bail!("grammar must have either lark_grammar or json_schema");
58
+ }
59
+ if let Some(n) = &grm.name {
60
+ let n = GrammarId::Name(n.to_string());
61
+ if self.grammar_by_idx.contains_key(&n) {
62
+ bail!("duplicate grammar name: {}", n);
63
+ }
64
+ self.grammar_by_idx.insert(n, idx);
65
+ }
66
+ }
68
67
 
69
- let grammar_by_idx: HashMap<GrammarId, (SymIdx, LexemeClass)> = ctx
70
- .grammar_by_idx
71
- .into_iter()
72
- .map(|(k, v)| (k, ctx.grammar_roots[v]))
73
- .collect();
68
+ for (idx, grm) in input.grammars.into_iter().enumerate() {
69
+ let v = self.run_one(grm)?;
70
+ self.grammar_roots[idx] = v;
71
+ }
74
72
 
75
- let builder = ctx.builder.unwrap();
76
- let mut grammar = builder.grammar;
77
- let mut lexer_spec = builder.regex.spec;
73
+ let grammar_by_idx: HashMap<GrammarId, (SymIdx, LexemeClass)> = self
74
+ .grammar_by_idx
75
+ .into_iter()
76
+ .map(|(k, v)| (k, self.grammar_roots[v]))
77
+ .collect();
78
78
 
79
- grammar.resolve_grammar_refs(&mut lexer_spec, &grammar_by_idx)?;
79
+ let builder = self.builder.unwrap();
80
+ let mut grammar = builder.grammar;
81
+ let mut lexer_spec = builder.regex.spec;
80
82
 
81
- Ok((grammar, lexer_spec))
82
- }
83
+ grammar.resolve_grammar_refs(&mut lexer_spec, &grammar_by_idx)?;
83
84
 
84
- struct CompileCtx {
85
- builder: Option<GrammarBuilder>,
86
- grammar_by_idx: HashMap<GrammarId, usize>,
87
- grammar_roots: Vec<(SymIdx, LexemeClass)>,
85
+ Ok((grammar, lexer_spec))
86
+ }
88
87
  }
89
88
 
90
89
  impl GrammarInit {
@@ -107,7 +106,7 @@ impl GrammarInit {
107
106
  grammar_roots: vec![(SymIdx::BOGUS, LexemeClass::ROOT); input.grammars.len()],
108
107
  };
109
108
 
110
- process_all_grammars(ctx, input)
109
+ ctx.run(input)
111
110
  }
112
111
  }
113
112
  }
@@ -1,6 +1,6 @@
1
1
  use super::lexerspec::{LexemeClass, LexemeIdx, LexerSpec};
2
2
  use crate::api::{GenGrammarOptions, GrammarId, NodeProps};
3
- use crate::HashMap;
3
+ use crate::{HashMap, HashSet};
4
4
  use anyhow::{bail, ensure, Result};
5
5
  use std::fmt::Display;
6
6
  use std::{fmt::Debug, hash::Hash};
@@ -312,7 +312,17 @@ impl Grammar {
312
312
 
313
313
  uf_compress_all(&mut definition);
314
314
 
315
- let mut use_count = vec![0; self.symbols.len()];
315
+ // println!(
316
+ // "symbols: {:?}",
317
+ // self.symbols
318
+ // .iter()
319
+ // .map(|s| (s.idx, &s.name))
320
+ // .collect::<Vec<_>>()
321
+ // );
322
+
323
+ // println!("definition: {:?}", definition);
324
+
325
+ let mut the_user_of = vec![None; self.symbols.len()];
316
326
  for sym in &self.symbols {
317
327
  if definition[sym.idx.as_usize()].is_some() {
318
328
  continue;
@@ -320,19 +330,38 @@ impl Grammar {
320
330
  for r in sym.rules.iter() {
321
331
  for s in &r.rhs {
322
332
  let s = definition[s.as_usize()].unwrap_or(*s);
323
- use_count[s.0 as usize] += 1;
333
+ let idx = s.as_usize();
334
+ if the_user_of[idx].is_none() {
335
+ the_user_of[idx] = Some(r.lhs);
336
+ } else {
337
+ // use self-loop to indicate there are multiple users
338
+ the_user_of[idx] = Some(s);
339
+ }
324
340
  }
325
341
  }
326
342
  }
327
343
 
344
+ // println!("the_user_of: {:?}", the_user_of);
345
+
346
+ // clean up self loops to None
347
+ for idx in 0..the_user_of.len() {
348
+ if let Some(sym) = the_user_of[idx] {
349
+ if sym.as_usize() == idx {
350
+ the_user_of[idx] = None;
351
+ }
352
+ }
353
+ }
354
+
355
+ // println!("the_user_of: {:?}", the_user_of);
356
+
328
357
  let mut repl = crate::HashMap::default();
329
358
 
330
359
  for sym in &self.symbols {
331
360
  if self.is_special_symbol(sym) {
332
361
  continue;
333
362
  }
334
- if sym.rules.len() == 1 && use_count[sym.idx.0 as usize] == 1 {
335
- // eliminate sym.idx
363
+ if sym.rules.len() == 1 && the_user_of[sym.idx.as_usize()].is_some() {
364
+ // we will eliminate sym.idx
336
365
  repl.insert(
337
366
  sym.idx,
338
367
  sym.rules[0]
@@ -344,38 +373,56 @@ impl Grammar {
344
373
  }
345
374
  }
346
375
 
376
+ // println!("repl: {:?}", repl);
377
+
378
+ // these are keys of repl that may need to be used outside of repl itself
379
+ let repl_roots = repl
380
+ .keys()
381
+ .filter(|s| !repl.contains_key(the_user_of[s.as_usize()].as_ref().unwrap()))
382
+ .cloned()
383
+ .collect::<Vec<_>>();
384
+
385
+ // println!("repl_roots: {:?}", repl_roots);
386
+
387
+ let mut to_eliminate = HashSet::from_iter(repl.keys().copied());
347
388
  for (idx, m) in definition.iter().enumerate() {
348
- if let Some(r) = m {
349
- repl.insert(SymIdx(idx as u32), vec![*r]);
389
+ if m.is_some() {
390
+ let src = SymIdx(idx as u32);
391
+ to_eliminate.insert(src);
350
392
  }
351
393
  }
352
394
 
353
- let mut simple_repl = HashMap::default();
354
- while !repl.is_empty() {
355
- let mut new_repl = HashMap::default();
356
- for (k, v) in repl.iter() {
357
- let v2 = v
358
- .iter()
359
- .flat_map(|s| {
360
- simple_repl
361
- .get(s)
362
- .cloned()
363
- .unwrap_or_else(|| repl.get(s).cloned().unwrap_or_else(|| vec![*s]))
364
- })
365
- .collect::<Vec<_>>();
366
- if *v == v2 {
367
- simple_repl.insert(*k, v2);
368
- } else {
369
- new_repl.insert(*k, v2);
395
+ let mut new_repl = HashMap::default();
396
+
397
+ let mut stack = vec![];
398
+ for sym in repl_roots {
399
+ stack.push(vec![sym]);
400
+ let mut res = vec![];
401
+ while let Some(mut lst) = stack.pop() {
402
+ while let Some(e) = lst.pop() {
403
+ if let Some(mut lst2) = repl.remove(&e) {
404
+ lst2.reverse();
405
+ if !lst.is_empty() {
406
+ stack.push(lst);
407
+ }
408
+ stack.push(lst2);
409
+ break;
410
+ }
411
+ assert!(!to_eliminate.contains(&e));
412
+ res.push(e);
370
413
  }
371
414
  }
372
- repl = new_repl;
415
+ // println!("res: {:?} -> {:?}", sym, res);
416
+ new_repl.insert(sym, res);
373
417
  }
374
- repl = simple_repl;
375
418
 
376
- for (k, v) in repl.iter() {
377
- if let Some(p) = v.iter().find(|e| repl.contains_key(*e)) {
378
- panic!("loop at {:?} ({:?})", k, p);
419
+ repl = new_repl;
420
+
421
+ for (idx, m) in definition.iter().enumerate() {
422
+ if let Some(trg) = m {
423
+ if !to_eliminate.contains(trg) {
424
+ repl.insert(SymIdx(idx as u32), vec![*trg]);
425
+ }
379
426
  }
380
427
  }
381
428
 
@@ -403,12 +450,14 @@ impl Grammar {
403
450
  }
404
451
  let lhs = outp.copy_from(self, sym.idx);
405
452
  for rule in &sym.rules {
406
- let rhs = rule
407
- .rhs
408
- .iter()
409
- .flat_map(|s| repl.get(s).cloned().unwrap_or_else(|| vec![*s]))
410
- .map(|s| outp.copy_from(self, s))
411
- .collect();
453
+ let mut rhs = Vec::with_capacity(rule.rhs.len());
454
+ for s in &rule.rhs {
455
+ if let Some(repl) = repl.get(s) {
456
+ rhs.extend(repl.iter().map(|s| outp.copy_from(self, *s)));
457
+ } else {
458
+ rhs.push(outp.copy_from(self, *s));
459
+ }
460
+ }
412
461
  outp.add_rule(lhs, rhs).unwrap();
413
462
  }
414
463
  }
@@ -489,7 +538,8 @@ impl Grammar {
489
538
  pub fn fresh_symbol_ext(&mut self, name0: &str, symprops: SymbolProps) -> SymIdx {
490
539
  let mut name = name0.to_string();
491
540
  let mut idx = self.symbol_count_cache.get(&name).cloned().unwrap_or(2);
492
- while self.symbol_by_name.contains_key(&name) {
541
+ // don't allow empty names
542
+ while name.is_empty() || self.symbol_by_name.contains_key(&name) {
493
543
  name = format!("{}#{}", name0, idx);
494
544
  idx += 1;
495
545
  }