red-candle 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +244 -6
- data/README.md +36 -2
- data/Rakefile +46 -1
- data/ext/candle/Cargo.toml +2 -0
- data/ext/candle/src/lib.rs +2 -0
- data/ext/candle/src/llm/constrained_generation_test.rs +123 -0
- data/ext/candle/src/llm/generation_config.rs +5 -0
- data/ext/candle/src/llm/mod.rs +5 -0
- data/ext/candle/src/llm/phi.rs +285 -0
- data/ext/candle/src/llm/quantized_gguf.rs +155 -4
- data/ext/candle/src/llm/qwen.rs +229 -0
- data/ext/candle/src/llm/text_generation.rs +66 -2
- data/ext/candle/src/ruby/device.rs +5 -0
- data/ext/candle/src/ruby/llm.rs +42 -4
- data/ext/candle/src/ruby/mod.rs +1 -0
- data/ext/candle/src/ruby/structured.rs +47 -0
- data/ext/candle/src/structured/integration_test.rs +130 -0
- data/ext/candle/src/structured/mod.rs +31 -0
- data/ext/candle/src/structured/schema_processor.rs +215 -0
- data/ext/candle/src/structured/vocabulary_adapter.rs +152 -0
- data/ext/candle/src/structured/vocabulary_adapter_real_test.rs +66 -0
- data/ext/candle/src/structured/vocabulary_adapter_simple_test.rs +70 -0
- data/lib/candle/llm.rb +109 -3
- data/lib/candle/version.rb +1 -1
- metadata +14 -4
data/ext/candle/src/llm/qwen.rs
ADDED
@@ -0,0 +1,229 @@
+use candle_core::{DType, Device, Result as CandleResult, Tensor};
+use candle_transformers::models::qwen2::{Config, Model as QwenModel};
+use hf_hub::api::tokio::Api;
+use tokenizers::Tokenizer;
+
+use crate::llm::{GenerationConfig, TextGeneration, TextGenerator, TokenizerWrapper};
+
+/// Qwen model wrapper for text generation
+#[derive(Debug)]
+pub struct Qwen {
+    model: QwenModel,
+    tokenizer: TokenizerWrapper,
+    device: Device,
+    model_id: String,
+    eos_token_id: u32,
+}
+
+impl Qwen {
+    /// Get the tokenizer
+    pub fn tokenizer(&self) -> &TokenizerWrapper {
+        &self.tokenizer
+    }
+
+    /// Clear the KV cache between generations
+    pub fn clear_kv_cache(&mut self) {
+        self.model.clear_kv_cache();
+    }
+
+    /// Load a Qwen model from HuggingFace
+    pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
+        let api = Api::new()
+            .map_err(|e| candle_core::Error::Msg(format!("Failed to create HF API: {}", e)))?;
+
+        let repo = api.model(model_id.to_string());
+
+        // Download configuration
+        let config_filename = repo.get("config.json").await
+            .map_err(|e| candle_core::Error::Msg(format!("Failed to download config: {}", e)))?;
+        let config_str = std::fs::read_to_string(config_filename)?;
+        let config: Config = serde_json::from_str(&config_str)
+            .map_err(|e| candle_core::Error::Msg(format!("Failed to parse config: {}", e)))?;
+
+        // Download tokenizer
+        let tokenizer_filename = repo.get("tokenizer.json").await
+            .map_err(|e| candle_core::Error::Msg(format!("Failed to download tokenizer: {}", e)))?;
+        let tokenizer = Tokenizer::from_file(tokenizer_filename)
+            .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer: {}", e)))?;
+
+        // Determine EOS token
+        let vocab = tokenizer.get_vocab(true);
+        let eos_token_id = vocab.get("<|endoftext|>")
+            .or_else(|| vocab.get("<|im_end|>"))
+            .or_else(|| vocab.get("</s>"))
+            .copied()
+            .unwrap_or(151643); // Default Qwen3 EOS token
+
+        // Download model weights
+        let mut filenames = vec![];
+        let num_shards = if model_id.contains("72b") || model_id.contains("72B") { 8 }
+            else if model_id.contains("14b") || model_id.contains("14B") { 3 }
+            else { 1 };
+
+        if num_shards == 1 {
+            // Single file model
+            let filename = repo.get("model.safetensors").await
+                .map_err(|e| candle_core::Error::Msg(format!("Failed to download model weights: {}", e)))?;
+            filenames.push(filename);
+        } else {
+            // Sharded model
+            for shard_idx in 1..=num_shards {
+                let filename = repo.get(&format!("model-{:05}-of-{:05}.safetensors", shard_idx, num_shards)).await
+                    .map_err(|e| candle_core::Error::Msg(format!("Failed to download shard {}: {}", shard_idx, e)))?;
+                filenames.push(filename);
+            }
+        }
+
+        // Load the model
+        let vb = unsafe {
+            candle_nn::VarBuilder::from_mmaped_safetensors(&filenames, DType::F32, &device)?
+        };
+
+        let model = QwenModel::new(&config, vb)?;
+
+        Ok(Self {
+            model,
+            tokenizer: TokenizerWrapper::new(tokenizer),
+            device,
+            model_id: model_id.to_string(),
+            eos_token_id,
+        })
+    }
+
+    /// Apply Qwen chat template to messages
+    pub fn apply_chat_template(&self, messages: &[serde_json::Value]) -> CandleResult<String> {
+        let mut prompt = String::new();
+
+        for message in messages {
+            let role = message["role"].as_str().unwrap_or("");
+            let content = message["content"].as_str().unwrap_or("");
+
+            match role {
+                "system" => {
+                    prompt.push_str(&format!("<|im_start|>system\n{}<|im_end|>\n", content));
+                }
+                "user" => {
+                    prompt.push_str(&format!("<|im_start|>user\n{}<|im_end|>\n", content));
+                }
+                "assistant" => {
+                    prompt.push_str(&format!("<|im_start|>assistant\n{}<|im_end|>\n", content));
+                }
+                _ => {}
+            }
+        }
+
+        // Add generation prompt
+        prompt.push_str("<|im_start|>assistant\n");
+
+        Ok(prompt)
+    }
+
+    fn generate_tokens(
+        &mut self,
+        prompt_tokens: Vec<u32>,
+        config: &GenerationConfig,
+        mut callback: Option<impl FnMut(&str)>,
+    ) -> CandleResult<Vec<u32>> {
+        let mut text_gen = TextGeneration::from_config(config);
+        text_gen.set_eos_token_id(self.eos_token_id);
+        text_gen.set_tokens(prompt_tokens.clone());
+
+        let mut all_tokens = prompt_tokens.clone();
+        let start_gen = all_tokens.len();
+
+        for index in 0..config.max_length {
+            let context_size = if index > 0 { 1 } else { all_tokens.len() };
+            let start_pos = all_tokens.len().saturating_sub(context_size);
+            let ctxt = &all_tokens[start_pos..];
+
+            let input = Tensor::new(ctxt, &self.device)?.unsqueeze(0)?;
+            let logits = self.model.forward(&input, start_pos, None)?;
+            let logits = logits.squeeze(0)?;
+
+            // Handle different output shapes
+            let logits = if logits.dims().len() == 2 {
+                let seq_len = logits.dim(0)?;
+                logits.narrow(0, seq_len - 1, 1)?.squeeze(0)?
+            } else {
+                logits
+            };
+
+            let logits = logits.to_dtype(DType::F32)?;
+
+            let next_token = text_gen.sample_next_token(
+                &logits,
+                Some((config.repetition_penalty, config.repetition_penalty_last_n)),
+            )?;
+
+            all_tokens.push(next_token);
+
+            // Stream callback
+            if let Some(ref mut cb) = callback {
+                if config.debug_tokens {
+                    let token_piece = self.tokenizer.token_to_piece(next_token)?;
+                    cb(&format!("[{}:{}]", next_token, token_piece));
+                } else {
+                    let decoded_text = self.tokenizer.decode_incremental(&all_tokens, all_tokens.len() - 1)?;
+                    cb(&decoded_text);
+                }
+            }
+
+            // Check stop conditions
+            if text_gen.should_stop(next_token, config.max_length) {
+                break;
+            }
+
+            // Check stop sequences
+            let generated_text = self.tokenizer.decode(&all_tokens[start_gen..], true)?;
+            if text_gen.check_stop_sequences(&generated_text, &config.stop_sequences) {
+                break;
+            }
+        }
+
+        Ok(if config.include_prompt {
+            all_tokens
+        } else {
+            all_tokens[start_gen..].to_vec()
+        })
+    }
+}
+
+impl TextGenerator for Qwen {
+    fn generate(
+        &mut self,
+        prompt: &str,
+        config: &GenerationConfig,
+    ) -> CandleResult<String> {
+        let prompt_tokens = self.tokenizer.encode(prompt, true)?;
+        let output_tokens = self.generate_tokens(prompt_tokens, config, None::<fn(&str)>)?;
+
+        if config.debug_tokens {
+            self.tokenizer.format_tokens_with_debug(&output_tokens)
+        } else {
+            self.tokenizer.decode(&output_tokens, true)
+        }
+    }
+
+    fn generate_stream(
+        &mut self,
+        prompt: &str,
+        config: &GenerationConfig,
+        mut callback: impl FnMut(&str),
+    ) -> CandleResult<String> {
+        let prompt_tokens = self.tokenizer.encode(prompt, true)?;
+        let output_tokens = self.generate_tokens(prompt_tokens, config, Some(&mut callback))?;
+        self.tokenizer.decode(&output_tokens, true)
+    }
+
+    fn model_name(&self) -> &str {
+        &self.model_id
+    }
+
+    fn device(&self) -> &Device {
+        &self.device
+    }
+
+    fn clear_cache(&mut self) {
+        self.clear_kv_cache();
+    }
+}
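The hunk above is the new Qwen backend (data/ext/candle/src/llm/qwen.rs); the ruby/llm.rs changes further down route any model id containing "qwen" to RustQwen::from_pretrained. A minimal usage sketch from Ruby, assuming the from_pretrained/chat helpers in data/lib/candle/llm.rb (not shown in this diff) keep those names; the model id is only a placeholder:

    require "candle"

    # Hypothetical example: the model id and the Ruby helper names are
    # assumptions, not taken from this diff.
    llm = Candle::LLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

    messages = [
      { role: "system", content: "You are a helpful assistant." },
      { role: "user",   content: "Say hello in one sentence." }
    ]

    # apply_chat_template (above) renders these messages as ChatML
    # (<|im_start|>role ... <|im_end|>) before generation.
    puts llm.chat(messages)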
data/ext/candle/src/llm/text_generation.rs
CHANGED
@@ -1,13 +1,17 @@
 use candle_core::{Result as CandleResult, Tensor};
 use candle_transformers::generation::LogitsProcessor;
+use std::sync::Arc;
 
 use super::GenerationConfig;
+use crate::structured::Index;
 
 /// Helper struct for text generation process
 pub struct TextGeneration {
     logits_processor: LogitsProcessor,
     tokens: Vec<u32>,
     eos_token_id: Option<u32>,
+    constraint: Option<Arc<Index>>,
+    constraint_state: Option<u32>,
 }
 
 impl TextGeneration {
@@ -25,18 +29,27 @@ impl TextGeneration {
             logits_processor,
             tokens: Vec::new(),
             eos_token_id: None,
+            constraint: None,
+            constraint_state: None,
         }
     }
 
     pub fn from_config(config: &GenerationConfig) -> Self {
-        Self::new(
+        let mut text_gen = Self::new(
             config.seed,
             Some(config.temperature),
             config.top_p,
             config.top_k,
             config.repetition_penalty,
             config.repetition_penalty_last_n,
-        )
+        );
+
+        // Set constraint if provided
+        if let Some(ref constraint) = config.constraint {
+            text_gen.set_constraint(Arc::clone(constraint));
+        }
+
+        text_gen
     }
 
     pub fn set_eos_token_id(&mut self, eos_token_id: u32) {
@@ -55,6 +68,36 @@ impl TextGeneration {
         self.tokens.push(token);
     }
 
+    pub fn set_constraint(&mut self, constraint: Arc<Index>) {
+        // Initialize with the first state
+        self.constraint_state = Some(constraint.initial_state());
+        self.constraint = Some(constraint);
+    }
+
+    /// Apply constraints to logits by masking disallowed tokens
+    fn apply_constraints(&self, logits: &mut Tensor) -> CandleResult<()> {
+        if let (Some(ref constraint_index), Some(state)) = (&self.constraint, self.constraint_state) {
+            let device = logits.device();
+            let vocab_size = logits.dims1()?;
+
+            // Get allowed tokens from the constraint index for current state
+            if let Some(allowed_tokens) = constraint_index.allowed_tokens(&state) {
+                // Create a mask where allowed tokens have value 0 and others have -inf
+                let mut mask = vec![f32::NEG_INFINITY; vocab_size];
+                for &token_id in &allowed_tokens {
+                    if (token_id as usize) < vocab_size {
+                        mask[token_id as usize] = 0.0;
+                    }
+                }
+
+                // Apply mask to logits
+                let mask_tensor = Tensor::from_vec(mask, vocab_size, device)?;
+                *logits = logits.add(&mask_tensor)?;
+            }
+        }
+        Ok(())
+    }
+
     /// Apply repetition penalty to logits
     pub fn apply_repetition_penalty(
         &self,
@@ -103,10 +146,18 @@ impl TextGeneration {
             self.apply_repetition_penalty(&mut logits, penalty, last_n)?;
         }
 
+        // Apply constraints if active
+        self.apply_constraints(&mut logits)?;
+
         // Sample token
         let next_token = self.logits_processor.sample(&logits)?;
         self.tokens.push(next_token);
 
+        // Update constraint state if active
+        if let (Some(ref constraint_index), Some(current_state)) = (&self.constraint, self.constraint_state) {
+            self.constraint_state = constraint_index.next_state(&current_state, &next_token);
+        }
+
         Ok(next_token)
     }
 
@@ -122,6 +173,19 @@ impl TextGeneration {
             }
         }
 
+        // Check if we've reached a final state in constraint
+        // A state is considered final if it has no allowed tokens
+        if let (Some(ref constraint_index), Some(state)) = (&self.constraint, self.constraint_state) {
+            if let Some(allowed) = constraint_index.allowed_tokens(&state) {
+                if allowed.is_empty() {
+                    return true;
+                }
+            } else {
+                // None means no tokens allowed - we're done
+                return true;
+            }
+        }
+
         false
     }
 
data/ext/candle/src/ruby/device.rs
CHANGED
@@ -162,6 +162,10 @@ impl Device {
     pub fn __str__(&self) -> String {
         self.__repr__()
     }
+
+    pub fn __eq__(&self, other: &Device) -> bool {
+        self == other
+    }
 }
 
 impl magnus::TryConvert for Device {
@@ -193,5 +197,6 @@ pub fn init(rb_candle: RModule) -> Result<()> {
     rb_device.define_singleton_method("default", function!(default_device, 0))?;
     rb_device.define_method("to_s", method!(Device::__str__, 0))?;
     rb_device.define_method("inspect", method!(Device::__repr__, 0))?;
+    rb_device.define_method("==", method!(Device::__eq__, 1))?;
     Ok(())
 }
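The new __eq__ binding above makes device comparison work from Ruby via ==. A quick sketch using the "default" singleton method registered in the same init function (the return value shown is illustrative):

    require "candle"

    # Two handles to the default device should now compare equal.
    Candle::Device.default == Candle::Device.default  # => true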
data/ext/candle/src/ruby/llm.rs
CHANGED
@@ -1,15 +1,18 @@
 use magnus::{function, method, prelude::*, Error, Module, RArray, RHash, RModule, Ruby, TryConvert, Value};
 use std::cell::RefCell;
+use std::sync::Arc;
 
-use crate::llm::{GenerationConfig as RustGenerationConfig, TextGenerator, mistral::Mistral as RustMistral, llama::Llama as RustLlama, gemma::Gemma as RustGemma, QuantizedGGUF as RustQuantizedGGUF};
+use crate::llm::{GenerationConfig as RustGenerationConfig, TextGenerator, mistral::Mistral as RustMistral, llama::Llama as RustLlama, gemma::Gemma as RustGemma, qwen::Qwen as RustQwen, phi::Phi as RustPhi, QuantizedGGUF as RustQuantizedGGUF};
 use crate::ruby::{Result, Device};
+use crate::ruby::structured::StructuredConstraint;
 
 // Use an enum to handle different model types instead of trait objects
-#[derive(Debug)]
 enum ModelType {
     Mistral(RustMistral),
     Llama(RustLlama),
     Gemma(RustGemma),
+    Qwen(RustQwen),
+    Phi(RustPhi),
     QuantizedGGUF(RustQuantizedGGUF),
 }
 
@@ -19,6 +22,8 @@ impl ModelType {
             ModelType::Mistral(m) => m.generate(prompt, config),
             ModelType::Llama(m) => m.generate(prompt, config),
             ModelType::Gemma(m) => m.generate(prompt, config),
+            ModelType::Qwen(m) => m.generate(prompt, config),
+            ModelType::Phi(m) => m.generate(prompt, config),
             ModelType::QuantizedGGUF(m) => m.generate(prompt, config),
         }
     }
@@ -33,6 +38,8 @@ impl ModelType {
             ModelType::Mistral(m) => m.generate_stream(prompt, config, callback),
             ModelType::Llama(m) => m.generate_stream(prompt, config, callback),
             ModelType::Gemma(m) => m.generate_stream(prompt, config, callback),
+            ModelType::Qwen(m) => m.generate_stream(prompt, config, callback),
+            ModelType::Phi(m) => m.generate_stream(prompt, config, callback),
             ModelType::QuantizedGGUF(m) => m.generate_stream(prompt, config, callback),
         }
     }
@@ -42,6 +49,8 @@ impl ModelType {
             ModelType::Mistral(m) => m.clear_cache(),
             ModelType::Llama(m) => m.clear_cache(),
             ModelType::Gemma(m) => m.clear_cache(),
+            ModelType::Qwen(m) => m.clear_cache(),
+            ModelType::Phi(m) => m.clear_cache(),
             ModelType::QuantizedGGUF(m) => m.clear_cache(),
         }
     }
@@ -67,6 +76,8 @@ impl ModelType {
             },
             ModelType::Llama(m) => m.apply_chat_template(messages),
             ModelType::Gemma(m) => m.apply_chat_template(messages),
+            ModelType::Qwen(m) => m.apply_chat_template(messages),
+            ModelType::Phi(m) => m.apply_chat_template(messages),
             ModelType::QuantizedGGUF(m) => m.apply_chat_template(messages),
         }
     }
@@ -146,6 +157,13 @@ impl GenerationConfig {
             }
         }
 
+        // Handle constraint parameter
+        if let Some(value) = kwargs.get(magnus::Symbol::new("constraint")) {
+            if let Ok(constraint) = <&StructuredConstraint as TryConvert>::try_convert(value) {
+                config.constraint = Some(Arc::clone(&constraint.index));
+            }
+        }
+
         Ok(Self { inner: config })
     }
 
@@ -191,9 +209,14 @@ impl GenerationConfig {
     pub fn debug_tokens(&self) -> bool {
         self.inner.debug_tokens
     }
+    pub fn constraint(&self) -> Option<StructuredConstraint> {
+        self.inner.constraint.as_ref().map(|c| StructuredConstraint {
+            index: Arc::clone(c),
+        })
+    }
 }
 
-#[derive(Clone
+#[derive(Clone)]
 #[magnus::wrap(class = "Candle::LLM", mark, free_immediately)]
 pub struct LLM {
     model: std::sync::Arc<std::sync::Mutex<RefCell<ModelType>>>,
@@ -251,10 +274,22 @@ impl LLM {
             })
             .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to load model: {}", e)))?;
             ModelType::Gemma(gemma)
+        } else if model_lower.contains("qwen") {
+            let qwen = rt.block_on(async {
+                RustQwen::from_pretrained(&model_id, candle_device).await
+            })
+            .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to load model: {}", e)))?;
+            ModelType::Qwen(qwen)
+        } else if model_lower.contains("phi") {
+            let phi = rt.block_on(async {
+                RustPhi::from_pretrained(&model_id, candle_device).await
+            })
+            .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to load model: {}", e)))?;
+            ModelType::Phi(phi)
         } else {
             return Err(Error::new(
                 magnus::exception::runtime_error(),
-                format!("Unsupported model type: {}. Currently Mistral, Llama, and
+                format!("Unsupported model type: {}. Currently Mistral, Llama, Gemma, Qwen, and Phi models are supported.", model_id),
             ));
         }
     };
@@ -332,6 +367,8 @@ impl LLM {
             ModelType::Mistral(m) => Ok(crate::ruby::tokenizer::Tokenizer(m.tokenizer().clone())),
             ModelType::Llama(m) => Ok(crate::ruby::tokenizer::Tokenizer(m.tokenizer().clone())),
             ModelType::Gemma(m) => Ok(crate::ruby::tokenizer::Tokenizer(m.tokenizer().clone())),
+            ModelType::Qwen(m) => Ok(crate::ruby::tokenizer::Tokenizer(m.tokenizer().clone())),
+            ModelType::Phi(m) => Ok(crate::ruby::tokenizer::Tokenizer(m.tokenizer().clone())),
             ModelType::QuantizedGGUF(m) => Ok(crate::ruby::tokenizer::Tokenizer(m.tokenizer().clone())),
         }
     }
@@ -423,6 +460,7 @@ pub fn init_llm(rb_candle: RModule) -> Result<()> {
     rb_generation_config.define_method("stop_sequences", method!(GenerationConfig::stop_sequences, 0))?;
     rb_generation_config.define_method("include_prompt", method!(GenerationConfig::include_prompt, 0))?;
     rb_generation_config.define_method("debug_tokens", method!(GenerationConfig::debug_tokens, 0))?;
+    rb_generation_config.define_method("constraint", method!(GenerationConfig::constraint, 0))?;
 
     let rb_llm = rb_candle.define_class("LLM", magnus::class::object())?;
     rb_llm.define_singleton_method("_from_pretrained", function!(from_pretrained_wrapper, -1))?;
data/ext/candle/src/ruby/mod.rs
CHANGED
data/ext/candle/src/ruby/structured.rs
ADDED
@@ -0,0 +1,47 @@
+use magnus::{Error, Module, RModule, function, Object};
+use std::sync::Arc;
+
+use crate::structured::{SchemaProcessor, VocabularyAdapter, Index};
+use crate::ruby::{Result, tokenizer::Tokenizer};
+
+/// Ruby wrapper for structured generation constraints
+#[derive(Clone, Debug)]
+#[magnus::wrap(class = "Candle::StructuredConstraint", mark, free_immediately)]
+pub struct StructuredConstraint {
+    pub(crate) index: Arc<Index>,
+}
+
+impl StructuredConstraint {
+    /// Create a constraint from a JSON schema
+    pub fn from_schema(schema: String, tokenizer: &Tokenizer) -> Result<Self> {
+        let vocabulary = VocabularyAdapter::from_tokenizer(&tokenizer.0)
+            .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to create vocabulary: {}", e)))?;
+
+        let processor = SchemaProcessor::new();
+        let index = processor.process_schema(&schema, &vocabulary)
+            .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to process schema: {}", e)))?;
+
+        Ok(Self { index })
+    }
+
+    /// Create a constraint from a regex pattern
+    pub fn from_regex(pattern: String, tokenizer: &Tokenizer) -> Result<Self> {
+        let vocabulary = VocabularyAdapter::from_tokenizer(&tokenizer.0)
+            .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to create vocabulary: {}", e)))?;
+
+        let processor = SchemaProcessor::new();
+        let index = processor.process_regex(&pattern, &vocabulary)
+            .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to process regex: {}", e)))?;
+
+        Ok(Self { index })
+    }
+}
+
+pub fn init_structured(rb_candle: RModule) -> Result<()> {
+    let class = rb_candle.define_class("StructuredConstraint", magnus::class::object())?;
+
+    class.define_singleton_method("from_schema", function!(StructuredConstraint::from_schema, 2))?;
+    class.define_singleton_method("from_regex", function!(StructuredConstraint::from_regex, 2))?;
+
+    Ok(())
+}
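The StructuredConstraint wrapper above, together with the constraint plumbing added to GenerationConfig and TextGeneration earlier in this diff, is the Ruby entry point for constrained generation. A minimal sketch, assuming the Ruby-side from_pretrained/tokenizer/generate helpers in data/lib/candle/llm.rb (not shown here) keep those names; the model id is only a placeholder:

    require "candle"

    # Hypothetical example built on the bindings registered above.
    llm = Candle::LLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

    # Build a constraint from a regex; from_schema works the same way with a
    # JSON schema string and the same tokenizer argument.
    phone = Candle::StructuredConstraint.from_regex('\d{3}-\d{3}-\d{4}', llm.tokenizer)

    # The :constraint kwarg is read in GenerationConfig::new (ruby/llm.rs) and
    # masks disallowed tokens during sampling (llm/text_generation.rs).
    config = Candle::GenerationConfig.new(max_length: 30, constraint: phone)
    puts llm.generate("My phone number is ", config: config)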
data/ext/candle/src/structured/integration_test.rs
ADDED
@@ -0,0 +1,130 @@
+#[cfg(test)]
+mod integration_tests {
+    use super::super::*;
+    use crate::tokenizer::{TokenizerWrapper, loader::TokenizerLoader};
+    use std::sync::Arc;
+
+    #[tokio::test]
+    async fn test_schema_processor_with_vocabulary() {
+        // This test requires a tokenizer to create a vocabulary
+        let tokenizer_result = TokenizerLoader::from_hf_hub("bert-base-uncased", None).await;
+
+        if let Ok(tokenizer) = tokenizer_result {
+            let wrapper = TokenizerWrapper::new(tokenizer);
+
+            // Create vocabulary from tokenizer
+            let vocabulary = VocabularyAdapter::from_tokenizer(&wrapper)
+                .expect("Should create vocabulary");
+
+            // Create schema processor
+            let processor = SchemaProcessor::new();
+
+            // Test with a simple JSON schema
+            let schema = r#"{
+                "type": "object",
+                "properties": {
+                    "name": {"type": "string"},
+                    "age": {"type": "integer"}
+                },
+                "required": ["name", "age"]
+            }"#;
+
+            // Process schema into Index
+            let index_result = processor.process_schema(schema, &vocabulary);
+            assert!(index_result.is_ok(), "Should process schema successfully");
+
+            // Test caching - second call should use cache
+            let index2_result = processor.process_schema(schema, &vocabulary);
+            assert!(index2_result.is_ok(), "Should retrieve from cache");
+
+            // Both should be the same Arc
+            let index1 = index_result.unwrap();
+            let index2 = index2_result.unwrap();
+            assert!(Arc::ptr_eq(&index1, &index2), "Should return cached Index");
+
+            // Check cache stats
+            let (size, _) = processor.cache_stats();
+            assert_eq!(size, 1, "Cache should have one entry");
+        } else {
+            eprintln!("Skipping integration test - couldn't load tokenizer");
+        }
+    }
+
+    #[tokio::test]
+    async fn test_regex_processing() {
+        let tokenizer_result = TokenizerLoader::from_hf_hub("bert-base-uncased", None).await;
+
+        if let Ok(tokenizer) = tokenizer_result {
+            let wrapper = TokenizerWrapper::new(tokenizer);
+            let vocabulary = VocabularyAdapter::from_tokenizer(&wrapper)
+                .expect("Should create vocabulary");
+
+            let processor = SchemaProcessor::new();
+
+            // Test with a simple regex pattern
+            let email_regex = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}";
+
+            let index_result = processor.process_regex(email_regex, &vocabulary);
+            assert!(index_result.is_ok(), "Should process regex successfully");
+
+            // Test different regex
+            let phone_regex = r"\d{3}-\d{3}-\d{4}";
+            let phone_index_result = processor.process_regex(phone_regex, &vocabulary);
+            assert!(phone_index_result.is_ok(), "Should process phone regex");
+
+            // Cache should have both
+            let (size, _) = processor.cache_stats();
+            assert_eq!(size, 2, "Cache should have two entries");
+
+            // Clear cache
+            processor.clear_cache();
+            let (size, _) = processor.cache_stats();
+            assert_eq!(size, 0, "Cache should be empty after clear");
+        }
+    }
+
+    #[test]
+    fn test_various_json_schemas() {
+        let _processor = SchemaProcessor::new();
+
+        // Array schema
+        let array_schema = serde_json::json!({
+            "type": "array",
+            "items": {"type": "string"}
+        });
+
+        // Process as a full schema instead of testing private method
+        // This would need a mock vocabulary in a real test
+        // For now, just verify the schema is valid JSON
+        let json_str = serde_json::to_string(&array_schema).unwrap();
+        assert!(!json_str.is_empty(), "Should serialize array schema");
+
+        // Nested object schema
+        let nested_schema = serde_json::json!({
+            "type": "object",
+            "properties": {
+                "user": {
+                    "type": "object",
+                    "properties": {
+                        "id": {"type": "integer"},
+                        "email": {"type": "string", "format": "email"}
+                    }
+                }
+            }
+        });
+
+        // Verify nested schema is valid
+        let json_str = serde_json::to_string(&nested_schema).unwrap();
+        assert!(json_str.contains("properties"), "Should have nested properties");
+
+        // Schema with enum
+        let enum_schema = serde_json::json!({
+            "type": "string",
+            "enum": ["red", "green", "blue"]
+        });
+
+        // Verify enum schema is valid
+        let json_str = serde_json::to_string(&enum_schema).unwrap();
+        assert!(json_str.contains("enum"), "Should have enum values");
+    }
+}