red-candle 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
+ /// Structured generation support using Outlines
+ ///
+ /// This module provides functionality to constrain language model generation
+ /// to follow specific patterns, such as JSON schemas or regular expressions.
+
+ pub mod vocabulary_adapter;
+ pub mod schema_processor;
+
+ pub use vocabulary_adapter::VocabularyAdapter;
+ pub use schema_processor::SchemaProcessor;
+
+ // Re-export commonly used types from outlines-core
+ pub use outlines_core::prelude::Index;
+ pub use outlines_core::vocabulary::Vocabulary;
+
+ #[cfg(test)]
+ mod vocabulary_adapter_simple_test;
+
+ #[cfg(test)]
+ mod integration_test;
+
+ #[cfg(test)]
+ mod tests {
+     use super::*;
+
+     #[test]
+     fn test_module_imports() {
+         // Ensure all exports are available
+         let _ = VocabularyAdapter;
+     }
+ }
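The new module only declares and re-exports the structured-generation pieces. A minimal sketch (not part of the diff) of how they are meant to be combined, assuming the module is mounted as `structured` inside the crate and that a `TokenizerWrapper` is already loaded:

```rust
use crate::structured::{SchemaProcessor, VocabularyAdapter};
use crate::tokenizer::TokenizerWrapper;

// Hypothetical helper: turn a JSON schema into a compiled Index for one tokenizer.
fn index_for_schema(
    tokenizer: &TokenizerWrapper,
    schema: &str,
) -> candle_core::Result<std::sync::Arc<outlines_core::prelude::Index>> {
    // Adapt the tokenizer's vocabulary to the representation outlines-core expects.
    let vocabulary = VocabularyAdapter::from_tokenizer(tokenizer)?;

    // Compile (and cache) the schema into an Index used to constrain decoding.
    let processor = SchemaProcessor::new();
    processor.process_schema(schema, &vocabulary)
}
```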
@@ -0,0 +1,215 @@
+ use std::collections::HashMap;
+ use std::sync::{Arc, Mutex};
+ use candle_core::Result as CandleResult;
+ use candle_core::Error as CandleError;
+ use outlines_core::prelude::Index;
+ use outlines_core::vocabulary::Vocabulary;
+ use serde_json::Value as JsonValue;
+ use outlines_core::json_schema;
+
+ /// Processes JSON schemas into compiled Index objects for structured generation
+ pub struct SchemaProcessor {
+     /// Cache of compiled Index objects keyed by schema hash
+     cache: Arc<Mutex<HashMap<u64, Arc<Index>>>>,
+ }
+
+ impl SchemaProcessor {
+     /// Create a new schema processor with an empty cache
+     pub fn new() -> Self {
+         Self {
+             cache: Arc::new(Mutex::new(HashMap::new())),
+         }
+     }
+
+     /// Process a JSON schema into a compiled Index
+     ///
+     /// # Arguments
+     /// * `schema` - JSON schema as a string
+     /// * `vocabulary` - The tokenizer's vocabulary
+     ///
+     /// # Returns
+     /// A compiled Index ready for constrained generation
+     pub fn process_schema(
+         &self,
+         schema: &str,
+         vocabulary: &Vocabulary,
+     ) -> CandleResult<Arc<Index>> {
+         // Calculate hash of the schema for caching
+         let schema_hash = self.calculate_hash(schema);
+
+         // Check cache first
+         if let Ok(cache) = self.cache.lock() {
+             if let Some(cached_index) = cache.get(&schema_hash) {
+                 return Ok(Arc::clone(cached_index));
+             }
+         }
+
+         // Parse the JSON schema
+         let schema_value: JsonValue = serde_json::from_str(schema)
+             .map_err(|e| CandleError::Msg(format!("Invalid JSON schema: {}", e)))?;
+
+         // Convert schema to regex using Outlines
+         let regex = self.schema_to_regex(&schema_value)?;
+
+         // Compile regex into Index
+         let index = self.compile_regex(&regex, vocabulary)?;
+         let index_arc = Arc::new(index);
+
+         // Cache the compiled Index
+         if let Ok(mut cache) = self.cache.lock() {
+             cache.insert(schema_hash, Arc::clone(&index_arc));
+         }
+
+         Ok(index_arc)
+     }
+
+     /// Process a regex pattern directly into an Index
+     ///
+     /// # Arguments
+     /// * `regex` - Regular expression pattern
+     /// * `vocabulary` - The tokenizer's vocabulary
+     ///
+     /// # Returns
+     /// A compiled Index for the regex pattern
+     pub fn process_regex(
+         &self,
+         regex: &str,
+         vocabulary: &Vocabulary,
+     ) -> CandleResult<Arc<Index>> {
+         // Calculate hash for caching
+         let regex_hash = self.calculate_hash(regex);
+
+         // Check cache
+         if let Ok(cache) = self.cache.lock() {
+             if let Some(cached_index) = cache.get(&regex_hash) {
+                 return Ok(Arc::clone(cached_index));
+             }
+         }
+
+         // Compile the regex
+         let index = self.compile_regex(regex, vocabulary)?;
+         let index_arc = Arc::new(index);
+
+         // Cache it
+         if let Ok(mut cache) = self.cache.lock() {
+             cache.insert(regex_hash, Arc::clone(&index_arc));
+         }
+
+         Ok(index_arc)
+     }
+
+     /// Convert a JSON schema to a regex pattern
+     fn schema_to_regex(&self, schema: &JsonValue) -> CandleResult<String> {
+         // Use Outlines' built-in JSON schema to regex conversion
+         json_schema::regex_from_value(schema, None)
+             .map_err(|e| CandleError::Msg(format!("Failed to convert schema to regex: {:?}", e)))
+     }
+
+     /// Compile a regex pattern into an Index
+     fn compile_regex(&self, regex: &str, vocabulary: &Vocabulary) -> CandleResult<Index> {
+         // Use Outlines to build the Index from regex
+         Index::new(regex, vocabulary)
+             .map_err(|e| CandleError::Msg(format!("Failed to build index from regex: {:?}", e)))
+     }
+
+     /// Calculate a hash for caching
+     fn calculate_hash(&self, input: &str) -> u64 {
+         use std::collections::hash_map::DefaultHasher;
+         use std::hash::{Hash, Hasher};
+
+         let mut hasher = DefaultHasher::new();
+         input.hash(&mut hasher);
+         hasher.finish()
+     }
+
+     /// Clear the cache
+     pub fn clear_cache(&self) {
+         if let Ok(mut cache) = self.cache.lock() {
+             cache.clear();
+         }
+     }
+
+     /// Get cache statistics
+     pub fn cache_stats(&self) -> (usize, usize) {
+         if let Ok(cache) = self.cache.lock() {
+             let size = cache.len();
+             let capacity = cache.capacity();
+             (size, capacity)
+         } else {
+             (0, 0)
+         }
+     }
+ }
+
+ impl Default for SchemaProcessor {
+     fn default() -> Self {
+         Self::new()
+     }
+ }
+
+ #[cfg(test)]
+ mod tests {
+     use super::*;
+
+     #[test]
+     fn test_schema_processor_creation() {
+         let processor = SchemaProcessor::new();
+         let (size, _) = processor.cache_stats();
+         assert_eq!(size, 0, "Cache should start empty");
+     }
+
+     #[test]
+     fn test_cache_operations() {
+         let processor = SchemaProcessor::new();
+
+         // Initially empty
+         let (size, _) = processor.cache_stats();
+         assert_eq!(size, 0);
+
+         // After clear (should still be empty)
+         processor.clear_cache();
+         let (size, _) = processor.cache_stats();
+         assert_eq!(size, 0);
+     }
+
+     #[test]
+     fn test_schema_to_regex_basic_types() {
+         let processor = SchemaProcessor::new();
+
+         // Test string type
+         let string_schema = serde_json::json!({
+             "type": "string"
+         });
+         let regex = processor.schema_to_regex(&string_schema).unwrap();
+         // Just verify it produces a regex, exact format depends on Outlines
+         assert!(!regex.is_empty(), "String schema should produce a regex");
+
+         // Test number type
+         let number_schema = serde_json::json!({
+             "type": "number"
+         });
+         let regex = processor.schema_to_regex(&number_schema).unwrap();
+         assert!(!regex.is_empty(), "Number schema should produce a regex");
+
+         // Test boolean type
+         let bool_schema = serde_json::json!({
+             "type": "boolean"
+         });
+         let regex = processor.schema_to_regex(&bool_schema).unwrap();
+         assert!(regex.contains("true") && regex.contains("false"), "Boolean regex should contain true/false");
+     }
+
+     #[test]
+     fn test_schema_with_pattern() {
+         let processor = SchemaProcessor::new();
+
+         let schema = serde_json::json!({
+             "type": "string",
+             "pattern": r"^\d{3}-\d{3}-\d{4}$"
+         });
+
+         let regex = processor.schema_to_regex(&schema).unwrap();
+         // Pattern should be included in the generated regex
+         assert!(regex.contains("\\d{3}"), "Should contain digit pattern");
+     }
+ }
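A hedged usage sketch of the processor above, highlighting the cache: a second call with the same pattern should return the already-compiled `Arc<Index>` instead of rebuilding it. The `vocabulary` argument is assumed to come from `VocabularyAdapter::from_tokenizer`, and the surrounding function is illustrative only.

```rust
use std::sync::Arc;
use outlines_core::vocabulary::Vocabulary;

fn cached_compilation_demo(vocabulary: &Vocabulary) -> candle_core::Result<()> {
    // SchemaProcessor is the type defined above.
    let processor = SchemaProcessor::new();

    // First call hashes the pattern, compiles it into an Index, and caches it.
    let first = processor.process_regex(r"\d{3}-\d{4}", vocabulary)?;

    // Second call with the same pattern is served from the cache.
    let second = processor.process_regex(r"\d{3}-\d{4}", vocabulary)?;
    assert!(Arc::ptr_eq(&first, &second), "same compiled Index expected");

    // cache_stats() reports (entries, capacity) of the internal HashMap.
    let (entries, _capacity) = processor.cache_stats();
    assert_eq!(entries, 1);
    Ok(())
}
```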
@@ -0,0 +1,152 @@
+ use crate::tokenizer::TokenizerWrapper;
+ use candle_core::Result as CandleResult;
+ use outlines_core::vocabulary::Vocabulary;
+ use std::collections::HashMap;
+
+ /// Adapter to convert between red-candle's TokenizerWrapper and Outlines' Vocabulary
+ pub struct VocabularyAdapter;
+
+ impl VocabularyAdapter {
+     /// Convert a TokenizerWrapper's vocabulary to an Outlines Vocabulary
+     ///
+     /// # Arguments
+     /// * `tokenizer` - The tokenizer to extract vocabulary from
+     ///
+     /// # Returns
+     /// An Outlines Vocabulary suitable for use with Index construction
+     pub fn from_tokenizer(tokenizer: &TokenizerWrapper) -> CandleResult<Vocabulary> {
+         // Get the vocabulary mapping from the tokenizer
+         let vocab_map: HashMap<String, u32> = tokenizer.inner().get_vocab(true);
+
+         // Try to find EOS token in vocabulary
+         let eos_token_id = vocab_map.get("</s>")
+             .or_else(|| vocab_map.get("<|endoftext|>"))
+             .or_else(|| vocab_map.get("<eos>"))
+             .or_else(|| vocab_map.get("[SEP]"))
+             .copied();
+
+         // Create a sorted list of (token_id, token_string) pairs
+         let mut token_pairs: Vec<(u32, String)> = vocab_map
+             .into_iter()
+             .map(|(token, id)| (id, token))
+             .collect();
+
+         // Sort by token ID to ensure correct indexing
+         token_pairs.sort_by_key(|(id, _)| *id);
+
+         // Find the maximum token ID to determine vocabulary size
+         let max_token_id = token_pairs
+             .last()
+             .map(|(id, _)| *id)
+             .unwrap_or(0);
+
+         // Create vocabulary items in the format expected by Outlines
+         // We need to handle potential gaps in token IDs
+         let mut vocab_items: Vec<(String, Vec<u8>)> = Vec::new();
+         let mut current_id = 0;
+
+         for (token_id, token_string) in token_pairs {
+             // Fill gaps with placeholder tokens
+             while current_id < token_id {
+                 vocab_items.push((
+                     format!("<unused_{}>", current_id),
+                     format!("<unused_{}>", current_id).into_bytes(),
+                 ));
+                 current_id += 1;
+             }
+
+             // Add the actual token
+             // Convert token string to bytes for Outlines
+             vocab_items.push((
+                 token_string.clone(),
+                 token_string.into_bytes(),
+             ));
+             current_id += 1;
+         }
+
+         // Fill any remaining gaps up to a reasonable vocabulary size
+         // This ensures we don't have issues with token IDs beyond our vocabulary
+         while current_id <= max_token_id {
+             vocab_items.push((
+                 format!("<unused_{}>", current_id),
+                 format!("<unused_{}>", current_id).into_bytes(),
+             ));
+             current_id += 1;
+         }
+
+         // Create the Outlines vocabulary
+         // The Vocabulary API expects us to build it token by token
+         let mut vocabulary = Vocabulary::new(
+             eos_token_id.unwrap_or(0) // Use EOS token ID or 0 as default
+         );
+
+         // Insert all tokens into the vocabulary
+         for (idx, (token, bytes)) in vocab_items.into_iter().enumerate() {
+             // Skip inserting the EOS token as it's already set in the vocabulary
+             if Some(idx as u32) == eos_token_id {
+                 continue;
+             }
+
+             vocabulary.try_insert(bytes, idx as u32)
+                 .map_err(|e| candle_core::Error::Msg(
+                     format!("Failed to insert token '{}': {:?}", token, e)
+                 ))?;
+         }
+
+         Ok(vocabulary)
+     }
+
+     /// Get vocabulary size from a tokenizer
+     pub fn vocab_size(tokenizer: &TokenizerWrapper) -> usize {
+         tokenizer.inner().get_vocab_size(true)
+     }
+
+     /// Extract and validate special tokens
+     pub fn get_special_tokens(tokenizer: &TokenizerWrapper) -> HashMap<String, u32> {
+         let tokenizer_inner = tokenizer.inner();
+         let mut special_tokens = HashMap::new();
+
+         // Get common special tokens if they exist
+         if let Some(_token) = tokenizer_inner.id_to_token(0) {
+             special_tokens.insert("pad_token".to_string(), 0);
+         }
+
+         // Try to find EOS token
+         let vocab = tokenizer_inner.get_vocab(true);
+         if let Some(&eos_id) = vocab.get("</s>")
+             .or_else(|| vocab.get("<|endoftext|>"))
+             .or_else(|| vocab.get("<eos>"))
+             .or_else(|| vocab.get("[SEP]")) {
+             special_tokens.insert("eos_token".to_string(), eos_id);
+         }
+
+         // Try to get BOS token if it exists
+         if let Some(bos_token) = tokenizer_inner.token_to_id("<s>") {
+             special_tokens.insert("bos_token".to_string(), bos_token);
+         } else if let Some(bos_token) = tokenizer_inner.token_to_id("<|startoftext|>") {
+             special_tokens.insert("bos_token".to_string(), bos_token);
+         }
+
+         special_tokens
+     }
+ }
+
+ #[cfg(test)]
+ mod tests {
+
+     #[test]
+     fn test_vocabulary_adapter_creation() {
+         // This test will be implemented once we have a way to create test tokenizers
+         // For now, it serves as a placeholder for the test structure
+     }
+
+     #[test]
+     fn test_special_tokens_extraction() {
+         // Test special token extraction logic
+     }
+
+     #[test]
+     fn test_vocab_size() {
+         // Test vocabulary size calculation
+     }
+ }
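For orientation, a small hypothetical sketch of inspecting what the adapter extracts from an already-loaded `TokenizerWrapper` before building an `Index`; it calls only functions introduced in this diff, but the wrapper and the surrounding function are assumptions.

```rust
use crate::tokenizer::TokenizerWrapper;

fn describe_tokenizer(wrapper: &TokenizerWrapper) -> candle_core::Result<()> {
    // Full vocabulary converted into the outlines-core representation,
    // with gaps in token IDs padded by "<unused_N>" placeholders.
    let vocabulary = VocabularyAdapter::from_tokenizer(wrapper)?;

    // Special tokens the adapter could detect (pad/eos/bos), if any.
    let specials = VocabularyAdapter::get_special_tokens(wrapper);

    println!("vocab size: {}", VocabularyAdapter::vocab_size(wrapper));
    println!("eos token id: {:?}", specials.get("eos_token"));
    println!("bos token id: {:?}", specials.get("bos_token"));
    println!("outlines eos id: {}", vocabulary.eos_token_id());
    Ok(())
}
```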
@@ -0,0 +1,66 @@
+ #[cfg(test)]
+ mod real_tests {
+     use super::super::*;
+     use crate::tokenizer::{TokenizerWrapper, loader::TokenizerLoader};
+
+     #[tokio::test]
+     async fn test_vocabulary_conversion_with_real_outlines() {
+         // This test requires network access to download a tokenizer
+         // It verifies that our adapter works with the real outlines-core crate
+
+         // Load a simple tokenizer
+         let tokenizer_result = TokenizerLoader::from_hf_hub("bert-base-uncased", None).await;
+
+         if let Ok(tokenizer) = tokenizer_result {
+             let wrapper = TokenizerWrapper::new(tokenizer);
+
+             // Convert to Outlines vocabulary
+             let vocab_result = VocabularyAdapter::from_tokenizer(&wrapper);
+             assert!(vocab_result.is_ok(), "Vocabulary conversion should succeed");
+
+             let vocabulary = vocab_result.unwrap();
+
+             // Verify the vocabulary was created
+             // The real Vocabulary doesn't expose a size method directly,
+             // but we can verify it exists and has the correct EOS token
+             assert_eq!(vocabulary.eos_token_id(), 102); // BERT's [SEP] token
+
+             println!("✓ Successfully created Outlines Vocabulary from BERT tokenizer");
+         } else {
+             println!("⚠️ Skipping test - couldn't download tokenizer (likely offline)");
+         }
+     }
+
+     #[test]
+     fn test_vocabulary_adapter_with_mock_data() {
+         // This test doesn't require network access
+         // It uses a mock tokenizer to verify the conversion logic
+
+         use tokenizers::models::wordpiece::WordPiece;
+         use tokenizers::Tokenizer;
+         use std::collections::HashMap;
+
+         // Create a minimal vocabulary
+         let mut vocab = HashMap::new();
+         vocab.insert("[PAD]".to_string(), 0);
+         vocab.insert("[UNK]".to_string(), 1);
+         vocab.insert("[SEP]".to_string(), 2);
+         vocab.insert("hello".to_string(), 3);
+         vocab.insert("world".to_string(), 4);
+
+         let model = WordPiece::from_vocab(vocab);
+         let tokenizer = Tokenizer::new(model);
+         let wrapper = TokenizerWrapper::new(tokenizer);
+
+         // Convert to Outlines vocabulary
+         let vocab_result = VocabularyAdapter::from_tokenizer(&wrapper);
+         assert!(vocab_result.is_ok(), "Vocabulary conversion should succeed");
+
+         let vocabulary = vocab_result.unwrap();
+
+         // Verify EOS token was found
+         assert_eq!(vocabulary.eos_token_id(), 2); // [SEP] token
+
+         println!("✓ Mock vocabulary conversion successful");
+     }
+ }
@@ -0,0 +1,70 @@
+ #[cfg(test)]
+ mod simple_tests {
+     use super::super::*;
+
+     #[test]
+     fn test_vocabulary_adapter_basic() {
+         // Create a simple mock tokenizer to test the adapter
+         // This validates that the VocabularyAdapter compiles and can be called
+
+         // Note: Creating a full tokenizer in tests is complex due to the tokenizers crate API
+         // For now, we verify compilation and will rely on integration tests
+
+         // The important thing is that this code compiles, proving our integration works
+         let _adapter = VocabularyAdapter;
+
+         // Test the static methods compile
+         // These would be tested with a real tokenizer in integration tests
+
+         // Test passes if this compiles - no output needed
+     }
+
+     #[test]
+     fn test_outlines_vocabulary_api() {
+         use outlines_core::vocabulary::Vocabulary;
+
+         // Test that we can create a Vocabulary object
+         // Use token ID 2 as EOS (like BERT's [SEP] token)
+         let mut vocab = Vocabulary::new(2);
+
+         // Test inserting tokens
+         let test_tokens = vec![
+             ("<pad>".to_string(), "<pad>".as_bytes().to_vec()),
+             ("<unk>".to_string(), "<unk>".as_bytes().to_vec()),
+             ("<sep>".to_string(), "<sep>".as_bytes().to_vec()), // EOS token at ID 2
+             ("hello".to_string(), "hello".as_bytes().to_vec()),
+             ("world".to_string(), "world".as_bytes().to_vec()),
+         ];
+
+         for (idx, (_token, bytes)) in test_tokens.into_iter().enumerate() {
+             match vocab.try_insert(bytes, idx as u32) {
+                 Ok(_) => {},
+                 Err(e) => {
+                     // It's ok if we can't insert the EOS token
+                     if idx != 2 {
+                         panic!("Failed to insert token at index {}: {:?}", idx, e);
+                     }
+                 }
+             }
+         }
+
+         // Test passes - vocabulary API works correctly
+     }
+
+     #[test]
+     fn test_special_token_patterns() {
+         // Test that our special token patterns are correct
+         let test_cases = vec![
+             ("</s>", "EOS token for many models"),
+             ("<|endoftext|>", "GPT-style EOS token"),
+             ("<eos>", "Alternative EOS token"),
+             ("[SEP]", "BERT-style separator"),
+             ("<s>", "BOS token"),
+             ("<|startoftext|>", "GPT-style BOS token"),
+         ];
+
+         // Just verify the patterns exist - no output needed
+         assert_eq!(test_cases.len(), 6, "Should have 6 special token patterns");
+     }
+ }
data/lib/candle/llm.rb CHANGED
@@ -1,5 +1,67 @@
+ require 'json'
+
  module Candle
    class LLM
+     # Create a structured constraint from a JSON schema
+     def constraint_from_schema(schema)
+       schema_str = schema.is_a?(String) ? schema : JSON.generate(schema)
+       StructuredConstraint.from_schema(schema_str, tokenizer)
+     end
+
+     # Create a structured constraint from a regex pattern
+     def constraint_from_regex(pattern)
+       pattern_str = pattern.is_a?(Regexp) ? pattern.source : pattern.to_s
+       StructuredConstraint.from_regex(pattern_str, tokenizer)
+     end
+
+     # Generate with regex constraint
+     def generate_regex(prompt, pattern:, **options)
+       constraint = constraint_from_regex(pattern)
+
+       # Add common EOS tokens as stop sequences for regex generation
+       stop_sequences = options[:stop_sequences] || []
+       stop_sequences += ["</s>", "<|endoftext|>", "<|im_end|>", "<end>", "\n"] unless options[:no_auto_stop]
+
+       config_opts = options.merge(constraint: constraint, stop_sequences: stop_sequences)
+       config = options[:config] || GenerationConfig.balanced(**config_opts)
+
+       result = generate(prompt, config: config, reset_cache: options.fetch(:reset_cache, true))
+
+       # Clean up any trailing EOS tokens
+       result.gsub(/(<\/s>|<\|endoftext\|>|<\|im_end\|>|<end>).*$/m, '').strip
+     end
+
+     # Generate and parse structured output from a JSON schema
+     def generate_structured(prompt, schema:, **options)
+       constraint = constraint_from_schema(schema)
+       config_opts = options.merge(constraint: constraint)
+       config = options[:config] || GenerationConfig.balanced(**config_opts)
+
+       result = generate(prompt, config: config, reset_cache: options.fetch(:reset_cache, true))
+
+       # Clean up the result - remove common end-of-sequence tokens
+       # that might appear after valid JSON
+       cleaned_result = result.gsub(/(<\/s>|<\|endoftext\|>|<\|im_end\|>|<end>).*$/m, '')
+
+       # Try to parse as JSON
+       begin
+         JSON.parse(cleaned_result)
+       rescue JSON::ParserError => e
+         # If cleaning didn't help, try to extract JSON from the result
+         # Look for the first complete JSON object/array
+         if match = cleaned_result.match(/(\{[^{}]*\}|\[[^\[\]]*\])/m)
+           begin
+             return JSON.parse(match[1])
+           rescue JSON::ParserError
+             # Fall through to warning
+           end
+         end
+
+         # Return the raw string if parsing fails
+         warn "Warning: Generated output is not valid JSON: #{e.message}" if options[:warn_on_parse_error]
+         result
+       end
+     end
      # Tokenizer registry for automatic detection
      TOKENIZER_REGISTRY = {
        # Exact model matches
@@ -8,6 +70,18 @@ module Candle
        "TheBloke/Llama-2-7B-Chat-GGUF" => "meta-llama/Llama-2-7b-chat-hf",
        "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" => "TinyLlama/TinyLlama-1.1B-Chat-v1.0",

+       # Qwen official GGUF models
+       "Qwen/Qwen3-8B-GGUF" => "Qwen/Qwen3-8B",
+       "Qwen/Qwen3-4B-GGUF" => "Qwen/Qwen3-4B",
+       "Qwen/Qwen3-14B-GGUF" => "Qwen/Qwen3-14B",
+       "Qwen/Qwen3-32B-GGUF" => "Qwen/Qwen3-32B",
+       "Qwen/Qwen3-72B-GGUF" => "Qwen/Qwen3-72B",
+
+       # Phi GGUF models
+       "TheBloke/phi-2-GGUF" => "microsoft/phi-2",
+       "microsoft/phi-4-gguf" => "microsoft/phi-4",
+       "bartowski/Phi-3.5-mini-instruct-GGUF" => "microsoft/Phi-3.5-mini-instruct",
+
        # Pattern-based fallbacks (evaluated in order)
        :patterns => [
          # Mistral models
@@ -27,7 +101,31 @@ module Candle
          [/gemma.*?2.*?9b/i, "google/gemma-2-9b"],
          [/gemma.*?2.*?2b/i, "google/gemma-2-2b"],
          [/gemma.*?7b/i, "google/gemma-7b"],
-         [/gemma.*?2b/i, "google/gemma-2b"]
+         [/gemma.*?2b/i, "google/gemma-2b"],
+
+         # Qwen models
+         [/qwen.*?3.*?72b/i, "Qwen/Qwen3-72B"],
+         [/qwen.*?3.*?32b/i, "Qwen/Qwen3-32B"],
+         [/qwen.*?3.*?14b/i, "Qwen/Qwen3-14B"],
+         [/qwen.*?3.*?8b/i, "Qwen/Qwen3-8B"],
+         [/qwen.*?3.*?4b/i, "Qwen/Qwen3-4B"],
+         [/qwen.*?3.*?1\.8b/i, "Qwen/Qwen3-1.8B"],
+         [/qwen.*?3.*?0\.5b/i, "Qwen/Qwen3-0.5B"],
+         [/qwen.*?2\.5/i, "Qwen/Qwen2.5-0.5B"],
+         [/qwen.*?2/i, "Qwen/Qwen2-1.5B"],
+         [/qwen/i, "Qwen/Qwen-1_8B"],
+
+         # Phi models (order matters - more specific patterns first)
+         [/phi.*?3\.5.*?mini/i, "microsoft/Phi-3.5-mini-instruct"],
+         [/phi.*?3.*?mini.*?4k/i, "microsoft/Phi-3-mini-4k-instruct"],
+         [/phi.*?3.*?medium/i, "microsoft/Phi-3-medium-4k-instruct"],
+         [/phi.*?3.*?small/i, "microsoft/Phi-3-small-8k-instruct"],
+         [/phi.*?3.*?mini/i, "microsoft/Phi-3-mini-4k-instruct"],
+         [/phi.*?3/i, "microsoft/Phi-3-mini-4k-instruct"],
+         [/phi-4/i, "microsoft/phi-4"],
+         [/phi.*?2/i, "microsoft/phi-2"],
+         [/phi.*?1\.5/i, "microsoft/phi-1_5"],
+         [/phi/i, "microsoft/phi-2"]
        ]
      }

@@ -74,7 +172,14 @@ module Candle

    def generate(prompt, config: GenerationConfig.balanced, reset_cache: true)
      begin
-       _generate(prompt, config)
+       result = _generate(prompt, config)
+
+       # If there's a constraint, clean up common EOS tokens that appear after the constrained content
+       if config.constraint
+         result = result.gsub(/(<\/s>|<\|endoftext\|>|<\|im_end\|>|<end>).*$/m, '').strip
+       end
+
+       result
      ensure
        clear_cache if reset_cache
      end
@@ -155,7 +260,8 @@ module Candle
        repetition_penalty: repetition_penalty,
        seed: seed,
        stop_sequences: stop_sequences,
-       include_prompt: include_prompt
+       include_prompt: include_prompt,
+       constraint: defined?(@constraint) ? @constraint : nil
      }.compact

      self.class.new(current_config.merge(overrides))
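On the Ruby side, the new helpers can be driven roughly as follows. This is a hedged sketch, not part of the diff: `llm` is assumed to be an already-loaded `Candle::LLM` instance, and the schema, prompts, and pattern are illustrative.

```ruby
# llm is assumed to be an already-loaded Candle::LLM instance.

# Constrain generation to a JSON schema and get parsed Ruby data back
# (generate_structured falls back to the raw string if parsing fails).
schema = {
  type: "object",
  properties: {
    name: { type: "string" },
    age:  { type: "integer" }
  },
  required: ["name", "age"]
}
person = llm.generate_structured("Describe a person as JSON:", schema: schema)

# Constrain generation to a regex; trailing EOS tokens are stripped automatically.
phone = llm.generate_regex("Call me at ", pattern: /\d{3}-\d{3}-\d{4}/)
```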
@@ -1,5 +1,5 @@
  # :nocov:
  module Candle
-   VERSION = "1.0.2"
+   VERSION = "1.1.0"
  end
  # :nocov: