red-candle 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +244 -6
- data/README.md +57 -4
- data/Rakefile +46 -1
- data/ext/candle/Cargo.toml +2 -0
- data/ext/candle/build.rs +6 -5
- data/ext/candle/extconf.rb +5 -6
- data/ext/candle/src/lib.rs +2 -0
- data/ext/candle/src/llm/constrained_generation_test.rs +123 -0
- data/ext/candle/src/llm/generation_config.rs +5 -0
- data/ext/candle/src/llm/mod.rs +5 -0
- data/ext/candle/src/llm/phi.rs +285 -0
- data/ext/candle/src/llm/quantized_gguf.rs +155 -4
- data/ext/candle/src/llm/qwen.rs +229 -0
- data/ext/candle/src/llm/text_generation.rs +66 -2
- data/ext/candle/src/ruby/device.rs +5 -0
- data/ext/candle/src/ruby/llm.rs +42 -4
- data/ext/candle/src/ruby/mod.rs +1 -0
- data/ext/candle/src/ruby/structured.rs +47 -0
- data/ext/candle/src/structured/integration_test.rs +130 -0
- data/ext/candle/src/structured/mod.rs +31 -0
- data/ext/candle/src/structured/schema_processor.rs +215 -0
- data/ext/candle/src/structured/vocabulary_adapter.rs +152 -0
- data/ext/candle/src/structured/vocabulary_adapter_real_test.rs +66 -0
- data/ext/candle/src/structured/vocabulary_adapter_simple_test.rs +70 -0
- data/lib/candle/build_info.rb +2 -2
- data/lib/candle/llm.rb +109 -3
- data/lib/candle/version.rb +1 -1
- data/lib/red-candle.rb +1 -0
- metadata +15 -4
data/ext/candle/src/structured/vocabulary_adapter.rs
ADDED
@@ -0,0 +1,152 @@
+use crate::tokenizer::TokenizerWrapper;
+use candle_core::Result as CandleResult;
+use outlines_core::vocabulary::Vocabulary;
+use std::collections::HashMap;
+
+/// Adapter to convert between red-candle's TokenizerWrapper and Outlines' Vocabulary
+pub struct VocabularyAdapter;
+
+impl VocabularyAdapter {
+    /// Convert a TokenizerWrapper's vocabulary to an Outlines Vocabulary
+    ///
+    /// # Arguments
+    /// * `tokenizer` - The tokenizer to extract vocabulary from
+    ///
+    /// # Returns
+    /// An Outlines Vocabulary suitable for use with Index construction
+    pub fn from_tokenizer(tokenizer: &TokenizerWrapper) -> CandleResult<Vocabulary> {
+        // Get the vocabulary mapping from the tokenizer
+        let vocab_map: HashMap<String, u32> = tokenizer.inner().get_vocab(true);
+
+        // Try to find EOS token in vocabulary
+        let eos_token_id = vocab_map.get("</s>")
+            .or_else(|| vocab_map.get("<|endoftext|>"))
+            .or_else(|| vocab_map.get("<eos>"))
+            .or_else(|| vocab_map.get("[SEP]"))
+            .copied();
+
+        // Create a sorted list of (token_id, token_string) pairs
+        let mut token_pairs: Vec<(u32, String)> = vocab_map
+            .into_iter()
+            .map(|(token, id)| (id, token))
+            .collect();
+
+        // Sort by token ID to ensure correct indexing
+        token_pairs.sort_by_key(|(id, _)| *id);
+
+        // Find the maximum token ID to determine vocabulary size
+        let max_token_id = token_pairs
+            .last()
+            .map(|(id, _)| *id)
+            .unwrap_or(0);
+
+        // Create vocabulary items in the format expected by Outlines
+        // We need to handle potential gaps in token IDs
+        let mut vocab_items: Vec<(String, Vec<u8>)> = Vec::new();
+        let mut current_id = 0;
+
+        for (token_id, token_string) in token_pairs {
+            // Fill gaps with placeholder tokens
+            while current_id < token_id {
+                vocab_items.push((
+                    format!("<unused_{}>", current_id),
+                    format!("<unused_{}>", current_id).into_bytes(),
+                ));
+                current_id += 1;
+            }
+
+            // Add the actual token
+            // Convert token string to bytes for Outlines
+            vocab_items.push((
+                token_string.clone(),
+                token_string.into_bytes(),
+            ));
+            current_id += 1;
+        }
+
+        // Fill any remaining gaps up to a reasonable vocabulary size
+        // This ensures we don't have issues with token IDs beyond our vocabulary
+        while current_id <= max_token_id {
+            vocab_items.push((
+                format!("<unused_{}>", current_id),
+                format!("<unused_{}>", current_id).into_bytes(),
+            ));
+            current_id += 1;
+        }
+
+        // Create the Outlines vocabulary
+        // The Vocabulary API expects us to build it token by token
+        let mut vocabulary = Vocabulary::new(
+            eos_token_id.unwrap_or(0) // Use EOS token ID or 0 as default
+        );
+
+        // Insert all tokens into the vocabulary
+        for (idx, (token, bytes)) in vocab_items.into_iter().enumerate() {
+            // Skip inserting the EOS token as it's already set in the vocabulary
+            if Some(idx as u32) == eos_token_id {
+                continue;
+            }
+
+            vocabulary.try_insert(bytes, idx as u32)
+                .map_err(|e| candle_core::Error::Msg(
+                    format!("Failed to insert token '{}': {:?}", token, e)
+                ))?;
+        }
+
+        Ok(vocabulary)
+    }
+
+    /// Get vocabulary size from a tokenizer
+    pub fn vocab_size(tokenizer: &TokenizerWrapper) -> usize {
+        tokenizer.inner().get_vocab_size(true)
+    }
+
+    /// Extract and validate special tokens
+    pub fn get_special_tokens(tokenizer: &TokenizerWrapper) -> HashMap<String, u32> {
+        let tokenizer_inner = tokenizer.inner();
+        let mut special_tokens = HashMap::new();
+
+        // Get common special tokens if they exist
+        if let Some(_token) = tokenizer_inner.id_to_token(0) {
+            special_tokens.insert("pad_token".to_string(), 0);
+        }
+
+        // Try to find EOS token
+        let vocab = tokenizer_inner.get_vocab(true);
+        if let Some(&eos_id) = vocab.get("</s>")
+            .or_else(|| vocab.get("<|endoftext|>"))
+            .or_else(|| vocab.get("<eos>"))
+            .or_else(|| vocab.get("[SEP]")) {
+            special_tokens.insert("eos_token".to_string(), eos_id);
+        }
+
+        // Try to get BOS token if it exists
+        if let Some(bos_token) = tokenizer_inner.token_to_id("<s>") {
+            special_tokens.insert("bos_token".to_string(), bos_token);
+        } else if let Some(bos_token) = tokenizer_inner.token_to_id("<|startoftext|>") {
+            special_tokens.insert("bos_token".to_string(), bos_token);
+        }
+
+        special_tokens
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    #[test]
+    fn test_vocabulary_adapter_creation() {
+        // This test will be implemented once we have a way to create test tokenizers
+        // For now, it serves as a placeholder for the test structure
+    }
+
+    #[test]
+    fn test_special_tokens_extraction() {
+        // Test special token extraction logic
+    }
+
+    #[test]
+    fn test_vocab_size() {
+        // Test vocabulary size calculation
+    }
+}
data/ext/candle/src/structured/vocabulary_adapter_real_test.rs
ADDED
@@ -0,0 +1,66 @@
+#[cfg(test)]
+mod real_tests {
+    use super::super::*;
+    use crate::tokenizer::{TokenizerWrapper, loader::TokenizerLoader};
+
+    #[tokio::test]
+    async fn test_vocabulary_conversion_with_real_outlines() {
+        // This test requires network access to download a tokenizer
+        // It verifies that our adapter works with the real outlines-core crate
+
+        // Load a simple tokenizer
+        let tokenizer_result = TokenizerLoader::from_hf_hub("bert-base-uncased", None).await;
+
+        if let Ok(tokenizer) = tokenizer_result {
+            let wrapper = TokenizerWrapper::new(tokenizer);
+
+            // Convert to Outlines vocabulary
+            let vocab_result = VocabularyAdapter::from_tokenizer(&wrapper);
+            assert!(vocab_result.is_ok(), "Vocabulary conversion should succeed");
+
+            let vocabulary = vocab_result.unwrap();
+
+            // Verify the vocabulary was created
+            // The real Vocabulary doesn't expose a size method directly,
+            // but we can verify it exists and has the correct EOS token
+            assert_eq!(vocabulary.eos_token_id(), 102); // BERT's [SEP] token
+
+            println!("✓ Successfully created Outlines Vocabulary from BERT tokenizer");
+        } else {
+            println!("⚠️ Skipping test - couldn't download tokenizer (likely offline)");
+        }
+    }
+
+    #[test]
+    fn test_vocabulary_adapter_with_mock_data() {
+        // This test doesn't require network access
+        // It uses a mock tokenizer to verify the conversion logic
+
+        use tokenizers::models::wordpiece::WordPiece;
+        use tokenizers::Tokenizer;
+        use std::collections::HashMap;
+
+        // Create a minimal vocabulary
+        let mut vocab = HashMap::new();
+        vocab.insert("[PAD]".to_string(), 0);
+        vocab.insert("[UNK]".to_string(), 1);
+        vocab.insert("[SEP]".to_string(), 2);
+        vocab.insert("hello".to_string(), 3);
+        vocab.insert("world".to_string(), 4);
+
+        let model = WordPiece::from_vocab(vocab);
+        let tokenizer = Tokenizer::new(model);
+        let wrapper = TokenizerWrapper::new(tokenizer);
+
+        // Convert to Outlines vocabulary
+        let vocab_result = VocabularyAdapter::from_tokenizer(&wrapper);
+        assert!(vocab_result.is_ok(), "Vocabulary conversion should succeed");
+
+        let vocabulary = vocab_result.unwrap();
+
+        // Verify EOS token was found
+        assert_eq!(vocabulary.eos_token_id(), 2); // [SEP] token
+
+        println!("✓ Mock vocabulary conversion successful");
+    }
+}
data/ext/candle/src/structured/vocabulary_adapter_simple_test.rs
ADDED
@@ -0,0 +1,70 @@
+#[cfg(test)]
+mod simple_tests {
+    use super::super::*;
+
+    #[test]
+    fn test_vocabulary_adapter_basic() {
+        // Create a simple mock tokenizer to test the adapter
+        // This validates that the VocabularyAdapter compiles and can be called
+
+        // Note: Creating a full tokenizer in tests is complex due to the tokenizers crate API
+        // For now, we verify compilation and will rely on integration tests
+
+        // The important thing is that this code compiles, proving our integration works
+        let _adapter = VocabularyAdapter;
+
+        // Test the static methods compile
+        // These would be tested with a real tokenizer in integration tests
+
+        // Test passes if this compiles - no output needed
+    }
+
+    #[test]
+    fn test_outlines_vocabulary_api() {
+        use outlines_core::vocabulary::Vocabulary;
+
+        // Test that we can create a Vocabulary object
+        // Use token ID 2 as EOS (like BERT's [SEP] token)
+        let mut vocab = Vocabulary::new(2);
+
+        // Test inserting tokens
+        let test_tokens = vec![
+            ("<pad>".to_string(), "<pad>".as_bytes().to_vec()),
+            ("<unk>".to_string(), "<unk>".as_bytes().to_vec()),
+            ("<sep>".to_string(), "<sep>".as_bytes().to_vec()), // EOS token at ID 2
+            ("hello".to_string(), "hello".as_bytes().to_vec()),
+            ("world".to_string(), "world".as_bytes().to_vec()),
+        ];
+
+        for (idx, (_token, bytes)) in test_tokens.into_iter().enumerate() {
+            match vocab.try_insert(bytes, idx as u32) {
+                Ok(_) => {},
+                Err(e) => {
+                    // It's ok if we can't insert the EOS token
+                    if idx != 2 {
+                        panic!("Failed to insert token at index {}: {:?}", idx, e);
+                    }
+                }
+            }
+        }
+
+        // Test passes - vocabulary API works correctly
+    }
+
+    #[test]
+    fn test_special_token_patterns() {
+
+        // Test that our special token patterns are correct
+        let test_cases = vec![
+            ("</s>", "EOS token for many models"),
+            ("<|endoftext|>", "GPT-style EOS token"),
+            ("<eos>", "Alternative EOS token"),
+            ("[SEP]", "BERT-style separator"),
+            ("<s>", "BOS token"),
+            ("<|startoftext|>", "GPT-style BOS token"),
+        ];
+
+        // Just verify the patterns exist - no output needed
+        assert_eq!(test_cases.len(), 6, "Should have 6 special token patterns");
+    }
+}
data/lib/candle/build_info.rb
CHANGED
@@ -15,8 +15,8 @@ module Candle
       if cuda_potentially_available
         warn "=" * 80
         warn "Red Candle: CUDA detected on system but not enabled in build."
-        warn "
-        warn "
+        warn "This may be due to CANDLE_DISABLE_CUDA being set during installation."
+        warn "To enable CUDA support, reinstall without CANDLE_DISABLE_CUDA set."
         warn "=" * 80
       end
       # :nocov:
data/lib/candle/llm.rb
CHANGED
@@ -1,5 +1,67 @@
+require 'json'
+
 module Candle
   class LLM
+    # Create a structured constraint from a JSON schema
+    def constraint_from_schema(schema)
+      schema_str = schema.is_a?(String) ? schema : JSON.generate(schema)
+      StructuredConstraint.from_schema(schema_str, tokenizer)
+    end
+
+    # Create a structured constraint from a regex pattern
+    def constraint_from_regex(pattern)
+      pattern_str = pattern.is_a?(Regexp) ? pattern.source : pattern.to_s
+      StructuredConstraint.from_regex(pattern_str, tokenizer)
+    end
+
+    # Generate with regex constraint
+    def generate_regex(prompt, pattern:, **options)
+      constraint = constraint_from_regex(pattern)
+
+      # Add common EOS tokens as stop sequences for regex generation
+      stop_sequences = options[:stop_sequences] || []
+      stop_sequences += ["</s>", "<|endoftext|>", "<|im_end|>", "<end>", "\n"] unless options[:no_auto_stop]
+
+      config_opts = options.merge(constraint: constraint, stop_sequences: stop_sequences)
+      config = options[:config] || GenerationConfig.balanced(**config_opts)
+
+      result = generate(prompt, config: config, reset_cache: options.fetch(:reset_cache, true))
+
+      # Clean up any trailing EOS tokens
+      result.gsub(/(<\/s>|<\|endoftext\|>|<\|im_end\|>|<end>).*$/m, '').strip
+    end
+
+    # Generate and parse structured output from a JSON schema
+    def generate_structured(prompt, schema:, **options)
+      constraint = constraint_from_schema(schema)
+      config_opts = options.merge(constraint: constraint)
+      config = options[:config] || GenerationConfig.balanced(**config_opts)
+
+      result = generate(prompt, config: config, reset_cache: options.fetch(:reset_cache, true))
+
+      # Clean up the result - remove common end-of-sequence tokens
+      # that might appear after valid JSON
+      cleaned_result = result.gsub(/(<\/s>|<\|endoftext\|>|<\|im_end\|>|<end>).*$/m, '')
+
+      # Try to parse as JSON
+      begin
+        JSON.parse(cleaned_result)
+      rescue JSON::ParserError => e
+        # If cleaning didn't help, try to extract JSON from the result
+        # Look for the first complete JSON object/array
+        if match = cleaned_result.match(/(\{[^{}]*\}|\[[^\[\]]*\])/m)
+          begin
+            return JSON.parse(match[1])
+          rescue JSON::ParserError
+            # Fall through to warning
+          end
+        end
+
+        # Return the raw string if parsing fails
+        warn "Warning: Generated output is not valid JSON: #{e.message}" if options[:warn_on_parse_error]
+        result
+      end
+    end
     # Tokenizer registry for automatic detection
     TOKENIZER_REGISTRY = {
       # Exact model matches
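The two helpers above are the new user-facing entry points for constrained generation. A minimal usage sketch (not part of the diff; it assumes `llm` is an already loaded Candle::LLM instance and that the model actually follows the constraint):

    # Sketch only: `llm` is assumed to be a loaded Candle::LLM instance.
    schema = {
      type: "object",
      properties: { name: { type: "string" }, age: { type: "integer" } },
      required: ["name", "age"]
    }

    # Returns a parsed Hash when the constrained output is valid JSON,
    # otherwise the raw generated string is returned unchanged.
    person = llm.generate_structured("Describe a person as JSON.", schema: schema)

    # Accepts a Regexp or String pattern; trailing EOS tokens are stripped.
    phone = llm.generate_regex("Call me at ", pattern: /\d{3}-\d{3}-\d{4}/)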
@@ -8,6 +70,18 @@ module Candle
       "TheBloke/Llama-2-7B-Chat-GGUF" => "meta-llama/Llama-2-7b-chat-hf",
       "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" => "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
 
+      # Qwen official GGUF models
+      "Qwen/Qwen3-8B-GGUF" => "Qwen/Qwen3-8B",
+      "Qwen/Qwen3-4B-GGUF" => "Qwen/Qwen3-4B",
+      "Qwen/Qwen3-14B-GGUF" => "Qwen/Qwen3-14B",
+      "Qwen/Qwen3-32B-GGUF" => "Qwen/Qwen3-32B",
+      "Qwen/Qwen3-72B-GGUF" => "Qwen/Qwen3-72B",
+
+      # Phi GGUF models
+      "TheBloke/phi-2-GGUF" => "microsoft/phi-2",
+      "microsoft/phi-4-gguf" => "microsoft/phi-4",
+      "bartowski/Phi-3.5-mini-instruct-GGUF" => "microsoft/Phi-3.5-mini-instruct",
+
       # Pattern-based fallbacks (evaluated in order)
       :patterns => [
         # Mistral models
@@ -27,7 +101,31 @@ module Candle
         [/gemma.*?2.*?9b/i, "google/gemma-2-9b"],
         [/gemma.*?2.*?2b/i, "google/gemma-2-2b"],
         [/gemma.*?7b/i, "google/gemma-7b"],
-        [/gemma.*?2b/i, "google/gemma-2b"]
+        [/gemma.*?2b/i, "google/gemma-2b"],
+
+        # Qwen models
+        [/qwen.*?3.*?72b/i, "Qwen/Qwen3-72B"],
+        [/qwen.*?3.*?32b/i, "Qwen/Qwen3-32B"],
+        [/qwen.*?3.*?14b/i, "Qwen/Qwen3-14B"],
+        [/qwen.*?3.*?8b/i, "Qwen/Qwen3-8B"],
+        [/qwen.*?3.*?4b/i, "Qwen/Qwen3-4B"],
+        [/qwen.*?3.*?1\.8b/i, "Qwen/Qwen3-1.8B"],
+        [/qwen.*?3.*?0\.5b/i, "Qwen/Qwen3-0.5B"],
+        [/qwen.*?2\.5/i, "Qwen/Qwen2.5-0.5B"],
+        [/qwen.*?2/i, "Qwen/Qwen2-1.5B"],
+        [/qwen/i, "Qwen/Qwen-1_8B"],
+
+        # Phi models (order matters - more specific patterns first)
+        [/phi.*?3\.5.*?mini/i, "microsoft/Phi-3.5-mini-instruct"],
+        [/phi.*?3.*?mini.*?4k/i, "microsoft/Phi-3-mini-4k-instruct"],
+        [/phi.*?3.*?medium/i, "microsoft/Phi-3-medium-4k-instruct"],
+        [/phi.*?3.*?small/i, "microsoft/Phi-3-small-8k-instruct"],
+        [/phi.*?3.*?mini/i, "microsoft/Phi-3-mini-4k-instruct"],
+        [/phi.*?3/i, "microsoft/Phi-3-mini-4k-instruct"],
+        [/phi-4/i, "microsoft/phi-4"],
+        [/phi.*?2/i, "microsoft/phi-2"],
+        [/phi.*?1\.5/i, "microsoft/phi-1_5"],
+        [/phi/i, "microsoft/phi-2"]
       ]
     }
 
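The registry above is plain data, so the resolution behaviour the new entries enable can be sketched directly (illustrative only; the lookup helper that consumes TOKENIZER_REGISTRY lives elsewhere in llm.rb and is unchanged here, and the fallback repo name below is hypothetical):

    registry = Candle::LLM::TOKENIZER_REGISTRY

    # Exact matches added in this release
    registry["Qwen/Qwen3-8B-GGUF"]    # => "Qwen/Qwen3-8B"
    registry["microsoft/phi-4-gguf"]  # => "microsoft/phi-4"

    # Unknown repos fall back to the ordered pattern list
    repo = "someone/Phi-3.5-mini-instruct-Q4_K_M-GGUF"   # hypothetical repo name
    _pattern, tokenizer = registry[:patterns].find { |re, _| repo =~ re }
    tokenizer                         # => "microsoft/Phi-3.5-mini-instruct"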
@@ -74,7 +172,14 @@ module Candle
 
     def generate(prompt, config: GenerationConfig.balanced, reset_cache: true)
       begin
-        _generate(prompt, config)
+        result = _generate(prompt, config)
+
+        # If there's a constraint, clean up common EOS tokens that appear after the constrained content
+        if config.constraint
+          result = result.gsub(/(<\/s>|<\|endoftext\|>|<\|im_end\|>|<end>).*$/m, '').strip
+        end
+
+        result
       ensure
         clear_cache if reset_cache
       end
@@ -155,7 +260,8 @@ module Candle
         repetition_penalty: repetition_penalty,
         seed: seed,
         stop_sequences: stop_sequences,
-        include_prompt: include_prompt
+        include_prompt: include_prompt,
+        constraint: defined?(@constraint) ? @constraint : nil
       }.compact
 
       self.class.new(current_config.merge(overrides))
data/lib/candle/version.rb
CHANGED
data/lib/red-candle.rb
ADDED
@@ -0,0 +1 @@
+require 'candle'
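The new file makes the gem loadable under its gem name as well; a one-line sketch:

    require 'red-candle'   # equivalent to `require 'candle'`, which the new file delegates to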
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: red-candle
 version: !ruby/object:Gem::Version
-  version: 1.0
+  version: 1.1.0
 platform: ruby
 authors:
 - Christopher Petersen
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2025-07-
+date: 2025-07-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rb_sys
@@ -159,12 +159,15 @@ files:
 - ext/candle/extconf.rb
 - ext/candle/rustfmt.toml
 - ext/candle/src/lib.rs
+- ext/candle/src/llm/constrained_generation_test.rs
 - ext/candle/src/llm/gemma.rs
 - ext/candle/src/llm/generation_config.rs
 - ext/candle/src/llm/llama.rs
 - ext/candle/src/llm/mistral.rs
 - ext/candle/src/llm/mod.rs
+- ext/candle/src/llm/phi.rs
 - ext/candle/src/llm/quantized_gguf.rs
+- ext/candle/src/llm/qwen.rs
 - ext/candle/src/llm/text_generation.rs
 - ext/candle/src/ner.rs
 - ext/candle/src/reranker.rs
@@ -175,9 +178,16 @@ files:
 - ext/candle/src/ruby/llm.rs
 - ext/candle/src/ruby/mod.rs
 - ext/candle/src/ruby/result.rs
+- ext/candle/src/ruby/structured.rs
 - ext/candle/src/ruby/tensor.rs
 - ext/candle/src/ruby/tokenizer.rs
 - ext/candle/src/ruby/utils.rs
+- ext/candle/src/structured/integration_test.rs
+- ext/candle/src/structured/mod.rs
+- ext/candle/src/structured/schema_processor.rs
+- ext/candle/src/structured/vocabulary_adapter.rs
+- ext/candle/src/structured/vocabulary_adapter_real_test.rs
+- ext/candle/src/structured/vocabulary_adapter_simple_test.rs
 - ext/candle/src/tokenizer/loader.rs
 - ext/candle/src/tokenizer/mod.rs
 - ext/candle/target/release/build/bindgen-0f89ba23b9ca1395/out/host-target.txt
@@ -197,6 +207,7 @@ files:
 - lib/candle/tensor.rb
 - lib/candle/tokenizer.rb
 - lib/candle/version.rb
+- lib/red-candle.rb
 homepage: https://github.com/assaydepot/red-candle
 licenses:
 - MIT
@@ -209,14 +220,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.
+      version: 3.2.0
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
       version: 3.3.26
 requirements:
-- Rust >= 1.
+- Rust >= 1.85
 rubygems_version: 3.5.3
 signing_key:
 specification_version: 4