red-candle 1.2.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,4 +1,4 @@
- <img src="/docs/assets/logo-title.png" alt="red-candle" height="80px">
+ <img src="/docs/assets/logo-title.png" alt="red-candle" height="160px">

  [![build](https://github.com/scientist-labs/red-candle/actions/workflows/build.yml/badge.svg)](https://github.com/scientist-labs/red-candle/actions/workflows/build.yml)
  [![Gem Version](https://badge.fury.io/rb/red-candle.svg)](https://badge.fury.io/rb/red-candle)
@@ -12,8 +12,8 @@ crate-type = ["cdylib"]
  candle-core = { version = "0.9.1" }
  candle-nn = { version = "0.9.1" }
  candle-transformers = { version = "0.9.1" }
- tokenizers = { version = "0.21.1", default-features = true, features = ["fancy-regex"] }
- hf-hub = "0.4.3"
+ tokenizers = { version = "0.22.0", default-features = true, features = ["fancy-regex"] }
+ hf-hub = "0.4.1"
  half = "2.6.0"
  magnus = "0.7.1"
  safetensors = "0.3"
@@ -21,7 +21,7 @@ serde_json = "1.0"
  serde = { version = "1.0", features = ["derive"] }
  tokio = { version = "1.45", features = ["rt", "macros"] }
  rand = "0.8"
- outlines-core = "0.2"
+ outlines-core = "0.2.11"

  [features]
  default = []
@@ -30,8 +30,8 @@ impl Gemma {
  &self.tokenizer
  }

- /// Load a Gemma model from HuggingFace Hub
- pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
+ /// Load a Gemma model from HuggingFace Hub with optional custom tokenizer
+ pub async fn from_pretrained_with_tokenizer(model_id: &str, device: Device, tokenizer_source: Option<&str>) -> CandleResult<Self> {
  let api = Api::new()
  .map_err(|e| candle_core::Error::Msg(format!("Failed to create HF API: {}", e)))?;

@@ -43,10 +43,23 @@ impl Gemma {
  .await
  .map_err(|e| candle_core::Error::Msg(format!("Failed to download config: {}", e)))?;

- let tokenizer_filename = repo
- .get("tokenizer.json")
- .await
- .map_err(|e| candle_core::Error::Msg(format!("Failed to download tokenizer: {}", e)))?;
+ // Download tokenizer from custom source if provided, otherwise from model repo
+ let tokenizer = if let Some(tokenizer_id) = tokenizer_source {
+ let tokenizer_repo = api.repo(Repo::model(tokenizer_id.to_string()));
+ let tokenizer_filename = tokenizer_repo
+ .get("tokenizer.json")
+ .await
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to download tokenizer from {}: {}", tokenizer_id, e)))?;
+ Tokenizer::from_file(tokenizer_filename)
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer: {}", e)))?
+ } else {
+ let tokenizer_filename = repo
+ .get("tokenizer.json")
+ .await
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to download tokenizer: {}", e)))?;
+ Tokenizer::from_file(tokenizer_filename)
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer: {}", e)))?
+ };

  // Try different file patterns for model weights
  let weights_filenames = if let Ok(single_file) = repo.get("model.safetensors").await {
@@ -87,9 +100,6 @@ impl Gemma {
  let config: Config = serde_json::from_reader(std::fs::File::open(config_filename)?)
  .map_err(|e| candle_core::Error::Msg(format!("Failed to parse config: {}", e)))?;

- // Load tokenizer
- let tokenizer = Tokenizer::from_file(tokenizer_filename)
- .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer: {}", e)))?;

  // Gemma uses specific tokens
  let eos_token_id = {
@@ -116,6 +126,11 @@ impl Gemma {
  })
  }

+ /// Load a Gemma model from HuggingFace Hub (backwards compatibility)
+ pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
+ Self::from_pretrained_with_tokenizer(model_id, device, None).await
+ }
+
  /// Create from existing components (useful for testing)
  pub fn new(
  model: GemmaModel,
@@ -37,8 +37,8 @@ impl Llama {
  &self.tokenizer
  }

- /// Load a Llama model from HuggingFace Hub
- pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
+ /// Load a Llama model from HuggingFace Hub with optional custom tokenizer
+ pub async fn from_pretrained_with_tokenizer(model_id: &str, device: Device, tokenizer_source: Option<&str>) -> CandleResult<Self> {
  let api = Api::new()
  .map_err(|e| candle_core::Error::Msg(format!("Failed to create HF API: {}", e)))?;

@@ -50,10 +50,45 @@ impl Llama {
  .await
  .map_err(|e| candle_core::Error::Msg(format!("Failed to download config: {}", e)))?;

- let tokenizer_filename = repo
- .get("tokenizer.json")
- .await
- .map_err(|e| candle_core::Error::Msg(format!("Failed to download tokenizer: {}", e)))?;
+ // Download tokenizer from custom source if provided, otherwise from model repo
+ let tokenizer = if let Some(tokenizer_id) = tokenizer_source {
+ let tokenizer_repo = api.repo(Repo::model(tokenizer_id.to_string()));
+ let tokenizer_filename = tokenizer_repo
+ .get("tokenizer.json")
+ .await
+ .map_err(|e| {
+ let error_msg = if e.to_string().contains("404") || e.to_string().contains("Not Found") {
+ format!("Tokenizer file 'tokenizer.json' not found in repository '{}'. The repository may not have a tokenizer.json file or may use a different format (e.g., tokenizer.model for SentencePiece).", tokenizer_id)
+ } else if e.to_string().contains("401") || e.to_string().contains("Unauthorized") {
+ format!("Authentication required to access tokenizer '{}'. You may need to set HF_TOKEN environment variable with a valid Hugging Face token.", tokenizer_id)
+ } else if e.to_string().contains("timed out") || e.to_string().contains("connection") {
+ format!("Network error downloading tokenizer from '{}': {}. Please check your internet connection.", tokenizer_id, e)
+ } else {
+ format!("Failed to download tokenizer from '{}': {}", tokenizer_id, e)
+ };
+ candle_core::Error::Msg(error_msg)
+ })?;
+ Tokenizer::from_file(tokenizer_filename)
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer file: {}", e)))?
+ } else {
+ let tokenizer_filename = repo
+ .get("tokenizer.json")
+ .await
+ .map_err(|e| {
+ let error_msg = if e.to_string().contains("404") || e.to_string().contains("Not Found") {
+ format!("No tokenizer found in model repository '{}'. The model may not include a tokenizer. Try specifying a tokenizer explicitly using the 'tokenizer' parameter, e.g.: from_pretrained('{}', tokenizer: 'appropriate-tokenizer-repo')", model_id, model_id)
+ } else if e.to_string().contains("401") || e.to_string().contains("Unauthorized") {
+ format!("Authentication required to access model '{}'. You may need to set HF_TOKEN environment variable with a valid Hugging Face token.", model_id)
+ } else if e.to_string().contains("timed out") || e.to_string().contains("connection") {
+ format!("Network error downloading tokenizer: {}. Please check your internet connection.", e)
+ } else {
+ format!("Failed to download tokenizer: {}", e)
+ };
+ candle_core::Error::Msg(error_msg)
+ })?;
+ Tokenizer::from_file(tokenizer_filename)
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer file: {}", e)))?
+ };

  // Try different file patterns for model weights
  let weights_filenames = if let Ok(single_file) = repo.get("model.safetensors").await {
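The Llama and Mistral loaders above classify Hub download failures by matching on the error text. A standalone, illustrative sketch of that classification (the helper name is hypothetical; the matched fragments are taken from the branches above):

```rust
// Illustrative sketch: bucket a Hugging Face Hub download error by its
// message text, mirroring the 404 / 401 / network branches in this release.
fn classify_hub_error(msg: &str) -> &'static str {
    if msg.contains("404") || msg.contains("Not Found") {
        "missing file: the repo has no tokenizer.json (it may ship tokenizer.model instead)"
    } else if msg.contains("401") || msg.contains("Unauthorized") {
        "authentication required: set the HF_TOKEN environment variable"
    } else if msg.contains("timed out") || msg.contains("connection") {
        "network error: check the internet connection"
    } else {
        "other failure"
    }
}

fn main() {
    assert_eq!(
        classify_hub_error("request failed: 401 Unauthorized"),
        "authentication required: set the HF_TOKEN environment variable"
    );
}
```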
@@ -97,10 +132,6 @@ impl Llama {
  .map_err(|e| candle_core::Error::Msg(format!("Failed to parse config: {}", e)))?;
  let config = llama_config.into_config(false); // Don't use flash attention for now

- // Load tokenizer
- let tokenizer = Tokenizer::from_file(tokenizer_filename)
- .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer: {}", e)))?;
-
  // Determine EOS token ID based on model type
  let eos_token_id = if model_id.contains("Llama-3") || model_id.contains("llama-3") {
  // Llama 3 uses different special tokens
@@ -139,6 +170,11 @@ impl Llama {
  })
  }

+ /// Load a Llama model from HuggingFace Hub (backwards compatibility)
+ pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
+ Self::from_pretrained_with_tokenizer(model_id, device, None).await
+ }
+
  /// Create from existing components (useful for testing)
  pub fn new(
  model: LlamaModel,
@@ -30,8 +30,8 @@ impl Mistral {
  &self.tokenizer
  }

- /// Load a Mistral model from HuggingFace Hub
- pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
+ /// Load a Mistral model from HuggingFace Hub with optional custom tokenizer
+ pub async fn from_pretrained_with_tokenizer(model_id: &str, device: Device, tokenizer_source: Option<&str>) -> CandleResult<Self> {
  let api = Api::new()
  .map_err(|e| candle_core::Error::Msg(format!("Failed to create HF API: {}", e)))?;

@@ -43,10 +43,45 @@ impl Mistral {
  .await
  .map_err(|e| candle_core::Error::Msg(format!("Failed to download config: {}", e)))?;

- let tokenizer_filename = repo
- .get("tokenizer.json")
- .await
- .map_err(|e| candle_core::Error::Msg(format!("Failed to download tokenizer: {}", e)))?;
+ // Download tokenizer from custom source if provided, otherwise from model repo
+ let tokenizer = if let Some(tokenizer_id) = tokenizer_source {
+ let tokenizer_repo = api.repo(Repo::model(tokenizer_id.to_string()));
+ let tokenizer_filename = tokenizer_repo
+ .get("tokenizer.json")
+ .await
+ .map_err(|e| {
+ let error_msg = if e.to_string().contains("404") || e.to_string().contains("Not Found") {
+ format!("Tokenizer file 'tokenizer.json' not found in repository '{}'. The repository may not have a tokenizer.json file or may use a different format (e.g., tokenizer.model for SentencePiece).", tokenizer_id)
+ } else if e.to_string().contains("401") || e.to_string().contains("Unauthorized") {
+ format!("Authentication required to access tokenizer '{}'. You may need to set HF_TOKEN environment variable with a valid Hugging Face token.", tokenizer_id)
+ } else if e.to_string().contains("timed out") || e.to_string().contains("connection") {
+ format!("Network error downloading tokenizer from '{}': {}. Please check your internet connection.", tokenizer_id, e)
+ } else {
+ format!("Failed to download tokenizer from '{}': {}", tokenizer_id, e)
+ };
+ candle_core::Error::Msg(error_msg)
+ })?;
+ Tokenizer::from_file(tokenizer_filename)
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer file: {}", e)))?
+ } else {
+ let tokenizer_filename = repo
+ .get("tokenizer.json")
+ .await
+ .map_err(|e| {
+ let error_msg = if e.to_string().contains("404") || e.to_string().contains("Not Found") {
+ format!("No tokenizer found in model repository '{}'. The model may not include a tokenizer. Try specifying a tokenizer explicitly using the 'tokenizer' parameter, e.g.: from_pretrained('{}', tokenizer: 'mistralai/Mistral-7B-Instruct-v0.2')", model_id, model_id)
+ } else if e.to_string().contains("401") || e.to_string().contains("Unauthorized") {
+ format!("Authentication required to access model '{}'. You may need to set HF_TOKEN environment variable with a valid Hugging Face token.", model_id)
+ } else if e.to_string().contains("timed out") || e.to_string().contains("connection") {
+ format!("Network error downloading tokenizer: {}. Please check your internet connection.", e)
+ } else {
+ format!("Failed to download tokenizer: {}", e)
+ };
+ candle_core::Error::Msg(error_msg)
+ })?;
+ Tokenizer::from_file(tokenizer_filename)
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer file: {}", e)))?
+ };

  // Try different file patterns for model weights
  let weights_filenames = if let Ok(single_file) = repo.get("model.safetensors").await {
@@ -97,10 +132,6 @@ impl Mistral {
  let config: Config = serde_json::from_reader(std::fs::File::open(config_filename)?)
  .map_err(|e| candle_core::Error::Msg(format!("Failed to parse config: {}", e)))?;

- // Load tokenizer
- let tokenizer = Tokenizer::from_file(tokenizer_filename)
- .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer: {}", e)))?;
-
  let eos_token_id = tokenizer
  .get_vocab(true)
  .get("</s>")
@@ -123,6 +154,11 @@ impl Mistral {
  })
  }

+ /// Load a Mistral model from HuggingFace Hub (backwards compatibility)
+ pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
+ Self::from_pretrained_with_tokenizer(model_id, device, None).await
+ }
+
  /// Create from existing components (useful for testing)
  pub fn new(
  model: MistralModel,
@@ -38,8 +38,8 @@ impl Phi {
  }
  }

- /// Load a Phi model from HuggingFace
- pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
+ /// Load a Phi model from HuggingFace with optional custom tokenizer
+ pub async fn from_pretrained_with_tokenizer(model_id: &str, device: Device, tokenizer_source: Option<&str>) -> CandleResult<Self> {
  let api = Api::new()
  .map_err(|e| candle_core::Error::Msg(format!("Failed to create HF API: {}", e)))?;

@@ -50,11 +50,19 @@ impl Phi {
  .map_err(|e| candle_core::Error::Msg(format!("Failed to download config: {}", e)))?;
  let config_str = std::fs::read_to_string(config_filename)?;

- // Download tokenizer
- let tokenizer_filename = repo.get("tokenizer.json").await
- .map_err(|e| candle_core::Error::Msg(format!("Failed to download tokenizer: {}", e)))?;
- let tokenizer = Tokenizer::from_file(tokenizer_filename)
- .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer: {}", e)))?;
+ // Download tokenizer from custom source if provided, otherwise from model repo
+ let tokenizer = if let Some(tokenizer_id) = tokenizer_source {
+ let tokenizer_repo = api.model(tokenizer_id.to_string());
+ let tokenizer_filename = tokenizer_repo.get("tokenizer.json").await
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to download tokenizer from {}: {}", tokenizer_id, e)))?;
+ Tokenizer::from_file(tokenizer_filename)
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer: {}", e)))?
+ } else {
+ let tokenizer_filename = repo.get("tokenizer.json").await
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to download tokenizer: {}", e)))?;
+ Tokenizer::from_file(tokenizer_filename)
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer: {}", e)))?
+ };

  // Determine EOS token
  let vocab = tokenizer.get_vocab(true);
@@ -104,7 +112,62 @@ impl Phi {

  let model = if is_phi3 {
  // Load Phi3 model
- let config: Phi3Config = serde_json::from_str(&config_str)
+ // Handle config differences between Phi-3-small and Phi-3-mini
+ let mut config_str_fixed;
+
+ // Parse config as JSON for modifications
+ let mut config_json: serde_json::Value = serde_json::from_str(&config_str)
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to parse config JSON: {}", e)))?;
+
+ // Phi-3-small uses ff_intermediate_size instead of intermediate_size
+ if config_json.get("ff_intermediate_size").is_some() && config_json.get("intermediate_size").is_none() {
+ if let Some(ff_size) = config_json.get("ff_intermediate_size").cloned() {
+ config_json["intermediate_size"] = ff_size;
+ }
+ }
+
+ // Phi-3-small uses layer_norm_epsilon instead of rms_norm_eps
+ if config_json.get("layer_norm_epsilon").is_some() && config_json.get("rms_norm_eps").is_none() {
+ if let Some(eps) = config_json.get("layer_norm_epsilon").cloned() {
+ config_json["rms_norm_eps"] = eps;
+ }
+ }
+
+ // Handle rope_scaling for long context models (Phi-3-mini-128k)
+ // Candle expects rope_scaling to be a string, but newer configs have it as an object
+ if let Some(rope_scaling) = config_json.get("rope_scaling") {
+ if rope_scaling.is_object() {
+ // For now, just convert to the type string - candle will use default scaling
+ if let Some(scaling_type) = rope_scaling.get("type").and_then(|v| v.as_str()) {
+ config_json["rope_scaling"] = serde_json::Value::String(scaling_type.to_string());
+ } else {
+ // Remove it if we can't determine the type
+ config_json.as_object_mut().unwrap().remove("rope_scaling");
+ }
+ }
+ }
+
+ // Phi-3-small uses rope_embedding_base instead of rope_theta
+ if config_json.get("rope_embedding_base").is_some() && config_json.get("rope_theta").is_none() {
+ if let Some(rope_base) = config_json.get("rope_embedding_base").cloned() {
+ config_json["rope_theta"] = rope_base;
+ }
+ }
+
+ config_str_fixed = serde_json::to_string(&config_json)
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to serialize config: {}", e)))?;
+
+ // Check for unsupported gegelu activation
+ if config_str_fixed.contains("\"gegelu\"") {
+ // For now, map gegelu to gelu_pytorch_tanh with a warning
+ // This is not ideal but allows the model to at least load
+ eprintln!("WARNING: This model uses 'gegelu' activation which is not fully supported.");
+ eprintln!("         Mapping to 'gelu_pytorch_tanh' - results may be degraded.");
+ eprintln!("         For best results, use Phi-3-mini models instead.");
+ config_str_fixed = config_str_fixed.replace("\"gegelu\"", "\"gelu_pytorch_tanh\"");
+ }
+
+ let config: Phi3Config = serde_json::from_str(&config_str_fixed)
  .map_err(|e| candle_core::Error::Msg(format!("Failed to parse Phi3 config: {}", e)))?;

  let vb = unsafe {
@@ -134,6 +197,11 @@ impl Phi {
  eos_token_id,
  })
  }
+
+ /// Load a Phi model from HuggingFace (backwards compatibility)
+ pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
+ Self::from_pretrained_with_tokenizer(model_id, device, None).await
+ }

  /// Apply Phi chat template to messages
  pub fn apply_chat_template(&self, messages: &[serde_json::Value]) -> CandleResult<String> {
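The Phi-3 config handling above renames several Phi-3-small keys to the names Candle's Phi3Config expects. A compact, illustrative restatement of those aliases using serde_json (the helper name and sample values are made up):

```rust
// Illustrative only: apply the Phi-3-small -> Phi3Config key aliases
// described above to a made-up config fragment.
use serde_json::{json, Value};

fn normalize_phi3_config(mut cfg: Value) -> Value {
    for (from, to) in [
        ("ff_intermediate_size", "intermediate_size"),
        ("layer_norm_epsilon", "rms_norm_eps"),
        ("rope_embedding_base", "rope_theta"),
    ] {
        if cfg.get(from).is_some() && cfg.get(to).is_none() {
            let v = cfg[from].clone();
            cfg[to] = v;
        }
    }
    cfg
}

fn main() {
    let cfg = json!({ "ff_intermediate_size": 14336, "layer_norm_epsilon": 1e-5 });
    let fixed = normalize_phi3_config(cfg);
    assert_eq!(fixed["intermediate_size"], json!(14336));
    assert_eq!(fixed["rms_norm_eps"], json!(1e-5));
}
```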
@@ -30,8 +30,8 @@ impl Qwen {
  self.model.clear_kv_cache();
  }

- /// Load a Qwen model from HuggingFace
- pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
+ /// Load a Qwen model from HuggingFace with optional custom tokenizer
+ pub async fn from_pretrained_with_tokenizer(model_id: &str, device: Device, tokenizer_source: Option<&str>) -> CandleResult<Self> {
  let api = Api::new()
  .map_err(|e| candle_core::Error::Msg(format!("Failed to create HF API: {}", e)))?;

@@ -44,19 +44,27 @@ impl Qwen {
  let config: Config = serde_json::from_str(&config_str)
  .map_err(|e| candle_core::Error::Msg(format!("Failed to parse config: {}", e)))?;

- // Download tokenizer
- let tokenizer_filename = repo.get("tokenizer.json").await
- .map_err(|e| candle_core::Error::Msg(format!("Failed to download tokenizer: {}", e)))?;
- let tokenizer = Tokenizer::from_file(tokenizer_filename)
- .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer: {}", e)))?;
+ // Download tokenizer from custom source if provided, otherwise from model repo
+ let tokenizer = if let Some(tokenizer_id) = tokenizer_source {
+ let tokenizer_repo = api.model(tokenizer_id.to_string());
+ let tokenizer_filename = tokenizer_repo.get("tokenizer.json").await
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to download tokenizer from {}: {}", tokenizer_id, e)))?;
+ Tokenizer::from_file(tokenizer_filename)
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer: {}", e)))?
+ } else {
+ let tokenizer_filename = repo.get("tokenizer.json").await
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to download tokenizer: {}", e)))?;
+ Tokenizer::from_file(tokenizer_filename)
+ .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer: {}", e)))?
+ };

  // Determine EOS token
  let vocab = tokenizer.get_vocab(true);
- let eos_token_id = vocab.get("<|endoftext|>")
- .or_else(|| vocab.get("<|im_end|>"))
+ let eos_token_id = vocab.get("<|im_end|>")
+ .or_else(|| vocab.get("<|endoftext|>"))
  .or_else(|| vocab.get("</s>"))
  .copied()
- .unwrap_or(151643); // Default Qwen3 EOS token
+ .unwrap_or(151645); // Default Qwen2.5 EOS token

  // Download model weights
  // NOTE: Qwen uses hardcoded shard counts based on model size rather than
@@ -97,6 +105,11 @@ impl Qwen {
  })
  }

+ /// Load a Qwen model from HuggingFace (backwards compatibility)
+ pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
+ Self::from_pretrained_with_tokenizer(model_id, device, None).await
+ }
+
  /// Apply Qwen chat template to messages
  pub fn apply_chat_template(&self, messages: &[serde_json::Value]) -> CandleResult<String> {
  let mut prompt = String::new();
@@ -257,14 +257,15 @@ impl LLM {
  let model_lower = model_id.to_lowercase();
  let is_quantized = model_lower.contains("gguf") || model_lower.contains("-q4") || model_lower.contains("-q5") || model_lower.contains("-q8");

+ // Extract tokenizer source if provided in model_id (for both GGUF and regular models)
+ let (model_id_clean, tokenizer_source) = if let Some(pos) = model_id.find("@@") {
+ let (id, _tok) = model_id.split_at(pos);
+ (id.to_string(), Some(&model_id[pos+2..]))
+ } else {
+ (model_id.clone(), None)
+ };
+
  let model = if is_quantized {
- // Extract tokenizer source if provided in model_id
- let (model_id_clean, tokenizer_source) = if let Some(pos) = model_id.find("@@") {
- let (id, _tok) = model_id.split_at(pos);
- (id.to_string(), Some(&model_id[pos+2..]))
- } else {
- (model_id.clone(), None)
- };

  // Use unified GGUF loader for all quantized models
  let gguf_model = rt.block_on(async {
@@ -273,41 +274,73 @@ impl LLM {
  .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to load GGUF model: {}", e)))?;
  ModelType::QuantizedGGUF(gguf_model)
  } else {
- // Load non-quantized models
- if model_lower.contains("mistral") {
- let mistral = rt.block_on(async {
- RustMistral::from_pretrained(&model_id, candle_device).await
- })
+ // Load non-quantized models based on type
+ let model_lower_clean = model_id_clean.to_lowercase();
+
+ if model_lower_clean.contains("mistral") {
+ let mistral = if tokenizer_source.is_some() {
+ rt.block_on(async {
+ RustMistral::from_pretrained_with_tokenizer(&model_id_clean, candle_device, tokenizer_source).await
+ })
+ } else {
+ rt.block_on(async {
+ RustMistral::from_pretrained(&model_id_clean, candle_device).await
+ })
+ }
  .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to load model: {}", e)))?;
  ModelType::Mistral(mistral)
- } else if model_lower.contains("llama") || model_lower.contains("meta-llama") || model_lower.contains("tinyllama") {
- let llama = rt.block_on(async {
- RustLlama::from_pretrained(&model_id, candle_device).await
- })
+ } else if model_lower_clean.contains("llama") || model_lower_clean.contains("meta-llama") || model_lower_clean.contains("tinyllama") {
+ let llama = if tokenizer_source.is_some() {
+ rt.block_on(async {
+ RustLlama::from_pretrained_with_tokenizer(&model_id_clean, candle_device, tokenizer_source).await
+ })
+ } else {
+ rt.block_on(async {
+ RustLlama::from_pretrained(&model_id_clean, candle_device).await
+ })
+ }
  .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to load model: {}", e)))?;
  ModelType::Llama(llama)
- } else if model_lower.contains("gemma") || model_lower.contains("google/gemma") {
- let gemma = rt.block_on(async {
- RustGemma::from_pretrained(&model_id, candle_device).await
- })
+ } else if model_lower_clean.contains("gemma") || model_lower_clean.contains("google/gemma") {
+ let gemma = if tokenizer_source.is_some() {
+ rt.block_on(async {
+ RustGemma::from_pretrained_with_tokenizer(&model_id_clean, candle_device, tokenizer_source).await
+ })
+ } else {
+ rt.block_on(async {
+ RustGemma::from_pretrained(&model_id_clean, candle_device).await
+ })
+ }
  .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to load model: {}", e)))?;
  ModelType::Gemma(gemma)
- } else if model_lower.contains("qwen") {
- let qwen = rt.block_on(async {
- RustQwen::from_pretrained(&model_id, candle_device).await
- })
+ } else if model_lower_clean.contains("qwen") {
+ let qwen = if tokenizer_source.is_some() {
+ rt.block_on(async {
+ RustQwen::from_pretrained_with_tokenizer(&model_id_clean, candle_device, tokenizer_source).await
+ })
+ } else {
+ rt.block_on(async {
+ RustQwen::from_pretrained(&model_id_clean, candle_device).await
+ })
+ }
  .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to load model: {}", e)))?;
  ModelType::Qwen(qwen)
- } else if model_lower.contains("phi") {
- let phi = rt.block_on(async {
- RustPhi::from_pretrained(&model_id, candle_device).await
- })
+ } else if model_lower_clean.contains("phi") {
+ let phi = if tokenizer_source.is_some() {
+ rt.block_on(async {
+ RustPhi::from_pretrained_with_tokenizer(&model_id_clean, candle_device, tokenizer_source).await
+ })
+ } else {
+ rt.block_on(async {
+ RustPhi::from_pretrained(&model_id_clean, candle_device).await
+ })
+ }
  .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to load model: {}", e)))?;
  ModelType::Phi(phi)
  } else {
  return Err(Error::new(
  magnus::exception::runtime_error(),
- format!("Unsupported model type: {}. Currently Mistral, Llama, Gemma, Qwen, and Phi models are supported.", model_id),
+ format!("Unsupported model type: {}. Currently Mistral, Llama, Gemma, Qwen, and Phi models are supported.", model_id_clean),
  ));
  }
  };
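With this change the `model@@tokenizer` convention is parsed once, before the quantized/non-quantized branch, so a custom tokenizer repo now applies to both GGUF and full-precision models. A minimal sketch of the split, with an illustrative helper name and example repo IDs (not taken from the diff):

```rust
// Minimal sketch of the "model@@tokenizer" split performed above.
// Everything before "@@" is the model repo; everything after is the tokenizer repo.
fn split_model_and_tokenizer(model_id: &str) -> (String, Option<&str>) {
    if let Some(pos) = model_id.find("@@") {
        (model_id[..pos].to_string(), Some(&model_id[pos + 2..]))
    } else {
        (model_id.to_string(), None)
    }
}

fn main() {
    let (model, tok) = split_model_and_tokenizer(
        "TheBloke/Mistral-7B-Instruct-v0.2-GGUF@@mistralai/Mistral-7B-Instruct-v0.2",
    );
    assert_eq!(model, "TheBloke/Mistral-7B-Instruct-v0.2-GGUF");
    assert_eq!(tok, Some("mistralai/Mistral-7B-Instruct-v0.2"));
    assert_eq!(split_model_and_tokenizer("google/gemma-2b").1, None);
}
```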
@@ -1,5 +1,5 @@
  # :nocov:
  module Candle
-  VERSION = "1.2.3"
+  VERSION = "1.3.0"
  end
  # :nocov:
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: red-candle
  version: !ruby/object:Gem::Version
-   version: 1.2.3
+   version: 1.3.0
  platform: ruby
  authors:
  - Christopher Petersen
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2025-09-07 00:00:00.000000000 Z
+ date: 2025-09-13 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: rb_sys
@@ -151,7 +151,9 @@ dependencies:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '3.13'
- description: huggingface/candle for Ruby
+ description: Ruby gem for running state-of-the-art language models locally. Access
+   LLMs, embeddings, rerankers, and NER models directly from Ruby using Rust-powered
+   Candle with Metal/CUDA acceleration.
  email:
  - chris@petersen.io
  - 2xijok@gmail.com
@@ -204,12 +206,6 @@ files:
  - ext/candle/src/structured/vocabulary_adapter_simple_test.rs
  - ext/candle/src/tokenizer/loader.rs
  - ext/candle/src/tokenizer/mod.rs
- - ext/candle/target/release/build/bindgen-0f89ba23b9ca1395/out/host-target.txt
- - ext/candle/target/release/build/clang-sys-cac31d63c4694603/out/common.rs
- - ext/candle/target/release/build/clang-sys-cac31d63c4694603/out/dynamic.rs
- - ext/candle/target/release/build/clang-sys-cac31d63c4694603/out/macros.rs
- - ext/candle/target/release/build/pulp-1b95cfe377eede97/out/x86_64_asm.rs
- - ext/candle/target/release/build/rb-sys-f8ac4edc30ab3e53/out/bindings-0.9.116-mri-arm64-darwin24-3.3.0.rs
  - ext/candle/tests/device_tests.rs
  - ext/candle/tests/tensor_tests.rs
  - lib/candle.rb
@@ -237,16 +233,18 @@ required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
-     version: 3.2.0
+     version: 3.1.0
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
-     version: 3.3.26
+     version: '3.3'
  requirements:
  - Rust >= 1.85
- rubygems_version: 3.5.3
+ rubygems_version: 3.3.3
  signing_key:
  specification_version: 4
- summary: huggingface/candle for Ruby
+ summary: Ruby gem for running state-of-the-art language models locally. Access LLMs,
+   embeddings, rerankers, and NER models directly from Ruby using Rust-powered Candle
+   with Metal/CUDA acceleration.
  test_files: []