RubyGems - red-candle - Versions diffs - 1.1.2 → 1.2.0 - Mend

red-candle 1.1.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

checksums.yaml +4 -4
data/README.md +39 -45
data/Rakefile +79 -88
data/ext/candle/src/lib.rs +2 -4
data/ext/candle/src/llm/quantized_gguf.rs +1 -1
data/ext/candle/src/ruby/device.rs +30 -0
data/ext/candle/src/ruby/embedding_model.rs +74 -28
data/ext/candle/src/ruby/llm.rs +96 -1
data/ext/candle/src/ruby/mod.rs +2 -0
data/ext/candle/src/{ner.rs → ruby/ner.rs} +47 -15
data/ext/candle/src/{reranker.rs → ruby/reranker.rs} +24 -2
data/ext/candle/src/ruby/tensor.rs +101 -26
data/ext/candle/src/ruby/tokenizer.rs +60 -3
data/lib/candle/device_utils.rb +3 -15
data/lib/candle/embedding_model.rb +44 -1
data/lib/candle/llm.rb +63 -1
data/lib/candle/ner.rb +34 -22
data/lib/candle/reranker.rb +20 -1
data/lib/candle/tensor.rb +15 -0
data/lib/candle/version.rb +1 -1
metadata +18 -4

data/ext/candle/src/ruby/embedding_model.rs CHANGED

@@ -13,7 +13,7 @@ use candle_transformers::models::{
     jina_bert::{BertModel as JinaBertModel, Config as JinaConfig},
     distilbert::{DistilBertModel, Config as DistilBertConfig}
 };
-use magnus::{class, function, method, prelude::*, Error, RModule};
+use magnus::{class, function, method, prelude::*, Error, RModule, RHash};
 use std::path::Path;
 use serde_json;
@@ -53,7 +53,7 @@ pub enum EmbeddingModelVariant {
 }
 impl EmbeddingModelVariant {
-    pub fn embedding_model_type(&self) -> EmbeddingModelType {
+    pub fn model_type(&self) -> EmbeddingModelType {
         match self {
             EmbeddingModelVariant::JinaBert(_) => EmbeddingModelType::JinaBert,
             EmbeddingModelVariant::StandardBert(_) => EmbeddingModelType::StandardBert,
@@ -66,31 +66,31 @@ impl EmbeddingModelVariant {
 pub struct EmbeddingModelInner {
     device: CoreDevice,
-    tokenizer_path: Option<String>,
-    model_path: Option<String>,
-    embedding_model_type: Option<EmbeddingModelType>,
+    tokenizer_id: Option<String>,
+    model_id: Option<String>,
+    model_type: Option<EmbeddingModelType>,
     model: Option<EmbeddingModelVariant>,
     tokenizer: Option<TokenizerWrapper>,
     embedding_size: Option<usize>,
 }
 impl EmbeddingModel {
-    pub fn new(model_path: Option<String>, tokenizer_path: Option<String>, device: Option<Device>, embedding_model_type: Option<String>, embedding_size: Option<usize>) -> Result<Self> {
-        let device = device.unwrap_or(Device::Cpu).as_device()?;
-        let embedding_model_type = embedding_model_type
+    pub fn new(model_id: Option<String>, tokenizer: Option<String>, device: Option<Device>, model_type: Option<String>, embedding_size: Option<usize>) -> Result<Self> {
+        let device = device.unwrap_or(Device::best()).as_device()?;
+        let model_type = model_type
             .and_then(|mt| EmbeddingModelType::from_string(&mt))
             .unwrap_or(EmbeddingModelType::JinaBert);
         Ok(EmbeddingModel(EmbeddingModelInner {
             device: device.clone(),
-            model_path: model_path.clone(),
-            tokenizer_path: tokenizer_path.clone(),
-            embedding_model_type: Some(embedding_model_type),
-            model: match model_path {
-                Some(mp) => Some(Self::build_embedding_model(Path::new(&mp), device, embedding_model_type, embedding_size)?),
+            model_id: model_id.clone(),
+            tokenizer_id: tokenizer.clone(),
+            model_type: Some(model_type),
+            model: match model_id.as_ref() {
+                Some(id) => Some(Self::build_embedding_model(id, device, model_type, embedding_size)?),
                 None => None
             },
-            tokenizer: match tokenizer_path {
-                Some(tp) => Some(Self::build_tokenizer(tp)?),
+            tokenizer: match tokenizer {
+                Some(tid) => Some(Self::build_tokenizer(tid)?),
                 None => None
             },
             embedding_size,
@@ -170,11 +170,11 @@ impl EmbeddingModel {
         }
     }
-    fn build_embedding_model(model_path: &Path, device: CoreDevice, embedding_model_type: EmbeddingModelType, embedding_size: Option<usize>) -> Result<EmbeddingModelVariant> {
+    fn build_embedding_model(model_id: &str, device: CoreDevice, model_type: EmbeddingModelType, embedding_size: Option<usize>) -> Result<EmbeddingModelVariant> {
         use hf_hub::{api::sync::Api, Repo, RepoType};
         let api = Api::new().map_err(wrap_hf_err)?;
-        let repo = Repo::new(model_path.to_str().unwrap().to_string(), RepoType::Model);
-        match embedding_model_type {
+        let repo = Repo::new(model_id.to_string(), RepoType::Model);
+        match model_type {
             EmbeddingModelType::JinaBert => {
                 let model_path = api.repo(repo).get("model.safetensors").map_err(wrap_hf_err)?;
                 if !std::path::Path::new(&model_path).exists() {
@@ -257,12 +257,12 @@ impl EmbeddingModel {
         }
     }
-    fn build_tokenizer(tokenizer_path: String) -> Result<TokenizerWrapper> {
+    fn build_tokenizer(tokenizer_id: String) -> Result<TokenizerWrapper> {
         use hf_hub::{api::sync::Api, Repo, RepoType};
         let tokenizer_path = Api::new()
                 .map_err(wrap_hf_err)?
                 .repo(Repo::new(
-                    tokenizer_path,
+                    tokenizer_id,
                     RepoType::Model,
                 ))
                 .get("tokenizer.json")
@@ -365,19 +365,19 @@ impl EmbeddingModel {
         v.broadcast_div(&v.sqr()?.sum_keepdim(1)?.sqrt()?)
     }
-    pub fn embedding_model_type(&self) -> String {
-        match self.0.embedding_model_type {
-            Some(model_type) => format!("{:?}", model_type),
+    pub fn model_type(&self) -> String {
+        match self.0.model_type {
+            Some(mt) => format!("{:?}", mt),
             None => "nil".to_string(),
         }
     }
     pub fn __repr__(&self) -> String {
         format!(
-            "#<Candle::EmbeddingModel embedding_model_type: {}, model_path: {}, tokenizer_path: {}, embedding_size: {}>",
-            self.embedding_model_type(),
-            self.0.model_path.as_deref().unwrap_or("nil"),
-            self.0.tokenizer_path.as_deref().unwrap_or("nil"),
+            "#<Candle::EmbeddingModel model_type: {}, model_id: {}, tokenizer: {}, embedding_size: {}>",
+            self.model_type(),
+            self.0.model_id.as_deref().unwrap_or("nil"),
+            self.0.tokenizer_id.as_deref().unwrap_or("nil"),
             self.0.embedding_size.map(|x| x.to_string()).unwrap_or("nil".to_string())
         )
     }
@@ -393,6 +393,49 @@ impl EmbeddingModel {
             None => Err(magnus::Error::new(magnus::exception::runtime_error(), "No tokenizer loaded for this model"))
         }
     }
+    /// Get the model_id
+    pub fn model_id(&self) -> Result<String> {
+        match &self.0.model_id {
+            Some(id) => Ok(id.clone()),
+            None => Ok("unknown".to_string())
+        }
+    }
+    /// Get the device
+    pub fn device(&self) -> Device {
+        Device::from_device(&self.0.device)
+    }
+    /// Get all options as a hash
+    pub fn options(&self) -> Result<RHash> {
+        let hash = RHash::new();
+        // Add model_id
+        if let Some(model_id) = &self.0.model_id {
+            hash.aset("model_id", model_id.clone())?;
+        }
+        // Add tokenizer
+        if let Some(tokenizer_id) = &self.0.tokenizer_id {
+            hash.aset("tokenizer", tokenizer_id.clone())?;
+        }
+        // Add device
+        hash.aset("device", self.device().__str__())?;
+        // Add model_type
+        if let Some(model_type) = &self.0.model_type {
+            hash.aset("model_type", format!("{:?}", model_type))?;
+        }
+        // Add embedding_size
+        if let Some(size) = self.0.embedding_size {
+            hash.aset("embedding_size", size)?;
+        }
+        Ok(hash)
+    }
 }
 pub fn init(rb_candle: RModule) -> Result<()> {
@@ -404,9 +447,12 @@ pub fn init(rb_candle: RModule) -> Result<()> {
     rb_embedding_model.define_method("pool_embedding", method!(EmbeddingModel::pool_embedding, 1))?;
     rb_embedding_model.define_method("pool_and_normalize_embedding", method!(EmbeddingModel::pool_and_normalize_embedding, 1))?;
     rb_embedding_model.define_method("pool_cls_embedding", method!(EmbeddingModel::pool_cls_embedding, 1))?;
-    rb_embedding_model.define_method("embedding_model_type", method!(EmbeddingModel::embedding_model_type, 0))?;
+    rb_embedding_model.define_method("model_type", method!(EmbeddingModel::model_type, 0))?;
     rb_embedding_model.define_method("to_s", method!(EmbeddingModel::__str__, 0))?;
     rb_embedding_model.define_method("inspect", method!(EmbeddingModel::__repr__, 0))?;
     rb_embedding_model.define_method("tokenizer", method!(EmbeddingModel::tokenizer, 0))?;
+    rb_embedding_model.define_method("model_id", method!(EmbeddingModel::model_id, 0))?;
+    rb_embedding_model.define_method("device", method!(EmbeddingModel::device, 0))?;
+    rb_embedding_model.define_method("options", method!(EmbeddingModel::options, 0))?;
     Ok(())
 }

data/ext/candle/src/ruby/llm.rs CHANGED

@@ -201,6 +201,37 @@ impl GenerationConfig {
             index: Arc::clone(c),
         })
     }
+    /// Get all options as a hash
+    pub fn options(&self) -> Result<RHash> {
+        let hash = RHash::new();
+        hash.aset("max_length", self.inner.max_length)?;
+        hash.aset("temperature", self.inner.temperature)?;
+        if let Some(top_p) = self.inner.top_p {
+            hash.aset("top_p", top_p)?;
+        }
+        if let Some(top_k) = self.inner.top_k {
+            hash.aset("top_k", top_k)?;
+        }
+        hash.aset("repetition_penalty", self.inner.repetition_penalty)?;
+        hash.aset("repetition_penalty_last_n", self.inner.repetition_penalty_last_n)?;
+        hash.aset("seed", self.inner.seed)?;
+        hash.aset("stop_sequences", self.inner.stop_sequences.clone())?;
+        hash.aset("include_prompt", self.inner.include_prompt)?;
+        hash.aset("debug_tokens", self.inner.debug_tokens)?;
+        hash.aset("stop_on_constraint_satisfaction", self.inner.stop_on_constraint_satisfaction)?;
+        hash.aset("stop_on_match", self.inner.stop_on_match)?;
+        if self.inner.constraint.is_some() {
+            hash.aset("has_constraint", true)?;
+        }
+        Ok(hash)
+    }
 }
 #[derive(Clone)]
@@ -214,7 +245,7 @@ pub struct LLM {
 impl LLM {
     /// Create a new LLM from a pretrained model
     pub fn from_pretrained(model_id: String, device: Option<Device>) -> Result<Self> {
-        let device = device.unwrap_or(Device::Cpu);
+        let device = device.unwrap_or(Device::best());
         let candle_device = device.as_device()?;
         // For now, we'll use tokio runtime directly
@@ -448,6 +479,67 @@ impl LLM {
         model_ref.apply_chat_template(&json_messages)
             .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to apply chat template: {}", e)))
     }
+    /// Get the model ID
+    pub fn model_id(&self) -> String {
+        self.model_id.clone()
+    }
+    /// Get model options
+    pub fn options(&self) -> Result<RHash> {
+        let hash = RHash::new();
+        // Basic metadata
+        hash.aset("model_id", self.model_id.clone())?;
+        let device_str = match self.device {
+            Device::Cpu => "cpu",
+            Device::Cuda => "cuda",
+            Device::Metal => "metal",
+        };
+        hash.aset("device", device_str)?;
+        // Parse model_id to extract GGUF file if present
+        if let Some(at_pos) = self.model_id.find('@') {
+            let (base_model, gguf_part) = self.model_id.split_at(at_pos);
+            let gguf_part = &gguf_part[1..]; // Skip the @ character
+            // Check for tokenizer (@@)
+            if let Some(tokenizer_pos) = gguf_part.find("@@") {
+                let (gguf_file, tokenizer) = gguf_part.split_at(tokenizer_pos);
+                hash.aset("base_model", base_model)?;
+                hash.aset("gguf_file", gguf_file)?;
+                hash.aset("tokenizer_source", &tokenizer[2..])?;
+            } else {
+                hash.aset("base_model", base_model)?;
+                hash.aset("gguf_file", gguf_part)?;
+            }
+        }
+        // Add model type
+        let model = match self.model.lock() {
+            Ok(guard) => guard,
+            Err(poisoned) => poisoned.into_inner(),
+        };
+        let model_ref = model.borrow();
+        let model_type = match &*model_ref {
+            ModelType::Mistral(_) => "Mistral",
+            ModelType::Llama(_) => "Llama",
+            ModelType::Gemma(_) => "Gemma",
+            ModelType::Qwen(_) => "Qwen",
+            ModelType::Phi(_) => "Phi",
+            ModelType::QuantizedGGUF(_) => "QuantizedGGUF",
+        };
+        hash.aset("model_type", model_type)?;
+        // For GGUF models, add architecture info
+        if let ModelType::QuantizedGGUF(gguf) = &*model_ref {
+            hash.aset("architecture", gguf.architecture.clone())?;
+            hash.aset("eos_token_id", gguf.eos_token_id())?;
+        }
+        Ok(hash)
+    }
 }
 // Define a standalone function for from_pretrained that handles variable arguments
@@ -486,6 +578,7 @@ pub fn init_llm(rb_candle: RModule) -> Result<()> {
     rb_generation_config.define_method("stop_on_constraint_satisfaction", method!(GenerationConfig::stop_on_constraint_satisfaction, 0))?;
     rb_generation_config.define_method("stop_on_match", method!(GenerationConfig::stop_on_match, 0))?;
     rb_generation_config.define_method("constraint", method!(GenerationConfig::constraint, 0))?;
+    rb_generation_config.define_method("options", method!(GenerationConfig::options, 0))?;
     let rb_llm = rb_candle.define_class("LLM", magnus::class::object())?;
     rb_llm.define_singleton_method("_from_pretrained", function!(from_pretrained_wrapper, -1))?;
@@ -497,6 +590,8 @@ pub fn init_llm(rb_candle: RModule) -> Result<()> {
     rb_llm.define_method("eos_token", method!(LLM::eos_token, 0))?;
     rb_llm.define_method("clear_cache", method!(LLM::clear_cache, 0))?;
     rb_llm.define_method("apply_chat_template", method!(LLM::apply_chat_template, 1))?;
+    rb_llm.define_method("model_id", method!(LLM::model_id, 0))?;
+    rb_llm.define_method("options", method!(LLM::options, 0))?;
     Ok(())
 }

data/ext/candle/src/ruby/mod.rs CHANGED

@@ -8,6 +8,8 @@ pub mod utils;
 pub mod llm;
 pub mod tokenizer;
 pub mod structured;
+pub mod reranker;
+pub mod ner;
 pub use embedding_model::{EmbeddingModel, EmbeddingModelInner};
 pub use tensor::Tensor;

data/ext/candle/src/{ner.rs → ruby/ner.rs} RENAMED

@@ -3,7 +3,7 @@ use candle_transformers::models::bert::{BertModel, Config};
 use candle_core::{Device as CoreDevice, Tensor, DType, Module as CanModule};
 use candle_nn::{VarBuilder, Linear};
 use hf_hub::{api::sync::Api, Repo, RepoType};
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 use serde::{Deserialize, Serialize};
 use crate::ruby::{Device, Result};
 use crate::tokenizer::{TokenizerWrapper, loader::TokenizerLoader};
@@ -36,8 +36,8 @@ pub struct NER {
 }
 impl NER {
-    pub fn new(model_id: String, device: Option<Device>, tokenizer_id: Option<String>) -> Result<Self> {
-        let device = device.unwrap_or(Device::Cpu).as_device()?;
+    pub fn new(model_id: String, device: Option<Device>, tokenizer: Option<String>) -> Result<Self> {
+        let device = device.unwrap_or(Device::best()).as_device()?;
         let result = (|| -> std::result::Result<(BertModel, TokenizerWrapper, Linear, NERConfig), Box<dyn std::error::Error + Send + Sync>> {
             let api = Api::new()?;
@@ -46,18 +46,18 @@ impl NER {
             // Download model files
             let config_filename = repo.get("config.json")?;
-            // Handle tokenizer loading with optional tokenizer_id
-            let tokenizer = if let Some(tok_id) = tokenizer_id {
+            // Handle tokenizer loading with optional tokenizer
+            let tokenizer_wrapper = if let Some(tok_id) = tokenizer {
                 // Use the specified tokenizer
                 let tok_repo = api.repo(Repo::new(tok_id, RepoType::Model));
                 let tokenizer_filename = tok_repo.get("tokenizer.json")?;
                 let tokenizer = tokenizers::Tokenizer::from_file(tokenizer_filename)?;
-                TokenizerLoader::with_padding(tokenizer, None)
+                TokenizerWrapper::new(TokenizerLoader::with_padding(tokenizer, None))
             } else {
                 // Try to load tokenizer from model repo
                 let tokenizer_filename = repo.get("tokenizer.json")?;
                 let tokenizer = tokenizers::Tokenizer::from_file(tokenizer_filename)?;
-                TokenizerLoader::with_padding(tokenizer, None)
+                TokenizerWrapper::new(TokenizerLoader::with_padding(tokenizer, None))
             };
             let weights_filename = repo.get("pytorch_model.safetensors")
                 .or_else(|_| repo.get("model.safetensors"))?;
@@ -101,7 +101,7 @@ impl NER {
                 vb.pp("classifier")
             )?;
-            Ok((model, TokenizerWrapper::new(tokenizer), classifier, ner_config))
+            Ok((model, tokenizer_wrapper, classifier, ner_config))
         })();
         match result {
@@ -185,13 +185,13 @@ impl NER {
         let result = RArray::new();
         for entity in entities {
             let hash = RHash::new();
-            hash.aset("text", entity.text)?;
-            hash.aset("label", entity.label)?;
-            hash.aset("start", entity.start)?;
-            hash.aset("end", entity.end)?;
-            hash.aset("confidence", entity.confidence)?;
-            hash.aset("token_start", entity.token_start)?;
-            hash.aset("token_end", entity.token_end)?;
+            hash.aset(magnus::Symbol::new("text"), entity.text)?;
+            hash.aset(magnus::Symbol::new("label"), entity.label)?;
+            hash.aset(magnus::Symbol::new("start"), entity.start)?;
+            hash.aset(magnus::Symbol::new("end"), entity.end)?;
+            hash.aset(magnus::Symbol::new("confidence"), entity.confidence)?;
+            hash.aset(magnus::Symbol::new("token_start"), entity.token_start)?;
+            hash.aset(magnus::Symbol::new("token_end"), entity.token_end)?;
             result.push(hash)?;
         }
@@ -382,6 +382,35 @@ impl NER {
     pub fn model_info(&self) -> String {
         format!("NER model: {}, labels: {}", self.model_id, self.config.id2label.len())
     }
+    /// Get the model_id
+    pub fn model_id(&self) -> String {
+        self.model_id.clone()
+    }
+    /// Get the device
+    pub fn device(&self) -> Device {
+        Device::from_device(&self.device)
+    }
+    /// Get all options as a hash
+    pub fn options(&self) -> Result<RHash> {
+        let hash = RHash::new();
+        hash.aset("model_id", self.model_id.clone())?;
+        hash.aset("device", self.device().__str__())?;
+        hash.aset("num_labels", self.config.id2label.len())?;
+        // Add entity types as a list
+        let entity_types: Vec<String> = self.config.label2id.keys()
+            .filter(|l| *l != "O")
+            .map(|l| l.trim_start_matches("B-").trim_start_matches("I-").to_string())
+            .collect::<HashSet<_>>()
+            .into_iter()
+            .collect();
+        hash.aset("entity_types", entity_types)?;
+        Ok(hash)
+    }
 }
 pub fn init(rb_candle: RModule) -> Result<()> {
@@ -392,6 +421,9 @@ pub fn init(rb_candle: RModule) -> Result<()> {
     ner_class.define_method("labels", method!(NER::labels, 0))?;
     ner_class.define_method("tokenizer", method!(NER::tokenizer, 0))?;
     ner_class.define_method("model_info", method!(NER::model_info, 0))?;
+    ner_class.define_method("model_id", method!(NER::model_id, 0))?;
+    ner_class.define_method("device", method!(NER::device, 0))?;
+    ner_class.define_method("options", method!(NER::options, 0))?;
     Ok(())
 }

data/ext/candle/src/{reranker.rs → ruby/reranker.rs} RENAMED

@@ -14,11 +14,12 @@ pub struct Reranker {
     pooler: Linear,
     classifier: Linear,
     device: CoreDevice,
+    model_id: String,
 }
 impl Reranker {
     pub fn new(model_id: String, device: Option<Device>) -> Result<Self> {
-        let device = device.unwrap_or(Device::Cpu).as_device()?;
+        let device = device.unwrap_or(Device::best()).as_device()?;
         Self::new_with_core_device(model_id, device)
     }
@@ -59,7 +60,7 @@ impl Reranker {
         match result {
             Ok((model, tokenizer, pooler, classifier)) => {
-                Ok(Self { model, tokenizer, pooler, classifier, device })
+                Ok(Self { model, tokenizer, pooler, classifier, device, model_id })
             }
             Err(e) => Err(Error::new(magnus::exception::runtime_error(), format!("Failed to load model: {}", e))),
         }
@@ -231,6 +232,24 @@ impl Reranker {
     pub fn tokenizer(&self) -> std::result::Result<crate::ruby::tokenizer::Tokenizer, Error> {
         Ok(crate::ruby::tokenizer::Tokenizer(self.tokenizer.clone()))
     }
+    /// Get the model_id
+    pub fn model_id(&self) -> String {
+        self.model_id.clone()
+    }
+    /// Get the device
+    pub fn device(&self) -> Device {
+        Device::from_device(&self.device)
+    }
+    /// Get all options as a hash
+    pub fn options(&self) -> std::result::Result<magnus::RHash, Error> {
+        let hash = magnus::RHash::new();
+        hash.aset("model_id", self.model_id.clone())?;
+        hash.aset("device", self.device().__str__())?;
+        Ok(hash)
+    }
 }
 pub fn init(rb_candle: RModule) -> std::result::Result<(), Error> {
@@ -239,5 +258,8 @@ pub fn init(rb_candle: RModule) -> std::result::Result<(), Error> {
     c_reranker.define_method("rerank_with_options", method!(Reranker::rerank_with_options, 4))?;
     c_reranker.define_method("debug_tokenization", method!(Reranker::debug_tokenization, 2))?;
     c_reranker.define_method("tokenizer", method!(Reranker::tokenizer, 0))?;
+    c_reranker.define_method("model_id", method!(Reranker::model_id, 0))?;
+    c_reranker.define_method("device", method!(Reranker::device, 0))?;
+    c_reranker.define_method("options", method!(Reranker::options, 0))?;
     Ok(())
 }