red-candle 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ use crate::ruby::{
     utils::{actual_dim, actual_index},
 };
 use crate::ruby::{DType, Device, Result};
-use ::candle_core::{DType as CoreDType, Tensor as CoreTensor};
+use ::candle_core::{DType as CoreDType, Tensor as CoreTensor, Device as CoreDevice, DeviceLocation};
 
 #[derive(Clone, Debug)]
 #[magnus::wrap(class = "Candle::Tensor", free_immediately, size)]
@@ -21,30 +21,108 @@ impl std::ops::Deref for Tensor {
     }
 }
 
+// Helper functions for tensor operations
+impl Tensor {
+    /// Check if device is Metal
+    fn is_metal_device(device: &CoreDevice) -> bool {
+        matches!(device.location(), DeviceLocation::Metal { .. })
+    }
+
+    /// Convert tensor to target dtype, handling Metal limitations
+    fn safe_to_dtype(&self, target_dtype: CoreDType) -> Result<CoreTensor> {
+        if Self::is_metal_device(self.0.device()) && self.0.dtype() != target_dtype {
+            // Move to CPU first to avoid Metal conversion limitations
+            self.0
+                .to_device(&CoreDevice::Cpu)
+                .map_err(wrap_candle_err)?
+                .to_dtype(target_dtype)
+                .map_err(wrap_candle_err)
+        } else {
+            // Direct conversion for CPU or when dtype matches
+            self.0
+                .to_dtype(target_dtype)
+                .map_err(wrap_candle_err)
+        }
+    }
+}
+
 impl Tensor {
     pub fn new(array: magnus::RArray, dtype: Option<magnus::Symbol>, device: Option<Device>) -> Result<Self> {
         let dtype = dtype
             .map(|dtype| DType::from_rbobject(dtype))
             .unwrap_or(Ok(DType(CoreDType::F32)))?;
-        let device = device.unwrap_or(Device::Cpu).as_device()?;
-        // FIXME: Do not use `to_f64` here.
-        let array = array
-            .into_iter()
-            .map(|v| magnus::Float::try_convert(v).map(|v| v.to_f64()))
-            .collect::<Result<Vec<_>>>()?;
-        Ok(Self(
-            CoreTensor::new(array.as_slice(), &device)
-                .map_err(wrap_candle_err)?
-                .to_dtype(dtype.0)
-                .map_err(wrap_candle_err)?,
-        ))
+        let device = device.unwrap_or(Device::best()).as_device()?;
+
+        // Create tensor based on target dtype to avoid conversion issues on Metal
+        let tensor = match dtype.0 {
+            CoreDType::F32 => {
+                // Convert to f32 directly to avoid F64->F32 conversion on Metal
+                let array: Vec<f32> = array
+                    .into_iter()
+                    .map(|v| magnus::Float::try_convert(v).map(|v| v.to_f64() as f32))
+                    .collect::<Result<Vec<_>>>()?;
+                let len = array.len();
+                CoreTensor::from_vec(array, len, &device).map_err(wrap_candle_err)?
+            }
+            CoreDType::F64 => {
+                let array: Vec<f64> = array
+                    .into_iter()
+                    .map(|v| magnus::Float::try_convert(v).map(|v| v.to_f64()))
+                    .collect::<Result<Vec<_>>>()?;
+                let len = array.len();
+                CoreTensor::from_vec(array, len, &device).map_err(wrap_candle_err)?
+            }
+            CoreDType::I64 => {
+                // Convert to i64 directly to avoid conversion issues on Metal
+                let array: Vec<i64> = array
+                    .into_iter()
+                    .map(|v| {
+                        // Try integer first, then float
+                        if let Ok(i) = <i64>::try_convert(v) {
+                            Ok(i)
+                        } else if let Ok(f) = magnus::Float::try_convert(v) {
+                            Ok(f.to_f64() as i64)
+                        } else {
+                            Err(magnus::Error::new(
+                                magnus::exception::type_error(),
+                                "Cannot convert to i64"
+                            ))
+                        }
+                    })
+                    .collect::<Result<Vec<_>>>()?;
+                let len = array.len();
+                CoreTensor::from_vec(array, len, &device).map_err(wrap_candle_err)?
+            }
+            _ => {
+                // For other dtypes, create on CPU first if on Metal, then convert
+                let cpu_device = CoreDevice::Cpu;
+                let use_cpu = Self::is_metal_device(&device);
+                let target_device = if use_cpu { &cpu_device } else { &device };
+
+                let array: Vec<f64> = array
+                    .into_iter()
+                    .map(|v| magnus::Float::try_convert(v).map(|v| v.to_f64()))
+                    .collect::<Result<Vec<_>>>()?;
+                let tensor = CoreTensor::new(array.as_slice(), target_device)
+                    .map_err(wrap_candle_err)?
+                    .to_dtype(dtype.0)
+                    .map_err(wrap_candle_err)?;
+
+                // Move to target device if needed
+                if use_cpu {
+                    tensor.to_device(&device).map_err(wrap_candle_err)?
+                } else {
+                    tensor
+                }
+            }
+        };
+
+        Ok(Self(tensor))
     }
 
     pub fn values(&self) -> Result<Vec<f64>> {
-        let values = self
-            .0
-            .to_dtype(CoreDType::F64)
-            .map_err(wrap_candle_err)?
+        let tensor = self.safe_to_dtype(CoreDType::F64)?;
+        let values = tensor
            .flatten_all()
            .map_err(wrap_candle_err)?
            .to_vec1()
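
For Ruby callers, the upshot of the rewritten constructor is that tensors now default to the best available device and are built directly in the requested dtype, so Metal users no longer hit the F64→F32 conversion path. A minimal usage sketch, assuming the dtype symbol and device are passed positionally as in the Rust signature above:

```ruby
require "candle"

# Default device is now Device.best (Metal > CUDA > CPU) rather than the CPU.
t = Candle::Tensor.new([1.0, 2.0, 3.0])   # built directly as F32
i = Candle::Tensor.new([1, 2, 3], :i64)   # integers accepted without a float detour

# values still round-trips through F64; on Metal, safe_to_dtype above
# hops through the CPU when a dtype conversion is required.
p t.values # => [1.0, 2.0, 3.0]
```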
@@ -92,11 +170,8 @@ impl Tensor {
             }
             _ => {
                 // For other dtypes, convert to F64 first
-                let val: f64 = self.0
-                    .to_dtype(CoreDType::F64)
-                    .map_err(wrap_candle_err)?
-                    .to_vec0()
-                    .map_err(wrap_candle_err)?;
+                let tensor = self.safe_to_dtype(CoreDType::F64)?;
+                let val: f64 = tensor.to_vec0().map_err(wrap_candle_err)?;
                 Ok(val)
             }
         }
@@ -541,7 +616,7 @@ impl Tensor {
     /// Creates a new tensor with random values.
     /// &RETURNS&: Tensor
     pub fn rand(shape: Vec<usize>, device: Option<Device>) -> Result<Self> {
-        let device = device.unwrap_or(Device::Cpu).as_device()?;
+        let device = device.unwrap_or(Device::best()).as_device()?;
         Ok(Self(
             CoreTensor::rand(0f32, 1f32, shape, &device).map_err(wrap_candle_err)?,
         ))
@@ -550,7 +625,7 @@ impl Tensor {
     /// Creates a new tensor with random values from a normal distribution.
     /// &RETURNS&: Tensor
     pub fn randn(shape: Vec<usize>, device: Option<Device>) -> Result<Self> {
-        let device = device.unwrap_or(Device::Cpu).as_device()?;
+        let device = device.unwrap_or(Device::best()).as_device()?;
         Ok(Self(
             CoreTensor::randn(0f32, 1f32, shape, &device).map_err(wrap_candle_err)?,
         ))
@@ -559,7 +634,7 @@ impl Tensor {
     /// Creates a new tensor filled with ones.
     /// &RETURNS&: Tensor
     pub fn ones(shape: Vec<usize>, device: Option<Device>) -> Result<Self> {
-        let device = device.unwrap_or(Device::Cpu).as_device()?;
+        let device = device.unwrap_or(Device::best()).as_device()?;
         Ok(Self(
             CoreTensor::ones(shape, CoreDType::F32, &device).map_err(wrap_candle_err)?,
         ))
@@ -567,7 +642,7 @@ impl Tensor {
     /// Creates a new tensor filled with zeros.
     /// &RETURNS&: Tensor
     pub fn zeros(shape: Vec<usize>, device: Option<Device>) -> Result<Self> {
-        let device = device.unwrap_or(Device::Cpu).as_device()?;
+        let device = device.unwrap_or(Device::best()).as_device()?;
         Ok(Self(
             CoreTensor::zeros(shape, CoreDType::F32, &device).map_err(wrap_candle_err)?,
         ))
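
The same default-device change applies to all four factory methods. A sketch of the Ruby-side effect, assuming the optional device argument is positional as in the Rust signatures:

```ruby
ones  = Candle::Tensor.ones([2, 3])                       # lands on the best device
zeros = Candle::Tensor.zeros([2, 3], Candle::Device.cpu)  # pin to the CPU as in 1.1.1
r     = Candle::Tensor.rand([4])
rn    = Candle::Tensor.randn([4])
```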
@@ -651,4 +726,5 @@ pub fn init(rb_candle: RModule) -> Result<()> {
     rb_tensor.define_method("to_s", method!(Tensor::__str__, 0))?;
     rb_tensor.define_method("inspect", method!(Tensor::__repr__, 0))?;
     Ok(())
-}
+}
+
@@ -105,8 +105,8 @@ impl Tokenizer {
         }
 
         let hash = RHash::new();
-        hash.aset("ids", RArray::from_vec(token_ids.into_iter().map(|id| id as i64).collect()))?;
-        hash.aset("tokens", RArray::from_vec(tokens))?;
+        hash.aset(magnus::Symbol::new("ids"), RArray::from_vec(token_ids.into_iter().map(|id| id as i64).collect()))?;
+        hash.aset(magnus::Symbol::new("tokens"), RArray::from_vec(tokens))?;
 
         Ok(hash)
     }
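
This is a breaking change for callers that read the returned hash: its keys switch from strings to symbols. An illustrative before/after, assuming the hash comes back from the encode path this method feeds:

```ruby
result = tokenizer.encode("hello world")  # method name assumed from context
result["ids"]     # 1.1.1: string keys
result[:ids]      # 1.2.0: symbol keys
result[:tokens]
```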
@@ -236,9 +236,65 @@ impl Tokenizer {
         Ok(hash)
     }
 
+    /// Get tokenizer options as a hash
+    pub fn options(&self) -> Result<RHash> {
+        let hash = RHash::new();
+
+        // Get vocab size
+        hash.aset("vocab_size", self.vocab_size(Some(true)))?;
+        hash.aset("vocab_size_base", self.vocab_size(Some(false)))?;
+
+        // Get special tokens info
+        let special_tokens = self.get_special_tokens()?;
+        hash.aset("special_tokens", special_tokens)?;
+
+        // Get padding/truncation info if available
+        let inner_tokenizer = self.0.inner();
+
+        // Check if padding is enabled
+        if let Some(_padding) = inner_tokenizer.get_padding() {
+            let padding_info = RHash::new();
+            padding_info.aset("enabled", true)?;
+            // Note: We can't easily extract all padding params from the tokenizers library,
+            // but we can indicate it's enabled
+            hash.aset("padding", padding_info)?;
+        }
+
+        // Check if truncation is enabled
+        if let Some(truncation) = inner_tokenizer.get_truncation() {
+            let truncation_info = RHash::new();
+            truncation_info.aset("enabled", true)?;
+            truncation_info.aset("max_length", truncation.max_length)?;
+            hash.aset("truncation", truncation_info)?;
+        }
+
+        Ok(hash)
+    }
+
     /// String representation
     pub fn inspect(&self) -> String {
-        format!("#<Candle::Tokenizer vocab_size={}>", self.vocab_size(Some(true)))
+        let vocab_size = self.vocab_size(Some(true));
+        let special_tokens = self.get_special_tokens()
+            .ok()
+            .map(|h| h.len())
+            .unwrap_or(0);
+
+        let mut parts = vec![format!("#<Candle::Tokenizer vocab_size={}", vocab_size)];
+
+        if special_tokens > 0 {
+            parts.push(format!("special_tokens={}", special_tokens));
+        }
+
+        // Check for padding/truncation
+        let inner_tokenizer = self.0.inner();
+        if inner_tokenizer.get_padding().is_some() {
+            parts.push("padding=enabled".to_string());
+        }
+        if let Some(truncation) = inner_tokenizer.get_truncation() {
+            parts.push(format!("truncation={}", truncation.max_length));
+        }
+
+        parts.join(" ") + ">"
     }
 }
 
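The new options method exposes the hash assembled above (string keys, per the aset calls), and inspect folds the same data into a one-line summary. A sketch with illustrative values, given some tokenizer instance tok:

```ruby
tok.options
# => {"vocab_size"=>30522, "vocab_size_base"=>30522,
#     "special_tokens"=>{...},
#     "truncation"=>{"enabled"=>true, "max_length"=>512}}

tok.inspect   # to_s is aliased to the same method below
# => "#<Candle::Tokenizer vocab_size=30522 special_tokens=5 truncation=512>"
```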
@@ -262,6 +318,7 @@ pub fn init(rb_candle: RModule) -> Result<()> {
     tokenizer_class.define_method("with_padding", method!(Tokenizer::with_padding, 1))?;
     tokenizer_class.define_method("with_truncation", method!(Tokenizer::with_truncation, 1))?;
     tokenizer_class.define_method("get_special_tokens", method!(Tokenizer::get_special_tokens, 0))?;
+    tokenizer_class.define_method("options", method!(Tokenizer::options, 0))?;
     tokenizer_class.define_method("inspect", method!(Tokenizer::inspect, 0))?;
     tokenizer_class.define_method("to_s", method!(Tokenizer::inspect, 0))?;
 
@@ -100,4 +100,5 @@ impl TokenizerWrapper {
     pub fn inner_mut(&mut self) -> &mut Tokenizer {
         &mut self.tokenizer
     }
-}
+}
+
@@ -0,0 +1,43 @@
+use candle_core::Device as CoreDevice;
+
+#[test]
+fn test_device_creation() {
+    // CPU device should always work
+    let cpu = CoreDevice::Cpu;
+    assert!(matches!(cpu, CoreDevice::Cpu));
+
+    // Test device display
+    assert_eq!(format!("{:?}", cpu), "Cpu");
+}
+
+#[cfg(feature = "cuda")]
+#[test]
+#[ignore = "requires CUDA hardware"]
+fn test_cuda_device_creation() {
+    // This might fail if no CUDA device is available
+    match CoreDevice::new_cuda(0) {
+        Ok(device) => assert!(matches!(device, CoreDevice::Cuda(_))),
+        Err(_) => println!("No CUDA device available for testing"),
+    }
+}
+
+#[cfg(feature = "metal")]
+#[test]
+#[ignore = "requires Metal hardware"]
+fn test_metal_device_creation() {
+    // This might fail if no Metal device is available
+    match CoreDevice::new_metal(0) {
+        Ok(device) => assert!(matches!(device, CoreDevice::Metal(_))),
+        Err(_) => println!("No Metal device available for testing"),
+    }
+}
+
+#[test]
+fn test_device_matching() {
+    let cpu1 = CoreDevice::Cpu;
+    let cpu2 = CoreDevice::Cpu;
+
+    // Same device types should match
+    assert!(matches!(cpu1, CoreDevice::Cpu));
+    assert!(matches!(cpu2, CoreDevice::Cpu));
+}
@@ -0,0 +1,162 @@
+use candle_core::{Tensor, Device, DType};
+
+#[test]
+fn test_tensor_creation() {
+    let device = Device::Cpu;
+
+    // Test tensor creation from slice
+    let data = vec![1.0f32, 2.0, 3.0, 4.0];
+    let tensor = Tensor::new(&data[..], &device).unwrap();
+    assert_eq!(tensor.dims(), &[4]);
+    assert_eq!(tensor.dtype(), DType::F32);
+
+    // Test zeros
+    let zeros = Tensor::zeros(&[2, 3], DType::F32, &device).unwrap();
+    assert_eq!(zeros.dims(), &[2, 3]);
+
+    // Test ones
+    let ones = Tensor::ones(&[3, 2], DType::F32, &device).unwrap();
+    assert_eq!(ones.dims(), &[3, 2]);
+}
+
+#[test]
+fn test_tensor_arithmetic() {
+    let device = Device::Cpu;
+
+    let a = Tensor::new(&[1.0f32, 2.0, 3.0], &device).unwrap();
+    let b = Tensor::new(&[4.0f32, 5.0, 6.0], &device).unwrap();
+
+    // Addition
+    let sum = a.add(&b).unwrap();
+    let sum_vec: Vec<f32> = sum.to_vec1().unwrap();
+    assert_eq!(sum_vec, vec![5.0, 7.0, 9.0]);
+
+    // Subtraction
+    let diff = a.sub(&b).unwrap();
+    let diff_vec: Vec<f32> = diff.to_vec1().unwrap();
+    assert_eq!(diff_vec, vec![-3.0, -3.0, -3.0]);
+
+    // Multiplication
+    let prod = a.mul(&b).unwrap();
+    let prod_vec: Vec<f32> = prod.to_vec1().unwrap();
+    assert_eq!(prod_vec, vec![4.0, 10.0, 18.0]);
+}
+
+#[test]
+fn test_tensor_reshape() {
+    let device = Device::Cpu;
+
+    let tensor = Tensor::new(&[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0], &device).unwrap();
+
+    // Reshape to 2x3
+    let reshaped = tensor.reshape(&[2, 3]).unwrap();
+    assert_eq!(reshaped.dims(), &[2, 3]);
+
+    // Reshape to 3x2
+    let reshaped = tensor.reshape(&[3, 2]).unwrap();
+    assert_eq!(reshaped.dims(), &[3, 2]);
+}
+
+#[test]
+fn test_tensor_transpose() {
+    let device = Device::Cpu;
+
+    let tensor = Tensor::new(&[1.0f32, 2.0, 3.0, 4.0], &device)
+        .unwrap()
+        .reshape(&[2, 2])
+        .unwrap();
+
+    let transposed = tensor.transpose(0, 1).unwrap();
+    assert_eq!(transposed.dims(), &[2, 2]);
+
+    let values: Vec<f32> = transposed.flatten_all().unwrap().to_vec1().unwrap();
+    assert_eq!(values, vec![1.0, 3.0, 2.0, 4.0]);
+}
+
+#[test]
+fn test_tensor_reduction() {
+    let device = Device::Cpu;
+
+    let tensor = Tensor::new(&[1.0f32, 2.0, 3.0, 4.0], &device).unwrap();
+
+    // Sum
+    let sum = tensor.sum_all().unwrap();
+    let sum_val: f32 = sum.to_scalar().unwrap();
+    assert_eq!(sum_val, 10.0);
+
+    // Mean
+    let mean = tensor.mean_all().unwrap();
+    let mean_val: f32 = mean.to_scalar().unwrap();
+    assert_eq!(mean_val, 2.5);
+}
+
+#[test]
+fn test_tensor_indexing() {
+    let device = Device::Cpu;
+
+    let tensor = Tensor::new(&[10.0f32, 20.0, 30.0, 40.0], &device).unwrap();
+
+    // Get element at index 0
+    let elem = tensor.get(0).unwrap();
+    let val: f32 = elem.to_scalar().unwrap();
+    assert_eq!(val, 10.0);
+
+    // Get element at index 2
+    let elem = tensor.get(2).unwrap();
+    let val: f32 = elem.to_scalar().unwrap();
+    assert_eq!(val, 30.0);
+}
+
+#[test]
+fn test_tensor_matmul() {
+    let device = Device::Cpu;
+
+    // 2x3 matrix
+    let a = Tensor::new(&[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0], &device)
+        .unwrap()
+        .reshape(&[2, 3])
+        .unwrap();
+
+    // 3x2 matrix
+    let b = Tensor::new(&[7.0f32, 8.0, 9.0, 10.0, 11.0, 12.0], &device)
+        .unwrap()
+        .reshape(&[3, 2])
+        .unwrap();
+
+    // Matrix multiplication
+    let result = a.matmul(&b).unwrap();
+    assert_eq!(result.dims(), &[2, 2]);
+
+    let values: Vec<f32> = result.flatten_all().unwrap().to_vec1().unwrap();
+    // [1*7 + 2*9 + 3*11, 1*8 + 2*10 + 3*12, 4*7 + 5*9 + 6*11, 4*8 + 5*10 + 6*12]
+    // = [58, 64, 139, 154]
+    assert_eq!(values, vec![58.0, 64.0, 139.0, 154.0]);
+}
+
+#[test]
+fn test_tensor_where() {
+    let device = Device::Cpu;
+
+    // Create a condition tensor where values > 0 are treated as true
+    let cond_values = Tensor::new(&[1.0f32, 0.0, 1.0], &device).unwrap();
+    let cond = cond_values.gt(&Tensor::zeros(cond_values.shape(), DType::F32, &device).unwrap()).unwrap();
+
+    let on_true = Tensor::new(&[10.0f32, 20.0, 30.0], &device).unwrap();
+    let on_false = Tensor::new(&[100.0f32, 200.0, 300.0], &device).unwrap();
+
+    let result = cond.where_cond(&on_true, &on_false).unwrap();
+    let values: Vec<f32> = result.to_vec1().unwrap();
+    assert_eq!(values, vec![10.0, 200.0, 30.0]);
+}
+
+#[test]
+fn test_tensor_narrow() {
+    let device = Device::Cpu;
+
+    let tensor = Tensor::new(&[1.0f32, 2.0, 3.0, 4.0, 5.0], &device).unwrap();
+
+    // Narrow from index 1, length 3
+    let narrowed = tensor.narrow(0, 1, 3).unwrap();
+    let values: Vec<f32> = narrowed.to_vec1().unwrap();
+    assert_eq!(values, vec![2.0, 3.0, 4.0]);
+}
+}
data/lib/candle/device_utils.rb CHANGED
@@ -1,22 +1,10 @@
 module Candle
   module DeviceUtils
+    # @deprecated Use {Candle::Device.best} instead
     # Get the best available device (Metal > CUDA > CPU)
     def self.best_device
-      # Try devices in order of preference
-      begin
-        # Try Metal first (for Mac users)
-        Device.metal
-      rescue
-        # :nocov:
-        begin
-          # Try CUDA next (for NVIDIA GPU users)
-          Device.cuda
-        rescue
-          # Fall back to CPU
-          Device.cpu
-        end
-        # :nocov:
-      end
+      warn "[DEPRECATION] `DeviceUtils.best_device` is deprecated. Please use `Device.best` instead."
+      Device.best
     end
   end
 end
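
Migration sketch for the deprecation above:

```ruby
device = Candle::DeviceUtils.best_device  # still works, now emits the deprecation warning
device = Candle::Device.best              # preferred from 1.2.0 on
```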
data/lib/candle/embedding_model.rb CHANGED
@@ -9,7 +9,36 @@ module Candle
     # Default embedding model type
     DEFAULT_EMBEDDING_MODEL_TYPE = "jina_bert"
 
+    # Load a pre-trained embedding model from HuggingFace
+    # @param model_id [String] HuggingFace model ID (defaults to jinaai/jina-embeddings-v2-base-en)
+    # @param device [Candle::Device] The device to use for computation (defaults to best available)
+    # @param tokenizer [String, nil] The tokenizer to use (defaults to using the model's tokenizer)
+    # @param model_type [String, nil] The type of embedding model (auto-detected if nil)
+    # @param embedding_size [Integer, nil] Override for the embedding size (optional)
+    # @return [EmbeddingModel] A new EmbeddingModel instance
+    def self.from_pretrained(model_id = DEFAULT_MODEL_PATH, device: Candle::Device.best, tokenizer: nil, model_type: nil, embedding_size: nil)
+      # Auto-detect model type based on model_id if not provided
+      if model_type.nil?
+        model_type = case model_id.downcase
+                     when /jina/
+                       "jina_bert"
+                     when /distilbert/
+                       "distilbert"
+                     when /minilm/
+                       "minilm"
+                     else
+                       "standard_bert"
+                     end
+      end
+
+      # Use model_id as tokenizer if not specified (usually what you want)
+      tokenizer_id = tokenizer || model_id
+
+      _create(model_id, tokenizer_id, device, model_type, embedding_size)
+    end
+
     # Constructor for creating a new EmbeddingModel with optional parameters
+    # @deprecated Use {.from_pretrained} instead
     # @param model_path [String, nil] The path to the model on Hugging Face
     # @param tokenizer_path [String, nil] The path to the tokenizer on Hugging Face
     # @param device [Candle::Device, Candle::Device.cpu] The device to use for computation
@@ -17,9 +46,10 @@ module Candle
     # @param embedding_size [Integer, nil] Override for the embedding size (optional)
     def self.new(model_path: DEFAULT_MODEL_PATH,
                  tokenizer_path: DEFAULT_TOKENIZER_PATH,
-                 device: Candle::Device.cpu,
+                 device: Candle::Device.best,
                  model_type: DEFAULT_EMBEDDING_MODEL_TYPE,
                  embedding_size: nil)
+      $stderr.puts "[DEPRECATION] `EmbeddingModel.new` is deprecated. Please use `EmbeddingModel.from_pretrained` instead."
       _create(model_path, tokenizer_path, device, model_type, embedding_size)
     end
     # Returns the embedding for a string using the specified pooling method.
@@ -28,5 +58,18 @@ module Candle
     def embedding(str, pooling_method: "pooled_normalized")
       _embedding(str, pooling_method)
     end
+
+    # Improved inspect method
+    def inspect
+      opts = options rescue {}
+
+      parts = ["#<Candle::EmbeddingModel"]
+      parts << "model=#{opts["model_id"] || "unknown"}"
+      parts << "type=#{opts["model_type"]}" if opts["model_type"]
+      parts << "device=#{opts["device"] || "unknown"}"
+      parts << "size=#{opts["embedding_size"]}" if opts["embedding_size"]
+
+      parts.join(" ") + ">"
+    end
   end
 end
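
Taken together, the new loader and inspect give this kind of workflow. The model id below is illustrative, chosen to exercise the /minilm/ branch of the auto-detection:

```ruby
model = Candle::EmbeddingModel.from_pretrained(
  "sentence-transformers/all-MiniLM-L6-v2"  # auto-detected as "minilm"
)
vec = model.embedding("hello world")        # default pooling: "pooled_normalized"

# The old constructor still works but now warns on $stderr:
Candle::EmbeddingModel.new(model_path: "jinaai/jina-embeddings-v2-base-en")
```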
data/lib/candle/llm.rb CHANGED
@@ -189,6 +189,45 @@ module Candle
       prompt = apply_chat_template(messages)
       generate_stream(prompt, **options, &block)
     end
+
+    # Inspect method for debugging and exploration
+    def inspect
+      opts = options rescue {}
+
+      # Extract key information
+      model_type = opts["model_type"] || "Unknown"
+      device = opts["device"] || self.device.to_s rescue "unknown"
+
+      # Build the inspect string
+      parts = ["#<Candle::LLM"]
+
+      # Add base model or model_id
+      if opts["base_model"]
+        parts << "model=#{opts["base_model"]}"
+      elsif opts["model_id"]
+        parts << "model=#{opts["model_id"]}"
+      elsif respond_to?(:model_id)
+        parts << "model=#{model_id}"
+      end
+
+      # Add GGUF file if present
+      if opts["gguf_file"]
+        parts << "gguf=#{opts["gguf_file"]}"
+      end
+
+      # Add device
+      parts << "device=#{device}"
+
+      # Add model type
+      parts << "type=#{model_type}"
+
+      # Add architecture for GGUF models
+      if opts["architecture"]
+        parts << "arch=#{opts["architecture"]}"
+      end
+
+      parts.join(" ") + ">"
+    end
 
     def generate(prompt, config: GenerationConfig.balanced, reset_cache: true)
       begin
@@ -206,7 +245,7 @@ module Candle
       end
     end
 
-    def self.from_pretrained(model_id, device: Candle::Device.cpu, gguf_file: nil, tokenizer: nil)
+    def self.from_pretrained(model_id, device: Candle::Device.best, gguf_file: nil, tokenizer: nil)
       model_str = if gguf_file
                     "#{model_id}@#{gguf_file}"
                   else
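
LLM loading picks up the same default device change. A sketch; the model id and GGUF file name here are hypothetical:

```ruby
llm = Candle::LLM.from_pretrained(
  "example-org/example-model",      # hypothetical model id
  gguf_file: "model-q4_k_m.gguf"    # hypothetical quantized weights file
)

# Pin to the CPU to keep the 1.1.1 behavior:
cpu_llm = Candle::LLM.from_pretrained("example-org/example-model", device: Candle::Device.cpu)
```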
@@ -393,5 +432,28 @@ module Candle
       }
       new(defaults.merge(opts))
     end
+
+    # Inspect method for debugging and exploration
+    def inspect
+      opts = options rescue {}
+
+      parts = ["#<Candle::GenerationConfig"]
+
+      # Add key configuration parameters
+      parts << "temp=#{opts["temperature"]}" if opts["temperature"]
+      parts << "max=#{opts["max_length"]}" if opts["max_length"]
+      parts << "top_p=#{opts["top_p"]}" if opts["top_p"]
+      parts << "top_k=#{opts["top_k"]}" if opts["top_k"]
+      parts << "seed=#{opts["seed"]}" if opts["seed"]
+
+      # Add flags
+      flags = []
+      flags << "debug" if opts["debug_tokens"]
+      flags << "constraint" if opts["has_constraint"]
+      flags << "stop_on_match" if opts["stop_on_match"]
+      parts << "flags=[#{flags.join(",")}]" if flags.any?
+
+      parts.join(" ") + ">"
+    end
   end
 end
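
Illustrative output of the new inspect methods; which fields appear depends on what options returns for the instance, and the values below are hypothetical:

```ruby
llm.inspect
# => "#<Candle::LLM model=example-org/example-model gguf=model-q4_k_m.gguf device=metal type=llama>"

Candle::GenerationConfig.balanced.inspect
# => "#<Candle::GenerationConfig temp=0.7 max=512 top_p=0.9>"
```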