RubyGems - red-candle - Versions diffs - 1.8.0-aarch64-linux - Mend

red-candle 1.8.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

checksums.yaml +7 -0
data/Cargo.lock +5021 -0
data/Cargo.toml +6 -0
data/Gemfile +3 -0
data/LICENSE +22 -0
data/README.md +1171 -0
data/Rakefile +167 -0
data/bin/console +11 -0
data/bin/setup +17 -0
data/ext/candle/Cargo.toml +38 -0
data/ext/candle/build.rs +117 -0
data/ext/candle/extconf.rb +79 -0
data/ext/candle/rustfmt.toml +63 -0
data/ext/candle/src/gvl.rs +58 -0
data/ext/candle/src/lib.rs +59 -0
data/ext/candle/src/llm/constrained_generation_test.rs +395 -0
data/ext/candle/src/llm/gemma.rs +313 -0
data/ext/candle/src/llm/generation_config.rs +63 -0
data/ext/candle/src/llm/glm4.rs +236 -0
data/ext/candle/src/llm/granite.rs +308 -0
data/ext/candle/src/llm/granitemoehybrid.rs +315 -0
data/ext/candle/src/llm/llama.rs +396 -0
data/ext/candle/src/llm/mistral.rs +309 -0
data/ext/candle/src/llm/mod.rs +49 -0
data/ext/candle/src/llm/phi.rs +369 -0
data/ext/candle/src/llm/quantized_gguf.rs +734 -0
data/ext/candle/src/llm/qwen.rs +261 -0
data/ext/candle/src/llm/qwen3.rs +257 -0
data/ext/candle/src/llm/text_generation.rs +284 -0
data/ext/candle/src/ruby/device.rs +234 -0
data/ext/candle/src/ruby/dtype.rs +39 -0
data/ext/candle/src/ruby/embedding_model.rs +477 -0
data/ext/candle/src/ruby/errors.rs +16 -0
data/ext/candle/src/ruby/llm.rs +730 -0
data/ext/candle/src/ruby/mod.rs +24 -0
data/ext/candle/src/ruby/ner.rs +444 -0
data/ext/candle/src/ruby/reranker.rs +488 -0
data/ext/candle/src/ruby/result.rs +3 -0
data/ext/candle/src/ruby/structured.rs +92 -0
data/ext/candle/src/ruby/tensor.rs +731 -0
data/ext/candle/src/ruby/tokenizer.rs +343 -0
data/ext/candle/src/ruby/utils.rs +96 -0
data/ext/candle/src/ruby/vlm.rs +330 -0
data/ext/candle/src/structured/integration_test.rs +130 -0
data/ext/candle/src/structured/mod.rs +31 -0
data/ext/candle/src/structured/schema_processor.rs +215 -0
data/ext/candle/src/structured/vocabulary_adapter.rs +152 -0
data/ext/candle/src/structured/vocabulary_adapter_real_test.rs +66 -0
data/ext/candle/src/structured/vocabulary_adapter_simple_test.rs +70 -0
data/ext/candle/src/tokenizer/loader.rs +108 -0
data/ext/candle/src/tokenizer/mod.rs +104 -0
data/ext/candle/tests/device_tests.rs +43 -0
data/ext/candle/tests/tensor_tests.rs +162 -0
data/lib/candle/3.1/candle.so +0 -0
data/lib/candle/3.2/candle.so +0 -0
data/lib/candle/3.3/candle.so +0 -0
data/lib/candle/3.4/candle.so +0 -0
data/lib/candle/4.0/candle.so +0 -0
data/lib/candle/agent.rb +68 -0
data/lib/candle/build_info.rb +67 -0
data/lib/candle/device_utils.rb +10 -0
data/lib/candle/embedding_model.rb +75 -0
data/lib/candle/embedding_model_type.rb +31 -0
data/lib/candle/llm.rb +595 -0
data/lib/candle/logger.rb +149 -0
data/lib/candle/ner.rb +368 -0
data/lib/candle/reranker.rb +45 -0
data/lib/candle/tensor.rb +99 -0
data/lib/candle/tokenizer.rb +139 -0
data/lib/candle/tool.rb +47 -0
data/lib/candle/tool_call_parser.rb +57 -0
data/lib/candle/version.rb +5 -0
data/lib/candle/vlm.rb +31 -0
data/lib/candle.rb +29 -0
data/lib/red-candle.rb +1 -0
metadata +309 -0

data/ext/candle/src/llm/text_generation.rs ADDED Viewed

@@ -0,0 +1,284 @@
+use candle_core::{Result as CandleResult, Tensor};
+use candle_transformers::generation::LogitsProcessor;
+use std::sync::Arc;
+use super::GenerationConfig;
+use crate::structured::Index;
+/// Helper struct for text generation process: holds the sampler, the token history and the optional constraint-tracking state
+pub struct TextGeneration {
+    logits_processor: LogitsProcessor, // sampler built in `new` from the config's seed, temperature and top_p
+    tokens: Vec<u32>, // token history; every sampled token is appended and it feeds the repetition penalty
+    eos_token_id: Option<u32>, // end-of-sequence id; `None` until `set_eos_token_id` is called
+    repetition_penalty: f32, // a value of exactly 1.0 makes `sample_next_token` skip the penalty step
+    repetition_penalty_last_n: usize, // number of trailing tokens the penalty looks at
+    constraint: Option<Arc<Index>>, // shared constraint index, if constrained generation is active
+    constraint_state: Option<u32>, // current state inside `constraint`; replaced after each sampled token
+    constraint_completed: bool, // latched to true once a state with no continuation (or only EOS) is reached
+    tokens_since_constraint_start: usize, // value of `tokens.len()` at the moment the constraint was installed
+}
+impl TextGeneration {
+    /// Builds a generator from `config`.
+    ///
+    /// The sampler is seeded with the config's seed, temperature and top_p, the
+    /// token history starts empty, and no EOS token is known yet. When the
+    /// config carries a constraint it is installed through `set_constraint`.
+    pub fn new(config: &GenerationConfig) -> Self {
+        let mut generator = Self {
+            logits_processor: LogitsProcessor::new(config.seed, Some(config.temperature), config.top_p),
+            tokens: vec![],
+            eos_token_id: None,
+            repetition_penalty: config.repetition_penalty,
+            repetition_penalty_last_n: config.repetition_penalty_last_n,
+            constraint: None,
+            constraint_state: None,
+            constraint_completed: false,
+            tokens_since_constraint_start: 0,
+        };
+        // Share the config's constraint index rather than copying it.
+        if let Some(index) = config.constraint.as_ref() {
+            generator.set_constraint(Arc::clone(index));
+        }
+        generator
+    }
+    /// Records the end-of-sequence token id used by the stop and completion checks.
+    pub fn set_eos_token_id(&mut self, eos_token_id: u32) {
+        self.eos_token_id = eos_token_id.into();
+    }
+    /// Replaces the whole token history with `tokens`; the previous history is dropped.
+    pub fn set_tokens(&mut self, tokens: Vec<u32>) {
+        drop(std::mem::replace(&mut self.tokens, tokens));
+    }
+    /// Returns the token history as a borrowed slice.
+    pub fn get_tokens(&self) -> &[u32] {
+        self.tokens.as_slice()
+    }
+    /// Appends a single token to the end of the token history.
+    pub fn push_token(&mut self, token: u32) {
+        self.tokens.extend(std::iter::once(token));
+    }
+    /// Installs `constraint` and resets all constraint tracking.
+    ///
+    /// Tracking starts from the index's initial state, the completion flag is
+    /// cleared, and the current history length is remembered as the point at
+    /// which the constraint became active.
+    pub fn set_constraint(&mut self, constraint: Arc<Index>) {
+        let start_state = constraint.initial_state();
+        self.tokens_since_constraint_start = self.tokens.len();
+        self.constraint_completed = false;
+        self.constraint_state = Some(start_state);
+        self.constraint = Some(constraint);
+    }
+    /// Masks every token the active constraint does not allow in its current state.
+    ///
+    /// Does nothing when no constraint or no current state is set, or when the
+    /// index reports no allowed-token set for the state. Otherwise an additive
+    /// mask (0 for allowed ids, negative infinity for all others) is added to
+    /// `logits`, which must be one-dimensional.
+    fn apply_constraints(&self, logits: &mut Tensor) -> CandleResult<()> {
+        let (index, state) = match (self.constraint.as_ref(), self.constraint_state) {
+            (Some(index), Some(state)) => (index, state),
+            _ => return Ok(()),
+        };
+        let vocab_size = logits.dims1()?;
+        let allowed = match index.allowed_tokens(&state) {
+            Some(allowed) => allowed,
+            None => return Ok(()),
+        };
+        // Start fully masked, then open up the allowed ids that fit inside the vocabulary.
+        let mut mask = vec![f32::NEG_INFINITY; vocab_size];
+        for slot in allowed.iter().map(|&id| id as usize).filter(|&slot| slot < vocab_size) {
+            mask[slot] = 0.0;
+        }
+        let mask_tensor = Tensor::from_vec(mask, vocab_size, logits.device())?;
+        *logits = logits.add(&mask_tensor)?;
+        Ok(())
+    }
+    /// Apply repetition penalty to logits
+    ///
+    /// Looks at the last `context_size` tokens of the history and penalises the
+    /// logit of every distinct token found there: positive logits are divided
+    /// by `penalty`, all others are multiplied by it. A `penalty` of exactly
+    /// 1.0 is a no-op. `logits` must be a one-dimensional `f32` tensor and is
+    /// replaced in place on the same device.
+    ///
+    /// Each distinct token is penalised once, no matter how often it occurs in
+    /// the window; applying the factor per occurrence would compound it
+    /// (`penalty^k`) for tokens that are merely frequent.
+    pub fn apply_repetition_penalty(
+        &self,
+        logits: &mut Tensor,
+        penalty: f32,
+        context_size: usize,
+    ) -> CandleResult<()> {
+        if penalty == 1.0 {
+            return Ok(());
+        }
+        let device = logits.device();
+        let vocab_size = logits.dims1()?;
+        // Get the context tokens to apply penalty to
+        let start = self.tokens.len().saturating_sub(context_size);
+        let context_tokens = &self.tokens[start..];
+        let mut logits_vec = logits.to_vec1::<f32>()?;
+        let mut already_penalised = std::collections::HashSet::new();
+        for &token in context_tokens {
+            // `insert` returns false for a token that was already handled.
+            if !already_penalised.insert(token) {
+                continue;
+            }
+            // Token ids outside the vocabulary are ignored.
+            if let Some(logit) = logits_vec.get_mut(token as usize) {
+                if *logit > 0.0 {
+                    *logit /= penalty;
+                } else {
+                    *logit *= penalty;
+                }
+            }
+        }
+        *logits = Tensor::from_vec(logits_vec, vocab_size, device)?;
+        Ok(())
+    }
+    /// Samples the next token from `logits` and records it in the history.
+    ///
+    /// Works on a copy of `logits`: the stored repetition penalty is applied
+    /// first (unless it is exactly 1.0), then the constraint mask, then the
+    /// sampler picks a token. When a constraint is active its state is advanced
+    /// with the sampled token, and the completion flag is latched if the new
+    /// state allows no tokens, or allows only the EOS token.
+    pub fn sample_next_token(
+        &mut self,
+        logits: &Tensor,
+    ) -> CandleResult<u32> {
+        let mut working = logits.clone();
+        if self.repetition_penalty != 1.0 {
+            self.apply_repetition_penalty(&mut working, self.repetition_penalty, self.repetition_penalty_last_n)?;
+        }
+        self.apply_constraints(&mut working)?;
+        let sampled = self.logits_processor.sample(&working)?;
+        self.tokens.push(sampled);
+        if let (Some(index), Some(state)) = (self.constraint.as_ref(), self.constraint_state) {
+            let upcoming = index.next_state(&state, &sampled);
+            let past_start = self.tokens.len() > self.tokens_since_constraint_start;
+            if !self.constraint_completed && past_start {
+                // A missing next state leaves the completion flag untouched.
+                if let Some(upcoming_state) = upcoming {
+                    let finished = match index.allowed_tokens(&upcoming_state) {
+                        // No allowed-token set at all: nothing more can be generated.
+                        None => true,
+                        // Empty set, or EOS as the only option (not merely one of several).
+                        Some(allowed) => {
+                            allowed.is_empty()
+                                || self
+                                    .eos_token_id
+                                    .map_or(false, |eos| allowed.len() == 1 && allowed.contains(&eos))
+                        }
+                    };
+                    if finished {
+                        self.constraint_completed = true;
+                    }
+                }
+            }
+            self.constraint_state = upcoming;
+        }
+        Ok(sampled)
+    }
+    /// Reports whether the constraint has reached a state where generation must end.
+    ///
+    /// True when completion was already latched, or when the current state
+    /// allows no tokens, has no allowed-token set, or allows only the EOS
+    /// token. EOS being one option among several does not count. False when no
+    /// constraint or no current state is set.
+    pub fn is_constraint_satisfied(&self) -> bool {
+        if self.constraint_completed {
+            return true;
+        }
+        match (self.constraint.as_ref(), self.constraint_state) {
+            (Some(index), Some(state)) => match index.allowed_tokens(&state) {
+                None => true,
+                Some(allowed) => {
+                    allowed.is_empty()
+                        || self
+                            .eos_token_id
+                            .map_or(false, |eos| allowed.len() == 1 && allowed.contains(&eos))
+                }
+            },
+            _ => false,
+        }
+    }
+    /// Completion check used when `stop_on_match` is enabled.
+    ///
+    /// Only definitive completion counts: the latched completion flag, a state
+    /// with no allowed tokens (or no allowed-token set), or a state in which
+    /// EOS is the single allowed token. A state that merely allows many tokens
+    /// (for example inside a JSON string) is not treated as satisfied.
+    pub fn is_constraint_satisfied_stop_on_match(&self) -> bool {
+        if self.constraint_completed {
+            return true;
+        }
+        let (index, state) = match (&self.constraint, self.constraint_state) {
+            (Some(index), Some(state)) => (index, state),
+            _ => return false,
+        };
+        let allowed = match index.allowed_tokens(&state) {
+            Some(allowed) => allowed,
+            // No allowed-token set: the constraint is complete.
+            None => return true,
+        };
+        if allowed.is_empty() {
+            return true;
+        }
+        matches!(self.eos_token_id, Some(eos) if allowed.len() == 1 && allowed.contains(&eos))
+    }
+    /// Decides whether generation should stop after `token`.
+    ///
+    /// Stops when the history has reached `max_length`, when `token` is the EOS
+    /// token, or when the active constraint sits in a final state (one with an
+    /// empty or missing allowed-token set).
+    pub fn should_stop(&self, token: u32, max_length: usize) -> bool {
+        if self.tokens.len() >= max_length || self.eos_token_id == Some(token) {
+            return true;
+        }
+        match (self.constraint.as_ref(), self.constraint_state) {
+            (Some(index), Some(state)) => index
+                .allowed_tokens(&state)
+                .map_or(true, |allowed| allowed.is_empty()),
+            _ => false,
+        }
+    }
+    /// Returns true when `text` ends with at least one of `stop_sequences`.
+    pub fn check_stop_sequences(&self, text: &str, stop_sequences: &[String]) -> bool {
+        stop_sequences.iter().any(|seq| text.ends_with(seq.as_str()))
+    }
+}

data/ext/candle/src/ruby/device.rs ADDED Viewed

@@ -0,0 +1,234 @@
+use magnus::Error;
+use magnus::{function, method, RModule, Module, Object, Ruby};
+use ::candle_core::Device as CoreDevice;
+use crate::ruby::Result;
+#[cfg(any(feature = "cuda", feature = "metal"))]
+use crate::ruby::errors::wrap_candle_err;
+#[cfg(feature = "cuda")]
+static CUDA_DEVICE: std::sync::Mutex<Option<CoreDevice>> = std::sync::Mutex::new(None); // cached CUDA device; filled by the first successful `Device::as_device` call
+#[cfg(feature = "metal")]
+static METAL_DEVICE: std::sync::Mutex<Option<CoreDevice>> = std::sync::Mutex::new(None); // cached Metal device; filled by the first successful `Device::as_device` call
+/// Lists the device names this build was compiled to support.
+///
+/// `"cpu"` is always first. `"cuda"` and `"metal"` follow, in that order, when
+/// the matching Cargo feature is enabled and the `force_cpu` cfg is not set.
+/// This reflects compile-time configuration only; nothing is probed at runtime.
+pub fn available_devices() -> Vec<String> {
+    let mut names = vec!["cpu".to_string()];
+    if cfg!(all(feature = "cuda", not(force_cpu))) {
+        names.push("cuda".to_string());
+    }
+    if cfg!(all(feature = "metal", not(force_cpu))) {
+        names.push("metal".to_string());
+    }
+    names
+}
+/// Picks the default device from compile-time features, without runtime detection.
+///
+/// Metal wins when it is compiled in, then CUDA, otherwise CPU. Setting the
+/// `force_cpu` cfg always yields CPU.
+pub fn default_device() -> Device {
+    if cfg!(all(feature = "metal", not(force_cpu))) {
+        Device::Metal
+    } else if cfg!(all(feature = "cuda", not(force_cpu))) {
+        Device::Cuda
+    } else {
+        Device::Cpu
+    }
+}
+/// Picks the best device that can actually be created at runtime.
+///
+/// For each accelerator compiled in (Metal first, then CUDA) it tries to
+/// create device 0 and returns that backend on success. CPU is the fallback.
+pub fn best_device() -> Device {
+    #[cfg(feature = "metal")]
+    {
+        // Creating the device is the runtime availability check.
+        match CoreDevice::new_metal(0) {
+            Ok(_) => return Device::Metal,
+            Err(_) => {}
+        }
+    }
+    #[cfg(feature = "cuda")]
+    {
+        // Creating the device is the runtime availability check.
+        match CoreDevice::new_cuda(0) {
+            Ok(_) => return Device::Cuda,
+            Err(_) => {}
+        }
+    }
+    // Nothing else worked (or nothing else was compiled in).
+    Device::Cpu
+}
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[magnus::wrap(class = "Candle::Device")]
+pub enum Device {
+    Cpu, // maps to `CoreDevice::Cpu`; rendered as "cpu"
+    Cuda, // CUDA backend; `as_device` resolves it to CUDA device 0; rendered as "cuda"
+    Metal, // Metal backend; `as_device` resolves it to Metal device 0; rendered as "metal"
+}
+impl Device {
+    /// Returns the CPU device.
+    pub fn cpu() -> Self {
+        Device::Cpu
+    }
+    /// Returns the best device available at runtime; delegates to `best_device`.
+    pub fn best() -> Self {
+        self::best_device()
+    }
+    /// Returns the CUDA (GPU) device selector.
+    ///
+    /// Fails with a Ruby `RuntimeError` when the extension was built without
+    /// the `cuda` feature. No device is created here.
+    pub fn cuda() -> Result<Self> {
+        if cfg!(feature = "cuda") {
+            Ok(Self::Cuda)
+        } else {
+            Err(Error::new(
+                Ruby::get().unwrap().exception_runtime_error(),
+                "CUDA support not compiled in. Rebuild with CUDA available.",
+            ))
+        }
+    }
+    /// Returns the Metal (Apple GPU) device selector.
+    ///
+    /// Fails with a Ruby `RuntimeError` when the extension was built without
+    /// the `metal` feature. No device is created here.
+    pub fn metal() -> Result<Self> {
+        if cfg!(feature = "metal") {
+            Ok(Self::Metal)
+        } else {
+            Err(Error::new(
+                Ruby::get().unwrap().exception_runtime_error(),
+                "Metal support not compiled in. Rebuild on macOS.",
+            ))
+        }
+    }
+    /// Maps a candle core device to its backend kind, discarding the device handle.
+    pub fn from_device(device: &CoreDevice) -> Self {
+        match *device {
+            CoreDevice::Cpu => Device::Cpu,
+            CoreDevice::Cuda(..) => Device::Cuda,
+            CoreDevice::Metal(..) => Device::Metal,
+        }
+    }
+    /// Resolves this selector to a candle core device.
+    ///
+    /// CPU resolves directly. For CUDA and Metal the device with ordinal 0 is
+    /// created on first use and kept in a process-wide cache; later calls
+    /// return a clone of the cached device.
+    ///
+    /// Fails with a Ruby `RuntimeError` when the backend was not compiled in,
+    /// and with the wrapped candle error when the device cannot be created.
+    pub fn as_device(&self) -> Result<CoreDevice> {
+        match *self {
+            Self::Cpu => Ok(CoreDevice::Cpu),
+            Self::Cuda => {
+                #[cfg(not(feature = "cuda"))]
+                {
+                    return Err(Error::new(
+                        Ruby::get().unwrap().exception_runtime_error(),
+                        "CUDA support not compiled in. Rebuild with CUDA available.",
+                    ));
+                }
+                #[cfg(feature = "cuda")]
+                {
+                    let mut slot = CUDA_DEVICE.lock().unwrap();
+                    let cached = slot.as_ref().cloned();
+                    match cached {
+                        Some(existing) => Ok(existing),
+                        None => {
+                            // new_cuda() rather than cuda_if_available(): a missing GPU must
+                            // surface as an error here, not silently fall back to the CPU.
+                            let fresh = CoreDevice::new_cuda(0).map_err(wrap_candle_err)?;
+                            *slot = Some(fresh.clone());
+                            Ok(fresh)
+                        }
+                    }
+                }
+            }
+            Self::Metal => {
+                #[cfg(not(feature = "metal"))]
+                {
+                    return Err(Error::new(
+                        Ruby::get().unwrap().exception_runtime_error(),
+                        "Metal support not compiled in. Rebuild on macOS.",
+                    ));
+                }
+                #[cfg(feature = "metal")]
+                {
+                    let mut slot = METAL_DEVICE.lock().unwrap();
+                    let cached = slot.as_ref().cloned();
+                    match cached {
+                        Some(existing) => Ok(existing),
+                        None => {
+                            let fresh = CoreDevice::new_metal(0).map_err(wrap_candle_err)?;
+                            *slot = Some(fresh.clone());
+                            Ok(fresh)
+                        }
+                    }
+                }
+            }
+        }
+    }
+    /// Returns the lowercase backend name: `"cpu"`, `"cuda"` or `"metal"`.
+    pub fn __repr__(&self) -> String {
+        let label = match *self {
+            Self::Cpu => "cpu",
+            Self::Cuda => "cuda",
+            Self::Metal => "metal",
+        };
+        label.to_string()
+    }
+    /// Returns the same text as `__repr__`.
+    pub fn __str__(&self) -> String {
+        Self::__repr__(self)
+    }
+    /// Two devices are equal when they name the same backend.
+    pub fn __eq__(&self, other: &Device) -> bool {
+        *self == *other
+    }
+}
+impl magnus::TryConvert for Device {
+    /// Converts a Ruby value into a [`Device`].
+    ///
+    /// Accepts either an already-wrapped `Candle::Device` object or a Ruby
+    /// string naming the backend (`"cpu"`, `"cuda"` or `"metal"`).
+    ///
+    /// # Errors
+    ///
+    /// Returns the conversion error when the value is neither a device nor a
+    /// string, or when the string cannot be read as UTF-8, and an
+    /// `ArgumentError` ("invalid device") for an unknown name. Badly encoded
+    /// input is reported as an error rather than causing a panic.
+    fn try_convert(val: magnus::Value) -> Result<Self> {
+        // First check if it's already a wrapped Device object
+        if let Ok(device) = <magnus::typed_data::Obj<Device> as magnus::TryConvert>::try_convert(val) {
+            return Ok(*device);
+        }
+        // Otherwise read it as a string; copying it into an owned `String`
+        // needs no `unsafe` borrow of Ruby-managed memory.
+        let name = magnus::RString::try_convert(val)?.to_string()?;
+        match name.as_str() {
+            "cpu" => Ok(Device::Cpu),
+            "cuda" => Ok(Device::Cuda),
+            "metal" => Ok(Device::Metal),
+            _ => Err(Error::new(Ruby::get().unwrap().exception_arg_error(), "invalid device")),
+        }
+    }
+}
+/// Defines the `Device` class under `rb_candle` and registers its Ruby API.
+///
+/// Singleton methods: `cpu`, `cuda`, `metal`, `available_devices`, `default`
+/// and `best`. Instance methods: `to_s`, `inspect` and `==`.
+pub fn init(rb_candle: RModule) -> Result<()> {
+    let object_class = Ruby::get().unwrap().class_object();
+    let device_class = rb_candle.define_class("Device", object_class)?;
+
+    // Constructors and device discovery.
+    device_class.define_singleton_method("cpu", function!(Device::cpu, 0))?;
+    device_class.define_singleton_method("cuda", function!(Device::cuda, 0))?;
+    device_class.define_singleton_method("metal", function!(Device::metal, 0))?;
+    device_class.define_singleton_method("available_devices", function!(available_devices, 0))?;
+    device_class.define_singleton_method("default", function!(default_device, 0))?;
+    device_class.define_singleton_method("best", function!(best_device, 0))?;
+
+    // Display and comparison.
+    device_class.define_method("to_s", method!(Device::__str__, 0))?;
+    device_class.define_method("inspect", method!(Device::__repr__, 0))?;
+    device_class.define_method("==", method!(Device::__eq__, 1))?;
+    Ok(())
+}

data/ext/candle/src/ruby/dtype.rs ADDED Viewed

@@ -0,0 +1,39 @@
+use magnus::value::ReprValue;
+use magnus::{method, RModule, Module, Ruby};
+use ::candle_core::DType as CoreDType;
+use crate::ruby::Result;
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[magnus::wrap(class = "Candle::DType", free_immediately, size)]
+/// A `candle` dtype: a newtype over `candle_core::DType`, wrapped for Ruby as `Candle::DType`.
+pub struct DType(pub CoreDType);
+impl DType {
+    /// Returns the `Debug` rendering of the wrapped candle dtype.
+    pub fn __repr__(&self) -> String {
+        let Self(inner) = self;
+        format!("{:?}", inner)
+    }
+    /// Returns the same text as `__repr__`.
+    pub fn __str__(&self) -> String {
+        Self::__repr__(self)
+    }
+}
+impl DType {
+    /// Builds a [`DType`] from a Ruby symbol by parsing the symbol's name with
+    /// `CoreDType::from_str`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the conversion error when the symbol's name cannot be read, and
+    /// an `ArgumentError` when the name is not a dtype candle recognises. The
+    /// symbol comes from Ruby callers, so bad input is reported as an error
+    /// rather than causing a panic.
+    pub fn from_rbobject(dtype: magnus::Symbol) -> Result<Self> {
+        use std::str::FromStr;
+        // SAFETY: the borrowed text is copied into an owned `String` right away,
+        // before any further Ruby API call could invalidate it.
+        let name = unsafe { dtype.to_s() }?.into_owned();
+        let parsed = CoreDType::from_str(&name).map_err(|_| {
+            magnus::Error::new(
+                Ruby::get_with(dtype).exception_arg_error(),
+                format!("invalid dtype: {}", name),
+            )
+        })?;
+        Ok(Self(parsed))
+    }
+}
+/// Defines the `DType` class under `rb_candle` with its `to_s` and `inspect` methods.
+pub fn init(rb_candle: RModule) -> Result<()> {
+    let object_class = Ruby::get().unwrap().class_object();
+    let dtype_class = rb_candle.define_class("DType", object_class)?;
+    dtype_class.define_method("to_s", method!(DType::__str__, 0))?;
+    dtype_class.define_method("inspect", method!(DType::__repr__, 0))?;
+    Ok(())
+}