red-candle 1.0.1 → 1.1.0

This diff reflects the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
@@ -2,6 +2,9 @@ use candle_core::{DType, Device, Result as CandleResult, Tensor};
  use candle_core::quantized::gguf_file;
  use candle_transformers::models::quantized_llama::ModelWeights as QuantizedLlamaModel;
  use candle_transformers::models::quantized_gemma3::ModelWeights as QuantizedGemmaModel;
+ use candle_transformers::models::quantized_qwen2::ModelWeights as QuantizedQwenModel;
+ use candle_transformers::models::quantized_phi::ModelWeights as QuantizedPhiModel;
+ use candle_transformers::models::quantized_phi3::ModelWeights as QuantizedPhi3Model;
  use hf_hub::api::tokio::{Api, ApiRepo};
  use tokenizers::Tokenizer;
  use std::io::Seek;
@@ -9,7 +12,6 @@ use std::io::Seek;
  use crate::llm::{GenerationConfig, TextGeneration, TextGenerator, TokenizerWrapper};

  /// Unified GGUF model that can load any GGUF file and detect the architecture
- #[derive(Debug)]
  pub struct QuantizedGGUF {
      model: ModelType,
      tokenizer: TokenizerWrapper,
@@ -20,10 +22,12 @@ pub struct QuantizedGGUF {
      _chat_template: Option<String>,
  }

- #[derive(Debug)]
  enum ModelType {
      Llama(QuantizedLlamaModel),
      Gemma(QuantizedGemmaModel),
+     Qwen(QuantizedQwenModel),
+     Phi(QuantizedPhiModel),
+     Phi3(QuantizedPhi3Model),
      // Mistral uses Llama loader due to tensor naming compatibility
  }

@@ -97,6 +101,34 @@ impl QuantizedGGUF {
                  let model = QuantizedLlamaModel::from_gguf(content, &mut file, &device)?;
                  ModelType::Llama(model)
              }
+             "qwen" | "qwen2" | "qwen3" => {
+                 // Try different loaders based on what metadata is available
+                 if content.metadata.contains_key("llama.attention.head_count") {
+                     let model = QuantizedLlamaModel::from_gguf(content, &mut file, &device)?;
+                     ModelType::Llama(model)
+                 } else if content.metadata.contains_key("qwen2.attention.head_count") {
+                     let model = QuantizedQwenModel::from_gguf(content, &mut file, &device)?;
+                     ModelType::Qwen(model)
+                 } else if content.metadata.contains_key("qwen3.attention.head_count") {
+                     // Qwen3 GGUF files use a different metadata format
+                     // The quantized_qwen3 module is not yet in the released version of candle-transformers
+                     return Err(candle_core::Error::Msg(format!(
+                         "Qwen3 GGUF format detected but not yet fully supported.\n\n\
+                          The file contains qwen3.* metadata keys which require candle-transformers > 0.9.1.\n\n\
+                          Current alternatives:\n\
+                          1. Use Qwen2.5 GGUF models which work well:\n\
+                             - Qwen/Qwen2.5-7B-Instruct-GGUF (recommended)\n\
+                             - Qwen/Qwen2.5-32B-Instruct-GGUF\n\
+                          2. Use non-quantized Qwen models with safetensors\n\
+                          3. Wait for candle-transformers update with quantized_qwen3 support\n\n\
+                          Note: Qwen2.5 models have similar capabilities to Qwen3."
+                     )));
+                 } else {
+                     // Last resort: try llama loader anyway, as it's the most common
+                     let model = QuantizedLlamaModel::from_gguf(content, &mut file, &device)?;
+                     ModelType::Llama(model)
+                 }
+             }
              "gemma" | "gemma2" | "gemma3" => {
                  // Try Gemma-specific loader first, fall back to Llama if it fails
                  match QuantizedGemmaModel::from_gguf(content, &mut file, &device) {
@@ -112,9 +144,20 @@ impl QuantizedGGUF {
                      Err(e) => return Err(e),
                  }
              }
+             "phi" | "phi2" => {
+                 let model = QuantizedPhiModel::from_gguf(content, &mut file, &device)?;
+                 ModelType::Phi(model)
+             }
+             "phi3" => {
+                 // QuantizedPhi3Model requires an additional `approx` parameter
+                 // Setting to false to avoid performance issues without flash-attn
+                 let approx = false;
+                 let model = QuantizedPhi3Model::from_gguf(approx, content, &mut file, &device)?;
+                 ModelType::Phi3(model)
+             }
              _ => {
                  return Err(candle_core::Error::Msg(format!(
-                     "Unsupported architecture: {}. Supported: llama, mistral, gemma",
+                     "Unsupported architecture: {}. Supported: llama, mistral, gemma, qwen, qwen2, qwen3, phi, phi2, phi3",
                      architecture
                  )));
              }
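The loader choice above hinges entirely on which `*.attention.head_count` metadata key a GGUF file carries. A free-standing sketch of that detection step, for illustration only (the `sniff_loader` helper and its return labels are not part of the crate):

use candle_core::quantized::gguf_file;

/// Hypothetical helper: open a GGUF file and report which quantized loader
/// its metadata keys suggest, mirroring the match arms above.
fn sniff_loader(path: &std::path::Path) -> candle_core::Result<&'static str> {
    let mut file = std::fs::File::open(path)?;
    let content = gguf_file::Content::read(&mut file)?;
    Ok(if content.metadata.contains_key("llama.attention.head_count") {
        "quantized_llama"
    } else if content.metadata.contains_key("qwen2.attention.head_count") {
        "quantized_qwen2"
    } else if content.metadata.contains_key("qwen3.attention.head_count") {
        "qwen3 (unsupported until candle-transformers ships quantized_qwen3)"
    } else {
        "quantized_llama (fallback)"
    })
}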
@@ -149,6 +192,14 @@ impl QuantizedGGUF {
              Ok("mistral".to_string())
          } else if model_lower.contains("gemma") {
              Ok("gemma".to_string())
+         } else if model_lower.contains("qwen") {
+             Ok("qwen".to_string())
+         } else if model_lower.contains("phi-3") || model_lower.contains("phi3") {
+             Ok("phi3".to_string())
+         } else if model_lower.contains("phi-2") || model_lower.contains("phi2") {
+             Ok("phi2".to_string())
+         } else if model_lower.contains("phi") {
+             Ok("phi".to_string())
          } else {
              Err(candle_core::Error::Msg(
                  "Could not determine model architecture from metadata or name".to_string()
@@ -235,6 +286,20 @@ impl QuantizedGGUF {
                      .copied()
                      .unwrap_or(1)
              }
+             "qwen" | "qwen2" | "qwen3" => {
+                 vocab.get("<|endoftext|>")
+                     .or_else(|| vocab.get("<|im_end|>"))
+                     .or_else(|| vocab.get("</s>"))
+                     .copied()
+                     .unwrap_or(151643) // Default Qwen3 EOS token
+             }
+             "phi" | "phi2" | "phi3" => {
+                 vocab.get("<|endoftext|>")
+                     .or_else(|| vocab.get("<|end|>"))
+                     .or_else(|| vocab.get("</s>"))
+                     .copied()
+                     .unwrap_or(50256) // Default GPT-2 style EOS token
+             }
              _ => 2, // Default
          }
      }
@@ -256,6 +321,10 @@ impl QuantizedGGUF {
          } else if model_lower.contains("gemma") {
              // Always use Gemma template for Gemma models, regardless of loader used
              self.apply_gemma_template(messages)
+         } else if model_lower.contains("qwen") {
+             self.apply_qwen_template(messages)
+         } else if model_lower.contains("phi") {
+             self.apply_phi_template(messages)
          } else {
              match self.architecture.as_str() {
                  "llama" => {
@@ -268,6 +337,12 @@ impl QuantizedGGUF {
                  "gemma" => {
                      self.apply_gemma_template(messages)
                  }
+                 "qwen" | "qwen2" | "qwen3" => {
+                     self.apply_qwen_template(messages)
+                 }
+                 "phi" | "phi2" | "phi3" => {
+                     self.apply_phi_template(messages)
+                 }
                  _ => Ok(self.apply_generic_template(messages))
              }
          }
@@ -366,6 +441,77 @@ impl QuantizedGGUF {
          Ok(prompt)
      }

+     fn apply_qwen_template(&self, messages: &[serde_json::Value]) -> CandleResult<String> {
+         let mut prompt = String::new();
+
+         for message in messages {
+             let role = message["role"].as_str().unwrap_or("");
+             let content = message["content"].as_str().unwrap_or("");
+
+             match role {
+                 "system" => {
+                     prompt.push_str(&format!("<|im_start|>system\n{}<|im_end|>\n", content));
+                 }
+                 "user" => {
+                     prompt.push_str(&format!("<|im_start|>user\n{}<|im_end|>\n", content));
+                 }
+                 "assistant" => {
+                     prompt.push_str(&format!("<|im_start|>assistant\n{}<|im_end|>\n", content));
+                 }
+                 _ => {}
+             }
+         }
+
+         // Add generation prompt
+         prompt.push_str("<|im_start|>assistant\n");
+         Ok(prompt)
+     }
+
+     fn apply_phi_template(&self, messages: &[serde_json::Value]) -> CandleResult<String> {
+         let mut prompt = String::new();
+
+         // Check if it's Phi-3 (newer format) or Phi-2/Phi (simpler format)
+         let is_phi3 = self.model_id.contains("phi-3") || self.model_id.contains("Phi-3") || self.architecture == "phi3";
+
+         if is_phi3 {
+             // Phi-3 format
+             for message in messages {
+                 let role = message["role"].as_str().unwrap_or("");
+                 let content = message["content"].as_str().unwrap_or("");
+
+                 match role {
+                     "system" => {
+                         prompt.push_str(&format!("<|system|>\n{}<|end|>\n", content));
+                     }
+                     "user" => {
+                         prompt.push_str(&format!("<|user|>\n{}<|end|>\n", content));
+                     }
+                     "assistant" => {
+                         prompt.push_str(&format!("<|assistant|>\n{}<|end|>\n", content));
+                     }
+                     _ => {}
+                 }
+             }
+             prompt.push_str("<|assistant|>\n");
+         } else {
+             // Phi-2 format
+             for message in messages {
+                 let role = message["role"].as_str().unwrap_or("");
+                 let content = message["content"].as_str().unwrap_or("");
+
+                 match role {
+                     "system" => prompt.push_str(&format!("System: {}\n", content)),
+                     "user" => prompt.push_str(&format!("User: {}\n", content)),
+                     "assistant" => prompt.push_str(&format!("Assistant: {}\n", content)),
+                     _ => {}
+                 }
+             }
+             prompt.push_str("Assistant: ");
+         }
+
+         Ok(prompt)
+     }
+
      fn apply_generic_template(&self, messages: &[serde_json::Value]) -> String {
          let mut prompt = String::new();

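For reference, the new Qwen template emits the ChatML layout. A minimal, free-standing illustration of the prompt shape it produces (the `chatml_prompt` helper is hypothetical, written only to show the expected output):

/// Hypothetical helper reproducing the ChatML layout used by apply_qwen_template.
fn chatml_prompt(messages: &[(&str, &str)]) -> String {
    let mut prompt = String::new();
    for (role, content) in messages {
        prompt.push_str(&format!("<|im_start|>{}\n{}<|im_end|>\n", role, content));
    }
    // Trailing assistant header asks the model to continue from here
    prompt.push_str("<|im_start|>assistant\n");
    prompt
}

// chatml_prompt(&[("system", "You are terse."), ("user", "Hi")]) yields:
//   <|im_start|>system\nYou are terse.<|im_end|>\n
//   <|im_start|>user\nHi<|im_end|>\n
//   <|im_start|>assistant\n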
@@ -381,7 +527,9 @@ impl QuantizedGGUF {

      /// Clear the KV cache between generations
      pub fn clear_kv_cache(&mut self) {
-         // Quantized models manage cache internally
+         // Quantized models don't expose cache clearing methods
+         // Phi3 GGUF models have a known issue where the KV cache
+         // cannot be cleared, leading to errors on subsequent generations
      }

      fn generate_tokens(
@@ -408,6 +556,9 @@ impl QuantizedGGUF {
              let logits = match &mut self.model {
                  ModelType::Llama(model) => model.forward(&input, start_pos)?,
                  ModelType::Gemma(model) => model.forward(&input, start_pos)?,
+                 ModelType::Qwen(model) => model.forward(&input, start_pos)?,
+                 ModelType::Phi(model) => model.forward(&input, start_pos)?,
+                 ModelType::Phi3(model) => model.forward(&input, start_pos)?,
              };

              let logits = logits.squeeze(0)?;
@@ -0,0 +1,229 @@
+ use candle_core::{DType, Device, Result as CandleResult, Tensor};
+ use candle_transformers::models::qwen2::{Config, Model as QwenModel};
+ use hf_hub::api::tokio::Api;
+ use tokenizers::Tokenizer;
+
+ use crate::llm::{GenerationConfig, TextGeneration, TextGenerator, TokenizerWrapper};
+
+ /// Qwen model wrapper for text generation
+ #[derive(Debug)]
+ pub struct Qwen {
+     model: QwenModel,
+     tokenizer: TokenizerWrapper,
+     device: Device,
+     model_id: String,
+     eos_token_id: u32,
+ }
+
+ impl Qwen {
+     /// Get the tokenizer
+     pub fn tokenizer(&self) -> &TokenizerWrapper {
+         &self.tokenizer
+     }
+
+     /// Clear the KV cache between generations
+     pub fn clear_kv_cache(&mut self) {
+         self.model.clear_kv_cache();
+     }
+
+     /// Load a Qwen model from HuggingFace
+     pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
+         let api = Api::new()
+             .map_err(|e| candle_core::Error::Msg(format!("Failed to create HF API: {}", e)))?;
+
+         let repo = api.model(model_id.to_string());
+
+         // Download configuration
+         let config_filename = repo.get("config.json").await
+             .map_err(|e| candle_core::Error::Msg(format!("Failed to download config: {}", e)))?;
+         let config_str = std::fs::read_to_string(config_filename)?;
+         let config: Config = serde_json::from_str(&config_str)
+             .map_err(|e| candle_core::Error::Msg(format!("Failed to parse config: {}", e)))?;
+
+         // Download tokenizer
+         let tokenizer_filename = repo.get("tokenizer.json").await
+             .map_err(|e| candle_core::Error::Msg(format!("Failed to download tokenizer: {}", e)))?;
+         let tokenizer = Tokenizer::from_file(tokenizer_filename)
+             .map_err(|e| candle_core::Error::Msg(format!("Failed to load tokenizer: {}", e)))?;
+
+         // Determine EOS token
+         let vocab = tokenizer.get_vocab(true);
+         let eos_token_id = vocab.get("<|endoftext|>")
+             .or_else(|| vocab.get("<|im_end|>"))
+             .or_else(|| vocab.get("</s>"))
+             .copied()
+             .unwrap_or(151643); // Default Qwen3 EOS token
+
+         // Download model weights
+         let mut filenames = vec![];
+         let num_shards = if model_id.contains("72b") || model_id.contains("72B") { 8 }
+             else if model_id.contains("14b") || model_id.contains("14B") { 3 }
+             else { 1 };
+
+         if num_shards == 1 {
+             // Single file model
+             let filename = repo.get("model.safetensors").await
+                 .map_err(|e| candle_core::Error::Msg(format!("Failed to download model weights: {}", e)))?;
+             filenames.push(filename);
+         } else {
+             // Sharded model
+             for shard_idx in 1..=num_shards {
+                 let filename = repo.get(&format!("model-{:05}-of-{:05}.safetensors", shard_idx, num_shards)).await
+                     .map_err(|e| candle_core::Error::Msg(format!("Failed to download shard {}: {}", shard_idx, e)))?;
+                 filenames.push(filename);
+             }
+         }
+
+         // Load the model
+         let vb = unsafe {
+             candle_nn::VarBuilder::from_mmaped_safetensors(&filenames, DType::F32, &device)?
+         };
+
+         let model = QwenModel::new(&config, vb)?;
+
+         Ok(Self {
+             model,
+             tokenizer: TokenizerWrapper::new(tokenizer),
+             device,
+             model_id: model_id.to_string(),
+             eos_token_id,
+         })
+     }
+
+     /// Apply Qwen chat template to messages
+     pub fn apply_chat_template(&self, messages: &[serde_json::Value]) -> CandleResult<String> {
+         let mut prompt = String::new();
+
+         for message in messages {
+             let role = message["role"].as_str().unwrap_or("");
+             let content = message["content"].as_str().unwrap_or("");
+
+             match role {
+                 "system" => {
+                     prompt.push_str(&format!("<|im_start|>system\n{}<|im_end|>\n", content));
+                 }
+                 "user" => {
+                     prompt.push_str(&format!("<|im_start|>user\n{}<|im_end|>\n", content));
+                 }
+                 "assistant" => {
+                     prompt.push_str(&format!("<|im_start|>assistant\n{}<|im_end|>\n", content));
+                 }
+                 _ => {}
+             }
+         }
+
+         // Add generation prompt
+         prompt.push_str("<|im_start|>assistant\n");
+
+         Ok(prompt)
+     }
+
+     fn generate_tokens(
+         &mut self,
+         prompt_tokens: Vec<u32>,
+         config: &GenerationConfig,
+         mut callback: Option<impl FnMut(&str)>,
+     ) -> CandleResult<Vec<u32>> {
+         let mut text_gen = TextGeneration::from_config(config);
+         text_gen.set_eos_token_id(self.eos_token_id);
+         text_gen.set_tokens(prompt_tokens.clone());
+
+         let mut all_tokens = prompt_tokens.clone();
+         let start_gen = all_tokens.len();
+
+         for index in 0..config.max_length {
+             let context_size = if index > 0 { 1 } else { all_tokens.len() };
+             let start_pos = all_tokens.len().saturating_sub(context_size);
+             let ctxt = &all_tokens[start_pos..];
+
+             let input = Tensor::new(ctxt, &self.device)?.unsqueeze(0)?;
+             let logits = self.model.forward(&input, start_pos, None)?;
+             let logits = logits.squeeze(0)?;
+
+             // Handle different output shapes
+             let logits = if logits.dims().len() == 2 {
+                 let seq_len = logits.dim(0)?;
+                 logits.narrow(0, seq_len - 1, 1)?.squeeze(0)?
+             } else {
+                 logits
+             };
+
+             let logits = logits.to_dtype(DType::F32)?;
+
+             let next_token = text_gen.sample_next_token(
+                 &logits,
+                 Some((config.repetition_penalty, config.repetition_penalty_last_n)),
+             )?;
+
+             all_tokens.push(next_token);
+
+             // Stream callback
+             if let Some(ref mut cb) = callback {
+                 if config.debug_tokens {
+                     let token_piece = self.tokenizer.token_to_piece(next_token)?;
+                     cb(&format!("[{}:{}]", next_token, token_piece));
+                 } else {
+                     let decoded_text = self.tokenizer.decode_incremental(&all_tokens, all_tokens.len() - 1)?;
+                     cb(&decoded_text);
+                 }
+             }
+
+             // Check stop conditions
+             if text_gen.should_stop(next_token, config.max_length) {
+                 break;
+             }
+
+             // Check stop sequences
+             let generated_text = self.tokenizer.decode(&all_tokens[start_gen..], true)?;
+             if text_gen.check_stop_sequences(&generated_text, &config.stop_sequences) {
+                 break;
+             }
+         }
+
+         Ok(if config.include_prompt {
+             all_tokens
+         } else {
+             all_tokens[start_gen..].to_vec()
+         })
+     }
+ }
+
+ impl TextGenerator for Qwen {
+     fn generate(
+         &mut self,
+         prompt: &str,
+         config: &GenerationConfig,
+     ) -> CandleResult<String> {
+         let prompt_tokens = self.tokenizer.encode(prompt, true)?;
+         let output_tokens = self.generate_tokens(prompt_tokens, config, None::<fn(&str)>)?;
+
+         if config.debug_tokens {
+             self.tokenizer.format_tokens_with_debug(&output_tokens)
+         } else {
+             self.tokenizer.decode(&output_tokens, true)
+         }
+     }
+
+     fn generate_stream(
+         &mut self,
+         prompt: &str,
+         config: &GenerationConfig,
+         mut callback: impl FnMut(&str),
+     ) -> CandleResult<String> {
+         let prompt_tokens = self.tokenizer.encode(prompt, true)?;
+         let output_tokens = self.generate_tokens(prompt_tokens, config, Some(&mut callback))?;
+         self.tokenizer.decode(&output_tokens, true)
+     }
+
+     fn model_name(&self) -> &str {
+         &self.model_id
+     }
+
+     fn device(&self) -> &Device {
+         &self.device
+     }
+
+     fn clear_cache(&mut self) {
+         self.clear_kv_cache();
+     }
+ }
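A rough usage sketch of the new non-quantized Qwen path, under stated assumptions: it presumes an async context, that `GenerationConfig` implements `Default`, and that the module paths shown match the crate layout; the model id is only an example.

use candle_core::Device;
// Assumed paths into the crate; adjust to the actual module layout.
use crate::llm::{GenerationConfig, TextGenerator, qwen::Qwen};

async fn demo() -> candle_core::Result<String> {
    // Example model id; any safetensors Qwen2 checkpoint should follow the same flow.
    let mut qwen = Qwen::from_pretrained("Qwen/Qwen2.5-0.5B-Instruct", Device::Cpu).await?;

    let messages = vec![serde_json::json!({ "role": "user", "content": "Say hello." })];
    let prompt = qwen.apply_chat_template(&messages)?;

    // Assumption: GenerationConfig derives Default; otherwise fill the fields explicitly.
    let config = GenerationConfig::default();
    qwen.generate(&prompt, &config)
}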
@@ -1,13 +1,17 @@
  use candle_core::{Result as CandleResult, Tensor};
  use candle_transformers::generation::LogitsProcessor;
+ use std::sync::Arc;

  use super::GenerationConfig;
+ use crate::structured::Index;

  /// Helper struct for text generation process
  pub struct TextGeneration {
      logits_processor: LogitsProcessor,
      tokens: Vec<u32>,
      eos_token_id: Option<u32>,
+     constraint: Option<Arc<Index>>,
+     constraint_state: Option<u32>,
  }

  impl TextGeneration {
@@ -25,18 +29,27 @@ impl TextGeneration {
              logits_processor,
              tokens: Vec::new(),
              eos_token_id: None,
+             constraint: None,
+             constraint_state: None,
          }
      }

      pub fn from_config(config: &GenerationConfig) -> Self {
-         Self::new(
+         let mut text_gen = Self::new(
              config.seed,
              Some(config.temperature),
              config.top_p,
              config.top_k,
              config.repetition_penalty,
              config.repetition_penalty_last_n,
-         )
+         );
+
+         // Set constraint if provided
+         if let Some(ref constraint) = config.constraint {
+             text_gen.set_constraint(Arc::clone(constraint));
+         }
+
+         text_gen
      }

      pub fn set_eos_token_id(&mut self, eos_token_id: u32) {
@@ -55,6 +68,36 @@ impl TextGeneration {
          self.tokens.push(token);
      }

+     pub fn set_constraint(&mut self, constraint: Arc<Index>) {
+         // Initialize with the first state
+         self.constraint_state = Some(constraint.initial_state());
+         self.constraint = Some(constraint);
+     }
+
+     /// Apply constraints to logits by masking disallowed tokens
+     fn apply_constraints(&self, logits: &mut Tensor) -> CandleResult<()> {
+         if let (Some(ref constraint_index), Some(state)) = (&self.constraint, self.constraint_state) {
+             let device = logits.device();
+             let vocab_size = logits.dims1()?;
+
+             // Get allowed tokens from the constraint index for current state
+             if let Some(allowed_tokens) = constraint_index.allowed_tokens(&state) {
+                 // Create a mask where allowed tokens have value 0 and others have -inf
+                 let mut mask = vec![f32::NEG_INFINITY; vocab_size];
+                 for &token_id in &allowed_tokens {
+                     if (token_id as usize) < vocab_size {
+                         mask[token_id as usize] = 0.0;
+                     }
+                 }
+
+                 // Apply mask to logits
+                 let mask_tensor = Tensor::from_vec(mask, vocab_size, device)?;
+                 *logits = logits.add(&mask_tensor)?;
+             }
+         }
+         Ok(())
+     }
+
      /// Apply repetition penalty to logits
      pub fn apply_repetition_penalty(
          &self,
@@ -103,10 +146,18 @@ impl TextGeneration {
              self.apply_repetition_penalty(&mut logits, penalty, last_n)?;
          }

+         // Apply constraints if active
+         self.apply_constraints(&mut logits)?;
+
          // Sample token
          let next_token = self.logits_processor.sample(&logits)?;
          self.tokens.push(next_token);

+         // Update constraint state if active
+         if let (Some(ref constraint_index), Some(current_state)) = (&self.constraint, self.constraint_state) {
+             self.constraint_state = constraint_index.next_state(&current_state, &next_token);
+         }
+
          Ok(next_token)
      }

@@ -122,6 +173,19 @@ impl TextGeneration {
              }
          }

+         // Check if we've reached a final state in constraint
+         // A state is considered final if it has no allowed tokens
+         if let (Some(ref constraint_index), Some(state)) = (&self.constraint, self.constraint_state) {
+             if let Some(allowed) = constraint_index.allowed_tokens(&state) {
+                 if allowed.is_empty() {
+                     return true;
+                 }
+             } else {
+                 // None means no tokens allowed - we're done
+                 return true;
+             }
+         }
+
          false
      }

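The constraint logic above boils down to additive masking: tokens outside the allowed set get negative infinity added to their logits so the sampler can never pick them. A self-contained sketch of that step, independent of the `Index` type (the function name is illustrative, not part of the crate):

use candle_core::{Result, Tensor};

/// Illustrative masking step: keep `allowed` token ids samplable, push all
/// other vocabulary entries to -inf before sampling.
fn mask_logits(logits: &Tensor, allowed: &[u32]) -> Result<Tensor> {
    let vocab_size = logits.dims1()?; // expects a 1-D logits tensor
    let mut mask = vec![f32::NEG_INFINITY; vocab_size];
    for &id in allowed {
        if (id as usize) < vocab_size {
            mask[id as usize] = 0.0;
        }
    }
    let mask = Tensor::from_vec(mask, vocab_size, logits.device())?;
    logits.add(&mask) // -inf + anything stays -inf; allowed logits are unchanged
}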
@@ -162,6 +162,10 @@ impl Device {
      pub fn __str__(&self) -> String {
          self.__repr__()
      }
+
+     pub fn __eq__(&self, other: &Device) -> bool {
+         self == other
+     }
  }

  impl magnus::TryConvert for Device {
@@ -193,5 +197,6 @@ pub fn init(rb_candle: RModule) -> Result<()> {
      rb_device.define_singleton_method("default", function!(default_device, 0))?;
      rb_device.define_method("to_s", method!(Device::__str__, 0))?;
      rb_device.define_method("inspect", method!(Device::__repr__, 0))?;
+     rb_device.define_method("==", method!(Device::__eq__, 1))?;
      Ok(())
  }