RubyGems - red-candle - Versions diffs - 1.3.0 → 1.4.0 - Mend

red-candle 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

checksums.yaml +4 -4
data/Cargo.lock +11 -20
data/ext/candle/Cargo.toml +1 -1
data/ext/candle/src/llm/constrained_generation_test.rs +79 -0
data/ext/candle/src/llm/text_generation.rs +40 -50
data/ext/candle/src/ruby/device.rs +8 -7
data/ext/candle/src/ruby/dtype.rs +3 -2
data/ext/candle/src/ruby/embedding_model.rs +31 -14
data/ext/candle/src/ruby/errors.rs +6 -4
data/ext/candle/src/ruby/llm.rs +78 -68
data/ext/candle/src/ruby/ner.rs +106 -95
data/ext/candle/src/ruby/reranker.rs +51 -38
data/ext/candle/src/ruby/structured.rs +61 -16
data/ext/candle/src/ruby/tensor.rs +7 -6
data/ext/candle/src/ruby/tokenizer.rs +101 -84
data/lib/candle/llm.rb +77 -3
data/lib/candle/version.rb +1 -1
metadata +31 -6

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 5b92d492e96b8192fba14141ab66ad42aa4afe0d942cc0658f8b64bab2bf916b
-  data.tar.gz: fe3510382fe48853b45061beb336108499655b566c9cb8bf1889b36f76dcda0a
+  metadata.gz: 7a3ac57ccd28cb2eb647c5a71f42fdbdf84a9dcaabd906165daca3d57c4a8eb0
+  data.tar.gz: 283104e93802ac97f11525226c9e41dc3bebccb8706d5b69a6a648d62cf2ccad
 SHA512:
-  metadata.gz: eeddd779bc811f2c2707439d8b92644a2711091d9e42750ed4ebbbf17054a482f1b79147a562200ef5cd5cf6f7620cfd5b543ca32624371121ca64bae40f210b
-  data.tar.gz: cfdf7c9b76a8dda7bcfc9f215374251a606ba06d34c9310430e61390654ae873f3d5e61359767356e7c6554302e8f33a9385d3cbaee3d0da7c6cda771d2af970
+  metadata.gz: d5be5ca76fe5441ee1fd87ea83bcd02f5a630c3410a4dccc347da28290ef408c4c2cae428134f2a8fba839fe5aacf2b7593fe45274c967600e6684d09f212a01
+  data.tar.gz: 72289aaf0c17dea679acfa4f92be5371f2543293e27dab0752b39c77a1c2be98c480c0ce7266bc07519551770abdf53fdbbf0f4d3c1fdf6ba928c2452e598060

data/Cargo.lock CHANGED Viewed

@@ -167,7 +167,7 @@ dependencies = [
  "bitflags 2.9.4",
  "cexpr",
  "clang-sys",
- "itertools 0.12.1",
+ "itertools 0.11.0",
  "lazy_static",
  "lazycell",
  "proc-macro2",
@@ -1750,15 +1750,6 @@ dependencies = [
  "either",
 ]
-[[package]]
-name = "itertools"
-version = "0.12.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
-dependencies = [
- "either",
-]
 [[package]]
 name = "itertools"
 version = "0.13.0"
@@ -1890,9 +1881,9 @@ checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30"
 [[package]]
 name = "magnus"
-version = "0.7.1"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3d87ae53030f3a22e83879e666cb94e58a7bdf31706878a0ba48752994146dab"
+checksum = "3b36a5b126bbe97eb0d02d07acfeb327036c6319fd816139a49824a83b7f9012"
 dependencies = [
  "magnus-macros",
  "rb-sys",
@@ -1902,9 +1893,9 @@ dependencies = [
 [[package]]
 name = "magnus-macros"
-version = "0.6.0"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
+checksum = "47607461fd8e1513cb4f2076c197d8092d921a1ea75bd08af97398f593751892"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -2656,18 +2647,18 @@ dependencies = [
 [[package]]
 name = "rb-sys"
-version = "0.9.117"
+version = "0.9.124"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f900d1ce4629a2ebffaf5de74bd8f9c1188d4c5ed406df02f97e22f77a006f44"
+checksum = "c85c4188462601e2aa1469def389c17228566f82ea72f137ed096f21591bc489"
 dependencies = [
  "rb-sys-build",
 ]
 [[package]]
 name = "rb-sys-build"
-version = "0.9.117"
+version = "0.9.124"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef1e9c857028f631056bcd6d88cec390c751e343ce2223ddb26d23eb4a151d59"
+checksum = "568068db4102230882e6d4ae8de6632e224ca75fe5970f6e026a04e91ed635d3"
 dependencies = [
  "bindgen 0.69.5",
  "lazy_static",
@@ -2680,9 +2671,9 @@ dependencies = [
 [[package]]
 name = "rb-sys-env"
-version = "0.1.2"
+version = "0.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
+checksum = "cca7ad6a7e21e72151d56fe2495a259b5670e204c3adac41ee7ef676ea08117a"
 [[package]]
 name = "reborrow"

data/ext/candle/Cargo.toml CHANGED Viewed

@@ -15,7 +15,7 @@ candle-transformers = { version = "0.9.1" }
 tokenizers = { version = "0.22.0", default-features = true, features = ["fancy-regex"] }
 hf-hub = "0.4.1"
 half = "2.6.0"
-magnus = "0.7.1"
+magnus = "0.8"
 safetensors = "0.3"
 serde_json = "1.0"
 serde = { version = "1.0", features = ["derive"] }

data/ext/candle/src/llm/constrained_generation_test.rs CHANGED Viewed

@@ -313,4 +313,83 @@ mod constrained_generation_tests {
         // Verify tokens are being tracked
         assert_eq!(text_gen.get_tokens().len(), all_tokens.len(), "Internal tokens should match generated");
     }
+    #[test]
+    fn test_constraint_satisfied_not_triggered_by_large_allowed_set() {
+        // This test verifies the fix for the bug where is_constraint_satisfied_stop_on_match
+        // would incorrectly return true when many tokens are allowed (e.g., inside a JSON string).
+        // The old buggy code had: if allowed.len() > 1000 { return true; }
+        // This caused early termination when inside strings with many valid characters.
+        let config = GenerationConfig::default();
+        let mut text_gen = TextGeneration::new(&config);
+        text_gen.set_eos_token_id(50256);
+        // Without a constraint, should not be satisfied
+        assert!(!text_gen.is_constraint_satisfied(),
+            "Without constraint, should not be satisfied");
+        assert!(!text_gen.is_constraint_satisfied_stop_on_match(),
+            "Without constraint, stop_on_match should not be satisfied");
+    }
+    #[test]
+    fn test_constraint_satisfied_only_when_empty_or_eos_only() {
+        // Test that constraint satisfaction only triggers when:
+        // 1. No tokens are allowed (empty set)
+        // 2. Only EOS token is allowed
+        // NOT when many tokens are allowed (like inside a JSON string)
+        let config = GenerationConfig::default();
+        let mut text_gen = TextGeneration::new(&config);
+        text_gen.set_eos_token_id(100); // Set EOS token
+        // Without constraint, should not be satisfied
+        assert!(!text_gen.is_constraint_satisfied());
+        assert!(!text_gen.is_constraint_satisfied_stop_on_match());
+        // The key insight: constraint satisfaction should NOT be triggered
+        // just because there are many allowed tokens. It should only trigger
+        // when the constraint is definitively complete (empty allowed set or only EOS).
+    }
+    #[tokio::test]
+    async fn test_constraint_with_json_schema_not_early_termination() {
+        // Integration test: Create a real JSON schema constraint and verify
+        // that being inside a string (many allowed tokens) doesn't trigger completion.
+        if let Ok(tokenizer) = TokenizerLoader::from_hf_hub("bert-base-uncased", None).await {
+            let wrapper = TokenizerWrapper::new(tokenizer);
+            let vocabulary = VocabularyAdapter::from_tokenizer(&wrapper)
+                .expect("Should create vocabulary");
+            let processor = SchemaProcessor::new();
+            // Schema with a string field - when generating content inside the string,
+            // many characters are valid, but the constraint is NOT complete
+            let schema = r#"{
+                "type": "object",
+                "properties": {
+                    "name": { "type": "string" }
+                },
+                "required": ["name"]
+            }"#;
+            let index = processor.process_schema(schema, &vocabulary)
+                .expect("Should process schema");
+            let mut config = GenerationConfig::default();
+            config.constraint = Some(index);
+            config.max_length = 100;
+            let mut text_gen = TextGeneration::new(&config);
+            text_gen.set_eos_token_id(102); // BERT's [SEP]
+            // At the initial state, the constraint should NOT be satisfied
+            // (we haven't generated a complete JSON object yet)
+            assert!(!text_gen.is_constraint_satisfied(),
+                "Initial state should not be satisfied - JSON not yet generated");
+            assert!(!text_gen.is_constraint_satisfied_stop_on_match(),
+                "Initial state should not trigger stop_on_match");
+        }
+    }
 }

data/ext/candle/src/llm/text_generation.rs CHANGED Viewed

@@ -148,47 +148,28 @@ impl TextGeneration {
         if let (Some(ref constraint_index), Some(current_state)) = (&self.constraint, self.constraint_state) {
             // Get the next state
             let next_state = constraint_index.next_state(&current_state, &next_token);
             // Check if we're transitioning to a state with no allowed tokens (completion)
             if !self.constraint_completed && self.tokens.len() > self.tokens_since_constraint_start {
-                // Check if we've transitioned from a constrained state to an unconstrained state
-                // This happens when the pattern is complete and the FSM allows "anything"
-                let current_constrained = if let Some(allowed) = constraint_index.allowed_tokens(&current_state) {
-                    // Consider it constrained if we have a limited set of allowed tokens
-                    allowed.len() < 1000  // Arbitrary threshold for "constrained"
-                } else {
-                    true  // No tokens allowed is definitely constrained
-                };
-                let next_constrained = if let Some(next_state_val) = next_state {
-                    if let Some(allowed) = constraint_index.allowed_tokens(&next_state_val) {
-                        allowed.is_empty() || allowed.len() < 1000
-                    } else {
-                        true
-                    }
-                } else {
-                    true
-                };
-                // If we're transitioning from constrained to unconstrained, we've completed the pattern
-                if current_constrained && !next_constrained {
-                    self.constraint_completed = true;
-                }
-                // Also check if next state has no allowed tokens at all
+                // Check if next state has no allowed tokens at all - this is definitive completion
                 if let Some(next_state_val) = next_state {
                     if let Some(allowed) = constraint_index.allowed_tokens(&next_state_val) {
                         if allowed.is_empty() {
                             self.constraint_completed = true;
                         }
+                        // Only mark as complete if ONLY EOS is allowed (not just if EOS is one of many options)
+                        else if let Some(eos) = self.eos_token_id {
+                            if allowed.len() == 1 && allowed.contains(&eos) {
+                                self.constraint_completed = true;
+                            }
+                        }
                     } else {
                         // None means no tokens allowed - constraint is complete
                         self.constraint_completed = true;
                     }
                 }
             }
             self.constraint_state = next_state;
         }
@@ -201,22 +182,22 @@ impl TextGeneration {
         if self.constraint_completed {
             return true;
         }
         // Also check the current state
         if let (Some(ref constraint_index), Some(state)) = (&self.constraint, self.constraint_state) {
-            // Check if the constraint has reached a state where it could validly end
-            // This happens when:
-            // 1. We have no more allowed tokens (constraint fully satisfied)
-            // 2. The EOS token is in the allowed tokens (optional ending)
+            // Check if the constraint has reached a state where it MUST end
+            // This happens when there are no more allowed tokens (constraint fully satisfied)
             if let Some(allowed) = constraint_index.allowed_tokens(&state) {
                 // If no tokens are allowed, the constraint is fully satisfied
                 if allowed.is_empty() {
                     return true;
                 }
-                // If EOS token is allowed, we've reached an optional completion point
+                // For JSON schemas, check if ONLY the EOS token is allowed
+                // This means we've generated a complete, valid JSON structure
+                // Don't treat EOS as a satisfaction signal if other tokens are also allowed
                 if let Some(eos) = self.eos_token_id {
-                    if allowed.contains(&eos) {
+                    if allowed.len() == 1 && allowed.contains(&eos) {
                         return true;
                     }
                 }
@@ -229,28 +210,37 @@ impl TextGeneration {
     }
     /// Check if the constraint is satisfied when stop_on_match is true
+    /// NOTE: For JSON schemas, this should only return true when the JSON structure is complete,
+    /// not just because we're in a state with many allowed tokens (like inside a string).
     pub fn is_constraint_satisfied_stop_on_match(&self) -> bool {
         // When stop_on_match is true, we stop as soon as the constraint is completed
         if self.constraint_completed {
             return true;
         }
-        // Also check if we're currently in a state that could be a valid end
-        // This is important for patterns like phone numbers where after matching
-        // the pattern, the FSM might allow any token (including more numbers)
+        // For JSON and other structured outputs, don't use the "large allowed set" heuristic.
+        // Instead, only consider the constraint satisfied when:
+        // 1. There are no allowed tokens (definitive completion)
+        // 2. Only EOS is allowed (completion with optional termination)
         if let (Some(ref constraint_index), Some(state)) = (&self.constraint, self.constraint_state) {
-            // Check if we've generated at least one token since constraint start
-            if self.tokens.len() > self.tokens_since_constraint_start {
-                if let Some(allowed) = constraint_index.allowed_tokens(&state) {
-                    // If the allowed tokens set is very large (unconstrained),
-                    // it means the pattern has been satisfied
-                    if allowed.len() > 1000 {
+            if let Some(allowed) = constraint_index.allowed_tokens(&state) {
+                // No more tokens allowed - definitely complete
+                if allowed.is_empty() {
+                    return true;
+                }
+                // Only EOS is allowed - complete JSON structure
+                if let Some(eos) = self.eos_token_id {
+                    if allowed.len() == 1 && allowed.contains(&eos) {
                         return true;
                     }
                 }
+            } else {
+                // None means no tokens allowed - constraint is complete
+                return true;
             }
         }
         false
     }
@@ -259,13 +249,13 @@ impl TextGeneration {
         if self.tokens.len() >= max_length {
             return true;
         }
         if let Some(eos) = self.eos_token_id {
             if token == eos {
                 return true;
             }
         }
         // Check if we've reached a final state in constraint
         // A state is considered final if it has no allowed tokens
         if let (Some(ref constraint_index), Some(state)) = (&self.constraint, self.constraint_state) {
@@ -278,7 +268,7 @@ impl TextGeneration {
                 return true;
             }
         }
         false
     }

data/ext/candle/src/ruby/device.rs CHANGED Viewed

@@ -1,5 +1,5 @@
 use magnus::Error;
-use magnus::{function, method, class, RModule, Module, Object};
+use magnus::{function, method, RModule, Module, Object, Ruby};
 use ::candle_core::Device as CoreDevice;
 use crate::ruby::Result;
@@ -101,7 +101,7 @@ impl Device {
         #[cfg(not(feature = "cuda"))]
         {
             return Err(Error::new(
-                magnus::exception::runtime_error(),
+                Ruby::get().unwrap().exception_runtime_error(),
                 "CUDA support not compiled in. Rebuild with CUDA available.",
             ));
         }
@@ -115,7 +115,7 @@ impl Device {
         #[cfg(not(feature = "metal"))]
         {
             return Err(Error::new(
-                magnus::exception::runtime_error(),
+                Ruby::get().unwrap().exception_runtime_error(),
                 "Metal support not compiled in. Rebuild on macOS.",
             ));
         }
@@ -139,7 +139,7 @@ impl Device {
                 #[cfg(not(feature = "cuda"))]
                 {
                     return Err(Error::new(
-                        magnus::exception::runtime_error(),
+                        Ruby::get().unwrap().exception_runtime_error(),
                         "CUDA support not compiled in. Rebuild with CUDA available.",
                     ));
                 }
@@ -161,7 +161,7 @@ impl Device {
                 #[cfg(not(feature = "metal"))]
                 {
                     return Err(Error::new(
-                        magnus::exception::runtime_error(),
+                        Ruby::get().unwrap().exception_runtime_error(),
                         "Metal support not compiled in. Rebuild on macOS.",
                     ));
                 }
@@ -211,14 +211,15 @@ impl magnus::TryConvert for Device {
             "cpu" => Device::Cpu,
             "cuda" => Device::Cuda,
             "metal" => Device::Metal,
-            _ => return Err(Error::new(magnus::exception::arg_error(), "invalid device")),
+            _ => return Err(Error::new(Ruby::get().unwrap().exception_arg_error(), "invalid device")),
         };
         Ok(device)
     }
 }
 pub fn init(rb_candle: RModule) -> Result<()> {
-    let rb_device = rb_candle.define_class("Device", class::object())?;
+    let ruby = Ruby::get().unwrap();
+    let rb_device = rb_candle.define_class("Device", ruby.class_object())?;
     rb_device.define_singleton_method("cpu", function!(Device::cpu, 0))?;
     rb_device.define_singleton_method("cuda", function!(Device::cuda, 0))?;
     rb_device.define_singleton_method("metal", function!(Device::metal, 0))?;

data/ext/candle/src/ruby/dtype.rs CHANGED Viewed

@@ -1,5 +1,5 @@
 use magnus::value::ReprValue;
-use magnus::{method, class, RModule, Module};
+use magnus::{method, RModule, Module, Ruby};
 use ::candle_core::DType as CoreDType;
 use crate::ruby::Result;
@@ -30,7 +30,8 @@ impl DType {
 }
 pub fn init(rb_candle: RModule) -> Result<()> {
-    let rb_dtype = rb_candle.define_class("DType", class::object())?;
+    let ruby = Ruby::get().unwrap();
+    let rb_dtype = rb_candle.define_class("DType", ruby.class_object())?;
     rb_dtype.define_method("to_s", method!(DType::__str__, 0))?;
     rb_dtype.define_method("inspect", method!(DType::__repr__, 0))?;
     Ok(())

data/ext/candle/src/ruby/embedding_model.rs CHANGED Viewed

@@ -13,7 +13,7 @@ use candle_transformers::models::{
     jina_bert::{BertModel as JinaBertModel, Config as JinaConfig},
     distilbert::{DistilBertModel, Config as DistilBertConfig}
 };
-use magnus::{class, function, method, prelude::*, Error, RModule, RHash};
+use magnus::{function, method, prelude::*, Error, RModule, RHash, Ruby};
 use std::path::Path;
 use serde_json;
@@ -103,28 +103,30 @@ impl EmbeddingModel {
     /// &RETURNS&: Tensor
     /// pooling_method: "pooled", "pooled_normalized", or "cls" (default: "pooled")
     pub fn embedding(&self, input: String, pooling_method: String) -> Result<Tensor> {
+        let ruby = Ruby::get().unwrap();
         match &self.0.model {
             Some(model) => {
                 match &self.0.tokenizer {
                     Some(tokenizer) => Ok(Tensor(self.compute_embedding(input, model, tokenizer, &pooling_method)?)),
-                    None => Err(magnus::Error::new(magnus::exception::runtime_error(), "Tokenizer not found"))
+                    None => Err(magnus::Error::new(ruby.exception_runtime_error(), "Tokenizer not found"))
                 }
             }
-            None => Err(magnus::Error::new(magnus::exception::runtime_error(), "Model not found"))
+            None => Err(magnus::Error::new(ruby.exception_runtime_error(), "Model not found"))
         }
     }
     /// Returns the unpooled embedding tensor ([1, SEQLENGTH, DIM]) for the input text
     /// &RETURNS&: Tensor
     pub fn embeddings(&self, input: String) -> Result<Tensor> {
+        let ruby = Ruby::get().unwrap();
         match &self.0.model {
             Some(model) => {
                 match &self.0.tokenizer {
                     Some(tokenizer) => Ok(Tensor(self.compute_embeddings(input, model, tokenizer)?)),
-                    None => Err(magnus::Error::new(magnus::exception::runtime_error(), "Tokenizer not found"))
+                    None => Err(magnus::Error::new(ruby.exception_runtime_error(), "Tokenizer not found"))
                 }
             }
-            None => Err(magnus::Error::new(magnus::exception::runtime_error(), "Model not found"))
+            None => Err(magnus::Error::new(ruby.exception_runtime_error(), "Model not found"))
         }
     }
@@ -165,7 +167,10 @@ impl EmbeddingModel {
                     },
                     Err(_) => None
                 };
-                inferred_emb_dim.ok_or_else(|| magnus::Error::new(magnus::exception::runtime_error(), "Could not infer embedding size from model file. Please specify embedding_size explicitly."))
+                inferred_emb_dim.ok_or_else(|| {
+                    let ruby = Ruby::get().unwrap();
+                    magnus::Error::new(ruby.exception_runtime_error(), "Could not infer embedding size from model file. Please specify embedding_size explicitly.")
+                })
             }
         }
     }
@@ -178,8 +183,9 @@ impl EmbeddingModel {
             EmbeddingModelType::JinaBert => {
                 let model_path = api.repo(repo).get("model.safetensors").map_err(wrap_hf_err)?;
                 if !std::path::Path::new(&model_path).exists() {
+                    let ruby = Ruby::get().unwrap();
                     return Err(magnus::Error::new(
-                        magnus::exception::runtime_error(),
+                        ruby.exception_runtime_error(),
                         "model.safetensors not found after download. Only safetensors models are supported. Please ensure your model repo contains model.safetensors."
                     ));
                 }
@@ -196,8 +202,9 @@ impl EmbeddingModel {
             EmbeddingModelType::StandardBert => {
                 let model_path = api.repo(repo).get("model.safetensors").map_err(wrap_hf_err)?;
                 if !std::path::Path::new(&model_path).exists() {
+                    let ruby = Ruby::get().unwrap();
                     return Err(magnus::Error::new(
-                        magnus::exception::runtime_error(),
+                        ruby.exception_runtime_error(),
                         "model.safetensors not found after download. Only safetensors models are supported. Please ensure your model repo contains model.safetensors."
                     ));
                 }
@@ -214,8 +221,9 @@ impl EmbeddingModel {
             EmbeddingModelType::DistilBert => {
                 let model_path = api.repo(repo.clone()).get("model.safetensors").map_err(wrap_hf_err)?;
                 if !std::path::Path::new(&model_path).exists() {
+                    let ruby = Ruby::get().unwrap();
                     return Err(magnus::Error::new(
-                        magnus::exception::runtime_error(),
+                        ruby.exception_runtime_error(),
                         "model.safetensors not found after download. Only safetensors models are supported. Please ensure your model repo contains model.safetensors."
                     ));
                 }
@@ -235,8 +243,9 @@ impl EmbeddingModel {
             EmbeddingModelType::MiniLM => {
                 let model_path = api.repo(repo.clone()).get("model.safetensors").map_err(wrap_hf_err)?;
                 if !std::path::Path::new(&model_path).exists() {
+                    let ruby = Ruby::get().unwrap();
                     return Err(magnus::Error::new(
-                        magnus::exception::runtime_error(),
+                        ruby.exception_runtime_error(),
                         "model.safetensors not found after download. Only safetensors models are supported. Please ensure your model repo contains model.safetensors."
                     ));
                 }
@@ -357,7 +366,10 @@ impl EmbeddingModel {
             "pooled" => Self::pooled_embedding(&result),
             "pooled_normalized" => Self::pooled_normalized_embedding(&result),
             "cls" => Self::pooled_cls_embedding(&result),
-            _ => Err(magnus::Error::new(magnus::exception::runtime_error(), "Unknown pooling method")),
+            _ => {
+                let ruby = Ruby::get().unwrap();
+                Err(magnus::Error::new(ruby.exception_runtime_error(), "Unknown pooling method"))
+            },
         }
     }
@@ -390,7 +402,10 @@ impl EmbeddingModel {
     pub fn tokenizer(&self) -> Result<crate::ruby::tokenizer::Tokenizer> {
         match &self.0.tokenizer {
             Some(tokenizer) => Ok(crate::ruby::tokenizer::Tokenizer(tokenizer.clone())),
-            None => Err(magnus::Error::new(magnus::exception::runtime_error(), "No tokenizer loaded for this model"))
+            None => {
+                let ruby = Ruby::get().unwrap();
+                Err(magnus::Error::new(ruby.exception_runtime_error(), "No tokenizer loaded for this model"))
+            }
         }
     }
@@ -409,7 +424,8 @@ impl EmbeddingModel {
     /// Get all options as a hash
     pub fn options(&self) -> Result<RHash> {
-        let hash = RHash::new();
+        let ruby = Ruby::get().unwrap();
+        let hash = ruby.hash_new();
         // Add model_id
         if let Some(model_id) = &self.0.model_id {
@@ -439,7 +455,8 @@ impl EmbeddingModel {
 }
 pub fn init(rb_candle: RModule) -> Result<()> {
-    let rb_embedding_model = rb_candle.define_class("EmbeddingModel", class::object())?;
+    let ruby = Ruby::get().unwrap();
+    let rb_embedding_model = rb_candle.define_class("EmbeddingModel", ruby.class_object())?;
     rb_embedding_model.define_singleton_method("_create", function!(EmbeddingModel::new, 5))?;
     // Expose embedding with an optional pooling_method argument (default: "pooled")
     rb_embedding_model.define_method("_embedding", method!(EmbeddingModel::embedding, 2))?;

data/ext/candle/src/ruby/errors.rs CHANGED Viewed

@@ -1,14 +1,16 @@
 use magnus::Error;
 pub fn wrap_std_err(err: Box<dyn std::error::Error + Send + Sync>) -> Error {
-    Error::new(magnus::exception::runtime_error(), err.to_string())
+    let ruby = magnus::Ruby::get().unwrap();
+    Error::new(ruby.exception_runtime_error(), err.to_string())
 }
 pub fn wrap_candle_err(err: candle_core::Error) -> Error {
-    Error::new(magnus::exception::runtime_error(), err.to_string())
+    let ruby = magnus::Ruby::get().unwrap();
+    Error::new(ruby.exception_runtime_error(), err.to_string())
 }
 pub fn wrap_hf_err(err: hf_hub::api::sync::ApiError) -> Error {
-    Error::new(magnus::exception::runtime_error(), err.to_string())
+    let ruby = magnus::Ruby::get().unwrap();
+    Error::new(ruby.exception_runtime_error(), err.to_string())
 }