RubyGems - red-candle - Versions diffs - 1.3.0 → 1.3.1 - Mend

red-candle 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml +4 -4
data/ext/candle/src/llm/constrained_generation_test.rs +79 -0
data/ext/candle/src/llm/text_generation.rs +40 -50
data/ext/candle/src/ruby/structured.rs +54 -10
data/lib/candle/llm.rb +77 -3
data/lib/candle/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 5b92d492e96b8192fba14141ab66ad42aa4afe0d942cc0658f8b64bab2bf916b
-  data.tar.gz: fe3510382fe48853b45061beb336108499655b566c9cb8bf1889b36f76dcda0a
+  metadata.gz: e41612741efd4ec156c530c9d68edc5c860c17067123da37c06adbe1792442ef
+  data.tar.gz: ff3c39bd45b9c2801278ba56feec00ec72634c6ac71318f6eac13ec7e18fa70e
 SHA512:
-  metadata.gz: eeddd779bc811f2c2707439d8b92644a2711091d9e42750ed4ebbbf17054a482f1b79147a562200ef5cd5cf6f7620cfd5b543ca32624371121ca64bae40f210b
-  data.tar.gz: cfdf7c9b76a8dda7bcfc9f215374251a606ba06d34c9310430e61390654ae873f3d5e61359767356e7c6554302e8f33a9385d3cbaee3d0da7c6cda771d2af970
+  metadata.gz: d90e48980e392174a2f2630feaaf3d7e735ef055e417fc8a5a9edeb3f3dda5ec3b7a99240145825da2bf6dd0a032c4afc24f67da2e3668a93950c069c1648824
+  data.tar.gz: 8feb63ed8818782bad6ba76591ae36f930b14421b75c2bfe69206ceba673074585f37c84c43d16738fc80d5e6110617a34bd4693b98dea80c2c6d6fdf6436e1c

data/ext/candle/src/llm/constrained_generation_test.rs CHANGED

@@ -313,4 +313,83 @@ mod constrained_generation_tests {
         // Verify tokens are being tracked
         assert_eq!(text_gen.get_tokens().len(), all_tokens.len(), "Internal tokens should match generated");
     }
+    #[test]
+    fn test_constraint_satisfied_not_triggered_by_large_allowed_set() {
+        // This test verifies the fix for the bug where is_constraint_satisfied_stop_on_match
+        // would incorrectly return true when many tokens are allowed (e.g., inside a JSON string).
+        // The old buggy code had: if allowed.len() > 1000 { return true; }
+        // This caused early termination when inside strings with many valid characters.
+        let config = GenerationConfig::default();
+        let mut text_gen = TextGeneration::new(&config);
+        text_gen.set_eos_token_id(50256);
+        // Without a constraint, should not be satisfied
+        assert!(!text_gen.is_constraint_satisfied(),
+            "Without constraint, should not be satisfied");
+        assert!(!text_gen.is_constraint_satisfied_stop_on_match(),
+            "Without constraint, stop_on_match should not be satisfied");
+    }
+    #[test]
+    fn test_constraint_satisfied_only_when_empty_or_eos_only() {
+        // Test that constraint satisfaction only triggers when:
+        // 1. No tokens are allowed (empty set)
+        // 2. Only EOS token is allowed
+        // NOT when many tokens are allowed (like inside a JSON string)
+        let config = GenerationConfig::default();
+        let mut text_gen = TextGeneration::new(&config);
+        text_gen.set_eos_token_id(100); // Set EOS token
+        // Without constraint, should not be satisfied
+        assert!(!text_gen.is_constraint_satisfied());
+        assert!(!text_gen.is_constraint_satisfied_stop_on_match());
+        // The key insight: constraint satisfaction should NOT be triggered
+        // just because there are many allowed tokens. It should only trigger
+        // when the constraint is definitively complete (empty allowed set or only EOS).
+    }
+    #[tokio::test]
+    async fn test_constraint_with_json_schema_not_early_termination() {
+        // Integration test: Create a real JSON schema constraint and verify
+        // that being inside a string (many allowed tokens) doesn't trigger completion.
+        if let Ok(tokenizer) = TokenizerLoader::from_hf_hub("bert-base-uncased", None).await {
+            let wrapper = TokenizerWrapper::new(tokenizer);
+            let vocabulary = VocabularyAdapter::from_tokenizer(&wrapper)
+                .expect("Should create vocabulary");
+            let processor = SchemaProcessor::new();
+            // Schema with a string field - when generating content inside the string,
+            // many characters are valid, but the constraint is NOT complete
+            let schema = r#"{
+                "type": "object",
+                "properties": {
+                    "name": { "type": "string" }
+                },
+                "required": ["name"]
+            }"#;
+            let index = processor.process_schema(schema, &vocabulary)
+                .expect("Should process schema");
+            let mut config = GenerationConfig::default();
+            config.constraint = Some(index);
+            config.max_length = 100;
+            let mut text_gen = TextGeneration::new(&config);
+            text_gen.set_eos_token_id(102); // BERT's [SEP]
+            // At the initial state, the constraint should NOT be satisfied
+            // (we haven't generated a complete JSON object yet)
+            assert!(!text_gen.is_constraint_satisfied(),
+                "Initial state should not be satisfied - JSON not yet generated");
+            assert!(!text_gen.is_constraint_satisfied_stop_on_match(),
+                "Initial state should not trigger stop_on_match");
+        }
+    }
 }

data/ext/candle/src/llm/text_generation.rs CHANGED

@@ -148,47 +148,28 @@ impl TextGeneration {
         if let (Some(ref constraint_index), Some(current_state)) = (&self.constraint, self.constraint_state) {
             // Get the next state
             let next_state = constraint_index.next_state(&current_state, &next_token);
             // Check if we're transitioning to a state with no allowed tokens (completion)
             if !self.constraint_completed && self.tokens.len() > self.tokens_since_constraint_start {
-                // Check if we've transitioned from a constrained state to an unconstrained state
-                // This happens when the pattern is complete and the FSM allows "anything"
-                let current_constrained = if let Some(allowed) = constraint_index.allowed_tokens(&current_state) {
-                    // Consider it constrained if we have a limited set of allowed tokens
-                    allowed.len() < 1000  // Arbitrary threshold for "constrained"
-                } else {
-                    true  // No tokens allowed is definitely constrained
-                };
-                let next_constrained = if let Some(next_state_val) = next_state {
-                    if let Some(allowed) = constraint_index.allowed_tokens(&next_state_val) {
-                        allowed.is_empty() || allowed.len() < 1000
-                    } else {
-                        true
-                    }
-                } else {
-                    true
-                };
-                // If we're transitioning from constrained to unconstrained, we've completed the pattern
-                if current_constrained && !next_constrained {
-                    self.constraint_completed = true;
-                }
-                // Also check if next state has no allowed tokens at all
+                // Check if next state has no allowed tokens at all - this is definitive completion
                 if let Some(next_state_val) = next_state {
                     if let Some(allowed) = constraint_index.allowed_tokens(&next_state_val) {
                         if allowed.is_empty() {
                             self.constraint_completed = true;
                         }
+                        // Only mark as complete if ONLY EOS is allowed (not just if EOS is one of many options)
+                        else if let Some(eos) = self.eos_token_id {
+                            if allowed.len() == 1 && allowed.contains(&eos) {
+                                self.constraint_completed = true;
+                            }
+                        }
                     } else {
                         // None means no tokens allowed - constraint is complete
                         self.constraint_completed = true;
                     }
                 }
             }
             self.constraint_state = next_state;
         }
@@ -201,22 +182,22 @@ impl TextGeneration {
         if self.constraint_completed {
             return true;
         }
         // Also check the current state
         if let (Some(ref constraint_index), Some(state)) = (&self.constraint, self.constraint_state) {
-            // Check if the constraint has reached a state where it could validly end
-            // This happens when:
-            // 1. We have no more allowed tokens (constraint fully satisfied)
-            // 2. The EOS token is in the allowed tokens (optional ending)
+            // Check if the constraint has reached a state where it MUST end
+            // This happens when there are no more allowed tokens (constraint fully satisfied)
             if let Some(allowed) = constraint_index.allowed_tokens(&state) {
                 // If no tokens are allowed, the constraint is fully satisfied
                 if allowed.is_empty() {
                     return true;
                 }
-                // If EOS token is allowed, we've reached an optional completion point
+                // For JSON schemas, check if ONLY the EOS token is allowed
+                // This means we've generated a complete, valid JSON structure
+                // Don't treat EOS as a satisfaction signal if other tokens are also allowed
                 if let Some(eos) = self.eos_token_id {
-                    if allowed.contains(&eos) {
+                    if allowed.len() == 1 && allowed.contains(&eos) {
                         return true;
                     }
                 }
@@ -229,28 +210,37 @@ impl TextGeneration {
     }
     /// Check if the constraint is satisfied when stop_on_match is true
+    /// NOTE: For JSON schemas, this should only return true when the JSON structure is complete,
+    /// not just because we're in a state with many allowed tokens (like inside a string).
     pub fn is_constraint_satisfied_stop_on_match(&self) -> bool {
         // When stop_on_match is true, we stop as soon as the constraint is completed
         if self.constraint_completed {
             return true;
         }
-        // Also check if we're currently in a state that could be a valid end
-        // This is important for patterns like phone numbers where after matching
-        // the pattern, the FSM might allow any token (including more numbers)
+        // For JSON and other structured outputs, don't use the "large allowed set" heuristic.
+        // Instead, only consider the constraint satisfied when:
+        // 1. There are no allowed tokens (definitive completion)
+        // 2. Only EOS is allowed (completion with optional termination)
         if let (Some(ref constraint_index), Some(state)) = (&self.constraint, self.constraint_state) {
-            // Check if we've generated at least one token since constraint start
-            if self.tokens.len() > self.tokens_since_constraint_start {
-                if let Some(allowed) = constraint_index.allowed_tokens(&state) {
-                    // If the allowed tokens set is very large (unconstrained),
-                    // it means the pattern has been satisfied
-                    if allowed.len() > 1000 {
+            if let Some(allowed) = constraint_index.allowed_tokens(&state) {
+                // No more tokens allowed - definitely complete
+                if allowed.is_empty() {
+                    return true;
+                }
+                // Only EOS is allowed - complete JSON structure
+                if let Some(eos) = self.eos_token_id {
+                    if allowed.len() == 1 && allowed.contains(&eos) {
                         return true;
                     }
                 }
+            } else {
+                // None means no tokens allowed - constraint is complete
+                return true;
             }
         }
         false
     }
@@ -259,13 +249,13 @@ impl TextGeneration {
         if self.tokens.len() >= max_length {
             return true;
         }
         if let Some(eos) = self.eos_token_id {
             if token == eos {
                 return true;
             }
         }
         // Check if we've reached a final state in constraint
         // A state is considered final if it has no allowed tokens
         if let (Some(ref constraint_index), Some(state)) = (&self.constraint, self.constraint_state) {
@@ -278,7 +268,7 @@ impl TextGeneration {
                 return true;
             }
         }
         false
     }

data/ext/candle/src/ruby/structured.rs CHANGED

@@ -1,7 +1,7 @@
 use magnus::{Error, Module, RModule, function, Object};
 use std::sync::Arc;
-use crate::structured::{SchemaProcessor, VocabularyAdapter, Index};
+use crate::structured::{SchemaProcessor, VocabularyAdapter, Index, Vocabulary};
 use crate::ruby::{Result, tokenizer::Tokenizer};
 /// Ruby wrapper for structured generation constraints
@@ -12,36 +12,80 @@ pub struct StructuredConstraint {
 }
 impl StructuredConstraint {
-    /// Create a constraint from a JSON schema
+    /// Create a constraint from a JSON schema using a model ID
+    /// This uses Vocabulary::from_pretrained which handles tokenizer byte encoding correctly
+    pub fn from_schema_with_model(schema: String, model_id: String) -> Result<Self> {
+        // Use tokio runtime for async vocabulary loading
+        let rt = tokio::runtime::Runtime::new()
+            .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to create runtime: {}", e)))?;
+        let vocabulary = rt.block_on(async {
+            Vocabulary::from_pretrained(&model_id, None)
+        })
+        .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to create vocabulary from model '{}': {:?}", model_id, e)))?;
+        let processor = SchemaProcessor::new();
+        let index = processor.process_schema(&schema, &vocabulary)
+            .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to process schema: {}", e)))?;
+        Ok(Self { index })
+    }
+    /// Create a constraint from a regex pattern using a model ID
+    pub fn from_regex_with_model(pattern: String, model_id: String) -> Result<Self> {
+        // Use tokio runtime for async vocabulary loading
+        let rt = tokio::runtime::Runtime::new()
+            .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to create runtime: {}", e)))?;
+        let vocabulary = rt.block_on(async {
+            Vocabulary::from_pretrained(&model_id, None)
+        })
+        .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to create vocabulary from model '{}': {:?}", model_id, e)))?;
+        let processor = SchemaProcessor::new();
+        let index = processor.process_regex(&pattern, &vocabulary)
+            .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to process regex: {}", e)))?;
+        Ok(Self { index })
+    }
+    /// Create a constraint from a JSON schema (legacy method using tokenizer directly)
+    /// Note: This may not handle all tokenizer byte encodings correctly
     pub fn from_schema(schema: String, tokenizer: &Tokenizer) -> Result<Self> {
         let vocabulary = VocabularyAdapter::from_tokenizer(&tokenizer.0)
             .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to create vocabulary: {}", e)))?;
         let processor = SchemaProcessor::new();
         let index = processor.process_schema(&schema, &vocabulary)
             .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to process schema: {}", e)))?;
         Ok(Self { index })
     }
-    /// Create a constraint from a regex pattern
+    /// Create a constraint from a regex pattern (legacy method using tokenizer directly)
+    /// Note: This may not handle all tokenizer byte encodings correctly
     pub fn from_regex(pattern: String, tokenizer: &Tokenizer) -> Result<Self> {
         let vocabulary = VocabularyAdapter::from_tokenizer(&tokenizer.0)
             .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to create vocabulary: {}", e)))?;
         let processor = SchemaProcessor::new();
         let index = processor.process_regex(&pattern, &vocabulary)
             .map_err(|e| Error::new(magnus::exception::runtime_error(), format!("Failed to process regex: {}", e)))?;
         Ok(Self { index })
     }
 }
 pub fn init_structured(rb_candle: RModule) -> Result<()> {
     let class = rb_candle.define_class("StructuredConstraint", magnus::class::object())?;
+    // New methods using model_id for proper vocabulary loading
+    class.define_singleton_method("from_schema_with_model", function!(StructuredConstraint::from_schema_with_model, 2))?;
+    class.define_singleton_method("from_regex_with_model", function!(StructuredConstraint::from_regex_with_model, 2))?;
+    // Legacy methods using tokenizer directly (may have byte encoding issues with some models)
     class.define_singleton_method("from_schema", function!(StructuredConstraint::from_schema, 2))?;
     class.define_singleton_method("from_regex", function!(StructuredConstraint::from_regex, 2))?;
     Ok(())
 }

data/lib/candle/llm.rb CHANGED

@@ -32,16 +32,90 @@ module Candle
       end
     end
     # Create a structured constraint from a JSON schema
+    # Uses the model's vocabulary with proper byte encoding handling
     def constraint_from_schema(schema)
       schema_str = schema.is_a?(String) ? schema : JSON.generate(schema)
-      StructuredConstraint.from_schema(schema_str, tokenizer)
+      # Extract the tokenizer source model ID for proper vocabulary loading
+      tokenizer_model = tokenizer_source_model
+      if tokenizer_model
+        begin
+          StructuredConstraint.from_schema_with_model(schema_str, tokenizer_model)
+        rescue RuntimeError => e
+          # Fall back to legacy method if from_pretrained fails
+          # (e.g., tokenizer doesn't have EOS token in expected format)
+          if e.message.include?("UnsupportedTokenizer")
+            StructuredConstraint.from_schema(schema_str, tokenizer)
+          else
+            raise
+          end
+        end
+      else
+        # Fall back to legacy method if we can't determine the model
+        StructuredConstraint.from_schema(schema_str, tokenizer)
+      end
     end
     # Create a structured constraint from a regex pattern
+    # Uses the model's vocabulary with proper byte encoding handling
     def constraint_from_regex(pattern)
       pattern_str = pattern.is_a?(Regexp) ? pattern.source : pattern.to_s
-      StructuredConstraint.from_regex(pattern_str, tokenizer)
+      # Extract the tokenizer source model ID for proper vocabulary loading
+      tokenizer_model = tokenizer_source_model
+      if tokenizer_model
+        begin
+          StructuredConstraint.from_regex_with_model(pattern_str, tokenizer_model)
+        rescue RuntimeError => e
+          # Fall back to legacy method if from_pretrained fails
+          if e.message.include?("UnsupportedTokenizer")
+            StructuredConstraint.from_regex(pattern_str, tokenizer)
+          else
+            raise
+          end
+        end
+      else
+        # Fall back to legacy method if we can't determine the model
+        StructuredConstraint.from_regex(pattern_str, tokenizer)
+      end
     end
+    private
+    # Get the model ID to use for vocabulary loading
+    # This handles GGUF models by extracting the tokenizer source
+    def tokenizer_source_model
+      opts = options rescue {}
+      # For GGUF models, use the tokenizer source if available
+      if opts["tokenizer_source"]
+        return opts["tokenizer_source"]
+      end
+      # For regular models, use the base model ID
+      if opts["base_model"]
+        return opts["base_model"]
+      end
+      # Try model_id but strip GGUF parts
+      model = opts["model_id"] || (model_id rescue nil)
+      return nil unless model
+      # Remove GGUF file suffix if present
+      if model.include?("@")
+        model = model.split("@").first
+      end
+      # For GGUF repos, try to guess the tokenizer source
+      if model.downcase.include?("gguf")
+        guessed = self.class.guess_tokenizer(model)
+        return guessed if guessed && guessed != model
+      end
+      model
+    end
+    public
     # Generate with regex constraint
     def generate_regex(prompt, pattern:, stop_on_match: true, **options)

data/lib/candle/version.rb CHANGED

@@ -1,5 +1,5 @@
 # :nocov:
 module Candle
-  VERSION = "1.3.0"
+  VERSION = "1.3.1"
 end
 # :nocov:

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: red-candle
 version: !ruby/object:Gem::Version
-  version: 1.3.0
+  version: 1.3.1
 platform: ruby
 authors:
 - Christopher Petersen
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2025-09-13 00:00:00.000000000 Z
+date: 2025-12-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rb_sys