RubyGems - ruby_llm-red_candle - Versions diffs - 0.1.0 → 0.2.0 - Mend

ruby_llm-red_candle 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

checksums.yaml +4 -4
data/lib/ruby_llm/red_candle/capabilities.rb +1 -1
data/lib/ruby_llm/red_candle/chat.rb +64 -18
data/lib/ruby_llm/red_candle/models.rb +160 -18
data/lib/ruby_llm/red_candle/tools.rb +59 -0
data/lib/ruby_llm/red_candle/version.rb +1 -1
data/lib/ruby_llm-red_candle.rb +1 -0
data/ruby_llm-red_candle.gemspec +48 -0
metadata +9 -7

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c6a9af49d55182783c1cebd1e02748f3a9e0716a8e35bb87e250c38075731784
-  data.tar.gz: 32242fd560276ce4889ea640ffebc11c81f0e4f1afef83e7edfe5204cdb813ce
+  metadata.gz: 3d28e7533552659e4618c7774c7758741d49433dfd320b0a483a0e93dfd20889
+  data.tar.gz: 7da03bf5b36d20608917f261b2ec284a6e4dd50bba9a95ebbbfa618f149a2948
 SHA512:
-  metadata.gz: 6c28f204a8faedfda2c578ec9a05227a36c69047e7bd31555e9603fae6ad5c86b9b4680ba2119af70e49d80f6705d9d1c5bd7b9290e6acda7042beb9102bf714
-  data.tar.gz: 40411b9e81a77b97a0dcb1a5dd01e9b5ef5d7b6d0a3cf3c211a72b8879f1985a9b129cdf384bc72077d48c0b9bef66ed6a83730ee3803adeec72b21dbe33db83
+  metadata.gz: c05f230c8f8cc3d42b0ce4194f1d67d3964424a421b29745d3d8ebfeba1499b0e4a7452d8349a6940ba62139b745181e0b63f99a2a43cd68fce2ff4241bdbaf8
+  data.tar.gz: 0ed7589883dcd93fe699fc2f702563dbac6905a72309c13d7fb98042ec4189e3dac1827e6e016411856378544e6c4cbe5f8f0b14087541732e86c7da0c01a2c6

data/lib/ruby_llm/red_candle/capabilities.rb CHANGED Viewed

@@ -11,7 +11,7 @@ module RubyLLM
       end
       def supports_functions?(_model_id = nil)
-        false
+        true
       end
       def supports_streaming?

data/lib/ruby_llm/red_candle/chat.rb CHANGED Viewed

@@ -5,8 +5,10 @@ module RubyLLM
     # Chat implementation for Red Candle provider
     module Chat
       # Override the base complete method to handle local execution
-      def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &block)
+      def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, tool_prefs: nil, thinking: nil, &block)
         _ = headers # Interface compatibility
+        _ = tool_prefs # Interface compatibility (not yet used by local models)
+        _ = thinking # Interface compatibility (not yet used by local models)
         payload = RubyLLM::Utils.deep_merge(
           render_payload(
             messages,
@@ -23,11 +25,13 @@ module RubyLLM
           perform_streaming_completion!(payload, &block)
         else
           result = perform_completion!(payload)
-          # Convert to Message object for compatibility
-          # Red Candle doesn't provide token counts by default, but we can estimate them
+          # perform_tool_completion! returns a Message directly
+          return result if result.is_a?(RubyLLM::Message)
+          # Convert hash result to Message object
           content = result[:content]
-          # Rough estimation: ~4 characters per token
-          estimated_output_tokens = (content.length / 4.0).round
+          estimated_output_tokens = (content.to_s.length / 4.0).round
           estimated_input_tokens = estimate_input_tokens(payload[:messages])
           RubyLLM::Message.new(
@@ -40,25 +44,31 @@ module RubyLLM
         end
       end
-      def render_payload(messages, tools:, temperature:, model:, stream:, schema:)
-        # Red Candle doesn't support tools
-        if tools && !tools.empty?
-          raise RubyLLM::Error.new(nil, "Red Candle provider does not support tool calling")
-        end
-        {
+      def render_payload(messages, tools:, temperature:, model:, stream:, schema:, tool_prefs: nil, thinking: nil)
+        payload = {
           messages: messages,
           temperature: temperature,
           model: model.id,
           stream: stream,
           schema: schema
         }
+        if tools && !tools.empty?
+          payload[:tools] = tools
+        end
+        payload
       end
       def perform_completion!(payload)
         model = ensure_model_loaded!(payload[:model])
         messages = format_messages(payload[:messages])
+        # Handle tool calling
+        if payload[:tools] && !payload[:tools].empty?
+          return perform_tool_completion!(model, messages, payload)
+        end
         # Handle structured generation differently - we need to build the prompt
         # with JSON instructions BEFORE applying the chat template
         response = if payload[:schema]
@@ -110,6 +120,34 @@ module RubyLLM
       private
+      def perform_tool_completion!(model, messages, payload)
+        # Convert RubyLLM tools to Candle tools
+        candle_tools = payload[:tools].values.map { |t| Tools.candle_tool_for(t) }
+        # Build generation config with enough room for thinking + tool calls
+        # Tool calling needs more tokens than regular chat (model uses <think> blocks)
+        payload[:max_tokens] ||= 1000
+        config = build_generation_config(payload)
+        # Use red-candle's chat_with_tools (execute: false — RubyLLM manages execution)
+        result = model.chat_with_tools(messages, tools: candle_tools, config: config)
+        content = result.text_response || ""
+        tool_calls = Tools.parse_tool_calls(result.tool_calls)
+        estimated_output_tokens = ((result.raw_response || "").length / 4.0).round
+        estimated_input_tokens = estimate_input_tokens(payload[:messages])
+        RubyLLM::Message.new(
+          role: :assistant,
+          content: content.empty? ? nil : content,
+          tool_calls: tool_calls,
+          model_id: payload[:model],
+          input_tokens: estimated_input_tokens,
+          output_tokens: estimated_output_tokens
+        )
+      end
       # Build the prompt string from messages using the model's chat template
       def build_prompt(model, messages)
         if model.respond_to?(:apply_chat_template)
@@ -229,12 +267,17 @@ module RubyLLM
       def format_messages(messages)
         messages.map do |msg|
-          # Handle both hash and Message objects
           if msg.is_a?(RubyLLM::Message)
-            {
-              role: msg.role.to_s,
-              content: extract_message_content_from_object(msg)
-            }
+            if msg.tool_call?
+              Tools.format_tool_call(msg)
+            elsif msg.tool_result?
+              Tools.format_tool_result(msg)
+            else
+              {
+                role: msg.role.to_s,
+                content: extract_message_content_from_object(msg)
+              }
+            end
           else
             {
               role: msg[:role].to_s,
@@ -292,8 +335,11 @@ module RubyLLM
         # Use Red Candle's native structured generation which uses the Rust outlines crate
         # for grammar-constrained generation. This ensures valid JSON output.
+        # Unwrap RubyLLM's schema wrapper format: {name: "response", schema: {...}, strict: true}
+        actual_schema = schema.is_a?(Hash) && (schema[:schema] || schema["schema"]) ? (schema[:schema] || schema["schema"]) : schema
         # Normalize schema to ensure consistent symbol keys
-        normalized_schema = deep_symbolize_keys(schema)
+        normalized_schema = deep_symbolize_keys(actual_schema)
         # Validate schema before attempting generation
         SchemaValidator.validate!(normalized_schema)

data/lib/ruby_llm/red_candle/models.rb CHANGED Viewed

@@ -6,14 +6,33 @@ module RubyLLM
     module Models
       # TODO: red-candle supports more models, but let's start with some well tested ones.
       SUPPORTED_MODELS = [
+        # Mistral
         {
-          id: "google/gemma-3-4b-it-qat-q4_0-gguf",
-          name: "Gemma 3 4B Instruct (Quantized)",
-          gguf_file: "gemma-3-4b-it-q4_0.gguf",
-          tokenizer: "google/gemma-3-4b-it", # Tokenizer from base model
-          context_window: 8192,
-          family: "gemma",
-          architecture: "gemma2",
+          id: "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
+          name: "Mistral 7B Instruct v0.2 (Quantized)",
+          gguf_file: "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
+          tokenizer: "mistralai/Mistral-7B-Instruct-v0.2",
+          context_window: 32_768,
+          family: "mistral",
+          supports_chat: true,
+          supports_structured: true
+        },
+        {
+          id: "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF",
+          name: "Mistral 7B Instruct v0.3 (Quantized)",
+          gguf_file: "Mistral-7B-Instruct-v0.3.Q4_K_M.gguf",
+          tokenizer: "mistralai/Mistral-7B-Instruct-v0.3",
+          context_window: 32_768,
+          family: "mistral",
+          supports_chat: true,
+          supports_structured: true
+        },
+        # Llama / TinyLlama
+        {
+          id: "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+          name: "TinyLlama 1.1B Chat",
+          context_window: 2048,
+          family: "llama",
           supports_chat: true,
           supports_structured: true
         },
@@ -23,28 +42,95 @@ module RubyLLM
           gguf_file: "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
           context_window: 2048,
           family: "llama",
-          architecture: "llama",
           supports_chat: true,
           supports_structured: true
         },
+        # Gemma
         {
-          id: "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
-          name: "Mistral 7B Instruct v0.2 (Quantized)",
-          gguf_file: "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
-          tokenizer: "mistralai/Mistral-7B-Instruct-v0.2",
-          context_window: 32_768,
-          family: "mistral",
-          architecture: "mistral",
+          id: "google/gemma-3-4b-it-qat-q4_0-gguf",
+          name: "Gemma 3 4B Instruct (Quantized)",
+          gguf_file: "gemma-3-4b-it-q4_0.gguf",
+          tokenizer: "google/gemma-3-4b-it",
+          context_window: 8192,
+          family: "gemma",
           supports_chat: true,
           supports_structured: true
         },
+        # Qwen 2.5
         {
           id: "Qwen/Qwen2.5-1.5B-Instruct-GGUF",
           name: "Qwen 2.5 1.5B Instruct (Quantized)",
           gguf_file: "qwen2.5-1.5b-instruct-q4_k_m.gguf",
           context_window: 32_768,
           family: "qwen2",
-          architecture: "qwen2",
+          supports_chat: true,
+          supports_structured: true
+        },
+        # Qwen 3
+        {
+          id: "Qwen/Qwen3-0.6B",
+          name: "Qwen3 0.6B",
+          context_window: 40_960,
+          family: "qwen3",
+          supports_chat: true,
+          supports_structured: true,
+          supports_tools: true
+        },
+        {
+          id: "MaziyarPanahi/Qwen3-0.6B-GGUF",
+          name: "Qwen3 0.6B (Quantized)",
+          gguf_file: "Qwen3-0.6B.Q4_K_M.gguf",
+          tokenizer: "Qwen/Qwen3-0.6B",
+          context_window: 40_960,
+          family: "qwen3",
+          supports_chat: true,
+          supports_structured: true,
+          supports_tools: true
+        },
+        {
+          id: "MaziyarPanahi/Qwen3-4B-GGUF",
+          name: "Qwen3 4B (Quantized)",
+          gguf_file: "Qwen3-4B.Q4_K_M.gguf",
+          tokenizer: "Qwen/Qwen3-4B",
+          context_window: 40_960,
+          family: "qwen3",
+          supports_chat: true,
+          supports_structured: true,
+          supports_tools: true
+        },
+        # SmolLM2
+        {
+          id: "HuggingFaceTB/SmolLM2-360M-Instruct",
+          name: "SmolLM2 360M Instruct",
+          context_window: 8192,
+          family: "llama",
+          supports_chat: true,
+          supports_structured: true
+        },
+        {
+          id: "HuggingFaceTB/SmolLM2-360M-Instruct-GGUF",
+          name: "SmolLM2 360M Instruct (Quantized)",
+          gguf_file: "smollm2-360m-instruct-q8_0.gguf",
+          context_window: 8192,
+          family: "llama",
+          supports_chat: true,
+          supports_structured: true
+        },
+        # Phi
+        {
+          id: "microsoft/phi-2",
+          name: "Phi 2",
+          context_window: 2048,
+          family: "phi",
+          supports_chat: true,
+          supports_structured: true
+        },
+        {
+          id: "TheBloke/phi-2-GGUF",
+          name: "Phi 2 (Quantized)",
+          gguf_file: "phi-2.Q4_K_M.gguf",
+          context_window: 2048,
+          family: "phi",
           supports_chat: true,
           supports_structured: true
         },
@@ -53,10 +139,66 @@ module RubyLLM
           name: "Phi 3 Mini 4K Instruct",
           context_window: 4096,
           family: "phi",
-          architecture: "phi",
           supports_chat: true,
           supports_structured: true
-        }
+        },
+        {
+          id: "microsoft/Phi-3-mini-4k-instruct-gguf",
+          name: "Phi 3 Mini 4K Instruct (Quantized)",
+          gguf_file: "Phi-3-mini-4k-instruct-q4.gguf",
+          context_window: 4096,
+          family: "phi",
+          supports_chat: true,
+          supports_structured: true
+        },
+        {
+          id: "microsoft/phi-4-gguf",
+          name: "Phi 4 (Quantized)",
+          gguf_file: "phi-4-Q4_K_S.gguf",
+          context_window: 16_384,
+          family: "phi",
+          supports_chat: true,
+          supports_structured: true
+        },
+        # Yi
+        {
+          id: "bartowski/Yi-1.5-6B-Chat-GGUF",
+          name: "Yi 1.5 6B Chat (Quantized)",
+          gguf_file: "Yi-1.5-6B-Chat-Q4_K_M.gguf",
+          tokenizer: "01-ai/Yi-1.5-6B-Chat",
+          context_window: 4096,
+          family: "llama",
+          supports_chat: true,
+          supports_structured: true
+        },
+        # Granite
+        {
+          id: "ibm-granite/granite-7b-instruct",
+          name: "Granite 7B Instruct",
+          context_window: 4096,
+          family: "granite",
+          supports_chat: true,
+          supports_structured: true
+        },
+        {
+          id: "ibm-granite/granite-4.0-micro",
+          name: "Granite 4.0 Micro",
+          context_window: 8192,
+          family: "granite",
+          supports_chat: true,
+          supports_structured: true
+        },
+        # GLM-4
+        {
+          id: "bartowski/THUDM_GLM-4-9B-0414-GGUF",
+          name: "GLM-4 9B (Quantized)",
+          gguf_file: "THUDM_GLM-4-9B-0414-Q4_K_M.gguf",
+          tokenizer: "THUDM/GLM-4-9B-0414",
+          context_window: 131_072,
+          family: "glm4",
+          supports_chat: true,
+          supports_structured: true
+        },
       ].freeze
       def list_models

data/lib/ruby_llm/red_candle/tools.rb ADDED Viewed

@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+require "securerandom"
+module RubyLLM
+  module RedCandle
+    # Tool calling support for Red Candle provider.
+    # Bridges between RubyLLM::Tool and Candle::Tool formats.
+    module Tools
+      module_function
+      # Convert a RubyLLM::Tool to a Candle::Tool (without a callable block —
+      # RubyLLM manages tool execution itself)
+      def candle_tool_for(tool)
+        parameters = tool.params_schema ||
+                     RubyLLM::Tool::SchemaDefinition.from_parameters(tool.parameters)&.json_schema ||
+                     { type: "object", properties: {}, required: [] }
+        ::Candle::Tool.new(
+          name: tool.name,
+          description: tool.description || "",
+          parameters: parameters
+        ) { |_args| nil } # No-op block — RubyLLM handles execution
+      end
+      # Convert Candle::ToolCall objects to RubyLLM tool_calls hash format
+      # RubyLLM expects: { "call_id" => RubyLLM::ToolCall, ... }
+      def parse_tool_calls(candle_tool_calls)
+        return nil if candle_tool_calls.nil? || candle_tool_calls.empty?
+        tool_calls = {}
+        candle_tool_calls.each do |tc|
+          call_id = "call_#{SecureRandom.hex(12)}"
+          tool_calls[call_id] = RubyLLM::ToolCall.new(
+            id: call_id,
+            name: tc.name,
+            arguments: tc.arguments
+          )
+        end
+        tool_calls
+      end
+      # Format a tool call message (assistant message with tool_calls) for
+      # sending back to the model. Injects tool calls into the content.
+      def format_tool_call(msg)
+        content = msg.content.to_s
+        msg.tool_calls&.each_value do |tc|
+          content += "\n<tool_call>\n#{JSON.generate({ name: tc.name, arguments: tc.arguments })}\n</tool_call>"
+        end
+        { role: "assistant", content: content }
+      end
+      # Format a tool result message for sending back to the model
+      def format_tool_result(msg)
+        { role: "tool", content: msg.content.to_s }
+      end
+    end
+  end
+end

data/lib/ruby_llm/red_candle/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 module RubyLLM
   module RedCandle
-    VERSION = "0.1.0"
+    VERSION = "0.2.0"
   end
 end

data/lib/ruby_llm-red_candle.rb CHANGED Viewed

@@ -8,6 +8,7 @@ require_relative "ruby_llm/red_candle/schema_validator"
 require_relative "ruby_llm/red_candle/capabilities"
 require_relative "ruby_llm/red_candle/models"
 require_relative "ruby_llm/red_candle/streaming"
+require_relative "ruby_llm/red_candle/tools"
 require_relative "ruby_llm/red_candle/chat"
 require_relative "ruby_llm/red_candle/provider"

data/ruby_llm-red_candle.gemspec ADDED Viewed

@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+require_relative "lib/ruby_llm/red_candle/version"
+Gem::Specification.new do |spec|
+  spec.name = "ruby_llm-red_candle"
+  spec.version = RubyLLM::RedCandle::VERSION
+  spec.authors = ["Chris Petersen"]
+  spec.email = ["chris@scientist.com"]
+  spec.summary = "Red Candle provider for RubyLLM - local LLM execution using quantized GGUF models"
+  spec.description = <<~DESC
+    A RubyLLM plugin that enables local LLM execution using the Red Candle gem.
+    Run quantized GGUF models directly in Ruby without external API calls.
+    Supports streaming, structured output, and multiple model architectures
+    including Gemma, Llama, Qwen, Mistral, and Phi.
+  DESC
+  spec.homepage = "https://github.com/scientist-labs/ruby_llm-red_candle"
+  spec.license = "MIT"
+  spec.required_ruby_version = ">= 3.1.0"
+  spec.metadata["homepage_uri"] = spec.homepage
+  spec.metadata["source_code_uri"] = spec.homepage
+  spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/main/CHANGELOG.md"
+  spec.metadata["rubygems_mfa_required"] = "true"
+  # Specify which files should be added to the gem when it is released.
+  spec.files = Dir.chdir(__dir__) do
+    `git ls-files -z`.split("\x0").reject do |f|
+      (File.expand_path(f) == __FILE__) ||
+        f.start_with?(*%w[bin/ test/ spec/ features/ .git .github appveyor Gemfile])
+    end
+  end
+  spec.bindir = "exe"
+  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
+  spec.require_paths = ["lib"]
+  # Runtime dependencies
+  spec.add_dependency "ruby_llm", ">= 1.10", "< 3.0"
+  spec.add_dependency "red-candle", "~> 1.5"
+  # Development dependencies
+  spec.add_development_dependency "rake", "~> 13.0"
+  spec.add_development_dependency "rspec", "~> 3.12"
+  spec.add_development_dependency "rubocop", "~> 1.0"
+  spec.add_development_dependency "rubocop-rspec", "~> 3.0"
+  spec.add_development_dependency "simplecov", "~> 0.22"
+end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby_llm-red_candle
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.2.0
 platform: ruby
 authors:
 - Chris Petersen
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2025-12-11 00:00:00.000000000 Z
+date: 2026-03-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ruby_llm
@@ -16,7 +16,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: '1.2'
+        version: '1.10'
     - - "<"
       - !ruby/object:Gem::Version
         version: '3.0'
@@ -26,7 +26,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: '1.2'
+        version: '1.10'
     - - "<"
       - !ruby/object:Gem::Version
         version: '3.0'
@@ -36,14 +36,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.3'
+        version: '1.5'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.3'
+        version: '1.5'
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
@@ -140,7 +140,9 @@ files:
 - lib/ruby_llm/red_candle/provider.rb
 - lib/ruby_llm/red_candle/schema_validator.rb
 - lib/ruby_llm/red_candle/streaming.rb
+- lib/ruby_llm/red_candle/tools.rb
 - lib/ruby_llm/red_candle/version.rb
+- ruby_llm-red_candle.gemspec
 homepage: https://github.com/scientist-labs/ruby_llm-red_candle
 licenses:
 - MIT
@@ -164,7 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.5.3
+rubygems_version: 3.5.22
 signing_key:
 specification_version: 4
 summary: Red Candle provider for RubyLLM - local LLM execution using quantized GGUF