ruby_llm_community 1.1.1 → 1.3.0
This diff shows the changes between publicly available versions of this package as released to its public registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +25 -7
- data/lib/generators/ruby_llm/chat_ui/chat_ui_generator.rb +127 -67
- data/lib/generators/ruby_llm/chat_ui/templates/controllers/chats_controller.rb.tt +12 -12
- data/lib/generators/ruby_llm/chat_ui/templates/controllers/messages_controller.rb.tt +7 -7
- data/lib/generators/ruby_llm/chat_ui/templates/controllers/models_controller.rb.tt +4 -4
- data/lib/generators/ruby_llm/chat_ui/templates/jobs/chat_response_job.rb.tt +6 -6
- data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_chat.html.erb.tt +4 -4
- data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_form.html.erb.tt +5 -5
- data/lib/generators/ruby_llm/chat_ui/templates/views/chats/index.html.erb.tt +5 -5
- data/lib/generators/ruby_llm/chat_ui/templates/views/chats/new.html.erb.tt +4 -4
- data/lib/generators/ruby_llm/chat_ui/templates/views/chats/show.html.erb.tt +8 -8
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_content.html.erb.tt +1 -0
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_form.html.erb.tt +5 -5
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_message.html.erb.tt +9 -6
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_tool_calls.html.erb.tt +7 -0
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/create.turbo_stream.erb.tt +5 -5
- data/lib/generators/ruby_llm/chat_ui/templates/views/models/_model.html.erb.tt +9 -9
- data/lib/generators/ruby_llm/chat_ui/templates/views/models/index.html.erb.tt +4 -6
- data/lib/generators/ruby_llm/chat_ui/templates/views/models/show.html.erb.tt +11 -11
- data/lib/generators/ruby_llm/generator_helpers.rb +131 -87
- data/lib/generators/ruby_llm/install/install_generator.rb +75 -73
- data/lib/generators/ruby_llm/install/templates/add_references_to_chats_tool_calls_and_messages_migration.rb.tt +9 -0
- data/lib/generators/ruby_llm/install/templates/create_chats_migration.rb.tt +0 -1
- data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -3
- data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +0 -1
- data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +1 -1
- data/lib/generators/ruby_llm/upgrade_to_v1_7/upgrade_to_v1_7_generator.rb +88 -85
- data/lib/generators/ruby_llm/upgrade_to_v1_9/templates/add_v1_9_message_columns.rb.tt +15 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_9/upgrade_to_v1_9_generator.rb +49 -0
- data/lib/ruby_llm/active_record/acts_as.rb +17 -8
- data/lib/ruby_llm/active_record/chat_methods.rb +41 -13
- data/lib/ruby_llm/active_record/message_methods.rb +11 -2
- data/lib/ruby_llm/active_record/model_methods.rb +1 -1
- data/lib/ruby_llm/aliases.json +46 -20
- data/lib/ruby_llm/attachment.rb +8 -0
- data/lib/ruby_llm/chat.rb +13 -2
- data/lib/ruby_llm/configuration.rb +10 -1
- data/lib/ruby_llm/connection.rb +4 -4
- data/lib/ruby_llm/content.rb +23 -0
- data/lib/ruby_llm/message.rb +17 -9
- data/lib/ruby_llm/model/info.rb +4 -0
- data/lib/ruby_llm/models.json +12050 -9940
- data/lib/ruby_llm/models.rb +21 -25
- data/lib/ruby_llm/moderation.rb +56 -0
- data/lib/ruby_llm/provider.rb +29 -1
- data/lib/ruby_llm/providers/anthropic/chat.rb +18 -5
- data/lib/ruby_llm/providers/anthropic/content.rb +44 -0
- data/lib/ruby_llm/providers/anthropic/media.rb +5 -4
- data/lib/ruby_llm/providers/anthropic/models.rb +9 -2
- data/lib/ruby_llm/providers/anthropic/tools.rb +20 -18
- data/lib/ruby_llm/providers/bedrock/media.rb +2 -1
- data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +9 -2
- data/lib/ruby_llm/providers/gemini/chat.rb +353 -72
- data/lib/ruby_llm/providers/gemini/media.rb +59 -1
- data/lib/ruby_llm/providers/gemini/tools.rb +146 -25
- data/lib/ruby_llm/providers/gemini/transcription.rb +116 -0
- data/lib/ruby_llm/providers/gemini.rb +2 -1
- data/lib/ruby_llm/providers/gpustack/media.rb +1 -0
- data/lib/ruby_llm/providers/ollama/media.rb +1 -0
- data/lib/ruby_llm/providers/openai/capabilities.rb +15 -7
- data/lib/ruby_llm/providers/openai/chat.rb +7 -3
- data/lib/ruby_llm/providers/openai/media.rb +2 -1
- data/lib/ruby_llm/providers/openai/moderation.rb +34 -0
- data/lib/ruby_llm/providers/openai/streaming.rb +7 -3
- data/lib/ruby_llm/providers/openai/tools.rb +34 -12
- data/lib/ruby_llm/providers/openai/transcription.rb +70 -0
- data/lib/ruby_llm/providers/openai_base.rb +2 -0
- data/lib/ruby_llm/providers/red_candle/capabilities.rb +124 -0
- data/lib/ruby_llm/providers/red_candle/chat.rb +317 -0
- data/lib/ruby_llm/providers/red_candle/models.rb +121 -0
- data/lib/ruby_llm/providers/red_candle/streaming.rb +40 -0
- data/lib/ruby_llm/providers/red_candle.rb +90 -0
- data/lib/ruby_llm/providers/vertexai/transcription.rb +16 -0
- data/lib/ruby_llm/providers/vertexai.rb +3 -0
- data/lib/ruby_llm/railtie.rb +1 -1
- data/lib/ruby_llm/stream_accumulator.rb +8 -12
- data/lib/ruby_llm/tool.rb +126 -0
- data/lib/ruby_llm/transcription.rb +35 -0
- data/lib/ruby_llm/utils.rb +46 -0
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/ruby_llm_community.rb +38 -1
- metadata +35 -3

data/lib/ruby_llm/providers/red_candle/capabilities.rb (new file)

@@ -0,0 +1,124 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class RedCandle
+      # Determines capabilities and pricing for RedCandle models
+      module Capabilities
+        module_function
+
+        def supports_vision?
+          false
+        end
+
+        def supports_functions?(_model_id = nil)
+          false
+        end
+
+        def supports_streaming?
+          true
+        end
+
+        def supports_structured_output?
+          true
+        end
+
+        def supports_regex_constraints?
+          true
+        end
+
+        def supports_embeddings?
+          false # Future enhancement - Red Candle does support embedding models
+        end
+
+        def supports_audio?
+          false
+        end
+
+        def supports_pdf?
+          false
+        end
+
+        def normalize_temperature(temperature, _model_id)
+          # Red Candle uses standard 0-2 range
+          return 0.7 if temperature.nil?
+
+          temperature = temperature.to_f
+          temperature.clamp(0.0, 2.0)
+        end
+
+        def model_context_window(model_id)
+          case model_id
+          when /gemma-3-4b/i
+            8192
+          when /qwen2\.5-1\.5b/i, /mistral-7b/i
+            32_768
+          when /tinyllama/i
+            2048
+          else
+            4096 # Conservative default
+          end
+        end
+
+        def pricing
+          # Local execution - no API costs
+          {
+            input_tokens_per_dollar: Float::INFINITY,
+            output_tokens_per_dollar: Float::INFINITY,
+            input_price_per_million_tokens: 0.0,
+            output_price_per_million_tokens: 0.0
+          }
+        end
+
+        def default_max_tokens
+          512
+        end
+
+        def max_temperature
+          2.0
+        end
+
+        def min_temperature
+          0.0
+        end
+
+        def supports_temperature?
+          true
+        end
+
+        def supports_top_p?
+          true
+        end
+
+        def supports_top_k?
+          true
+        end
+
+        def supports_repetition_penalty?
+          true
+        end
+
+        def supports_seed?
+          true
+        end
+
+        def supports_stop_sequences?
+          true
+        end
+
+        def model_families
+          %w[gemma llama qwen2 mistral phi]
+        end
+
+        def available_on_platform?
+          # Check if Candle can be loaded
+
+          require 'candle'
+          true
+        rescue LoadError
+          false
+        end
+      end
+    end
+  end
+end
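
Because Capabilities uses module_function, its helpers can be called directly on the module. A minimal sketch of how the lookups above behave, assuming the gem is loaded via require 'ruby_llm_community' (return values follow the code shown):

    require 'ruby_llm_community'

    caps = RubyLLM::Providers::RedCandle::Capabilities

    caps.model_context_window('TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF') # => 2048  (matches /tinyllama/i)
    caps.model_context_window('some-unknown-model')                     # => 4096  (conservative default)
    caps.normalize_temperature(nil, 'any-model')                        # => 0.7
    caps.normalize_temperature(3.5, 'any-model')                        # => 2.0   (clamped to 0.0..2.0)
    caps.pricing[:input_price_per_million_tokens]                       # => 0.0   (local execution)
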

data/lib/ruby_llm/providers/red_candle/chat.rb (new file)

@@ -0,0 +1,317 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class RedCandle
+      # Chat implementation for Red Candle provider
+      module Chat
+        # Override the base complete method to handle local execution
+        def complete(messages, tools:, temperature:, cache_prompts:, model:, params: {}, headers: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists
+          _ = headers # Interface compatibility
+          _ = cache_prompts # Interface compatibility
+          payload = Utils.deep_merge(
+            render_payload(
+              messages,
+              tools: tools,
+              temperature: temperature,
+              model: model,
+              stream: block_given?,
+              schema: schema
+            ),
+            params
+          )
+
+          if block_given?
+            perform_streaming_completion!(payload, &)
+          else
+            result = perform_completion!(payload)
+            # Convert to Message object for compatibility
+            # Red Candle doesn't provide token counts by default, but we can estimate them
+            content = result[:content]
+            # Rough estimation: ~4 characters per token
+            estimated_output_tokens = (content.length / 4.0).round
+            estimated_input_tokens = estimate_input_tokens(payload[:messages])
+
+            Message.new(
+              role: result[:role].to_sym,
+              content: content,
+              model_id: model.id,
+              input_tokens: estimated_input_tokens,
+              output_tokens: estimated_output_tokens
+            )
+          end
+        end
+
+        def render_payload(messages, tools:, temperature:, model:, stream:, schema:) # rubocop:disable Metrics/ParameterLists
+          # Red Candle doesn't support tools
+          raise Error.new(nil, 'Red Candle provider does not support tool calling') if tools && !tools.empty?
+
+          {
+            messages: messages,
+            temperature: temperature,
+            model: model.id,
+            stream: stream,
+            schema: schema
+          }
+        end
+
+        def perform_completion!(payload)
+          model = ensure_model_loaded!(payload[:model])
+          messages = format_messages(payload[:messages])
+
+          # Apply chat template if available
+          prompt = if model.respond_to?(:apply_chat_template)
+                     model.apply_chat_template(messages)
+                   else
+                     # Fallback to simple formatting
+                     "#{messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n")}\n\nassistant:"
+                   end
+
+          # Check context length
+          validate_context_length!(prompt, payload[:model])
+
+          # Configure generation
+          config_opts = {
+            temperature: payload[:temperature] || 0.7,
+            max_length: payload[:max_tokens] || 512
+          }
+
+          # Handle structured generation if schema provided
+          response = if payload[:schema]
+                       generate_with_schema(model, prompt, payload[:schema], config_opts)
+                     else
+                       model.generate(
+                         prompt,
+                         config: ::Candle::GenerationConfig.balanced(**config_opts)
+                       )
+                     end
+
+          format_response(response, payload[:schema])
+        end
+
+        def perform_streaming_completion!(payload, &block)
+          model = ensure_model_loaded!(payload[:model])
+          messages = format_messages(payload[:messages])
+
+          # Apply chat template if available
+          prompt = if model.respond_to?(:apply_chat_template)
+                     model.apply_chat_template(messages)
+                   else
+                     "#{messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n")}\n\nassistant:"
+                   end
+
+          # Check context length
+          validate_context_length!(prompt, payload[:model])
+
+          # Configure generation
+          config = ::Candle::GenerationConfig.balanced(
+            temperature: payload[:temperature] || 0.7,
+            max_length: payload[:max_tokens] || 512
+          )
+
+          # Collect all streamed content
+          full_content = ''
+
+          # Stream tokens
+          model.generate_stream(prompt, config: config) do |token|
+            full_content += token
+            chunk = format_stream_chunk(token)
+            block.call(chunk)
+          end
+
+          # Send final chunk with empty content (indicates completion)
+          final_chunk = format_stream_chunk('')
+          block.call(final_chunk)
+
+          # Return a Message object with the complete response
+          estimated_output_tokens = (full_content.length / 4.0).round
+          estimated_input_tokens = estimate_input_tokens(payload[:messages])
+
+          Message.new(
+            role: :assistant,
+            content: full_content,
+            model_id: payload[:model],
+            input_tokens: estimated_input_tokens,
+            output_tokens: estimated_output_tokens
+          )
+        end
+
+        private
+
+        def ensure_model_loaded!(model_id)
+          @loaded_models[model_id] ||= load_model(model_id)
+        end
+
+        def model_options(model_id)
+          # Get GGUF file and tokenizer if this is a GGUF model
+          # Access the methods from the Models module which is included in the provider
+          options = { device: @device }
+          options[:gguf_file] = gguf_file_for(model_id) if respond_to?(:gguf_file_for)
+          options[:tokenizer] = tokenizer_for(model_id) if respond_to?(:tokenizer_for)
+          options
+        end
+
+        def load_model(model_id)
+          options = model_options(model_id)
+          ::Candle::LLM.from_pretrained(model_id, **options)
+        rescue StandardError => e
+          if e.message.include?('Failed to find tokenizer')
+            raise Error.new(nil, token_error_message(e, options[:tokenizer]))
+          elsif e.message.include?('Failed to find model')
+            raise Error.new(nil, model_error_message(e, model_id))
+          else
+            raise Error.new(nil, "Failed to load model #{model_id}: #{e.message}")
+          end
+        end
+
+        def token_error_message(exception, tokenizer)
+          <<~ERROR_MESSAGE
+            Failed to load tokenizer '#{tokenizer}'. The tokenizer may not exist or require authentication.
+            Please verify the tokenizer exists at: https://huggingface.co/#{tokenizer}
+            And that you have accepted the terms of service for the tokenizer.
+            If it requires authentication, login with: huggingface-cli login
+            See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning
+            Original error: #{exception.message}"
+          ERROR_MESSAGE
+        end
+
+        def model_error_message(exception, model_id)
+          <<~ERROR_MESSAGE
+            Failed to load model #{model_id}: #{exception.message}
+            Please verify the model exists at: https://huggingface.co/#{model_id}
+            And that you have accepted the terms of service for the model.
+            If it requires authentication, login with: huggingface-cli login
+            See https://github.com/scientist-labs/red-candle?tab=readme-ov-file#%EF%B8%8F-huggingface-login-warning
+            Original error: #{exception.message}"
+          ERROR_MESSAGE
+        end
+
+        def format_messages(messages)
+          messages.map do |msg|
+            # Handle both hash and Message objects
+            if msg.is_a?(Message)
+              {
+                role: msg.role.to_s,
+                content: extract_message_content_from_object(msg)
+              }
+            else
+              {
+                role: msg[:role].to_s,
+                content: extract_message_content(msg)
+              }
+            end
+          end
+        end
+
+        def extract_message_content_from_object(message)
+          content = message.content
+
+          # Handle Content objects
+          if content.is_a?(Content)
+            # Extract text from Content object, including attachment text
+            handle_content_object(content)
+          elsif content.is_a?(String)
+            content
+          else
+            content.to_s
+          end
+        end
+
+        def extract_message_content(message)
+          content = message[:content]
+
+          # Handle Content objects
+          case content
+          when Content
+            # Extract text from Content object
+            handle_content_object(content)
+          when String
+            content
+          when Array
+            # Handle array content (e.g., with images)
+            content.filter_map { |part| part[:text] if part[:type] == 'text' }.join(' ')
+          else
+            content.to_s
+          end
+        end
+
+        def handle_content_object(content)
+          text_parts = []
+          text_parts << content.text if content.text
+
+          # Add any text from attachments
+          content.attachments&.each do |attachment|
+            text_parts << attachment.data if attachment.respond_to?(:data) && attachment.data.is_a?(String)
+          end
+
+          text_parts.join(' ')
+        end
+
+        def generate_with_schema(model, prompt, schema, config_opts)
+          model.generate_structured(
+            prompt,
+            schema: schema,
+            **config_opts
+          )
+        rescue StandardError => e
+          RubyLLM.logger.warn "Structured generation failed: #{e.message}. Falling back to regular generation."
+          model.generate(
+            prompt,
+            config: ::Candle::GenerationConfig.balanced(**config_opts)
+          )
+        end
+
+        def format_response(response, schema)
+          content = if schema && !response.is_a?(String)
+                      # Structured response
+                      JSON.generate(response)
+                    else
+                      response
+                    end
+
+          {
+            content: content,
+            role: 'assistant'
+          }
+        end
+
+        def format_stream_chunk(token)
+          # Return a Chunk object for streaming compatibility
+          Chunk.new(
+            role: :assistant,
+            content: token
+          )
+        end
+
+        def estimate_input_tokens(messages)
+          # Rough estimation: ~4 characters per token
+          formatted = format_messages(messages)
+          total_chars = formatted.sum { |msg| "#{msg[:role]}: #{msg[:content]}".length }
+          (total_chars / 4.0).round
+        end
+
+        def validate_context_length!(prompt, model_id)
+          # Get the context window for this model
+          context_window = if respond_to?(:model_context_window)
+                             model_context_window(model_id)
+                           else
+                             4096 # Conservative default
+                           end
+
+          # Estimate tokens in prompt (~4 characters per token)
+          estimated_tokens = (prompt.length / 4.0).round
+
+          # Check if prompt exceeds context window (leave some room for response)
+          max_input_tokens = context_window - 512 # Reserve 512 tokens for response
+          return unless estimated_tokens > max_input_tokens
+
+          raise Error.new(
+            nil,
+            "Context length exceeded. Estimated #{estimated_tokens} tokens, " \
+            "but model #{model_id} has a context window of #{context_window} tokens."
+          )
+        end
+      end
+    end
+  end
+end
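
The context check in validate_context_length! is a character-count heuristic rather than a real tokenizer: roughly 4 characters per token, with 512 tokens reserved for the response. A standalone sketch of the same arithmetic (not part of the gem) shows why a long prompt is rejected on the smallest model:

    # Heuristic from validate_context_length!: ~4 characters per token,
    # with 512 tokens reserved for the response.
    prompt           = 'x' * 10_000                 # a 10,000-character prompt
    estimated_tokens = (prompt.length / 4.0).round  # => 2500
    context_window   = 2048                         # TinyLlama, per model_context_window
    max_input_tokens = context_window - 512         # => 1536

    estimated_tokens > max_input_tokens             # => true, so an Error would be raised
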

data/lib/ruby_llm/providers/red_candle/models.rb (new file)

@@ -0,0 +1,121 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class RedCandle
+      # Models methods of the RedCandle integration
+      module Models
+        # TODO: red-candle supports more models, but let's start with some well tested ones.
+        SUPPORTED_MODELS = [
+          {
+            id: 'google/gemma-3-4b-it-qat-q4_0-gguf',
+            name: 'Gemma 3 4B Instruct (Quantized)',
+            gguf_file: 'gemma-3-4b-it-q4_0.gguf',
+            tokenizer: 'google/gemma-3-4b-it', # Tokenizer from base model
+            context_window: 8192,
+            family: 'gemma',
+            architecture: 'gemma2',
+            supports_chat: true,
+            supports_structured: true
+          },
+          {
+            id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF',
+            name: 'TinyLlama 1.1B Chat (Quantized)',
+            gguf_file: 'tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf',
+            context_window: 2048,
+            family: 'llama',
+            architecture: 'llama',
+            supports_chat: true,
+            supports_structured: true
+          },
+          {
+            id: 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF',
+            name: 'Mistral 7B Instruct v0.2 (Quantized)',
+            gguf_file: 'mistral-7b-instruct-v0.2.Q4_K_M.gguf',
+            tokenizer: 'mistralai/Mistral-7B-Instruct-v0.2',
+            context_window: 32_768,
+            family: 'mistral',
+            architecture: 'mistral',
+            supports_chat: true,
+            supports_structured: true
+          },
+          {
+            id: 'Qwen/Qwen2.5-1.5B-Instruct-GGUF',
+            name: 'Qwen 2.1.5B Instruct (Quantized)',
+            gguf_file: 'qwen2.5-1.5b-instruct-q4_k_m.gguf',
+            context_window: 32_768,
+            family: 'qwen2',
+            architecture: 'qwen2',
+            supports_chat: true,
+            supports_structured: true
+          },
+          {
+            id: 'microsoft/Phi-3-mini-4k-instruct',
+            name: 'Phi 3',
+            context_window: 4096,
+            family: 'phi',
+            architecture: 'phi',
+            supports_chat: true,
+            supports_structured: true
+          }
+        ].freeze
+
+        def list_models
+          SUPPORTED_MODELS.map do |model_data|
+            Model::Info.new(
+              id: model_data[:id],
+              name: model_data[:name],
+              provider: slug,
+              family: model_data[:family],
+              context_window: model_data[:context_window],
+              capabilities: %w[streaming structured_output],
+              modalities: { input: %w[text], output: %w[text] }
+            )
+          end
+        end
+
+        def models
+          @models ||= list_models
+        end
+
+        def model(id)
+          models.find { |m| m.id == id } ||
+            raise(Error.new(nil,
+                            "Model #{id} not found in Red Candle provider. Available models: #{model_ids.join(', ')}"))
+        end
+
+        def model_available?(id)
+          SUPPORTED_MODELS.any? { |m| m[:id] == id }
+        end
+
+        def model_ids
+          SUPPORTED_MODELS.map { |m| m[:id] }
+        end
+
+        def model_info(id)
+          SUPPORTED_MODELS.find { |m| m[:id] == id }
+        end
+
+        def supports_chat?(model_id)
+          info = model_info(model_id)
+          info ? info[:supports_chat] : false
+        end
+
+        def supports_structured?(model_id)
+          info = model_info(model_id)
+          info ? info[:supports_structured] : false
+        end
+
+        def gguf_file_for(model_id)
+          info = model_info(model_id)
+          info ? info[:gguf_file] : nil
+        end
+
+        def tokenizer_for(model_id)
+          info = model_info(model_id)
+          info ? info[:tokenizer] : nil
+        end
+      end
+    end
+  end
+end
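
SUPPORTED_MODELS is a plain array of hashes, so the registry can be inspected directly. A small sketch (values taken from the entries above):

    models = RubyLLM::Providers::RedCandle::Models::SUPPORTED_MODELS

    models.map { |m| m[:id] }
    # => ["google/gemma-3-4b-it-qat-q4_0-gguf",
    #     "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
    #     "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
    #     "Qwen/Qwen2.5-1.5B-Instruct-GGUF",
    #     "microsoft/Phi-3-mini-4k-instruct"]

    tiny = models.find { |m| m[:id] == 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF' }
    tiny[:gguf_file]  # => "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
    tiny[:tokenizer]  # => nil (this entry defines no :tokenizer key)
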

data/lib/ruby_llm/providers/red_candle/streaming.rb (new file)

@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class RedCandle
+      # Streaming methods of the RedCandle integration
+      module Streaming
+        def stream(payload, &block)
+          if payload[:stream]
+            perform_streaming_completion!(payload, &block)
+          else
+            # Non-streaming fallback
+            result = perform_completion!(payload)
+            # Yield the complete result as a single chunk
+            chunk = {
+              content: result[:content],
+              role: result[:role],
+              finish_reason: result[:finish_reason]
+            }
+            block.call(chunk)
+          end
+        end
+
+        private
+
+        def stream_processor
+          # Red Candle handles streaming internally through blocks
+          # This method is here for compatibility with the base streaming interface
+          nil
+        end
+
+        def process_stream_response(response)
+          # Red Candle doesn't use HTTP responses
+          # Streaming is handled directly in perform_streaming_completion!
+          response
+        end
+      end
+    end
+  end
+end
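
stream expects the payload hash that render_payload (in the Chat module above) produces. A rough sketch of driving it directly, for illustration only; the provider is constructed with the global configuration, and RubyLLM.config is assumed to be the usual accessor for it:

    provider = RubyLLM::Providers::RedCandle.new(RubyLLM.config)

    payload = {
      messages: [{ role: :user, content: 'Say hello in five words.' }],
      temperature: 0.7,
      model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF',
      stream: true,
      schema: nil
    }

    provider.stream(payload) do |chunk|
      print chunk.content # each chunk is a RubyLLM::Chunk carrying the next token
    end
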

data/lib/ruby_llm/providers/red_candle.rb (new file)

@@ -0,0 +1,90 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    # Red Candle provider for local LLM execution using the Candle Rust crate.
+    class RedCandle < Provider
+      include RedCandle::Chat
+      include RedCandle::Models
+      include RedCandle::Capabilities
+      include RedCandle::Streaming
+
+      def initialize(config)
+        ensure_red_candle_available!
+        super
+        @loaded_models = {} # Cache for loaded models
+        @device = determine_device(config)
+      end
+
+      def api_base
+        nil # Local execution, no API base needed
+      end
+
+      def headers
+        {} # No HTTP headers needed
+      end
+
+      class << self
+        def capabilities
+          RedCandle::Capabilities
+        end
+
+        def configuration_requirements
+          [] # No required config, device is optional
+        end
+
+        def local?
+          true
+        end
+
+        def supports_functions?(model_id = nil)
+          RedCandle::Capabilities.supports_functions?(model_id)
+        end
+
+        def models
+          # Return Red Candle models for registration
+          RedCandle::Models::SUPPORTED_MODELS.map do |model_data|
+            Model::Info.new(
+              id: model_data[:id],
+              name: model_data[:name],
+              provider: 'red_candle',
+              type: 'chat',
+              family: model_data[:family],
+              context_window: model_data[:context_window],
+              capabilities: %w[streaming structured_output],
+              modalities: { input: %w[text], output: %w[text] }
+            )
+          end
+        end
+      end
+
+      private
+
+      def ensure_red_candle_available!
+        require 'candle'
+      rescue LoadError
+        raise Error.new(nil, "Red Candle gem is not installed. Add 'gem \"red-candle\", \"~> 1.2.3\"' to your Gemfile.")
+      end
+
+      def determine_device(config)
+        if config.red_candle_device
+          case config.red_candle_device.to_s.downcase
+          when 'cpu'
+            ::Candle::Device.cpu
+          when 'cuda', 'gpu'
+            ::Candle::Device.cuda
+          when 'metal'
+            ::Candle::Device.metal
+          else
+            ::Candle::Device.best
+          end
+        else
+          ::Candle::Device.best
+        end
+      rescue StandardError => e
+        RubyLLM.logger.warn "Failed to initialize device: #{e.message}. Falling back to CPU."
+        ::Candle::Device.cpu
+      end
+    end
+  end
+end
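
Putting the pieces together, enabling the provider in an application would look roughly like the sketch below. The red_candle_device setting and the gem version constraint come from the code above; the configure/chat/ask calls follow the usual RubyLLM surface API and are assumptions as far as this diff is concerned:

    # Gemfile
    gem 'red-candle', '~> 1.2.3'

    # config/initializers/ruby_llm.rb (or anywhere before first use)
    RubyLLM.configure do |config|
      config.red_candle_device = 'metal' # or 'cpu' / 'cuda'; omit to let Candle pick the best device
    end

    chat = RubyLLM.chat(model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF', provider: :red_candle)
    chat.ask('Summarize Red Candle in one sentence.') do |chunk|
      print chunk.content
    end
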

data/lib/ruby_llm/providers/vertexai/transcription.rb (new file)

@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class VertexAI
+      # Vertex AI specific helpers for audio transcription
+      module Transcription
+        private
+
+        def transcription_url(model)
+          "projects/#{@config.vertexai_project_id}/locations/#{@config.vertexai_location}/publishers/google/models/#{model}:generateContent" # rubocop:disable Layout/LineLength
+        end
+      end
+    end
+  end
+end
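
For reference, with hypothetical configuration values the private helper above expands to a path like the following (project id, location, and model name are made up for illustration):

    # Assuming config.vertexai_project_id == 'my-project' and
    # config.vertexai_location == 'us-central1':
    transcription_url('gemini-2.0-flash')
    # => "projects/my-project/locations/us-central1/publishers/google/models/gemini-2.0-flash:generateContent"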