ruby_llm 1.3.1 → 1.4.0

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (47):
  1. checksums.yaml +4 -4
  2. data/README.md +13 -14
  3. data/lib/generators/ruby_llm/install/templates/INSTALL_INFO.md.tt +108 -0
  4. data/lib/generators/ruby_llm/install/templates/chat_model.rb.tt +3 -0
  5. data/lib/generators/ruby_llm/install/templates/create_chats_migration.rb.tt +8 -0
  6. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +15 -0
  7. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +14 -0
  8. data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +6 -0
  9. data/lib/generators/ruby_llm/install/templates/message_model.rb.tt +3 -0
  10. data/lib/generators/ruby_llm/install/templates/tool_call_model.rb.tt +3 -0
  11. data/lib/generators/ruby_llm/install_generator.rb +121 -0
  12. data/lib/ruby_llm/active_record/acts_as.rb +23 -5
  13. data/lib/ruby_llm/aliases.json +20 -39
  14. data/lib/ruby_llm/attachment.rb +1 -1
  15. data/lib/ruby_llm/chat.rb +68 -15
  16. data/lib/ruby_llm/configuration.rb +2 -0
  17. data/lib/ruby_llm/error.rb +1 -0
  18. data/lib/ruby_llm/message.rb +3 -1
  19. data/lib/ruby_llm/models.json +7117 -7084
  20. data/lib/ruby_llm/models.rb +2 -1
  21. data/lib/ruby_llm/provider.rb +13 -7
  22. data/lib/ruby_llm/providers/anthropic/chat.rb +13 -12
  23. data/lib/ruby_llm/providers/anthropic/media.rb +2 -0
  24. data/lib/ruby_llm/providers/anthropic/tools.rb +23 -13
  25. data/lib/ruby_llm/providers/bedrock/chat.rb +4 -5
  26. data/lib/ruby_llm/providers/bedrock/media.rb +2 -0
  27. data/lib/ruby_llm/providers/bedrock/streaming/base.rb +2 -2
  28. data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb +4 -4
  29. data/lib/ruby_llm/providers/gemini/chat.rb +37 -2
  30. data/lib/ruby_llm/providers/gemini/embeddings.rb +4 -2
  31. data/lib/ruby_llm/providers/gemini/media.rb +2 -0
  32. data/lib/ruby_llm/providers/gpustack/chat.rb +17 -0
  33. data/lib/ruby_llm/providers/gpustack/models.rb +55 -0
  34. data/lib/ruby_llm/providers/gpustack.rb +36 -0
  35. data/lib/ruby_llm/providers/ollama/media.rb +2 -0
  36. data/lib/ruby_llm/providers/openai/chat.rb +17 -2
  37. data/lib/ruby_llm/providers/openai/embeddings.rb +4 -3
  38. data/lib/ruby_llm/providers/openai/media.rb +2 -0
  39. data/lib/ruby_llm/providers/openai/streaming.rb +14 -0
  40. data/lib/ruby_llm/railtie.rb +5 -0
  41. data/lib/ruby_llm/stream_accumulator.rb +3 -2
  42. data/lib/ruby_llm/streaming.rb +25 -7
  43. data/lib/ruby_llm/utils.rb +10 -0
  44. data/lib/ruby_llm/version.rb +1 -1
  45. data/lib/ruby_llm.rb +3 -0
  46. data/lib/tasks/models_docs.rake +3 -2
  47. metadata +15 -3
--- a/data/lib/ruby_llm/models.rb
+++ b/data/lib/ruby_llm/models.rb
@@ -92,7 +92,8 @@ module RubyLLM
           f.response :json, parser_options: { symbolize_names: true }
         end
         response = connection.get 'https://api.parsera.org/v1/llm-specs'
-        response.body.map { |data| Model::Info.new(data) }
+        models = response.body.map { |data| Model::Info.new(data) }
+        models.reject { |model| model.provider.nil? || model.id.nil? }
       end

       def merge_models(provider_models, parsera_models)
--- a/data/lib/ruby_llm/provider.rb
+++ b/data/lib/ruby_llm/provider.rb
@@ -10,14 +10,20 @@ module RubyLLM
     module Methods
       extend Streaming

-      def complete(messages, tools:, temperature:, model:, connection:, &)
+      def complete(messages, tools:, temperature:, model:, connection:, params: {}, schema: nil, &) # rubocop:disable Metrics/ParameterLists
        normalized_temperature = maybe_normalize_temperature(temperature, model)

-        payload = render_payload(messages,
-                                 tools: tools,
-                                 temperature: normalized_temperature,
-                                 model: model,
-                                 stream: block_given?)
+        payload = Utils.deep_merge(
+          params,
+          render_payload(
+            messages,
+            tools: tools,
+            temperature: normalized_temperature,
+            model: model,
+            stream: block_given?,
+            schema: schema
+          )
+        )

        if block_given?
          stream_response connection, payload, &
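
Note: `Utils.deep_merge` is new in this release (see `data/lib/ruby_llm/utils.rb +10` above). Its exact implementation isn't shown in this hunk, but a minimal sketch of the semantics the call site implies — recursive merging of nested hashes, with the rendered payload winning on scalar conflicts so user-supplied `params` can't clobber required fields — would be:

    # Hypothetical sketch; the real method lives in data/lib/ruby_llm/utils.rb.
    module Utils
      module_function

      def deep_merge(base, override)
        base.merge(override) do |_key, base_value, override_value|
          if base_value.is_a?(Hash) && override_value.is_a?(Hash)
            deep_merge(base_value, override_value)  # merge nested hashes key by key
          else
            override_value                          # scalars: the override wins
          end
        end
      end
    end

    Utils.deep_merge({ a: 1, opts: { x: 1 } }, { opts: { y: 2 } })
    # => { a: 1, opts: { x: 1, y: 2 } }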
@@ -34,7 +40,7 @@ module RubyLLM
       def embed(text, model:, connection:, dimensions:)
         payload = render_embedding_payload(text, model:, dimensions:)
         response = connection.post(embedding_url(model:), payload)
-        parse_embedding_response(response, model:)
+        parse_embedding_response(response, model:, text:)
       end

       def paint(prompt, model:, size:, connection:)
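
Note: `parse_embedding_response` now receives the original input `text`, so providers can tell a single-string request apart from a one-element array (see the Gemini and OpenAI embedding hunks below). Assuming the public `RubyLLM.embed` API, the intended return shapes would be:

    RubyLLM.embed('a string').vectors    # => [0.1, 0.2, ...]    one flat vector
    RubyLLM.embed(['a string']).vectors  # => [[0.1, 0.2, ...]]  array of vectors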
--- a/data/lib/ruby_llm/providers/anthropic/chat.rb
+++ b/data/lib/ruby_llm/providers/anthropic/chat.rb
@@ -11,12 +11,12 @@ module RubyLLM
           '/v1/messages'
         end

-        def render_payload(messages, tools:, temperature:, model:, stream: false)
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
           system_messages, chat_messages = separate_messages(messages)
           system_content = build_system_content(system_messages)

-          build_base_payload(chat_messages, temperature, model, stream).tap do |payload|
-            add_optional_fields(payload, system_content:, tools:)
+          build_base_payload(chat_messages, model, stream).tap do |payload|
+            add_optional_fields(payload, system_content:, tools:, temperature:)
           end
         end

@@ -32,22 +32,22 @@ module RubyLLM
             )
           end

-          system_messages.map { |msg| format_message(msg)[:content] }.join("\n\n")
+          system_messages.map(&:content).join("\n\n")
         end

-        def build_base_payload(chat_messages, temperature, model, stream)
+        def build_base_payload(chat_messages, model, stream)
           {
             model: model,
             messages: chat_messages.map { |msg| format_message(msg) },
-            temperature: temperature,
             stream: stream,
             max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096
           }
         end

-        def add_optional_fields(payload, system_content:, tools:)
+        def add_optional_fields(payload, system_content:, tools:, temperature:)
           payload[:tools] = tools.values.map { |t| Tools.function_for(t) } if tools.any?
           payload[:system] = system_content unless system_content.empty?
+          payload[:temperature] = temperature unless temperature.nil?
         end

         def parse_completion_response(response)
@@ -55,9 +55,9 @@ module RubyLLM
           content_blocks = data['content'] || []

           text_content = extract_text_content(content_blocks)
-          tool_use = Tools.find_tool_use(content_blocks)
+          tool_use_blocks = Tools.find_tool_uses(content_blocks)

-          build_message(data, text_content, tool_use)
+          build_message(data, text_content, tool_use_blocks, response)
         end

         def extract_text_content(blocks)
@@ -65,14 +65,15 @@ module RubyLLM
           text_blocks.map { |c| c['text'] }.join
         end

-        def build_message(data, content, tool_use)
+        def build_message(data, content, tool_use_blocks, response)
           Message.new(
             role: :assistant,
             content: content,
-            tool_calls: Tools.parse_tool_calls(tool_use),
+            tool_calls: Tools.parse_tool_calls(tool_use_blocks),
             input_tokens: data.dig('usage', 'input_tokens'),
             output_tokens: data.dig('usage', 'output_tokens'),
-            model_id: data['model']
+            model_id: data['model'],
+            raw: response
           )
         end

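
Note: assistant messages now keep a handle on the raw provider response (the same `raw:` field is threaded through the Gemini, OpenAI, and streaming hunks below). Assuming `Message` exposes it as a reader (the `data/lib/ruby_llm/message.rb +3 -1` change above), a hedged usage sketch:

    message = RubyLLM.chat.ask 'Hello'
    # `raw` should be the underlying Faraday::Response, making HTTP
    # status and headers inspectable (reader name assumed):
    message.raw.status
    message.raw.headers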
--- a/data/lib/ruby_llm/providers/anthropic/media.rb
+++ b/data/lib/ruby_llm/providers/anthropic/media.rb
@@ -8,6 +8,8 @@ module RubyLLM
         module_function

         def format_content(content)
+          # Convert Hash/Array back to JSON string for API
+          return [format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array)
           return [format_text(content)] unless content.is_a?(Content)

           parts = []
--- a/data/lib/ruby_llm/providers/anthropic/tools.rb
+++ b/data/lib/ruby_llm/providers/anthropic/tools.rb
@@ -7,16 +7,18 @@ module RubyLLM
       module Tools
         module_function

-        def find_tool_use(blocks)
-          blocks.find { |c| c['type'] == 'tool_use' }
+        def find_tool_uses(blocks)
+          blocks.select { |c| c['type'] == 'tool_use' }
         end

         def format_tool_call(msg)
-          tool_call = msg.tool_calls.values.first
-
           content = []
+
           content << Media.format_text(msg.content) unless msg.content.nil? || msg.content.empty?
-          content << format_tool_use_block(tool_call)
+
+          msg.tool_calls.each_value do |tool_call|
+            content << format_tool_use_block(tool_call)
+          end

           {
             role: 'assistant',
@@ -68,16 +70,24 @@ module RubyLLM
           end
         end

-        def parse_tool_calls(content_block)
-          return nil unless content_block && content_block['type'] == 'tool_use'
+        def parse_tool_calls(content_blocks)
+          return nil if content_blocks.nil?

-          {
-            content_block['id'] => ToolCall.new(
-              id: content_block['id'],
-              name: content_block['name'],
-              arguments: content_block['input']
+          # Handle single content block (backward compatibility)
+          content_blocks = [content_blocks] unless content_blocks.is_a?(Array)
+
+          tool_calls = {}
+          content_blocks.each do |block|
+            next unless block && block['type'] == 'tool_use'
+
+            tool_calls[block['id']] = ToolCall.new(
+              id: block['id'],
+              name: block['name'],
+              arguments: block['input']
             )
-          }
+          end
+
+          tool_calls.empty? ? nil : tool_calls
         end

         def clean_parameters(parameters)
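
Note: with `find_tool_uses` plus the reworked `parse_tool_calls`, one assistant turn can now carry several parallel tool calls. The hash built above is keyed by tool-use id, e.g. (ids and names illustrative):

    {
      'toolu_01' => ToolCall.new(id: 'toolu_01', name: 'get_weather', arguments: { 'city' => 'Berlin' }),
      'toolu_02' => ToolCall.new(id: 'toolu_02', name: 'get_time', arguments: { 'zone' => 'CET' })
    }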
--- a/data/lib/ruby_llm/providers/bedrock/chat.rb
+++ b/data/lib/ruby_llm/providers/bedrock/chat.rb
@@ -39,23 +39,22 @@ module RubyLLM
           "model/#{@model_id}/invoke"
         end

-        def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
           # Hold model_id in instance variable for use in completion_url and stream_url
           @model_id = model

           system_messages, chat_messages = Anthropic::Chat.separate_messages(messages)
           system_content = Anthropic::Chat.build_system_content(system_messages)

-          build_base_payload(chat_messages, temperature, model).tap do |payload|
-            Anthropic::Chat.add_optional_fields(payload, system_content:, tools:)
+          build_base_payload(chat_messages, model).tap do |payload|
+            Anthropic::Chat.add_optional_fields(payload, system_content:, tools:, temperature:)
           end
         end

-        def build_base_payload(chat_messages, temperature, model)
+        def build_base_payload(chat_messages, model)
           {
             anthropic_version: 'bedrock-2023-05-31',
             messages: chat_messages.map { |msg| format_message(msg) },
-            temperature: temperature,
             max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096
           }
         end
--- a/data/lib/ruby_llm/providers/bedrock/media.rb
+++ b/data/lib/ruby_llm/providers/bedrock/media.rb
@@ -11,6 +11,8 @@ module RubyLLM
         module_function

         def format_content(content)
+          # Convert Hash/Array back to JSON string for API
+          return [Anthropic::Media.format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array)
           return [Anthropic::Media.format_text(content)] unless content.is_a?(Content)

           parts = []
--- a/data/lib/ruby_llm/providers/bedrock/streaming/base.rb
+++ b/data/lib/ruby_llm/providers/bedrock/streaming/base.rb
@@ -34,7 +34,7 @@ module RubyLLM
                                     payload:)
           accumulator = StreamAccumulator.new

-          connection.post stream_url, payload do |req|
+          response = connection.post stream_url, payload do |req|
             req.headers.merge! build_headers(signature.headers, streaming: block_given?)
             req.options.on_data = handle_stream do |chunk|
               accumulator.add chunk
@@ -42,7 +42,7 @@ module RubyLLM
             end
           end

-          accumulator.to_message
+          accumulator.to_message(response)
         end

         def handle_stream(&block)
--- a/data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb
+++ b/data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb
@@ -24,8 +24,8 @@ module RubyLLM
         end

         def read_prelude(chunk, offset)
-          total_length = chunk[offset...offset + 4].unpack1('N')
-          headers_length = chunk[offset + 4...offset + 8].unpack1('N')
+          total_length = chunk[offset...(offset + 4)].unpack1('N')
+          headers_length = chunk[(offset + 4)...(offset + 8)].unpack1('N')
           [total_length, headers_length]
         end

@@ -72,8 +72,8 @@ module RubyLLM

         def extract_potential_lengths(chunk, pos)
           [
-            chunk[pos...pos + 4].unpack1('N'),
-            chunk[pos + 4...pos + 8].unpack1('N')
+            chunk[pos...(pos + 4)].unpack1('N'),
+            chunk[(pos + 4)...(pos + 8)].unpack1('N')
           ]
         end

--- a/data/lib/ruby_llm/providers/gemini/chat.rb
+++ b/data/lib/ruby_llm/providers/gemini/chat.rb
@@ -11,7 +11,7 @@ module RubyLLM
           "models/#{@model}:generateContent"
         end

-        def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Lint/UnusedMethodArgument
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
           @model = model # Store model for completion_url/stream_url
           payload = {
             contents: format_messages(messages),
@@ -19,6 +19,12 @@ module RubyLLM
               temperature: temperature
             }
           }
+
+          if schema
+            payload[:generationConfig][:responseMimeType] = 'application/json'
+            payload[:generationConfig][:responseSchema] = convert_schema_to_gemini(schema)
+          end
+
           payload[:tools] = format_tools(tools) if tools.any?
           payload
         end
@@ -75,10 +81,39 @@ module RubyLLM
             tool_calls: tool_calls,
             input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
             output_tokens: data.dig('usageMetadata', 'candidatesTokenCount'),
-            model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0]
+            model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0],
+            raw: response
           )
         end

+        def convert_schema_to_gemini(schema) # rubocop:disable Metrics/PerceivedComplexity
+          return nil unless schema
+
+          case schema[:type]
+          when 'object'
+            {
+              type: 'OBJECT',
+              properties: schema[:properties]&.transform_values { |prop| convert_schema_to_gemini(prop) } || {},
+              required: schema[:required] || []
+            }
+          when 'array'
+            {
+              type: 'ARRAY',
+              items: schema[:items] ? convert_schema_to_gemini(schema[:items]) : { type: 'STRING' }
+            }
+          when 'string'
+            result = { type: 'STRING' }
+            result[:enum] = schema[:enum] if schema[:enum]
+            result
+          when 'number', 'integer'
+            { type: 'NUMBER' }
+          when 'boolean'
+            { type: 'BOOLEAN' }
+          else
+            { type: 'STRING' }
+          end
+        end
+
         def extract_content(data)
           candidate = data.dig('candidates', 0)
           return '' unless candidate
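
Note: `convert_schema_to_gemini` maps a JSON Schema subset onto Gemini's `responseSchema` vocabulary — uppercase type names, with both `number` and `integer` collapsing to `NUMBER`. From the case statement above:

    convert_schema_to_gemini(
      type: 'object',
      properties: { name: { type: 'string' }, age: { type: 'integer' } },
      required: ['name']
    )
    # => { type: 'OBJECT',
    #      properties: { name: { type: 'STRING' }, age: { type: 'NUMBER' } },
    #      required: ['name'] }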
--- a/data/lib/ruby_llm/providers/gemini/embeddings.rb
+++ b/data/lib/ruby_llm/providers/gemini/embeddings.rb
@@ -15,9 +15,11 @@ module RubyLLM
           { requests: [text].flatten.map { |t| single_embedding_payload(t, model:, dimensions:) } }
         end

-        def parse_embedding_response(response, model:)
+        def parse_embedding_response(response, model:, text:)
           vectors = response.body['embeddings']&.map { |e| e['values'] }
-          vectors in [vectors]
+          # If we only got one embedding AND the input was a single string (not an array),
+          # return it as a single vector
+          vectors = vectors.first if vectors&.length == 1 && !text.is_a?(Array)

           Embedding.new(vectors:, model:, input_tokens: 0)
         end
--- a/data/lib/ruby_llm/providers/gemini/media.rb
+++ b/data/lib/ruby_llm/providers/gemini/media.rb
@@ -8,6 +8,8 @@ module RubyLLM
         module_function

         def format_content(content)
+          # Convert Hash/Array back to JSON string for API
+          return [format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array)
           return [format_text(content)] unless content.is_a?(Content)

           parts = []
--- /dev/null
+++ b/data/lib/ruby_llm/providers/gpustack/chat.rb
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    module GPUStack
+      # Chat methods of the GPUStack API integration
+      module Chat
+        module_function
+
+        def format_role(role)
+          # GPUStack doesn't use the new OpenAI convention for system prompts
+          role.to_s
+        end
+      end
+    end
+  end
+end
--- /dev/null
+++ b/data/lib/ruby_llm/providers/gpustack/models.rb
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    module GPUStack
+      # Models methods of the GPUStack API integration
+      module Models
+        module_function
+
+        def models_url
+          'models'
+        end
+
+        def parse_list_models_response(response, slug, _capabilities)
+          items = response.body['items'] || []
+          items.map do |model|
+            Model::Info.new(
+              id: model['name'],
+              created_at: model['created_at'] ? Time.parse(model['created_at']) : nil,
+              display_name: "#{model['source']}/#{model['name']}",
+              provider: slug,
+              type: determine_model_type(model),
+              metadata: {
+                description: model['description'],
+                source: model['source'],
+                huggingface_repo_id: model['huggingface_repo_id'],
+                ollama_library_model_name: model['ollama_library_model_name'],
+                backend: model['backend'],
+                meta: model['meta'],
+                categories: model['categories']
+              },
+              context_window: model.dig('meta', 'n_ctx'),
+              # Using context window as max tokens since it's not explicitly provided
+              max_tokens: model.dig('meta', 'n_ctx'),
+              supports_vision: model.dig('meta', 'support_vision') || false,
+              supports_functions: model.dig('meta', 'support_tool_calls') || false,
+              supports_json_mode: true, # Assuming all models support JSON mode
+              input_price_per_million: 0.0, # Price information not available in new format
+              output_price_per_million: 0.0 # Price information not available in new format
+            )
+          end
+        end
+
+        private
+
+        def determine_model_type(model)
+          return 'embedding' if model['categories']&.include?('embedding')
+          return 'chat' if model['categories']&.include?('llm')
+
+          'other'
+        end
+      end
+    end
+  end
+end
--- /dev/null
+++ b/data/lib/ruby_llm/providers/gpustack.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    # GPUStack API integration based on Ollama.
+    module GPUStack
+      extend OpenAI
+      extend GPUStack::Chat
+      extend GPUStack::Models
+
+      module_function
+
+      def api_base(config)
+        config.gpustack_api_base
+      end
+
+      def headers(config)
+        {
+          'Authorization' => "Bearer #{config.gpustack_api_key}"
+        }
+      end
+
+      def slug
+        'gpustack'
+      end
+
+      def local?
+        true
+      end
+
+      def configuration_requirements
+        %i[gpustack_api_base gpustack_api_key]
+      end
+    end
+  end
+end
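
Note: the new provider requires both `gpustack_api_base` and `gpustack_api_key`, which match the two accessors added to `data/lib/ruby_llm/configuration.rb` (+2). A hedged configuration sketch (URL, key, and model name are placeholders):

    RubyLLM.configure do |config|
      config.gpustack_api_base = 'http://localhost/v1'
      config.gpustack_api_key  = ENV['GPUSTACK_API_KEY']
    end

    chat = RubyLLM.chat(model: 'qwen2.5', provider: :gpustack)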
--- a/data/lib/ruby_llm/providers/ollama/media.rb
+++ b/data/lib/ruby_llm/providers/ollama/media.rb
@@ -10,6 +10,8 @@ module RubyLLM
         module_function

         def format_content(content)
+          # Convert Hash/Array back to JSON string for API
+          return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
           return content unless content.is_a?(Content)

           parts = []
--- a/data/lib/ruby_llm/providers/openai/chat.rb
+++ b/data/lib/ruby_llm/providers/openai/chat.rb
@@ -11,7 +11,7 @@ module RubyLLM

         module_function

-        def render_payload(messages, tools:, temperature:, model:, stream: false)
+        def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
           payload = {
             model: model,
             messages: format_messages(messages),
@@ -26,6 +26,20 @@ module RubyLLM
             payload[:tool_choice] = 'auto'
           end

+          if schema
+            # Use strict mode from schema if specified, default to true
+            strict = schema[:strict] != false
+
+            payload[:response_format] = {
+              type: 'json_schema',
+              json_schema: {
+                name: 'response',
+                schema: schema,
+                strict: strict
+              }
+            }
+          end
+
           payload[:stream_options] = { include_usage: true } if stream
           payload
         end
@@ -45,7 +59,8 @@ module RubyLLM
             tool_calls: parse_tool_calls(message_data['tool_calls']),
             input_tokens: data['usage']['prompt_tokens'],
             output_tokens: data['usage']['completion_tokens'],
-            model_id: data['model']
+            model_id: data['model'],
+            raw: response
           )
         end

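
Note: together with the `schema:` plumbing through `Provider::Methods#complete` and each `render_payload` above, the `+68 -15` change to `data/lib/ruby_llm/chat.rb` plausibly surfaces structured output on the chat object itself. A hedged sketch, assuming the public method is `Chat#with_schema` (not shown in this diff):

    schema = {
      type: 'object',
      properties: { name: { type: 'string' }, age: { type: 'integer' } },
      required: %w[name age],
      additionalProperties: false
    }

    # `with_schema` is assumed; the schema ends up in response_format above.
    response = RubyLLM.chat(model: 'gpt-4o').with_schema(schema).ask('Extract: Jane is 42')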
--- a/data/lib/ruby_llm/providers/openai/embeddings.rb
+++ b/data/lib/ruby_llm/providers/openai/embeddings.rb
@@ -19,13 +19,14 @@ module RubyLLM
           }.compact
         end

-        def parse_embedding_response(response, model:)
+        def parse_embedding_response(response, model:, text:)
           data = response.body
           input_tokens = data.dig('usage', 'prompt_tokens') || 0
           vectors = data['data'].map { |d| d['embedding'] }

-          # If we only got one embedding, return it as a single vector
-          vectors in [vectors]
+          # If we only got one embedding AND the input was a single string (not an array),
+          # return it as a single vector
+          vectors = vectors.first if vectors.length == 1 && !text.is_a?(Array)

           Embedding.new(vectors:, model:, input_tokens:)
         end
--- a/data/lib/ruby_llm/providers/openai/media.rb
+++ b/data/lib/ruby_llm/providers/openai/media.rb
@@ -8,6 +8,8 @@ module RubyLLM
         module_function

         def format_content(content)
+          # Convert Hash/Array back to JSON string for API
+          return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
           return content unless content.is_a?(Content)

           parts = []
--- a/data/lib/ruby_llm/providers/openai/streaming.rb
+++ b/data/lib/ruby_llm/providers/openai/streaming.rb
@@ -21,6 +21,20 @@ module RubyLLM
             output_tokens: data.dig('usage', 'completion_tokens')
           )
         end
+
+        def parse_streaming_error(data)
+          error_data = JSON.parse(data)
+          return unless error_data['error']
+
+          case error_data.dig('error', 'type')
+          when 'server_error'
+            [500, error_data['error']['message']]
+          when 'rate_limit_exceeded', 'insufficient_quota'
+            [429, error_data['error']['message']]
+          else
+            [400, error_data['error']['message']]
+          end
+        end
       end
     end
   end
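
Note: `parse_streaming_error` turns error payloads that arrive mid-stream into `[status, message]` pairs, which the reworked streaming layer (`data/lib/ruby_llm/streaming.rb +25 -7`) can route through the normal HTTP error handling. Straight from the case statement above:

    parse_streaming_error('{"error":{"type":"rate_limit_exceeded","message":"Slow down"}}')
    # => [429, "Slow down"]
    parse_streaming_error('{"error":{"type":"server_error","message":"boom"}}')
    # => [500, "boom"]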
--- a/data/lib/ruby_llm/railtie.rb
+++ b/data/lib/ruby_llm/railtie.rb
@@ -8,5 +8,10 @@ module RubyLLM
         include RubyLLM::ActiveRecord::ActsAs
       end
     end
+
+    # Register generators
+    generators do
+      require 'generators/ruby_llm/install_generator'
+    end
   end
 end
--- a/data/lib/ruby_llm/stream_accumulator.rb
+++ b/data/lib/ruby_llm/stream_accumulator.rb
@@ -29,14 +29,15 @@ module RubyLLM
       RubyLLM.logger.debug inspect
     end

-    def to_message
+    def to_message(response)
       Message.new(
         role: :assistant,
         content: content.empty? ? nil : content,
         model_id: model_id,
         tool_calls: tool_calls_from_stream,
         input_tokens: @input_tokens.positive? ? @input_tokens : nil,
-        output_tokens: @output_tokens.positive? ? @output_tokens : nil
+        output_tokens: @output_tokens.positive? ? @output_tokens : nil,
+        raw: response
       )
     end