RubyGems - llm.rb - Versions diffs - 4.13.0 → 4.15.0 - Mend

llm.rb 4.13.0 → 4.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

checksums.yaml +4 -4
data/CHANGELOG.md +107 -0
data/README.md +82 -32
data/lib/llm/context.rb +25 -10
data/lib/llm/error.rb +4 -0
data/lib/llm/eventhandler.rb +16 -12
data/lib/llm/eventstream/event.rb +15 -5
data/lib/llm/eventstream/parser.rb +64 -17
data/lib/llm/mcp/command.rb +1 -1
data/lib/llm/mcp/mailbox.rb +23 -0
data/lib/llm/mcp/pipe.rb +1 -1
data/lib/llm/mcp/router.rb +44 -0
data/lib/llm/mcp/rpc.rb +29 -18
data/lib/llm/mcp/transport/http/event_handler.rb +11 -9
data/lib/llm/mcp/transport/http.rb +2 -2
data/lib/llm/mcp/transport/stdio.rb +1 -1
data/lib/llm/mcp.rb +5 -2
data/lib/llm/provider/transport/http/execution.rb +115 -0
data/lib/llm/provider/transport/http/interruptible.rb +109 -0
data/lib/llm/provider/transport/http/stream_decoder.rb +92 -0
data/lib/llm/provider/transport/http.rb +144 -0
data/lib/llm/provider.rb +17 -103
data/lib/llm/providers/anthropic/stream_parser.rb +6 -3
data/lib/llm/providers/google/stream_parser.rb +6 -3
data/lib/llm/providers/ollama/stream_parser.rb +3 -2
data/lib/llm/providers/openai/responses/stream_parser.rb +216 -91
data/lib/llm/providers/openai/stream_parser.rb +111 -57
data/lib/llm/response.rb +12 -4
data/lib/llm/sequel/plugin.rb +252 -0
data/lib/llm/stream/queue.rb +2 -2
data/lib/llm/stream.rb +2 -2
data/lib/llm/version.rb +1 -1
data/lib/llm.rb +8 -0
data/lib/sequel/plugins/llm.rb +8 -0
metadata +9 -2
data/lib/llm/client.rb +0 -36

data/lib/llm/provider.rb CHANGED Viewed

@@ -7,14 +7,9 @@
 # @abstract
 class LLM::Provider
   require "net/http"
-  require_relative "client"
-  include LLM::Client
-  @@clients = {}
-  ##
-  # @api private
-  def self.clients = @@clients
+  require_relative "provider/transport/http"
+  require_relative "provider/transport/http/execution"
+  include Transport::HTTP::Execution
   ##
   # @param [String, nil] key
@@ -36,9 +31,9 @@ class LLM::Provider
     @port = port
     @timeout = timeout
     @ssl = ssl
-    @client = persistent ? persistent_client : nil
     @base_uri = URI("#{ssl ? "https" : "http"}://#{host}:#{port}/")
     @headers = {"User-Agent" => "llm.rb v#{LLM::VERSION}"}
+    @transport = Transport::HTTP.new(host:, port:, timeout:, ssl:, persistent:)
     @monitor = Monitor.new
   end
@@ -47,7 +42,7 @@ class LLM::Provider
   # @return [String]
   # @note The secret key is redacted in inspect for security reasons
   def inspect
-    "#<#{self.class.name}:0x#{object_id.to_s(16)} @key=[REDACTED] @client=#{@client.inspect} @tracer=#{tracer.inspect}>"
+    "#<#{self.class.name}:0x#{object_id.to_s(16)} @key=[REDACTED] @transport=#{transport.inspect} @tracer=#{tracer.inspect}>"
   end
   ##
@@ -312,13 +307,20 @@ class LLM::Provider
   #   # do something with 'llm'
   # @return [LLM::Provider]
   def persist!
-    client = persistent_client
-    lock do
-      tap { @client = client }
-    end
+    transport.persist!
+    self
   end
   alias_method :persistent, :persist!
+  ##
+  # Interrupt the active request, if any.
+  # @param [Fiber] owner
+  # @return [nil]
+  def interrupt!(owner)
+    transport.interrupt!(owner)
+  end
+  alias_method :cancel!, :interrupt!
   ##
   # @param [Object] stream
   # @return [Boolean]
@@ -328,7 +330,7 @@ class LLM::Provider
   private
-  attr_reader :client, :base_uri, :host, :port, :timeout, :ssl
+  attr_reader :base_uri, :host, :port, :timeout, :ssl, :transport
   ##
   # The headers to include with a request
@@ -360,94 +362,6 @@ class LLM::Provider
     raise NotImplementedError
   end
-  ##
-  # Executes a HTTP request
-  # @param [Net::HTTPRequest] request
-  #  The request to send
-  # @param [Proc] b
-  #  A block to yield the response to (optional)
-  # @return [Net::HTTPResponse]
-  #  The response from the server
-  # @raise [LLM::Error::Unauthorized]
-  #  When authentication fails
-  # @raise [LLM::Error::RateLimit]
-  #  When the rate limit is exceeded
-  # @raise [LLM::Error]
-  #  When any other unsuccessful status code is returned
-  # @raise [SystemCallError]
-  #  When there is a network error at the operating system level
-  # @return [Net::HTTPResponse]
-  def execute(request:, operation:, stream: nil, stream_parser: self.stream_parser, model: nil, inputs: nil, &b)
-    tracer = self.tracer
-    span = tracer.on_request_start(operation:, model:, inputs:)
-    http = client || transient_client
-    args = (Net::HTTP === http) ? [request] : [URI.join(base_uri, request.path), request]
-    res = if stream
-      http.request(*args) do |res|
-        if Net::HTTPSuccess === res
-          handler = event_handler.new stream_parser.new(stream)
-          parser = LLM::EventStream::Parser.new
-          parser.register(handler)
-          res.read_body(parser)
-          # If the handler body is empty, the response was
-          # most likely not streamed or parsing failed.
-          # Preserve the raw body in that case so standard
-          # JSON/error handling can parse it later.
-          body = handler.body.empty? ? parser.body : handler.body
-          res.body = Hash === body || Array === body ? LLM::Object.from(body) : body
-        else
-          body = +""
-          res.read_body { body << _1 }
-          res.body = body
-        end
-      ensure
-        handler&.free
-        parser&.free
-      end
-    else
-      b ? http.request(*args) { (Net::HTTPSuccess === _1) ? b.call(_1) : _1 } :
-          http.request(*args)
-    end
-    [handle_response(res, tracer, span), span, tracer]
-  end
-  ##
-  # Handles the response from a request
-  # @param [Net::HTTPResponse] res
-  #  The response to handle
-  # @param [Object, nil] span
-  #  The span
-  # @return [Net::HTTPResponse]
-  def handle_response(res, tracer, span)
-    case res
-    when Net::HTTPOK then res.body = parse_response(res)
-    else error_handler.new(tracer, span, res).raise_error!
-    end
-    res
-  end
-  ##
-  # Parse a HTTP response
-  # @param [Net::HTTPResponse] res
-  # @return [LLM::Object, String]
-  def parse_response(res)
-    case res["content-type"]
-    when %r|\Aapplication/json\s*| then LLM::Object.from(LLM.json.load(res.body))
-    else res.body
-    end
-  end
-  ##
-  # @param [Net::HTTPRequest] req
-  #  The request to set the body stream for
-  # @param [IO] io
-  #  The IO object to set as the body stream
-  # @return [void]
-  def set_body_stream(req, io)
-    req.body_stream = io
-    req["transfer-encoding"] = "chunked" unless req["content-length"]
-  end
   ##
   # Resolves tools to their function representations
   # @param [Array<LLM::Function, LLM::Tool>] tools

data/lib/llm/providers/anthropic/stream_parser.rb CHANGED Viewed

@@ -16,6 +16,9 @@ class LLM::Anthropic
     def initialize(stream)
       @body = {"role" => "assistant", "content" => []}
       @stream = stream
+      @can_emit_content = stream.respond_to?(:on_content)
+      @can_emit_tool_call = stream.respond_to?(:on_tool_call)
+      @can_push_content = stream.respond_to?(:<<)
     end
     ##
@@ -88,15 +91,15 @@ class LLM::Anthropic
     end
     def emit_content(value)
-      if @stream.respond_to?(:on_content)
+      if @can_emit_content
         @stream.on_content(value)
-      elsif @stream.respond_to?(:<<)
+      elsif @can_push_content
         @stream << value
       end
     end
     def emit_tool(tool)
-      return unless @stream.respond_to?(:on_tool_call)
+      return unless @can_emit_tool_call
       function, error = resolve_tool(tool)
       @stream.on_tool_call(function, error)
     end

data/lib/llm/providers/google/stream_parser.rb CHANGED Viewed

@@ -17,6 +17,9 @@ class LLM::Google
       @body = {"candidates" => []}
       @stream = stream
       @emits = {tools: []}
+      @can_emit_content = stream.respond_to?(:on_content)
+      @can_emit_tool_call = stream.respond_to?(:on_tool_call)
+      @can_push_content = stream.respond_to?(:<<)
     end
     ##
@@ -126,15 +129,15 @@ class LLM::Google
     end
     def emit_content(value)
-      if @stream.respond_to?(:on_content)
+      if @can_emit_content
         @stream.on_content(value)
-      elsif @stream.respond_to?(:<<)
+      elsif @can_push_content
         @stream << value
       end
     end
     def emit_tool(pindex, cindex, part)
-      return unless @stream.respond_to?(:on_tool_call)
+      return unless @can_emit_tool_call
       return unless complete_tool?(part)
       key = [cindex, pindex]
       return if @emits[:tools].include?(key)

data/lib/llm/providers/ollama/stream_parser.rb CHANGED Viewed

@@ -14,6 +14,7 @@ class LLM::Ollama
     def initialize(stream)
       @body = {}
       @stream = stream
+      @can_push_content = stream.respond_to?(:<<)
     end
     ##
@@ -36,10 +37,10 @@ class LLM::Ollama
         if key == "message"
           if @body[key]
             @body[key]["content"] << value["content"]
-            @stream << value["content"] if @stream.respond_to?(:<<)
+            @stream << value["content"] if @can_push_content
           else
             @body[key] = value
-            @stream << value["content"] if @stream.respond_to?(:<<)
+            @stream << value["content"] if @can_push_content
           end
         else
           @body[key] = value

data/lib/llm/providers/openai/responses/stream_parser.rb CHANGED Viewed

@@ -4,6 +4,8 @@ class LLM::OpenAI
   ##
   # @private
   class Responses::StreamParser
+    EMPTY_HASH = {}.freeze
     ##
     # Returns the fully constructed response body
     # @return [Hash]
@@ -16,7 +18,15 @@ class LLM::OpenAI
     def initialize(stream)
       @body = {"output" => []}
       @stream = stream
-      @emits = {tools: []}
+      @emits = {tools: {}}
+      @can_emit_content = stream.respond_to?(:on_content)
+      @can_emit_reasoning_content = stream.respond_to?(:on_reasoning_content)
+      @can_emit_tool_call = stream.respond_to?(:on_tool_call)
+      @can_push_content = stream.respond_to?(:<<)
+      @cached_output_index = nil
+      @cached_output_item = nil
+      @cached_content_index = nil
+      @cached_content_part = nil
     end
     ##
@@ -31,126 +41,238 @@ class LLM::OpenAI
     # @return [void]
     def free
       @emits.clear
+      clear_cache!
     end
     private
+    ##
+    # @group Dispatchers
     def handle_event(chunk)
-      case chunk["type"]
-      when "response.created"
-        chunk.each do |k, v|
-          next if k == "type"
-          @body[k] = v
-        end
-        @body["output"] ||= []
-      when "response.in_progress", "response.completed"
-        response = chunk["response"] || {}
-        response.each do |k, v|
-          next if k == "output" && @body["output"].is_a?(Array) && @body["output"].any?
-          @body[k] = v
+      output = @body["output"]
+      type = chunk["type"]
+      if type == "response.output_text.delta"
+        merge_output_text_delta!(output, chunk)
+      elsif type == "response.content_part.added"
+        merge_content_part!(output, chunk)
+      elsif type == "response.output_item.added"
+        merge_output_item!(output, chunk)
+      elsif type == "response.function_call_arguments.delta"
+        merge_function_call_arguments_delta!(output, chunk)
+      elsif type == "response.function_call_arguments.done"
+        merge_function_call_arguments_done!(output, chunk)
+      elsif type == "response.output_item.done"
+        merge_output_item!(output, chunk)
+      elsif type == "response.content_part.done"
+        merge_content_part!(output, chunk, part_key: "part")
+      else
+        case type
+        when "response.created"
+          merge_response_created!(chunk)
+        when "response.in_progress", "response.completed"
+          merge_response_state!(output, chunk)
+        when "response.reasoning_summary_text.delta"
+          merge_reasoning_summary_text_delta!(output, chunk)
+        when "response.reasoning_summary_text.done"
+          merge_reasoning_summary_text_done!(output, chunk)
         end
-        @body["output"] ||= response["output"] || []
-      when "response.output_item.added"
-        output_index = chunk["output_index"]
-        item = chunk["item"]
-        @body["output"][output_index] = item
-        @body["output"][output_index]["content"] ||= []
-        @body["output"][output_index]["summary"] ||= [] if item["type"] == "reasoning"
-      when "response.content_part.added"
-        output_index = chunk["output_index"]
-        content_index = chunk["content_index"]
-        part = chunk["part"]
-        @body["output"][output_index] ||= {"content" => []}
-        @body["output"][output_index]["content"] ||= []
-        @body["output"][output_index]["content"][content_index] = part
-      when "response.reasoning_summary_text.delta"
-        output_item = @body["output"][chunk["output_index"]]
-        if output_item && output_item["type"] == "reasoning"
-          summary_index = chunk["summary_index"] || 0
-          output_item["summary"] ||= []
-          output_item["summary"][summary_index] ||= {"type" => "summary_text", "text" => +""}
-          output_item["summary"][summary_index]["text"] << chunk["delta"]
-          emit_reasoning_content(chunk["delta"])
-        end
-      when "response.reasoning_summary_text.done"
-        output_item = @body["output"][chunk["output_index"]]
-        if output_item && output_item["type"] == "reasoning"
-          summary_index = chunk["summary_index"] || 0
-          output_item["summary"] ||= []
-          output_item["summary"][summary_index] = {
-            "type" => "summary_text",
-            "text" => chunk["text"]
-          }
-        end
-      when "response.output_text.delta"
-        output_index = chunk["output_index"]
-        content_index = chunk["content_index"]
+      end
+    end
+    ##
+    # @endgroup
+    ##
+    # @group Mergers
+    def merge_response_created!(chunk)
+      clear_cache!
+      chunk.each do |k, v|
+        next if k == "type"
+        @body[k] = v
+      end
+      @body["output"] ||= []
+    end
+    def merge_response_state!(output, chunk)
+      clear_cache!
+      response = chunk["response"] || EMPTY_HASH
+      response.each do |k, v|
+        next if k == "output" && Array === output && output.any?
+        @body[k] = v
+      end
+      @body["output"] ||= response["output"] || []
+    end
+    def merge_output_item!(output, chunk)
+      output_index = chunk["output_index"]
+      item = chunk["item"]
+      output[output_index] = item
+      item["content"] ||= [] if item["type"] == "message" || item.key?("content")
+      item["summary"] ||= [] if item["type"] == "reasoning"
+      cache_output_item!(output_index, item)
+    end
+    def merge_content_part!(output, chunk, part_key: "part")
+      output_index = chunk["output_index"]
+      content_index = chunk["content_index"]
+      part = chunk[part_key]
+      output_item = output_item_at(output, output_index)
+      unless output_item
+        output_item = {"content" => []}
+        output[output_index] = output_item
+        cache_output_item!(output_index, output_item)
+      end
+      content = output_item["content"] ||= []
+      content[content_index] = part
+      cache_content_part!(content_index, part)
+    end
+    def merge_output_text_delta!(output, chunk)
+      content_part = content_part_at(output, chunk["output_index"], chunk["content_index"])
+      if content_part && content_part["type"] == "output_text"
         delta_text = chunk["delta"]
-        output_item = @body["output"][output_index]
-        if output_item && output_item["content"]
-          content_part = output_item["content"][content_index]
-          if content_part && content_part["type"] == "output_text"
-            content_part["text"] ||= ""
-            content_part["text"] << delta_text
-            emit_content(delta_text)
-          end
+        if text = content_part["text"]
+          text << delta_text
+        else
+          content_part["text"] = delta_text
         end
-      when "response.function_call_arguments.delta"
-        output_item = @body["output"][chunk["output_index"]]
-        if output_item && output_item["type"] == "function_call"
-          output_item["arguments"] ||= +""
-          output_item["arguments"] << chunk["delta"]
+        emit_content(delta_text)
+      end
+    end
+    def merge_reasoning_summary_text_delta!(output, chunk)
+      output_item = output_item_at(output, chunk["output_index"])
+      if output_item && output_item["type"] == "reasoning"
+        summary_index = chunk["summary_index"] || 0
+        delta = chunk["delta"]
+        summary = output_item["summary"] ||= []
+        if summary_item = summary[summary_index]
+          summary_item["text"] << delta
+        else
+          summary[summary_index] = {"type" => "summary_text", "text" => delta}
         end
-      when "response.function_call_arguments.done"
-        output_item = @body["output"][chunk["output_index"]]
-        if output_item && output_item["type"] == "function_call"
-          output_item["arguments"] = chunk["arguments"]
-          emit_tool(chunk["output_index"], output_item)
+        emit_reasoning_content(delta)
+      end
+    end
+    def merge_reasoning_summary_text_done!(output, chunk)
+      output_item = output_item_at(output, chunk["output_index"])
+      if output_item && output_item["type"] == "reasoning"
+        summary_index = chunk["summary_index"] || 0
+        output_item["summary"] ||= []
+        output_item["summary"][summary_index] = {
+          "type" => "summary_text",
+          "text" => chunk["text"]
+        }
+      end
+    end
+    def merge_function_call_arguments_delta!(output, chunk)
+      output_item = output_item_at(output, chunk["output_index"])
+      if output_item && output_item["type"] == "function_call"
+        if arguments = output_item["arguments"]
+          arguments << chunk["delta"]
+        else
+          output_item["arguments"] = chunk["delta"]
         end
-      when "response.output_item.done"
-        output_index = chunk["output_index"]
-        item = chunk["item"]
-        @body["output"][output_index] = item
-      when "response.content_part.done"
-        output_index = chunk["output_index"]
-        content_index = chunk["content_index"]
-        part = chunk["part"]
-        @body["output"][output_index] ||= {"content" => []}
-        @body["output"][output_index]["content"] ||= []
-        @body["output"][output_index]["content"][content_index] = part
       end
     end
+    def merge_function_call_arguments_done!(output, chunk)
+      output_item = output_item_at(output, chunk["output_index"])
+      if output_item && output_item["type"] == "function_call"
+        output_item["arguments"] = chunk["arguments"]
+        emit_tool(chunk["output_index"], output_item)
+      end
+    end
+    ##
+    # @endgroup
+    ##
+    # @group Cache
+    def output_item_at(output, output_index)
+      if @cached_output_index == output_index
+        @cached_output_item
+      else
+        cache_output_item!(output_index, output[output_index])
+      end
+    end
+    def content_part_at(output, output_index, content_index)
+      if @cached_output_index == output_index && @cached_content_index == content_index
+        @cached_content_part
+      else
+        output_item = output_item_at(output, output_index)
+        content = output_item && output_item["content"]
+        cache_content_part!(content_index, content && content[content_index])
+      end
+    end
+    def cache_output_item!(output_index, output_item)
+      @cached_output_index = output_index
+      @cached_output_item = output_item
+      @cached_content_index = nil
+      @cached_content_part = nil
+      output_item
+    end
+    def cache_content_part!(content_index, content_part)
+      @cached_content_index = content_index
+      @cached_content_part = content_part
+      content_part
+    end
+    def clear_cache!
+      @cached_output_index = nil
+      @cached_output_item = nil
+      @cached_content_index = nil
+      @cached_content_part = nil
+    end
+    ##
+    # @endgroup
+    ##
+    # @group Emitters
     def emit_content(value)
-      if @stream.respond_to?(:on_content)
+      if @can_emit_content
         @stream.on_content(value)
-      elsif @stream.respond_to?(:<<)
+      elsif @can_push_content
         @stream << value
       end
     end
     def emit_reasoning_content(value)
-      @stream.on_reasoning_content(value) if @stream.respond_to?(:on_reasoning_content)
+      @stream.on_reasoning_content(value) if @can_emit_reasoning_content
     end
     def emit_tool(index, tool)
-      return unless @stream.respond_to?(:on_tool_call)
-      return unless complete_tool?(tool)
-      return if @emits[:tools].include?(index)
-      function, error = resolve_tool(tool)
-      @emits[:tools] << index
+      return unless @can_emit_tool_call
+      return if @emits[:tools][index]
+      return unless tool["call_id"] && tool["name"]
+      arguments = parse_arguments(tool["arguments"])
+      return unless arguments
+      function, error = resolve_tool(tool, arguments)
+      @emits[:tools][index] = true
       @stream.on_tool_call(function, error)
     end
-    def complete_tool?(tool)
-      tool["call_id"] && tool["name"] && parse_arguments(tool["arguments"])
-    end
+    ##
+    # @endgroup
-    def resolve_tool(tool)
+    ##
+    # @group Resolvers
+    def resolve_tool(tool, arguments)
       registered = LLM::Function.find_by_name(tool["name"])
       fn = (registered || LLM::Function.new(tool["name"])).dup.tap do |fn|
         fn.id = tool["call_id"]
-        fn.arguments = parse_arguments(tool["arguments"])
+        fn.arguments = arguments
       end
       [fn, (registered ? nil : @stream.tool_not_found(fn))]
     end
@@ -162,5 +284,8 @@ class LLM::OpenAI
     rescue *LLM.json.parser_error
       nil
     end
+    ##
+    # @endgroup
   end
 end