llm.rb 4.12.0 → 4.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +84 -0
- data/README.md +126 -736
- data/lib/llm/context.rb +12 -2
- data/lib/llm/error.rb +4 -0
- data/lib/llm/eventhandler.rb +16 -12
- data/lib/llm/eventstream/event.rb +15 -5
- data/lib/llm/eventstream/parser.rb +29 -14
- data/lib/llm/function.rb +1 -1
- data/lib/llm/mcp/command.rb +1 -1
- data/lib/llm/mcp/error.rb +31 -1
- data/lib/llm/mcp/mailbox.rb +23 -0
- data/lib/llm/mcp/pipe.rb +1 -1
- data/lib/llm/mcp/router.rb +44 -0
- data/lib/llm/mcp/rpc.rb +31 -15
- data/lib/llm/mcp/transport/http/event_handler.rb +11 -9
- data/lib/llm/mcp/transport/http.rb +2 -2
- data/lib/llm/mcp/transport/stdio.rb +1 -1
- data/lib/llm/mcp.rb +46 -2
- data/lib/llm/provider/transport/http/execution.rb +115 -0
- data/lib/llm/provider/transport/http/interruptible.rb +109 -0
- data/lib/llm/provider/transport/http/stream_decoder.rb +92 -0
- data/lib/llm/provider/transport/http.rb +144 -0
- data/lib/llm/provider.rb +17 -103
- data/lib/llm/providers/openai/request_adapter/respond.rb +11 -5
- data/lib/llm/providers/openai/response_adapter/responds.rb +13 -1
- data/lib/llm/providers/openai/responses/stream_parser.rb +31 -0
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +8 -0
- data/llm.gemspec +16 -6
- metadata +23 -8
- data/lib/llm/client.rb +0 -36
data/lib/llm/context.rb
CHANGED

@@ -62,6 +62,7 @@ module LLM
       @mode = params.delete(:mode) || :completions
       @params = {model: llm.default_model, schema: nil}.compact.merge!(params)
       @messages = LLM::Buffer.new(llm)
+      @owner = Fiber.current
     end

     ##
@@ -103,9 +104,9 @@ module LLM
     # res = ctx.respond("What is the capital of France?")
     # puts res.output_text
     def respond(prompt, params = {})
-      res_id = @messages.find(&:assistant?)&.response&.response_id
-      params = params.merge(previous_response_id: res_id, input: @messages.to_a).compact
       params = @params.merge(params)
+      res_id = params[:store] == false ? nil : @messages.find(&:assistant?)&.response&.response_id
+      params = params.merge(previous_response_id: res_id, input: @messages.to_a).compact
       res = @llm.responses.create(prompt, params)
       role = params[:role] || @llm.user_role
       @messages.concat LLM::Prompt === prompt ? prompt.to_a : [LLM::Message.new(role, prompt)]
@@ -184,6 +185,15 @@ module LLM
       end
     end

+    ##
+    # Interrupt the active request, if any.
+    # This is inspired by Go's context cancellation model.
+    # @return [nil]
+    def interrupt!
+      llm.interrupt!(@owner)
+    end
+    alias_method :cancel!, :interrupt!
+
     ##
     # Returns token usage accumulated in this context
     # @note
data/lib/llm/error.rb
CHANGED

@@ -55,6 +55,10 @@ module LLM
   # When stuck in a tool call loop
   ToolLoopError = Class.new(Error)

+  ##
+  # When a request is interrupted
+  Interrupt = Class.new(Error)
+
   ##
   # When a tool call cannot be mapped to a local tool
   NoSuchToolError = Class.new(Error)
data/lib/llm/eventhandler.rb
CHANGED

@@ -13,13 +13,15 @@ module LLM

     ##
     # "data:" event callback
-    # @param [LLM::EventStream::Event] event
+    # @param [LLM::EventStream::Event, String, nil] event
+    # @param [String, nil] chunk
     # @return [void]
-    def on_data(event)
-
-
-
-
+    def on_data(event, chunk = nil)
+      value = chunk ? event : event.value
+      return if value == "[DONE]"
+      payload = LLM.json.load(value)
+      return unless payload
+      @parser.parse!(payload)
     rescue *LLM.json.parser_error
     end

@@ -28,13 +30,15 @@ module LLM
     # is received, regardless of whether it has
     # a field name or not. Primarily for ollama,
     # which does emit Server-Sent Events (SSE).
-    # @param [LLM::EventStream::Event] event
+    # @param [LLM::EventStream::Event, String, nil] event
+    # @param [String, nil] chunk
     # @return [void]
-    def on_chunk(event)
-
-
-
-
+    def on_chunk(event, chunk = nil)
+      raw_chunk = chunk || event&.chunk || event
+      return if raw_chunk == "[DONE]"
+      payload = LLM.json.load(raw_chunk)
+      return unless payload
+      @parser.parse!(payload)
     rescue *LLM.json.parser_error
     end

data/lib/llm/eventstream/event.rb
CHANGED

@@ -4,8 +4,17 @@ module LLM::EventStream
   ##
   # @private
   class Event
-
-
+    UNSET = Object.new.freeze
+
+    def self.parse(chunk)
+      newline = chunk.end_with?("\n") ? chunk.bytesize - 1 : chunk.bytesize
+      separator = chunk.index(":")
+      return [nil, nil] unless separator
+      field = chunk.byteslice(0, separator)
+      value_start = separator + (chunk.getbyte(separator + 1) == 32 ? 2 : 1)
+      value = value_start < newline ? chunk.byteslice(value_start, newline - value_start) : nil
+      [field, value]
+    end

     ##
     # Returns the field name
@@ -25,9 +34,10 @@ module LLM::EventStream
     ##
     # @param [String] chunk
     # @return [LLM::EventStream::Event]
-    def initialize(chunk)
-      @field = chunk
-      @
+    def initialize(chunk, field: UNSET, value: UNSET)
+      @field, @value = self.class.parse(chunk) if field.equal?(UNSET) || value.equal?(UNSET)
+      @field = field unless field.equal?(UNSET)
+      @value = value unless value.equal?(UNSET)
       @chunk = chunk
     end

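
Based on the parse implementation shown above, the new class method splits one SSE line into a [field, value] pair. A quick sketch of the expected results (Event is marked @private, so this is illustrative rather than public API):

  LLM::EventStream::Event.parse("data: hello\n")  #=> ["data", "hello"]
  LLM::EventStream::Event.parse("data:\n")        #=> ["data", nil]   (field with no value)
  LLM::EventStream::Event.parse("hello\n")        #=> [nil, nil]      (no ":" separator)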
data/lib/llm/eventstream/parser.rb
CHANGED

@@ -4,6 +4,8 @@ module LLM::EventStream
   ##
   # @private
   class Parser
+    COMPACT_THRESHOLD = 4096
+
     ##
     # @return [LLM::EventStream::Parser]
     def initialize
@@ -42,7 +44,8 @@ module LLM::EventStream
     # Returns the internal buffer
     # @return [String]
     def body
-      @buffer.dup
+      return @buffer.dup if @cursor.zero?
+      @buffer.byteslice(@cursor, @buffer.bytesize - @cursor) || +""
     end

     ##
@@ -55,34 +58,46 @@ module LLM::EventStream

     private

-    def parse!(
-
-
+    def parse!(chunk)
+      field, value = Event.parse(chunk)
+      dispatch_visitors(field, value, chunk)
+      dispatch_callbacks(field, value, chunk)
+    end
+
+    def dispatch_visitors(field, value, chunk)
+      @visitors.each { dispatch_visitor(_1, field, value, chunk) }
     end

-    def
-
-
+    def dispatch_callbacks(field, value, chunk)
+      callbacks = @events[field]
+      return if callbacks.empty?
+      event = Event.new(chunk, field:, value:)
+      callbacks.each { _1.call(event) }
     end

-    def dispatch_visitor(visitor,
-      method = "on_#{
+    def dispatch_visitor(visitor, field, value, chunk)
+      method = "on_#{field}"
       if visitor.respond_to?(method)
-        visitor.public_send(method,
+        visitor.public_send(method, value, chunk)
       elsif visitor.respond_to?("on_chunk")
-        visitor.on_chunk(
+        visitor.on_chunk(nil, chunk)
       end
     end

     def each_line
       while (newline = @buffer.index("\n", @cursor))
-        line = @buffer
+        line = @buffer.byteslice(@cursor, newline - @cursor + 1)
         @cursor = newline + 1
         yield(line)
       end
       return if @cursor.zero?
-      @
-
+      if @cursor >= @buffer.bytesize
+        @buffer.clear
+        @cursor = 0
+      elsif @cursor >= COMPACT_THRESHOLD
+        @buffer = @buffer.byteslice(@cursor, @buffer.bytesize - @cursor) || +""
+        @cursor = 0
+      end
     end
   end
 end
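
The COMPACT_THRESHOLD change means the parser no longer re-slices the buffer after every consumed line: it advances a cursor and only compacts once the consumed prefix grows large. An isolated illustration of that idea in plain Ruby (not llm.rb's API):

  COMPACT_THRESHOLD = 4096
  buffer = +""
  cursor = 0

  feed = lambda do |chunk|
    buffer << chunk
    lines = []
    while (newline = buffer.index("\n", cursor))
      lines << buffer.byteslice(cursor, newline - cursor + 1)
      cursor = newline + 1
    end
    if cursor >= buffer.bytesize
      buffer.clear                  # everything consumed: reset cheaply
      cursor = 0
    elsif cursor >= COMPACT_THRESHOLD
      buffer = buffer.byteslice(cursor, buffer.bytesize - cursor) || +""
      cursor = 0                    # drop the consumed prefix only occasionally
    end
    lines
  end

  p feed.call("data: a\ndata: b")   #=> ["data: a\n"]  ("data: b" stays buffered)
  p feed.call("\n")                 #=> ["data: b\n"]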
data/lib/llm/function.rb
CHANGED

@@ -257,7 +257,7 @@ class LLM::Function
     when "LLM::OpenAI::Responses"
       {
         type: "function", name: @name, description: @description,
-        parameters: @params.to_h.merge(additionalProperties: false), strict:
+        parameters: (@params || {type: "object", properties: {}}).to_h.merge(additionalProperties: false), strict: false
       }.compact
     else
       {
data/lib/llm/mcp/command.rb
CHANGED

@@ -74,7 +74,7 @@ class LLM::MCP
   #  The IO stream to read from (:stdout, :stderr)
   # @raise [LLM::Error]
   #  When the command is not running
-  # @raise [IO::
+  # @raise [IO::EAGAINWaitReadable]
   #  When no complete message is available to read
   # @return [String]
   #  The next complete line from the specified IO stream
data/lib/llm/mcp/error.rb
CHANGED

@@ -1,7 +1,7 @@
 # frozen_string_literal: true

 class LLM::MCP
-
+  Error = Class.new(LLM::Error) do
     attr_reader :code, :data

     ##
@@ -27,5 +27,35 @@ class LLM::MCP
     end
   end

+  MismatchError = Class.new(Error) do
+    ##
+    # @return [Integer, String]
+    #  The request id the client was waiting for
+    attr_reader :expected_id
+
+    ##
+    # @return [Integer, String]
+    #  The response id received from the server
+    attr_reader :actual_id
+
+    ##
+    # @param [Integer, String] expected_id
+    #  The request id the client was waiting for
+    # @param [Integer, String] actual_id
+    #  The response id received from the server instead
+    def initialize(expected_id:, actual_id:)
+      @expected_id = expected_id
+      @actual_id = actual_id
+      super(message)
+    end
+
+    ##
+    # @return [String]
+    def message
+      "mismatched MCP response id #{actual_id.inspect} " \
+      "while waiting for #{expected_id.inspect}"
+    end
+  end
+
   TimeoutError = Class.new(Error)
 end
data/lib/llm/mcp/mailbox.rb
ADDED

@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+class LLM::MCP
+  ##
+  # A per-request mailbox for routing a JSON-RPC response back to the
+  # caller waiting on that request id.
+  class Mailbox
+    def initialize
+      @queue = Queue.new
+    end
+
+    def <<(message)
+      @queue << message
+      self
+    end
+
+    def pop
+      @queue.pop(true)
+    rescue ThreadError
+      nil
+    end
+  end
+end
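
Mailbox#pop is non-blocking and returns nil when nothing has arrived yet, which is why the RPC receive loop further below can keep polling the transport. A quick sketch of that behaviour:

  mailbox = LLM::MCP::Mailbox.new
  mailbox.pop                                  #=> nil (empty queue returns nil instead of blocking)
  mailbox << {"id" => 1, "result" => "pong"}
  mailbox.pop                                  #=> {"id" => 1, "result" => "pong"}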
data/lib/llm/mcp/pipe.rb
CHANGED

data/lib/llm/mcp/router.rb
ADDED

@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+
+class LLM::MCP
+  ##
+  # Coordinates shared access to a transport by routing JSON-RPC
+  # responses to the mailbox waiting on the matching request id.
+  class Router
+    def initialize
+      @request_id = -1
+      @pending = {}
+      @lock = Monitor.new
+      @writer = Monitor.new
+      @reader = Monitor.new
+    end
+
+    def register
+      @lock.synchronize do
+        @request_id += 1
+        mailbox = LLM::MCP::Mailbox.new
+        @pending[@request_id] = mailbox
+        [@request_id, mailbox]
+      end
+    end
+
+    def clear(id)
+      @lock.synchronize { @pending.delete(id) }
+    end
+
+    def read(transport)
+      @reader.synchronize { transport.read_nonblock }
+    end
+
+    def write(transport, message)
+      @writer.synchronize { transport.write(message) }
+    end
+
+    def route(response)
+      mailbox = @lock.synchronize { @pending[response["id"]] }
+      raise LLM::MCP::MismatchError.new(expected_id: nil, actual_id: response["id"]) unless mailbox
+      mailbox << response
+      nil
+    end
+  end
+end
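
Taken together, Router and Mailbox let several callers share one transport: each caller registers a mailbox keyed by a fresh request id, and whoever happens to read a response hands it to the right mailbox via route. A toy sketch, assuming llm.rb's MCP support is loaded; the fake transport only needs #write and #read_nonblock, mirroring how the RPC module below uses it:

  FakeTransport = Struct.new(:inbox) do
    def write(message) = nil          # pretend to send the request
    def read_nonblock = inbox.shift   # pretend a response arrived
  end

  router    = LLM::MCP::Router.new
  transport = FakeTransport.new([{"id" => 1, "result" => "second"}, {"id" => 0, "result" => "first"}])

  id_a, mailbox_a = router.register   # => [0, mailbox]
  id_b, mailbox_b = router.register   # => [1, mailbox]
  router.write(transport, {jsonrpc: "2.0", id: id_a, method: "ping"})
  router.write(transport, {jsonrpc: "2.0", id: id_b, method: "ping"})

  2.times { router.route(router.read(transport)) }
  mailbox_a.pop["result"]             #=> "first"
  mailbox_b.pop["result"]             #=> "second"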
data/lib/llm/mcp/rpc.rb
CHANGED

@@ -27,13 +27,15 @@ class LLM::MCP
     def call(transport, method, params = {})
       message = {jsonrpc: "2.0", method:, params: default_params(method).merge(params)}
       if notification?(method)
-
-        nil
-
-
-
-
-      recv(transport, id)
+        router.write(transport, message)
+        return nil
+      end
+      id, mailbox = router.register
+      begin
+        router.write(transport, message.merge(id:))
+        recv(transport, id, mailbox)
+      ensure
+        router.clear(id)
       end
     end

@@ -49,16 +51,12 @@ class LLM::MCP
     #  When the MCP process returns an error
     # @return [Object, nil]
     #  The result returned by the MCP process
-    def recv(transport, id)
+    def recv(transport, id, mailbox)
       poll(timeout:, ex: [IO::WaitReadable]) do
         loop do
-          res =
-
-
-          raise LLM::MCP::Error.from(response: res)
-          else
-            break res["result"]
-          end
+          res = mailbox.pop
+          return handle_response(id, res) if res
+          route_response(router.read(transport), id)
         end
       end
     end

@@ -101,6 +99,8 @@ class LLM::MCP
     #  The exceptions to retry when raised
     # @yield
     #  The block to run
+    # @raise [LLM::MCP::MismatchError]
+    #  When an unrelated response id is received while waiting
     # @raise [LLM::MCP::TimeoutError]
     #  When the block takes longer than the timeout
     # @return [Object]
@@ -114,5 +114,21 @@ class LLM::MCP
         sleep 0.05
       end
     end
+
+    def handle_response(id, res)
+      raise LLM::MCP::Error.from(response: res) if res["error"]
+      return res["result"] if res["id"] == id
+      raise LLM::MCP::MismatchError.new(expected_id: id, actual_id: res["id"])
+    end
+
+    def route_response(res, id)
+      return nil if res["method"]
+      return router.route(res) if res.key?("id")
+      raise LLM::MCP::MismatchError.new(expected_id: id, actual_id: nil)
+    end
+
+    def router
+      @router ||= LLM::MCP::Router.new
+    end
   end
 end
data/lib/llm/mcp/transport/http/event_handler.rb
CHANGED

@@ -21,29 +21,31 @@ module LLM::MCP::Transport

     ##
     # Receives the SSE event name.
-    # @param [LLM::EventStream::Event] event
+    # @param [LLM::EventStream::Event, String, nil] event
+    # @param [String, nil] chunk
     #  The event stream event
     # @return [void]
-    def on_event(event)
-      @event = event.value
+    def on_event(event, chunk = nil)
+      @event = chunk ? event : event.value
     end

     ##
     # Receives one line of SSE data.
-    # @param [LLM::EventStream::Event] event
+    # @param [LLM::EventStream::Event, String, nil] event
+    # @param [String, nil] chunk
     #  The event stream event
     # @return [void]
-    def on_data(event)
-      @data << event.value.to_s
+    def on_data(event, chunk = nil)
+      @data << (chunk ? event : event.value).to_s
     end

     # The generic event stream parser dispatches one line at a time.
     # A blank line terminates the current SSE event.
-    # @param [LLM::EventStream::Event] event
+    # @param [LLM::EventStream::Event, String] event
     #  The event stream event
     # @return [void]
-    def on_chunk(event)
-      flush if event
+    def on_chunk(event, chunk = nil)
+      flush if (chunk || event&.chunk || event) == "\n"
     end

     private
data/lib/llm/mcp/transport/http.rb
CHANGED

@@ -82,13 +82,13 @@ module LLM::MCP::Transport
     # Reads the next queued message without blocking.
     # @raise [LLM::MCP::Error]
     #  When the transport is not running
-    # @raise [IO::
+    # @raise [IO::EAGAINWaitReadable]
     #  When no complete message is available to read
     # @return [Hash]
     def read_nonblock
       lock do
         raise LLM::MCP::Error, "MCP transport is not running" unless running?
-        raise IO::
+        raise IO::EAGAINWaitReadable, "no complete message available" if @queue.empty?
         @queue.shift
       end
     end
data/lib/llm/mcp/transport/stdio.rb
CHANGED

@@ -57,7 +57,7 @@ module LLM::MCP::Transport
     # Reads a message from the MCP process without blocking.
     # @raise [LLM::Error]
     #  When the transport is not running
-    # @raise [IO::
+    # @raise [IO::EAGAINWaitReadable]
     #  When no complete message is available to read
     # @return [Hash]
     #  The next message from the MCP process
data/lib/llm/mcp.rb
CHANGED

@@ -10,11 +10,14 @@
 # transports and focuses on discovering tools that can be used through
 # {LLM::Context LLM::Context} and {LLM::Agent LLM::Agent}.
 #
-#
-#
+# An MCP client is stateful. Coordinate lifecycle operations such as
+# {#start} and {#stop}; request methods can be issued concurrently and
+# responses are matched by JSON-RPC id.
 class LLM::MCP
   require_relative "mcp/error"
   require_relative "mcp/command"
+  require_relative "mcp/mailbox"
+  require_relative "mcp/router"
   require_relative "mcp/rpc"
   require_relative "mcp/pipe"
   require_relative "mcp/transport/http"
@@ -121,6 +124,34 @@ class LLM::MCP
     res["tools"].map { LLM::Tool.mcp(self, _1) }
   end

+  ##
+  # Returns the prompts provided by the MCP process.
+  # @return [Array<LLM::Object>]
+  def prompts
+    res = call(transport, "prompts/list")
+    LLM::Object.from(res["prompts"])
+  end
+
+  ##
+  # Returns a prompt by name.
+  # @param [String] name The prompt name
+  # @param [Hash<String, String>, nil] arguments The prompt arguments
+  # @return [LLM::Object]
+  def find_prompt(name:, arguments: nil)
+    params = {name:}
+    params[:arguments] = arguments if arguments
+    res = call(transport, "prompts/get", params)
+    res["messages"] = [*res["messages"]].map do |message|
+      LLM::Message.new(
+        message["role"],
+        adapt_content(message["content"]),
+        {original_content: message["content"]}
+      )
+    end
+    LLM::Object.from(res)
+  end
+  alias_method :get_prompt, :find_prompt
+
   ##
   # Calls a tool by name with the given arguments
   # @param [String] name The name of the tool to call
@@ -135,6 +166,19 @@ class LLM::MCP

   attr_reader :llm, :command, :transport, :timeout

+  def adapt_content(content)
+    case content
+    when String
+      content
+    when Hash
+      content["type"] == "text" ? content["text"].to_s : LLM::Object.from(content)
+    when Array
+      content.map { adapt_content(_1) }
+    else
+      content
+    end
+  end
+
   def adapt_tool_result(result)
     if result["structuredContent"]
       result["structuredContent"]
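
The new prompts and find_prompt/get_prompt methods expose MCP prompt discovery alongside the existing tool discovery. A hedged sketch, assuming an already started client in mcp; the prompt name "summarize", its arguments, and the .name/.messages accessors on the returned LLM::Object values are illustrative:

  mcp.prompts.each { |prompt| puts prompt.name }

  prompt = mcp.find_prompt(name: "summarize", arguments: {"style" => "short"})
  prompt.messages.each { |message| puts "#{message.role}: #{message.content}" }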
data/lib/llm/provider/transport/http/execution.rb
ADDED

@@ -0,0 +1,115 @@
+# frozen_string_literal: true
+
+module LLM::Provider::Transport
+  class HTTP
+    ##
+    # Internal HTTP request execution methods for {LLM::Provider}.
+    #
+    # This module handles provider-side HTTP execution, response parsing,
+    # streaming, and request body setup through
+    # {LLM::Provider::Transport::HTTP}.
+    #
+    # @api private
+    module HTTP::Execution
+      private
+
+      ##
+      # Executes a HTTP request
+      # @param [Net::HTTPRequest] request
+      #  The request to send
+      # @param [Proc] b
+      #  A block to yield the response to (optional)
+      # @return [Net::HTTPResponse]
+      #  The response from the server
+      # @raise [LLM::Error::Unauthorized]
+      #  When authentication fails
+      # @raise [LLM::Error::RateLimit]
+      #  When the rate limit is exceeded
+      # @raise [LLM::Error]
+      #  When any other unsuccessful status code is returned
+      # @raise [SystemCallError]
+      #  When there is a network error at the operating system level
+      # @return [Net::HTTPResponse]
+      def execute(request:, operation:, stream: nil, stream_parser: self.stream_parser, model: nil, inputs: nil, &b)
+        owner = transport.request_owner
+        tracer = self.tracer
+        span = tracer.on_request_start(operation:, model:, inputs:)
+        res = transport.request(request, owner:) do |http|
+          perform_request(http, request, stream, stream_parser, &b)
+        end
+        [handle_response(res, tracer, span), span, tracer]
+      rescue *LLM::Provider::Transport::HTTP::Interruptible::INTERRUPT_ERRORS
+        raise LLM::Interrupt, "request interrupted" if transport.interrupted?(owner)
+        raise
+      end
+
+      ##
+      # Handles the response from a request
+      # @param [Net::HTTPResponse] res
+      #  The response to handle
+      # @param [Object, nil] span
+      #  The span
+      # @return [Net::HTTPResponse]
+      def handle_response(res, tracer, span)
+        case res
+        when Net::HTTPOK then res.body = parse_response(res)
+        else error_handler.new(tracer, span, res).raise_error!
+        end
+        res
+      end
+
+      ##
+      # Parse a HTTP response
+      # @param [Net::HTTPResponse] res
+      # @return [LLM::Object, String]
+      def parse_response(res)
+        case res["content-type"]
+        when %r{\Aapplication/json\s*} then LLM::Object.from(LLM.json.load(res.body))
+        else res.body
+        end
+      end
+
+      ##
+      # @param [Net::HTTPRequest] req
+      #  The request to set the body stream for
+      # @param [IO] io
+      #  The IO object to set as the body stream
+      # @return [void]
+      def set_body_stream(req, io)
+        req.body_stream = io
+        req["transfer-encoding"] = "chunked" unless req["content-length"]
+      end
+
+      ##
+      # Performs the request on the given HTTP connection.
+      # @param [Net::HTTP] http
+      # @param [Net::HTTPRequest] request
+      # @param [Object, nil] stream
+      # @param [Class] stream_parser
+      # @param [Proc, nil] b
+      # @return [Net::HTTPResponse]
+      def perform_request(http, request, stream, stream_parser, &b)
+        if stream
+          http.request(request) do |res|
+            if Net::HTTPSuccess === res
+              parser = StreamDecoder.new(stream_parser.new(stream))
+              res.read_body(parser)
+              body = parser.body
+              res.body = (Hash === body || Array === body) ? LLM::Object.from(body) : body
+            else
+              body = +""
+              res.read_body { body << _1 }
+              res.body = body
+            end
+          ensure
+            parser&.free
+          end
+        elsif b
+          http.request(request) { (Net::HTTPSuccess === _1) ? b.call(_1) : _1 }
+        else
+          http.request(request)
+        end
+      end
+    end
+  end
+end