llm.rb 8.1.0 → 9.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +120 -2
- data/README.md +161 -514
- data/lib/llm/active_record/acts_as_llm.rb +7 -8
- data/lib/llm/agent.rb +36 -16
- data/lib/llm/context.rb +30 -26
- data/lib/llm/contract/completion.rb +45 -0
- data/lib/llm/cost.rb +81 -4
- data/lib/llm/error.rb +1 -1
- data/lib/llm/function/array.rb +8 -5
- data/lib/llm/function/call_group.rb +39 -0
- data/lib/llm/function/fork/task.rb +6 -0
- data/lib/llm/function/ractor/task.rb +6 -0
- data/lib/llm/function/task.rb +10 -0
- data/lib/llm/function.rb +1 -0
- data/lib/llm/mcp/transport/http.rb +26 -46
- data/lib/llm/mcp/transport/stdio.rb +0 -8
- data/lib/llm/mcp.rb +6 -23
- data/lib/llm/provider.rb +23 -20
- data/lib/llm/providers/anthropic/error_handler.rb +6 -7
- data/lib/llm/providers/anthropic/files.rb +2 -2
- data/lib/llm/providers/anthropic/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/anthropic.rb +1 -1
- data/lib/llm/providers/bedrock/error_handler.rb +8 -9
- data/lib/llm/providers/bedrock/models.rb +13 -13
- data/lib/llm/providers/bedrock/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/bedrock.rb +1 -1
- data/lib/llm/providers/google/error_handler.rb +6 -7
- data/lib/llm/providers/google/files.rb +2 -4
- data/lib/llm/providers/google/images.rb +1 -1
- data/lib/llm/providers/google/models.rb +0 -2
- data/lib/llm/providers/google/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/google.rb +1 -1
- data/lib/llm/providers/ollama/error_handler.rb +6 -7
- data/lib/llm/providers/ollama/models.rb +0 -2
- data/lib/llm/providers/ollama/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/ollama.rb +1 -1
- data/lib/llm/providers/openai/audio.rb +3 -3
- data/lib/llm/providers/openai/error_handler.rb +6 -7
- data/lib/llm/providers/openai/files.rb +2 -2
- data/lib/llm/providers/openai/images.rb +3 -3
- data/lib/llm/providers/openai/models.rb +1 -1
- data/lib/llm/providers/openai/response_adapter/completion.rb +42 -0
- data/lib/llm/providers/openai/response_adapter/responds.rb +39 -0
- data/lib/llm/providers/openai/responses.rb +2 -2
- data/lib/llm/providers/openai/vector_stores.rb +1 -1
- data/lib/llm/providers/openai.rb +1 -1
- data/lib/llm/response.rb +10 -8
- data/lib/llm/sequel/plugin.rb +7 -8
- data/lib/llm/stream/queue.rb +15 -42
- data/lib/llm/stream.rb +4 -4
- data/lib/llm/transport/execution.rb +67 -0
- data/lib/llm/transport/http.rb +134 -0
- data/lib/llm/transport/persistent_http.rb +152 -0
- data/lib/llm/transport/response/http.rb +113 -0
- data/lib/llm/transport/response.rb +112 -0
- data/lib/llm/{provider/transport/http → transport}/stream_decoder.rb +8 -4
- data/lib/llm/transport.rb +139 -0
- data/lib/llm/usage.rb +14 -5
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +2 -12
- data/llm.gemspec +2 -16
- metadata +11 -19
- data/lib/llm/provider/transport/http/execution.rb +0 -115
- data/lib/llm/provider/transport/http/interruptible.rb +0 -114
- data/lib/llm/provider/transport/http.rb +0 -145
- data/lib/llm/utils.rb +0 -19
data/lib/llm/providers/openai/error_handler.rb
CHANGED

@@ -5,7 +5,7 @@ class LLM::OpenAI
   # @private
   class ErrorHandler
     ##
-    # @return [
+    # @return [LLM::Transport::Response]
     # Non-2XX response from the server
     attr_reader :res

@@ -19,13 +19,13 @@ class LLM::OpenAI
     # The tracer
     # @param [Object, nil] span
     # The span
-    # @param [Net::HTTPResponse] res
+    # @param [LLM::Transport::Response, Net::HTTPResponse] res
     # The response from the server
     # @return [LLM::OpenAI::ErrorHandler]
     def initialize(tracer, span, res)
      @tracer = tracer
      @span = span
-     @res = res
+     @res = LLM::Transport::Response.from(res)
     end

@@ -49,12 +49,11 @@ class LLM::OpenAI
     ##
     # @return [LLM::Error]
     def error
-
-      when Net::HTTPServerError
+      if res.server_error?
        LLM::ServerError.new("Server error").tap { _1.response = res }
-
+      elsif res.unauthorized?
        LLM::UnauthorizedError.new("Authentication error").tap { _1.response = res }
-
+      elsif res.rate_limited?
        LLM::RateLimitError.new("Too many requests").tap { _1.response = res }
      else
        error = body["error"] || {}
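The error handler now asks the response itself (`server_error?`, `unauthorized?`, `rate_limited?`) instead of matching on `Net::HTTPResponse` subclasses. A minimal sketch of how such status predicates can be defined — the `StatusPredicates` module and `Response` struct below are illustrative, not the gem's own code:

```ruby
# Hypothetical status predicates over a numeric status code.
module StatusPredicates
  def server_error? = (500..599).cover?(status)
  def unauthorized? = status == 401
  def rate_limited? = status == 429
  def success?      = (200..299).cover?(status)
end

# A stand-in response object; the real one wraps a transport response.
Response = Struct.new(:status, :body) { include StatusPredicates }

res = Response.new(429, "slow down")
p res.rate_limited? # => true
p res.success?      # => false
```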
data/lib/llm/providers/openai/files.rb
CHANGED

@@ -62,7 +62,7 @@ class LLM::OpenAI
       multi = LLM::Multipart.new(params.merge!(file: LLM.File(file), purpose:))
       req = Net::HTTP::Post.new(path("/files"), headers)
       req["content-type"] = multi.content_type
-      set_body_stream(req, multi.body)
+      transport.set_body_stream(req, multi.body)
       res, span, tracer = execute(request: req, operation: "request")
       res = ResponseAdapter.adapt(res, type: :file)
       tracer.on_request_finish(operation: "request", res:, span:)

@@ -134,7 +134,7 @@ class LLM::OpenAI

     private

-    [:path, :headers, :execute, :
+    [:path, :headers, :execute, :transport].each do |m|
       define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
     end
   end
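Body streaming now goes through the transport object (`transport.set_body_stream`) rather than a provider-level helper. A sketch of what a transport-side `set_body_stream` can do with net/http — the `Transport` class here is illustrative, only the `Net::HTTP` calls are real:

```ruby
require "net/http"
require "stringio"

class Transport
  # Attach an IO as a streaming request body instead of buffering
  # the whole payload into a String.
  def set_body_stream(request, io)
    request.body_stream = io
    request.content_length = io.size if io.respond_to?(:size)
  end
end

req = Net::HTTP::Post.new("/files")
Transport.new.set_body_stream(req, StringIO.new("payload"))
```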
data/lib/llm/providers/openai/images.rb
CHANGED

@@ -78,7 +78,7 @@ class LLM::OpenAI
       multi = LLM::Multipart.new(params.merge!(image:, model:, response_format:))
       req = Net::HTTP::Post.new(path("/images/variations"), headers)
       req["content-type"] = multi.content_type
-      set_body_stream(req, multi.body)
+      transport.set_body_stream(req, multi.body)
       res, span, tracer = execute(request: req, operation: "request")
       res = ResponseAdapter.adapt(res, type: :image)
       tracer.on_request_finish(operation: "request", model:, res:, span:)

@@ -104,7 +104,7 @@ class LLM::OpenAI
       multi = LLM::Multipart.new(params.merge!(image:, prompt:, model:, response_format:))
       req = Net::HTTP::Post.new(path("/images/edits"), headers)
       req["content-type"] = multi.content_type
-      set_body_stream(req, multi.body)
+      transport.set_body_stream(req, multi.body)
       res, span, tracer = execute(request: req, operation: "request")
       res = ResponseAdapter.adapt(res, type: :image)
       tracer.on_request_finish(operation: "request", model:, res:, span:)

@@ -113,7 +113,7 @@ class LLM::OpenAI

     private

-    [:path, :headers, :execute, :
+    [:path, :headers, :execute, :transport].each do |m|
       define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
     end
   end
data/lib/llm/providers/openai/response_adapter/completion.rb
CHANGED

@@ -40,6 +40,48 @@ module LLM::OpenAI::ResponseAdapter
         &.reasoning_tokens || 0
     end

+    ##
+    # (see LLM::Contract::Completion#input_audio_tokens)
+    def input_audio_tokens
+      body
+        .usage
+        &.prompt_tokens_details
+        &.audio_tokens || 0
+    end
+
+    ##
+    # (see LLM::Contract::Completion#output_audio_tokens)
+    def output_audio_tokens
+      body
+        .usage
+        &.completion_tokens_details
+        &.audio_tokens || 0
+    end
+
+    ##
+    # (see LLM::Contract::Completion#input_image_tokens)
+    def input_image_tokens
+      body
+        .usage
+        &.prompt_tokens_details
+        &.image_tokens || 0
+    end
+
+    ##
+    # (see LLM::Contract::Completion#cache_read_tokens)
+    def cache_read_tokens
+      body
+        .usage
+        &.prompt_tokens_details
+        &.cached_tokens || 0
+    end
+
+    ##
+    # (see LLM::Contract::Completion#cache_write_tokens)
+    def cache_write_tokens
+      0
+    end
+
     ##
     # (see LLM::Contract::Completion#total_tokens)
     def total_tokens
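These accessors break usage down by modality (audio, image) and by cache hits, falling back to `0` when the API omits a detail object. That granularity is what cost accounting needs, since cached and modality-specific tokens are often billed at different rates. A hedged sketch of the idea — the `Usage` struct and all rates below are made up for illustration:

```ruby
Usage = Struct.new(:input_tokens, :cache_read_tokens, :output_tokens, keyword_init: true)

# Price uncached input, cached input, and output tokens separately.
def cost(usage, input_rate:, cached_rate:, output_rate:)
  uncached = usage.input_tokens - usage.cache_read_tokens
  (uncached * input_rate) +
    (usage.cache_read_tokens * cached_rate) +
    (usage.output_tokens * output_rate)
end

usage = Usage.new(input_tokens: 1_000, cache_read_tokens: 600, output_tokens: 200)
p cost(usage, input_rate: 2.5e-6, cached_rate: 1.25e-6, output_rate: 1.0e-5)
```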
data/lib/llm/providers/openai/response_adapter/responds.rb
CHANGED

@@ -42,6 +42,45 @@ module LLM::OpenAI::ResponseAdapter
         &.reasoning_tokens || 0
     end

+    ##
+    # (see LLM::Contract::Completion#input_audio_tokens)
+    def input_audio_tokens
+      body
+        .usage
+        &.input_tokens_details
+        &.audio_tokens || 0
+    end
+
+    ##
+    # (see LLM::Contract::Completion#output_audio_tokens)
+    def output_audio_tokens
+      body
+        .usage
+        &.output_tokens_details
+        &.audio_tokens || 0
+    end
+
+    ##
+    # (see LLM::Contract::Completion#input_image_tokens)
+    def input_image_tokens
+      super
+    end
+
+    ##
+    # (see LLM::Contract::Completion#cache_read_tokens)
+    def cache_read_tokens
+      body
+        .usage
+        &.input_tokens_details
+        &.cached_tokens || 0
+    end
+
+    ##
+    # (see LLM::Contract::Completion#cache_write_tokens)
+    def cache_write_tokens
+      0
+    end
+
     ##
     # (see LLM::Contract::Completion#total_tokens)
     def total_tokens
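Both adapters lean on the same safe-navigation idiom: every step after `usage` may be `nil` (the Responses API uses `input_tokens_details`/`output_tokens_details` where chat completions use `prompt_tokens_details`/`completion_tokens_details`), and `|| 0` supplies the default. In isolation, using `OpenStruct` as a stand-in for the parsed body:

```ruby
require "ostruct"

usage = OpenStruct.new(input_tokens_details: nil)
p usage.input_tokens_details&.audio_tokens || 0 # => 0

usage = OpenStruct.new(input_tokens_details: OpenStruct.new(audio_tokens: 7))
p usage.input_tokens_details&.audio_tokens || 0 # => 7
```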
data/lib/llm/providers/openai/responses.rb
CHANGED

@@ -44,7 +44,7 @@ class LLM::OpenAI
       messages = build_complete_messages(prompt, params, role)
       @provider.tracer.set_request_metadata(user_input: extract_user_input(messages, fallback: prompt))
       body = LLM.json.dump({input: [adapt(messages, mode: :response)].flatten}.merge!(params))
-      set_body_stream(req, StringIO.new(body))
+      transport.set_body_stream(req, StringIO.new(body))
       res, span, tracer = execute(request: req, stream:, stream_parser:, operation: "chat", model: params[:model])
       res = ResponseAdapter.adapt(res, type: :responds)
         .extend(Module.new { define_method(:__tools__) { tools } })

@@ -85,7 +85,7 @@ class LLM::OpenAI

     private

-    [:path, :headers, :execute, :
+    [:path, :headers, :execute, :transport, :resolve_tools].each do |m|
       define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
     end
data/lib/llm/providers/openai.rb
CHANGED
@@ -223,7 +223,7 @@ module LLM
       messages = build_complete_messages(prompt, params, role)
       body = LLM.json.dump({messages: adapt(messages, mode: :complete).flatten}.merge!(params))
       req = Net::HTTP::Post.new(completions_path, headers)
-      set_body_stream(req, StringIO.new(body))
+      transport.set_body_stream(req, StringIO.new(body))
       [req, messages]
     end
data/lib/llm/response.rb
CHANGED
@@ -10,25 +10,27 @@ module LLM
   # handling can share one common surface without flattening away
   # specialized behavior.
   #
-  # The normalized response
-  #
-  #
-  #
+  # The normalized response keeps the transport response available
+  # through {#res}. When the default net/http transport is in use,
+  # {LLM::Transport::Response::HTTP
+  # LLM::Transport::Response::HTTP} keeps the
+  # original {Net::HTTPResponse Net::HTTPResponse} available through
+  # its own {LLM::Transport::Response::HTTP#res #res}.
   class Response
     require "json"

     ##
     # Returns the HTTP response
-    # @return [
+    # @return [LLM::Transport::Response]
     attr_reader :res

     ##
-    # @param [
+    # @param [LLM::Transport::Response] res
     # HTTP response
     # @return [LLM::Response]
     # Returns an instance of LLM::Response
     def initialize(res)
-      @res = res
+      @res = LLM::Transport::Response.from(res)
     end

@@ -51,7 +53,7 @@ module LLM
     # Returns true if the response is successful
     # @return [Boolean]
     def ok?
-
+      @res.success?
     end

     ##
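`LLM::Transport::Response.from` now appears at every boundary where a raw response might enter (here and in the error handlers above). A common shape for such a normalizer is an idempotent `.from` constructor; a hedged sketch, with an illustrative `NormalizedResponse` class standing in for the real one:

```ruby
require "net/http"

class NormalizedResponse
  # Idempotent: already-normalized values pass through untouched,
  # raw Net::HTTPResponse objects get wrapped.
  def self.from(res)
    res.is_a?(self) ? res : new(res)
  end

  def initialize(raw)
    @raw = raw
  end

  def status   = Integer(@raw.code)
  def success? = (200..299).cover?(status)
end
```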
data/lib/llm/sequel/plugin.rb
CHANGED
@@ -184,14 +184,6 @@ module LLM::Sequel
       ctx.wait(...)
     end

-    ##
-    # Calls into the stored context.
-    # @see LLM::Context#call
-    # @return [Object]
-    def call(...)
-      ctx.call(...)
-    end
-
     ##
     # @see LLM::Context#mode
     # @return [Symbol]

@@ -222,6 +214,13 @@ module LLM::Sequel
       ctx.functions
     end

+    ##
+    # @see LLM::Context#functions?
+    # @return [Boolean]
+    def functions?
+      ctx.functions?
+    end
+
     ##
     # @see LLM::Context#returns
     # @return [Array<LLM::Function::Return>]
data/lib/llm/stream/queue.rb
CHANGED
@@ -4,7 +4,7 @@ class LLM::Stream
   ##
   # A small queue for collecting streamed tool work. Values can be immediate
   # {LLM::Function::Return} objects or concurrent handles returned by
-  # {LLM::Function#spawn}. Calling {#wait
+  # {LLM::Function#spawn}. Calling {#wait} resolves queued work and
   # returns an array of {LLM::Function::Return} values.
   class Queue
     ##

@@ -41,56 +41,29 @@ class LLM::Stream

     ##
     # Waits for queued work to finish and returns function results.
-    #
-    #
-    #
-    #
-    # - `:task`: Use async tasks (requires async gem)
-    # - `:fiber`: Use scheduler-backed fibers (requires Fiber.scheduler)
-    # - `:ractor`: Use Ruby ractors (class-based tools only; MCP tools are not supported)
-    # - `[:thread, :ractor]`: Wait for any queued thread or ractor work, in the
-    #   given order. This is useful when different tools were spawned with
-    #   different concurrency strategies.
+    #
+    # Queued work is waited according to the actual task types that were
+    # enqueued, so callers do not need to provide a strategy here.
+    #
     # @return [Array<LLM::Function::Return>]
-    def wait
+    def wait
       returns, tasks = @items.shift(@items.length).partition { LLM::Function::Return === _1 }
-      results = wait_tasks(tasks
+      results = wait_tasks(tasks)
       returns.concat fire_hooks(tasks, results)
     end
     alias_method :value, :wait

     private

-    def wait_tasks(tasks
-
-
-
-
-
-
-      strategies.flat_map do |name|
-        selected = grouped.fetch(name)
-        selected.empty? ? [] : wait_group(selected, name)
-      end
-    end
-
-    def wait_group(tasks, strategy)
-      case strategy
-      when :thread then LLM::Function::ThreadGroup.new(tasks).wait
-      when :task then LLM::Function::TaskGroup.new(tasks).wait
-      when :fiber then LLM::Function::FiberGroup.new(tasks).wait
-      when :ractor then LLM::Function::Ractor::Group.new(tasks).wait
-      else raise ArgumentError, "Unknown strategy: #{strategy.inspect}. Expected :thread, :task, :fiber, or :ractor"
-      end
-    end
-
-    def task_strategy(task)
-      case task.task
-      when Thread then :thread
-      when Fiber then :fiber
-      when LLM::Function::Ractor::Task then :ractor
-      else :task
+    def wait_tasks(tasks)
+      return [] if tasks.empty?
+      results = {}
+      grouped_tasks = tasks.group_by(&:group_class)
+      grouped_tasks.each do |group_class, group|
+        returns = group_class.new(group).wait
+        returns.each.with_index { results[group[_2]] = _1 }
       end
+      tasks.map { results[_1] }
     end

     def fire_hooks(tasks, results)
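The strategy argument and the `task_strategy` case statement are gone: each task now reports its own group class, so the queue can group tasks, wait each group, and restore the original ordering. The shape of that dispatch in isolation — the `Task` struct and `ThreadGroupWaiter` below are illustrative stand-ins:

```ruby
# Each task knows which waiter class handles its kind of concurrency.
Task = Struct.new(:thread) do
  def group_class = ThreadGroupWaiter
end

class ThreadGroupWaiter
  def initialize(tasks)
    @tasks = tasks
  end

  def wait
    @tasks.map { _1.thread.value }
  end
end

tasks = 3.times.map { |i| Task.new(Thread.new { i * 2 }) }
results = {}
# Group by waiter class, wait each group, record results per task ...
tasks.group_by(&:group_class).each do |group_class, group|
  group_class.new(group).wait.each.with_index { results[group[_2]] = _1 }
end
# ... then read results back in the original enqueue order.
p tasks.map { results[_1] } # => [0, 2, 4]
```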
data/lib/llm/stream.rb
CHANGED
@@ -46,11 +46,11 @@ module LLM

   ##
   # Waits for queued tool work to finish and returns function results.
-  #
-  #
+  # Any passed arguments are ignored because queued work is waited according
+  # to the actual task types already present in the queue.
   # @return [Array<LLM::Function::Return>]
-  def wait(
-    queue.wait
+  def wait(*)
+    queue.wait
   end

   # @group Public callbacks
data/lib/llm/transport/execution.rb
ADDED

@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+class LLM::Transport
+  ##
+  # Internal request execution methods for {LLM::Provider}.
+  #
+  # This module handles provider-side transport execution, response
+  # parsing, streaming, and request body setup.
+  #
+  # @api private
+  module Execution
+    private
+
+    ##
+    # Executes a HTTP request
+    # @param [Net::HTTPRequest] request
+    #  The request to send
+    # @param [Proc] b
+    #  A block to yield the response to (optional)
+    # @return [LLM::Transport::Response]
+    #  The response from the server
+    # @raise [LLM::Error::Unauthorized]
+    #  When authentication fails
+    # @raise [LLM::Error::RateLimit]
+    #  When the rate limit is exceeded
+    # @raise [LLM::Error]
+    #  When any other unsuccessful status code is returned
+    # @raise [SystemCallError]
+    #  When there is a network error at the operating system level
+    # @return [LLM::Transport::Response]
+    def execute(request:, operation:, stream: nil, stream_parser: self.stream_parser, model: nil, inputs: nil, &b)
+      stream &&= LLM::Object.from(streamer: stream, parser: stream_parser, decoder: stream_decoder)
+      owner = transport.request_owner
+      tracer = self.tracer
+      span = tracer.on_request_start(operation:, model:, inputs:)
+      res = transport.request(request, owner:, stream:, &b)
+      res = LLM::Transport::Response.from(res)
+      [handle_response(res, tracer, span), span, tracer]
+    rescue *transport.interrupt_errors
+      raise LLM::Interrupt, "request interrupted" if transport.interrupted?(owner)
+      raise
+    end
+
+    ##
+    # Handles the response from a request
+    # @param [LLM::Transport::Response] res
+    #  The response to handle
+    # @param [Object, nil] span
+    #  The span
+    # @return [LLM::Transport::Response]
+    def handle_response(res, tracer, span)
+      res.ok? ? res.body = parse_response(res) : error_handler.new(tracer, span, res).raise_error!
+      res
+    end
+
+    ##
+    # Parse a HTTP response
+    # @param [LLM::Transport::Response] res
+    # @return [LLM::Object, String]
+    def parse_response(res)
+      case res["content-type"]
+      when %r{\Aapplication/json\s*} then LLM::Object.from(LLM.json.load(res.body))
+      else res.body
+      end
+    end
+  end
+end
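The `parse_response` dispatch above is easy to exercise on its own: JSON bodies become structured data, everything else passes through unchanged. A standalone sketch, substituting the stdlib JSON parser for `LLM.json` and `LLM::Object`:

```ruby
require "json"

def parse_body(content_type, body)
  case content_type
  when %r{\Aapplication/json\s*} then JSON.parse(body) # structured data
  else body                                            # raw passthrough
  end
end

p parse_body("application/json", '{"ok":true}') # => {"ok"=>true}
p parse_body("text/plain", "pong")              # => "pong"
```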
data/lib/llm/transport/http.rb
ADDED

@@ -0,0 +1,134 @@
+# frozen_string_literal: true
+
+require "net/http"
+
+class LLM::Transport
+  ##
+  # The {LLM::Transport::HTTP LLM::Transport::HTTP} transport is the
+  # built-in adapter for Ruby's {Net::HTTP Net::HTTP}. It manages
+  # transient HTTP connections, tracks active requests by owner, and
+  # interrupts in-flight requests when needed.
+  #
+  # @api private
+  class HTTP < self
+    INTERRUPT_ERRORS = [::IOError, ::EOFError, Errno::EBADF].freeze
+    Request = Struct.new(:client, keyword_init: true)
+
+    ##
+    # @param [String] host
+    # @param [Integer] port
+    # @param [Integer] timeout
+    # @param [Boolean] ssl
+    # @return [LLM::Transport::HTTP]
+    def initialize(host:, port:, timeout:, ssl:)
+      @host = host
+      @port = port
+      @timeout = timeout
+      @ssl = ssl
+      @base_uri = URI("#{ssl ? "https" : "http"}://#{host}:#{port}/")
+      @monitor = Monitor.new
+    end
+
+    ##
+    # Returns the current request owner.
+    # @return [Object]
+    def request_owner
+      return Fiber.current unless defined?(::Async)
+      Async::Task.current? ? Async::Task.current : Fiber.current
+    end
+
+    ##
+    # @return [Array<Class<Exception>>]
+    def interrupt_errors
+      [*INTERRUPT_ERRORS, *optional_interrupt_errors]
+    end
+
+    ##
+    # Interrupt an active request, if any.
+    # @param [Fiber] owner
+    # @return [nil]
+    def interrupt!(owner)
+      req = request_for(owner) or return
+      lock { (@interrupts ||= {})[owner] = true }
+      close_socket(req.client)
+      req.client.finish if req.client.active?
+      owner.stop if owner.respond_to?(:stop)
+    rescue *interrupt_errors
+      nil
+    end
+
+    ##
+    # Returns whether an execution owner was interrupted.
+    # @param [Fiber] owner
+    # @return [Boolean, nil]
+    def interrupted?(owner)
+      lock { @interrupts&.delete(owner) }
+    end
+
+    ##
+    # Performs a request on the current HTTP transport.
+    # @param [Net::HTTPRequest] request
+    # @param [Fiber] owner
+    # @param [LLM::Object, nil] stream
+    # @yieldparam [LLM::Transport::Response] response
+    # @return [Object]
+    def request(request, owner:, stream: nil, &b)
+      client = client()
+      set_request(Request.new(client:), owner)
+      perform_request(client, request, stream, &b)
+    ensure
+      clear_request(owner)
+    end
+
+    ##
+    # @return [String]
+    def inspect
+      "#<#{self.class.name}:0x#{object_id.to_s(16)}>"
+    end
+
+    private
+
+    attr_reader :host, :port, :timeout, :ssl, :base_uri
+
+    def client
+      client = Net::HTTP.new(host, port)
+      client.read_timeout = timeout
+      client.use_ssl = ssl
+      client
+    end
+
+    def close_socket(http)
+      socket = http&.instance_variable_get(:@socket) or return
+      socket = socket.io if socket.respond_to?(:io)
+      socket.close
+    rescue *interrupt_errors
+      nil
+    end
+
+    def request_for(owner)
+      lock do
+        @requests ||= {}
+        @requests[owner]
+      end
+    end
+
+    def set_request(req, owner)
+      lock do
+        @requests ||= {}
+        @requests[owner] = req
+      end
+    end
+
+    def clear_request(owner)
+      lock { @requests&.delete(owner) }
+    end
+
+    def lock(&)
+      @monitor.synchronize(&)
+    end
+
+    def optional_interrupt_errors
+      defined?(::Async::Stop) ? [Async::Stop] : []
+    end
+  end
+end
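The transport's owner-keyed bookkeeping — a `Monitor`-guarded hash mapping each requesting fiber or task to its in-flight client — is what lets `interrupt!` reach an active request from another thread. The core pattern, reduced to a sketch (the `Registry` class is illustrative; `Thread.current` stands in for the owner):

```ruby
require "monitor"

class Registry
  def initialize
    @monitor = Monitor.new
    @requests = {}
  end

  # Every access goes through the monitor, so lookups and deletes
  # from other threads never race against registration.
  def set(owner, value) = @monitor.synchronize { @requests[owner] = value }
  def get(owner)        = @monitor.synchronize { @requests[owner] }
  def clear(owner)      = @monitor.synchronize { @requests.delete(owner) }
end

registry = Registry.new
registry.set(Thread.current, :client)
p registry.get(Thread.current)   # => :client
registry.clear(Thread.current)
p registry.get(Thread.current)   # => nil
```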