RubyGems - llm.rb - Versions diffs - 8.1.0 → 10.0.0 - Mend

llm.rb 8.1.0 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +196 -6
data/README.md +233 -518
data/data/anthropic.json +278 -258
data/data/bedrock.json +1288 -1561
data/data/deepseek.json +38 -38
data/data/google.json +656 -579
data/data/openai.json +860 -818
data/data/xai.json +243 -552
data/data/zai.json +168 -168
data/lib/llm/active_record/acts_as_agent.rb +5 -0
data/lib/llm/active_record/acts_as_llm.rb +7 -8
data/lib/llm/active_record.rb +1 -6
data/lib/llm/agent.rb +121 -82
data/lib/llm/context.rb +79 -74
data/lib/llm/contract/completion.rb +45 -0
data/lib/llm/cost.rb +81 -4
data/lib/llm/error.rb +1 -1
data/lib/llm/function/array.rb +8 -5
data/lib/llm/function/call_group.rb +39 -0
data/lib/llm/function/call_task.rb +46 -0
data/lib/llm/function/fork/task.rb +6 -0
data/lib/llm/function/ractor/task.rb +6 -0
data/lib/llm/function/task.rb +10 -0
data/lib/llm/function.rb +28 -1
data/lib/llm/mcp/transport/http.rb +26 -46
data/lib/llm/mcp/transport/stdio.rb +0 -8
data/lib/llm/mcp.rb +6 -23
data/lib/llm/provider.rb +30 -20
data/lib/llm/providers/anthropic/error_handler.rb +6 -7
data/lib/llm/providers/anthropic/files.rb +2 -2
data/lib/llm/providers/anthropic/response_adapter/completion.rb +30 -0
data/lib/llm/providers/anthropic/stream_parser.rb +2 -2
data/lib/llm/providers/anthropic.rb +1 -1
data/lib/llm/providers/bedrock/error_handler.rb +8 -9
data/lib/llm/providers/bedrock/models.rb +13 -13
data/lib/llm/providers/bedrock/response_adapter/completion.rb +30 -0
data/lib/llm/providers/bedrock/stream_parser.rb +2 -2
data/lib/llm/providers/bedrock.rb +1 -1
data/lib/llm/providers/google/error_handler.rb +6 -7
data/lib/llm/providers/google/files.rb +2 -4
data/lib/llm/providers/google/images.rb +1 -1
data/lib/llm/providers/google/models.rb +0 -2
data/lib/llm/providers/google/response_adapter/completion.rb +30 -0
data/lib/llm/providers/google/stream_parser.rb +2 -2
data/lib/llm/providers/google.rb +1 -1
data/lib/llm/providers/ollama/error_handler.rb +6 -7
data/lib/llm/providers/ollama/models.rb +0 -2
data/lib/llm/providers/ollama/response_adapter/completion.rb +30 -0
data/lib/llm/providers/ollama.rb +1 -1
data/lib/llm/providers/openai/audio.rb +3 -3
data/lib/llm/providers/openai/error_handler.rb +6 -7
data/lib/llm/providers/openai/files.rb +2 -2
data/lib/llm/providers/openai/images.rb +3 -3
data/lib/llm/providers/openai/models.rb +1 -1
data/lib/llm/providers/openai/response_adapter/completion.rb +42 -0
data/lib/llm/providers/openai/response_adapter/responds.rb +39 -0
data/lib/llm/providers/openai/responses/stream_parser.rb +2 -2
data/lib/llm/providers/openai/responses.rb +2 -2
data/lib/llm/providers/openai/stream_parser.rb +2 -2
data/lib/llm/providers/openai/vector_stores.rb +1 -1
data/lib/llm/providers/openai.rb +1 -1
data/lib/llm/response.rb +10 -8
data/lib/llm/schema.rb +11 -0
data/lib/llm/sequel/agent.rb +5 -0
data/lib/llm/sequel/plugin.rb +8 -14
data/lib/llm/stream/queue.rb +15 -42
data/lib/llm/stream.rb +15 -40
data/lib/llm/tool/param.rb +1 -8
data/lib/llm/transport/execution.rb +67 -0
data/lib/llm/transport/http.rb +134 -0
data/lib/llm/transport/persistent_http.rb +152 -0
data/lib/llm/transport/response/http.rb +113 -0
data/lib/llm/transport/response.rb +112 -0
data/lib/llm/{provider/transport/http → transport}/stream_decoder.rb +8 -4
data/lib/llm/transport.rb +139 -0
data/lib/llm/usage.rb +14 -5
data/lib/llm/utils.rb +24 -14
data/lib/llm/version.rb +1 -1
data/lib/llm.rb +3 -12
data/llm.gemspec +2 -16
metadata +13 -20
data/lib/llm/bot.rb +0 -3
data/lib/llm/provider/transport/http/execution.rb +0 -115
data/lib/llm/provider/transport/http/interruptible.rb +0 -114
data/lib/llm/provider/transport/http.rb +0 -145

data/lib/llm/agent.rb CHANGED Viewed

@@ -23,8 +23,7 @@ module LLM
   #   advisory tool errors back through the model and keeps the loop in-band.
   #   Set `tool_attempts: nil` to disable that advisory behavior.
   # * Tool loop execution can be configured with `concurrency :call`,
-  #   `:thread`, `:task`, `:fiber`, `:ractor`, or a list of queued task
-  #   types such as `[:thread, :ractor]`.
+  #   `:thread`, `:task`, `:fiber`, or `:ractor`.
   #
   # @example
   #   class SystemAdmin < LLM::Agent
@@ -49,9 +48,9 @@ module LLM
     #  The model identifier
     # @return [String, nil]
     #  Returns the current model when no argument is provided
-    def self.model(model = nil)
-      return @model if model.nil?
-      @model = model
+    def self.model(model = nil, &block)
+      return @model if model.nil? && !block
+      @model = block || model
     end
     ##
@@ -60,9 +59,9 @@ module LLM
     #  One or more tools
     # @return [Array<LLM::Function>]
     #  Returns the current tools when no argument is provided
-    def self.tools(*tools)
-      return @tools || [] if tools.empty?
-      @tools = tools.flatten
+    def self.tools(*tools, &block)
+      return @tools || [] if tools.empty? && !block
+      @tools = block || tools.flatten
     end
     ##
@@ -71,9 +70,9 @@ module LLM
     #  One or more skill directories
     # @return [Array<String>, nil]
     #  Returns the current skills when no argument is provided
-    def self.skills(*skills)
-      return @skills if skills.empty?
-      @skills = skills.flatten
+    def self.skills(*skills, &block)
+      return @skills if skills.empty? && !block
+      @skills = block || skills.flatten
     end
     ##
@@ -82,9 +81,9 @@ module LLM
     #  The schema
     # @return [#to_json, nil]
     #  Returns the current schema when no argument is provided
-    def self.schema(schema = nil)
-      return @schema if schema.nil?
-      @schema = schema
+    def self.schema(schema = nil, &block)
+      return @schema if schema.nil? && !block
+      @schema = block || schema
     end
     ##
@@ -110,9 +109,8 @@ module LLM
     #  - `:fork`: forked child processes
     #  - `:ractor`: concurrent Ruby ractors for class-based tools; MCP tools are not supported,
     #    and this mode is especially useful for CPU-bound tool work
-    #  - `[:thread, :ractor]`: the possible concurrency strategies to wait on, in the
-    #    given order. This is useful for mixed tool sets or when work may have been
-    #    spawned with more than one concurrency strategy.
+    #  Usually pass a single strategy. Arrays are only for advanced mixed-work
+    #  cases and are not needed for normal queued stream tool loops.
     # @return [Symbol, Array<Symbol>, nil]
     def self.concurrency(concurrency = nil)
       return @concurrency if concurrency.nil?
@@ -139,6 +137,39 @@ module LLM
       @tracer = block || tracer
     end
+    ##
+    # Set or get the default stream.
+    #
+    # When a block is provided, it is stored and evaluated lazily against the
+    # agent instance during initialization so it can build a fresh stream for
+    # each agent.
+    #
+    # @example
+    #   class Agent < LLM::Agent
+    #     stream { MyStream.new }
+    #   end
+    #
+    # @param [Object, Proc, nil] stream
+    # @yieldreturn [Object, nil]
+    # @return [Object, Proc, nil]
+    def self.stream(stream = nil, &block)
+      return @stream if stream.nil? && !block
+      @stream = block || stream
+    end
+    ##
+    # Set or get the tool names that require confirmation before they can run.
+    #
+    # @param [String, Symbol, Array<String, Symbol>, Proc] tool_names
+    #  One or more tool names.
+    # @param [Proc] block
+    #  An optional, lazy-evaluated Proc
+    # @return [Array<String>, Proc, nil]
+    def self.confirm(*tool_names, &block)
+      return @confirm if tool_names.empty? && !block
+      @confirm = block || tool_names.flatten.map(&:to_s)
+    end
     ##
     # @param [LLM::Provider] provider
     #  A provider
@@ -150,15 +181,27 @@ module LLM
     # @option params [Array<LLM::Function>, nil] :tools Defaults to nil
     # @option params [Array<String>, nil] :skills Defaults to nil
     # @option params [#to_json, nil] :schema Defaults to nil
+    # @option params [Object, Proc, nil] :stream Optional stream override for this agent instance
     # @option params [LLM::Tracer, Proc, nil] :tracer Optional tracer override for this agent instance
     # @option params [Symbol, Array<Symbol>, nil] :concurrency Defaults to the agent class concurrency
     def initialize(llm, params = {})
-      defaults = {model: self.class.model, tools: self.class.tools, skills: self.class.skills, schema: self.class.schema}.compact
-      @concurrency = params.delete(:concurrency) || self.class.concurrency
       @llm = llm
-      tracer = params.key?(:tracer) ? params.delete(:tracer) : self.class.tracer
-      @tracer = resolve_option(tracer) unless tracer.nil?
-      @ctx = LLM::Context.new(llm, defaults.merge({guard: true}).merge(params))
+      fields = %i[model skills schema tracer stream tools concurrency instructions confirm]
+      fields_ivar = %i[tracer concurrency instructions confirm]
+      fields.each do |field|
+        resolvable = params.key?(field) ? params.delete(field) : self.class.public_send(field)
+        resolve_symbol = !%i[concurrency confirm].include?(field)
+        resolved = resolvable != nil ? resolve_option(self, resolvable, resolve_symbol:) : resolvable
+        resolved = [*resolved].map(&:to_s) if field == :confirm && resolved
+        if field == :model
+          params[field] = resolved unless resolved.nil? || params.key?(field)
+        elsif resolved && !fields_ivar.include?(field)
+          params[field] ||= resolved
+        elsif fields_ivar.include?(field)
+          instance_variable_set(:"@#{field}", resolved)
+        end
+      end
+      @ctx = LLM::Context.new(llm, {guard: true}.merge(params))
     end
     ##
@@ -178,31 +221,10 @@ module LLM
     #   response = agent.talk("Hello, what is your name?")
     #   puts response.choices[0].content
     def talk(prompt, params = {})
-      run_loop(:talk, prompt, params)
+      run_loop(prompt, params)
     end
     alias_method :chat, :talk
-    ##
-    # Maintain a conversation via the responses API.
-    # This method immediately sends a request to the LLM and returns the response.
-    #
-    # @note Not all LLM providers support this API
-    # @param prompt (see LLM::Provider#complete)
-    # @param [Hash] params The params passed to the provider, including optional :stream, :tools, :schema etc.
-    # @option params [Integer] :tool_attempts
-    #  The maxinum number of tool call iterations before the agent sends
-    #  in-band advisory tool errors back through the model (default 25).
-    #  Set to `nil` to disable advisory tool-limit returns.
-    # @return [LLM::Response] Returns the LLM's response for this turn.
-    # @example
-    #   llm = LLM.openai(key: ENV["KEY"])
-    #   agent = LLM::Agent.new(llm)
-    #   res = agent.respond("What is the capital of France?")
-    #   puts res.output_text
-    def respond(prompt, params = {})
-      run_loop(:respond, prompt, params)
-    end
     ##
     # @return [LLM::Buffer<LLM::Message>]
     def messages
@@ -222,13 +244,6 @@ module LLM
       @ctx.returns
     end
-    ##
-    # @see LLM::Context#call
-    # @return [Object]
-    def call(...)
-      @tracer ? @llm.with_tracer(@tracer) { @ctx.call(...) } : @ctx.call(...)
-    end
     ##
     # @see LLM::Context#wait
     # @return [Array<LLM::Function::Return>]
@@ -293,6 +308,13 @@ module LLM
       @tracer || @ctx.tracer
     end
+    ##
+    # @return [LLM::Stream, #<<, nil]
+    #  Returns a stream object, or nil
+    def stream
+      @ctx.stream
+    end
     ##
     # Returns the model an Agent is actively using
     # @return [String]
@@ -327,6 +349,13 @@ module LLM
       @ctx.context_window
     end
+    ##
+    # @see LLM::Context#params
+    # @return [Hash]
+    def params
+      @ctx.params
+    end
     ##
     # @see LLM::Context#to_h
     # @return [Hash]
@@ -363,19 +392,33 @@ module LLM
     end
     alias_method :restore, :deserialize
+    ##
+    # This method is called when confirmation is required before a tool can run.
+    #
+    # @param [LLM::Function] fn
+    #  The pending function call. It can be cancelled through the
+    #  {LLM::Function#cancel} method.
+    # @param [Symbol, Array<Symbol>] strategy
+    #  The execution strategy that would be used for the tool call.
+    # @return [LLM::Function::Return]
+    #  Return either `fn.spawn(strategy).wait` to approve execution or
+    #  `fn.cancel(...)` to cancel the call.
+    def on_tool_confirmation(fn, strategy)
+      fn.cancel
+    end
     private
     ##
     # @return [LLM::Prompt]
     def apply_instructions(new_prompt)
-      instr = self.class.instructions
-      return new_prompt unless instr
+      return new_prompt unless @instructions
       if LLM::Prompt === new_prompt
-        new_prompt.system(instr) if inject_instructions?(new_prompt)
+        new_prompt.system(@instructions) if inject_instructions?(new_prompt)
         new_prompt
       else
         prompt do
-          _1.system(instr) if inject_instructions?
+          _1.system(@instructions) if inject_instructions?
           _1.user(new_prompt)
         end
       end
@@ -396,50 +439,46 @@ module LLM
     ##
     # @return [Array<LLM::Function::Return>]
     def call_functions
-      case concurrency || :call
-      when :call then call(:functions)
-      when :thread, :task, :fiber, :fork, :ractor, Array then wait(concurrency)
-      else raise ArgumentError, "Unknown concurrency: #{concurrency.inspect}. " \
-                                "Expected :call, :thread, :task, :fiber, :fork, :ractor, " \
-                                "or an array of the mentioned options"
+      strategy = concurrency || :call
+      return wait(strategy) unless @confirm&.any?
+      confirmables = @ctx.functions.select { @confirm.include?(_1.name.to_s) }
+      results = confirmables.map do |tool|
+        send(:on_tool_confirmation, tool, strategy)
       end
+      @ctx.functions? ? [*results, *wait(strategy)] : results
     end
-    def run_loop(method, prompt, params)
-      loop = proc do
+    ##
+    # Runs the tool loop
+    # @api private
+    def run_loop(prompt, params)
+      run = proc do
         max = params.key?(:tool_attempts) ? params.delete(:tool_attempts) : 25
         max = Integer(max) if max
         stream = params[:stream] || @ctx.params[:stream]
         stream.extra[:concurrency] = concurrency if LLM::Stream === stream
-        res = @ctx.public_send(method, apply_instructions(prompt), params)
-        loop do
-          break if @ctx.functions.empty?
+        res = @ctx.talk(apply_instructions(prompt), params)
+        while @ctx.functions?
           if max
             max.times do
-              break if @ctx.functions.empty?
-              res = @ctx.public_send(method, call_functions, params)
+              break unless @ctx.functions?
+              res = @ctx.talk(call_functions, params)
             end
-            break if @ctx.functions.empty?
-            res = @ctx.public_send(method, @ctx.functions.map { rate_limit(_1) }, params)
+            res = @ctx.talk(@ctx.functions.map(&:rate_limit), params) if @ctx.functions?
           else
-            res = @ctx.public_send(method, call_functions, params)
+            res = @ctx.talk(call_functions, params)
           end
         end
         res
       end
-      @tracer ? @llm.with_tracer(@tracer, &loop) : loop.call
+      return run.call unless @tracer
+      @llm.with_tracer(@tracer, &run)
     end
-    def rate_limit(function)
-      LLM::Function::Return.new(function.id, function.name, {
-        error: true,
-        type: LLM::ToolLoopError.name,
-        message: "tool loop rate limit reached"
-      })
-    end
-    def resolve_option(option)
-      Proc === option ? instance_exec(&option) : option
+    ##
+    # @api private
+    def resolve_option(...)
+      LLM::Utils.resolve_option(...)
     end
   end
 end

data/lib/llm/context.rb CHANGED Viewed

@@ -44,6 +44,11 @@ module LLM
       input_tokens: 0,
       output_tokens: 0,
       reasoning_tokens: 0,
+      input_audio_tokens: 0,
+      output_audio_tokens: 0,
+      input_image_tokens: 0,
+      cache_read_tokens: 0,
+      cache_write_tokens: 0,
       total_tokens: 0
     )
     private_constant :ZERO_USAGE
@@ -63,13 +68,6 @@ module LLM
     # @return [Symbol]
     attr_reader :mode
-    ##
-    # Returns the default params for this context
-    # @return [Hash]
-    def params
-      @params.dup
-    end
     ##
     # @param [LLM::Provider] llm
     #  A provider
@@ -93,6 +91,13 @@ module LLM
       @messages = LLM::Buffer.new(llm)
     end
+    ##
+    # Returns the default params for this context
+    # @return [Hash]
+    def params
+      @params.dup
+    end
     ##
     # Returns a context compactor
     # This feature is inspired by the compaction approach developed by
@@ -186,14 +191,9 @@ module LLM
     #   res = ctx.talk("Hello, what is your name?")
     #   puts res.messages[0].content
     def talk(prompt, params = {})
-      return respond(prompt, params) if mode == :responses
       @owner = @llm.request_owner
       compactor.compact!(prompt) if compactor.compact?(prompt)
-      params = params.merge(messages: @messages.to_a)
-      params = @params.merge(params)
-      prompt, params = transform(prompt, params)
-      bind!(params[:stream], params[:model], params[:tools])
-      res = @llm.complete(prompt, params)
+      prompt, params, res = mode == :responses ? respond(prompt, params) : complete(prompt, params)
       self.compacted = false
       role = params[:role] || @llm.user_role
       role = @llm.tool_role if params[:role].nil? && [*prompt].grep(LLM::Function::Return).any?
@@ -203,35 +203,6 @@ module LLM
     end
     alias_method :chat, :talk
-    ##
-    # Interact with the context via the responses API.
-    # This method immediately sends a request to the LLM and returns the response.
-    #
-    # @note Not all LLM providers support this API
-    # @param prompt (see LLM::Provider#complete)
-    # @param params The params, including optional :role (defaults to :user), :stream, :tools, :schema etc.
-    # @return [LLM::Response] Returns the LLM's response for this turn.
-    # @example
-    #   llm = LLM.openai(key: ENV["KEY"])
-    #   ctx = LLM::Context.new(llm)
-    #   res = ctx.respond("What is the capital of France?")
-    #   puts res.output_text
-    def respond(prompt, params = {})
-      @owner = @llm.request_owner
-      compactor.compact!(prompt) if compactor.compact?(prompt)
-      params = @params.merge(params)
-      prompt, params = transform(prompt, params)
-      bind!(params[:stream], params[:model], params[:tools])
-      res_id = params[:store] == false ? nil : @messages.find(&:assistant?)&.response&.response_id
-      params = params.merge(previous_response_id: res_id, input: @messages.to_a).compact
-      res = @llm.responses.create(prompt, params)
-      self.compacted = false
-      role = params[:role] || @llm.user_role
-      @messages.concat LLM::Prompt === prompt ? prompt.to_a : [LLM::Message.new(role, prompt)]
-      @messages.concat [res.choices[-1]]
-      res
-    end
     ##
     # @return [String]
     def inspect
@@ -257,18 +228,13 @@ module LLM
     end
     ##
-    # Calls a named collection of work through the context.
-    #
-    # This currently supports `:functions`, forwarding to `functions.call`.
-    #
-    # @param [Symbol] target
-    #  The work collection to call
-    # @return [Array<LLM::Function::Return>]
-    def call(target)
-      case target
-      when :functions then guarded_returns || functions.call
-      else raise ArgumentError, "Unknown target: #{target.inspect}. Expected :functions"
-      end
+    # Returns whether there is pending tool work in this context.
+    # This prefers queued streamed tool work when present, and otherwise
+    # falls back to unresolved functions derived from the message history.
+    # @return [Boolean]
+    def functions?
+      pending = queue
+      (pending && !pending.empty?) || functions.any?
     end
     ##
@@ -307,14 +273,15 @@ module LLM
     # the context's pending functions directly.
     #
     # @param [Symbol, Array<Symbol>] strategy
-    #  The concurrency strategy to use, or the possible concurrency strategies to
-    #  wait on. For example, `[:thread, :ractor]` waits for any queued thread or
-    #  ractor work, in that order.
+    #  If the stream queue already has tool work, `wait` will drain it
+    #  without using this argument.
+    #  Otherwise, this controls how pending functions are resolved directly.
+    #  Use `:call` for sequential execution without spawning.
     # @return [Array<LLM::Function::Return>]
     def wait(strategy)
       if LLM::Stream === stream && !stream.queue.empty?
         @queue = stream.queue
-        @queue.wait(strategy)
+        @queue.wait
       else
         return guarded_returns if guarded_returns
         @queue = functions.spawn(strategy)
@@ -350,6 +317,11 @@ module LLM
           input_tokens: usage.input_tokens || 0,
           output_tokens: usage.output_tokens || 0,
           reasoning_tokens: usage.reasoning_tokens || 0,
+          input_audio_tokens: usage.input_audio_tokens || 0,
+          output_audio_tokens: usage.output_audio_tokens || 0,
+          input_image_tokens: usage.input_image_tokens || 0,
+          cache_read_tokens: usage.cache_read_tokens || 0,
+          cache_write_tokens: usage.cache_write_tokens || 0,
           total_tokens: usage.total_tokens || 0
         )
       else
@@ -414,6 +386,13 @@ module LLM
       @llm.tracer
     end
+    ##
+    # @return [LLM::Stream, #<<, nil]
+    #  Returns a stream object, or nil
+    def stream
+      @stream || @params[:stream]
+    end
     ##
     # Returns the model a Context is actively using
     # @return [String]
@@ -458,12 +437,7 @@ module LLM
     #  Returns an _approximate_ cost for a given context
     #  based on both the provider, and model
     def cost
-      cost = LLM.registry_for(llm).cost(model:)
-      input_cost = (cost.input.to_f / 1_000_000.0) * usage.input_tokens
-      output_cost = (cost.output.to_f / 1_000_000.0) * usage.output_tokens
-      LLM::Cost.new(input_cost, output_cost)
-    rescue LLM::NoSuchModelError, LLM::NoSuchRegistryError
-      LLM::Cost.new(0, 0)
+      LLM::Cost.from(self)
     end
     ##
@@ -485,6 +459,9 @@ module LLM
     private
+    ##
+    # Binds runtime metadata onto an active stream.
+    # @api private
     def bind!(stream, model, tools)
       return unless LLM::Stream === stream
       @stream = stream
@@ -494,25 +471,33 @@ module LLM
       stream.extra[:tools] = tools
     end
+    ##
+    # Returns the bound stream queue, if available.
+    # @api private
     def queue
       return @queue if @queue
       stream.queue if LLM::Stream === stream
     end
-    def stream
-      @stream || @params[:stream]
-    end
+    ##
+    # Loads skill directories and adapts them into tools.
+    # @api private
     def load_skills(skills)
       [*skills].map { LLM::Skill.load(_1).to_tool(self) }
     end
+    ##
+    # Builds in-band guarded returns when the guard blocks tool work.
+    # @api private
     def guarded_returns
       warning = guard&.call(self)
       return unless warning
       functions.map { guarded_return_for(_1, warning) }
     end
+    ##
+    # Rewrites a prompt and params through the configured transformer.
+    # @api private
     def transform(prompt, params)
       return [prompt, params] unless transformer
       stream = params[:stream]
@@ -522,6 +507,32 @@ module LLM
       stream.on_transform_finish(self, transformer) if LLM::Stream === stream
     end
+    ##
+    # Executes a turn through the Responses API.
+    # @api private
+    def respond(prompt, params)
+      params = @params.merge(params)
+      prompt, params = transform(prompt, params)
+      bind!(params[:stream], params[:model], params[:tools])
+      res_id = params[:store] == false ? nil : @messages.find(&:assistant?)&.response&.response_id
+      params = params.merge(previous_response_id: res_id, input: @messages.to_a).compact
+      [prompt, params, @llm.responses.create(prompt, params)]
+    end
+    ##
+    # Executes a turn through the chat completions API.
+    # @api private
+    def complete(prompt, params)
+      params = params.merge(messages: @messages.to_a)
+      params = @params.merge(params)
+      prompt, params = transform(prompt, params)
+      bind!(params[:stream], params[:model], params[:tools])
+      [prompt, params, @llm.complete(prompt, params)]
+    end
+    ##
+    # Builds one guarded tool return for a blocked function call.
+    # @api private
     def guarded_return_for(function, warning)
       LLM::Function::Return.new(function.id, function.name, {
         error: true,
@@ -530,10 +541,4 @@ module LLM
       })
     end
   end
-  # Backward-compatible alias
-  Bot = Context
-  # Scheduled for removal in v6.0
-  deprecate_constant :Bot
 end

data/lib/llm/contract/completion.rb CHANGED Viewed

@@ -36,6 +36,46 @@ module LLM::Contract
       raise NotImplementedError, "#{self.class} does not implement '#{__method__}'"
     end
+    ##
+    # @return [Integer]
+    #  Returns the number of input audio tokens, or 0 when the
+    #  provider does not report input audio usage
+    def input_audio_tokens
+      0
+    end
+    ##
+    # @return [Integer]
+    #  Returns the number of output audio tokens, or 0 when the
+    #  provider does not report output audio usage
+    def output_audio_tokens
+      0
+    end
+    ##
+    # @return [Integer]
+    #  Returns the number of input image tokens, or 0 when the
+    #  provider does not report input image usage
+    def input_image_tokens
+      0
+    end
+    ##
+    # @return [Integer]
+    #  Returns the number of cached input tokens, or 0 when the
+    #  provider does not report cache usage
+    def cache_read_tokens
+      0
+    end
+    ##
+    # @return [Integer]
+    #  Returns the number of cache creation input tokens, or 0 when the
+    #  provider does not report cache creation usage
+    def cache_write_tokens
+      0
+    end
     ##
     # @return [Integer]
     #  Returns the total number of tokens
@@ -72,6 +112,11 @@ module LLM::Contract
         input_tokens:,
         output_tokens:,
         reasoning_tokens:,
+        input_audio_tokens:,
+        output_audio_tokens:,
+        input_image_tokens:,
+        cache_read_tokens:,
+        cache_write_tokens:,
         total_tokens:
       )
     end