RubyGems - llm.rb - Versions diffs - 4.21.0 → 4.23.0 - Mend

llm.rb 4.21.0 → 4.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +78 -0
data/README.md +290 -59
data/data/anthropic.json +35 -2
data/data/google.json +7 -2
data/data/openai.json +0 -30
data/lib/llm/active_record/acts_as_agent.rb +11 -64
data/lib/llm/active_record/acts_as_llm.rb +81 -61
data/lib/llm/agent.rb +15 -3
data/lib/llm/buffer.rb +10 -0
data/lib/llm/compactor.rb +128 -0
data/lib/llm/context.rb +31 -2
data/lib/llm/function.rb +2 -1
data/lib/llm/sequel/agent.rb +4 -17
data/lib/llm/sequel/plugin.rb +82 -60
data/lib/llm/skill.rb +29 -14
data/lib/llm/stream.rb +20 -1
data/lib/llm/tool.rb +14 -0
data/lib/llm/version.rb +1 -1
data/llm.gemspec +3 -0
metadata +44 -1

data/lib/llm/active_record/acts_as_agent.rb CHANGED Viewed

@@ -13,6 +13,7 @@ module LLM::ActiveRecord
     EMPTY_HASH = LLM::ActiveRecord::ActsAsLLM::EMPTY_HASH
     DEFAULT_USAGE_COLUMNS = LLM::ActiveRecord::ActsAsLLM::DEFAULT_USAGE_COLUMNS
     DEFAULTS = LLM::ActiveRecord::ActsAsLLM::DEFAULTS
+    Utils = LLM::ActiveRecord::ActsAsLLM::Utils
     module ClassMethods
       def model(model = nil)
@@ -52,7 +53,7 @@ module LLM::ActiveRecord
       # @param [Class] model
       # @return [void]
       def self.extended(model)
-        options = model.llm_agent_options
+        options = model.llm_plugin_options
         model.validates options[:provider_column], options[:model_column], presence: true
         model.include LLM::ActiveRecord::ActsAsLLM::InstanceMethods unless model.ancestors.include?(LLM::ActiveRecord::ActsAsLLM::InstanceMethods)
         model.include InstanceMethods unless model.ancestors.include?(InstanceMethods)
@@ -79,8 +80,8 @@ module LLM::ActiveRecord
     def acts_as_agent(options = EMPTY_HASH, &block)
       options = DEFAULTS.merge(options)
       usage_columns = DEFAULT_USAGE_COLUMNS.merge(options[:usage_columns] || EMPTY_HASH)
-      class_attribute :llm_agent_options, instance_accessor: false, default: DEFAULTS unless respond_to?(:llm_agent_options)
-      self.llm_agent_options = options.merge(usage_columns: usage_columns.freeze).freeze
+      class_attribute :llm_plugin_options, instance_accessor: false, default: DEFAULTS unless respond_to?(:llm_plugin_options)
+      self.llm_plugin_options = options.merge(usage_columns: usage_columns.freeze).freeze
       extend Hooks
       class_exec(&block) if block
     end
@@ -90,12 +91,13 @@ module LLM::ActiveRecord
       # Returns the resolved provider instance for this record.
       # @return [LLM::Provider]
       def llm
-        options = self.class.llm_agent_options
+        options = self.class.llm_plugin_options
+        columns = Utils.columns(options)
         provider = self[columns[:provider_column]]
-        kwargs = resolve_options(options[:provider])
+        kwargs = Utils.resolve_options(self, options[:provider], ActsAsAgent::EMPTY_HASH)
         return @llm if @llm
         @llm = LLM.method(provider).call(**kwargs)
-        @llm.tracer = resolve_option(options[:tracer]) if options[:tracer]
+        @llm.tracer = Utils.resolve_option(self, options[:tracer]) if options[:tracer]
         @llm
       end
@@ -105,8 +107,9 @@ module LLM::ActiveRecord
       # @return [LLM::Agent]
       def ctx
         @ctx ||= begin
-          options = self.class.llm_agent_options
-          params = resolve_options(options[:context]).dup
+          options = self.class.llm_plugin_options
+          columns = Utils.columns(options)
+          params = Utils.resolve_options(self, options[:context], ActsAsAgent::EMPTY_HASH).dup
           params[:model] ||= self[columns[:model_column]]
           ctx = self.class.agent.new(llm, params.compact)
           data = self[columns[:data_column]]
@@ -121,62 +124,6 @@ module LLM::ActiveRecord
           end
         end
       end
-      ##
-      # @return [void]
-      def flush
-        attrs = {
-          columns[:data_column] => serialize_context(self.class.llm_agent_options[:format]),
-          columns[:input_tokens] => ctx.usage.input_tokens,
-          columns[:output_tokens] => ctx.usage.output_tokens,
-          columns[:total_tokens] => ctx.usage.total_tokens
-        }
-        assign_attributes(attrs)
-        save!
-      end
-      ##
-      # @return [Hash]
-      def resolve_option(option)
-        case option
-        when Proc then instance_exec(&option)
-        when Symbol then send(option)
-        when Hash then option.dup
-        else option
-        end
-      end
-      ##
-      # @return [Hash]
-      def resolve_options(option)
-        case option
-        when Proc, Symbol, Hash then resolve_option(option)
-        else ActsAsAgent::EMPTY_HASH.dup
-        end
-      end
-      def serialize_context(format)
-        case format
-        when :string then ctx.to_json
-        when :json, :jsonb then ctx.to_h
-        else raise ArgumentError, "Unknown format: #{format.inspect}"
-        end
-      end
-      def columns
-        @columns ||= begin
-          options = self.class.llm_agent_options
-          usage_columns = options[:usage_columns]
-          {
-            provider_column: options[:provider_column],
-            model_column: options[:model_column],
-            data_column: options[:data_column],
-            input_tokens: usage_columns[:input_tokens],
-            output_tokens: usage_columns[:output_tokens],
-            total_tokens: usage_columns[:total_tokens]
-          }.freeze
-        end
-      end
     end
   end
 end

data/lib/llm/active_record/acts_as_llm.rb CHANGED Viewed

@@ -33,6 +33,77 @@ module LLM::ActiveRecord
       context: EMPTY_HASH
     }.freeze
+    ##
+    # Shared helper methods for the ORM wrapper.
+    #
+    # These utilities keep persistence plumbing out of the wrapped model's
+    # method namespace so the injected surface stays focused on the runtime
+    # API itself.
+    # @api private
+    module Utils
+      ##
+      # Resolves a single configured option against a model instance.
+      # @return [Object]
+      def self.resolve_option(obj, option)
+        case option
+        when Proc then obj.instance_exec(&option)
+        when Symbol then obj.send(option)
+        when Hash then option.dup
+        else option
+        end
+      end
+      ##
+      # Resolves hash-like wrapper options against a model instance.
+      # @return [Hash]
+      def self.resolve_options(obj, option, empty_hash)
+        case option
+        when Proc, Symbol, Hash then resolve_option(obj, option)
+        else empty_hash.dup
+        end
+      end
+      ##
+      # Serializes the runtime into the configured storage format.
+      # @return [String, Hash]
+      def self.serialize_context(ctx, format)
+        case format
+        when :string then ctx.to_json
+        when :json, :jsonb then ctx.to_h
+        else raise ArgumentError, "Unknown format: #{format.inspect}"
+        end
+      end
+      ##
+      # Maps wrapper options onto the record's storage columns.
+      # @return [Hash]
+      def self.columns(options)
+        usage_columns = options[:usage_columns]
+        {
+          provider_column: options[:provider_column],
+          model_column: options[:model_column],
+          data_column: options[:data_column],
+          input_tokens: usage_columns[:input_tokens],
+          output_tokens: usage_columns[:output_tokens],
+          total_tokens: usage_columns[:total_tokens]
+        }.freeze
+      end
+      ##
+      # Persists the runtime state and usage columns back onto the record.
+      # @return [void]
+      def self.save(obj, ctx, options)
+        columns = self.columns(options)
+        obj.assign_attributes(
+          columns[:data_column] => serialize_context(ctx, options[:format]),
+          columns[:input_tokens] => ctx.usage.input_tokens,
+          columns[:output_tokens] => ctx.usage.output_tokens,
+          columns[:total_tokens] => ctx.usage.total_tokens
+        )
+        obj.save!
+      end
+    end
     module Hooks
       ##
       # Called when hooks are extended onto an ActiveRecord model.
@@ -72,7 +143,8 @@ module LLM::ActiveRecord
       # @see LLM::Context#talk
       # @return [LLM::Response]
       def talk(...)
-        ctx.talk(...).tap { flush }
+        options = self.class.llm_plugin_options
+        ctx.talk(...).tap { Utils.save(self, ctx, options) }
       end
       ##
@@ -80,7 +152,8 @@ module LLM::ActiveRecord
       # @see LLM::Context#respond
       # @return [LLM::Response]
       def respond(...)
-        ctx.respond(...).tap { flush }
+        options = self.class.llm_plugin_options
+        ctx.respond(...).tap { Utils.save(self, ctx, options) }
       end
       ##
@@ -155,6 +228,7 @@ module LLM::ActiveRecord
       # Returns usage from the mapped usage columns.
       # @return [LLM::Object]
       def usage
+        columns = Utils.columns(self.class.llm_plugin_options)
         LLM::Object.from(
           input_tokens: self[columns[:input_tokens]] || 0,
           output_tokens: self[columns[:output_tokens]] || 0,
@@ -211,11 +285,12 @@ module LLM::ActiveRecord
       # @return [LLM::Provider]
       def llm
         options = self.class.llm_plugin_options
+        columns = Utils.columns(options)
         provider = self[columns[:provider_column]]
-        kwargs = resolve_options(options[:provider])
+        kwargs = Utils.resolve_options(self, options[:provider], ActsAsLLM::EMPTY_HASH)
         return @llm if @llm
         @llm = LLM.method(provider).call(**kwargs)
-        @llm.tracer = resolve_option(options[:tracer]) if options[:tracer]
+        @llm.tracer = Utils.resolve_option(self, options[:tracer]) if options[:tracer]
         @llm
       end
@@ -226,7 +301,8 @@ module LLM::ActiveRecord
       def ctx
         @ctx ||= begin
           options = self.class.llm_plugin_options
-          params = resolve_options(options[:context]).dup
+          columns = Utils.columns(options)
+          params = Utils.resolve_options(self, options[:context], ActsAsLLM::EMPTY_HASH).dup
           params[:model] ||= self[columns[:model_column]]
           ctx = LLM::Context.new(llm, params.compact)
           data = self[columns[:data_column]]
@@ -241,62 +317,6 @@ module LLM::ActiveRecord
           end
         end
       end
-      ##
-      # @return [void]
-      def flush
-        attrs = {
-          columns[:data_column] => serialize_context(self.class.llm_plugin_options[:format]),
-          columns[:input_tokens] => ctx.usage.input_tokens,
-          columns[:output_tokens] => ctx.usage.output_tokens,
-          columns[:total_tokens] => ctx.usage.total_tokens
-        }
-        assign_attributes(attrs)
-        save!
-      end
-      ##
-      # @return [Hash]
-      def resolve_option(option)
-        case option
-        when Proc then instance_exec(&option)
-        when Symbol then send(option)
-        when Hash then option.dup
-        else option
-        end
-      end
-      ##
-      # @return [Hash]
-      def resolve_options(option)
-        case option
-        when Proc, Symbol, Hash then resolve_option(option)
-        else ActsAsLLM::EMPTY_HASH.dup
-        end
-      end
-      def serialize_context(format)
-        case format
-        when :string then ctx.to_json
-        when :json, :jsonb then ctx.to_h
-        else raise ArgumentError, "Unknown format: #{format.inspect}"
-        end
-      end
-      def columns
-        @columns ||= begin
-          options = self.class.llm_plugin_options
-          usage_columns = options[:usage_columns]
-          {
-            provider_column: options[:provider_column],
-            model_column: options[:model_column],
-            data_column: options[:data_column],
-            input_tokens: usage_columns[:input_tokens],
-            output_tokens: usage_columns[:output_tokens],
-            total_tokens: usage_columns[:total_tokens]
-          }.freeze
-        end
-      end
     end
   end
 end

data/lib/llm/agent.rb CHANGED Viewed

@@ -14,7 +14,7 @@ module LLM
   # `respond`, instead of leaving tool loops to the caller.
   #
   # **Notes:**
-  # * Instructions are injected only on the first request.
+  # * Instructions are injected once unless a system message is already present.
   # * An agent automatically executes tool loops (unlike {LLM::Context LLM::Context}).
   # * Tool loop execution can be configured with `concurrency :call`,
   #   `:thread`, `:task`, `:fiber`, `:ractor`, or a list of queued task
@@ -349,16 +349,28 @@ module LLM
       instr = self.class.instructions
       return new_prompt unless instr
       if LLM::Prompt === new_prompt
-        new_prompt.system(instr) if @ctx.messages.empty?
+        new_prompt.system(instr) if inject_instructions?(new_prompt)
         new_prompt
       else
         prompt do
-          _1.system(instr) if @ctx.messages.empty?
+          _1.system(instr) if inject_instructions?
           _1.user(new_prompt)
         end
       end
     end
+    ##
+    # Returns true when agent instructions should be injected for the turn.
+    # Instructions are injected once unless a system message is already
+    # present in the existing context or the prompt being sent.
+    # @param [LLM::Prompt, nil] prompt
+    # @return [Boolean]
+    def inject_instructions?(prompt = nil)
+      return false if @ctx.messages.any?(&:system?)
+      return true if prompt.nil?
+      !prompt.to_a.any?(&:system?)
+    end
     ##
     # @return [Array<LLM::Function::Return>]
     def call_functions

data/lib/llm/buffer.rb CHANGED Viewed

@@ -23,6 +23,16 @@ module LLM
       @messages.concat(ary)
     end
+    ##
+    # Replace the tracked messages
+    # @param [Array<LLM::Message>] messages
+    #  The replacement messages
+    # @return [LLM::Buffer]
+    def replace(messages)
+      @messages.replace(messages)
+      self
+    end
     ##
     # @yield [LLM::Message]
     #  Yields each message in the conversation thread

data/lib/llm/compactor.rb ADDED Viewed

@@ -0,0 +1,128 @@
+# frozen_string_literal: true
+##
+# {LLM::Compactor LLM::Compactor} summarizes older context messages into a
+# smaller replacement message when a context grows too large.
+#
+# This work is directly inspired by the compaction approach developed by
+# General Intelligence Systems in
+# [Brute](https://github.com/general-intelligence-systems/brute).
+#
+# The compactor can also use a different model from the main context by
+# setting `model:` in the compactor config. By default, `token_threshold` is
+# 10% less than the current context window, or `100_000` when the context
+# window is unknown. Set `message_threshold:` or `token_threshold:` to `nil`
+# to disable that constraint.
+class LLM::Compactor
+  DEFAULT_TOKEN_THRESHOLD = 100_000
+  DEFAULTS = {
+    message_threshold: 200,
+    retention_window: 8,
+    model: nil
+  }.freeze
+  ##
+  # @return [Hash]
+  attr_reader :config
+  ##
+  # @param [LLM::Context] ctx
+  # @param [Hash] config
+  # @option config [Integer] :token_threshold
+  #  Defaults to 10% less than the current context window, or `100_000` when
+  #  the context window is unknown. Set to `nil` to disable token-based
+  #  compaction.
+  # @option config [Integer] :message_threshold
+  #  Set to `nil` to disable message-count-based compaction.
+  # @option config [Integer] :retention_window
+  # @option config [String, nil] :model
+  #  The model to use for the summarization request. Defaults to the current
+  #  context model.
+  def initialize(ctx, **config)
+    @ctx = ctx
+    @config = DEFAULTS.merge(token_threshold: default_token_threshold).merge(config)
+  end
+  ##
+  # Returns true when the context should be compacted
+  # @param [Object] prompt
+  #  The next prompt or turn input
+  # @return [Boolean]
+  def compact?(prompt = nil)
+    return false if ctx.functions.any? || [*prompt].grep(LLM::Function::Return).any?
+    messages = ctx.messages.reject(&:system?)
+    return true if config[:message_threshold] && messages.size > config[:message_threshold]
+    usage = ctx.usage
+    return true if config[:token_threshold] && usage && usage.total_tokens > config[:token_threshold]
+    false
+  end
+  ##
+  # Summarize older messages and replace them with a compact summary.
+  # @param [Object] prompt
+  #  The next prompt or turn input
+  # @return [LLM::Message, nil]
+  def compact!(prompt = nil)
+    return nil if ctx.functions.any? || [*prompt].grep(LLM::Function::Return).any?
+    messages = ctx.messages.reject(&:system?)
+    retention_window = [config[:retention_window], messages.size].min
+    return nil unless messages.size > retention_window
+    stream = ctx.params[:stream]
+    stream.on_compaction(ctx, self) if LLM::Stream === stream
+    recent = retained_messages
+    older = messages[0...(messages.size - recent.size)]
+    summary = LLM::Message.new(ctx.llm.user_role, "[Previous conversation summary]\n\n#{summarize(older)}")
+    ctx.messages.replace([*ctx.messages.take_while(&:system?), summary, *recent])
+    stream.on_compaction_finish(ctx, self) if LLM::Stream === stream
+    summary
+  end
+  private
+  attr_reader :ctx
+  def default_token_threshold
+    window = ctx.context_window
+    return DEFAULT_TOKEN_THRESHOLD if window.zero?
+    window - (window / 10)
+  end
+  def retained_messages
+    messages = ctx.messages.reject(&:system?)
+    retention_window = [config[:retention_window], messages.size].min
+    start = [messages.size - retention_window, 0].max
+    start -= 1 while start > 0 && messages[start].tool_return?
+    messages[start..] || []
+  end
+  def summarize(messages)
+    model = config[:model] || ctx.params[:model] || ctx.llm.default_model
+    ctx.llm.complete(summary_prompt(messages), model:).content
+  end
+  def summary_prompt(messages)
+    <<~PROMPT
+      Summarize this conversation history for context continuity.
+      The summary will replace these messages in the context window.
+      Focus on:
+      - What the user asked for
+      - Important facts and decisions
+      - Tool calls and outcomes that still matter
+      - What should happen next
+      Conversation:
+      #{serialize(messages)}
+    PROMPT
+  end
+  def serialize(messages)
+    messages.map do |message|
+      content = case message.content
+      when Array then message.content.map(&:inspect).join(", ")
+      else message.content.to_s
+      end
+      "#{message.role}: #{content.empty? ? "(empty)" : content}"
+    end.join("\n---\n")
+  end
+end

data/lib/llm/context.rb CHANGED Viewed

@@ -34,6 +34,7 @@ module LLM
   #   ctx.talk(prompt)
   #   ctx.messages.each { |m| puts "[#{m.role}] #{m.content}" }
   class Context
+    require_relative "compactor"
     require_relative "context/serializer"
     require_relative "context/deserializer"
     include Serializer
@@ -54,6 +55,13 @@ module LLM
     # @return [Symbol]
     attr_reader :mode
+    ##
+    # Returns the default params for this context
+    # @return [Hash]
+    def params
+      @params.dup
+    end
     ##
     # @param [LLM::Provider] llm
     #  A provider
@@ -68,12 +76,24 @@ module LLM
     def initialize(llm, params = {})
       @llm = llm
       @mode = params.delete(:mode) || :completions
+      @compactor = params.delete(:compactor)
       tools = [*params.delete(:tools), *load_skills(params.delete(:skills))]
       @params = {model: llm.default_model, schema: nil}.compact.merge!(params)
       @params[:tools] = tools unless tools.empty?
       @messages = LLM::Buffer.new(llm)
     end
+    ##
+    # Returns a context compactor
+    # This feature is inspired by the compaction approach developed by
+    # General Intelligence Systems in
+    # [Brute](https://github.com/general-intelligence-systems/brute).
+    # @return [LLM::Compactor]
+    def compactor
+      @compactor = LLM::Compactor.new(self, **(@compactor || {})) unless LLM::Compactor === @compactor
+      @compactor
+    end
     ##
     # Interact with the context via the chat completions API.
     # This method immediately sends a request to the LLM and returns the response.
@@ -89,6 +109,7 @@ module LLM
     def talk(prompt, params = {})
       return respond(prompt, params) if mode == :responses
       @owner = Fiber.current
+      compactor.compact!(prompt) if compactor.compact?(prompt)
       params = params.merge(messages: @messages.to_a)
       params = @params.merge(params)
       bind!(params[:stream], params[:model])
@@ -116,6 +137,7 @@ module LLM
     #   puts res.output_text
     def respond(prompt, params = {})
       @owner = Fiber.current
+      compactor.compact!(prompt) if compactor.compact?(prompt)
       params = @params.merge(params)
       bind!(params[:stream], params[:model])
       res_id = params[:store] == false ? nil : @messages.find(&:assistant?)&.response&.response_id
@@ -217,7 +239,14 @@ module LLM
     # messages.
     # @return [LLM::Object, nil]
     def usage
-      @messages.find(&:assistant?)&.usage
+      usage = @messages.find(&:assistant?)&.usage
+      return unless usage
+      LLM::Object.from(
+        input_tokens: usage.input_tokens || 0,
+        output_tokens: usage.output_tokens || 0,
+        reasoning_tokens: usage.reasoning_tokens || 0,
+        total_tokens: usage.total_tokens || 0
+      )
     end
     ##
@@ -350,7 +379,7 @@ module LLM
     end
     def load_skills(skills)
-      [*skills].map { LLM::Skill.load(_1).to_tool(llm) }
+      [*skills].map { LLM::Skill.load(_1).to_tool(self) }
     end
   end

data/lib/llm/function.rb CHANGED Viewed

@@ -266,9 +266,10 @@ class LLM::Function
         parameters: (@params || {type: "object", properties: {}}).to_h.merge(additionalProperties: false), strict: false
       }.compact
     else
+      params = @params || {type: "object", properties: {}}
       {
         type: "function", name: @name,
-        function: {name: @name, description: @description, parameters: @params}
+        function: {name: @name, description: @description, parameters: params}
       }.compact
     end
   end

data/lib/llm/sequel/agent.rb CHANGED Viewed

@@ -10,9 +10,11 @@ module LLM::Sequel
   # instructions, and concurrency are configured on the model class and
   # forwarded to an internal agent subclass.
   module Agent
+    require_relative "plugin"
     EMPTY_HASH = LLM::Sequel::Plugin::EMPTY_HASH
     DEFAULT_USAGE_COLUMNS = LLM::Sequel::Plugin::DEFAULT_USAGE_COLUMNS
     DEFAULTS = LLM::Sequel::Plugin::DEFAULTS
+    Utils = LLM::Sequel::Plugin::Utils
     def self.apply(model, **)
       model.extend ClassMethods
@@ -71,7 +73,8 @@ module LLM::Sequel
       def ctx
         @ctx ||= begin
           options = self.class.llm_plugin_options
-          params = resolve_options(options[:context]).dup
+          columns = Agent::Utils.columns(options)
+          params = Agent::Utils.resolve_options(self, options[:context], Agent::EMPTY_HASH).dup
           params[:model] ||= self[columns[:model_column]]
           ctx = self.class.agent.new(llm, params.compact)
           data = self[columns[:data_column]]
@@ -86,22 +89,6 @@ module LLM::Sequel
           end
         end
       end
-      def resolve_option(option)
-        case option
-        when Proc then instance_exec(&option)
-        when Symbol then send(option)
-        when Hash then option.dup
-        else option
-        end
-      end
-      def resolve_options(option)
-        case option
-        when Proc, Symbol, Hash then resolve_option(option)
-        else Agent::EMPTY_HASH.dup
-        end
-      end
     end
   end
 end