RubyGems - rcrewai - Versions diffs - 0.3.0 → 0.5.0 - Mend

rcrewai 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

checksums.yaml +4 -4
data/.rubocop.yml +20 -0
data/CHANGELOG.md +55 -1
data/README.md +250 -0
data/ROADMAP.md +90 -0
data/docs/upgrading-to-0.4.md +191 -0
data/examples/flow_example.rb +89 -0
data/examples/knowledge_rag_example.rb +72 -0
data/examples/planning_and_training_example.rb +72 -0
data/examples/structured_output_example.rb +92 -0
data/lib/rcrewai/agent.rb +72 -6
data/lib/rcrewai/agent_augmentations.rb +75 -0
data/lib/rcrewai/configuration.rb +20 -0
data/lib/rcrewai/context_window.rb +75 -0
data/lib/rcrewai/crew.rb +122 -6
data/lib/rcrewai/flow/state.rb +47 -0
data/lib/rcrewai/flow/state_store.rb +50 -0
data/lib/rcrewai/flow.rb +243 -0
data/lib/rcrewai/knowledge/base.rb +52 -0
data/lib/rcrewai/knowledge/chunker.rb +31 -0
data/lib/rcrewai/knowledge/embedder.rb +48 -0
data/lib/rcrewai/knowledge/sources.rb +83 -0
data/lib/rcrewai/knowledge/store.rb +58 -0
data/lib/rcrewai/knowledge.rb +13 -0
data/lib/rcrewai/legacy_react_runner.rb +7 -1
data/lib/rcrewai/llm_client.rb +23 -0
data/lib/rcrewai/multimodal.rb +67 -0
data/lib/rcrewai/output_schema.rb +79 -0
data/lib/rcrewai/planning.rb +65 -0
data/lib/rcrewai/rate_limiter.rb +94 -0
data/lib/rcrewai/task.rb +90 -2
data/lib/rcrewai/tool_runner.rb +7 -1
data/lib/rcrewai/version.rb +1 -1
data/lib/rcrewai.rb +5 -0
metadata +22 -1

data/examples/flow_example.rb ADDED Viewed

@@ -0,0 +1,89 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# Flows — event-driven workflows (RCrewAI's second pillar).
+#
+# Subclass RCrewAI::Flow and wire methods together with the class-level DSL:
+# `start` kicks things off, `listen` reacts to another method's output, and
+# `router` branches by emitting a label that listeners can trigger on. State
+# is a schemaless object with an automatic UUID, and can be persisted so a run
+# can be resumed later.
+#
+# This example needs no API key — it demonstrates the engine itself.
+#
+# Run:
+#   ruby examples/flow_example.rb
+require_relative '../lib/rcrewai'
+# A tiny content pipeline: outline -> draft -> review (router) -> publish/expand.
+class ArticleFlow < RCrewAI::Flow
+  # Entry point of the flow: records the section list on the shared state.
+  start :outline
+  def outline
+    state.sections = %w[intro body conclusion]
+    state.sections.length # this return value is passed to listeners of :outline
+  end
+  # Runs after :outline and receives its return value (the section count).
+  listen :outline
+  def draft(section_count)
+    state.words = section_count * 100 # 3 sections -> 300 words in this demo
+    state.words
+  end
+  # A router's return value (:publish / :expand) becomes a label that the
+  # matching `listen` methods fire on.
+  router :draft
+  def review(words)
+    words >= 250 ? :publish : :expand
+  end
+  # Fires when the router emits :publish.
+  listen :publish
+  def publish
+    state.status = 'published'
+  end
+  # Fires when the router emits :expand.
+  listen :expand
+  def expand
+    state.status = 'needs more work'
+  end
+end
+puts '== Basic run =='
+flow = ArticleFlow.new
+flow.kickoff(inputs: { author: 'Ada' })
+puts "id:       #{flow.state.id}"
+puts "author:   #{flow.state.author}      (seeded via kickoff inputs)"
+puts "sections: #{flow.state.sections.inspect}"
+puts "words:    #{flow.state.words}"
+puts "status:   #{flow.state.status.inspect}   (routed to :publish since words >= 250)"
+puts "\n== and_/or_ combinators =="
+# Demonstrates the and_ combinator: two independent start methods feed a
+# single listener.
+class GateFlow < RCrewAI::Flow
+  # Each entry point is declared with its own `start`.
+  start :fetch_a
+  def fetch_a = 'A'
+  start :fetch_b
+  def fetch_b = 'B'
+  # Fires only after BOTH starts complete.
+  listen and_(:fetch_a, :fetch_b)
+  def merge
+    state.merged = 'both done'
+  end
+end
+gate = GateFlow.new
+gate.kickoff
+puts "merged: #{gate.state.merged.inspect}   (and_ waited for both starts)"
+puts "\n== Persistence round-trip =="
+require 'tmpdir'
+store = RCrewAI::Flow::FileStateStore.new(File.join(Dir.tmpdir, 'rcrewai-flow-demo'))
+original = ArticleFlow.new(state_store: store)
+original.kickoff
+id = original.state.id
+resumed = ArticleFlow.new(state_store: store)
+resumed.restore(id)
+puts "restored status for #{id[0, 8]}...: #{resumed.state.status.inspect}"

data/examples/knowledge_rag_example.rb ADDED Viewed

@@ -0,0 +1,72 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# Knowledge (RAG) — ground agents in your own documents.
+#
+# Sources (strings, files, PDFs, CSVs, URLs) are chunked, embedded, and stored
+# in an in-memory cosine-similarity vector store. At execution time the most
+# relevant chunks are injected into the agent's task prompt.
+#
+# This example uses a fake, deterministic embedder so it runs WITHOUT an API
+# key. In real use you'd omit `embedder:` and let it default to OpenAI's
+# text-embedding-3-small (set OPENAI_API_KEY).
+#
+# Run:
+#   ruby examples/knowledge_rag_example.rb
+require_relative '../lib/rcrewai'
+# A toy embedder: maps text to a small vector by keyword presence. Any object
+# responding to `embed(texts) -> [[float, ...], ...]` works here.
+class KeywordEmbedder
+  # One vector dimension per keyword, in this order.
+  KEYWORDS = %w[refund shipping warranty].freeze
+  # Embeds each text as a KEYWORDS-sized vector of 1.0/0.0 flags.
+  #
+  # @param texts [Array<String>] texts to embed
+  # @return [Array<Array<Float>>] one vector per text; a component is 1.0 when
+  #   the lower-cased text contains the corresponding keyword as a substring
+  def embed(texts)
+    texts.map do |t|
+      lower = t.downcase # keyword match is case-insensitive
+      KEYWORDS.map { |kw| lower.include?(kw) ? 1.0 : 0.0 }
+    end
+  end
+end
+# 1. Build a knowledge base from a few policy snippets.
+knowledge = RCrewAI::Knowledge::Base.new(
+  sources: [
+    RCrewAI::Knowledge::StringSource.new('Refunds are available within 30 days of purchase.'),
+    RCrewAI::Knowledge::StringSource.new('Standard shipping takes 5-7 business days.'),
+    RCrewAI::Knowledge::StringSource.new('The warranty covers manufacturing defects for one year.')
+  ],
+  embedder: KeywordEmbedder.new
+)
+# 2. Retrieve directly (what the agent does under the hood).
+puts '== Direct retrieval =='
+%w[refund shipping warranty].each do |query|
+  top = knowledge.search(query, k: 1).first
+  puts "#{query.ljust(9)} -> #{top}"
+end
+# 3. Attach the knowledge to an agent and see it injected into the prompt.
+puts "\n== Injected into the agent prompt =="
+RCrewAI.configure(validate: false) do |c|
+  c.llm_provider = :openai
+  c.api_key = 'demo-key' # not used — we only build the prompt below
+end
+agent = RCrewAI::Agent.new(
+  name: 'support',
+  role: 'Customer support specialist',
+  goal: 'Answer customer questions using company policy',
+  knowledge: knowledge
+)
+task = RCrewAI::Task.new(
+  name: 'answer',
+  description: 'What is the refund policy?',
+  agent: agent
+)
+messages = agent.send(:build_initial_messages, task)
+puts messages.find { |m| m[:role] == 'user' }[:content]
+# Crew-level knowledge is shared with every agent, e.g.:
+#   crew = RCrewAI::Crew.new('support', knowledge: knowledge)

data/examples/planning_and_training_example.rb ADDED Viewed

@@ -0,0 +1,72 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# Crew planning, plus the train/test workflows.
+#
+#   - planning: true            -> a planner pass drafts a per-task plan and
+#                                  folds it into each task's description before
+#                                  execution.
+#   - crew.train(...)           -> runs the crew repeatedly, collecting feedback
+#                                  after each run and persisting it as JSON.
+#   - crew.test(...)            -> runs the crew repeatedly and scores each run.
+#
+# This example stubs the planner LLM and the process so it runs WITHOUT an API
+# key, focusing on the wiring.
+#
+# Run:
+#   ruby examples/planning_and_training_example.rb
+require_relative '../lib/rcrewai'
+require 'tmpdir'
+RCrewAI.configure(validate: false) do |c|
+  c.llm_provider = :openai
+  c.api_key = 'demo-key'
+end
+# A fake planner client: returns a JSON map of task name -> plan.
+class FakePlanner
+  # Accepts and ignores any keyword arguments, then returns a canned response
+  # whose :content is a JSON object keyed by the task names used below
+  # ("research", "summarize").
+  def chat(**)
+    { content: '{"research": "list 3 sources", "summarize": "write 5 bullets"}' }
+  end
+end
+agent = RCrewAI::Agent.new(name: 'analyst', role: 'Analyst', goal: 'Analyze')
+research = RCrewAI::Task.new(name: 'research', description: 'Research the topic', agent: agent)
+summarize = RCrewAI::Task.new(name: 'summarize', description: 'Summarize findings', agent: agent)
+crew = RCrewAI::Crew.new('analysis', planning: true, planning_llm: FakePlanner.new)
+crew.add_agent(agent)
+crew.add_task(research)
+crew.add_task(summarize)
+# Stub the actual task execution so the demo needs no live LLM.
+# NOTE: this reopens the library class and replaces #execute for the rest of
+# the process — acceptable in a throwaway demo script, not in application code.
+module RCrewAI
+  module Process
+    class Sequential
+      # Stubbed execution: always reports a single completed result without
+      # running any task.
+      def execute
+        [{ status: :completed }]
+      end
+    end
+  end
+end
+puts '== Planning pass =='
+crew.execute
+puts "research.description:\n  #{research.description.gsub("\n", "\n  ")}"
+puts "summarize.description:\n  #{summarize.description.gsub("\n", "\n  ")}"
+puts "\n== Training (feedback persisted to JSON) =="
+file = File.join(Dir.tmpdir, 'rcrewai-training-demo.json')
+summary = crew.train(
+  n_iterations: 3,
+  filename: file,
+  feedback: ->(iteration, _result) { "run #{iteration}: looked good" }
+)
+puts "iterations: #{summary[:iterations]}, file: #{summary[:filename]}"
+puts File.read(file)
+File.delete(file)
+puts "\n== Testing (per-run scores) =="
+result = crew.test(n_iterations: 3, scorer: ->(_run) { 90.0 + rand(10) })
+puts "scores: #{result[:scores].inspect}, average: #{result[:average_score]}"

data/examples/structured_output_example.rb ADDED Viewed

@@ -0,0 +1,92 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# Structured output, guardrails, and file output on a Task.
+#
+# After the agent produces its answer, a Task can:
+#   - validate & coerce it against a JSON schema  (output_schema:)
+#   - validate & transform it with a guardrail     (guardrail:)
+#   - write it to disk, optionally as markdown      (output_file:, markdown:)
+#
+# Schema/guardrail failures re-run the agent with the error fed back.
+#
+# This example stubs the agent so it runs WITHOUT an API key. In real use the
+# agent calls your configured LLM.
+#
+# Run:
+#   ruby examples/structured_output_example.rb
+require_relative '../lib/rcrewai'
+require 'tmpdir'
+# A stand-in agent: returns canned responses so we can demonstrate the
+# post-processing pipeline deterministically. A real Agent behaves the same
+# way from the Task's point of view (it returns { content: "..." }).
+class ScriptedAgent
+  # @param responses [Array<String>] canned answers, returned in order. The
+  #   array is consumed in place by #execute_task (the caller's array shrinks).
+  def initialize(responses)
+    @responses = responses
+  end
+  # No tools: this stand-in only replays text.
+  def tools = []
+  # Returns the next canned answer; once the list is exhausted the content is
+  # nil because Array#shift returns nil on an empty array.
+  def execute_task(_task)
+    { content: @responses.shift }
+  end
+end
+puts '== Structured output (with a repair retry) =='
+# First response is invalid JSON; the task feeds the error back and retries,
+# and the second response conforms to the schema.
+agent = ScriptedAgent.new(['sorry, not sure', '{"title": "Q3 Report", "words": 1200}'])
+task = RCrewAI::Task.new(
+  name: 'extract',
+  description: 'Extract the article title and word count as JSON',
+  agent: agent,
+  output_schema: {
+    type: 'object',
+    properties: { title: { type: 'string' }, words: { type: 'integer' } },
+    required: ['title']
+  }
+)
+task.execute
+puts "structured_output: #{task.structured_output.inspect}"
+puts "raw_result:        #{task.raw_result.inspect}"
+puts "\n== Guardrail (transform + reject/retry) =="
+# The guardrail requires the answer to mention a price; the first attempt does
+# not, so the task re-runs, and the second attempt passes (and is stripped).
+agent = ScriptedAgent.new(['no price yet', '  Final price: $49  '])
+guardrail = lambda do |output|
+  if output.include?('$')
+    [true, output.strip] # accept + transform
+  else
+    [false, 'must include a price'] # reject with a reason (fed back to the agent)
+  end
+end
+task = RCrewAI::Task.new(
+  name: 'quote',
+  description: 'Give the final price',
+  agent: agent,
+  guardrail: guardrail,
+  guardrail_max_retries: 2
+)
+puts "result: #{task.execute.inspect}"
+puts "\n== File output (markdown) =="
+agent = ScriptedAgent.new(['All systems nominal.'])
+path = File.join(Dir.tmpdir, 'rcrewai-report-demo.md')
+task = RCrewAI::Task.new(
+  name: 'report',
+  description: 'Write a status report',
+  agent: agent,
+  output_file: path,
+  markdown: true
+)
+task.execute
+puts "wrote #{path}:"
+puts File.read(path)
+File.delete(path)

data/lib/rcrewai/agent.rb CHANGED Viewed

@@ -3,6 +3,9 @@
 require 'logger'
 require_relative 'llm_client'
 require_relative 'memory'
+require_relative 'rate_limiter'
+require_relative 'agent_augmentations'
+require_relative 'multimodal'
 require_relative 'tools/base'
 require_relative 'tool_runner'
 require_relative 'legacy_react_runner'
@@ -11,8 +14,11 @@ require_relative 'human_input'
 module RCrewAI
   class Agent
     include HumanInteractionExtensions
-    attr_reader :name, :role, :goal, :backstory, :tools, :memory, :llm_client
+    include AgentAugmentations
+    attr_reader :name, :role, :goal, :backstory, :tools, :memory, :llm_client, :knowledge, :rate_limiter
     attr_accessor :verbose, :allow_delegation, :max_iterations, :max_execution_time, :manager
+    # Set by the crew so agents see shared knowledge in addition to their own.
+    attr_writer :crew_knowledge
     def initialize(name:, role:, goal:, backstory: nil, tools: [], **options)
       @name = name
@@ -30,8 +36,13 @@ module RCrewAI
       @require_approval_for_final_answer = options.fetch(:require_approval_for_final_answer, false)
       @logger = Logger.new($stdout)
       @logger.level = verbose ? Logger::DEBUG : Logger::INFO
+      @reasoning = options.fetch(:reasoning, false)
+      @max_reasoning_attempts = options.fetch(:max_reasoning_attempts, 3)
+      @respect_context_window = options.fetch(:respect_context_window, false)
       @memory = Memory.new
-      @llm_client = LLMClient.for_provider
+      @rate_limiter = options[:max_rpm] ? RateLimiter.new(max_rpm: options[:max_rpm]) : nil
+      @llm_client = wrap_with_rate_limiter(build_llm_client(options[:llm]))
+      @knowledge = build_knowledge(options[:knowledge], options[:knowledge_sources])
       @subordinates = [] # For manager agents
     end
@@ -43,6 +54,9 @@ module RCrewAI
         initial_messages = build_initial_messages(task)
         sink = stream || ->(_) {}
+        reasoning = reasoning? ? run_reasoning_pass(task) : nil
+        initial_messages = inject_reasoning(initial_messages, reasoning) if reasoning
         runner_class = pick_runner_class
         @logger.info "[rcrewai] agent=#{name} runner=#{runner_class.name.split('::').last}"
@@ -60,7 +74,7 @@ module RCrewAI
         memory.add_execution(task, result_string, execution_time)
         task.result = result_string
-        build_task_result(task, runner_result)
+        build_task_result(task, runner_result, reasoning: reasoning)
       rescue StandardError => e
         @logger.error "Task execution failed: #{e.message}"
         task.result = "Task failed: #{e.message}"
@@ -194,6 +208,28 @@ module RCrewAI
     private
+    # Resolves the +llm:+ option into an LLM client. See LLMClient.resolve.
+    # The value is passed through untouched (including nil when the option was
+    # not given); all interpretation happens in LLMClient.resolve.
+    def build_llm_client(llm)
+      LLMClient.resolve(llm)
+    end
+    # Wraps the client so every #chat is throttled, when a rate limiter is set.
+    # Relies on @rate_limiter already being assigned; it is nil unless the
+    # +max_rpm:+ option was given, in which case the client is returned as-is.
+    def wrap_with_rate_limiter(client)
+      return client unless @rate_limiter
+      RateLimiter::ThrottledClient.new(client, @rate_limiter)
+    end
+    # Accepts a pre-built Knowledge::Base via +knowledge:+ or an array of
+    # sources via +knowledge_sources:+ (wrapped in a Base). Returns nil if
+    # neither is given.
+    #
+    # A pre-built +knowledge+ wins: when both are supplied, +sources+ is ignored.
+    # An empty +sources+ array is treated the same as nil.
+    def build_knowledge(knowledge, sources)
+      return knowledge if knowledge
+      return nil if sources.nil? || sources.empty?
+      Knowledge::Base.new(sources: sources)
+    end
     def build_context(task)
       context = {
         agent_role: role,
@@ -226,13 +262,39 @@ module RCrewAI
       user << "\nExpected Output: #{task.expected_output}" if task.expected_output
       user << "\nAdditional Context:\n#{ctx[:context_data]}" if ctx[:context_data] && !ctx[:context_data].to_s.empty?
+      knowledge = retrieve_knowledge(task)
+      user << "\n\nRelevant Knowledge:\n#{knowledge}" unless knowledge.empty?
       [
         { role: 'system', content: system },
-        { role: 'user', content: user }
+        { role: 'user', content: build_user_content(user, task) }
       ]
     end
-    def build_task_result(task, runner_result)
+    # Returns a plain string, or an OpenAI-style multimodal parts array when the
+    # task carries attachments (guarded to providers that support it).
+    #
+    # Tasks that do not respond to #attachments, or whose attachments are nil or
+    # empty, get +text+ back unchanged.
+    # NOTE(review): the provider check reads the global configuration, not the
+    # agent's own client — confirm this is intended when a per-agent +llm:+
+    # override targets a different provider.
+    def build_user_content(text, task)
+      attachments = task.respond_to?(:attachments) ? task.attachments : nil
+      return text if attachments.nil? || attachments.empty?
+      Multimodal.ensure_supported_provider!(RCrewAI.configuration.llm_provider)
+      Multimodal.content_parts(text, attachments)
+    end
+    # Retrieves knowledge chunks relevant to the task from the agent's own
+    # knowledge base and/or the crew-level base injected via #crew_knowledge=.
+    #
+    # Each base is asked for its top 3 matches for the task description;
+    # duplicates across bases are removed and the rest joined with "---"
+    # separator lines. Returns an empty string when no base is configured or
+    # when retrieval raises (best-effort: the failure is logged, not raised).
+    def retrieve_knowledge(task)
+      bases = [@knowledge, @crew_knowledge].compact
+      return '' if bases.empty?
+      chunks = bases.flat_map { |kb| kb.search(task.description, k: 3) }
+      chunks.uniq.join("\n---\n")
+    rescue StandardError => e
+      @logger.warn("Knowledge retrieval failed: #{e.message}")
+      ''
+    end
+    def build_task_result(task, runner_result, reasoning: nil)
       {
         task: task.name,
         agent: name,
@@ -240,10 +302,14 @@ module RCrewAI
         tool_calls_history: runner_result[:tool_calls_history] || [],
         usage: runner_result[:usage] || {},
         iterations: runner_result[:iterations],
-        finish_reason: runner_result[:finish_reason]
+        finish_reason: runner_result[:finish_reason],
+        reasoning: reasoning
       }
     end
+    # Chooses the runner class used to execute the task, based on whether every
+    # tool exposes a JSON schema and whether the LLM client supports native
+    # tool calls. (The pre-answer reasoning pass is implemented in
+    # AgentAugmentations#run_reasoning_pass.)
     def pick_runner_class
       schemas_ok = @tools.empty? || @tools.all? { |t| t.respond_to?(:json_schema) && t.json_schema }
       native = @llm_client.respond_to?(:supports_native_tools?) &&

data/lib/rcrewai/agent_augmentations.rb ADDED Viewed

@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+require_relative 'context_window'
+module RCrewAI
+  # Optional per-task augmentations mixed into Agent: a reasoning/planning pass
+  # before answering, and context-window trimming of the message history.
+  # Kept in a module so Agent's core stays focused.
+  #
+  # The including class is expected to provide @reasoning,
+  # @respect_context_window, @max_reasoning_attempts, @llm_client, @logger and
+  # a #role reader.
+  module AgentAugmentations
+    # @return [Boolean, nil] whether the reasoning pass is enabled
+    def reasoning?
+      @reasoning
+    end
+    # @return [Boolean, nil] whether message history is trimmed to the window
+    def respect_context_window?
+      @respect_context_window
+    end
+    # Trims a message list to fit the model's context window when the agent has
+    # respect_context_window enabled; otherwise returns it unchanged. Called by
+    # the runners before each LLM call.
+    #
+    # @param messages [Array<Hash>] chat messages with :role and :content
+    # @return [Array<Hash>] the same list, or a trimmed copy
+    def fit_context(messages)
+      return messages unless @respect_context_window
+      limit = ContextWindow.window_for(llm_model_name)
+      # Leave room for the completion; a nil/negative max_tokens reserves nothing.
+      reserve = [RCrewAI.configuration.max_tokens.to_i, 0].max
+      ContextWindow.fit(messages, limit: limit, reserve: reserve)
+    end
+    private
+    # Asks the LLM to think through an approach before answering. Retries up to
+    # @max_reasoning_attempts if the model returns empty output; returns nil if
+    # every attempt is empty (execution then proceeds without a plan).
+    # Any error is logged as a warning and also yields nil.
+    #
+    # @param task [#description, #expected_output]
+    # @return [String, nil] the stripped plan text, or nil
+    def run_reasoning_pass(task)
+      prompt = <<~PROMPT
+        You are #{role}. Before answering, think step by step about how to best
+        accomplish this task. Produce a short, concrete plan (do not answer yet).
+        Task: #{task.description}
+        Expected Output: #{task.expected_output || 'not specified'}
+      PROMPT
+      @max_reasoning_attempts.times do
+        response = @llm_client.chat(messages: [{ role: 'user', content: prompt }])
+        # Clients may return either a Hash with :content or a bare string.
+        text = (response.is_a?(Hash) ? response[:content] : response).to_s.strip
+        return text unless text.empty?
+      end
+      nil
+    rescue StandardError => e
+      @logger.warn("Reasoning pass failed: #{e.message}")
+      nil
+    end
+    # Adds the reasoning trace to the user message so the answer pass can use it.
+    # Non-user messages are returned untouched. User content may be a plain
+    # string or a multimodal parts array; both are handled without flattening
+    # the parts (interpolating an Array would destroy the attachments).
+    #
+    # @param messages [Array<Hash>] chat messages with :role and :content
+    # @param reasoning [String] plan text produced by the reasoning pass
+    # @return [Array<Hash>] a new message list
+    def inject_reasoning(messages, reasoning)
+      suffix = "\n\nYour plan:\n#{reasoning}"
+      messages.map do |msg|
+        next msg unless msg[:role] == 'user'
+        { role: 'user', content: append_reasoning_to_content(msg[:content], suffix) }
+      end
+    end
+    # Appends +suffix+ to string content, or to the last text part of a
+    # multimodal parts array. When the array has no text part, a new text part
+    # is appended instead. The input array and its parts are not mutated.
+    def append_reasoning_to_content(content, suffix)
+      return "#{content}#{suffix}" unless content.is_a?(Array)
+      index = content.rindex { |part| reasoning_text_part?(part) }
+      return content + [{ type: 'text', text: suffix.lstrip }] unless index
+      content.each_with_index.map do |part, i|
+        next part unless i == index
+        # Preserve whichever key style (symbol or string) the part already uses.
+        key = part.key?(:text) ? :text : 'text'
+        part.merge(key => "#{part[key]}#{suffix}")
+      end
+    end
+    # True for a parts-array entry of type "text" (symbol or string keys).
+    def reasoning_text_part?(part)
+      part.is_a?(Hash) && (part[:type] || part['type']).to_s == 'text'
+    end
+    # Best-effort model name from the (possibly wrapped) client, for context
+    # window sizing. Falls back to the global configured model.
+    def llm_model_name
+      if @llm_client.respond_to?(:config) && @llm_client.config.respond_to?(:model)
+        @llm_client.config.model
+      else
+        RCrewAI.configuration.model
+      end
+    rescue StandardError
+      RCrewAI.configuration.model
+    end
+  end
+end

data/lib/rcrewai/configuration.rb CHANGED Viewed

@@ -59,6 +59,26 @@ module RCrewAI
       end
     end
+    # Returns a copy of this configuration with the given per-agent overrides
+    # applied. The original configuration is left untouched, so agents can each
+    # target a different provider/model without mutating global state.
+    #
+    #   config.with_overrides(provider: :anthropic, model: 'claude-3-opus-20240229')
+    #
+    # Every argument defaults to nil, meaning "keep the current value".
+    # NOTE(review): the copy is made with #dup, which is shallow — confirm the
+    # configuration holds no mutable objects that are later modified in place.
+    def with_overrides(provider: nil, model: nil, api_key: nil, temperature: nil)
+      copy = dup
+      copy.llm_provider = provider.to_sym if provider
+      # Provider-specific setters are resolved against the (possibly overridden) provider.
+      target = copy.llm_provider
+      copy.public_send("#{target}_model=", model) if model && copy.respond_to?("#{target}_model=")
+      copy.model = model if model
+      copy.public_send("#{target}_api_key=", api_key) if api_key && copy.respond_to?("#{target}_api_key=")
+      copy.api_key = api_key if api_key
+      copy.temperature = temperature unless temperature.nil?
+      copy
+    end
     def validate!
       raise ConfigurationError, 'LLM provider must be set' if @llm_provider.nil?
       raise ConfigurationError, "API key must be set for #{@llm_provider}" if api_key.nil? || api_key.empty?

data/lib/rcrewai/context_window.rb ADDED Viewed

@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+module RCrewAI
+  # Keeps a conversation within a model's context window by dropping the oldest
+  # non-system messages when it would overflow. Token counts use a cheap
+  # chars/4 heuristic (no tokenizer dependency); the goal is to avoid hard
+  # context-length errors, not exact accounting.
+  module ContextWindow
+    CHARS_PER_TOKEN = 4
+    DEFAULT_WINDOW = 8_192
+    # Approximate context window sizes (in tokens) by model.
+    WINDOWS = {
+      'gpt-4o' => 128_000,
+      'gpt-4o-mini' => 128_000,
+      'gpt-4-turbo' => 128_000,
+      'gpt-4' => 8_192,
+      'gpt-3.5-turbo' => 16_385,
+      'claude-opus-4-7' => 200_000,
+      'claude-sonnet-4-6' => 200_000,
+      'claude-haiku-4-5' => 200_000,
+      'claude-3-5-sonnet-20241022' => 200_000,
+      'claude-3-haiku-20240307' => 200_000,
+      'gemini-1.5-pro' => 1_000_000,
+      'gemini-1.5-flash' => 1_000_000
+    }.freeze
+    module_function
+    # Estimates the token count of a string or of a list of messages.
+    #
+    # @param input [String, Array<Hash>] text, or messages with a :content key
+    # @return [Integer] total characters / CHARS_PER_TOKEN, rounded up
+    def estimate_tokens(input)
+      text = input.is_a?(Array) ? input.map { |m| m[:content].to_s }.join : input.to_s
+      (text.length / CHARS_PER_TOKEN.to_f).ceil
+    end
+    # Looks up the context window for a model name.
+    #
+    # Exact names win. Otherwise the longest known name followed by "-" is
+    # used, so dated snapshots such as "gpt-4o-2024-08-06" resolve to their
+    # family instead of the small default. Unknown or nil names get
+    # DEFAULT_WINDOW.
+    #
+    # @param model [String, Symbol, nil]
+    # @return [Integer] window size in tokens
+    def window_for(model)
+      name = model.to_s
+      return WINDOWS[name] if WINDOWS.key?(name)
+      family = WINDOWS.keys.select { |known| name.start_with?("#{known}-") }.max_by(&:length)
+      family ? WINDOWS[family] : DEFAULT_WINDOW
+    end
+    # Returns a copy of +messages+ trimmed to fit within (limit - reserve)
+    # tokens. System messages are always kept, as is the final message. The
+    # oldest non-system, non-final messages are dropped first. When the list
+    # already fits (or is empty) the original object is returned unchanged.
+    # The result may still exceed the budget if the system and final messages
+    # alone are too large.
+    #
+    # @param messages [Array<Hash>] chat messages with :role and :content
+    # @param limit [Integer] model context window in tokens
+    # @param reserve [Integer] tokens to hold back for the completion
+    # @return [Array<Hash>]
+    def fit(messages, limit:, reserve: 0)
+      budget = limit - reserve
+      # Empty input has nothing to trim (and no final message to keep).
+      return messages if messages.empty? || estimate_tokens(messages) <= budget
+      system = messages.select { |m| m[:role] == 'system' }
+      last = messages.last
+      # Candidates for dropping: everything that isn't a system message or the
+      # final message, oldest first.
+      kept_middle = messages.reject { |m| m[:role] == 'system' || m.equal?(last) }
+      kept_middle.shift until kept_middle.empty? || fits?(system, kept_middle, last, budget)
+      rebuild(messages, system, kept_middle, last)
+    end
+    # -- helpers --------------------------------------------------------------
+    # True when system + middle + last fit the budget. Membership is checked by
+    # object identity: two distinct messages with identical text are both
+    # counted.
+    def fits?(system, middle, last, budget)
+      parts = system + middle
+      parts << last unless last.nil? || parts.any? { |m| m.equal?(last) }
+      estimate_tokens(parts) <= budget
+    end
+    # Selects the kept messages from +original+, preserving original ordering.
+    # Uses an identity-keyed hash so a dropped message that merely equals a
+    # kept one (same role and content) is not resurrected.
+    def rebuild(original, system, middle, last)
+      keep = {}.compare_by_identity
+      (system + middle).each { |m| keep[m] = true }
+      keep[last] = true unless last.nil?
+      original.select { |m| keep.key?(m) }
+    end
+  end
+end