rcrewai 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Flows — event-driven workflows (RCrewAI's second pillar).
5
+ #
6
+ # Subclass RCrewAI::Flow and wire methods together with the class-level DSL:
7
+ # `start` kicks things off, `listen` reacts to another method's output, and
8
+ # `router` branches by emitting a label that listeners can trigger on. State
9
+ # is a schemaless object with an automatic UUID, and can be persisted so a run
10
+ # can be resumed later.
11
+ #
12
+ # This example needs no API key — it demonstrates the engine itself.
13
+ #
14
+ # Run:
15
+ # ruby examples/flow_example.rb
16
+
17
+ require_relative '../lib/rcrewai'
18
+
19
+ # A tiny content pipeline: outline -> draft -> review (router) -> publish/expand.
20
# Demo flow for a small content pipeline. Each step is registered by the
# class-level DSL call written directly above its method definition.
class ArticleFlow < RCrewAI::Flow
  # Entry step: records the section names on state and returns how many there
  # are; that count is what listeners of :outline receive.
  start :outline
  def outline
    state.sections = %w[intro body conclusion]
    state.sections.size
  end

  # Runs after :outline and budgets 100 words per section.
  listen :outline
  def draft(section_count)
    word_budget = section_count * 100
    state.words = word_budget
    state.words
  end

  # Router step: the returned symbol (:publish or :expand) is the label that
  # the `listen` methods below are keyed on.
  router :draft
  def review(words)
    return :publish if words >= 250

    :expand
  end

  # Fires on the :publish label.
  listen :publish
  def publish
    state.status = 'published'
  end

  # Fires on the :expand label.
  listen :expand
  def expand
    state.status = 'needs more work'
  end
end
50
+
51
+ puts '== Basic run =='
52
+ flow = ArticleFlow.new
53
+ flow.kickoff(inputs: { author: 'Ada' })
54
+ puts "id: #{flow.state.id}"
55
+ puts "author: #{flow.state.author} (seeded via kickoff inputs)"
56
+ puts "sections: #{flow.state.sections.inspect}"
57
+ puts "words: #{flow.state.words}"
58
+ puts "status: #{flow.state.status.inspect} (routed to :publish since words >= 250)"
59
+
60
+ puts "\n== and_/or_ combinators =="
61
# Demo flow with two independent start steps joined by an `and_` listener.
class GateFlow < RCrewAI::Flow
  start :fetch_a
  def fetch_a
    'A'
  end

  start :fetch_b
  def fetch_b
    'B'
  end

  # Registered on and_(:fetch_a, :fetch_b), i.e. on the combination of both
  # start steps rather than on either one alone.
  listen and_(:fetch_a, :fetch_b)
  def merge
    state.merged = 'both done'
  end
end
74
+
75
+ gate = GateFlow.new
76
+ gate.kickoff
77
+ puts "merged: #{gate.state.merged.inspect} (and_ waited for both starts)"
78
+
79
+ puts "\n== Persistence round-trip =="
80
+ require 'tmpdir'
81
+ store = RCrewAI::Flow::FileStateStore.new(File.join(Dir.tmpdir, 'rcrewai-flow-demo'))
82
+
83
+ original = ArticleFlow.new(state_store: store)
84
+ original.kickoff
85
+ id = original.state.id
86
+
87
+ resumed = ArticleFlow.new(state_store: store)
88
+ resumed.restore(id)
89
+ puts "restored status for #{id[0, 8]}...: #{resumed.state.status.inspect}"
@@ -0,0 +1,72 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Knowledge (RAG) — ground agents in your own documents.
5
+ #
6
+ # Sources (strings, files, PDFs, CSVs, URLs) are chunked, embedded, and stored
7
+ # in an in-memory cosine-similarity vector store. At execution time the most
8
+ # relevant chunks are injected into the agent's task prompt.
9
+ #
10
+ # This example uses a fake, deterministic embedder so it runs WITHOUT an API
11
+ # key. In real use you'd omit `embedder:` and let it default to OpenAI's
12
+ # text-embedding-3-small (set OPENAI_API_KEY).
13
+ #
14
+ # Run:
15
+ # ruby examples/knowledge_rag_example.rb
16
+
17
+ require_relative '../lib/rcrewai'
18
+
19
+ # A toy embedder: maps text to a small vector by keyword presence. Any object
20
+ # responding to `embed(texts) -> [[float, ...], ...]` works here.
21
class KeywordEmbedder
  KEYWORDS = %w[refund shipping warranty].freeze

  # Maps each text to one vector with a slot per entry in KEYWORDS: 1.0 when
  # the lower-cased text contains that keyword, 0.0 otherwise.
  def embed(texts)
    texts.map { |text| keyword_vector(text.downcase) }
  end

  private

  # Builds the presence vector for an already lower-cased string.
  def keyword_vector(lowered)
    KEYWORDS.map do |keyword|
      lowered.include?(keyword) ? 1.0 : 0.0
    end
  end
end
31
+
32
+ # 1. Build a knowledge base from a few policy snippets.
33
+ knowledge = RCrewAI::Knowledge::Base.new(
34
+ sources: [
35
+ RCrewAI::Knowledge::StringSource.new('Refunds are available within 30 days of purchase.'),
36
+ RCrewAI::Knowledge::StringSource.new('Standard shipping takes 5-7 business days.'),
37
+ RCrewAI::Knowledge::StringSource.new('The warranty covers manufacturing defects for one year.')
38
+ ],
39
+ embedder: KeywordEmbedder.new
40
+ )
41
+
42
+ # 2. Retrieve directly (what the agent does under the hood).
43
+ puts '== Direct retrieval =='
44
+ %w[refund shipping warranty].each do |query|
45
+ top = knowledge.search(query, k: 1).first
46
+ puts "#{query.ljust(9)} -> #{top}"
47
+ end
48
+
49
+ # 3. Attach the knowledge to an agent and see it injected into the prompt.
50
+ puts "\n== Injected into the agent prompt =="
51
+ RCrewAI.configure(validate: false) do |c|
52
+ c.llm_provider = :openai
53
+ c.api_key = 'demo-key' # not used — we only build the prompt below
54
+ end
55
+
56
+ agent = RCrewAI::Agent.new(
57
+ name: 'support',
58
+ role: 'Customer support specialist',
59
+ goal: 'Answer customer questions using company policy',
60
+ knowledge: knowledge
61
+ )
62
+ task = RCrewAI::Task.new(
63
+ name: 'answer',
64
+ description: 'What is the refund policy?',
65
+ agent: agent
66
+ )
67
+
68
+ messages = agent.send(:build_initial_messages, task)
69
+ puts messages.find { |m| m[:role] == 'user' }[:content]
70
+
71
+ # Crew-level knowledge is shared with every agent, e.g.:
72
+ # crew = RCrewAI::Crew.new('support', knowledge: knowledge)
@@ -0,0 +1,72 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Crew planning, plus the train/test workflows.
5
+ #
6
+ # - planning: true -> a planner pass drafts a per-task plan and
7
+ # folds it into each task's description before
8
+ # execution.
9
+ # - crew.train(...) -> runs the crew repeatedly, collecting feedback
10
+ # after each run and persisting it as JSON.
11
+ # - crew.test(...) -> runs the crew repeatedly and scores each run.
12
+ #
13
+ # This example stubs the planner LLM and the process so it runs WITHOUT an API
14
+ # key, focusing on the wiring.
15
+ #
16
+ # Run:
17
+ # ruby examples/planning_and_training_example.rb
18
+
19
+ require_relative '../lib/rcrewai'
20
+ require 'tmpdir'
21
+
22
+ RCrewAI.configure(validate: false) do |c|
23
+ c.llm_provider = :openai
24
+ c.api_key = 'demo-key'
25
+ end
26
+
27
+ # A fake planner client: returns a JSON map of task name -> plan.
28
class FakePlanner
  # Canned planner reply: a JSON object keyed by task name.
  PLAN_JSON = '{"research": "list 3 sources", "summarize": "write 5 bullets"}'

  # Ignores every keyword argument and always answers with the canned plan.
  def chat(**_ignored)
    { content: PLAN_JSON }
  end
end
33
+
34
+ agent = RCrewAI::Agent.new(name: 'analyst', role: 'Analyst', goal: 'Analyze')
35
+ research = RCrewAI::Task.new(name: 'research', description: 'Research the topic', agent: agent)
36
+ summarize = RCrewAI::Task.new(name: 'summarize', description: 'Summarize findings', agent: agent)
37
+
38
+ crew = RCrewAI::Crew.new('analysis', planning: true, planning_llm: FakePlanner.new)
39
+ crew.add_agent(agent)
40
+ crew.add_task(research)
41
+ crew.add_task(summarize)
42
+
43
+ # Stub the actual task execution so the demo needs no live LLM.
44
module RCrewAI
  module Process
    # Demo-only override: replaces #execute with a canned single-entry result
    # so the example never reaches a live LLM.
    class Sequential
      def execute
        completed = { status: :completed }
        [completed]
      end
    end
  end
end
53
+
54
+ puts '== Planning pass =='
55
+ crew.execute
56
+ puts "research.description:\n #{research.description.gsub("\n", "\n ")}"
57
+ puts "summarize.description:\n #{summarize.description.gsub("\n", "\n ")}"
58
+
59
puts "\n== Training (feedback persisted to JSON) =="
file = File.join(Dir.tmpdir, 'rcrewai-training-demo.json')
begin
  summary = crew.train(
    n_iterations: 3,
    filename: file,
    feedback: ->(iteration, _result) { "run #{iteration}: looked good" }
  )
  puts "iterations: #{summary[:iterations]}, file: #{summary[:filename]}"
  puts File.read(file)
ensure
  # Remove the temp file even when training or the read above raises; the
  # existence check keeps cleanup from masking the original error when the
  # file was never written.
  File.delete(file) if File.exist?(file)
end
69
+
70
+ puts "\n== Testing (per-run scores) =="
71
+ result = crew.test(n_iterations: 3, scorer: ->(_run) { 90.0 + rand(10) })
72
+ puts "scores: #{result[:scores].inspect}, average: #{result[:average_score]}"
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Structured output, guardrails, and file output on a Task.
5
+ #
6
+ # After the agent produces its answer, a Task can:
7
+ # - validate & coerce it against a JSON schema (output_schema:)
8
+ # - validate & transform it with a guardrail (guardrail:)
9
+ # - write it to disk, optionally as markdown (output_file:, markdown:)
10
+ #
11
+ # Schema/guardrail failures re-run the agent with the error fed back.
12
+ #
13
+ # This example stubs the agent so it runs WITHOUT an API key. In real use the
14
+ # agent calls your configured LLM.
15
+ #
16
+ # Run:
17
+ # ruby examples/structured_output_example.rb
18
+
19
+ require_relative '../lib/rcrewai'
20
+ require 'tmpdir'
21
+
22
+ # A stand-in agent: returns canned responses so we can demonstrate the
23
+ # post-processing pipeline deterministically. A real Agent behaves the same
24
+ # way from the Task's point of view (it returns { content: "..." }).
25
# Stand-in agent that replays a fixed list of responses, one per call.
class ScriptedAgent
  # responses - Array of canned answers, returned in order.
  #
  # A copy is kept so that consuming responses never mutates the caller's
  # array.
  def initialize(responses)
    @responses = responses.dup
  end

  # This stand-in exposes no tools.
  def tools = []

  # Returns the next canned response wrapped as { content: ... }. Once the
  # script is exhausted the content is nil.
  def execute_task(_task)
    { content: @responses.shift }
  end
end
36
+
37
+ puts '== Structured output (with a repair retry) =='
38
+ # First response is invalid JSON; the task feeds the error back and retries,
39
+ # and the second response conforms to the schema.
40
+ agent = ScriptedAgent.new(['sorry, not sure', '{"title": "Q3 Report", "words": 1200}'])
41
+
42
+ task = RCrewAI::Task.new(
43
+ name: 'extract',
44
+ description: 'Extract the article title and word count as JSON',
45
+ agent: agent,
46
+ output_schema: {
47
+ type: 'object',
48
+ properties: { title: { type: 'string' }, words: { type: 'integer' } },
49
+ required: ['title']
50
+ }
51
+ )
52
+ task.execute
53
+ puts "structured_output: #{task.structured_output.inspect}"
54
+ puts "raw_result: #{task.raw_result.inspect}"
55
+
56
+ puts "\n== Guardrail (transform + reject/retry) =="
57
+ # The guardrail requires the answer to mention a price; the first attempt does
58
+ # not, so the task re-runs, and the second attempt passes (and is stripped).
59
+ agent = ScriptedAgent.new(['no price yet', ' Final price: $49 '])
60
+
61
guardrail = lambda do |output|
  # reject with a reason (fed back to the agent)
  next [false, 'must include a price'] unless output.include?('$')

  # accept + transform
  [true, output.strip]
end
68
+
69
+ task = RCrewAI::Task.new(
70
+ name: 'quote',
71
+ description: 'Give the final price',
72
+ agent: agent,
73
+ guardrail: guardrail,
74
+ guardrail_max_retries: 2
75
+ )
76
+ puts "result: #{task.execute.inspect}"
77
+
78
puts "\n== File output (markdown) =="
agent = ScriptedAgent.new(['All systems nominal.'])
path = File.join(Dir.tmpdir, 'rcrewai-report-demo.md')

task = RCrewAI::Task.new(
  name: 'report',
  description: 'Write a status report',
  agent: agent,
  output_file: path,
  markdown: true
)
begin
  task.execute
  puts "wrote #{path}:"
  puts File.read(path)
ensure
  # Clean up the temp report even if execution or the read fails; skip the
  # delete when the file was never created so the original error surfaces.
  File.delete(path) if File.exist?(path)
end
data/lib/rcrewai/agent.rb CHANGED
@@ -3,6 +3,9 @@
3
3
  require 'logger'
4
4
  require_relative 'llm_client'
5
5
  require_relative 'memory'
6
+ require_relative 'rate_limiter'
7
+ require_relative 'agent_augmentations'
8
+ require_relative 'multimodal'
6
9
  require_relative 'tools/base'
7
10
  require_relative 'tool_runner'
8
11
  require_relative 'legacy_react_runner'
@@ -11,8 +14,11 @@ require_relative 'human_input'
11
14
  module RCrewAI
12
15
  class Agent
13
16
  include HumanInteractionExtensions
14
- attr_reader :name, :role, :goal, :backstory, :tools, :memory, :llm_client
17
+ include AgentAugmentations
18
+ attr_reader :name, :role, :goal, :backstory, :tools, :memory, :llm_client, :knowledge, :rate_limiter
15
19
  attr_accessor :verbose, :allow_delegation, :max_iterations, :max_execution_time, :manager
20
+ # Set by the crew so agents see shared knowledge in addition to their own.
21
+ attr_writer :crew_knowledge
16
22
 
17
23
  def initialize(name:, role:, goal:, backstory: nil, tools: [], **options)
18
24
  @name = name
@@ -30,8 +36,13 @@ module RCrewAI
30
36
  @require_approval_for_final_answer = options.fetch(:require_approval_for_final_answer, false)
31
37
  @logger = Logger.new($stdout)
32
38
  @logger.level = verbose ? Logger::DEBUG : Logger::INFO
39
+ @reasoning = options.fetch(:reasoning, false)
40
+ @max_reasoning_attempts = options.fetch(:max_reasoning_attempts, 3)
41
+ @respect_context_window = options.fetch(:respect_context_window, false)
33
42
  @memory = Memory.new
34
- @llm_client = LLMClient.for_provider
43
+ @rate_limiter = options[:max_rpm] ? RateLimiter.new(max_rpm: options[:max_rpm]) : nil
44
+ @llm_client = wrap_with_rate_limiter(build_llm_client(options[:llm]))
45
+ @knowledge = build_knowledge(options[:knowledge], options[:knowledge_sources])
35
46
  @subordinates = [] # For manager agents
36
47
  end
37
48
 
@@ -43,6 +54,9 @@ module RCrewAI
43
54
  initial_messages = build_initial_messages(task)
44
55
  sink = stream || ->(_) {}
45
56
 
57
+ reasoning = reasoning? ? run_reasoning_pass(task) : nil
58
+ initial_messages = inject_reasoning(initial_messages, reasoning) if reasoning
59
+
46
60
  runner_class = pick_runner_class
47
61
  @logger.info "[rcrewai] agent=#{name} runner=#{runner_class.name.split('::').last}"
48
62
 
@@ -60,7 +74,7 @@ module RCrewAI
60
74
  memory.add_execution(task, result_string, execution_time)
61
75
  task.result = result_string
62
76
 
63
- build_task_result(task, runner_result)
77
+ build_task_result(task, runner_result, reasoning: reasoning)
64
78
  rescue StandardError => e
65
79
  @logger.error "Task execution failed: #{e.message}"
66
80
  task.result = "Task failed: #{e.message}"
@@ -194,6 +208,28 @@ module RCrewAI
194
208
 
195
209
  private
196
210
 
211
# Turns the +llm:+ option into a client object by delegating to
# LLMClient.resolve.
def build_llm_client(llm)
  LLMClient.resolve(llm)
end

# Returns +client+ untouched when no rate limiter is configured; otherwise
# returns it wrapped in a RateLimiter::ThrottledClient bound to @rate_limiter.
def wrap_with_rate_limiter(client)
  if @rate_limiter
    RateLimiter::ThrottledClient.new(client, @rate_limiter)
  else
    client
  end
end

# Chooses the agent's knowledge base.
#
# knowledge - a ready-made base (+knowledge:+ option); returned as-is when given.
# sources   - a list of sources (+knowledge_sources:+ option); wrapped in a new
#             Knowledge::Base when non-empty.
#
# Returns nil when neither argument supplies anything.
def build_knowledge(knowledge, sources)
  if knowledge
    knowledge
  elsif sources.nil? || sources.empty?
    nil
  else
    Knowledge::Base.new(sources: sources)
  end
end
232
+
197
233
  def build_context(task)
198
234
  context = {
199
235
  agent_role: role,
@@ -226,13 +262,39 @@ module RCrewAI
226
262
  user << "\nExpected Output: #{task.expected_output}" if task.expected_output
227
263
  user << "\nAdditional Context:\n#{ctx[:context_data]}" if ctx[:context_data] && !ctx[:context_data].to_s.empty?
228
264
 
265
+ knowledge = retrieve_knowledge(task)
266
+ user << "\n\nRelevant Knowledge:\n#{knowledge}" unless knowledge.empty?
267
+
229
268
  [
230
269
  { role: 'system', content: system },
231
- { role: 'user', content: user }
270
+ { role: 'user', content: build_user_content(user, task) }
232
271
  ]
233
272
  end
234
273
 
235
- def build_task_result(task, runner_result)
274
# Produces the content for the user message.
#
# When the task exposes no attachments (or an empty list) the text is returned
# unchanged. Otherwise the configured provider is checked via
# Multimodal.ensure_supported_provider! and the result of
# Multimodal.content_parts(text, attachments) is returned.
#
# NOTE(review): the provider check reads the global RCrewAI.configuration, not
# this agent's own client — confirm that is intended for agents given a
# per-agent +llm:+ override.
def build_user_content(text, task)
  files = task.attachments if task.respond_to?(:attachments)

  if files.nil? || files.empty?
    text
  else
    provider = RCrewAI.configuration.llm_provider
    Multimodal.ensure_supported_provider!(provider)
    Multimodal.content_parts(text, files)
  end
end
283
+
284
# Looks up knowledge relevant to the task description in the agent's own base
# (@knowledge) and in the crew-level base assigned through #crew_knowledge=.
#
# Each base is searched with k: 3; results are de-duplicated and joined with a
# "---" separator line. Returns an empty string when no base is configured or
# when retrieval raises (the failure is logged as a warning).
def retrieve_knowledge(task)
  sources = [@knowledge, @crew_knowledge].compact
  return '' if sources.empty?

  query = task.description
  hits = sources.flat_map { |base| base.search(query, k: 3) }
  hits.uniq.join("\n---\n")
rescue StandardError => e
  @logger.warn("Knowledge retrieval failed: #{e.message}")
  ''
end
296
+
297
+ def build_task_result(task, runner_result, reasoning: nil)
236
298
  {
237
299
  task: task.name,
238
300
  agent: name,
@@ -240,10 +302,14 @@ module RCrewAI
240
302
  tool_calls_history: runner_result[:tool_calls_history] || [],
241
303
  usage: runner_result[:usage] || {},
242
304
  iterations: runner_result[:iterations],
243
- finish_reason: runner_result[:finish_reason]
305
+ finish_reason: runner_result[:finish_reason],
306
+ reasoning: reasoning
244
307
  }
245
308
  end
246
309
 
310
+ # Selects the runner class for this agent. The choice looks at whether every
311
+ # tool exposes a JSON schema and whether the LLM client reports native tool
312
+ # support.
247
313
  def pick_runner_class
248
314
  schemas_ok = @tools.empty? || @tools.all? { |t| t.respond_to?(:json_schema) && t.json_schema }
249
315
  native = @llm_client.respond_to?(:supports_native_tools?) &&
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'context_window'
4
+
5
module RCrewAI
  # Optional per-task augmentations mixed into Agent: a reasoning/planning pass
  # before answering, and context-window trimming of the message history.
  # Kept in a module so Agent's core stays focused.
  #
  # The host object is expected to provide @reasoning, @respect_context_window,
  # @max_reasoning_attempts, @llm_client, @logger and a #role reader.
  module AgentAugmentations
    # Returns the value of the +reasoning+ flag stored on the host.
    def reasoning?
      @reasoning
    end

    # Returns the value of the +respect_context_window+ flag stored on the host.
    def respect_context_window?
      @respect_context_window
    end

    # Trims a message list to fit the model's context window when the agent has
    # respect_context_window enabled; otherwise returns it unchanged.
    #
    # The configured max_tokens (clamped at zero) is reserved for the reply.
    def fit_context(messages)
      return messages unless @respect_context_window

      limit = ContextWindow.window_for(llm_model_name)
      reserve = [RCrewAI.configuration.max_tokens.to_i, 0].max
      ContextWindow.fit(messages, limit: limit, reserve: reserve)
    end

    private

    # Asks the LLM to think through an approach before answering. Retries up to
    # @max_reasoning_attempts if the model returns empty output; returns nil if
    # every attempt is empty (execution then proceeds without a plan). Any
    # error is logged as a warning and also yields nil.
    def run_reasoning_pass(task)
      prompt = <<~PROMPT
        You are #{role}. Before answering, think step by step about how to best
        accomplish this task. Produce a short, concrete plan (do not answer yet).

        Task: #{task.description}
        Expected Output: #{task.expected_output || 'not specified'}
      PROMPT

      @max_reasoning_attempts.times do
        response = @llm_client.chat(messages: [{ role: 'user', content: prompt }])
        text = (response.is_a?(Hash) ? response[:content] : response).to_s.strip
        return text unless text.empty?
      end
      nil
    rescue StandardError => e
      @logger.warn("Reasoning pass failed: #{e.message}")
      nil
    end

    # Adds the reasoning trace to every user message so the answer pass can use
    # it. Non-user messages are passed through untouched.
    #
    # String content gets the plan appended as text. Array content (multimodal
    # parts) gets the plan appended as one extra text part instead of being
    # interpolated, which would flatten the parts into their inspect string.
    # Other keys on the message are preserved.
    def inject_reasoning(messages, reasoning)
      plan = "Your plan:\n#{reasoning}"

      messages.map do |msg|
        next msg unless msg[:role] == 'user'

        content = msg[:content]
        merged =
          if content.is_a?(Array)
            # NOTE(review): assumes parts are symbol-keyed { type:, text: }
            # hashes as produced by Multimodal.content_parts — confirm.
            content + [{ type: 'text', text: plan }]
          else
            "#{content}\n\n#{plan}"
          end

        msg.merge(content: merged)
      end
    end

    # Best-effort model name from the (possibly wrapped) client, for context
    # window sizing. Falls back to the global configured model when the client
    # exposes no config/model, reports a nil model, or raises.
    def llm_model_name
      client_model =
        if @llm_client.respond_to?(:config) && @llm_client.config.respond_to?(:model)
          @llm_client.config.model
        end

      client_model || RCrewAI.configuration.model
    rescue StandardError
      RCrewAI.configuration.model
    end
  end
end
@@ -59,6 +59,26 @@ module RCrewAI
59
59
  end
60
60
  end
61
61
 
62
# Builds a duplicate of this configuration carrying per-agent overrides; the
# receiver itself is not modified.
#
# provider    - switches llm_provider on the copy (converted with to_sym).
# model       - written to "<provider>_model=" when the copy responds to it,
#               and always to model=.
# api_key     - written to "<provider>_api_key=" when the copy responds to it,
#               and always to api_key=.
# temperature - applied whenever it is not nil.
#
#   config.with_overrides(provider: :anthropic, model: 'claude-3-opus-20240229')
def with_overrides(provider: nil, model: nil, api_key: nil, temperature: nil)
  dup.tap do |copy|
    copy.llm_provider = provider.to_sym if provider
    target = copy.llm_provider

    if model
      model_writer = "#{target}_model="
      copy.public_send(model_writer, model) if copy.respond_to?(model_writer)
      copy.model = model
    end

    if api_key
      key_writer = "#{target}_api_key="
      copy.public_send(key_writer, api_key) if copy.respond_to?(key_writer)
      copy.api_key = api_key
    end

    copy.temperature = temperature unless temperature.nil?
  end
end
81
+
62
82
  def validate!
63
83
  raise ConfigurationError, 'LLM provider must be set' if @llm_provider.nil?
64
84
  raise ConfigurationError, "API key must be set for #{@llm_provider}" if api_key.nil? || api_key.empty?
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
module RCrewAI
  # Keeps a conversation within a model's context window by dropping the oldest
  # non-system messages when it would overflow. Token counts use a cheap
  # chars/4 heuristic (no tokenizer dependency); the goal is to avoid hard
  # context-length errors, not exact accounting.
  module ContextWindow
    CHARS_PER_TOKEN = 4
    DEFAULT_WINDOW = 8_192

    # Approximate context window sizes (in tokens) by model.
    WINDOWS = {
      'gpt-4o' => 128_000,
      'gpt-4o-mini' => 128_000,
      'gpt-4-turbo' => 128_000,
      'gpt-4' => 8_192,
      'gpt-3.5-turbo' => 16_385,
      'claude-opus-4-7' => 200_000,
      'claude-sonnet-4-6' => 200_000,
      'claude-haiku-4-5' => 200_000,
      'claude-3-5-sonnet-20241022' => 200_000,
      'claude-3-haiku-20240307' => 200_000,
      'gemini-1.5-pro' => 1_000_000,
      'gemini-1.5-flash' => 1_000_000
    }.freeze

    module_function

    # Estimates the token count of a string, or of a list of messages (the
    # string forms of their :content values are concatenated), as
    # ceil(chars / CHARS_PER_TOKEN).
    def estimate_tokens(input)
      text = input.is_a?(Array) ? input.map { |m| m[:content].to_s }.join : input.to_s
      (text.length / CHARS_PER_TOKEN.to_f).ceil
    end

    # Looks up the window size for +model+ (compared by its string form);
    # unknown or nil models get DEFAULT_WINDOW.
    def window_for(model)
      WINDOWS.fetch(model.to_s, DEFAULT_WINDOW)
    end

    # Trims +messages+ to fit within (limit - reserve) tokens.
    #
    # When the list is empty or already fits, it is returned unchanged.
    # Otherwise a new array is returned in the original order: system messages
    # and the final message are always kept, and the oldest remaining messages
    # are dropped first. If the kept messages alone exceed the budget the
    # result can still be over budget.
    def fit(messages, limit:, reserve: 0)
      budget = limit - reserve
      return messages if messages.empty? || estimate_tokens(messages) <= budget

      last = messages.last
      system = messages.select { |m| m[:role] == 'system' }
      # Candidates for dropping: everything that isn't a system message or the
      # final message, oldest first.
      middle = messages.reject { |m| m[:role] == 'system' || m.equal?(last) }

      kept_middle = middle.dup
      kept_middle.shift until kept_middle.empty? || fits?(system, kept_middle, last, budget)

      rebuild(messages, system, kept_middle, last)
    end

    # -- helpers --------------------------------------------------------------

    # True when system + middle (+ last, unless that very object is already
    # among them) fit the budget. Membership is checked by object identity so a
    # final message that merely has the same content as an earlier one is
    # still counted.
    def fits?(system, middle, last, budget)
      parts = system + middle
      parts << last unless last.nil? || parts.any? { |m| m.equal?(last) }
      estimate_tokens(parts) <= budget
    end

    # Returns the messages of +original+ that survive trimming, in their
    # original order. Survivors are matched by object identity, so a dropped
    # message is not resurrected just because an equal-looking one was kept.
    def rebuild(original, system, middle, last)
      keep = {}.compare_by_identity
      system.each { |m| keep[m] = true }
      middle.each { |m| keep[m] = true }
      keep[last] = true unless last.nil?

      original.select { |m| keep.key?(m) }
    end
  end
end